好友
阅读权限100
听众
最后登录1970-1-1
|
本帖最后由 侃遍天下无二人 于 2024-10-27 19:21 编辑
当然是先研究下文件的url是咋来的了,我几年前写过一个,可以给你参考,当时api版本还是 v1
[Java] 纯文本查看 复制代码
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Command-line entry point: loads the configuration, lets {@link JsonProcessor} collect the
 * topics and render them as Markdown, writes the result to {@code output.md} and, when the
 * {@code download} option is enabled, triggers the file download.
 */
public class ZSXQ_downloader {
    /** Effective configuration: built-in defaults overlaid with the values read from {@code config.json}. */
    static Map<String,Object> config_map = new TreeMap<>();

    /**
     * Fills {@link #config_map} with the defaults and merges {@code config.json} into it.
     *
     * <p>When {@code config.json} does not exist it is created from the defaults and the method
     * returns without reading anything back. Note that in this case the {@code accept_author_id}
     * and {@code block_topics_id} entries are written to the file only; they are not put into
     * {@link #config_map} for the current run.
     *
     * @throws Exception if the configuration file cannot be written, read or parsed
     */
    private static void init() throws Exception{
        // These two entries are essential and must never be overridden by the user configuration.
        config_map.put("url_pattern","https://api.zsxq.com/v1/groups/%s/%s?scope=all&count=20");
        config_map.put("Header",new File("header"));
        // Stored 1-based, the way months are normally written (Calendar itself counts from 0).
        config_map.put("month",Calendar.getInstance().get(Calendar.MONTH)+1);
        config_map.put("local_dir","");
        config_map.put("qid","824552281252");
        config_map.put("download",false);
        config_map.put("need_img",false);
        config_map.put("need_file",false);
        config_map.put("debug",false);

        File config = new File("config.json");
        // No configuration file yet: persist the defaults so the user has a template to edit.
        if(!config.exists()) {
            JSONObject jconfig = new JSONObject();
            for(Map.Entry<String,Object> entry:config_map.entrySet()){
                jconfig.put(entry.getKey(),entry.getValue());
            }
            JSONArray default_author = new JSONArray();
            default_author.add(48582851245248L);
            jconfig.put("accept_author_id",default_author);
            jconfig.put("block_topics_id",new JSONArray());
            // try-with-resources: the stream used to be left open.
            try(FileOutputStream fos = new FileOutputStream(config)){
                fos.write(jconfig.toString().getBytes(StandardCharsets.UTF_8));
            }
            return;
        }

        StringBuilder sb = new StringBuilder();
        try(BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(config), StandardCharsets.UTF_8))){
            String str;
            while((str = br.readLine())!=null){
                sb.append(str);
            }
        }
        JSONObject conf = JSONObject.parseObject(sb.toString());
        // parseObject yields null for an empty document; keep the defaults in that case.
        if(conf==null) return;
        for(String key:conf.keySet()){
            // Never let the file override the two essential entries.
            if(key.equals("url_pattern")||key.equals("Header")) continue;
            config_map.put(key,conf.get(key));
        }
    }

    /**
     * Runs the export.
     *
     * @param args ignored
     * @throws Exception on any I/O, network or parsing failure
     */
    public static void main(String[] args) throws Exception{
        init();
        JsonProcessor j = new JsonProcessor(config_map);
        String result = j.read_in().generate_MD();
        // Encode explicitly as UTF-8 (like every other file this program writes) instead of
        // relying on the platform default charset.
        try(FileOutputStream fos = new FileOutputStream(new File("output.md"))){
            fos.write(result.getBytes(StandardCharsets.UTF_8));
        }
        if(Boolean.TRUE.equals(config_map.get("download"))){
            j.downloadFile();
        }
    }
}
[Java] 纯文本查看 复制代码
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import java.io.*;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Map;
/**
 * Collects the topics and files of one group, filters them and renders a Markdown digest.
 *
 * <p>Topics come either from the remote API (through the {@code NetReq} helper, which is
 * declared outside this class) or from JSON responses previously saved to a local directory.
 * Filtering is driven by the configuration map: month, accepted author ids and blocked topic ids.
 */
class JsonProcessor {
    private final Map<String,Object> config_map;
    private final String url_pattern;
    private final File Header;
    /** Target month, 0-based as in {@link Calendar}; a negative value disables the month filter. */
    private final int month;
    /** Topics loaded by {@link #read_in()}; may be null when nothing was loaded. */
    private JSONArray data;

    /**
     * @param config_map configuration; must contain {@code url_pattern} (String), {@code Header}
     *                   (File) and {@code month} (1-based number)
     */
    public JsonProcessor(Map<String,Object> config_map){
        this.config_map = config_map;
        this.url_pattern = (String)config_map.get("url_pattern");
        this.Header = (File)config_map.get("Header");
        // Calendar numbers months from 0, so convert the 1-based configured value.
        this.month = ((Number)config_map.get("month")).intValue()-1;
    }

    /** Renders the topics loaded by {@link #read_in()} as Markdown. */
    public String generate_MD() throws Exception{
        return generate_MD(data);
    }

    /**
     * Loads the topics of the configured group and keeps them for {@link #generate_MD()}.
     *
     * @return this instance, for chaining
     */
    public JsonProcessor read_in() throws Exception{
        String topic_url = String.format(url_pattern,config_map.get("qid"),"topics");
        data = read_in(topic_url,month);
        return this;
    }

    /**
     * Lists the files of the group, starts one PowerShell download per file into the
     * {@code 星球文件} directory below the working directory, and writes all download URLs to a
     * text file as a fallback for manual download. Downloads are started without waiting for
     * them to finish. Requires {@code powershell} on the PATH (Windows 7 or later).
     */
    public void downloadFile() throws Exception{
        String file_url = String.format(url_pattern,config_map.get("qid"),"files");
        File dl_dir = new File(new File("").getAbsolutePath(),"星球文件");
        if(!dl_dir.exists()){
            dl_dir.mkdirs();
        }
        JSONArray files = downloadFile(file_url);
        NetReq n = new NetReq(Header);
        StringBuilder sb = new StringBuilder();
        for(int i=0;i<files.size();i++){
            JSONObject file = files.getJSONObject(i).getJSONObject("file");
            if(file==null) continue;
            String file_id = file.getString("file_id");
            String raw_name = file.getString("name");
            // Replace characters that are illegal in file names so the download cannot fail on them.
            String name = safe_file_name(raw_name==null ? String.valueOf(file_id) : raw_name);
            // e.g. https://api.zsxq.com/v2/files/51588882112214/download_url
            JSONObject resp = JSONObject.parseObject(n.get(String.format("https://api.zsxq.com/v2/files/%s/download_url",file_id)));
            JSONObject resp_data = resp==null ? null : resp.getJSONObject("resp_data");
            String download_url = resp_data==null ? null : resp_data.getString("download_url");
            if(download_url==null){
                // One failed lookup must not abort the remaining downloads.
                System.err.println("警告:未能获取下载地址,已跳过文件:" + name);
                continue;
            }
            String target = new File(dl_dir,name).getAbsolutePath();
            // Both values come from the server: quote them for PowerShell and pass the script as
            // ONE argument, so neither whitespace splitting nor a stray quote can change the command.
            String script = String.format("(new-object Net.WebClient).DownloadFile('%s','%s')",
                    ps_quote(download_url.replace("\"","%22")),ps_quote(target));
            System.out.println("正在尝试下载文件:" + name);
            System.out.println("powershell " + script);
            new ProcessBuilder("powershell","-Command",script)
                    .redirectErrorStream(true)
                    // Nobody reads the child's output; discard it so the child can never block on a full pipe.
                    .redirectOutput(ProcessBuilder.Redirect.DISCARD)
                    .start();
            sb.append(download_url).append('\n');
        }
        try(FileOutputStream fos = new FileOutputStream(new File("如果星球文件没有自动下载成功,请将此目录导入到迅雷等工具批量下载.txt"))){
            fos.write(sb.toString().getBytes(StandardCharsets.UTF_8));
        }
    }

    /**
     * Pages through the file listing at {@code url} until an empty page is returned and collects
     * every entry accepted by {@link #is_wanted(JSONObject, boolean)}.
     */
    private JSONArray downloadFile(String url) throws Exception{
        String req_url = url;
        NetReq n = new NetReq(Header);
        JSONArray files = new JSONArray();
        while(true) {
            JSONArray new_files = resp_array(n.get(req_url),"files");
            if (new_files == null || new_files.isEmpty()) {
                return files;
            }
            JSONObject last_file = new_files.getJSONObject(new_files.size() - 1).getJSONObject("file");
            Date last_date = last_file==null ? null : last_file.getDate("create_time");
            addToJarr(files,new_files);
            // Without a timestamp there is no way to address the next page.
            if(last_date==null) return files;
            req_url = next_page_url(url,last_date);
        }
    }

    /** Appends the wanted elements of {@code to_add} to {@code add_to}, with filtering enabled. */
    private void addToJarr(JSONArray add_to, JSONArray to_add){
        addToJarr(add_to,to_add,true);
    }

    /**
     * Decides whether a topic or file entry passes the configured filters.
     *
     * <p>Checks, in order: the month of {@code create_time} (only the month is compared, not the
     * year), the author against {@code accept_author_id}, and the topic id against
     * {@code block_topics_id}. A check whose input cannot be located in the entry is skipped with
     * a warning, i.e. the entry is accepted as far as that check is concerned.
     *
     * @param cur_obj entry to test
     * @param limit   when false, every entry is accepted
     */
    private boolean is_wanted(JSONObject cur_obj,boolean limit){
        if(!limit) return true;

        // 1) month filter
        if(month>-1){
            Date date = cur_obj.getDate("create_time");
            if(date==null){
                // File entries carry their timestamp inside the nested "file" object.
                JSONObject file = cur_obj.getJSONObject("file");
                if(file!=null) date = file.getDate("create_time");
            }
            if(date==null){
                System.err.println("警告:未匹配到创建时间,请增加判断规则");
                System.out.println(cur_obj.toString());
            }else{
                Calendar c = Calendar.getInstance();
                c.setTime(date);
                if(c.get(Calendar.MONTH)!=month) return false;
            }
        }

        // 2) author allow-list
        JSONArray author_list = (JSONArray)config_map.get("accept_author_id");
        if(author_list!=null) {
            Long author_id = author_id_of(cur_obj);
            if(author_id==null){
                // Unknown layout: skip this check and accept the author.
                System.err.println("警告:未匹配到作者的id,请增加判断规则");
                System.out.println(cur_obj.toString());
            }else if(!contains_id(author_list,author_id)){
                return false;
            }
        }

        // 3) topic block-list
        JSONArray block_list = (JSONArray)config_map.get("block_topics_id");
        if(block_list!=null) {
            JSONObject holder = cur_obj.getJSONObject("topic");
            if (holder == null) holder = cur_obj;
            long topic_id = holder.getLongValue("topic_id");
            if(contains_id(block_list,topic_id)){
                System.out.printf("跳过了主题%d\n",topic_id);
                return false;
            }
        }
        return true;
    }

    /**
     * Locates the author's user id inside an entry. The entry either is a topic itself or wraps
     * one under the key {@code topic}.
     *
     * @return the id, or null when the expected structure is not present
     */
    private static Long author_id_of(JSONObject obj){
        JSONObject topic = obj;
        String type = topic.getString("type");
        if(type==null) {
            topic = obj.getJSONObject("topic");
            if(topic==null) return null;
            type = topic.getString("type");
            if(type==null) return null;
        }
        JSONObject content;
        if (type.equals("q&a")) {
            content = topic.getJSONObject("answer");
        } else {
            content = topic.getJSONObject(type);
            if(content!=null && type.equals("topic")) content = content.getJSONObject("talk");
        }
        if(content==null) return null;
        JSONObject owner = content.getJSONObject("owner");
        if(owner==null) return null;
        return owner.getLongValue("user_id");
    }

    /** Returns true when {@code ids} contains {@code id}. */
    private static boolean contains_id(JSONArray ids,long id){
        for (int i = 0; i < ids.size(); i++) {
            if (ids.getLongValue(i)==id) return true;
        }
        return false;
    }

    /**
     * Appends the elements of {@code to_add} that pass {@link #is_wanted(JSONObject, boolean)} to
     * {@code add_to}. Does nothing when either array is null.
     */
    private void addToJarr(JSONArray add_to, JSONArray to_add,boolean limit){
        if(add_to==null||to_add==null) return;
        for(int i=0;i<to_add.size();i++){
            JSONObject cur_obj = to_add.getJSONObject(i);
            if(is_wanted(cur_obj,limit)){
                add_to.add(cur_obj);
            }
        }
    }

    /**
     * Loads topics from the configured local directory when {@code local_dir} is set, otherwise
     * from the network.
     *
     * @param url   first page of the topic listing
     * @param month 0-based target month used to decide when paging can stop
     * @return the collected topics; may be null when no source produced any
     */
    private JSONArray read_in(String url,int month) throws Exception{
        String local_dir = (String)config_map.get("local_dir");
        if(local_dir!=null&& !local_dir.equals("")){
            return read_local(new File(local_dir).getAbsoluteFile());
        }
        return read_remote(url,month);
    }

    /**
     * Concatenates the {@code topics} arrays of every file in {@code dir} whose name contains
     * "json". No filtering is applied to locally loaded topics.
     */
    private JSONArray read_local(File dir) throws Exception{
        File[] entries = dir.listFiles();
        if(entries==null){
            // listFiles() returns null when the path is not a readable directory.
            System.err.println("警告:无法读取本地目录:" + dir);
            return null;
        }
        JSONArray topics = null;
        for(File f:entries){
            if(!f.isFile() || !f.getName().contains("json")) continue;
            StringBuilder sb = new StringBuilder();
            try(BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8))){
                String str;
                while((str=br.readLine())!=null){
                    sb.append(str);
                }
            }
            JSONArray new_topics = resp_array(sb.toString(),"topics");
            if(new_topics==null) continue;
            if(topics==null) {
                topics = new_topics;
            }else{
                topics.addAll(new_topics);
            }
        }
        return topics;
    }

    /**
     * Pages through the remote topic listing, newest first, keeping the wanted topics, until the
     * listing is exhausted or a page ends in a month before the target month.
     *
     * <p>NOTE(review): the stop condition compares month numbers only. Apart from the
     * January/December case handled below, a target month that lies in the previous year looks
     * "already passed" on the very first page — confirm whether that case matters.
     */
    private JSONArray read_remote(String url,int month) throws Exception{
        boolean debug = flag("debug");
        String ts = "" + System.currentTimeMillis();
        File debug_out = new File(ts);
        int loop_count = 1;
        JSONArray topics = null;
        String req_url = url;
        while(true) {
            NetReq n = new NetReq(Header);
            String result = n.get(req_url);
            // In debug mode every raw response is saved under a directory named after the start time.
            if(debug) {
                if(!debug_out.exists()) debug_out.mkdir();
                File f = new File(ts + "/" + (loop_count++)+".json");
                try(FileOutputStream fos = new FileOutputStream(f)){
                    fos.write(result.getBytes(StandardCharsets.UTF_8));
                }
            }
            System.out.println(req_url);
            JSONArray new_topics = resp_array(result,"topics");
            // An empty page means everything has been fetched.
            if(new_topics==null||new_topics.isEmpty()){
                return topics;
            }
            Date last_date = new_topics.getJSONObject(new_topics.size() - 1).getDate("create_time");
            if(topics==null) {
                topics = new JSONArray();
            }
            addToJarr(topics,new_topics);
            // Without a timestamp there is no way to address the next page.
            if(last_date==null) return topics;
            Calendar c = Calendar.getInstance();
            c.setTime(last_date);
            int m = c.get(Calendar.MONTH);
            // The page ended in an earlier month, so the target month is complete.
            // Special case: when the target is January, the next older month is December (11).
            if(m < month || (month==0&&m==11) ) {
                return topics;
            }
            req_url = next_page_url(url,last_date);
        }
    }

    /**
     * Builds the URL of the next (older) page: the base URL plus an {@code end_time} parameter
     * shaped like {@code 2020-11-20T12:39:50.000+0800}.
     *
     * <p>NOTE(review): the timestamp is formatted in the JVM default time zone while the suffix is
     * fixed to {@code +0800}; the two only agree when the default zone is UTC+8 — confirm.
     */
    private static String next_page_url(String url,Date last_date){
        SimpleDateFormat sdf_t = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
        String end_time = sdf_t.format(last_date) + ".000+0800";
        return url + "&end_time=" + URLEncoder.encode(end_time, StandardCharsets.UTF_8);
    }

    /**
     * Parses a response and returns {@code resp_data.<key>} as an array.
     *
     * @return the array, or null when the response has no {@code resp_data} or no such key
     */
    private static JSONArray resp_array(String json,String key){
        JSONObject root = JSONObject.parseObject(json);
        JSONObject resp_data = root==null ? null : root.getJSONObject("resp_data");
        if(resp_data==null){
            System.err.println("警告:响应中没有resp_data字段,已按无数据处理");
            return null;
        }
        return resp_data.getJSONArray(key);
    }

    /** Reads a boolean option; anything other than {@code true} (including a missing key) counts as false. */
    private boolean flag(String key){
        return Boolean.TRUE.equals(config_map.get(key));
    }

    /** Replaces characters that are not allowed in Windows file names with a space. */
    private static String safe_file_name(String name){
        return name.replaceAll("[/\\\\:*?\"<>|]"," ");
    }

    /** Escapes a value for use inside a single-quoted PowerShell string (a quote is doubled). */
    private static String ps_quote(String value){
        return value.replace("'","''");
    }

    /**
     * Percent-encodes one path segment. URLEncoder produces form encoding, where a space becomes
     * '+'; in a path a space has to be {@code %20}. A literal '+' is already emitted as %2B, so
     * the replacement only affects spaces.
     */
    private static String encode_segment(String segment){
        return URLEncoder.encode(segment, StandardCharsets.UTF_8).replace("+","%20");
    }

    /** Returns the first line of {@code text}, cut to at most {@code max_length} characters. */
    private static String short_title(String text,int max_length){
        int line_end = text.indexOf('\n');
        String title = line_end>=0 ? text.substring(0,line_end) : text;
        if(title.length()>max_length) {
            title = title.substring(0, max_length);
        }
        return title;
    }

    /**
     * Renders the images attached to {@code j_obj} as {@code <img>} tags, one per line.
     *
     * @return the tags, or an empty string when images are disabled or absent
     */
    private String get_imgs(JSONObject j_obj){
        if(!flag("need_img")) return "";
        JSONArray imgs = j_obj.getJSONArray("images");
        StringBuilder sb = new StringBuilder();
        if(imgs!=null){
            for(int j=0;j<imgs.size();j++){
                JSONObject large = imgs.getJSONObject(j).getJSONObject("large");
                // Skip entries that carry no large variant instead of failing the whole export.
                if(large==null) continue;
                sb.append(String.format("<img src='%s'/>\n",large.getString("url")));
            }
        }
        return sb.toString();
    }

    /**
     * Renders the digest: a heading, a section with the shared talks and a section with the
     * questions and answers.
     *
     * @param topics topics to render; null is treated as empty
     */
    private String generate_MD(JSONArray topics) throws Exception{
        if(topics==null) topics = new JSONArray();
        StringBuilder sb = new StringBuilder();
        boolean debug = flag("debug");
        boolean need_file = flag("need_file");
        // The date digits are taken over as written in create_time; the zone suffix is ignored.
        SimpleDateFormat sdf_t = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
        sb.append(String.format("# 知识星球%d月精选",month+1)).append('\n');
        sb.append("## 分享\n");
        int topic_count = 0,q_a_count = 0;
        int max_title_length = 16;

        for(int i=0;i<topics.size();i++){
            JSONObject cur_topic = topics.getJSONObject(i);
            if(!"talk".equals(cur_topic.getString("type"))) continue;
            JSONObject topic_obj = cur_topic.getJSONObject("talk");
            if(topic_obj==null) continue;
            // A talk that only shares a file has no text; test for that BEFORE touching the string.
            String raw_text = topic_obj.getString("text");
            if(raw_text==null) continue;
            // Drop non-breaking spaces and surrounding whitespace.
            String text = raw_text.replace("\u00a0","").strip();
            // When files are not wanted, skip talks that carry files (they usually have little text).
            if(!need_file && topic_obj.getJSONArray("files")!=null) continue;
            // (disabled) skip shared official-account articles: text.contains("type=\"web\"")
            String time = sdf.format(sdf_t.parse(cur_topic.getString("create_time")));
            String title = short_title(text,max_title_length);
            sb.append(String.format("\n\n### <font color=#0099ff>%d. %s<br/>%s</font>\n\n",++topic_count,title,time));
            if(debug){
                // Print the topic id in debug mode to make building the block list easy.
                sb.append("id: ").append(cur_topic.getLongValue("topic_id")).append('\n');
            }
            sb.append(String.format("<font color=#000000>%s<br/></font>",text)).append('\n');
            // List attachments as links into the download directory (meant to be used together
            // with the download feature).
            if(need_file){
                JSONArray files = topic_obj.getJSONArray("files");
                if(files!=null){
                    for(int j=0;j<files.size();j++){
                        String file_name = files.getJSONObject(j).getString("name");
                        if(file_name==null) continue;
                        // Link to the sanitised name the download actually uses, and encode per
                        // path segment so the separator stays a '/'.
                        String link = encode_segment("星球文件") + "/" + encode_segment(safe_file_name(file_name));
                        sb.append(String.format("附件%d:[%s](%s)\n",j+1,file_name,link));
                    }
                }
            }
            sb.append(get_imgs(topic_obj));
        }

        sb.append("<br/>\n## 答疑解惑\n");
        for(int i=0;i<topics.size();i++){
            JSONObject cur_topic = topics.getJSONObject(i);
            if(!"q&a".equals(cur_topic.getString("type"))) continue;
            try {
                // Build the entry aside and commit it only when complete, so a malformed topic
                // leaves neither a half-written section nor a gap in the numbering.
                StringBuilder entry = new StringBuilder();
                String time = sdf.format(sdf_t.parse(cur_topic.getString("create_time")));
                JSONObject q = cur_topic.getJSONObject("question");
                JSONObject a = cur_topic.getJSONObject("answer");
                boolean answered = cur_topic.getBoolean("answered");
                String title = "提问";
                if(answered){
                    title = short_title(a.getString("text"),max_title_length);
                }
                entry.append(String.format("\n\n### <font color=#000000>%d. %s<br/>%s </font>\n\n",q_a_count+1,title,time)).append('\n');
                if(debug){
                    entry.append("id: ").append(cur_topic.getLongValue("topic_id")).append('\n');
                }
                // question
                entry.append(String.format("> %s\n", q.getString("text"))).append('\n');
                entry.append(get_imgs(q));
                // answer
                if(answered){
                    entry.append(String.format("\n<font color=#0099ff>%s</font>\n", a.getString("text")));
                    entry.append(get_imgs(a));
                }else{
                    entry.append(String.format("\n<font color=#0099ff>%s</font>\n", "暂无回答"));
                }
                sb.append(entry);
                q_a_count++;
            }catch (Exception e){
                // Best effort: report the offending topic and carry on with the next one.
                e.printStackTrace();
                System.out.println(cur_topic);
            }
        }
        return sb.toString();
    }
}
|
免费评分
-
查看全部评分
|