吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 1683|回复: 24
收起左侧

[Python 原创] 星球批量下载文件

[复制链接]
蜗牛很牛 发表于 2024-10-27 15:20
本帖最后由 苏紫方璇 于 2024-10-28 11:22 编辑
import requests
import os
import json
import tkinter as tk
from tkinter import messagebox

# Name of the JSON file (relative to the working directory) that stores the settings.
CONFIG_FILE = "config.json"


# Read the configuration
def load_config():
    """Load the saved settings from ``CONFIG_FILE``.

    Returns:
        dict: The stored settings. An empty dict is returned when the file
        is missing, cannot be read, is not valid JSON, or does not contain
        a JSON object, so callers can always use ``.get()`` on the result.
    """
    try:
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file (the normal first-run case).
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError,
        # i.e. a truncated or hand-edited file must not crash the program.
        return {}
    return config if isinstance(config, dict) else {}

# Persist the configuration
def save_config(session_id, access_token, url):
    """Write the three connection settings to ``CONFIG_FILE`` as a JSON object.

    Args:
        session_id: Value stored under the ``zsxqsessionid`` key.
        access_token: Value stored under the ``zsxq_access_token`` key.
        url: Value stored under the ``url`` key.

    Any existing file is overwritten.
    """
    payload = dict(
        zsxqsessionid=session_id,
        zsxq_access_token=access_token,
        url=url,
    )
    serialized = json.dumps(payload)
    with open(CONFIG_FILE, "w") as out:
        out.write(serialized)

# Seconds to wait for the server before a request is abandoned.
_REQUEST_TIMEOUT = 30

# Characters that must not reach the filesystem: path separators, the other
# characters Windows forbids in file names, and ASCII control characters.
_UNSAFE_NAME_CHARS = {ord(ch): "_" for ch in '<>:"/\\|?*'}
_UNSAFE_NAME_CHARS.update({code: "_" for code in range(32)})


def _safe_file_name(name, fallback):
    """Reduce a server-supplied name to one safe path component.

    Path separators are replaced so the name cannot escape the working
    directory; a name that ends up empty (including "." and "..") is
    replaced by *fallback*.
    """
    cleaned = str(name or "").translate(_UNSAFE_NAME_CHARS).strip().rstrip(". ")
    return cleaned or fallback


def _unique_path(file_name):
    """Return *file_name*, or the first free ``base_N.ext`` variant if it exists."""
    if not os.path.exists(file_name):
        return file_name
    base, extension = os.path.splitext(file_name)
    counter = 1
    while os.path.exists(f"{base}_{counter}{extension}"):
        counter += 1
    return f"{base}_{counter}{extension}"


def _iter_topic_files(data):
    """Yield each file entry that has a ``file_id`` from a topics response."""
    if not isinstance(data, dict):
        return
    # ``or`` guards against keys that are present but null in the JSON.
    resp_data = data.get("resp_data") or {}
    for topic in resp_data.get("topics") or []:
        talk = topic.get("talk") or {}
        for file in talk.get("files") or []:
            if "file_id" in file:
                yield file


def _download_one(file, headers):
    """Resolve the download URL of one file entry and save it locally.

    Failures are printed and swallowed so one bad file does not stop the
    rest of the batch.
    """
    file_id = file["file_id"]
    download_url_api = f"https://api.zsxq.com/v2/files/{file_id}/download_url"
    try:
        download_response = requests.get(
            download_url_api, headers=headers, timeout=_REQUEST_TIMEOUT
        )
        download_response.raise_for_status()
        download_data = download_response.json()

        if not download_data.get("succeeded"):
            print(f"获取文件 ID {file_id} 的下载链接失败")
            return

        file_url = download_data["resp_data"]["download_url"]
        file_response = requests.get(
            file_url, headers=headers, timeout=_REQUEST_TIMEOUT
        )
        file_response.raise_for_status()
    except (
        requests.RequestException,
        AttributeError,
        KeyError,
        TypeError,
        ValueError,
    ) as e:
        # Besides network errors this also covers a malformed reply
        # (invalid JSON, missing or null resp_data / download_url).
        print(f"获取文件 ID {file_id} 的下载链接失败: {e}")
        return

    file_name = _unique_path(
        _safe_file_name(file.get("name"), f"file_{file_id}")
    )
    try:
        with open(file_name, "wb") as file_obj:
            file_obj.write(file_response.content)
    except OSError as e:
        print(f"保存文件 {file_name} 失败: {e}")
        return
    print(f"Downloaded: {file_name}")


# Download every file attached to the topics returned by the configured URL
def download_files():
    """Download all files referenced by the topics at the configured URL.

    Reads the session id, access token and topics URL from the saved
    configuration, requests the topics list, and saves every attached file
    into the current working directory. Existing files are never
    overwritten; a ``_N`` suffix is added instead.

    Problems with the configuration or the topics request are reported in
    an error dialog; per-file problems are printed and the batch continues.

    NOTE: all requests run synchronously in the caller's thread.
    """
    config = load_config()
    session_id = config.get("zsxqsessionid")
    access_token = config.get("zsxq_access_token")
    url = config.get("url")

    if not session_id or not access_token or not url:
        messagebox.showerror("Error", "请确保已输入所有信息并保存。")
        return

    headers = {
        'cookie': f'zsxqsessionid={session_id}; zsxq_access_token={access_token}',
        'accept': 'application/json, text/plain, */*',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as e:
        messagebox.showerror("Error", f"获取主题失败: {e}")
        return

    if response.status_code != 200:
        messagebox.showerror("Error", f"获取主题失败: {response.status_code}")
        return

    try:
        data = response.json()
    except ValueError as e:
        # The server answered 200 but the body is not JSON.
        messagebox.showerror("Error", f"获取主题失败: {e}")
        return

    for file in _iter_topic_files(data):
        _download_one(file, headers)

# Callback of the "save configuration" button
def save_button_click():
    """Save the values currently typed into the three entry fields.

    Leading and trailing whitespace is stripped first: credentials are
    usually pasted, and a stray space or newline would end up inside the
    cookie header. Shows an info dialog on success and an error dialog if
    the configuration file cannot be written.
    """
    session_id = session_id_entry.get().strip()
    access_token = access_token_entry.get().strip()
    url = url_entry.get().strip()
    try:
        save_config(session_id, access_token, url)
    except OSError as e:
        messagebox.showerror("Error", f"保存配置失败: {e}")
        return
    messagebox.showinfo("Info", "配置已保存!")

# Build the UI: three labelled entry rows followed by two buttons
root = tk.Tk()
root.title("下载配置")

entries = []
for row, caption in enumerate(("zsxqsessionid:", "zsxq_access_token:", "URL:")):
    tk.Label(root, text=caption).grid(row=row, column=0)
    field = tk.Entry(root)
    field.grid(row=row, column=1)
    entries.append(field)

# The callbacks look these entries up by their module-level names.
session_id_entry, access_token_entry, url_entry = entries

save_button = tk.Button(root, text="保存配置", command=save_button_click)
save_button.grid(row=3, column=0, columnspan=2)

download_button = tk.Button(root, text="下载文件", command=download_files)
download_button.grid(row=4, column=0, columnspan=2)

# Pre-fill the entries with the previously saved configuration, if any
config = load_config()
for entry, key in (
    (session_id_entry, "zsxqsessionid"),
    (access_token_entry, "zsxq_access_token"),
    (url_entry, "url"),
):
    entry.insert(0, config.get(key, ""))

root.mainloop()
录制_2024_10_27_13_57_58_189.gif

免费评分

参与人数 3吾爱币 +9 热心值 +3 收起 理由
苏紫方璇 + 7 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!
helian147 + 1 + 1 热心回复!
hwf411 + 1 + 1 谢谢@Thanks!

查看全部评分

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

侃遍天下无二人 发表于 2024-10-27 19:19
本帖最后由 侃遍天下无二人 于 2024-10-27 19:21 编辑

当然是先研究下文件的url是咋来的了,我几年前写过一个,可以给你参考,当时api版本还是 v1

[Java] 纯文本查看 复制代码
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.*;

/**
 * Command-line entry point.
 *
 * <p>Builds the run configuration (built-in defaults overlaid with the values
 * found in {@code config.json}), lets {@link JsonProcessor} read the topics and
 * render them as Markdown into {@code output.md}, and starts the file download
 * when the {@code download} option is enabled.
 */
public class ZSXQ_downloader {

    /** Effective configuration: defaults first, then overrides from config.json. */
    static Map<String,Object> config_map = new TreeMap<>();

    /**
     * Fills {@link #config_map}.
     *
     * <p>When {@code config.json} does not exist it is created from the defaults
     * (plus a default author list and an empty topic block list). Otherwise every
     * key of the file except {@code url_pattern} and {@code Header} overrides the
     * default of the same name.
     *
     * @throws Exception if the configuration file cannot be read or written
     */
    private static void init() throws Exception{
        // These two entries are essential and must not be changed
        config_map.put("url_pattern","https://api.zsxq.com/v1/groups/%s/%s?scope=all&count=20");
        config_map.put("Header",new File("header"));
        // Stored 1-based, the way months are normally written
        config_map.put("month",Calendar.getInstance().get(Calendar.MONTH)+1);
        config_map.put("local_dir","");
        config_map.put("qid","824552281252");
        config_map.put("download",false);
        config_map.put("need_img",false);
        config_map.put("need_file",false);
        config_map.put("debug",false);
        File config = new File("config.json");
        // If the configuration file does not exist, write the defaults into it
        if(!config.exists()) {
            JSONObject jconfig = new JSONObject();
            for(String key:config_map.keySet()){
                jconfig.put(key,config_map.get(key));
            }
            JSONArray default_author = new JSONArray();
            default_author.add(48582851245248L);
            JSONArray default_blocked = new JSONArray();
            jconfig.put("accept_author_id",default_author);
            jconfig.put("block_topics_id",default_blocked);
            // try-with-resources: the stream was previously never closed
            try (FileOutputStream fos = new FileOutputStream(config)) {
                fos.write(jconfig.toString().getBytes(StandardCharsets.UTF_8));
            }
            // Use the same filters in this run that the file just written will
            // apply on every later run, so the first run is not a special case.
            config_map.put("accept_author_id",default_author);
            config_map.put("block_topics_id",default_blocked);
            return;
        }
        StringBuilder sb = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(config), StandardCharsets.UTF_8))) {
            String str;
            while((str = br.readLine())!=null){
                sb.append(str);
            }
        }
        JSONObject conf = JSONObject.parseObject(sb.toString());
        // An empty file yields no object; keep the defaults in that case
        if(conf==null) return;
        for(String key:conf.keySet()){
            // Never let the file override the essential entries
            if(key.equals("url_pattern")||key.equals("Header")) continue;
            config_map.put(key,conf.get(key));
        }
    }

    /**
     * Runs one export: configuration, fetch, Markdown output, optional download.
     *
     * @param args ignored
     * @throws Exception on any I/O or parsing failure
     */
    public static void main(String[] args) throws Exception{
        init();
        JsonProcessor j = new JsonProcessor(config_map);
        String result = j.read_in().generate_MD();
        try (FileOutputStream fos = new FileOutputStream(new File("output.md"))) {
            // Explicit UTF-8, consistent with every other file this program writes
            fos.write(result.getBytes(StandardCharsets.UTF_8));
        }
        // Null-safe: a missing or non-boolean "download" value simply disables it
        if(Boolean.TRUE.equals(config_map.get("download"))){
            j.downloadFile();
        }
    }
}



[Java] 纯文本查看 复制代码
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

import java.io.*;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Map;

/**
 * Reads the topics of one group (online or from saved JSON files), filters them
 * by month, author and block list, renders them as Markdown, and can download
 * the files of the group.
 *
 * <p>All behaviour is driven by the configuration map passed to the constructor.
 */
class JsonProcessor {
    private Map<String,Object> config_map;
    private String url_pattern;
    private File Header;
    private int month;
    private JSONArray data;

    /**
     * @param config_map configuration; must contain {@code url_pattern} (String),
     *                   {@code Header} (File) and {@code month} (Integer, 1-based)
     */
    public JsonProcessor(Map<String,Object> config_map){
        this.config_map = config_map;
        this.url_pattern = (String)config_map.get("url_pattern");
        this.Header = (File)config_map.get("Header");
        // Calendar counts months from 0, so the 1-based configured month is reduced by one
        this.month = (Integer)config_map.get("month")-1;
    }

    /** Renders the topics loaded by {@link #read_in()} as Markdown. */
    public String generate_MD() throws Exception{
        return generate_MD(data);
    }

    /** Loads the topics of the configured group and returns this object for chaining. */
    public JsonProcessor read_in() throws Exception{
        String topic_url = String.format(url_pattern,config_map.get("qid"),"topics");
        data = read_in(topic_url,month);
        return this;
    }

    /**
     * Lists the files of the configured group, starts one PowerShell download per
     * file into the {@code 星球文件} directory, and writes all download URLs to a
     * text file as a fallback for external download tools.
     *
     * <p>The PowerShell processes are started and not waited for.
     */
    public void downloadFile() throws Exception{
        String file_url = String.format(url_pattern,config_map.get("qid"),"files");
        String dl_path = new File("").getAbsolutePath() + "\\星球文件\\";
        if(!new File(dl_path).exists()){
            new File(dl_path).mkdirs();
        }
        JSONArray files = downloadFile(file_url);
        NetReq n = new NetReq(Header);
        StringBuilder sb = new StringBuilder();
        for(int i=0;i<files.size();i++){
            JSONObject file = files.getJSONObject(i).getJSONObject("file");
            String file_id = file.getString("file_id");
            String raw_name = file.getString("name");
            if(raw_name==null) raw_name = String.valueOf(file_id);
            // Replace every character Windows forbids in file names (now including '"')
            String name = raw_name.replaceAll("[/\\\\:*?\"<>|]"," ");
            // e.g. https://api.zsxq.com/v2/files/51588882112214/download_url
            JSONObject reply = JSONObject.parseObject(n.get(String.format("https://api.zsxq.com/v2/files/%s/download_url",file_id)));
            JSONObject resp_data = reply==null ? null : reply.getJSONObject("resp_data");
            String download_url = resp_data==null ? null : resp_data.getString("download_url");
            if(download_url==null){
                // One refused file must not abort the remaining downloads
                System.err.println("警告:未能获取文件的下载地址:" + name);
                continue;
            }
            // On Windows 7 and later the download can be done with PowerShell's built-in WebClient
            String command = String.format("(new-object Net.WebClient).DownloadFile('%s','%s')",
                    ps_quote(download_url),ps_quote(dl_path + name));
            System.out.println("正在尝试下载文件:" + name);
            System.out.println("powershell " + command);
            // Array form: exec(String) would split the command on whitespace and
            // mangle file names that contain spaces.
            Runtime.getRuntime().exec(new String[]{"powershell","-Command",command});
            sb.append(download_url).append('\n');
        }
        try (FileOutputStream fos = new FileOutputStream(new File("如果星球文件没有自动下载成功,请将此目录导入到迅雷等工具批量下载.txt"))) {
            fos.write(sb.toString().getBytes(StandardCharsets.UTF_8));
        }
    }

    /** Escapes a value for use inside a single-quoted PowerShell string. */
    private static String ps_quote(String value){
        return value.replace("'","''");
    }

    /**
     * Builds the URL of the next page: the base URL plus an {@code end_time}
     * parameter derived from the creation time of the last item received.
     *
     * <p>NOTE(review): the time is formatted in the JVM's default time zone but
     * labelled {@code +0800}; this looks like it assumes a UTC+8 machine - confirm.
     */
    private static String next_page_url(String url, Date last_date){
        // Target shape: 2020-11-20T12:39:50.032+0800
        SimpleDateFormat sdf_t = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
        String end_time = sdf_t.format(last_date) + ".000+0800";
        return url + "&end_time=" + URLEncoder.encode(end_time, StandardCharsets.UTF_8);
    }

    /**
     * Pages through the file list at {@code url} until a page comes back empty
     * and returns all entries accepted by {@link #is_wanted}.
     */
    private JSONArray downloadFile(String url) throws Exception{
        String req_url = url;
        NetReq n = new NetReq(Header);
        JSONArray files = new JSONArray();
        while(true) {
            String result = n.get(req_url);
            JSONArray new_files = JSONObject.parseObject(result).getJSONObject("resp_data").getJSONArray("files");
            if (new_files == null || new_files.size() == 0) {
                return files;
            }
            Date last_date = new_files.getJSONObject(new_files.size() - 1).getJSONObject("file").getDate("create_time");
            addToJarr(files,new_files);
            req_url = next_page_url(url,last_date);
        }
    }

    /** Appends the wanted entries of {@code to_add} to {@code add_to}, with filtering enabled. */
    private void addToJarr(JSONArray add_to, JSONArray to_add){
        addToJarr(add_to,to_add,true);
    }

    /**
     * Decides whether an entry passes the configured filters.
     *
     * @param cur_obj a topic entry, or a file entry that wraps its data in
     *                {@code file} / {@code topic}
     * @param limit   when false every entry is accepted
     * @return true if the entry is in the configured month (when one is set), its
     *         author is in {@code accept_author_id} (when configured) and its
     *         topic is not in {@code block_topics_id} (when configured)
     */
    private boolean is_wanted(JSONObject cur_obj,boolean limit){
        // Filtering disabled: accept everything
        if(!limit) return true;
        // First check the month restriction
        Date date = cur_obj.getDate("create_time");
        if(date==null) date = cur_obj.getJSONObject("file").getDate("create_time");
        Calendar c = Calendar.getInstance(); c.setTime(date);
        if(month>-1&&c.get(Calendar.MONTH)!=month) {
            // Different month: reject
            return false;
        }
        // Next check whether the author is in the accepted list
        JSONArray author_list = (JSONArray)config_map.get("accept_author_id");
        if(author_list!=null) {
            try {
                JSONObject content;
                String type = cur_obj.getString("type");
                // File entries keep the type inside their "topic" object; continue with that object
                if(type==null) {type=cur_obj.getJSONObject("topic").getString("type"); cur_obj=cur_obj.getJSONObject("topic");}
                if (type.equals("q&a")) content = cur_obj.getJSONObject("answer");
                else {
                    content = cur_obj.getJSONObject(type);
                    if(type.equals("topic")) content = content.getJSONObject("talk");
                }

                long author_id = content.getJSONObject("owner").getLongValue("user_id");
                boolean in_auth_list = false;
                for (int i = 0; i < author_list.size(); i++) {
                    if (author_id==author_list.getLongValue(i)) {
                        in_auth_list = true;
                        break;
                    }
                }
                if(!in_auth_list) return false;

            } catch (NullPointerException npe) {
                // Unknown entry layout: skip the author check and accept the author
                System.err.println("警告:未匹配到作者的id,请增加判断规则");
                System.out.println(cur_obj.toString());
            }
        }
        // Finally check whether the topic is on the block list
        JSONArray block_list = (JSONArray)config_map.get("block_topics_id");
        if(block_list!=null) {
            long topic_id = 0;
            try {
                JSONObject type = cur_obj.getJSONObject("topic");
                if (type == null) type = cur_obj;
                topic_id = type.getLongValue("topic_id");
            } catch (NullPointerException npe) {
                System.err.println("警告:未匹配到主题的id,请增加判断规则");
                System.out.println(cur_obj.toString());
                return true;
            }
            for (int i = 0; i < block_list.size(); i++) {
                if (topic_id==block_list.getLongValue(i)) {
                    System.out.printf("跳过了主题%d\n",topic_id);
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Appends to {@code add_to} every element of {@code to_add} accepted by
     * {@link #is_wanted}; does nothing when either array is null.
     */
    private void addToJarr(JSONArray add_to, JSONArray to_add,boolean limit){
        if(add_to==null||to_add==null) return;
        for(int i=0;i<to_add.size();i++){
            JSONObject cur_obj = to_add.getJSONObject(i);
            if(is_wanted(cur_obj,limit)){
                add_to.add(cur_obj);
            }
        }
    }

    /**
     * Collects topics either from the JSON files in {@code local_dir} (unfiltered)
     * or, when no directory is configured, by paging through {@code url} (filtered).
     *
     * @param url   first-page URL of the topic list
     * @param month 0-based month to stop at; paging ends once a page reaches an earlier month
     * @return the collected topics, or null when nothing was found
     * @throws FileNotFoundException if {@code local_dir} cannot be listed
     */
    private JSONArray read_in(String url,int month) throws Exception{
        JSONArray topics = null;
        String req_url = url;
        boolean debug = (boolean)config_map.get("debug");
        String ts = "" + System.currentTimeMillis();
        File debug_out = new File(ts);
        int loop_count = 1;
        // If a local directory is configured, read the JSON files from it
        String local_dir = (String)config_map.get("local_dir");
        if(local_dir!=null&& !local_dir.equals("")){
            File dir = new File(local_dir).getAbsoluteFile();
            File[] local_files = dir.listFiles();
            if(local_files==null){
                // listFiles() returns null for a missing path or a non-directory
                throw new FileNotFoundException("local_dir is not a readable directory: " + dir);
            }
            for(File f:local_files){
                if(!f.getName().contains("json")) continue;
                StringBuilder sb = new StringBuilder();
                try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8))) {
                    String str;
                    while((str=br.readLine())!=null){
                        sb.append(str);
                    }
                }
                JSONArray new_topics = JSONObject.parseObject(sb.toString()).getJSONObject("resp_data").getJSONArray("topics");
                // A saved page without topics contributes nothing
                if(new_topics==null) continue;
                if(topics==null) {
                    topics = new_topics;
                }else{
                    for (int i=0;i<new_topics.size();i++){
                        topics.add(new_topics.getJSONObject(i));
                    }
                }
            }
        } // Otherwise fetch the data online
        else{
            while(true) {
                NetReq n = new NetReq(Header);
                String result = n.get(req_url);
                // In debug mode keep every raw JSON response
                if(debug) {
                    if(!debug_out.exists()) debug_out.mkdir();
                    File f = new File(ts + "/" + (loop_count++)+".json");
                    try (FileOutputStream fos = new FileOutputStream(f)) {
                        fos.write(result.getBytes(StandardCharsets.UTF_8));
                    }
                }
                System.out.println(req_url);
                JSONArray new_topics = JSONObject.parseObject(result).getJSONObject("resp_data").getJSONArray("topics");
                // No more topics: everything has been fetched
                if(new_topics==null||new_topics.size()==0){
                    return topics;
                }
                Date last_date = new_topics.getJSONObject(new_topics.size() - 1).getDate("create_time");
                // Make sure the result array exists
                if(topics==null) {
                    topics = new JSONArray();
                }
                // Add the newly fetched topics
                addToJarr(topics,new_topics);
                // Check whether the requested month has been fully traversed
                Calendar c = Calendar.getInstance();c.setTime(last_date);
                int m = c.get(Calendar.MONTH);
                // The last month is earlier than the requested one, so the requested month is complete.
                // Special case: for January the pages continue into December of the previous year.
                if(m < month || (month==0&&m==11) ) {
                    return topics;
                }
                // Point the request at the next page
                req_url = next_page_url(url,last_date);
            }
        }
        return topics;
    }

    /**
     * Returns one {@code <img>} tag per image of {@code j_obj}, or an empty
     * string when images are disabled or the object has none.
     */
    private String get_imgs(JSONObject j_obj){
        // Images not wanted: add nothing
        if(!(boolean)config_map.get("need_img")) return "";
        JSONArray imgs = j_obj.getJSONArray("images");
        StringBuilder sb = new StringBuilder();
        if(imgs!=null){
            for(int j=0;j<imgs.size();j++){
                String large_img_url = imgs.getJSONObject(j).getJSONObject("large").getString("url");
                sb.append(String.format("<img src='%s'/>\n",large_img_url));
            }
        }
        return sb.toString();
    }

    /** Returns the first line of {@code text}, cut to at most {@code max_length} characters. */
    private static String shorten_title(String text,int max_length){
        String title = text;
        if(title.contains("\n")) title = title.substring(0,title.indexOf('\n'));
        if(title.length()>max_length) {
            title = title.substring(0, max_length);
        }
        return title;
    }

    /** Appends the topic id line used in debug mode to build block lists. */
    private static void append_debug_id(StringBuilder sb,JSONObject cur_topic){
        long topic_id = 0;
        try {
            topic_id = cur_topic.getLongValue("topic_id");
        } catch (NullPointerException npe) {
            System.err.println("警告:未匹配到主题的id,请增加判断规则");
            System.out.println(cur_topic.toString());
        }
        sb.append("id: ").append(topic_id).append('\n');
    }

    /**
     * Renders the topics as Markdown: first all {@code talk} posts, then all
     * {@code q&a} entries.
     *
     * @param topics topics to render; null is treated as an empty list
     * @return the Markdown document
     * @throws Exception if a {@code create_time} value cannot be parsed
     */
    private String generate_MD(JSONArray topics) throws Exception{
        // read_in returns null when nothing was found; still emit the headings
        if(topics==null) topics = new JSONArray();
        StringBuilder sb = new StringBuilder();
        boolean debug = (boolean)config_map.get("debug");
        SimpleDateFormat sdf_t = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
        sb.append(String.format("# 知识星球%d月精选",month+1)).append('\n');
        sb.append("## 分享\n");
        int topic_count = 0,q_a_count = 0;
        int max_title_length = 16;
        for(int i=0;i<topics.size();i++){
            JSONObject cur_topic = topics.getJSONObject(i);
            String type = cur_topic.getString("type");
            String time = sdf.format(sdf_t.parse(cur_topic.getString("create_time")));
            if(type!=null&&type.equals("talk")){
                JSONObject topic_obj = cur_topic.getJSONObject("talk");
                // A post that only shares a file has no text; the check must come
                // before the text is touched, otherwise it can never trigger.
                String raw_text = topic_obj.getString("text");
                if(raw_text==null) continue;
                // Remove non-breaking spaces and surrounding whitespace
                String text = raw_text.replace("\u00a0","").strip();
                // Skip posts with files when files are not wanted (such posts rarely carry much text)
                if((!(boolean)config_map.get("need_file")) && topic_obj.getJSONArray("files")!=null) continue;
                // Title: first line of the text, limited in length
                String title = shorten_title(text,max_title_length);
                sb.append(String.format("\n\n### <font color=#0099ff>%d. %s<br/>%s</font>\n\n",++topic_count,title,time));
                if(debug){
                    // In debug mode print the topic id to make building a block list easier
                    append_debug_id(sb,cur_topic);
                }
                sb.append(String.format("<font color=#000000>%s<br/></font>",text)).append('\n');
                // List attached files when files are wanted (meant to be used with the download feature)
                if((boolean)config_map.get("need_file")){
                    JSONArray files = topic_obj.getJSONArray("files");
                    if(files!=null){
                        for(int j=0;j<files.size();j++){
                            JSONObject file = files.getJSONObject(j);
                            sb.append(String.format("附件%d:[%s](%s)\n",j+1,file.getString("name"),URLEncoder.encode("星球文件/"+file.getString("name"), StandardCharsets.UTF_8)));
                        }
                    }
                }
                // Append images, if any
                sb.append(get_imgs(topic_obj));
            }

        }

        sb.append("<br/>\n## 答疑解惑\n");

        for(int i=0;i<topics.size();i++){
            JSONObject cur_topic = topics.getJSONObject(i);
            String type = cur_topic.getString("type");
            String time = sdf.format(sdf_t.parse(cur_topic.getString("create_time")));
            if(type!=null&&type.equals("q&a")){
                JSONObject q = null,a = null;
                try {
                    q = cur_topic.getJSONObject("question");
                    a = cur_topic.getJSONObject("answer");
                    String title = "提问";
                    if(cur_topic.getBoolean("answered")){
                        title = shorten_title(a.getString("text"),max_title_length);
                    }
                    sb.append(String.format("\n\n### <font color=#000000>%d. %s<br/>%s </font>\n\n",++q_a_count,title,time)).append('\n');
                    if(debug){
                        append_debug_id(sb,cur_topic);
                    }
                    // Question part
                    String q_text = q.getString("text");
                    sb.append(String.format("> %s\n", q_text)).append('\n');
                    sb.append(get_imgs(q));
                    if(cur_topic.getBoolean("answered")){
                        // Answer part
                        String a_text = a.getString("text");
                        sb.append(String.format("\n<font color=#0099ff>%s</font>\n", a_text));
                        sb.append(get_imgs(a));
                    }else{
                        sb.append(String.format("\n<font color=#0099ff>%s</font>\n", "暂无回答"));
                    }
                }catch (Exception e){
                    // A malformed entry is reported and skipped; rendering continues
                    e.printStackTrace();
                    System.out.println(cur_topic);
                }
            }
        }
        return sb.toString();
    }
}

免费评分

参与人数 1吾爱币 +1 热心值 +1 收起 理由
helian147 + 1 + 1 热心回复!

查看全部评分

xiao52 发表于 2024-10-27 22:19
hly2233 发表于 2024-10-27 22:39
xixicoco 发表于 2024-10-27 23:34
有点意思,适合大批量下载
紫云互联 发表于 2024-10-27 23:54
星球的我之前试过了。这鬼玩意有下载限制。除非你能破了服务器的下载限制
hwf411 发表于 2024-10-28 00:49
感谢楼主的分享
 楼主| 蜗牛很牛 发表于 2024-10-28 07:43
紫云互联 发表于 2024-10-27 23:54
星球的我之前试过了。这鬼玩意有下载限制。除非你能破了服务器的下载限制

服务器直接做了限制,没得法。只能多搞几个账号
zkh201411 发表于 2024-10-28 09:11
感谢分享
小丑恶人 发表于 2024-10-28 09:39
拿走研究研究先
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2025-1-8 19:41

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表