本帖最后由 IvenLuck 于 2020-12-12 15:36 编辑
1、浏览器打开百度首页,登录百度账号
2、按F12打开开发者工具,切换到网络选项卡,刷新页面
3、点开首页请求记录
百度首页
4、在该请求的 Cookie 中找到 BDUSS,复制它的值并粘贴到程序中(main 函数里的 bduss 变量),软件即可正常运行,至于具体干什么就看你自己了
百度文库文档解析核心代码到此全部呈上,对接哪里?对接什么就靠你自己发挥了
[Python] 纯文本查看 复制代码
import requests
from bs4 import BeautifulSoup
class BaiDuWenKu:
    """Client that resolves download links for Baidu Wenku documents.

    Every request goes through a single ``requests.Session`` so that the
    browser-like User-Agent and the BDUSS cookie are sent each time.
    """

    def __init__(self):
        """Create the shared session and install a Firefox User-Agent header."""
        headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0"
        }
        self.session = requests.Session()
        self.session.headers.update(headers)

    def setBduss(self, bduss):
        """Store *bduss* as the ``BDUSS`` cookie on the session.

        The cookie serves as the identity check for the later parse and
        download requests.
        """
        self.session.cookies.update({"BDUSS": bduss})

    def parserFileInfo(self, file_url):
        """Fetch the document page and collect the form fields for download.

        Args:
            file_url: URL of the document page.

        Returns:
            A dict mapping each expected ``<input>`` name to its ``value``
            attribute ("" when the attribute is missing or empty), plus
            ``"req_vip_free_doc": "1"``.

        Raises:
            Exception: if any of the expected ``<input>`` elements is not
                present in the page.
        """
        res = self.session.get(file_url)
        soup = BeautifulSoup(res.text, "html.parser")
        file_info_options = [
            "ct", "doc_id", "retType", "sns_type", "storage", "useTicket",
            "target_uticket_num", "downloadToken", "sz", "v_code", "v_input"
        ]
        data = {}
        for option in file_info_options:
            field = soup.find("input", attrs={"name": option})
            # find() yields None for a missing element; test for that directly
            # instead of trapping BaseException, which would also convert
            # KeyboardInterrupt/SystemExit into a parse failure.
            if field is None:
                raise Exception("文档信息解析失败!")
            data[option] = field.get("value") or ""
        data["req_vip_free_doc"] = "1"
        return data

    def download(self, data):
        """Request the download link for a previously parsed document.

        Args:
            data: the dict produced by ``parserFileInfo``; it must contain
                ``"doc_id"`` and is posted as the form body.

        Returns:
            The ``Location`` header of the 302 response to the download
            request.

        Raises:
            Exception: if the service does not report the document as
                VIP-free, if the download request is not answered with a
                302, or if the 302 carries no ``Location`` header.
        """
        url = "https://wenku.baidu.com/user/interface/getvipfreedoc"
        params = {"doc_id": data["doc_id"]}
        jRes = self.session.get(url, params=params).json()
        if not jRes["data"]["is_vip_free_doc"]:
            raise Exception("只能下载vip免费文档哦")

        url = "https://wenku.baidu.com/user/submit/download"
        # Redirects are not followed: the redirect target itself is the result.
        res = self.session.post(url, data=data, allow_redirects=False)
        location = res.headers.get("Location") if res.status_code == 302 else None
        if not location:
            # Covers both a non-302 answer and a 302 without a Location
            # header, so the caller never receives None as a "link".
            raise Exception("下载失败,请稍后再试!")
        return location
def main():
    """Run one pass: set the BDUSS cookie, parse a fixed URL, show the link."""
    # Interactive alternative: url = input("请输入要下载的文档url:")
    target = "https://wenku.baidu.com/view/96fec4f4f321dd36a32d7375a417866fb94ac074.html?fr=search-income2"
    token = "Vx*************81djg2WDgtUU5pMkE5UnhmRV*********AAAAAAEAAABlTk4wbnZwZW5neW91amlnZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIBo9V6AaPVeWG"

    client = BaiDuWenKu()
    client.setBduss(token)
    link = client.download(client.parserFileInfo(target))

    # The result is shown via input(), so the program waits for Enter
    # before returning (presumably to keep a console window open).
    input(f"文档下载地址为:{link}\n请及时下载")
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Show the error text as a prompt and wait for Enter before exiting.
        # Exception (not BaseException) lets Ctrl-C and SystemExit end the
        # program normally instead of being turned into another prompt.
        input(e)
喜欢的话记得给个赞,点个关注哦
附运行图一张:
|