使用Python实现某茄小说搜索与下载
本帖最后由 oncall12 于 2024-7-23 16:21 编辑
注意事项:
- 本项目仅用于学习和研究目的,不得用于任何非法活动或侵犯他人权益的行为。
- 使用本脚本所产生的一切法律责任和风险,均由用户自行承担。
- 在使用本脚本时,请遵守相关法律法规和网站的使用协议。
使用说明:
- 确保你的 Python 环境中已经安装了 requests 和 parsel 库。如果没有安装,可以使用 pip 进行安装:
pip install requests parsel
# 导入模块
import requests
import parsel
import time
# Character substitution table used to de-obfuscate chapter text.
# Each key is a five-digit decimal string and each value is the real character
# it stands for (digits, ASCII letters, then common Chinese characters).
# NOTE(review): keys look like decimal code points of private-use glyphs
# (i.e. str(ord(ch))) — confirm against the site's current font mapping.
dit_data = {
    '58670': '0',
    '58413': '1',
    '58678': '2',
    '58371': '3',
    '58353': '4',
    '58480': '5',
    '58359': '6',
    '58449': '7',
    '58540': '8',
    '58692': '9',
    '58712': 'a',
    '58542': 'b',
    '58575': 'c',
    # Was 'a' (duplicate of 58712); the alphabetical run a..z requires 'd' here.
    '58626': 'd',
    '58691': 'e',
    '58561': 'f',
    '58362': 'g',
    '58619': 'h',
    '58430': 'i',
    '58531': 'j',
    '58588': 'k',
    '58440': 'l',
    '58681': 'm',
    '58631': 'n',
    '58376': 'o',
    '58429': 'p',
    '58555': 'q',
    '58498': 'r',
    '58518': 's',
    '58453': 't',
    '58397': 'u',
    '58356': 'v',
    '58435': 'w',
    '58514': 'x',
    '58482': 'y',
    '58529': 'z',
    '58515': 'A',
    '58688': 'B',
    '58709': 'C',
    '58344': 'D',
    '58656': 'E',
    '58381': 'F',
    '58576': 'G',
    '58516': 'H',
    '58463': 'I',
    '58649': 'J',
    '58571': 'K',
    '58558': 'L',
    '58433': 'M',
    '58517': 'N',
    '58387': 'O',
    '58687': 'P',
    '58537': 'Q',
    '58541': 'R',
    '58458': 'S',
    '58390': 'T',
    '58466': 'U',
    '58386': 'V',
    '58697': 'W',
    '58519': 'X',
    '58511': 'Y',
    '58634': 'Z',
    '58611': '的',
    '58590': '一',
    '58398': '是',
    '58422': '了',
    '58657': '我',
    '58666': '不',
    '58562': '人',
    '58345': '在',
    '58510': '他',
    '58496': '有',
    '58654': '这',
    '58441': '个',
    '58493': '上',
    '58714': '们',
    '58618': '来',
    '58528': '到',
    '58620': '时',
    '58403': '大',
    '58461': '地',
    '58481': '为',
    '58700': '子',
    '58708': '中',
    '58503': '你',
    '58442': '说',
    '58639': '生',
    '58506': '国',
    '58663': '年',
    '58436': '着',
    '58563': '就',
    '58391': '那',
    '58357': '和',
    '58354': '要',
    '58695': '她',
    '58372': '出',
    '58696': '也',
    '58551': '得',
    '58445': '里',
    '58408': '后',
    '58599': '自',
    '58424': '以',
    '58394': '会',
    '58348': '家',
    '58426': '可',
    '58673': '下',
    '58417': '而',
    '58556': '过',
    '58603': '天',
    '58565': '去',
    '58604': '能',
    '58522': '对',
    '58632': '小',
    '58622': '多',
    '58350': '然',
    '58605': '于',
    '58617': '心',
    '58401': '学',
    '58637': '么',
    '58684': '之',
    '58382': '都',
    '58464': '好',
    '58487': '看',
    '58693': '起',
    '58608': '发',
    '58392': '当',
    '58474': '没',
    '58601': '成',
    '58355': '只',
    '58573': '如',
    '58499': '事',
    '58469': '把',
    '58361': '还',
    '58698': '用',
    '58489': '第',
    '58711': '样',
    '58457': '道',
    '58635': '想',
    '58492': '作',
    '58647': '种',
    '58623': '开',
    '58521': '美',
    '58609': '总',
    '58530': '从',
    '58665': '无',
    '58652': '情',
    '58676': '己',
    '58456': '面',
    '58581': '最',
    '58509': '女',
    '58488': '但',
    '58363': '现',
    '58685': '前',
    '58396': '些',
    '58523': '所',
    '58471': '同',
    '58485': '日',
    '58613': '手',
    '58533': '又',
    '58589': '行',
    '58527': '意',
    '58593': '动',
    '58699': '方',
    '58707': '期',
    '58414': '它',
    '58596': '头',
    '58570': '经',
    '58660': '长',
    '58364': '儿',
    '58526': '回',
    '58501': '位',
    '58638': '分',
    '58404': '爱',
    '58677': '老',
    '58535': '因',
    '58629': '很',
    '58577': '给',
    '58606': '名',
    '58497': '法',
    '58662': '间',
    '58479': '斯',
    '58532': '知',
    '58380': '世',
    '58385': '什',
    '58405': '两',
    '58644': '次',
    '58578': '使',
    '58505': '身',
    '58564': '者',
    '58412': '被',
    '58686': '高',
    '58624': '已',
    '58667': '亲',
    '58607': '其',
    '58616': '进',
    '58368': '此',
    '58427': '话',
    '58423': '常',
    '58633': '与',
    '58525': '活',
    '58543': '正',
    '58418': '感',
    '58597': '见',
    '58683': '明',
    '58507': '问',
    '58621': '力',
    '58703': '理',
    '58438': '尔',
    '58536': '点',
    '58384': '文',
    '58484': '几',
    '58539': '定',
    '58554': '本',
    '58421': '公',
    '58347': '特',
    '58569': '做',
    '58710': '外',
    '58574': '孩',
    '58375': '相',
    '58645': '西',
    '58592': '果',
    '58572': '走',
    '58388': '将',
    '58370': '月',
    '58399': '十',
    '58651': '实',
    '58546': '向',
    '58504': '声',
    '58419': '车',
    '58407': '全',
    '58672': '信',
    '58675': '重',
    '58538': '三',
    '58465': '机',
    '58374': '工',
    '58579': '物',
    '58402': '气',
    '58702': '每',
    '58553': '并',
    '58360': '别',
    '58389': '真',
    '58560': '打',
    '58690': '太',
    '58473': '新',
    '58512': '比',
    '58653': '才',
    '58704': '便',
    '58545': '夫',
    '58641': '再',
    '58475': '书',
    '58583': '部',
    '58472': '水',
    '58478': '像',
    '58664': '眼',
    '58586': '等',
    '58568': '体',
    '58674': '却',
    '58490': '加',
    '58476': '电',
    '58346': '主',
    '58630': '界',
    '58595': '门',
    '58502': '利',
    '58713': '海',
    '58587': '受',
    '58548': '听',
    '58351': '表',
    '58547': '德',
    '58443': '少',
    '58460': '克',
    '58636': '代',
    '58585': '员',
    '58625': '许',
    '58694': '陵',
    '58428': '先',
    '58640': '口',
    '58628': '由',
    '58612': '死',
    '58446': '安',
    '58468': '写',
    '58410': '性',
    '58508': '马',
    '58594': '光',
    '58483': '白',
    '58544': '或',
    '58495': '住',
    '58450': '难',
    '58643': '望',
    '58486': '教',
    '58406': '命',
    '58447': '花',
    '58669': '结',
    '58415': '乐',
    '58444': '色',
    '58549': '更',
    '58494': '拉',
    '58409': '东',
    '58658': '神',
    '58557': '记',
    '58602': '处',
    '58559': '让',
    '58610': '母',
    '58513': '父',
    '58500': '应',
    '58378': '直',
    '58680': '字',
    '58352': '场',
    '58383': '平',
    '58454': '报',
    '58671': '友',
    '58668': '关',
    '58452': '放',
    '58627': '至',
    '58400': '张',
    '58455': '认',
    '58416': '接',
    '58552': '告',
    '58614': '入',
    '58582': '笑',
    '58534': '内',
    '58701': '英',
    '58349': '军',
    '58491': '候',
    '58467': '民',
    '58365': '岁',
    '58598': '往',
    '58425': '何',
    '58462': '度',
    '58420': '山',
    '58661': '觉',
    '58615': '路',
    '58648': '带',
    '58470': '万',
    '58377': '男',
    '58520': '边',
    '58646': '风',
    '58600': '解',
    '58431': '叫',
    '58715': '任',
    '58524': '金',
    '58439': '快',
    '58566': '原',
    '58477': '吃',
    '58642': '妈',
    '58437': '变',
    '58411': '通',
    '58451': '师',
    '58395': '立',
    '58369': '象',
    '58706': '数',
    '58705': '四',
    '58379': '失',
    '58567': '满',
    '58373': '战',
    '58448': '远',
    '58659': '格',
    '58434': '士',
    '58679': '音',
    '58432': '轻',
    '58689': '目',
    '58591': '条',
    '58682': '呢',
}
# HTTP headers sent with every request: a desktop Chrome User-Agent so the
# requests look like they come from a regular browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/117.0.0.0 Safari/537.36'
    ),
}
# Print the copyright notice and disclaimer once at start-up.
# The lines are joined so a single print() emits exactly the same output as
# printing each line separately (print appends the final newline).
print("\n".join((
    "\n\n \033[31m免责声明:\033[0m",
    " 本程序仅用于学习和研究Python网络爬虫和网页处理技术",
    " 本程序不得用于任何非法活动或侵犯他人权益的行为",
    " 使用本程序所产生的一切法律责任和风险,均由用户自行承担",
    " 作者不对因使用该程序而导致的任何损失或损害承担任何责任",
    "\n\n",
)))
_SEARCH_URL = 'https://novel.snssdk.com/api/novel/channel/homepage/search/search/v1/'
_SITE_ROOT = 'https://fanqienovel.com'
_REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the script
_SEARCH_PAGES = 3  # at most three result pages are queried
_SEARCH_PAGE_SIZE = 10  # the search offset advances by ten per page
_EXCLUDED_AUTHORS = ('番茄漫画', '番茄畅听')
_NO_CHAPTERS_MSG = '\033[31m该小说没有章节,请重新选择序号下载\033[0m'
_FORBIDDEN_FILE_CHARS = '\\/:*?"<>|'


def _count_chapters(book_id):
    """Return the number of `.chapter-item` nodes on the book's page."""
    book_url = f"{_SITE_ROOT}/page/{book_id}?enter_from=search"
    book_response = requests.get(book_url, headers=headers, timeout=_REQUEST_TIMEOUT)
    return len(parsel.Selector(book_response.text).css('.chapter-item'))


def _search_books(key):
    """Query the search API and return a list of book dicts, or None on failure.

    Each dict has the keys 'title', 'author', 'book_id' and 'chapter_count'.
    """
    info = []
    for page in range(_SEARCH_PAGES):
        # Passing the query through `params` lets requests percent-encode the
        # keyword, so characters such as '&' or '#' cannot break the URL.
        params = {
            'device_platform': 'android',
            'parent_enterfrom': 'novel_channel_search.tab',
            'aid': '1967',
            'offset': page * _SEARCH_PAGE_SIZE,
            'q': key,
        }
        try:
            search_data = requests.get(
                _SEARCH_URL, params=params, headers=headers, timeout=_REQUEST_TIMEOUT
            ).json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on older requests versions.
            print(f'搜索请求失败: {e}')
            return None
        if search_data['message'] != 'success':
            print(search_data['message'])
            continue
        data = search_data.get('data') or {}
        for book in data.get('ret_data') or []:
            if book['author'] in _EXCLUDED_AUTHORS:
                continue
            book_id = book['book_id']
            try:
                chapter_count = _count_chapters(book_id)
            except requests.exceptions.RequestException as e:
                print(f"获取章节数量失败: {e}")
                continue  # skip this book and keep going
            info.append({
                'title': book['title'],
                'author': book['author'],
                'book_id': book_id,
                'chapter_count': chapter_count,
            })
        if not data.get('has_more'):
            break
    return info


def _prompt_book_index(book_count):
    """Ask for a 1-based book number; return the 0-based index, or None for 0."""
    while True:
        try:
            choice = int(input('\n\033[31m请输入要下载的书籍序号\033[0m(输入0返回搜索):'))
        except ValueError:
            print("请输入数字!")
            continue
        if choice == 0:
            return None
        if 1 <= choice <= book_count:
            return choice - 1
        print("无效的序号!")


def _parse_chapters(selector):
    """Return (title, href) pairs for every chapter link that has an href."""
    chapters = []
    for item in selector.css('.chapter-item'):
        title = item.css('.chapter-item-title::text').get()
        href = item.css('.chapter-item-title::attr(href)').get()
        if href:
            chapters.append((title or '', href))
    return chapters


def _safe_file_name(name):
    """Replace characters that are not allowed in file names with '_'."""
    return ''.join('_' if ch in _FORBIDDEN_FILE_CHARS else ch for ch in name)


def search_and_download():
    """Interactively search for a novel and download it to '<name>.txt'.

    Prompts for a keyword, lists the matching books with their chapter counts,
    asks which one to download, then fetches every chapter, decodes it with
    `dit_data` and appends it to a UTF-8 text file in the working directory.
    Returns None in every case; returning early sends the caller back to a new
    search.
    """
    # --- search ---
    while True:
        key = input('请输入书名或者作者名进行搜索:')
        if key.strip():
            break
        print('\033[31m请输入有效的书名或作者名!\033[0m')
    print('\n\033[31m正在搜索中,请稍等....\033[0m')

    info = _search_books(key)
    if info is None:
        return
    if not info:
        # Without this guard the empty list would fall into the "all books have
        # zero chapters" branch and wrongly blame the network.
        print('\033[31m没有搜索到相关书籍,请更换关键词重试!\033[0m')
        return

    print("\n搜索结果:")
    for i, book in enumerate(info):
        print(f"{i + 1}. {book['title']} - 作者:{book['author']} (共{book['chapter_count']}章) ID: {book['book_id']}")
    if all(book['chapter_count'] == 0 for book in info):
        print("\033[31m所有搜索到的书籍都没有章节,网络请求受限,请更换IP!!!\033[0m")
        return

    # --- pick a book and load its chapter list; re-prompt until one works ---
    while True:
        selected_index = _prompt_book_index(len(info))
        if selected_index is None:
            return
        book_id = info[selected_index]['book_id']
        url = f'{_SITE_ROOT}/page/{book_id}?enter_from=search'
        try:
            response = requests.get(url=url, headers=headers, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f'获取小说信息失败: {e}')
            print(_NO_CHAPTERS_MSG)
            continue
        selector = parsel.Selector(response.text)
        chapters = _parse_chapters(selector)
        if chapters:
            break
        print(_NO_CHAPTERS_MSG)

    # Fall back to the search-result title when the page has no <h1> name.
    name = selector.css('.info-name h1::text').get() or info[selected_index]['title']
    total = len(chapters)
    # NOTE(review): banner wording reconstructed — the original format string
    # was corrupted; confirm the intended text.
    print(f"\n==========\033[31m 正在下载:{name},共 {total} 章,ID: {book_id}\033[0m ==========\n")

    # Translation table for str.translate: code point -> real character.
    # Assumes dit_data keys are decimal code points — TODO confirm.
    decode_table = {int(code): char for code, char in dit_data.items()}

    # --- download ---
    start_time = time.time()
    download_success = True
    with open(_safe_file_name(name) + '.txt', mode='a', encoding='utf-8') as f:
        # Iterate the list (not a dict keyed by title) so chapters that share a
        # title are all downloaded, in page order.
        for i, (title, href) in enumerate(chapters, 1):
            print(f'正在下载 {i}/{total}: {title}')
            link_url = _SITE_ROOT + href + '?enter_from=page'
            try:
                link_response = requests.get(
                    url=link_url, headers=headers, timeout=_REQUEST_TIMEOUT
                )
                link_response.raise_for_status()
            except requests.exceptions.RequestException as e:
                print(f'获取章节内容失败: {e}')
                download_success = False
                continue
            paragraphs = parsel.Selector(link_response.text).css(
                '.muye-reader-content-16 p::text'
            ).getall()
            novel_content = '\n\n'.join(paragraphs).translate(decode_table)
            f.write(f'{title}\n\n{novel_content}\n\n')

    # --- report ---
    download_duration = time.time() - start_time
    if download_duration < 60:
        print(f'下载耗时: {download_duration:.2f} 秒')
    else:
        minutes = int(download_duration // 60)
        seconds = int(download_duration % 60)
        print(f'下载耗时: {minutes} 分 {seconds} 秒')
    if download_success:
        print(f'=============== {name} 下载完成,存放在软件目录里 ==========\n')
    else:
        print(f'=============== {name} 下载失败 ==========\n')
if __name__ == '__main__':
    # Keep offering new searches until the user presses Ctrl+C or closes
    # stdin; exit quietly instead of dumping a traceback. The guard also keeps
    # the interactive loop from starting when the module is merely imported.
    try:
        while True:
            search_and_download()
    except (KeyboardInterrupt, EOFError):
        print('\n已退出')
感谢分享
感谢大佬分享!
试了试,后面的章节加锁的话,就下不下来了。请问怎么办啊?需要登录吗?
感谢大佬分享。
非常实用,好用,太棒了!大佬太聪明了,为我们提供这么好的福利!
请问大佬,怎么这里面的计算机方面的书籍,好多没有章节,下载不了,可以改进下吗?谢谢!
好用,谢谢
页:
[1]