#coding=gb2312
import logging
import urllib.parse
import requests,random,time
import parsel
# Default HTTP headers sent with every request made by this script.
# The User-Agent mimics a desktop Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/95.0.4636.4 Safari/537.36"
    ),
}
def get_book_url(s, timeout=10):
    """Search qb5.la for a novel and return the URL the search redirects to.

    The keyword is percent-encoded using GB2312 and sent to the site's search
    endpoint with automatic redirects disabled, so that the ``Location``
    header of the response can be read directly.

    Args:
        s: Search keyword (the novel's title).
        timeout: Seconds to wait for the server before the request is
            abandoned.

    Returns:
        The redirect target as an absolute URL.

    Raises:
        UnicodeEncodeError: If ``s`` contains characters that cannot be
            encoded as GB2312.
        KeyError: If the response has no ``Location`` header (the search did
            not redirect).
    """
    # Percent-encode the keyword as GB2312 bytes for the query string.
    word = urllib.parse.quote(s, encoding="gb2312")
    url = f"https://www.qb5.la/modules/article/search.php?searchkey={word}&submit=%CB%D1%CB%F7"

    # Route warnings through the logging system; presumably this keeps the
    # warning triggered by verify=False off the console.
    logging.captureWarnings(True)

    # requests follows redirects by default (allow_redirects=True); turning
    # that off lets us read the redirect's Location header ourselves.
    # NOTE(review): verify=False disables TLS certificate verification.
    # It is kept to preserve existing behaviour; confirm it is still needed.
    with requests.Session() as session:
        resp = session.get(
            url=url,
            headers=headers,
            allow_redirects=False,
            verify=False,
            timeout=timeout,
        )

    print(resp.status_code)  # HTTP status code of the search response

    location = resp.headers.get("Location")
    if location is None:
        raise KeyError(
            f"search for {s!r} returned HTTP {resp.status_code} "
            "without a Location header"
        )

    # A Location header may be relative; resolve it against the request URL.
    # An already-absolute value is returned unchanged by urljoin.
    book_url = urllib.parse.urljoin(url, location)
    print(book_url)
    return book_url
def html_response(url, timeout=10):
    """Fetch ``url`` after a short random pause and return it as a Selector.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without a timeout a stalled connection would block
            the whole download indefinitely.

    Returns:
        A ``parsel.Selector`` built from the decoded response text.
    """
    # Sleep 1, 2 or 3 seconds (randint is inclusive) before each request.
    time.sleep(random.randint(1, 3))
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    # Decode using the charset detected from the body content.
    resp.encoding = resp.apparent_encoding
    return parsel.Selector(resp.text)
def get_char_url(url):
    """Collect chapter titles and chapter URLs from a book's index page.

    Args:
        url: URL of the book's chapter-list page.

    Returns:
        A dict mapping chapter title to absolute chapter URL, in page order.
        If two links share the same title, the later URL replaces the
        earlier one.
    """
    selector = html_response(url)
    char_dic = {}
    # Read href and text from the same <a> node so a link with missing or
    # split text can never shift titles against URLs.
    for link in selector.xpath("//div[@class='zjbox']//dd/a"):
        href = link.xpath("@href").extract_first()
        if href is None:
            continue
        char_title = "".join(link.xpath("text()").extract())
        # Resolve against the page that was actually fetched (the `url`
        # argument), so the function does not depend on a module-level
        # variable and absolute hrefs are handled correctly.
        char_url = urllib.parse.urljoin(url, href)
        char_dic[char_title] = char_url
        print(char_title, char_url)
    return char_dic
def char_book_down(dic, name=None):
    """Download every chapter in ``dic`` and append it to ``<name>.txt``.

    Args:
        dic: Mapping of chapter title to chapter URL.
        name: Base name of the output file (without extension). When
            omitted, the module-level variable ``s`` is used, which keeps
            existing ``char_book_down(dic)`` calls working.

    Raises:
        NameError: If ``name`` is omitted and no module-level ``s`` exists.
    """
    if name is None:
        # Backward-compatible fallback to the global search keyword.
        name = s
    path = name + ".txt"

    for char_title, char_url in dic.items():
        selector = html_response(char_url)
        content = selector.xpath("//div[@id='content']//text()").extract()
        # Strip each text node and join them into one unbroken line.
        char_content = "".join(x.strip() for x in content)
        # The file is reopened in append mode for every chapter, so each
        # finished chapter is closed out to disk before the next download.
        with open(path, "a", encoding="utf-8") as f:
            f.write(char_title)
            f.write('\n')
            f.write(char_content)
            f.write('\n')
        print(char_title, "下载完成")
    print(name, "小说下载完成")
if __name__ == '__main__':
    # Script entry point: prompt for a title, resolve the book's page,
    # list its chapters, then download them all.
    # `s` and `book_url` are bound at module level here; functions in this
    # file may read them as globals, so keep these names.
    s = input("请输入需要搜索的小说:")
    book_url = get_book_url(s)
    char_dic = get_char_url(book_url)
    char_book_down(char_dic)