# [Python] view as plain text / copy code  (header left over from the forum paste)
import os
import requests
from lxml import etree
from docx import Document
from urllib import parse
from pyquery import PyQuery as pq
from bs4 import BeautifulSoup
import time
from tkinter import *
from tkinter import messagebox
# Request header dictionary carrying a desktop-Chrome User-Agent string; it is
# handed to the requests.get() calls made by the functions in this file.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/78.0.3904.97 Safari/537.36'
    ),
}
# Build the search URL for a novel
def search_book():
    """Return the site search URL for the title typed into ``entry1``.

    The title is encoded to UTF-8 bytes and percent-encoded before it is
    appended to the search endpoint as the ``searchkey`` value.
    """
    endpoint = 'http://www.xbiquge.la/modules/article/waps.php?searchkey='
    keyword_bytes = entry1.get().encode('utf-8')
    return endpoint + parse.quote(keyword_bytes)
# Check whether the search found a book and ask whether to download it
def find_book(book_url):
    """Fetch the search page and report the first hit in the ``text1`` listbox.

    When the page contains no result link, a "book not found" message is
    shown; otherwise the title and author of the first result are shown
    together with a download prompt.

    Args:
        book_url: Search URL, as built by ``search_book()``.
    """
    # ``headers`` must be passed by keyword: the second positional parameter
    # of ``requests.get`` is ``params``, which would put the User-Agent into
    # the query string instead of the request headers.
    response = requests.get(book_url, headers=headers)
    page = etree.HTML(response.content.decode('UTF-8'))

    if not page.xpath('//*[@id="checkform"]/table/tr/td/a/@href'):
        message = '搜索的书不存在,请正确输入书名'
    else:
        title = page.xpath('//*[@id="checkform"]/table/tr/td[1]/a/text()')[0]
        author = page.xpath('//*[@id="checkform"]/table/tr/td[3]/text()')[0]
        message = ('找到作品《' + str(title) + '》,作者为 ' + str(author)
                   + ' ,仔细看一下哦!请问是否下载?')

    # Append the message, scroll it into view and repaint immediately.
    text1.insert(END, message)
    text1.see(END)
    text1.update()
def download(book_url):
    """Start downloading the first book listed on the search page.

    Announces the start in the ``text1`` listbox, makes sure the ``下载``
    output directory exists, and hands the first result link to
    ``book_content_list()``. Nothing happens when the search page contains
    no result link.

    Args:
        book_url: Search URL, as built by ``search_book()``.
    """
    # ``headers`` must be passed by keyword: the second positional parameter
    # of ``requests.get`` is ``params``, not ``headers``.
    response = requests.get(book_url, headers=headers)
    page = etree.HTML(response.content.decode('UTF-8'))

    links = page.xpath('//*[@id="checkform"]/table/tr/td/a/@href')
    if not links:
        return

    text1.insert(END, '开始下载!')
    text1.see(END)
    text1.update()

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("下载", exist_ok=True)
    book_content_list(links[0])
# Collect the chapter links from the table of contents and download each one
def book_content_list(url):
    """Fetch a book's table-of-contents page and download every chapter.

    Each chapter ``href`` found on the page is appended to the site root and
    passed to ``download_book()``, in page order.

    Args:
        url: URL of the table-of-contents page.
    """
    # ``headers`` must be passed by keyword: the second positional parameter
    # of ``requests.get`` is ``params``, not ``headers``.
    response = requests.get(url, headers=headers)
    page = etree.HTML(response.content)
    for chapter_path in page.xpath('//*[@id="list"]/dl/dd/a/@href'):
        download_book('http://www.xbiquge.la' + str(chapter_path))
def download_book(url):
    """Download one chapter page and append it to the book's text file.

    The chapter title is echoed to the ``text1`` listbox, then the title and
    body are appended to ``下载/<text of entry1>.txt``. If anything fails the
    error is reported in the listbox and the chapter is skipped, so a single
    bad chapter does not abort the rest of the download.

    Args:
        url: Absolute URL of the chapter page.
    """
    try:
        response = requests.get(url, headers=headers)
        # Use the charset detected from the body to avoid mojibake.
        response.encoding = response.apparent_encoding
        doc = pq(response.text)
        title = doc('#wrapper > div.content_read > div > div.bookname > h1').text()
        content = doc('#content').text()

        text1.insert(END, title)
        text1.see(END)
        text1.update()
        time.sleep(1)  # Throttle: wait one second per chapter.

        with open('下载/{}.txt'.format(entry1.get()), 'a', encoding='utf-8') as f:
            # Blank lines after the title and after the body separate chapters.
            f.write('{}\n\n{}\n\n'.format(title, content))
    except Exception as exc:
        # Catch Exception rather than using a bare ``except`` so that
        # SystemExit and KeyboardInterrupt still propagate, and tell the
        # user which chapter was skipped instead of dropping it silently.
        text1.insert(END, '下载失败: {} ({})'.format(url, exc))
        text1.see(END)
        text1.update()
def button2_even():
    """Look up the title typed into ``entry1``; do nothing if it is empty."""
    if entry1.get() == '':
        return
    find_book(search_book())
def download1():
    """Search for the title typed into ``entry1`` and download the book.

    Does nothing when the entry is empty, matching ``button2_even()``.
    Errors raised while downloading are shown in the ``text1`` listbox.
    """
    if entry1.get() == '':
        return
    try:
        download(search_book())
    except Exception as exc:
        # Catch Exception rather than using a bare ``except`` so that
        # SystemExit and KeyboardInterrupt still propagate, and surface the
        # failure to the user instead of hiding it.
        text1.insert(END, '下载出错: {}'.format(exc))
        text1.see(END)
        text1.update()
# def is_pause():
# global is_pause
# is_pause = not is_pause
# if not is_pause:
# pauseBut["text"] = "继续"
# else:
# pauseBut["text"] = "暂停"
def is_quit():
    """Ask for confirmation, then close the window and end the program."""
    answer = messagebox.askquestion("退出程序", "您确定要退出程序并结束下载吗?")
    if answer != 'yes':
        return
    root.destroy()
    # Raise SystemExit directly: the ``exit`` helper is injected by the
    # ``site`` module for interactive use and is not guaranteed to exist
    # (for example when the interpreter runs without ``site``).
    raise SystemExit
# User interface: create the main window and its widgets, then run the event loop.
root = Tk()
root.title('XX小说下载器')
root.geometry('550x400+400+200')
# Search label and the entry the book title is typed into.
label1 = Label(root, text="搜 索:", font=('楷体', 15))
label1.place(x=65, y=30)
entry1 = Entry(root, font=('楷体', 15))
entry1.place(x=170, y=30)
# Disabled: a second label/entry pair for confirming the download by name.
# label2 = Label(root, text="确认下载:", font=('楷体',15))
# label2.place(x=60, y=40)
# entry2 = Entry(root, font=('楷体', 15))
# entry2.place(x=170, y=40)
# label3 = Label(root, text="(输入小说名称确认下载)", font=('楷体', 10), fg='red')
# label3.place(x=375, y=43)
# Listbox that the callbacks append their messages to.
# NOTE: ('楷体') is a plain string, not a 1-tuple (there is no trailing comma).
text1 = Listbox(root, font=('楷体') ,background='#aaa')
text1.place(x=10, y=80, relwidth=0.95, relheight=0.65)
# Horizontal and vertical scrollbars are packed inside the listbox and wired
# to it in both directions (scrollbar -> view, view -> scrollbar).
xscrollbar = Scrollbar(text1, command=text1.xview, orient=HORIZONTAL)
xscrollbar.pack(side=BOTTOM, fill=X)
yscrollbar = Scrollbar(text1, command=text1.yview)
yscrollbar.pack(side=RIGHT, fill=Y)
text1.config(yscrollcommand=yscrollbar.set)
text1.config(xscrollcommand=xscrollbar.set)
# "Confirm" button: runs button2_even (search for the entered title).
# NOTE: relief (border style: FLAT, SUNKEN, RAISED, GROOVE, RIDGE) is not set
# on any button here, so the widget's default relief applies.
button1 = Button(root, text="确定", font=('楷体', 10), bd=4 ,activeforeground='#000', activebackground='#ace', highlightcolor="#ddd", command=button2_even)
button1.place(x=400, y=28)
# "Download" button: runs download1.
button2 = Button(root, text="下载", font=('楷体', 15), bd=4 ,activeforeground='#000', activebackground='#ace', highlightcolor="#ddd", command=download1)
button2.place(x=100, y=350)
# Disabled: pause/resume button.
# pauseBut = Button(root, text="暂停", font=('楷体', 15), bd=4 ,activeforeground='#000', activebackground='#ace', highlightcolor="#ddd",command=is_pause)
# pauseBut.place(x=250, y=350)
# "Quit" button: runs is_quit (asks for confirmation first).
button4 = Button(root, text="退出", font=('楷体', 15), bd=4 ,activeforeground='#000', activebackground='#ace', highlightcolor="#ddd",command=is_quit)
button4.place(x=400, y=350)
# Hand control to the Tk event loop; this call blocks until the window is destroyed.
root.mainloop()