# Forum code-box header left over from the paste ("[Python] view as plain text / copy code"); not part of the script.
import tkinter as tk
import re
import urllib.request
import os
import time
from urllib.parse import quote
from tkinter import *
import tkinter.messagebox
def search1():
    """Search qidian.com for the title typed into ``entryUrl`` and list the hits.

    Reads the keyword from the module-level ``entryUrl`` entry, fetches the
    qidian search page and writes up to ten numbered result rows
    ("<index> <book name> <latest chapter> <update time>") followed by the
    download hint into the module-level ``var``.  An empty keyword only pops
    up an error dialog.

    Returns:
        The search URL that was built from the keyword.

    Raises:
        urllib.error.URLError: if the search page cannot be fetched.
    """
    search = entryUrl.get()
    search_Book = "https://www.qidian.com/search?kw=" + quote(search)
    if search == "":
        tkinter.messagebox.showinfo('错误', "请输入书名!")
        return search_Book

    # Headers must be passed to the constructor (or via req.add_header(k, v)).
    # Assigning a dict to ``req.add_header`` merely overwrites the method and
    # sends no User-Agent at all.
    req = urllib.request.Request(
        search_Book,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0'
        },
    )
    txt = urllib.request.urlopen(req).read().decode('utf-8')

    filter_bookname = r'<h4><a href=".+?" target="_blank" data-eid=".+?" data-bid=".+?" data-algrid="0.0.0">(.+?)</h4>'
    # Strip the highlight markup from each captured title individually, so a
    # title that contains a space stays one entry and rows remain aligned.
    book_name = [
        re.sub(r'<cite class="red-kw">|</a>|</cite>', '', raw_name)
        for raw_name in re.findall(filter_bookname, txt, re.S)
    ]
    filter_Chapter = r'<p class="update"><a href=".+?>(.+?)</a>'
    state_Chapter = re.findall(filter_Chapter, txt, re.S)
    filter_time = r'<em>·</em><span>(.+?)</span>'
    state_time = re.findall(filter_time, txt, re.S)

    # range(10) caps the list at ten rows: the download code addresses a book
    # by a single digit.  zip() stops at the shortest of the parsed lists.
    rows = [
        " ".join((str(index), name, chapter, updated))
        for index, name, chapter, updated in zip(range(10), book_name, state_Chapter, state_time)
    ]
    var.set("\n".join(rows) + "\n\n\n只限起点免费章节小说\n输入你要下载书的号数和章节(需5位,例;00100(第0项100章) -- 书号+章节):")
    return search_Book
def mkdir(path):
    """Create directory *path* (including missing parents) and report the outcome.

    Prints "创建成功" when the directory was created and "文件已存在" when
    something already exists at *path*.

    Args:
        path: Directory path to create.

    Returns:
        None.

    Raises:
        OSError: if creation fails and nothing exists at *path* afterwards.
    """
    try:
        # Try the creation first instead of testing os.path.exists() up front:
        # a separate existence check can be stale by the time makedirs runs.
        os.makedirs(path)
    except OSError:
        # Only treat the failure as "already there" when the path really exists;
        # any other failure (permissions, bad path) is re-raised unchanged.
        if not os.path.exists(path):
            raise
        print("文件已存在")
    else:
        print("创建成功")
def _fetch_page(url, timeout=50):
    """Download *url* with a browser User-Agent and return the body decoded as UTF-8."""
    request = urllib.request.Request(
        url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0'
        },
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode('utf-8')


def _safe_filename(name):
    """Replace characters that are not allowed in Windows file names with underscores."""
    return re.sub(r'[\\/:*?"<>|\r\n]', '_', name).strip()


def download():
    """Download chapters of one search result into a folder named after the book.

    Reads the keyword from ``entryUrl`` and a selection code from
    ``entryUrl1``.  The first character of the code is the index of the book
    in the search results; characters 2-5 are the number of chapters to
    fetch, starting at the book's first chapter and following each page's
    "next chapter" link.  Every chapter is appended to
    ``E:/txt/txt/<chapter title>.txt``; afterwards that folder is renamed to
    ``E:/txt/<book title>`` (with a timestamp suffix if that name is taken).
    ``var`` is set to "下载完成" when the loop ends.

    Invalid input or a missing search result is reported with a dialog and
    nothing is downloaded.

    Raises:
        urllib.error.URLError: if a page cannot be fetched.
        OSError: if a chapter file or the target folder cannot be written.
    """
    selection = entryUrl1.get()
    try:
        if len(selection) < 5:
            raise ValueError("selection code too short")
        book_index = int(selection[0])
        chapter_count = int(selection[1:5])
    except ValueError:
        tkinter.messagebox.showinfo('错误', "请输入5位数字(书号+章节)!")
        return
    if chapter_count < 1:
        tkinter.messagebox.showinfo('错误', "请输入5位数字(书号+章节)!")
        return

    search_Book = "https://www.qidian.com/search?kw=" + quote(entryUrl.get())
    txt = _fetch_page(search_Book)
    # The hrefs on the search page are scheme-relative ("//..."), hence the prefix.
    book_links = ["http:" + href for href in re.findall(r'<a class="red-btn" href="(.+?)"', txt, re.S)]
    if book_index >= len(book_links):
        tkinter.messagebox.showinfo('错误', "未找到对应书号的小说!")
        return

    book_page = _fetch_page(book_links[book_index])
    first_chapter = re.findall(r'data-firstchapterjumpurl="(.+?)">', book_page, re.S)
    if not first_chapter:
        tkinter.messagebox.showinfo('错误', "未找到该书的第一章!")
        return
    link = "http:" + first_chapter[0]

    img_path = "E:/txt/txt/"
    mkdir(img_path)

    book_title = None
    for _ in range(chapter_count):
        page = _fetch_page(link)
        # Body paragraphs start with two ideographic spaces (\u3000).
        paragraphs = re.findall(r'p>\u3000\u3000(.+?)<', page, re.S)
        book_titles = re.findall(r'60c;</em>(.+?)</a>', page, re.S)
        chapter_titles = re.findall(r'<h3 class="j_chapterName">(.+?)</h3>', page, re.S)
        if book_titles:
            book_title = book_titles[0]

        if paragraphs:
            if not chapter_titles:
                # Text without a chapter heading cannot be given a file name; stop here.
                break
            chapter_file = img_path + _safe_filename(chapter_titles[0]) + ".txt"
            # Open once per chapter; explicit UTF-8 avoids UnicodeEncodeError
            # under a non-UTF-8 locale default encoding.
            with open(chapter_file, "a", encoding="utf-8") as f:
                f.writelines(line + "\n" for line in paragraphs)

        # NOTE(review): kept from the original - appends a "1" to the search box
        # for every chapter, presumably as a crude progress marker; it also
        # alters the keyword used by the next search/download. Confirm intent.
        entryUrl.insert('insert', "1")

        next_links = re.findall(r'<a id="j_chapterNext".+?href="//(.+?)"', page, re.S)
        if not next_links:
            # Last chapter reached: there is no further page to follow.
            break
        link = "https://" + next_links[0]

    var.set("下载完成")

    if book_title is None:
        # Without a book title the folder cannot be renamed; leave it in place.
        print("未能获取书名,文件保留在:" + img_path)
        return

    bookname = _safe_filename(book_title)
    img_path2 = "E:/txt/" + bookname
    if not os.path.exists(img_path2):
        os.rename(img_path, img_path2)
    else:
        # %H (24-hour clock) keeps morning and afternoon downloads from
        # producing the same suffix, which %I would allow.
        downloadtime = time.strftime("%Y%m%d%H%M%S", time.localtime())
        os.rename(img_path, img_path2 + downloadtime)
        print("文件名:" + bookname + " 已存在,重命名为:" + bookname + downloadtime + "\n" + "请勿重复操作")
# ---- Main window ----------------------------------------------------------
window = tk.Tk()
window.title('小说爬虫——吾爱:luoluoovo')
# Identical minimum and maximum sizes pin the window at 750x400.
window.minsize(750, 400)
window.maxsize(750, 400)

# Result area: displays whatever search1() / download() store in `var`.
var = tk.StringVar()
textLabel = tk.Label(
    window,
    textvariable=var,
    bg='lightgreen',
    font=('Arial', 12),
    width=85,
    justify=tk.LEFT,
    height=20,
)
textLabel.pack()

# Left entry: book title keyword, read by search1() and download().
entryUrl = tk.Entry(window, width=37)
entryUrl.place(x=3, y=372)

# Right entry: selection code (book index + chapter count), read by download().
entryUrl1 = tk.Entry(window, width=37)
entryUrl1.place(x=250, y=372)

# Action buttons. `b` is packed first, so it sits at the far right with `c`
# immediately to its left.
b = tk.Button(window, text='下载', width=15, command=download)
c = tk.Button(window, text='搜索', width=15, command=search1)
b.pack(side=tk.RIGHT)
c.pack(side=tk.RIGHT)

window.mainloop()