import os
import re
import time
from urllib.parse import urljoin

import requests
from lxml import etree
# Browser-like User-Agent sent with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'}

# Index page of the book; chapter links found on it are resolved against it.
url = 'https://www.booktxt.com/28_28498/'

# Used both as the output directory name and as the stem of the output file.
BOOK_NAME = "教官之从特种兵开始"

CHAPTER_LINK_XPATH = "//*[@id='list']/dl/dd/a/@href"
CHAPTER_TEXT_XPATH = "//*[@id='content']/text()"

# Seconds to wait on a request before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def build_chapter_urls(base_url, hrefs):
    """Return absolute chapter URLs for the given link targets.

    Args:
        base_url: URL of the index page the links were taken from.
        hrefs: Iterable of ``href`` values (relative or absolute).

    Returns:
        A list of absolute URLs, in the same order as *hrefs*.
    """
    # urljoin copes with plain file names, root-relative paths and absolute
    # URLs alike, where naive string concatenation only handles the first.
    return [urljoin(base_url, href) for href in hrefs]


def clean_chapter_text(fragments):
    """Join the text nodes of a chapter into newline-separated paragraphs.

    Args:
        fragments: Iterable of strings, one per text node of the chapter.

    Returns:
        The paragraphs joined by ``"\\n"``; an empty string when there is no
        non-blank fragment.
    """
    # str.strip() removes surrounding whitespace, which includes the
    # non-breaking spaces (\xa0) and carriage returns padding each node.
    stripped = (fragment.strip() for fragment in fragments)
    return "\n".join(paragraph for paragraph in stripped if paragraph)


def fetch_html(page_url):
    """Download *page_url* and return it parsed by ``etree.HTML``.

    The response is decoded with the encoding requests detects from the
    body (``apparent_encoding``) before parsing.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    res = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.encoding = res.apparent_encoding
    return etree.HTML(res.text)


def extract(tree, xpath):
    """Evaluate *xpath* on *tree*, treating a missing tree as no matches."""
    # Guard in case parsing produced no document root.
    if tree is None:
        return []
    return tree.xpath(xpath)


def main():
    """Download every chapter listed on the index page into one text file.

    Creates (if needed) and switches into a directory named after the book,
    then appends each chapter's cleaned text to ``<BOOK_NAME>.txt`` there.
    """
    index_page = fetch_html(url)
    real_url = build_chapter_urls(url, extract(index_page, CHAPTER_LINK_XPATH))
    print(len(real_url))

    if not os.path.exists(BOOK_NAME):
        print("目录不存在,准备创建目录")
        os.mkdir(BOOK_NAME)
    else:
        print("目录已存在")
    os.chdir(BOOK_NAME)

    # Working directory after switching into the book folder.
    localpath = os.getcwd()
    print("\n原始地址是:" + str(localpath))

    # Explicit UTF-8 so writing Chinese text does not depend on the platform's
    # default codec; the file is opened once and appended to per chapter.
    with open(BOOK_NAME + ".txt", "a", encoding="utf-8") as f:
        for chapter_url in real_url:
            chapter_page = fetch_html(chapter_url)
            book = clean_chapter_text(extract(chapter_page, CHAPTER_TEXT_XPATH))
            print("正在下载")
            f.write("\n\n")
            f.write(book)
            print("下载完成" + "\n")


if __name__ == "__main__":
    main()