url="https://www.tianyabook.com/list/7/" 这是主页
headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
}
url2="https://www.tianyabook.com" 这是为了给每个小说个章节下载用的
#-----------------------------------------------------
def mkdir(path: str, path2: str):
    """Create the directory path/path2 if it does not already exist."""
    if not os.path.exists(path + "/" + path2):
        os.makedirs(path + "/" + path2)
        print("{}/{} created".format(path, path2))
    else:
        print("{}/{} already exists".format(path, path2))
#=====================================================
info = {}                # shared state: the current book title under key "shuming"
lock = threading.Lock()

def get_html():
    """Parse the listing pages and collect each novel's title and link."""
    for i in range(1, 2):
        r = requests.get(url + str(i) + ".html", headers=headers)
        r.encoding = "gbk"
        soup = BeautifulSoup(r.text, 'lxml')
        s = soup.find("div", class_="col-md-8")
        time.sleep(5)        # throttle requests a little
        print("This is page {}".format(str(i)))
        for j in s.find_all("a"):
            if j.get("title") is not None:
                print(j.get("title") + "\n" + j.get("href"))
                info["shuming"] = str(j.get("title"))      # shuming = book title
                mkdir("test", info["shuming"])
                print(info["shuming"], str(j.get("href")))
                # get_mulu(str(j.get("href")), str(j.get("title")))
def get_mulu(ip, sname):
    """Fetch all chapter names and links for one novel, then download them."""
    lock.acquire()
    r = requests.get(ip, headers=headers)
    r.encoding = "gbk"
    soup = BeautifulSoup(r.text, "lxml")
    s = soup.find("div", class_="panel panel-default", id="list-chapterAll")
    start = time.time()
    for i in s.find_all("dd"):
        for k in i.find_all("a"):
            if k.get("href") is not None:
                print(k.getText(), k.get("href"))
                down(k.get("href"), k.getText(), sname)   # down() downloads one chapter; defined elsewhere
    end = time.time()
    lock.release()
    print("All chapters of {} downloaded, total time: {}".format(sname, (end - start)))
    filename = "{}/{}.txt".format("test", sname)
    with open(filename, "a") as f:          # record the elapsed time for this book
        f.write(str(end - start))
    # Threaded variant: one worker thread per chapter; main() is the
    # per-chapter download routine defined elsewhere in the script.
    threads = []
    for i in s.find_all("dd"):
        for k in i.find_all("a"):
            if k.get("href") is not None:
                print(k.getText(), k.get("href"))
                t = threading.Thread(target=main, args=(str(k.get("href")), k.getText(), sname))
                t.start()
                threads.append(t)
    for t in threads:
        t.join()
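#-----------------------------------------------------
# A minimal usage sketch, not part of the original flow: it assumes that
# down()/main() (the per-chapter download routines referenced above) are
# defined elsewhere in the full script, and the example chapter-list URL
# below is purely hypothetical.
if __name__ == "__main__":
    get_html()   # walk the listing pages and create one folder per novel
    # To fetch one specific novel's chapter list directly, something like:
    # get_mulu(url2 + "/shu/12345.html", "some-book-title")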