下全站的时候老是卡死或者报错,不知道是不是我电脑的原因。
于是我简单地改造了一下,加了超时重试和缓存判断。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Code by 此称江初
import random
import os
import requests
import threading
import time
from lxml import etree
# Base listing URL of the target site; all page URLs are derived from it.
Url = "https://www.moestack.com/all"
# Desktop Chrome User-Agent so the site serves the normal HTML pages.
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"}
def Mkdir(path):
    """Create directory *path* (and any parents) if it does not exist.

    The path is stripped of surrounding whitespace and a trailing
    backslash before the check.

    Returns:
        True when the directory was created, None when it already
        existed (the original contract; callers ignore the result).
    """
    path = path.strip().rstrip("\\")
    # Guard clause replaces the original if/else with a dead `pass` branch.
    if not os.path.exists(path):
        os.makedirs(path)
        return True
def Get_Page(x):
    """Fetch URL *x* and return the page parsed as an lxml HTML tree.

    Returns:
        An lxml element tree, or None when the request failed after all
        retries (the original crashed with AttributeError on the None
        response returned by gethtml in that case).
    """
    resp = gethtml(x, headers=headers)
    if resp is None:  # all retries failed; let the caller decide
        return None
    resp.encoding = "utf-8"
    return etree.HTML(resp.text)
def gethtml(url, headers):
    """GET *url* with up to 3 attempts and a 5-second timeout per attempt.

    Args:
        url: the URL to fetch.
        headers: HTTP headers dict passed to requests.

    Returns:
        The requests.Response on success, or None after three failed
        attempts (callers must check for None — the original fell off
        the end and returned None implicitly).
    """
    for _attempt in range(3):
        try:
            return requests.get(url, headers=headers, timeout=5)
        except requests.exceptions.RequestException:
            print("超时:" + url)
    return None
def end(idx=None):
    """Download one image of the current chapter and save it to disk.

    Relies on module globals set by OnePageDown: GetImgUrl (image URL
    list) and Title (chapter title list).

    Args:
        idx: index into GetImgUrl. Defaults to the module-global ``i``
            for backward compatibility — but reading ``i`` from a thread
            races with the spawning loop, so pass idx explicitly.
    """
    if idx is None:
        idx = i  # legacy behaviour: racy read of the shared loop counter
    img_url = GetImgUrl[idx]
    resp = gethtml(img_url, headers=headers)
    if resp is None:  # request failed after retries; skip this image
        return
    # The last 27 chars of the URL serve as a (mostly) unique file name.
    dest = r"Moe/" + Title[0] + "/" + img_url[-27:] + ".jpg"
    if not os.path.exists(dest):
        with open(dest, "wb") as fh:
            fh.write(resp.content)  # `with` flushes and closes on exit
def GetFiles(path):
    """Return the number of regular files directly inside *path*.

    Subdirectories are ignored (the original counted them into a
    ``dirnum`` variable that was never used).
    """
    return sum(
        os.path.isfile(os.path.join(path, name))
        for name in os.listdir(path)
    )
def DownImg():
    """Download every image of the current chapter, one thread per image.

    Reads the module globals Title / GetImgUrl set by OnePageDown.
    Skips the whole chapter when the target directory already holds as
    many files as there are images (cache hit).
    """
    path = "Moe/" + Title[0] + "/"
    if os.path.exists(path) and GetFiles(path) == len(GetImgUrl):
        print('已缓存:' + Title[0])  # fixed typo: 以 -> 已
        return
    print('开始下载:' + Title[0])
    Mkdir(path)

    def _fetch(idx):
        # Per-thread worker with the index bound as an argument.
        # The original shared the loop counter through a global, so
        # threads raced the spawning loop and could download the same
        # image twice while skipping others.
        img_url = GetImgUrl[idx]
        resp = gethtml(img_url, headers=headers)
        dest = path + img_url[-27:] + ".jpg"
        if resp is not None and not os.path.exists(dest):
            with open(dest, "wb") as fh:
                fh.write(resp.content)

    threads = []
    for idx in range(len(GetImgUrl)):
        t = threading.Thread(target=_fetch, args=(idx,), daemon=True)
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    print("下载完成")
def OnePageDown(x):
    """Download all images of the single chapter page at URL *x*.

    Sets the module-level globals Title and GetImgUrl consumed by
    DownImg / end, then triggers the download.
    """
    global Title, GetImgUrl
    # Fetch and parse once — the original called Get_Page(x) twice,
    # doing two network round-trips per chapter.
    page = Get_Page(x)
    if page is None:  # request failed after retries
        print("超时:" + x)
        return
    GetImgUrl = page.xpath('//*/div[2]/div/div[1]/p/img/@src')
    Title = page.xpath('//*[@class="entry-title"]/text()')
    if not Title:  # layout changed or not a chapter page
        return
    print("标题:" + Title[0])
    print("一共有%d张图片" % len(GetImgUrl))
    DownImg()
def PageDown(x):
    """Download every chapter linked from the listing page at URL *x*."""
    chapter_urls = Get_Page(x).xpath('//*[@class="entry-media"]/div/a/@href')
    print("此页共有%d章" % len(chapter_urls))
    # Dead commented-out random-chapter code removed; loop variable
    # renamed so it no longer shadows the module-global ``i``.
    for chapter_url in chapter_urls:
        OnePageDown(chapter_url)
def AllDown(x):
    """Download the whole site.

    Reads the total page count from the listing page at URL *x*, then
    downloads every listing page in order.
    """
    PageNum = Get_Page(x).xpath('/html/body/div/div[3]/div/div[2]/div/div/main/div[2]/ul/li[6]/a/text()')
    total = int(PageNum[0])
    print("全站共有%d页" % total)
    # Dead random-page code removed. BUG FIX: the original compared the
    # int counter to the *string* '1', which is always False, so the
    # first page was never fetched via the plain /all URL as intended.
    for page in range(1, total + 1):
        if page == 1:
            PageDown("https://www.moestack.com/all")
        else:
            PageDown("https://www.moestack.com/all" + "/page/" + str(page))
def main():
    """Interactive entry point: show the menu and dispatch on the choice."""
    print("菜单:\n1.单页下载\n2.页面下载\n3.全站下载(Boom!!!)")
    Choice = input("请选择:")
    if Choice == '1':
        ImgPageUrl = input("请输入链接:")
        OnePageDown(ImgPageUrl)
    elif Choice == '2':
        PageUrl = input("请输入页面链接:")
        PageDown(PageUrl)
    elif Choice == '3':
        AllDown(Url)
    else:
        # The original silently did nothing on any other input.
        print("无效选择")
if __name__ == "__main__":
    main()  # Code by 此称江初