# [Python] plain-text view / copy code  -- header left over from the forum code viewer this script was copied from
import sys,requests,time,random,os
from lxml import etree
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
from PyQt5.QtCore import *
# Default HTTP headers sent with every request made in this file.
# 'User-Agent' imitates a desktop Chrome browser; 'Cookie' holds a built-in
# default value that may be replaced at runtime from the Cookie field of the form.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/103.0.0.0 Safari/537.36'
    ),
    'Cookie': (
        '__yjs_duid=1_ab3fe12855cab7146c74394a0afac7921669271760011; '
        'Hm_lvt_c59f2e992a863c2744e1ba985abaea6c=1669271760,1669965331; '
        'Hm_lpvt_c59f2e992a863c2744e1ba985abaea6c=1669965339'
    ),
}
class MyWin(QWidget):
    """Main window: a form with the crawl settings and a start button.

    The worker threads in this file read the form through the module-level
    window instance, so the input attribute names (``folderInput``,
    ``linkInput``, ``cookieInput``, ...) are part of this class's interface.
    """

    def __init__(self):
        """Create every label/input pair and arrange them in a grid."""
        super().__init__()

        # Name of the folder (created in the current directory) that receives the files.
        self.folderLabel = QLabel("文件夹名称:", self)
        self.folderLabel.setToolTip("会在当前目录创建一个文件夹保存文件")
        self.folderInput = QLineEdit(self)
        self.folderInput.setPlaceholderText('image')  # placeholder

        # Listing-page URL; the page number is written as "{}".
        self.linkLabel = QLabel("目标链接:", self)
        self.linkLabel.setToolTip("请找到分页的链接,<br />其中分页的数字需要用{}代替")
        self.linkInput = QLineEdit(self)
        self.linkInput.setPlaceholderText('https://www.xxxxxx.com/image/{}/')

        # Cookie request header copied from the browser's developer tools.
        self.cookieLabel = QLabel("Cookie", self)
        self.cookieLabel.setToolTip("打开开发者模式,刷新页面抓请求,cookie在请求头里")
        self.cookieInput = QLineEdit(self)
        self.cookieInput.setPlaceholderText('Hm_lvt_77cec74343dfa437649ee3545756d76e=1669197695;')

        # Character encoding of the target pages (the meta charset value).
        self.charsetLabel = QLabel("网页编码", self)
        self.charsetLabel.setToolTip("请输入网页的编码格式,meta标签中charset的值")
        self.charsetInput = QLineEdit(self)
        self.charsetInput.setPlaceholderText('utf8')

        # First and last listing page to crawl (both inclusive).
        self.startPageLabel = QLabel("开始页码", self)
        self.startPageLabel.setToolTip("请输入开始的页码")
        self.startPageInput = QLineEdit(self)
        self.startPageInput.setPlaceholderText('1')
        self.finishPageLabel = QLabel("结束页码", self)
        self.finishPageLabel.setToolTip("请输入结束的页面")
        self.finishPageInput = QLineEdit(self)
        self.finishPageInput.setPlaceholderText('5')

        # XPath selecting the detail-page links on a listing page.
        self.linkXpathLabel = QLabel("链接Xpath", self)
        self.linkXpathLabel.setToolTip("请输入跳转详情链接的xpath")
        self.linkXpathInput = QLineEdit(self)
        self.linkXpathInput.setPlaceholderText('//div[@class="img"]/a/@href')

        # XPath selecting the image URLs on a detail page.
        self.imgXpathLabel = QLabel("图片Xpath", self)
        self.imgXpathLabel.setToolTip("请输入要保存图片的xpath")
        self.imgXpathInput = QLineEdit(self)
        self.imgXpathInput.setPlaceholderText('//div[@class="img"]/img/@src')

        # Optional prefixes prepended to relative detail links / image URLs.
        self.linkPrefixLabel = QLabel("链接前缀", self)
        self.linkPrefixLabel.setToolTip("跳转详情的链接是否需要添加前缀,<br />如果不需要就留空<br />例如:/tupian/30593.html")
        self.linkPrefixInput = QLineEdit(self)
        self.linkPrefixInput.setPlaceholderText('http://www.xxxxxx.com')
        self.imgPrefixLabel = QLabel("图片前缀", self)
        self.imgPrefixLabel.setToolTip("下载图片的链接是否需要添加前缀,<br />如果不需要就留空<br />例如:/uploads/allimg/221108/1-22110Q54019625.jpg")
        self.imgPrefixInput = QLineEdit(self)
        self.imgPrefixInput.setPlaceholderText('http://www.xxxxxx.com')

        self.startButton = QPushButton('开始', self)
        self.startButton.resize(self.startButton.sizeHint())
        self.startButton.clicked.connect(self.click)

        # Each tuple is one grid row: label, input, label, input.
        self.grid = QGridLayout()
        self.grid.setSpacing(10)
        rows = (
            (self.folderLabel, self.folderInput, self.linkLabel, self.linkInput),
            (self.cookieLabel, self.cookieInput, self.charsetLabel, self.charsetInput),
            (self.startPageLabel, self.startPageInput, self.finishPageLabel, self.finishPageInput),
            (self.linkXpathLabel, self.linkXpathInput, self.imgXpathLabel, self.imgXpathInput),
            (self.linkPrefixLabel, self.linkPrefixInput, self.imgPrefixLabel, self.imgPrefixInput),
        )
        for row, widgets in enumerate(rows):
            for column, widget in enumerate(widgets):
                self.grid.addWidget(widget, row, column)
        self.grid.addWidget(self.startButton, 5, 1)
        self.setLayout(self.grid)

    def _crawl_running(self):
        """Return True while any worker thread of the previous crawl is still running.

        Each stage keeps the next one as an attribute
        (``pagehome.itemhome.saveimage``), so the chain is walked from the top.
        """
        page = getattr(self, 'pagehome', None)
        item = getattr(page, 'itemhome', None)
        save = getattr(item, 'saveimage', None)
        return any(t is not None and t.isRunning() for t in (page, item, save))

    def click(self):
        """Start a crawl: create the output folder, apply the cookie, launch PageHome."""
        # Replacing self.pagehome would drop the last reference to threads that
        # are still running, and Qt aborts when a running QThread is destroyed.
        if self._crawl_running():
            return
        Func.mkdir(self)
        # Read this window's own field rather than the global instance. An empty
        # field keeps the current cookie; surrounding whitespace is removed
        # because requests rejects header values that start with whitespace.
        cookie = self.cookieInput.text().strip()
        if cookie:
            headers['Cookie'] = cookie
        self.pagehome = PageHome()
        self.pagehome.start()
# Mutex around PageHome.run: that method locks it as its first statement and
# unlocks it as its last one.
PageHomeLock = QMutex()
class PageHome(QThread):
    """Worker thread that walks the listing pages.

    For every page number in the configured range it downloads the listing
    page, extracts the detail-page links with the configured XPath, stores
    them on an ``ItemHome`` thread as ``urlList`` and starts that thread.
    All settings are read from the module-level window ``ex``.
    """

    def __init__(self):
        super().__init__()

    def run(self):
        """Crawl the listing pages one at a time.

        ``ItemHomeLock`` is taken before each page and released by
        ``ItemHome.run``, so the next page is not fetched until the previous
        page's items have been processed. Errors are reported on stdout and
        end the crawl; they never escape this method.
        """
        PageHomeLock.lock()
        try:
            self.itemhome = ItemHome()
            first_page = int(ex.startPageInput.text())
            last_page = int(ex.finishPageInput.text())
            for index in range(first_page, last_page + 1):
                ItemHomeLock.lock()
                try:
                    res = requests.get(url=ex.linkInput.text().format(index), headers=headers)
                    Func.CheckStatus(self, res)
                    res.encoding = ex.charsetInput.text()
                    self.itemhome.urlList = etree.HTML(res.text).xpath(ex.linkXpathInput.text())
                except Exception:
                    # ItemHome was not started for this page, so nobody else
                    # would release the lock: release it before bailing out.
                    ItemHomeLock.unlock()
                    raise
                # The previous ItemHome.run may have released the lock without
                # the thread having fully finished; start() on a running
                # QThread does nothing, which would skip this page and leave
                # the lock held. wait() returns at once if it never started.
                self.itemhome.wait()
                self.itemhome.start()
        except Exception as err:
            # Same message as before, plus the actual cause (bad page numbers,
            # network failure, unparsable page, ...).
            print('输入有误', err)
        finally:
            PageHomeLock.unlock()
# Hand-off mutex between PageHome and ItemHome: PageHome.run locks it before
# fetching each listing page, and ItemHome.run unlocks it.
ItemHomeLock = QMutex()
class ItemHome(QThread):
    """Worker thread that visits every detail page of one listing page.

    The starter must set ``urlList`` (the detail links) before calling
    ``start()``. For each link the page is downloaded, the image URLs and the
    page title are extracted, and a ``SaveImage`` thread is started to
    download the images. Settings are read from the module-level window ``ex``.
    """

    def __init__(self):
        super().__init__()

    def run(self):
        """Process ``self.urlList`` one detail page at a time.

        ``SaveImageLock`` is taken before each page and released by
        ``SaveImage.run``, so pages are handled strictly one after another.
        A page that cannot be fetched or parsed is reported and skipped.
        ``ItemHomeLock`` is always released at the end.
        """
        # NOTE(review): ItemHomeLock is locked by the PageHome thread and
        # unlocked here from this thread. Qt documents unlocking a QMutex from
        # another thread as an error; a QSemaphore would express this hand-off
        # safely, but that needs a coordinated change in PageHome.
        self.saveimage = SaveImage()
        try:
            for item in self.urlList:
                SaveImageLock.lock()
                try:
                    res = requests.get(url=ex.linkPrefixInput.text() + item, headers=headers)
                    res.encoding = ex.charsetInput.text()
                    Func.CheckStatus(self, res)
                    page = etree.HTML(res.text)  # parse once, query twice
                    images = page.xpath(ex.imgXpathInput.text())
                    title = page.xpath('string(//title)')
                except Exception as err:
                    # SaveImage is not started for this page, so release the
                    # lock here; otherwise the next iteration blocks forever.
                    SaveImageLock.unlock()
                    print('详情页处理失败', item, err)
                    continue
                # The previous SaveImage.run has released the lock but its
                # thread may not have finished yet; start() on a running
                # QThread does nothing, which would leave the lock held.
                self.saveimage.wait()
                self.saveimage.images = images
                self.saveimage.title = title
                self.saveimage.start()
            # Let the last download finish before the next page starts: the
            # next run() replaces self.saveimage, and destroying a QThread
            # that is still running aborts the program.
            self.saveimage.wait()
        finally:
            ItemHomeLock.unlock()
# Hand-off mutex between ItemHome and SaveImage: ItemHome.run locks it before
# fetching each detail page, and SaveImage.run unlocks it.
SaveImageLock = QMutex()
class SaveImage(QThread):
    """Worker thread that downloads the images of one detail page.

    The starter must set ``images`` (image URLs) and ``title`` (page title,
    used as the file-name stem) before calling ``start()``. The target folder
    and the URL prefix are read from the module-level window ``ex``.
    """

    # Characters that are not allowed in Windows file names, plus line
    # breaks and tabs, which page titles often contain.
    _UNSAFE = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})

    def __init__(self):
        super().__init__()

    @staticmethod
    def _extension(url):
        """Return the file extension of *url* without the dot ('jpg' if it has none)."""
        path = url.split('?', 1)[0].split('#', 1)[0]  # drop query string / fragment
        return os.path.splitext(path)[1].lstrip('.') or 'jpg'

    def run(self):
        """Download every URL in ``self.images`` into the output folder.

        Files are named ``<title>-<random 1..9999>.<ext>``. A failing image is
        reported and skipped. ``SaveImageLock`` is always released at the end,
        so an error can no longer block the thread waiting for it.
        """
        try:
            folder = ex.folderInput.text()
            prefix = ex.imgPrefixInput.text()
            safe_title = str(self.title).strip().translate(self._UNSAFE)
            for item in self.images:
                print(item)
                try:
                    ext = self._extension(item)
                    # Download before opening the file so that a failed
                    # request does not leave an empty file behind.
                    data = requests.get(url=prefix + item, headers=headers).content
                    target = f'./{folder}/{safe_title}-{random.randint(1, 9999)}.{ext}'
                    with open(target, mode='wb') as fs:
                        fs.write(data)
                except Exception as err:
                    print('图片保存失败', item, err)
        finally:
            SaveImageLock.unlock()
class Func():  # shared helper functions
    """Namespace for helpers; they are called as ``Func.name(obj, ...)``."""

    # Create the output folder
    def mkdir(self):
        """Create the output folder ``./<folder name>`` if it does not exist.

        The folder name is read from ``self.folderInput`` when the passed
        object has that attribute, otherwise from the module-level window
        ``ex``.

        Returns:
            True if the folder was created, False if the path already existed.
        """
        source = self if hasattr(self, 'folderInput') else ex
        path = f'./{source.folderInput.text()}'
        if os.path.exists(path):
            return False
        # exist_ok covers the folder appearing between the check and the call.
        os.makedirs(path, exist_ok=True)
        return True

    # Check the response status
    def CheckStatus(self, res):
        """Report the HTTP status of *res* on stdout.

        Args:
            res: any object with a ``status_code`` attribute.

        Raises:
            Exception: for every 5xx status. The original test was ``> 500``,
                which let a plain 500 through as an "other" status.
        """
        code = res.status_code
        if code == 200:
            print("请求成功")
        elif code == 404:
            print('访问出错')
        elif code >= 500:
            raise Exception('服务器崩了')
        else:
            print('其他情况', code)
if __name__ == '__main__':
    # The application object has to exist before any widget is created.
    app = QApplication(sys.argv)
    QToolTip.setFont(QFont('SansSerif', 12))

    # The classes in this file read the form through the module-level name
    # ``ex``, so the window must be bound to exactly this name.
    ex = MyWin()
    ex.setWindowTitle('通用图片下载器')
    ex.setGeometry(500, 200, 800, 600)
    ex.show()

    # Run the event loop and pass its return code to the interpreter.
    exit_code = app.exec_()
    sys.exit(exit_code)