本帖最后由 chaiasd 于 2020-9-14 10:34 编辑
最近受到一个开源种子搜索软件的启发,决定自己也做一个。思路是为每个网站编写一个模块,最后把这些模块整合在一起,再加上一个 GUI 就可以了。
下面分享的是其中一个网站的制作思路
本人是自学 Python 的新手,以下代码如有不足之处,还望各位大牛轻喷。
[Python] 纯文本查看 复制代码
import logging
import random
import threading
import time
from queue import Empty, Queue

import requests
from lxml import etree
# Placeholder for the target site's address; fill in the real value yourself.
# The functions in this file build their own URLs from the same placeholder
# text and do not read this global.
urlHome = '网址自行脑补'

# Shared work queue: run() puts detail-page URLs here and the Mythread
# workers take them out.
myque = Queue()

# XPath expressions used by the crawler. Replace them with your own
# expressions to match the markup of the site you are scraping.
path = {
    # Detail-page links on a search-result page.
    'HomePage': '//*[@id="list-panel"]/div/h2/a/@href',
    # Magnet link on a detail page.
    'pmagnet': '//*[@id="main-container"]/div/div[1]/div[2]/div[3]/a/@href',
    # Title text on a detail page.
    'pname': '//*[@class="title"]/text()',
    # Value printed as the torrent date.
    'pdata': '//*[@class="dtbox"]/ul[4]/li[2]/text()',
    # Value printed as the size.
    'psize': '//*[@class="dtbox"]/ul[3]/li[2]/text()',
}
def run(urlHome, xpath):
    """Fetch one search-result page and enqueue the detail-page URLs on it.

    Args:
        urlHome: URL of the result page to download.
        xpath: XPath expression selecting the (relative) detail-page links.

    Returns:
        The shared module-level queue ``myque``. One URL is added per link
        matched by ``xpath``; nothing is added when the page cannot be used.

    Raises:
        requests.RequestException: If the download itself fails; the
            exception is propagated unchanged.
    """
    response = requests.get(urlHome, timeout=(20, 20))
    if response.status_code != 200:
        # Same success criterion getMagnet applies: an error page has no
        # result links worth parsing.
        logging.getLogger(__name__).warning(
            'skipping %s: HTTP status %s', urlHome, response.status_code)
        return myque

    doc = etree.HTML(response.text)
    if doc is None:
        # etree.HTML can hand back None for an empty body; there is nothing
        # to query in that case.
        logging.getLogger(__name__).warning('skipping %s: empty page', urlHome)
        return myque

    for link in doc.xpath(xpath):
        # The matched hrefs are relative, so the site prefix is prepended.
        myque.put('网址自行脑补' + link)
    return myque
def getMagnet(url, pmagnet, psize, pdata, pname):
    """Download one detail page and print the torrent record scraped from it.

    The record is printed as a dict with the keys '片名' (title), '磁力'
    (magnet link), '大小' (size) and '种子日期' (torrent date). The magnet
    link is cut at its first '&', dropping any trailing parameters.

    This is a best-effort operation: a page that cannot be fetched or does
    not match the expected layout is logged and skipped, never raised, so a
    single bad page does not stop the calling worker.

    Args:
        url: Address of the detail page.
        pmagnet: XPath selecting the magnet link href.
        psize: XPath selecting the size text.
        pdata: XPath selecting the date text.
        pname: XPath selecting the title text.

    Returns:
        None.
    """
    log = logging.getLogger(__name__)

    try:
        response = requests.get(url, timeout=(20, 20))
    except requests.RequestException as exc:
        log.warning('request for %s failed: %s', url, exc)
        return

    if response.status_code != 200:
        log.warning('skipping %s: HTTP status %s', url, response.status_code)
        return

    try:
        doc = etree.HTML(response.text)
        if doc is None:
            log.warning('skipping %s: empty page', url)
            return
        # Each lookup takes the first match; an IndexError here means the
        # page does not have the expected layout.
        magnet = doc.xpath(pmagnet)[0].split('&')[0]
        size = doc.xpath(psize)[0]
        data = doc.xpath(pdata)[0]
        name = doc.xpath(pname)[0]
    except (IndexError, ValueError, etree.LxmlError) as exc:
        log.warning('could not extract a record from %s: %r', url, exc)
        return

    print({'片名': name,
           '磁力': magnet,
           '大小': size,
           '种子日期': data})
class Mythread(threading.Thread):
    """Worker thread that drains the shared ``myque`` queue of detail pages."""

    def __init__(self, name):
        """Create the worker.

        Args:
            name: Value assigned to the thread's ``name`` attribute.
        """
        super().__init__()
        self.name = name

    def run(self):
        """Scrape queued URLs until the queue is empty.

        After each page the worker sleeps for a random 1-4 seconds before
        taking the next URL.
        """
        while True:
            try:
                # get_nowait() rather than empty() followed by get(): with
                # several workers another thread can take the last item
                # between the two calls, leaving this one blocked in get()
                # forever.
                url = myque.get_nowait()
            except Empty:
                break
            getMagnet(url, path['pmagnet'], path['psize'],
                      path['pdata'], path['pname'])
            time.sleep(random.randint(1, 4))
def main(keyword, maxpage=5, thread_count=5):
    """Search the site for ``keyword`` and print every torrent record found.

    Result pages 1..``maxpage`` are fetched one after another and their
    detail-page links are queued via ``run``; then ``thread_count``
    ``Mythread`` workers are started and joined.

    Args:
        keyword: Search term inserted into the result-page URL.
        maxpage: Number of result pages to walk, starting at page 1.
        thread_count: Number of worker threads to start.

    Returns:
        None.
    """
    for page in range(1, maxpage + 1):
        # Replace the placeholder prefix with your site's search URL.
        page_url = '网址自行脑补' + keyword + '/p/' + str(page) + '.html'
        run(page_url, path['HomePage'])

    # NOTE(review): the pasted source lost its indentation; the workers are
    # assumed to start once, after all result pages have been queued.
    # Workers are named '1', '2', ... as before.
    workers = [Mythread(str(number)) for number in range(1, thread_count + 1)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
if __name__ == '__main__':
    # Pass in the title you want to search for.
    main('苍老师')