大佬求助。我写了一个多线爬虫但是,下载只下载了两张是为啥呀
[Python] 纯文本查看 复制代码 import requests
from lxml import etree
import queue
import threading
import os
class bsSpaider:
def __init__(self):
self.baseurl=''
self.headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36 Edg/84.0.522.52'
}
self.urlQueue=queue.Queue()
self.picQueue=queue.Queue()
self.num=0
def getUrl(self):
for i in range(1,10):
self.pgeNum=i
url= 'https://gitee.com/kevin2046/picture/raw/master/img/' + str(i) + '.jpg'
num=url.split('/')[-1].split('.')[0]
print(url)
self.num=num
self.urlQueue.put(url)
def getHtml(self):
while not self.urlQueue.empty():
pic=self.urlQueue.get()
req=requests.get(pic,headers=self.headers)
pic=req
self.picQueue.put(pic)
self.urlQueue.task_done()
def down(self):
while not self.picQueue.empty():
pic = self.picQueue.get()
if not os.path.exists("图片"):
os.mkdir("图片")
print(pic)
# with open("图片\%s.jpg" % self.num,"wb") as fp:
# fp.write(pic)
self.picQueue.task_done()
def runSpider(self):
Thead_list=[]
self.getUrl()
for i in range(3):
t=threading.Thread(target=self.getHtml())
Thead_list.append(t)
for i in range(2):
t=threading.Thread(target=self.down())
Thead_list.append(t)
for th in Thead_list:
th.setDaemon(True)
th.start()
self.urlQueue.join()
self.picQueue.join()
if __name__ == '__main__':
spider=bsSpaider()
spider.runSpider()
|