python多线程少图求助
大佬求助。我写了一个多线爬虫但是,下载只下载了两张是为啥呀import requests
fromlxml importetree
import queue
importthreading
importos
class bsSpaider:
def __init__(self):
self.baseurl=''
self.headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36 Edg/84.0.522.52'
}
self.urlQueue=queue.Queue()
self.picQueue=queue.Queue()
self.num=0
def getUrl(self):
for i in range(1,10):
self.pgeNum=i
url= 'https://gitee.com/kevin2046/picture/raw/master/img/' + str(i) + '.jpg'
num=url.split('/')[-1].split('.')
print(url)
self.num=num
self.urlQueue.put(url)
def getHtml(self):
while not self.urlQueue.empty():
pic=self.urlQueue.get()
req=requests.get(pic,headers=self.headers)
pic=req
self.picQueue.put(pic)
self.urlQueue.task_done()
def down(self):
while not self.picQueue.empty():
pic = self.picQueue.get()
if not os.path.exists("图片"):
os.mkdir("图片")
print(pic)
# with open("图片\%s.jpg" % self.num,"wb") as fp:
# fp.write(pic)
self.picQueue.task_done()
def runSpider(self):
Thead_list=[]
self.getUrl()
for i in range(3):
t=threading.Thread(target=self.getHtml())
Thead_list.append(t)
for i in range(2):
t=threading.Thread(target=self.down())
Thead_list.append(t)
for th in Thead_list:
th.setDaemon(True)
th.start()
self.urlQueue.join()
self.picQueue.join()
if __name__ == '__main__':
spider=bsSpaider()
spider.runSpider()
谢谢大佬们,找到错误了 还是有问题,呜呜呜,下载速度还是很慢跟没用多线程一样,想哭。 有bug吗这是:eee建议百度一下 个人感觉如果是多线程爬虫,最好在结合上代{过}{滤}理ip,不然被封禁的太快了. 米粒米粒 发表于 2020-8-25 21:55
有bug吗这是建议百度一下
没用bug逻辑有错误但是找不着呀 axin1999 发表于 2020-8-25 21:55
个人感觉如果是多线程爬虫,最好在结合上代{过}{滤}理ip,不然被封禁的太快了.
还不会:'(weeqw 学习一下python scrapy 重复拿了四次是为啥啊 aiqianqian 发表于 2020-8-25 22:05
学习一下python scrapy
我想先把多线程弄懂再弄scrapy:'(weeqw:'(weeqw