本帖最后由 ∫護着妳佉遠方 于 2022-1-31 15:45 编辑
下载完成以后,程序仍在运行,没有退出,不知道为什么。
求助:问题出在哪里?应该如何解决?
[Python] 纯文本查看 复制代码
import re
import time
import urllib.parse
import urllib.request
from multiprocessing import Queue, Process
import requests
from bs4 import BeautifulSoup
# Producer
def producer(name_, q):
    """Scrape the Starbucks China menu page and queue one job per product.

    The menu page is fetched once; product names and image containers are
    selected from the product grid and paired by position. For every pair
    that carries a background-image URL, a dict ``{'name': ..., 'img': ...}``
    is put on ``q``.

    Args:
        name_: Label for this worker. Not used by this function.
        q: Queue that receives one job dict per product.

    Raises:
        urllib.error.URLError: If the menu page cannot be fetched.
    """
    url = 'https://www.starbucks.com.cn/menu/'
    head = {
        'Cookie': 'ZHh6ku4z=AMCsJHZ-AQAAOcyxX6Yl2s20HjtkZfHK87MTPofIn6iYXJHzUvqd_HT1ZDMD|1|0'
                  '|36c0e1a8addcb91415dc5a8a4223425eca69e3d4',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, '
                      'like Gecko) Chrome/97.0.4692.99 Safari/537.36 Edg/97.0.1072.76'
    }
    # Build the request with browser-like headers.
    request = urllib.request.Request(url=url, headers=head)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(request) as response:
        content = response.read().decode('utf-8')
    soup = BeautifulSoup(content, 'lxml')
    # Product names.
    # XPath equivalent: //ul[@class="grid padded-3 product"]//strong/text()
    name_list = soup.select('ul[class="grid padded-3 product"] strong')
    # Image containers; the image path sits inside the inline style.
    # XPath equivalent: //ul[@class="grid padded-3 product"]//div/@style
    img_lisi = soup.select('ul[class="grid padded-3 product"] div')
    url_pattern = re.compile(r'url\("(.*)"\)')
    # zip() stops at the shorter list, so a count mismatch between names and
    # image containers no longer raises IndexError.
    for name_tag, img_tag in zip(name_list, img_lisi):
        style = img_tag.attrs.get('style') or ''
        match = url_pattern.search(style)
        if match is None:
            # No style attribute or no url("...") in it: nothing to download.
            continue
        # The name is used as a file name by the consumer, where '/' would be
        # read as a path separator.
        product_name = name_tag.get_text().replace('/', ',')
        image_url = 'https://www.starbucks.com.cn' + match.group(1)
        q.put({'name': product_name, 'img': image_url})
# Consumer
def consumer(name_, q):
    """Download queued images until a ``None`` sentinel is received.

    Each job taken from ``q`` is a dict with the keys ``'name'`` and
    ``'img'``. The image at ``job['img']`` is saved as ``<name>.jpg`` in the
    current working directory. A job whose download fails is reported and
    skipped so the worker keeps running.

    Args:
        name_: Label for this worker, used as a prefix in progress output.
        q: Queue of job dicts, terminated by one ``None`` per consumer.
    """
    while True:
        res = q.get()
        # None is the end-of-work marker. Testing emptiness after get()
        # instead would drop the item just fetched and leave the remaining
        # workers blocked forever on an empty queue.
        if res is None:
            break
        name1 = res['name']
        img1 = res['img']
        try:
            # The timeout keeps a stalled connection from hanging the worker.
            reply = requests.get(img1, timeout=30)
            reply.raise_for_status()
        except requests.RequestException as exc:
            print(name_, '下载失败', name1, exc)
            continue
        # Open the file only after a successful download so a failure does
        # not leave an empty .jpg behind.
        with open(name1 + '.jpg', 'wb') as f:
            f.write(reply.content)
        print(name_, '成功下载', name1)


if __name__ == '__main__':
    # One queue shared by the producer and all consumers.
    q = Queue()
    p1 = Process(target=producer, args=('生产者', q))
    p1.start()
    consumers = []
    for i in range(6):
        c = Process(target=consumer, args=('消费者' + str(i), q))
        c.start()
        consumers.append(c)
    # The consumers are already draining the queue, so waiting for the
    # producer here cannot block on a full pipe.
    p1.join()
    # Everything the producer queued is now ahead of these markers; send one
    # per consumer so every worker leaves its loop.
    for _ in consumers:
        q.put(None)
    for c in consumers:
        c.join()
|