我已经控制了访问量了,依然如何
sem = asyncio.Semaphore(3) # 信号量,控制协程数,防止爬的过快
请各位指点指点
补充
··································
[Python] 纯文本查看复制代码
def main_get_html():
loop = asyncio.get_event_loop() # 获取事件循环
tasks = [asyncio.ensure_future(get_html(url)) for url in urlss()] # 把所有任务放到一个列表中
loop.run_until_complete(asyncio.wait(tasks)) # 激活协程
url_lst_unrequested = []
for url in urlss:
if url not in url_lst_successed or url_lst_failed:
url_lst_unrequested.append(url)
while len(url_lst_unrequested) > 0:
tasks_unrequested = [asyncio.ensure_future(get_info(url)) for url in url_lst_unrequested]
loop.run_until_complete(asyncio.wait(tasks_unrequested))
url_lst_unrequested = []
for url in urlss:
if url not in url_lst_successed:
url_lst_unrequested.append(url)
loop.close() # 关闭事件循环
提示错误
Traceback (most recent call last):
File "G:\python\he.py", line 107, in <module>
main_get_html()
File "G:\python\he.py", line 89, in main_get_html
for url in urlss:
TypeError: 'function' object is not iterable