“RuntimeError: asyncio.run() cannot be called from a running event loop”错误
最近在学习异步爬取视频,程序运行一会就会出现“RuntimeError: asyncio.run() cannot be called from a running event loop”错误,在网上也寻求了答案,但是网上都是说在jupyter notebook 中运行异步编程会出现该问题,但是我是在pycharm中运行的,一直在求解,以爬取“美剧网”中的《中国乒乓之绝地反击》为例,请各位同仁帮忙调试下,谢谢!代码如下:import requestsimport re
import asyncio
import aiohttp
import aiofiles
import os
path= input("请输入要保存到的文件夹名称:")
if not os.path.exists(path):
path = os.makedirs(path)
def get_m3u8(url,headers):
sp = requests.get(url, headers=headers)
rec = re.compile(r'var player_aaaa=.*?"url":"(?P<url>(.*?))",', re.S)
lts = rec.finditer(sp.text)
for ls in lts:
m3u8_line = ls.group("url").replace("\\", "")
#print(m3u8_line)
return m3u8_line
async def download_text(url,name,session):
async with session.get(url) as req:
async with aiofiles.open(path +"\\" + name,"wb") as f:
await f.write(await req.content.read())
print(name,"下载完毕!")
async def download_ts(url):
tasks = [] #创建一个任务
async with aiohttp.ClientSession() as session:
async with aiofiles.open("中国乒乓之绝地反击.txt", "r",encoding = "utf-8") as f:
async for line in f:
if line.startswith("#"):
continue
line = line.strip()
ts_url = url + line
name = line
task = asyncio.create_task(download_text(ts_url,name,session))
tasks.append(task)
await asyncio.run(tasks)
def main():
url = "https://www.meiju56.com/vodplay/537732-2-1.html"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
}
#1.从网页源代码获取m3u8链接,定义函数为:get_m3u8
First_url = get_m3u8(url,headers) #返回 https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/index.m3u8
#2.从原m3u8中获取真实的m3u8链接,定义函数:get_S_m3u8()
req = requests.get(First_url)
hls = req.text.split("\n") #返回 hls ="3000kb/hls/index.m3u8"
m3u8_url = First_url.replace("index.m3u8",hls) #返回:https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/3000kb/hls/index.m3u8
htl = requests.get(m3u8_url)
# 3.保存m3u8文件
with open("中国乒乓之绝地反击.txt", "wb") as f:
f.write(htl.content)
#4.读取m3u8文件,并下载ts文件(采用异步方法)
ts_url = m3u8_url.replace("index.m3u8","")
loop = asyncio.get_event_loop()
loop.run_until_complete(download_ts(ts_url))#ts_url = "https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/3000kb/hls/S7faeRza.ts"
if __name__ == '__main__':
main()
async def download_ts(url):
background_tasks = set()
async with aiohttp.ClientSession() as session:
async with aiofiles.open("中国乒乓之绝地反击.txt", "r", encoding="utf-8") as f:
async for line in f:
if line.startswith("#"):
continue
line = line.strip()
ts_url = url + line
name = line
task = asyncio.create_task(download_text(ts_url, name, session))
background_tasks.add(task)
task.add_done_callback(background_tasks.discard)
把download_ts函数替换下再试试 Prozacs 发表于 2023-3-30 15:51
async def download_ts(url):
background_tasks = set()
async with aiohttp.ClientSession() as ...
感谢,现在不报错误了,但是下载不全 create_task() 换成 ensure_future() 试试 redballoon 发表于 2023-3-31 01:05
create_task() 换成 ensure_future() 试试
谢谢,还是不行 import requests
import re
import asyncio
import aiohttp
import aiofiles
import os
path = input("请输入要保存到的文件夹名称:")
if not os.path.exists(path):
os.makedirs(path)
def get_m3u8(url, headers):
sp = requests.get(url, headers=headers)
rec = re.compile(r'var player_aaaa=.*?"url":"(?P<url>(.*?))",', re.S)
lts = rec.finditer(sp.text)
for ls in lts:
if ls:
m3u8_line = ls.group("url").replace("\\", "")
return m3u8_line
else:
continue
async def download_text(url, name, session):
async with session.get(url) as req:
async with aiofiles.open(path + "\\" + name, "wb") as f:
await f.write(await req.content.read())
print(name, "下载完毕!")
async def download_ts(url):
tasks = []
async with aiohttp.ClientSession() as session:
async with aiofiles.open("中国乒乓之绝地反击.txt", "r", encoding="utf-8") as f:
async for line in f:
if line.startswith("#"):
continue
line = line.strip()
ts_url = url + line
name = line
tasks.append(asyncio.create_task(download_text(ts_url, name, session)))
await asyncio.wait(tasks)
def main():
url = "https://www.meiju56.com/vodplay/537732-2-1.html"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
}
# 1.从网页源代码获取m3u8链接,定义函数为:get_m3u8
First_url = get_m3u8(url, headers)# 返回 https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/index.m3u8
# 2.从原m3u8中获取真实的m3u8链接,定义函数:get_S_m3u8()
req = requests.get(First_url)
hls = req.text.split("\n")# 返回 hls ="3000kb/hls/index.m3u8"
m3u8_url = First_url.replace("index.m3u8",
hls)# 返回:https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/3000kb/hls/index.m3u8
htl = requests.get(m3u8_url)
# 3.保存m3u8文件
with open("中国乒乓之绝地反击.txt", "wb") as f:
f.write(htl.content)
# 4.读取m3u8文件,并下载ts文件(采用异步方法)
ts_url = m3u8_url.replace("index.m3u8", "")
loop = asyncio.get_event_loop()
loop.run_until_complete(download_ts(ts_url))
loop.run_forever()
if __name__ == '__main__':
main()
修改了两处代码 一处是 事件循环后面加了 loop.run_forever() 这个好像可以去掉 不用加
还有一处是 把添加任务列表的地方 变成 asyncio.wait 来收集tasks
不过还有个问题 就是这个下载久了就会报网络超时,我大概下载了
三千多个 ts吧,下面有图
hybpjx 发表于 2023-4-4 10:14
import requests
import re
import asyncio
谢谢,找到问题是关键
页:
[1]