暗无天日 发表于 2023-3-30 14:32

“RuntimeError: asyncio.run() cannot be called from a running event loop”错误

最近在学习异步爬取视频,程序运行一会就会出现“RuntimeError: asyncio.run() cannot be called from a running event loop”错误,在网上也寻求了答案,但是网上都是说在jupyter notebook 中运行异步编程会出现该问题,但是我是在pycharm中运行的,一直在求解,以爬取“美剧网”中的《中国乒乓之绝地反击》为例,请各位同仁帮忙调试下,谢谢!代码如下:import requests
import re
import asyncio
import aiohttp
import aiofiles
import os

path= input("请输入要保存到的文件夹名称:")
if not os.path.exists(path):
    path = os.makedirs(path)

def get_m3u8(url,headers):

    sp = requests.get(url, headers=headers)
    rec = re.compile(r'var player_aaaa=.*?"url":"(?P<url>(.*?))",', re.S)
    lts = rec.finditer(sp.text)
    for ls in lts:
      m3u8_line = ls.group("url").replace("\\", "")
    #print(m3u8_line)
    return m3u8_line
async def download_text(url,name,session):
    async with session.get(url) as req:
      async with aiofiles.open(path +"\\" + name,"wb") as f:
            await f.write(await req.content.read())
      print(name,"下载完毕!")
async def download_ts(url):
    tasks = []   #创建一个任务
    async with aiohttp.ClientSession() as session:
      async with aiofiles.open("中国乒乓之绝地反击.txt", "r",encoding = "utf-8") as f:
            async for line in f:
                if line.startswith("#"):
                  continue
                line = line.strip()
                ts_url = url + line
                name = line

                task = asyncio.create_task(download_text(ts_url,name,session))
                tasks.append(task)
      await asyncio.run(tasks)


def main():
    url = "https://www.meiju56.com/vodplay/537732-2-1.html"
    headers = {
      "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    }
    #1.从网页源代码获取m3u8链接,定义函数为:get_m3u8
    First_url = get_m3u8(url,headers)            #返回 https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/index.m3u8
    #2.从原m3u8中获取真实的m3u8链接,定义函数:get_S_m3u8()
    req = requests.get(First_url)
    hls = req.text.split("\n")            #返回 hls ="3000kb/hls/index.m3u8"
    m3u8_url = First_url.replace("index.m3u8",hls)      #返回:https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/3000kb/hls/index.m3u8
    htl = requests.get(m3u8_url)
    # 3.保存m3u8文件
    with open("中国乒乓之绝地反击.txt", "wb") as f:
      f.write(htl.content)
    #4.读取m3u8文件,并下载ts文件(采用异步方法)
    ts_url = m3u8_url.replace("index.m3u8","")
    loop = asyncio.get_event_loop()
    loop.run_until_complete(download_ts(ts_url))#ts_url = "https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/3000kb/hls/S7faeRza.ts"
   
if __name__ == '__main__':
    main()

Prozacs 发表于 2023-3-30 15:51

async def download_ts(url):
    background_tasks = set()
    async with aiohttp.ClientSession() as session:
      async with aiofiles.open("中国乒乓之绝地反击.txt", "r", encoding="utf-8") as f:
            async for line in f:
                if line.startswith("#"):
                  continue
                line = line.strip()
                ts_url = url + line
                name = line
                task = asyncio.create_task(download_text(ts_url, name, session))
                background_tasks.add(task)
                task.add_done_callback(background_tasks.discard)

把download_ts函数替换下再试试

暗无天日 发表于 2023-3-30 16:20

Prozacs 发表于 2023-3-30 15:51
async def download_ts(url):
    background_tasks = set()
    async with aiohttp.ClientSession() as ...

感谢,现在不报错误了,但是下载不全

redballoon 发表于 2023-3-31 01:05

create_task() 换成 ensure_future() 试试

暗无天日 发表于 2023-4-3 14:42

redballoon 发表于 2023-3-31 01:05
create_task() 换成 ensure_future() 试试

谢谢,还是不行

hybpjx 发表于 2023-4-4 10:14

import requests
import re
import asyncio
import aiohttp
import aiofiles
import os

path = input("请输入要保存到的文件夹名称:")
if not os.path.exists(path):
    os.makedirs(path)


def get_m3u8(url, headers):
    sp = requests.get(url, headers=headers)
    rec = re.compile(r'var player_aaaa=.*?"url":"(?P<url>(.*?))",', re.S)
    lts = rec.finditer(sp.text)
    for ls in lts:
      if ls:
            m3u8_line = ls.group("url").replace("\\", "")
            return m3u8_line
      else:
            continue


async def download_text(url, name, session):
    async with session.get(url) as req:
      async with aiofiles.open(path + "\\" + name, "wb") as f:
            await f.write(await req.content.read())
      print(name, "下载完毕!")


async def download_ts(url):
    tasks = []
    async with aiohttp.ClientSession() as session:
      async with aiofiles.open("中国乒乓之绝地反击.txt", "r", encoding="utf-8") as f:
            async for line in f:
                if line.startswith("#"):
                  continue
                line = line.strip()
                ts_url = url + line
                name = line

                tasks.append(asyncio.create_task(download_text(ts_url, name, session)))
            await asyncio.wait(tasks)


def main():
    url = "https://www.meiju56.com/vodplay/537732-2-1.html"
    headers = {
      "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    }
    # 1.从网页源代码获取m3u8链接,定义函数为:get_m3u8
    First_url = get_m3u8(url, headers)# 返回 https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/index.m3u8
    # 2.从原m3u8中获取真实的m3u8链接,定义函数:get_S_m3u8()
    req = requests.get(First_url)
    hls = req.text.split("\n")# 返回 hls ="3000kb/hls/index.m3u8"
    m3u8_url = First_url.replace("index.m3u8",
                                 hls)# 返回:https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/3000kb/hls/index.m3u8
    htl = requests.get(m3u8_url)
    # 3.保存m3u8文件
    with open("中国乒乓之绝地反击.txt", "wb") as f:
      f.write(htl.content)
    # 4.读取m3u8文件,并下载ts文件(采用异步方法)
    ts_url = m3u8_url.replace("index.m3u8", "")
    loop = asyncio.get_event_loop()
    loop.run_until_complete(download_ts(ts_url))
    loop.run_forever()


if __name__ == '__main__':
    main()


修改了两处代码 一处是 事件循环后面加了 loop.run_forever() 这个好像可以去掉 不用加
还有一处是 把添加任务列表的地方 变成 asyncio.wait 来收集tasks
不过还有个问题 就是这个下载久了就会报网络超时,我大概下载了
三千多个 ts吧,下面有图

暗无天日 发表于 2023-4-5 07:57

hybpjx 发表于 2023-4-4 10:14
import requests
import re
import asyncio


谢谢,找到问题是关键
页: [1]
查看完整版本: “RuntimeError: asyncio.run() cannot be called from a running event loop”错误