最近在学习异步爬取视频,程序运行一会就会出现“RuntimeError: asyncio.run() cannot be called from a running event loop”错误,在网上也寻求了答案,但是网上都是说在jupyter notebook 中运行异步编程会出现该问题,但是我是在pycharm中运行的,一直在求解,以爬取“美剧网”中的《中国乒乓之绝地反击》为例,请各位同仁帮忙调试下,谢谢!代码如下:[Python] 纯文本查看 复制代码 import requests
import re
import asyncio
import aiohttp
import aiofiles
import os
path = input("请输入要保存到的文件夹名称:")
if not os.path.exists(path):
path = os.makedirs(path)
def get_m3u8(url,headers):
sp = requests.get(url, headers=headers)
rec = re.compile(r'var player_aaaa=.*?"url":"(?P<url>(.*?))",', re.S)
lts = rec.finditer(sp.text)
for ls in lts:
m3u8_line = ls.group("url").replace("\\", "")
#print(m3u8_line)
return m3u8_line
async def download_text(url,name,session):
async with session.get(url) as req:
async with aiofiles.open(path +"\\" + name,"wb") as f:
await f.write(await req.content.read())
print(name,"下载完毕!")
async def download_ts(url):
tasks = [] #创建一个任务
async with aiohttp.ClientSession() as session:
async with aiofiles.open("中国乒乓之绝地反击.txt", "r",encoding = "utf-8") as f:
async for line in f:
if line.startswith("#"):
continue
line = line.strip()
ts_url = url + line
name = line
task = asyncio.create_task(download_text(ts_url,name,session))
tasks.append(task)
await asyncio.run(tasks)
def main():
url = "https://www.meiju56.com/vodplay/537732-2-1.html"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
}
#1.从网页源代码获取m3u8链接,定义函数为:get_m3u8
First_url = get_m3u8(url,headers) #返回 https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/index.m3u8
#2.从原m3u8中获取真实的m3u8链接,定义函数:get_S_m3u8()
req = requests.get(First_url)
hls = req.text.split("\n")[2] #返回 hls ="3000kb/hls/index.m3u8"
m3u8_url = First_url.replace("index.m3u8",hls) #返回:https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/3000kb/hls/index.m3u8
htl = requests.get(m3u8_url)
# 3.保存m3u8文件
with open("中国乒乓之绝地反击.txt", "wb") as f:
f.write(htl.content)
#4.读取m3u8文件,并下载ts文件(采用异步方法)
ts_url = m3u8_url.replace("index.m3u8","")
loop = asyncio.get_event_loop()
loop.run_until_complete(download_ts(ts_url)) #ts_url = "https://ikcdn01.ikzybf.com/20230317/IhCXk0NM/3000kb/hls/S7faeRza.ts"
if __name__ == '__main__':
main()
|