"""Coroutine-based crawler: download a 99meijutt.com video and save it locally."""
import os.path
import urllib.parse
import asyncio
import aiohttp
import re
# Silence urllib3's InsecureRequestWarning (certificate-verification warning).
# NOTE(review): every request visible in this file is made through aiohttp,
# not urllib3 - confirm whether this call is still needed.
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Directory that receives the downloaded segments and the rewritten playlist.
path = 'ts'

# Playback page to scrape.
url = 'https://www.99meijutt.com/play/102788-1-0.html'

# Headers sent with every HTTP request (browser-like User-Agent).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/114.0.0.0 Safari/537.36'
    ),
}

# Absolute segment URLs, appended by get_tsurl() in playlist order.
ts_url_list = []
async def get_url(url):
    """Fetch the playback page and extract the playlist URL embedded in it.

    The page is searched for the first ``now="..."`` assignment; the quoted
    value is what the caller then treats as the first m3u8 playlist URL.

    :param url: address of the playback page.
    :return: the text captured between the quotes of ``now="..."``.
    :raises ValueError: if the page contains no ``now="..."`` assignment.
    """
    async with aiohttp.ClientSession(headers=headers) as session:
        # ssl=False skips certificate verification; it replaces the
        # deprecated verify_ssl=False keyword.
        async with session.get(url, ssl=False) as response:
            data = await response.text()

    match = re.search(r'now="(.*?)"', data)
    if match is None:
        # Fail with a clear message instead of AttributeError on None.
        raise ValueError(f'no now="..." playlist URL found in {url}')
    return match.group(1)
async def get_m3u8url(url_m3u8):
    """Resolve the playlist referenced by the last entry of ``url_m3u8``.

    Downloads ``url_m3u8`` and joins the last whitespace-separated token of
    its body onto ``url_m3u8`` to build a complete URL.

    :param url_m3u8: URL of the first playlist.
    :return: URL obtained by joining the last token onto ``url_m3u8``.
    :raises ValueError: if the response body is empty or whitespace only.
    """
    async with aiohttp.ClientSession(headers=headers) as session:
        # ssl=False skips certificate verification; it replaces the
        # deprecated verify_ssl=False keyword.
        async with session.get(url_m3u8, ssl=False) as response:
            data = await response.text()

    tokens = data.split()
    if not tokens:
        # An empty body would otherwise surface as a bare IndexError.
        raise ValueError(f'empty m3u8 playlist received from {url_m3u8}')
    return urllib.parse.urljoin(url_m3u8, tokens[-1])
async def get_tsurl(url2, base_url):
    """Download the media playlist and prepare it for a local merge.

    Side effects:

    * the raw playlist is saved as ``u8m3_url.m3u8`` in the working directory;
    * a rewritten copy is saved as ``<path>/do_m3u8.m3u8`` in which every
      segment line is replaced by the local file name ``<index>.ts`` (the
      name ``download`` writes); tag lines starting with ``#`` are copied
      unchanged;
    * the absolute URL of every segment is appended to ``ts_url_list``.

    :param url2: URL of the media playlist to fetch.
    :param base_url: URL against which the segment URIs are resolved.
    :return: None
    """
    async with aiohttp.ClientSession(headers=headers) as session:
        # ssl=False skips certificate verification; it replaces the
        # deprecated verify_ssl=False keyword.
        async with session.get(url2, ssl=False) as response:
            data = await response.content.read()

    # Keep an untouched copy of the playlist on disk.
    with open('u8m3_url.m3u8', 'wb') as f:
        f.write(data)

    # The output directory is otherwise only created by download(), which
    # runs later; create it here so the rewritten playlist can be written.
    os.makedirs(path, exist_ok=True)

    # Decode explicitly instead of relying on the platform default encoding;
    # utf-8-sig also drops a leading BOM if one is present.
    lines = data.decode('utf-8-sig', errors='replace').splitlines()

    i = 0
    local_playlist_path = os.path.join(path, 'do_m3u8.m3u8')
    with open(local_playlist_path, 'w', encoding='utf-8') as local_playlist:
        for raw_line in lines:
            line = raw_line.strip()
            if not line:
                # Blank lines are neither tags nor segments.
                continue
            if line.startswith('#'):
                local_playlist.write(line + '\n')
                continue
            # Segment line: point the local playlist at the file that
            # download() will create and remember the remote URL.
            local_playlist.write(f'{i}.ts\n')
            ts_url_list.append(urllib.parse.urljoin(base_url, line))
            i += 1
async def download(full_ts_url, i):
    """Download one segment and store it as ``<path>/<i>.ts``.

    :param full_ts_url: absolute URL of the segment.
    :param i: index of the segment, used as the local file name.
    :return: None
    :raises aiohttp.ClientResponseError: if the server answers with an HTTP
        error status (400 or higher).
    """
    async with aiohttp.ClientSession(headers=headers) as session:
        # ssl=False skips certificate verification; it replaces the
        # deprecated verify_ssl=False keyword.
        async with session.get(full_ts_url, ssl=False) as response:
            # Do not save an HTTP error page as if it were video data.
            response.raise_for_status()
            data = await response.content.read()

    # exist_ok avoids the separate existence check before creating the folder.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, f'{i}.ts'), 'wb') as f:
        f.write(data)
    print('下载完成')
def merge(filename='output'):
    """Merge the downloaded segments into ``<path>/<filename>.mp4`` via ffmpeg.

    ffmpeg is run inside the ``path`` directory, reads the rewritten playlist
    ``do_m3u8.m3u8`` and copies the streams without re-encoding (``-c copy``).
    Per the original note, merging this way avoids audio/video desync and is
    the recommended approach.

    :param filename: output file name without the ``.mp4`` extension.
    :return: None
    """
    import subprocess  # local import: only this function needs it

    print('开始')
    # An argument list keeps file names with spaces or shell metacharacters
    # intact and avoids going through a shell.
    cmd = ['ffmpeg', '-i', 'do_m3u8.m3u8', '-c', 'copy', f'{filename}.mp4']
    try:
        # cwd= runs ffmpeg in the segment directory without changing the
        # working directory of this process.
        completed = subprocess.run(cmd, cwd=path, check=False)
    except OSError as exc:
        # Raised when ffmpeg is not installed or the directory is missing.
        print(f'could not run ffmpeg: {exc}')
    else:
        if completed.returncode != 0:
            print(f'ffmpeg exited with status {completed.returncode}')
    print('结束')
async def main():
    """Run the whole pipeline: find the playlists, download segments, merge.

    :return: None
    """
    # Step 1: playlist URL embedded in the playback page.
    first_url = await get_url(url)
    print('第一次的URL:', first_url)

    # Step 2: playlist referenced by the first one.
    m3u8_url = await get_m3u8url(first_url)
    print('m3u8文件的URL:', m3u8_url)

    # Step 3: collect the segment URLs. Relative URIs inside a playlist are
    # relative to the playlist that contains them (RFC 8216, section 4.1),
    # so they are resolved against m3u8_url rather than first_url.
    await get_tsurl(m3u8_url, m3u8_url)

    # Step 4: download every segment concurrently, then merge them.
    await asyncio.gather(
        *(download(ts_url, i) for i, ts_url in enumerate(ts_url_list))
    )
    merge()
if __name__ == '__main__':
    # Start the pipeline only when the file is executed as a script.
    asyncio.run(main())