[Scraper] Downloading videos from an AcFun (a站) target listing
(For learning purposes only; if this infringes on anything, please message me privately.)

import os
import pprint
import re
import json
import requests
import fake_useragent
from tqdm import tqdm  # shows a progress bar
from bs4 import BeautifulSoup
ua = fake_useragent.UserAgent().random  # pick a random User-Agent string
headers = {
    'User-Agent': ua
}
# Fetch the m3u8 playlist URL and the video title from a video page
def get_m3u8_list(url):
    r = requests.get(url, headers=headers)
    # The page embeds its metadata as JSON between these two markers; [0] takes the
    # first match, and [:-1] drops the trailing semicolon
    info = re.findall('window.pageInfo = window.videoInfo = (.*?)window.videoResource =',
                      r.text, re.S)[0].strip()[:-1]
    # ksPlayJsonHevc is itself a JSON string, hence the double json.loads;
    # adaptationSet and representation are lists, [0] picks the first stream
    info_json = json.loads(json.loads(info)["currentVideoInfo"]["ksPlayJsonHevc"])[
        'adaptationSet'][0]['representation'][0]['url']
    # pprint.pprint(info_json)
    name = json.loads(info)["title"]
    name = re.sub(r'[\\/:*?"<>|]', '', name)  # strip characters that are illegal in filenames
    return info_json, name
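For reference, here is a sketch of the embedded metadata that get_m3u8_list digs through. This is an assumed, simplified shape (the field names come from the code above; everything else is illustrative and may differ from AcFun's live pages). Note that ksPlayJsonHevc is a JSON string nested inside JSON, which is why json.loads is applied twice:

# Assumed, simplified page metadata (illustrative only):
# window.pageInfo = window.videoInfo = {
#     "title": "...",
#     "currentVideoInfo": {
#         "ksPlayJsonHevc": "{\"adaptationSet\": [{\"representation\": [{\"url\": \"https://....m3u8\"}]}]}"
#     }
# };
# window.videoResource = { ... }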
# Extract the playback addresses of all video segments (.ts files) from the playlist
def get_ts_files(url):
    r = requests.get(url, headers=headers)
    # Drop the '#'-prefixed directive lines; what remains are the segment filenames
    ts_files = re.sub('#.*', '', r.text).split()
    return ts_files
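A minimal, self-contained illustration of what get_ts_files does to the playlist text, using a made-up m3u8 body (real playlists carry more directives, but the idea is the same):

sample = "#EXTM3U\n#EXTINF:4.0,\nseg_000.ts\n#EXTINF:4.0,\nseg_001.ts\n#EXT-X-ENDLIST"
print(re.sub('#.*', '', sample).split())  # -> ['seg_000.ts', 'seg_001.ts']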
# Download the segments in order and concatenate them into one file
def download_combine(ts_files, name):
    path = os.getcwd()
    # 'wb' instead of 'ab' so a partial file from an earlier run is overwritten, not
    # appended to; the with-block closes the file, so no explicit close() is needed
    with open(f'{path}/{name}.mp4', 'wb') as f:
        for ts in tqdm(ts_files):
            url = 'https://tx-safety-video.acfun.cn/mediacloud/acfun/acfun_video/' + ts
            content = requests.get(url, headers=headers).content
            f.write(content)
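Segment requests occasionally time out on the CDN side. If that becomes a problem, the requests.get call inside download_combine can be swapped for a small retry wrapper like the sketch below (fetch_with_retry is a hypothetical helper, not part of the original script):

def fetch_with_retry(url, retries=3, timeout=10):
    # Hypothetical helper: try the request a few times, re-raise on the last failure
    for attempt in range(retries):
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            resp.raise_for_status()  # treat HTTP error codes as failures too
            return resp.content
        except requests.RequestException:
            if attempt == retries - 1:
                raise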
# Collect the individual video links from the listing (index) page
def get_index_links(index_url):
    r = requests.get(index_url, headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    links = soup.find_all('div', class_="list-content-item")
    links_list = []
    for link in links:
        url = "https://www.acfun.cn" + link.a.get('href')
        links_list.append(url)
    return links_list
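To see what the selector above expects, here is a self-contained demo on made-up listing HTML (the class name comes from the code above; the rest of the markup is an assumption):

sample_html = '<div class="list-content-item"><a href="/v/ac12345">demo</a></div>'
demo = BeautifulSoup(sample_html, 'html.parser')
print("https://www.acfun.cn" + demo.find('div', class_="list-content-item").a.get('href'))
# -> https://www.acfun.cn/v/ac12345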
def main(index_url):
    links = get_index_links(index_url)
    for url in links:
        m3u8_url, name = get_m3u8_list(url)
        ts_files = get_ts_files(m3u8_url)
        download_combine(ts_files, name)
if __name__ == '__main__':
    url = "https://www.acfun.cn/v/list135/index.htm?sortField=rankScore&duration=all&date=default&page=1"
    main(url)
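To run it, install the dependencies first; walking through more listing pages is just a matter of changing page=1 in the query string (the script filename below is up to you):

# pip install requests beautifulsoup4 tqdm fake-useragent
# python acfun_downloader.py
# e.g. page 2 of the same listing:
# main("https://www.acfun.cn/v/list135/index.htm?sortField=rankScore&duration=all&date=default&page=2")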
Tried it out: it downloads fine. Nice work!