Derik 发表于 2024-11-12 13:50

【爬虫】爬取a站目标列表视频

(仅供学习交流,如有侵权,请私信联系)


import os
import pprint
import re
import json
import requests
import fake_useragent
from tqdm import tqdm# 显示进度条
from bs4 import BeautifulSoup

# NOTE(review): `ua` is not referenced anywhere in the visible code; the
# requests below send the fixed User-Agent from `headers` instead. Confirm
# whether the random value was meant to be used before removing it.
ua = fake_useragent.UserAgent().random
# Request headers passed to every requests.get call in the visible code.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
}


def _first_entry(value):
    """Return the first element when *value* is a list, else *value* itself."""
    return value[0] if isinstance(value, list) else value


# Get the m3u8 playlist URL (and title) for one video page
def get_m3u8_list(url):
    """Return ``(m3u8_url, name)`` for the video page at *url*.

    The page is fetched, the text between ``window.pageInfo =
    window.videoInfo =`` and ``window.videoResource =`` is parsed as JSON,
    and the playlist URL is read from the JSON string stored under
    ``currentVideoInfo.ksPlayJsonHevc``.

    Args:
        url: Address of a single video page.

    Returns:
        A tuple of the playlist URL and the video title with characters
        that are invalid in Windows file names removed.

    Raises:
        ValueError: If the video-info marker is not present in the page.
        KeyError: If the embedded JSON lacks one of the expected keys.
    """
    r = requests.get(url, headers=headers)
    # re.search returns a match object (or None); the previous re.findall
    # returned a list, on which .strip() raised AttributeError.
    match = re.search(
        r'window\.pageInfo = window\.videoInfo = (.*?)window\.videoResource =',
        r.text,
        re.S,
    )
    if match is None:
        raise ValueError(f'video info not found in page: {url}')
    # The capture is the right-hand side of a JavaScript assignment;
    # presumably it ends with ';', which JSON does not accept.
    raw = match.group(1).strip().rstrip(';').rstrip()

    # Parse the page JSON once and reuse it for both the playlist and title.
    video_info = json.loads(raw)
    play_info = json.loads(video_info["currentVideoInfo"]["ksPlayJsonHevc"])
    # NOTE(review): the posted code indexed these as plain dicts, but the
    # forum rendering appears to have dropped "[0]" subscripts. Accept both
    # shapes and take the first entry when a list is found - confirm
    # against a live response.
    adaptation = _first_entry(play_info['adaptationSet'])
    representation = _first_entry(adaptation['representation'])
    m3u8_url = representation['url']

    # Strip every character Windows forbids in file names (\ / : * ? " < > |)
    # because the title is later used as the output file name.
    name = re.sub(r'[\\/:*?"<>|]', '', video_info["title"])
    return m3u8_url, name


# Extract the playback addresses (ts files) of all video segments
def get_ts_files(url):
    """Fetch the playlist at *url* and return its entries as a list.

    On every line, everything from the first ``#`` to the end of that line
    is discarded; whatever remains is split on whitespace.
    """
    playlist_text = requests.get(url, headers=headers).text
    entries = []
    for line in playlist_text.split('\n'):
        kept, _, _ = line.partition('#')
        entries.extend(kept.split())
    return entries


# Download the video segments and merge them into a single file
def download_combine(ts_files, name,
                     base_url='https://tx-safety-video.acfun.cn/mediacloud/acfun/acfun_video/'):
    """Download each segment and write them back-to-back into ``<name>.mp4``.

    The output file is created in the current working directory.

    Args:
        ts_files: Segment paths, each appended to *base_url* to form the
            download address.
        name: Output file name without extension.
        base_url: Prefix for every segment address; defaults to the
            address that was previously hard-coded.

    Raises:
        requests.HTTPError: If a segment request returns an error status.
    """
    if not ts_files:
        # Nothing to download; avoid creating (or truncating) a file.
        return

    out_path = os.path.join(os.getcwd(), f'{name}.mp4')
    # 'wb' rather than 'ab': appending to a file left by an earlier run
    # would stack a second copy of the video onto the first.
    with open(out_path, 'wb') as f:
        for ts in tqdm(ts_files):
            response = requests.get(base_url + ts, headers=headers)
            # Fail loudly instead of writing an error page into the video.
            response.raise_for_status()
            f.write(response.content)
    # The with-statement closes the file; no explicit close() is needed.


# Collect the video links from the listing page
def get_index_links(index_url):
    """Return absolute video-page URLs found on the listing page.

    Every ``<div class="list-content-item">`` is inspected and the ``href``
    of its first ``<a>`` is prefixed with ``https://www.acfun.cn``.

    Args:
        index_url: Address of the listing page.

    Returns:
        A list of URLs, in page order. Items without a usable link are
        skipped.
    """
    r = requests.get(index_url, headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    links_list = []
    for item in soup.find_all('div', class_="list-content-item"):
        anchor = item.a
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # An item with no <a> or no href previously raised
            # AttributeError/TypeError and aborted the whole crawl.
            continue
        links_list.append("https://www.acfun.cn" + href)
    return links_list



def main(urll):
    """Download every video linked from the listing page at *urll*.

    For each video page: resolve its playlist URL and title, read the
    segment list from the playlist, then download and merge the segments.
    """
    for page_url in get_index_links(urll):
        playlist_url, title = get_m3u8_list(page_url)
        segments = get_ts_files(playlist_url)
        download_combine(segments, title)


if __name__ == '__main__':
    # Listing page whose videos are downloaded when run as a script.
    url = "https://www.acfun.cn/v/list135/index.htm?sortField=rankScore&duration=all&date=default&page=1"
    main(url)

rose521rain 发表于 2024-11-12 16:31

试了一下,能下载,给力哦!
页: [1]
查看完整版本: 【爬虫】爬取a站目标列表视频