tinyzimu 发表于 2024-3-28 14:53

批量下载B站视频

import json
import os
import re

import requests
from lxml import etree

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'referer': 'https://www.bilibili.com/'
}



def _extract_playinfo(page_source):
    """Return the parsed ``window.__playinfo__`` JSON object from a video page.

    Raises:
        ValueError: if the page source does not contain the playinfo script.
    """
    # re.search yields a single match object; re.findall would return a list,
    # which json.loads cannot consume.
    match = re.search(r'window\.__playinfo__=(.*?)</script>', page_source, re.S)
    if match is None:
        raise ValueError("window.__playinfo__ not found in page source")
    return json.loads(match.group(1))


def _download_to_file(stream_url, target_path):
    """Stream ``stream_url`` into ``target_path`` chunk by chunk.

    Streaming avoids holding a whole media file in memory, and
    ``raise_for_status`` prevents an HTTP error body from being saved as media.
    """
    with requests.get(stream_url, headers=headers, stream=True) as response:
        response.raise_for_status()
        with open(target_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)


def downloadVideoAudio(file_name,url):
    """Download one video part, merge its video and audio tracks, and clean up.

    The page at ``url`` is fetched, the embedded playinfo JSON is parsed, and
    the video and audio streams are saved as temporary files under
    ``./videos/``. They are then merged by ``mergeVideoAndAudio`` into
    ``./videos/<dir_name>/<file_name>.mp4`` (``dir_name`` is a module-level
    global).

    Args:
        file_name: Title used for the output file; characters that are not
            valid in file names are replaced with ``_``.
        url: Address of the video page for this part.

    Raises:
        ValueError: if the playinfo JSON cannot be located in the page.
        requests.HTTPError: if a media stream responds with an error status.
    """
    # Fetch the page source and pull out the stream description.
    page_source = requests.get(url, headers=headers).text
    data = _extract_playinfo(page_source)
    dash = data['data']['dash']
    # 'video' and 'audio' are lists of stream entries; use the first entry of
    # each. NOTE(review): presumably the first entry is the preferred quality
    # -- confirm against the playinfo payload.
    video_url = dash['video'][0]['baseUrl']
    audio_url = dash['audio'][0]['baseUrl']

    # Part titles come from the web page and may contain characters that are
    # illegal in file names (or path separators), so neutralise them.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', file_name)

    os.makedirs(f'./videos/{dir_name}/', exist_ok=True)

    print(f"正在下载视频{file_name}")
    video_file = f"./videos/{safe_name}.mp4"
    _download_to_file(video_url, video_file)

    print(f"正在下载音频{file_name}")
    audio_file = f"./videos/{safe_name}.mp3"
    _download_to_file(audio_url, audio_file)

    out_video_file = f"./videos/{dir_name}/{safe_name}.mp4"
    mergeVideoAndAudio(video_file, audio_file, out_video_file)

    # Remove the temporary tracks only when a merged file exists; otherwise
    # keep them so a failed merge does not throw away the downloads.
    if os.path.exists(out_video_file):
        os.remove(video_file)
        os.remove(audio_file)
    else:
        print(f"合并失败,已保留临时文件: {video_file} {audio_file}")
def mergeVideoAndAudio(video,audio,out_video):
    """Merge a video track and an audio track into one file using ffmpeg.

    The command is passed to ffmpeg as an argument list rather than a shell
    string, so file names containing quotes, spaces or shell metacharacters
    (titles are scraped from a web page) cannot break or inject into the
    command line.

    Args:
        video: Path of the video-only input file.
        audio: Path of the audio-only input file.
        out_video: Path of the merged output file.

    Failures are reported on stdout instead of raising, keeping the
    best-effort behaviour of the previous ``os.system`` call.
    """
    # Imported locally so the function is self-contained.
    import subprocess

    print(f"ffmpeg -i {video} -i {audio} {out_video}")
    command = ["ffmpeg", "-i", video, "-i", audio, out_video]
    try:
        completed = subprocess.run(command, check=False)
    except FileNotFoundError:
        # Raised when the ffmpeg executable itself cannot be found.
        print("未找到 ffmpeg,请先安装并加入 PATH")
        return
    if completed.returncode != 0:
        print(f"ffmpeg 合并失败,退出码 {completed.returncode}")



# Sub-directory of ./videos/ that receives the merged files (read as a global
# by downloadVideoAudio).
dir_name="高斯数学动画"
# Page of the multi-part video whose parts are downloaded.
url="https://www.bilibili.com/video/BV15y4y1Z7nn"

page_source = requests.get(url, headers=headers).text
# re.search returns one match whose group is a JSON string; re.findall would
# return a list, which json.loads rejects.
state_match = re.search(r'window\.__INITIAL_STATE__=(.*?);\(function\(\)', page_source, re.S)
if state_match is None:
    raise SystemExit("window.__INITIAL_STATE__ not found in page source")
pages = json.loads(state_match.group(1))["videoData"]["pages"]

# The ?p= parameter is 1-based, so number from 1 and include the last part
# (range(1, len(pages)) stopped one part short).
for i, page in enumerate(pages, start=1):
    file_name = page['part']
    downloadVideoAudio(file_name, f"{url}?p={i}")

nbwww 发表于 2024-3-30 18:59

本帖最后由 nbwww 于 2024-3-30 19:20 编辑

学习了    可以下载了{:1_919:}

heart8619 发表于 2024-4-14 14:38

怎么使用啊?

LittiePang 发表于 2024-4-16 11:35

感觉爬虫要学的还是挺多的
页: [1]
查看完整版本: 批量下载B站视频