初学Python，写一个m3u8视频下载练手

bluerabbit · 发表于 2019-7-30 18:14

刚学Python，正好朋友想把“乐队我做东”下载来看，就写个程序练练

[Python] 纯文本查看 复制代码

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

r'''
===========================
*     乐队我做东 m3u8     *
===========================
'''

import os, sys, urllib, requests, re, random
from lxml import etree
from time import sleep

def getWorkdir(subdir, base=None):
    r"""Return the download directory for ``subdir``, creating it if needed.

    Args:
        subdir: Name of the folder to create below the base directory.
        base: Optional base directory.  When omitted a platform default is
            used: ``F:\BaiduNetdiskDownload`` when ``os.name`` is ``'nt'``
            (PC), ``/storage/emulated/0/Download`` when it is ``'posix'``
            (phone), and the current working directory otherwise.

    Returns:
        The path of the working directory, which exists on return.

    Raises:
        OSError: If the directory cannot be created (for example when the
            path already exists as a regular file).
    """
    if base is None:
        if os.name == 'nt':
            # PC
            base = 'F:\\BaiduNetdiskDownload'
        elif os.name == 'posix':
            # Phone
            base = '/storage/emulated/0/Download'
        else:
            base = os.getcwd()
    workdir = os.path.join(base, subdir)
    # makedirs also creates missing parent folders and tolerates an already
    # existing directory, so there is no exists()/mkdir() race and no failure
    # when the base folder has not been created yet.
    os.makedirs(workdir, exist_ok=True)
    return workdir

def getM3U8(p_title):
    """Fetch the page of one episode and download the video it references.

    Looks ``p_title`` up in the module-level ``phase_titles`` list, requests
    the matching entry of ``phase_urls`` (relative to ``host_toc``), extracts
    the m3u8 link from the page's ``cms_player`` object, resolves it with
    ``reParser_m3u8`` and passes the segment list to ``getTS``.

    Every failure is reported on stdout and the function then returns
    without raising.

    Args:
        p_title: Episode title as it appears in ``phase_titles``.
    """
    print('\n%s' % p_title)
    # Resolve the page URL up front: a title without a matching URL is
    # reported here instead of raising again inside the error handler below.
    try:
        p_url = host_toc + phase_urls[phase_titles.index(p_title)]
    except (ValueError, IndexError):
        print('   ... unknown phase ... %s' % p_title)
        return
    # Parse the episode page to get the m3u8 link
    try:
        resp = requests.get(p_url, headers=header, timeout=30)
        # Treat HTTP error pages as failures rather than parsing them.
        resp.raise_for_status()
    except requests.RequestException:
        print('   ... failed opening phase page ... %s' % p_url)
        return
    p_content = resp.content.decode('utf-8', errors='ignore')
    found = re.findall(r'cms_player = {"yun":true,"url":"(.*?)"', p_content, re.S)
    if not found:
        print('   ... no m3u8 link found in phase page ... %s' % p_url)
        return
    # The link sits inside a JSON object where '/' is escaped as '\/'.
    url_m3u8 = found[0].replace('\\/', '/')
    parsed_flag, m3u8_items, host_m3u8 = reParser_m3u8(url_m3u8)
    if parsed_flag == 'Y':
        getTS(p_title, m3u8_items, host_m3u8)
    else:
        print('   ... failed parsing m3u8 content ... %s' % url_m3u8)

def reParser_m3u8(url):
    """Download an m3u8 playlist and return the ts segments it lists.

    When the playlist refers to another ``.m3u8`` file (a nested playlist),
    the first such entry is followed recursively.

    Args:
        url: Absolute URL of the playlist.

    Returns:
        A tuple ``(flag, items, base)``.  On success ``flag`` is ``'Y'``,
        ``items`` is the list of ts entries exactly as written in the
        playlist, and ``base`` is the directory part of the playlist URL
        with a trailing ``'/'``.  On failure (request error, HTTP error
        status, playlist pointing at itself, or no ts entries) the result is
        ``('N', [], '')``.
    """
    from urllib.parse import urljoin

    try:
        resp = requests.get(url, headers=header, timeout=30)
        resp.raise_for_status()
    except requests.RequestException:
        return 'N', [], ''
    # Normalise CRLF line endings; otherwise '$' is never reached right after
    # an entry and nothing matches.
    m3u8_content = resp.content.decode('utf-8', errors='ignore').replace('\r\n', '\n')
    nested = re.findall(r'^(\S+m3u8)$', m3u8_content, re.M)
    if nested:
        # Nested m3u8: urljoin also handles absolute and root-relative entries.
        sub_url_m3u8 = urljoin(url, nested[0])
        if sub_url_m3u8 == url:
            # A playlist that points at itself would recurse forever.
            return 'N', [], ''
        return reParser_m3u8(sub_url_m3u8)
    # Parse and return the playlist (the sequence of ts files)
    segments = re.findall(r'^(\S+ts)$', m3u8_content, re.M)
    if not segments:
        return 'N', [], ''
    return 'Y', segments, os.path.split(url)[0] + '/'

def getTS(p_title, m3u8_items, host_m3u8):
    """Download every ts segment of an episode and merge them into one file.

    Writes the resolved segment URLs to ``<workdir>/<p_title>.txt``, saves
    each segment under ``<workdir>/<p_title>/`` and, when every segment was
    fetched and saved, concatenates them in playlist order into
    ``<workdir>/<p_title>/<p_title>.ts``.  Problems are reported on stdout;
    request and file errors are not propagated.

    Args:
        p_title: Episode title, used for the playlist, folder and output names.
        m3u8_items: ts entries in playback order, as found in the playlist.
        host_m3u8: Base URL (ending in ``'/'``) the entries are relative to.
    """
    import shutil
    from urllib.parse import urljoin

    # For plain relative entries urljoin equals host_m3u8 + ts; it also copes
    # with absolute or root-relative entries.
    ts_urls = [urljoin(host_m3u8, ts) for ts in m3u8_items]
    with open(os.path.join(workdir, '%s.txt' % p_title), 'w', encoding='utf-8') as f:
        f.writelines(ts_url + '\n' for ts_url in ts_urls)
    print('   ... playlist fetched')
    # Download the ts files
    print('   ... downloading ts ... %d files' % len(m3u8_items))
    p_dir = os.path.join(workdir, p_title)
    os.makedirs(p_dir, exist_ok=True)
    flag_combine = True
    ts_paths = []
    for ts, ts_url in zip(m3u8_items, ts_urls):
        ts_name = os.path.split(ts)[1]
        try:
            # Same headers and timeout as the other requests, so a stalled
            # connection cannot hang the whole download.
            resp = requests.get(ts_url, headers=header, timeout=30)
            # Do not save an HTTP error page as if it were video data.
            resp.raise_for_status()
        except requests.RequestException:
            print('   ... failed reading ts ... %s' % ts_url)
            flag_combine = False
            continue
        ts_path = os.path.join(p_dir, ts_name)
        try:
            with open(ts_path, 'wb') as f:
                f.write(resp.content)
        except OSError:
            print('   ... failed saving ts ... %s' % ts_name)
            flag_combine = False
        else:
            ts_paths.append(ts_path)
    # Combine the ts files
    if not flag_combine:
        print('   ... pls check failed ts & manually combine\n   ... done')
        return
    print('   ... combining ts')
    # Concatenate in Python rather than via the Windows-only "copy/b *.ts":
    # this keeps playlist order, works on every platform, is safe for titles
    # containing spaces and leaves the current working directory untouched.
    combined = os.path.join(p_dir, '%s.ts' % p_title)
    try:
        with open(combined, 'wb') as out:
            for ts_path in ts_paths:
                with open(ts_path, 'rb') as seg:
                    shutil.copyfileobj(seg, out)
    except OSError:
        print('   ... failed combining ts ... %s' % combined)
        return
    print('   ... done')

print(__doc__)

subdir = 'Band'
workdir = getWorkdir(subdir)

# Pool of browser user-agent strings; one is picked at random per run.
# Every entry must be latin-1 encodable, because HTTP header values are sent
# in that encoding.
headerpool = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:49.0) Gecko/20100101 Firefox/49.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0',
    # The original entry contained a literal ellipsis character (U+2026),
    # which cannot be encoded in an HTTP header; replaced by a full UA string.
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
    # This literal was broken across two lines in the original (syntax error).
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0',
    'Mozilla/5.0 (Windows NT 6.1; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;  Trident/5.0)',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
    'Mozilla/5.0 (iPad; CPU OS 10_1_1 like Mac OS X) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0 Mobile/14B100 Safari/602.1',
    'Opera/8.0 (Windows NT 5.1; U; en)',
]

# Request headers shared by every HTTP call in this script.
header = dict()
header["user-agent"] = random.choice(headerpool)

# Table-of-contents page
url_toc = 'https://www.116bt.com/vodshow/4193.html'
toc_parts = urllib.parse.urlsplit(url_toc)
# scheme + '://' + network location, used as prefix for the episode links
host_toc = toc_parts[0] + '://' + toc_parts[1]

# Parse the table-of-contents page to get the title and link of each episode
toc_resp = requests.get(url_toc, headers=header, timeout=30)
toc = toc_resp.content.decode('utf-8', errors='ignore')
toc_html = etree.HTML(toc)
phase_urls = toc_html.xpath(
    '//ul[@class="detail-play-list clearfix tab-pane ff-playurl ff-playurl-tab-2 fade"]/li/a/@href'
)
phase_titles = toc_html.xpath(
    '//ul[@class="detail-play-list clearfix tab-pane ff-playurl ff-playurl-tab-2 fade"]/li/a/@title'
)
for title in phase_titles:
    print(title)
# Menu commands are stored next to the real titles so that the input prompt
# accepts them as valid choices.
phase_titles += ['q', 'a']

# Choose which episode to download
while True:
    phase_chosen = ''
    while phase_chosen not in phase_titles:
        try:
            phase_chosen = input('\nWhich one to get? (a)ll? (q)uit? ... ')
        except EOFError:
            # No more input available (e.g. stdin closed): quit cleanly.
            phase_chosen = 'q'
    if phase_chosen == 'q':
        sys.exit()
    elif phase_chosen == 'a':
        # phase_titles ends with the 'q'/'a' menu sentinels, which have no
        # entry in phase_urls; only the titles that have a URL are episodes.
        for p_title in phase_titles[:len(phase_urls)]:
            getM3U8(p_title)
            sleep(2)
    else:
        getM3U8(phase_chosen)
        sleep(2)

bluerabbit · 发表于 2019-7-31 07:54

隰则有泮发表于 2019-7-30 21:44
几个语句有点长。。

嗯，我自己也觉得写的好累赘，还要继续学习和练习

隰则有泮 · 发表于 2019-7-30 21:44

几个语句有点长。。

随梦期初 · 发表于 2019-7-31 08:05

就是看起来有点乱

wjdgh2016 · 发表于 2019-7-31 19:08

向大佬学习

小黑太阳 · 发表于 2019-7-31 19:41

小白看不懂，支持。

wenjie2008 · 发表于 2019-7-31 20:48

感谢分享，收藏学习

刘浩有个大梦想 · 发表于 2019-8-12 08:56

这就完了吗？这要是用C++写……不敢想象

fjcjyl · 发表于 2020-2-9 20:49

谢谢分享感谢

帐号		自动登录	找回密码
密码			注册[Register]

[Python 转载] 初学Python，写一个m3u8视频下载练手

免费评分