Python小白白 发表于 2021-4-21 22:04

哪位大神给讲解一下这段代码怎么应用

import requests
from lxml import etree
import os
import time
import json


def Get_ID_Name(url, headers):
    """Fetch a listing page and extract link titles plus audio-API URLs.

    Args:
        url: Address of the page to download.
        headers: HTTP headers sent with the request.

    Returns:
        A ``(Titles, Contents_IDS)`` tuple. ``Titles`` holds the ``title``
        attribute of every link found under the ``sound-list _Qp`` div, and
        ``Contents_IDS`` holds one ``revision/play/v1/audio`` URL per link,
        built from the last path segment of the link's ``href``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into empty lists.
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    html = etree.HTML(r.text)

    link_xpath = '//div[@class="sound-list _Qp"]/ul/li/div/a'
    Titles = html.xpath(link_xpath + '/@title')
    hrefs = html.xpath(link_xpath + '/@href')

    api_url = 'https://www.ximalaya.com/revision/play/v1/audio?id={}&ptype=1'
    # The id is the last path segment of the href; strip a trailing slash
    # first so "/a/b/123/" still yields "123" rather than an empty string.
    Contents_IDS = [
        api_url.format(str(href).rstrip('/').split('/')[-1]) for href in hrefs
    ]
    return Titles, Contents_IDS


def Json_Get_links(Contents_IDS, headers):
    """Query each audio-API URL and collect the track id and audio link.

    Args:
        Contents_IDS: Iterable of API URLs to request, one per track.
        headers: HTTP headers sent with every request.

    Returns:
        A list with one dict per URL, in input order, of the form
        ``{'ID': <data.trackId>, 'M4aLinks': <data.src>}`` taken from the
        JSON response. An empty input yields an empty list.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
        KeyError: If a response lacks ``data``, ``trackId`` or ``src``.
    """
    Itemlists = []
    for n, api_url in enumerate(Contents_IDS, start=1):
        # Pause before every request (presumably to stay under the site's
        # rate limits -- the original script gives no reason).
        time.sleep(1.5)
        # A timeout keeps one stalled request from blocking the whole run.
        r1 = requests.get(api_url, headers=headers, timeout=30)
        r1.raise_for_status()
        r1.encoding = r1.apparent_encoding
        data = json.loads(r1.text)['data']
        # 'track_id' instead of 'id' so the builtin id() is not shadowed.
        track_id = data['trackId']
        Itemlists.append({'ID': track_id, 'M4aLinks': data['src']})
        print('已采集{}个链接!'.format(n))
    return Itemlists


# Characters that are not allowed in file names on common file systems;
# each one is replaced by an underscore.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _safe_filename(name):
    """Return *name* as a string with file-name-unsafe characters replaced by '_'."""
    return str(name).translate(_UNSAFE_FILENAME_CHARS)


def DownLoadM4A(Itemlists, filename):
    """Download every audio link into ``./XMLYFM`` as ``<title>.m4a``.

    Args:
        Itemlists: Sequence of dicts carrying the audio URL under the
            ``'M4aLinks'`` key.
        filename: Sequence of titles, paired positionally with
            ``Itemlists``; each title becomes the file's base name.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        requests.RequestException: If a download fails, times out, or the
            server answers with an HTTP error status.
    """
    # No explicit 'Host' header: requests derives it from each URL, so links
    # served from any host work. The browser-copied 'If-None-Match' header is
    # dropped as well, because a matching ETag would produce a 304 response
    # with an empty body and therefore an empty file.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
    }
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('./XMLYFM', exist_ok=True)
    count = 0
    for filename1, Itemlist in zip(filename, Itemlists):
        srclinks = Itemlist['M4aLinks']
        print(srclinks)
        if not srclinks:
            # The link can be None/empty when the API returned no usable
            # source; skip the entry instead of crashing in requests.get.
            print('跳过{}:没有可用的音频链接!'.format(filename1))
            continue
        r2 = requests.get(srclinks, headers=headers, timeout=60)
        # raise_for_status() returns None, so it is called, not printed.
        r2.raise_for_status()
        target = './XMLYFM/' + _safe_filename(filename1) + '.m4a'
        with open(target, 'wb') as f:
            f.write(r2.content)
        count += 1
        print('已下载{}个音频文件!'.format(count))
    print("{}个音频文件已全部下载完成!".format(count))


if __name__ == '__main__':
    # Script entry point: scrape the listing page, resolve every track to its
    # audio link, then download the files.
    print('正在加载...')
    url = 'https://www.ximalaya.com/youshengshu/41785430/'
    headers = dict([
        ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'),
        ('Cookie', ''),
        ('Referer', 'https://www.ximalaya.com/youshengshu/41785430/'),
        ('Host', 'www.ximalaya.com'),
    ])
    # Get_ID_Name returns (titles, api_urls); unpack both in one step.
    fileName, IDlinks = Get_ID_Name(url, headers)
    track_items = Json_Get_links(IDlinks, headers)
    DownLoadM4A(track_items, fileName)

哈_喽 发表于 2021-4-21 22:39

python里运行

白衣国度 发表于 2021-4-21 22:43

新建记事本,复制粘贴,后缀改名.py

chaifengbox 发表于 2021-4-21 22:43

好像是采集喜马拉雅有声小说的python代码.

looooooc 发表于 2021-4-21 22:46

python3,好像是在爬取喜马拉雅的音频

一条孤独的狗 发表于 2021-4-21 22:48

好家伙,连代码是什么都不知道就敢拿来用
这是一个python的爬虫,你需要安装python,然后还要导入相应模块
想要会用至少还要学习一小时{:301_997:}

qdyangdi 发表于 2021-4-21 23:02

这是啥 python么
页: [1]
查看完整版本: 哪位大神给讲解一下这段代码怎么应用