【Python原创】requests-人人视频下载爬虫多线程2.0
本帖最后由 Pastwill 于 2022-2-11 22:37 编辑

一.环境
windows 10 Python 3.6 x86
二.调用模块
import requests
import json
import re
import os
from concurrent.futures import ThreadPoolExecutor
三.代码
#windows 10 Python 3.6 x86
import requests
import json
import re
import os
from concurrent.futures import ThreadPoolExecutor
# Request headers sent with every HTTP call made by this script.
headers = {
    'Referer': 'https://m.rr.tv/',
    'User-Agent': 'Mozilla/5.0 (Linux; Android 11; Pixel 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.91 Mobile Safari/537.36',
}

# Module-level result containers, filled in by get_vod().
vod_list = []      # drama id matches extracted from the detail page
vod_name = []      # title matches extracted from the detail page
get_down_url = []  # direct download links, one per episode
def get_vod(url):
    """Collect the drama id, title and per-episode download links of a page.

    Downloads ``url``, extracts the drama id, the title and every episode id
    from the page text with regular expressions, then queries the web-api
    "play" endpoint once per episode to obtain its direct download link.

    Results are stored in the module-level lists (shapes unchanged from the
    original implementation):

    * ``vod_list``     -- gets ONE new entry: the list of drama-id matches
    * ``vod_name``     -- gets ONE new entry: the list of title matches
    * ``get_down_url`` -- gets one direct link (``str``) per episode,
      in the order the episode ids appear in the page

    Args:
        url: Address of the detail page to scrape.

    Raises:
        ValueError: If the page lists episodes but no drama id was found,
            so no play URL can be built.
    """
    response = requests.get(url=url, headers=headers)
    page = response.text

    drama_ids = re.findall(r'data:\[{id:(.*?),title:"', page)
    titles = re.findall(r',title:"(.*?)",desc:"', page)
    vod_list.append(drama_ids)
    vod_name.append(titles)

    episode_ids = re.findall(r'{sid:"(.*?)",key:', page)
    if not episode_ids:
        return
    if not drama_ids:
        raise ValueError("no drama id found in page: " + url)

    # The original concatenated the global list ``vod_list`` into the URL,
    # which raises TypeError; the play endpoint needs the id string itself.
    drama_id = drama_ids[0]
    for episode_id in episode_ids:
        play_url = (
            "https://web-api.rr.tv/web/drama/play?webChannel=M_STATION&dramaId="
            + drama_id + "&episodeId=" + episode_id + "&2-7-17xx"
        )
        play_response = requests.get(url=play_url, headers=headers)
        # The endpoint answers with JSON; the direct link is at data.url.
        get_down_url.append(str(json.loads(play_response.text)['data']['url']))
def down_begin(url, i):
    """Download one episode and save it as ``./<title>/第<i>集.MP4``.

    The title is read from the module-level ``vod_name`` list, whose first
    entry is the list of title matches stored by ``get_vod``; the first match
    is used as the show title. The target directory must already exist.

    Args:
        url: Direct download link of the episode.
        i: Episode number used in the progress message and the file name.

    Raises:
        IndexError: If ``vod_name`` holds no title yet.
    """
    # ``vod_name`` is a list (of match lists); concatenating it directly into
    # a string, as the original did, raises TypeError.
    title = vod_name[0][0]
    print("开始多线程下载" + title + "第" + str(i) + "集")

    target = "./" + title + "/第" + str(i) + "集.MP4"
    # stream=True keeps the video out of memory: the body is fetched chunk by
    # chunk while it is written. Both context managers guarantee that the
    # connection and the file are closed even if the transfer fails.
    with requests.get(url=url, headers=headers, stream=True) as r:
        with open(target, "wb") as f:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
if __name__ == '__main__':
    # Placeholder address: replace "xxxxx" with a real detail-page id
    # obtained from rr.tv.
    url = 'https://m.rr.tv/detail/xxxxx?snum=1&episode=1'
    get_vod(url)

    # get_vod stores the list of title matches as ONE entry of ``vod_name``;
    # the first match is the show title. The original concatenated the list
    # itself into the path, which raises TypeError.
    title = vod_name[0][0]
    # exist_ok lets the script be re-run without failing on the directory.
    os.makedirs('./' + title, exist_ok=True)

    # 10 worker threads; sized for series with a few dozen episodes.
    with ThreadPoolExecutor(10) as pool:
        futures = [
            pool.submit(down_begin, url=link, i=number)
            for number, link in enumerate(get_down_url, start=1)
        ]
        # submit() swallows worker exceptions until result() is called;
        # ask for every result so a failed download is not silently lost.
        for future in futures:
            future.result()
IDLE (Python 3.8 64-bit)测试出现:
"
Traceback (most recent call last):
File "C:\Users\Administrator\Desktop\人人视频.py", line 31, in <module>
get_vod(url)
File "C:\Users\Administrator\Desktop\人人视频.py", line 16, in get_vod
vod_list.append(re.findall(re.compile(r'data:\[{id:(.*?),title:"'),response.text))#找找视频链接
IndexError: list index out of range
" chadd 发表于 2022-2-12 09:39
IDLE (Python 3.8 64-bit)测试出现:
"
Traceback (most recent call last):
就是这个写法问题,单独拿出来re的列表再加进去就可以了,3.6测试没问题
为啥我复制你代码用不了? 用的PY编辑器是PyCharm 2022.1 EAP (Professional Edition)221.3427.103
binweiwei 发表于 2022-2-12 07:33
3.6 x86测试没问题 binweiwei 发表于 2022-2-12 07:33
海龟编辑器?不会是编程猫的人吧。。 随遇而安8 发表于 2022-2-12 18:50
海龟编辑器?不会是编程猫的人吧。。
编程猫哈哈哈哈;www Pastwill 发表于 2022-2-13 09:26
编程猫哈哈哈哈
以前我也在猫厂,你是猫老祖吗
页:
[1]
2