本帖最后由 d8349565 于 2021-5-14 14:30 编辑
闲来无事，把之前一直爬不到视频直链的“在线之家”又捣鼓了一次，这次成功获取到了视频直链。（代码暂未优化，分享着玩。）
脚本默认从主页开始爬取，有兴趣的话可以自行修改起始页面。
[Python] 纯文本查看 复制代码
import requests
import re
import time
import base64
from lxml import etree
def next_page(start_url, timeout=10):
    """Return the title links found on a listing page.

    The script calls this with the site home page to collect the link of
    every title shown there.

    Args:
        start_url: URL of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled host.

    Returns:
        A list of ``href`` attribute values exactly as they appear in the
        page; they are not made absolute here.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    }
    # NOTE(review): the page is requested with POST, as in the original
    # script; GET would be the conventional verb here -- confirm.
    response = requests.post(start_url, headers=headers, timeout=timeout)
    content = etree.HTML(response.text)
    return content.xpath('/html/body/div[1]/div//ul/li/div/a/@href')
def get_post_link(next_page, timeout=10):
    """Extract from an episode page the URL that must be requested next.

    The page text is searched for every ``url":"...",`` occurrence and the
    third one is used; that URL is later requested to obtain the video
    address.

    Args:
        next_page: URL of the episode page. (Inside this function the name
            shadows the module-level ``next_page`` function; it is kept
            unchanged for interface compatibility.)
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The third matched value with every backslash removed
        (presumably the value is JSON-escaped, e.g. ``\\/`` -- TODO confirm).

    Raises:
        IndexError: If the page contains fewer than three matches.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    }
    response = requests.post(next_page, headers=headers, timeout=timeout)
    matches = re.findall(r'url":"(.*?)",', response.text)
    if len(matches) < 3:
        # Same exception type as the bare index would raise, but with
        # enough context to diagnose a page-layout change.
        raise IndexError(
            f"expected at least 3 'url' entries in {next_page}, "
            f"found {len(matches)}"
        )
    return matches[2].replace('\\', '')
def get_num(url, referer, timeout=10):
    """Return the first six characters of the page's ``time=`` value.

    The script later removes this substring from the decoded video
    address.

    Args:
        url: URL to request.
        referer: Value sent in the ``referer`` request header.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The first six characters of the text captured between the first
        ``time=`` and the following ``&`` in the response body.

    Raises:
        IndexError: If the response contains no ``time=...&`` fragment.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        'referer': referer,
    }
    response = requests.post(url, headers=headers, timeout=timeout)
    # Decode the body with the encoding detected from its content.
    response.encoding = response.apparent_encoding
    matches = re.findall("time=(.*?)&", response.text)
    if not matches:
        raise IndexError(f"no 'time=' value found in response from {url}")
    return matches[0][0:6]
def get_vd_url(url, referer, timeout=10):
    """Request ``url`` with a referer and return the encoded video URL text.

    Args:
        url: URL to request.
        referer: Value sent in the ``referer`` request header.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The raw text captured between the first ``var url =`` and the next
        comma in the response body. It is returned unprocessed, so it may
        still carry surrounding whitespace and quote characters.

    Raises:
        IndexError: If the response contains no ``var url =`` assignment.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        'referer': referer,
    }
    response = requests.post(url, headers=headers, timeout=timeout)
    # Decode the body with the encoding detected from its content.
    response.encoding = response.apparent_encoding
    matches = re.findall("var url =(.*?),", response.text)
    if not matches:
        raise IndexError(f"no 'var url =' assignment found in response from {url}")
    return matches[0]
def Reverse_order(result):
    """Return the items of ``result`` joined into one string in reverse order.

    The script uses this to reverse the character order of the encoded
    (hexadecimal) URL text before decoding it.
    """
    return ''.join(reversed(list(result)))
def url_next(url, timeout=10, base_url='https://www.zxzj.me/'):
    """Return the absolute URLs of the episodes listed on a page.

    Args:
        url: URL of the page whose episode list should be read.
        timeout: Seconds to wait for the server before giving up.
        base_url: Site root used to resolve the hrefs found in the page.

    Returns:
        A list of absolute episode URLs, in page order.
    """
    # Imported locally so this function does not depend on the file header.
    from urllib.parse import urljoin

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    }
    response = requests.post(url, headers=headers, timeout=timeout)
    content = etree.HTML(response.text)
    hrefs = content.xpath('//*[@id="play-box"]/div[1]/div[2]/div[1]/ul/li/a/@href')
    # urljoin avoids the doubled slash that plain concatenation produces for
    # root-relative hrefs ("/video/...") and leaves absolute hrefs intact.
    return [urljoin(base_url, href) for href in hrefs]
if __name__ == '__main__':
    # Crawl from the home page: every title -> every episode -> print the
    # decoded video address of each episode.
    from urllib.parse import urljoin

    base_url = 'https://www.zxzj.me/'
    # urljoin avoids the doubled slash that plain concatenation produces
    # for root-relative hrefs.
    title_urls = [urljoin(base_url, href) for href in next_page(base_url)]
    for title_url in title_urls:
        for episode_url in url_next(title_url):
            # The episode page URL doubles as the referer for the requests
            # made against the URL extracted from it.
            post_url = get_post_link(episode_url)
            encoded = get_vd_url(post_url, episode_url)
            num = get_num(post_url, episode_url)
            # The encoded text is reversed hexadecimal wrapped in quotes.
            hex_text = Reverse_order(encoded).replace("'", "").strip()
            # casefold=True also accepts lowercase hex digits, which
            # b16decode rejects by default.
            decoded = base64.b16decode(hex_text, casefold=True).decode("utf-8")
            # Original author's note: num is a date-related number and
            # probably changes often; its first occurrence is removed from
            # the decoded text.
            print(decoded.replace(num, '', 1).strip())