JS破解初探，折腾到头秃的美拍视频Python采集下载

huguo002 发表于 2019-12-5 16:13

本帖最后由 huguo002 于 2019-12-5 16:18 编辑

JS破解无疑是头秃的问题，额好在有度娘搜索，实在不行，那就谷妹搜索？
确实头秃无比！！目前想到的三种处理方法，当然都是借鉴啦！！

方法一：直接破解JS，python直接模拟js算法得出视频地址
方法二：扒取js，借用python第三方库execjs执行js获取视频地址
方法三：借用网络工具，其实是别人的破解接口获取视频地址，比如：https://meipai.iiilab.com

采集效果

方法一：直接破解JS，python直接模拟js算法得出视频地址

存储路径设置
#存储路径
# Create the download directory up front; exist_ok makes re-runs harmless.
os.makedirs('meipai/', exist_ok=True)
视频地址解密
# Decode the real Meipai video address
def decode(encoded_string):
    """Recover the real video address from Meipai's obfuscated ``video`` field.

    The field is a Base64 string with two runs of junk characters spliced in.
    Its first four characters, reversed and read as hexadecimal, give a
    decimal number whose digits locate the junk:

    * digits 1-2: offset from the start of the payload and length of the
      first junk run;
    * digits 3-4: number of characters that follow the second junk run and
      the length of that run (measured after the first run was removed).

    :param encoded_string: value of the ``video`` key of a media entry.
    :return: the decoded address as ``bytes``.
    :raises ValueError: if the 4-character header is not hexadecimal or the
        cleaned payload is not valid Base64 (``binascii.Error``).
    :raises IndexError: if the header yields fewer than four decimal digits.
    """
    def cut(text, start, length):
        # Drop exactly one run of `length` characters beginning at `start`.
        # (A plain str.replace would also delete later look-alike runs that
        # belong to the payload.)
        return text[:start] + text[start + length:]

    header, payload = encoded_string[:4], encoded_string[4:]
    digits = str(int(header[::-1], 16))
    pre, tail = digits[:2], digits[2:]

    payload = cut(payload, int(pre[0]), int(pre[1]))
    # The second position is relative to the end of the already-cleaned text.
    tail_len = int(tail[1])
    tail_start = len(payload) - int(tail[0]) - tail_len
    payload = cut(payload, tail_start, tail_len)

    # The JavaScript decoder accepts unpadded Base64; b64decode does not.
    payload += "=" * (-len(payload) % 4)
    return base64.b64decode(payload)

#来源：https://blog.csdn.net/weixin_42590877/article/details/99898650
#CSDN博客-iplaypy(蟒蛇师)

协议头生成，超级重要，头秃了许久！注意，cookie的添加，不然无法翻页！！
# Build request headers with a random User-Agent
def ua():
    """Return the HTTP headers sent with each request.

    The User-Agent is drawn at random from ``fake_useragent`` on every call.
    The Cookie is a fixed, previously captured session; the accompanying
    write-up notes that paging does not work without it.

    :return: dict of header name -> value.
    """
    # Use a distinct local name so the function's own name is not shadowed.
    agent = UserAgent()
    headers = {
        'Cookie': 'MUSID=0su478f5e30e4u8jlf9gqquhn5; MP_WEB_GID=748151043219051; sid=0su478f5e30e4u8jlf9gqquhn5; UM_distinctid=16ea084234822-0be7691d606889-43450521-1fa400-16ea0842349133; virtual_device_id=8818a5d35ed03e6b6b4fd638a6f765ae; pvid=TVqin0MOgIjpLnJZKxhiL%2FwcrYA2K7Ke; CNZZDATA1256786412=937407365-1574650874-https%253A%252F%252Fwww.baidu.com%252F%7C1574831840',
        'User-Agent': agent.random,
    }
    return headers
关于协议头的处理，只能一步步尝试分析！这里花费了不少时间测试！思路不对！！！
requests访问网页
#访问网页
def get_req(url, timeout=30):
    """Fetch *url* and return its body decoded as UTF-8 text.

    :param url: address to request.
    :param timeout: seconds to wait for the server before giving up.
    :return: the response text, or ``None`` when the request fails or the
        status code is not 200.
    """
    try:
        response = requests.get(url, headers=ua(), timeout=timeout)
    except requests.RequestException as e:
        # Keep the "None on failure" contract instead of crashing the caller.
        print(f'访问网页出错，错误代码：{e}')
        return None
    if response.status_code != 200:
        return None
    text = response.content.decode('utf-8')
    time.sleep(2)  # pause between successful page requests
    return text
# Fetch videos
def get_video(response):
    """Decode and download every video listed in a feed response.

    :param response: JSON text whose ``medias`` key holds the media entries.

    Each entry's caption (or ``weibo_share_caption`` when the caption is
    empty) becomes the file name. Decode and download failures are printed
    and appended to ``meipai/spider.txt``; processing then continues with
    the next entry.
    """
    for req in json.loads(response)['medias']:
        video_name = req['caption'] or req['weibo_share_caption']
        video_name = video_name.replace(' ', '')
        # Replace characters that are not allowed in file names.
        video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name)
        print(video_name)
        video_url = req['video']
        print(video_url)

        try:
            videourl = decode(video_url).decode('utf8')  # de-obfuscate the address
        except Exception as e:
            print(f'视频地址解密出错，错误代码：{e}')
            with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                f.write(f'视频解密出错，错误代码：{e}---采集{video_url}|{video_name}内容失败\n')
            continue
        print(videourl)

        try:
            down(video_name, videourl)
            #server(video_name, videourl)
        except Exception as e:
            print(f'视频下载出错，错误代码：{e}')
            with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                f.write(f'视频下载出错，错误代码：{e}---采集{videourl}|{video_name}内容失败\n')

下载视频
def down(h1, url, timeout=60):
    """Download *url* and save it as ``meipai/<h1>.mp4``.

    :param h1: file name without extension.
    :param url: direct video address.
    :param timeout: seconds to wait for the server before giving up.
    :raises requests.RequestException: on network failure or an HTTP error
        status, so an error page is never written to disk as a video.
    """
    print("准备下载！")
    file_path = f"meipai/{h1}.mp4"
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    print("开始下载！")
    with open(file_path, "wb") as file:
        file.write(r.content)
        print("下载完成！")
    time.sleep(2)  # pause between downloads

# NOTE (original author): this variant was failing -- a network problem?
# Save a video with a download progress display
def server(h1, videourl, timeout=60):
    """Stream *videourl* to ``meipai/<h1>.mp4`` while printing progress.

    :param h1: file name without extension.
    :param videourl: direct video address.
    :param timeout: seconds to wait for the server before giving up.
    :raises requests.RequestException: on network failure or an HTTP error
        status.
    """
    print("准备下载！")
    file_path = f'meipai/{h1}.mp4'
    headers = ua()
    # A Host header meant for the listing site must not be sent to the video
    # host; let requests derive it from the URL instead.
    headers.pop('Host', None)
    with closing(requests.get(videourl, headers=headers, stream=True, timeout=timeout)) as response:
        response.raise_for_status()
        chunk_size = 1024  # bytes read per iteration
        # The header is optional (e.g. chunked responses); 0 means "unknown".
        content_size = int(response.headers.get('content-length', 0))
        data_count = 0
        with open(file_path, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                data_count = data_count + len(data)
                if content_size:
                    now_jd = (data_count / content_size) * 100
                    print("\r 文件下载进度：%d%%(%d/%d) - %s" % (now_jd, data_count, content_size, file_path), end=" ")
                else:
                    print("\r 文件下载进度：%d - %s" % (data_count, file_path), end=" ")
            print("\n>>> 获取视频成功了！")
    time.sleep(2)  # pause between downloads
附完整代码：
#美拍视频采集2

# -*- coding: UTF-8 -*-
import requests
import re,time,os,json
from fake_useragent import UserAgent
import base64
from contextlib import closing

# Storage path: create the download directory up front (re-runs are harmless).
os.makedirs('meipai/', exist_ok=True)

# Decode the real Meipai video address
def decode(encoded_string):
    """Recover the real video address from Meipai's obfuscated ``video`` field.

    The field is a Base64 string with two runs of junk characters spliced in.
    Its first four characters, reversed and read as hexadecimal, give a
    decimal number whose digits locate the junk:

    * digits 1-2: offset from the start of the payload and length of the
      first junk run;
    * digits 3-4: number of characters that follow the second junk run and
      the length of that run (measured after the first run was removed).

    :param encoded_string: value of the ``video`` key of a media entry.
    :return: the decoded address as ``bytes``.
    :raises ValueError: if the 4-character header is not hexadecimal or the
        cleaned payload is not valid Base64 (``binascii.Error``).
    :raises IndexError: if the header yields fewer than four decimal digits.
    """
    def cut(text, start, length):
        # Drop exactly one run of `length` characters beginning at `start`.
        # (A plain str.replace would also delete later look-alike runs that
        # belong to the payload.)
        return text[:start] + text[start + length:]

    header, payload = encoded_string[:4], encoded_string[4:]
    digits = str(int(header[::-1], 16))
    pre, tail = digits[:2], digits[2:]

    payload = cut(payload, int(pre[0]), int(pre[1]))
    # The second position is relative to the end of the already-cleaned text.
    tail_len = int(tail[1])
    tail_start = len(payload) - int(tail[0]) - tail_len
    payload = cut(payload, tail_start, tail_len)

    # The JavaScript decoder accepts unpadded Base64; b64decode does not.
    payload += "=" * (-len(payload) % 4)
    return base64.b64decode(payload)

# Build request headers with a random User-Agent
def ua():
    """Return the HTTP headers sent with each request.

    The User-Agent is drawn at random from ``fake_useragent`` on every call.
    The Cookie is a fixed, previously captured session; the accompanying
    write-up notes that paging does not work without it. Host and Referer
    are fixed to www.meipai.com.

    :return: dict of header name -> value.
    """
    # Use a distinct local name so the function's own name is not shadowed.
    agent = UserAgent()
    headers = {
        'Cookie': 'MUSID=0su478f5e30e4u8jlf9gqquhn5; MP_WEB_GID=748151043219051; sid=0su478f5e30e4u8jlf9gqquhn5; UM_distinctid=16ea084234822-0be7691d606889-43450521-1fa400-16ea0842349133; virtual_device_id=8818a5d35ed03e6b6b4fd638a6f765ae; pvid=TVqin0MOgIjpLnJZKxhiL%2FwcrYA2K7Ke; CNZZDATA1256786412=937407365-1574650874-https%253A%252F%252Fwww.baidu.com%252F%7C1574831840',
        'Host': 'www.meipai.com',
        'Referer': 'https://www.meipai.com/square/13',
        'User-Agent': agent.random,
        #'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }
    return headers

# Fetch a page
def get_req(url, timeout=30):
    """Fetch *url* and return its body decoded as UTF-8 text.

    :param url: address to request.
    :param timeout: seconds to wait for the server before giving up.
    :return: the response text, or ``None`` when the request fails or the
        status code is not 200.
    """
    try:
        response = requests.get(url, headers=ua(), timeout=timeout)
    except requests.RequestException as e:
        # Keep the "None on failure" contract instead of crashing the caller.
        print(f'访问网页出错，错误代码：{e}')
        return None
    if response.status_code != 200:
        return None
    text = response.content.decode('utf-8')
    time.sleep(2)  # pause between successful page requests
    return text

def get_video(response):
    """Decode and download every video listed in a feed response.

    :param response: JSON text whose ``medias`` key holds the media entries.

    Each entry's caption (or ``weibo_share_caption`` when the caption is
    empty) becomes the file name. Decode and download failures are printed
    and appended to ``meipai/spider.txt``; processing then continues with
    the next entry.
    """
    for req in json.loads(response)['medias']:
        video_name = req['caption'] or req['weibo_share_caption']
        video_name = video_name.replace(' ', '')
        # Replace characters that are not allowed in file names.
        video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name)
        print(video_name)
        video_url = req['video']
        print(video_url)

        try:
            videourl = decode(video_url).decode('utf8')  # de-obfuscate the address
        except Exception as e:
            print(f'视频地址解密出错，错误代码：{e}')
            with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                f.write(f'视频解密出错，错误代码：{e}---采集{video_url}|{video_name}内容失败\n')
            continue
        print(videourl)

        try:
            down(video_name, videourl)
            #server(video_name, videourl)
        except Exception as e:
            print(f'视频下载出错，错误代码：{e}')
            with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                f.write(f'视频下载出错，错误代码：{e}---采集{videourl}|{video_name}内容失败\n')

# Download a video
def down(h1, url, timeout=60):
    """Download *url* and save it as ``meipai/<h1>.mp4``.

    :param h1: file name without extension.
    :param url: direct video address.
    :param timeout: seconds to wait for the server before giving up.
    :raises requests.RequestException: on network failure or an HTTP error
        status, so an error page is never written to disk as a video.
    """
    print("准备下载！")
    file_path = f"meipai/{h1}.mp4"
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    print("开始下载！")
    with open(file_path, "wb") as file:
        file.write(r.content)
        print("下载完成！")
    time.sleep(2)  # pause between downloads

# Save a video with a download progress display
def server(h1, videourl, timeout=60):
    """Stream *videourl* to ``meipai/<h1>.mp4`` while printing progress.

    :param h1: file name without extension.
    :param videourl: direct video address.
    :param timeout: seconds to wait for the server before giving up.
    :raises requests.RequestException: on network failure or an HTTP error
        status.
    """
    print("准备下载！")
    file_path = f'meipai/{h1}.mp4'
    headers = ua()
    # ua() pins Host to www.meipai.com; that header must not be sent to the
    # video host, so let requests derive it from the URL instead.
    headers.pop('Host', None)
    with closing(requests.get(videourl, headers=headers, stream=True, timeout=timeout)) as response:
        response.raise_for_status()
        chunk_size = 1024  # bytes read per iteration
        # The header is optional (e.g. chunked responses); 0 means "unknown".
        content_size = int(response.headers.get('content-length', 0))
        data_count = 0
        with open(file_path, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                data_count = data_count + len(data)
                if content_size:
                    now_jd = (data_count / content_size) * 100
                    print("\r 文件下载进度：%d%%(%d/%d) - %s" % (now_jd, data_count, content_size, file_path), end=" ")
                else:
                    print("\r 文件下载进度：%d - %s" % (data_count, file_path), end=" ")
            print("\n>>> 获取视频成功了！")
    time.sleep(2)  # pause between downloads

# Entry point
def main():
    """Walk feed pages 1-99 of category ``tid=13`` and download their videos.

    Pages that cannot be fetched are skipped; an error while processing one
    page is printed and the loop moves on to the next page.
    """
    for i in range(1, 100):
        url = f"https://www.meipai.com/squares/new_timeline?page={i}&count=24&tid=13"
        print(url)
        response = get_req(url)
        if not response:
            continue
        try:
            get_video(response)
        except Exception as e:
            print(f'获取视频出错了，错误代码：{e}')


if __name__ == '__main__':
    main()

方法二：execjs库运行js破解视频地址

test.js文件：
h = "substring"
, i = "split"
, j = "replace"
, k = "substr";

// Split the encoded string into its payload (`str`, everything after the
// first four characters) and its header (`hex`, the first four characters
// in reverse order).
function getHex(a) {
    return {
        str: a.substring(4),
        hex: a.substring(0, 4).split("").reverse().join("")
    };
}

// Convert the hexadecimal header to decimal and split its digits:
// `pre` holds the first two digits, `tail` the remaining ones, each as an
// array of one-character strings.
function getDec(a) {
    var b = parseInt(a, 16).toString();
    return {
        pre: b.substring(0, 2).split(""),
        tail: b.substring(2).split("")
    };
}

// Remove one run of junk from `a`. `b` is [start, length]; the entries may
// be digit strings, so they are converted to numbers first.
function substr(a, b) {
    var start = Number(b[0])
      , length = Number(b[1])
      , head = a.substring(0, start)
      , junk = a.substring(start, start + length);
    // String.replace with a string pattern only replaces the first match,
    // which is the run sitting at `start`.
    return head + a.substring(start).replace(junk, "");
}

// Turn the tail pair [charsAfterJunk, junkLength] into [start, junkLength],
// where start is counted from the beginning of `a`. `b` is updated in place
// and returned; the subtraction coerces digit strings to numbers.
function getPos(a, b) {
    b[0] = a.length - b[0] - b[1];
    return b;
}

// Hand-written Base64 decoder: returns a string in which every character
// code is one decoded byte (0-255).
function atob(a) {
// Lookup alphabet; a character's index in this string is its 6-bit value.
var e = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
// Trailing "=" padding is stripped first; a remaining length of 4k+1 cannot
// be valid Base64, so that case throws.
// NOTE(review): `f` is not defined anywhere in the visible script, so this
// throw would surface as a ReferenceError -- confirm what was intended.
if (a = a.replace(/=+$/, ""),
a.length % 4 == 1)
   throw f;
// Loop state: b = bit accumulator, c = current character and then its
// alphabet index, d = count of accepted characters, g = read position,
// h = output (this local `h` shadows the file-level `h`).
// The loop body runs before the update clause, so `c` is already an index
// there; `~c` is 0 for -1, which skips characters outside the alphabet.
// After the first character of each group of four, one byte is appended.
for (var b, c, d = 0, g = 0, h = ""; c = a.charAt(g++); ~c && (b = d % 4 ? 64 * b + c : c,
d++ % 4) ? h += String.fromCharCode(255 & b >> (-2 * d & 6)) : 0)
   c = e.indexOf(c);
return h
}

// Entry point called from Python: strip both junk runs from the obfuscated
// string `a` and return the Base64-decoded result.
function main(a) {
    var b = getHex(a)
      , c = getDec(b.hex)
      , d = substr(b.str, c.pre)
      // Declared locally so the function does not create a global `zz`.
      , zz = substr(d, getPos(d, c.tail));
    return atob(zz);
}

//a = '55e0aHRVhJYAS0cDovL212dmlkZW8xMC5tZWl0dWRhdGEuY29tLzVkMzNmMGRmM2RkYzRwMm5sd3N0cWg3OTA2X0gyNjRfMV85YzNiM2QwOTNhMWRhNS5tcDQ/az1kODlkOTVmZmRlNWZjNzQ0YzUwOTY0NDZiZjNhNTE0OCZ0PTVkNDEIQbtCanOZiOWY4'
//console.log(main(a))

#运行js文件解密视频网址
import execjs

def decode2(target):
    """
    Run the JavaScript decoder in ``./test.js`` on an obfuscated address.

    :param target: the obfuscated string passed to the script's ``main``.
    :return: whatever ``main(target)`` returns in the script.
    Credit: 木下瞳 / 木下学Python
    """
    # Read through a context manager so the file handle is always closed.
    with open(r'./test.js', encoding='utf-8') as f:
        js = execjs.compile(f.read())
    return js.call("main", target)

def main1():
    """Fetch the first feed page and hand it to ``get_video2``.

    NOTE(review): ``get_video2`` is not defined in the code shown here;
    presumably it mirrors ``get_video`` but decodes with ``decode2`` --
    confirm before running.
    """
    url = "https://www.meipai.com/squares/new_timeline?page=1&count=24&tid=13"
    response = get_req(url)
    # get_req returns None on failure; there is nothing to parse then.
    if response:
        get_video2(response)
方法三：借助网页破解工具。由于post包的两个参数没搞清楚头绪，这里直接采用无头浏览器（selenium）！

运行效果：

from selenium import webdriver
import time

#selenium打开浏览器
def s_html():
    """Start Chrome through Selenium and open the third-party parser page.

    :return: the WebDriver instance, with https://meipai.iiilab.com loaded.

    NOTE(review): ``executable_path`` is the Selenium 3 way of passing the
    driver location -- confirm against the installed Selenium version.
    """
    #videourl="https://www.meipai.com/media/1155453414"
    url = "https://meipai.iiilab.com"
    chromedriver_path = r"替换成你自己的chromedriver.exe路径"  # full path to chromedriver
    options = webdriver.ChromeOptions()  # Chrome start-up options
    # Drop the "enable-automation" switch; the author does this so sites are
    # less likely to detect that Selenium is driving the browser.
    options.add_experimental_option("excludeSwitches", ['enable-automation'])
    browser = webdriver.Chrome(executable_path=chromedriver_path, options=options)
    browser.get(url)
    time.sleep(2)  # give the page time to load
    return browser

#获取视频连接
def get_s_video(browser, videourl):
    """Resolve a Meipai page URL to a download link via the parser page.

    :param browser: WebDriver already showing the parser page.
    :param videourl: address of the Meipai video page to resolve.
    :return: ``href`` of the download button shown after parsing.
    """
    # find_element("xpath", ...) works on both Selenium 3 and 4, unlike the
    # find_element_by_xpath helpers that newer releases removed.
    link_box = browser.find_element("xpath", '//*[@class="form-control link-input"]')
    link_box.send_keys(videourl)
    time.sleep(2)
    button = browser.find_element("xpath", '//*[@class="btn btn-default"]')
    button.click()
    time.sleep(8)  # wait for the site to finish parsing
    href = browser.find_element(
        "xpath", '//*/div[@class="caption"]/p/a[@class="btn btn-success"]'
    ).get_attribute("href")
    print(href)
    # Clear the form so the next call starts from an empty input box.
    clear_button = browser.find_element("xpath", '//*[@class="btn btn-danger"]')
    clear_button.click()
    #browser.refresh()# refresh the page
    time.sleep(2)
    return href

#关闭浏览器
def close_s(browser):
    """Quit the WebDriver session passed in as *browser*."""
    browser.quit()

#获取美拍视频
def get_video3(response, browser):
    """Resolve and download every video in a feed response via the browser.

    :param response: JSON text whose ``medias`` key holds the media entries.
    :param browser: WebDriver already showing the parser page.

    A failure on one entry is printed and the loop continues with the next.
    """
    reqs = json.loads(response)['medias']
    print(reqs)
    for req in reqs:
        video_name = req['caption'] or req['weibo_share_caption']
        video_name = video_name.replace(' ', '')
        # Replace characters that are not allowed in file names.
        video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name)
        print(video_name)
        video_url = f"https://www.meipai.com/media/{req['id']}"
        print(video_url)
        try:
            videourl = get_s_video(browser, video_url)
            down(video_name, videourl)  # download the video
        except Exception as e:
            # One bad entry must not abort the rest of the page.
            print(f'视频下载出错，错误代码：{e}')

def main2():
    """Download the first feed page's videos through the browser-based parser."""
    browser = s_html()
    try:
        url = "https://www.meipai.com/squares/new_timeline?page=1&count=24&tid=13"
        response = get_req(url)
        # get_req returns None on failure; there is nothing to parse then.
        if response:
            get_video3(response, browser)
    finally:
        # Always release the browser, even when a step above raises.
        close_s(browser)


if __name__ == '__main__':
    main2()
部分函数方法沿用方法一！

天空宫阙 发表于 2019-12-6 09:36

huguo002 发表于 2019-12-6 09:04
解密需要js基础吧？

基础的语法肯定要懂，但最让人头秃的是解密函数的定位，还有就是混淆后的函数名称abcd挨个用一遍太影响理解了，美拍这个还好函数名还是比较容易理解的，全局搜decode就能定位解密视频url的这段代码

ofo 发表于 2020-1-14 15:07

带色的小马甲发表于 2019-12-5 17:29
美拍加密还是很简单的，没必要用别人接口

这些都是老方法了，解析出来的视频是有水印的，K值没拿到

MrCenter 发表于 2019-12-5 16:20

超级强，你们好

乡熊发表于 2019-12-5 16:32

给看不懂的点个赞

lxj12328 发表于 2019-12-5 16:34

谢谢大佬分享！

林铁柱和张翠花 发表于 2019-12-5 17:05

看不懂啊

带色的小马甲 发表于 2019-12-5 17:29

美拍加密还是很简单的，没必要用别人接口

https://i.loli.net/2019/12/05/piPDOISWFetwEYf.png
https://i.loli.net/2019/12/05/kjUIdDuVoTWNHtL.png

huguo002 发表于 2019-12-5 17:38

带色的小马甲发表于 2019-12-5 17:29
美拍加密还是很简单的，没必要用别人接口

给老哥双击，点赞，666！

天空宫阙 发表于 2019-12-6 00:03

JavaScript导入包好像没有python方便，看到base64的解密还自己写了个函数

huguo002 发表于 2019-12-6 09:04

天空宫阙发表于 2019-12-6 00:03
JavaScript导入包好像没有python方便，看到base64的解密还自己写了个函数

解密需要js基础吧？

页: [1] 2

吾爱破解 - 52pojie.cn's Archiver

JS破解初探，折腾到头秃的美拍视频Python采集下载