百度翻译-js逆向 问题求助
原文: https://www.52pojie.cn/thread-1474990-1-1.html 。我也是通过此方法破解sign:对于长度在30以内的字符串,获取的sign是正确的;但对于长文本(字符串长度大于30),获取的sign就是错误的。
有没有遇到此问题的朋友,欢迎指教 谢谢! 学会提好一个问题也是一门技术活 本帖最后由 Prozacs 于 2021-10-21 10:48 编辑
用有道吧。google也行,百度翻译的不敢恭维。 QingYi. 发表于 2021-10-21 10:40
学会提好一个问题也是一门技术活
好的大哥,我一定好好描述问题 Prozacs 发表于 2021-10-21 10:46
用有道吧。google也行,百度翻译的不敢恭维。
谷歌翻译需要selenium模拟浏览器才能使用,有道挺好的,请问您有谷歌翻译/有道翻译的代码么 SuperSpiderMan 发表于 2021-10-21 11:13
谷歌翻译需要selenium模拟浏览器才能使用,有道挺好的,请问您有谷歌翻译/有道翻译的代码么
from urllib import parse
import urllib
from win32com.client import Dispatch
import pythoncom
import hashlib
import urllib.parse
import random
import json
# Default request headers used by the WinHTTP helpers in this script:
# a desktop Chrome User-Agent string.
g_headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/93.0.4577.82 Safari/537.36'
    ),
}
def getWinHttpReq(typeid, timeOut = 25):
    """Create and configure a ``WinHttp.WinHttpRequest.5.1`` COM object.

    Args:
        typeid: When equal to 2 the request object is pointed at the proxy
            configured below; any other value leaves proxy settings untouched.
        timeOut: Scaled by 10000 and used for the send and receive timeouts.

    Returns:
        The configured COM request object.
    """
    # COM has to be initialised on the calling thread before Dispatch().
    pythoncom.CoInitialize()
    WinHttpReq = Dispatch('WinHttp.WinHttpRequest.5.1')
    # Argument order: resolve, connect, send, receive.
    # NOTE(review): WinHTTP documents these values as milliseconds, which makes
    # the defaults 80 s / 80 s / 250 s / 250 s - confirm that *10000 (rather
    # than *1000) is really intended.
    WinHttpReq.SetTimeouts(8*10000, 8*10000, timeOut*10000, timeOut*10000)
    if typeid == 2:
        HTTPREQUEST_PROXYSETTING_PROXY = 2
        # "xxxxxx" is a placeholder; replace it with a real "host:port" proxy.
        WinHttpReq.SetProxy(HTTPREQUEST_PROXYSETTING_PROXY,"xxxxxx", "")
    return WinHttpReq
# Module-level request object reused by translate(); typeid 1 does not take
# the proxy branch in getWinHttpReq().
winhttp = getWinHttpReq(1)
def SendWinHttpReq_2(WinHttpReq,sendUrl,reUrl=False,body=None,reType=1,headDic=None):
    """Send one HTTP request through a WinHttpRequest COM object.

    Args:
        WinHttpReq: Request object, e.g. the one returned by getWinHttpReq().
        sendUrl: Target URL.
        reUrl: Value for option 6 (redirect handling): True follows redirects
            automatically, False does not.
        body: ``None`` sends a GET. Anything else sends a POST; a non-``str``
            body is URL-encoded first.
        reType: Selects what is returned (see Returns).
        headDic: Optional mapping of extra request headers (name -> value).

    Returns:
        * ``reType == 1``: the raw response body as exposed by
          ``ResponseBody.obj`` (no decoding), or ``''`` if it cannot be read.
        * ``reType == 2``: the ``Location`` response header; if that header is
          unavailable, the body decoded as UTF-8 with a leading BOM removed,
          or ``''`` if the body cannot be read.
        * anything else: all response headers as one string.
    """
    # g_headers is only read here, so no ``global`` declaration is needed.
    WinHttpReq.SetOption(6, reUrl)

    # The HTTP verb follows from whether a body was supplied.
    if body is None:
        WinHttpReq.Open('GET', sendUrl, False)
    else:
        WinHttpReq.Open('POST', sendUrl, False)

    WinHttpReq.SetRequestHeader('User-Agent',g_headers['User-Agent'])
    if headDic is not None:
        for name in headDic:
            try:
                # Pass the header's own value, not the whole dict.
                WinHttpReq.SetRequestHeader(name, headDic[name])
            except Exception:
                # Best effort: a header the COM object rejects is skipped.
                continue

    if body is None:
        WinHttpReq.Send()
    else:
        if not isinstance(body, str):
            body = urllib.parse.urlencode(body)
        WinHttpReq.Send(body)
    WinHttpReq.WaitForResponse()

    if reType == 1:
        try:
            return WinHttpReq.ResponseBody.obj
        except Exception:
            return ''

    if reType == 2:
        try:
            return WinHttpReq.GetResponseHeader('Location')
        except Exception:
            # No Location header: fall back to the decoded body.
            try:
                raw = WinHttpReq.ResponseBody.obj
            except Exception:
                return ''
            if raw.startswith(b'\xef\xbb\xbf'):
                raw = raw[3:]  # drop the 3-byte UTF-8 BOM
            return raw.decode('utf-8','ignore')

    return WinHttpReq.GetAllResponseHeaders()
def SendWinHttpReq(WinHttpReq, sendUrl, reUrl=False, body=None, reType=1, headDic=None, code='utf-8'):
    """Send one HTTP request through a WinHttpRequest COM object.

    Args:
        WinHttpReq: Request object, e.g. the one returned by getWinHttpReq().
        sendUrl: Target URL.
        reUrl: Value for option 6 (redirect handling).
        body: ``None`` sends a GET. Anything else sends a POST; a non-``str``
            body is URL-encoded first.
        reType: Selects what is returned (see Returns).
        headDic: Optional mapping of extra request headers (name -> value).
        code: Codec used to decode the response body (undecodable bytes are
            ignored). ``None`` returns the body undecoded.

    Returns:
        * ``reType == 1``: the response body with a leading UTF-8 BOM removed,
          decoded with ``code`` unless ``code`` is ``None``; ``''`` if the
          body cannot be read.
        * ``reType == 2``: the ``Location`` response header; if that header is
          unavailable, the body processed as for ``reType == 1``.
        * anything else: all response headers as one string.
    """
    def read_body():
        # Shared by the reType 1 path and the reType 2 fallback.
        try:
            raw = WinHttpReq.ResponseBody.obj
        except Exception:
            return ''
        if raw.startswith(b'\xef\xbb\xbf'):
            raw = raw[3:]  # drop the 3-byte UTF-8 BOM
        if code is not None:
            return raw.decode(code, 'ignore')
        return raw

    WinHttpReq.SetOption(6, reUrl)

    # The HTTP verb follows from whether a body was supplied.
    if body is None:
        WinHttpReq.Open('GET', sendUrl, False)
    else:
        WinHttpReq.Open('POST', sendUrl, False)

    WinHttpReq.SetRequestHeader('User-Agent', g_headers['User-Agent'])
    if headDic is not None:
        for name in headDic:
            try:
                # Pass the header's own value, not the whole dict.
                WinHttpReq.SetRequestHeader(name, headDic[name])
            except Exception:
                # Best effort: a header the COM object rejects is skipped.
                continue

    if body is None:
        WinHttpReq.Send()
    else:
        if not isinstance(body, str):
            body = parse.urlencode(body)
        WinHttpReq.Send(body)
    WinHttpReq.WaitForResponse()

    if reType == 1:
        return read_body()

    if reType == 2:
        try:
            return WinHttpReq.GetResponseHeader('Location')
        except Exception:
            return read_body()

    return WinHttpReq.GetAllResponseHeaders()
def strGetlen(strn, strx, strend):
    """Return the part of *strn* that lies between *strx* and *strend*.

    Args:
        strn: Text to search.
        strx: Start marker; the result begins right after its first occurrence.
        strend: End marker, searched for after the start marker. An empty
            string means "up to the end of *strn*".

    Returns:
        The text between the markers, or ``''`` when either marker is missing.
    """
    sint = strn.find(strx)
    if sint == -1:
        return ''
    # Skip past the start marker itself.
    strn = strn[sint + len(strx):]
    if strend == "":
        return strn
    eint = strn.find(strend)
    if eint == -1:
        # Without this guard find()'s -1 would slice off the last character.
        return ''
    return strn[:eint]
def getMD5(content):
    """Return the lowercase hex MD5 digest of *content* encoded as UTF-8.

    Args:
        content: Text to hash.

    Returns:
        A 32-character hexadecimal string.
    """
    return hashlib.md5(content.encode('utf-8')).hexdigest()
def translate(text):
    """Translate *text* through Youdao's ``translate_o`` web endpoint.

    The request is signed with
    ``md5("fanyideskweb" + text + salt + <client secret>)`` and sent as a
    form-encoded POST through the shared ``winhttp`` object.

    Args:
        text: Source text; source and target languages are auto-detected.

    Returns:
        The translated text on success; ``'翻译失败:<errorCode>'`` when the
        service reports a non-zero ``errorCode``; ``None`` (after printing the
        error) when a successful reply does not have the expected shape.

    Raises:
        json.JSONDecodeError: If the HTTP layer returned something that is not
            JSON (for example the empty string it yields on failure).
    """
    headData = {
        'X-Requested-With': 'XMLHttpRequest',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Referer': 'https://fanyi.youdao.com/',
        'Cookie':'OUTFOX_SEARCH_USER_ID=1246911609@10.169.0.81; OUTFOX_SEARCH_USER_ID_NCOO=1980893172.0992112; UM_distinctid=17c7e122e12be-0fd8d04e99cb3a-b7a1438-1fa400-17c7e122e1310ed; P_INFO=18817122609|1634202753|1|youdao_zhiyun2018|00&99|null&null&null#hun&430100#10#0|&0||18817122609; JSESSIONID=aaa0_e_9U6Id95XIy8cYx; ___rl__test__cookies=1634284455005',
    }

    # NOTE(review): salt and lts are plain random numbers here; the web client
    # presumably derives them from the current timestamp - confirm the
    # endpoint keeps accepting arbitrary values.
    salt = str(random.random()*1000)
    lts = str(random.random()*1000)
    bv = getMD5(g_headers['User-Agent'])
    sign = getMD5("fanyideskweb" + text + salt + "Y2FYu%TNSbMCxc3t2u^XT")

    postUrl = 'https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
    # The field is "&lts="; an HTML-entity decode had turned "&lt" into "<".
    postData = (
        'i=' + urllib.parse.quote(text)
        + '&from=AUTO&to=AUTO&smartresult=dict&client=fanyideskweb&salt=' + salt
        + '&sign=' + sign
        + '&lts=' + lts
        + '&bv=' + bv
        + '&doctype=json&version=2.1&keyfrom=fanyi.web&action=FY_BY_REALTlME'
    )

    reply = json.loads(SendWinHttpReq(winhttp, postUrl, True, postData, 1, headData))
    if reply['errorCode'] != 0:
        return '翻译失败:'+str(reply['errorCode'])

    try:
        # Assumes translateResult is a list of paragraphs, each a list of
        # {'src': ..., 'tgt': ...} segments - TODO confirm against a live reply.
        return '\n'.join(
            ''.join(segment['tgt'] for segment in paragraph)
            for paragraph in reply['translateResult']
        )
    except (KeyError, IndexError, TypeError) as e:
        print(e)
        return None
if __name__ == '__main__':
    # Demo run: translate the sample sentence and print whatever comes back.
    text ='我也是通过此方法破解sign,对于字符串长度在30以内的,获取的sign是正确的,但是当长文本(字符串长度大于30)此时获取的sign就是错误的'
    msg = translate(text)
    print(msg)
有道 Prozacs 发表于 2021-10-21 11:14
from urllib import parse
import urllib
from win32com.client import Dispatch
感谢感谢{:1_919:} SuperSpiderMan 发表于 2021-10-21 11:13
谷歌翻译需要selenium模拟浏览器才能使用,有道挺好的,请问您有谷歌翻译/有道翻译的代码么
import requests
import execjs
from urllib.parse import quote
# Module-wide verbosity flag: when true, translate() and get_translate()
# print intermediate values and error details.
debug = False
import json
class Py4Js:
    """Computes the ``tk`` token that build_url() puts into the request URL.

    The token algorithm is kept as JavaScript and run through ``execjs``:
    ``TL`` expands the input into its UTF-8 byte values, folds them into a
    32-bit accumulator with ``RL``, and formats the result as
    ``"<n>.<n ^ b>"``.
    """

    def __init__(self):
        # The byte buffer must be filled with e[f++] and read back with e[f].
        # Without those subscripts `e` is overwritten by a number, the hashing
        # loop never runs, and every input gets the same token.
        self.ctx = execjs.compile("""
        function TL(a) {
            var k = "";
            var b = 406644;
            var b1 = 3293161072;
            var jd = ".";
            var $b = "+-a^+6";
            var Zb = "+-3^+b+-f";
            for (var e = [], f = 0, g = 0; g < a.length; g++) {
                var m = a.charCodeAt(g);
                128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
                e[f++] = m >> 18 | 240,
                e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
                e[f++] = m >> 6 & 63 | 128),
                e[f++] = m & 63 | 128)
            }
            a = b;
            for (f = 0; f < e.length; f++) a += e[f],
            a = RL(a, $b);
            a = RL(a, Zb);
            a ^= b1 || 0;
            0 > a && (a = (a & 2147483647) + 2147483648);
            a %= 1E6;
            return a.toString() + jd + (a ^ b)
        };
        function RL(a, b) {
            var t = "a";
            var Yb = "+";
            for (var c = 0; c < b.length - 2; c += 3) {
                var d = b.charAt(c + 2),
                d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
                d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
                a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
            }
            return a
        }
        """)

    def get_tk(self, text):
        """Return the ``tk`` token string for *text*."""
        return self.ctx.call("TL", text)
def build_url(text, tk, tl='zh-CN'):
    """Build the translate.google.cn ``translate_a/single`` request URL.

    Args:
        text: Text to translate; it is percent-encoded as UTF-8 before being
            appended as the ``q`` parameter.
        tk: Request token for *text*; converted with ``str()``.
        tl: Target language code, inserted verbatim.

    Returns:
        The complete request URL.
    """
    return (
        'https://translate.google.cn/translate_a/single?client=webapp&sl=auto&tl=' + tl
        + '&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&source=btn&ssel=0&tsel=0&kc=0&tk='
        + str(tk)
        + '&q=' + quote(text, encoding='utf-8')
    )
def translate(js, text, tl='zh-CN'):
    """Request a translation of *text* from translate.google.cn.

    Args:
        js: Object whose ``get_tk(text)`` returns the request token
            (a ``Py4Js`` instance).
        text: Text to translate.
        tl: Target language code.

    Returns:
        The first element of the decoded JSON reply. When that element is
        ``None`` but the reply carries a corrected spelling of the input, the
        corrected text is translated instead and the first element of that
        reply is returned. Any request or parsing failure yields ``[]``.
    """
    header = {
        'authority': 'translate.google.cn',
        'method': 'GET',
        'path': '',
        'scheme': 'https',
        'accept': '*/*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9,ja;q=0.8',
        'cookie':'_ga=GA1.3.110668007.1547438795; _gid=GA1.3.1522575542.1548327032; 1P_JAR=2019-1-24-10; NID=156=ELGmtJHel1YG9Q3RxRI4HTgAc3l1n7Y6PAxGwvecTJDJ2ScgW2p-CXdvh88XFb9dTbYEBkoayWb-2vjJbB-Rhf6auRj-M-2QRUKdZG04lt7ybh8GgffGtepoA4oPN9OO9TeAoWDY0HJHDWCUwCpYzlaQK-gKCh5aVC4HVMeoppI',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
        'x-client-data': 'CKi1yQEIhrbJAQijtskBCMG2yQEIqZ3KAQioo8oBCL+nygEI7KfKAQjiqMoBGPmlygE=',
    }
    url = build_url(text, js.get_tk(text), tl)
    res = []
    try:
        r = requests.get(url, headers=header)
        # The encoding has to be set before r.text is read to have any effect.
        r.encoding = "UTF-8"
        result = json.loads(r.text)
        if debug:
            print(result)
        # NOTE(review): the subscripts were missing from this code as found.
        # Index 0 is presumably the translated segments and index 7 the
        # "did you mean" spelling correction - confirm against a live reply.
        res = result[0]
        if res is None and result[7] is not None:
            try:
                # Strip the highlight markup around the corrected words.
                correct_text = result[7][0].replace('<b><i>', ' ').replace('</i></b>', '')
                if debug:
                    print(correct_text)
                correct_url = build_url(correct_text, js.get_tk(correct_text), tl)
                correct_response = requests.get(correct_url)
                correct_result = json.loads(correct_response.text)
                res = correct_result[0]
            except Exception as e:
                if debug:
                    print(e)
                res = []
    except Exception as e:
        res = []
        if debug:
            print(url)
            print("翻译" + text + "失败")
            print("错误信息:")
            print(e)
    # Returned after the try block, not from `finally`, so KeyboardInterrupt
    # and SystemExit are no longer silently discarded.
    return res
def get_translate(word, tl):
    """Translate *word* into language *tl* using a freshly built token generator.

    Args:
        word: Text to translate.
        tl: Target language code.

    Returns:
        Whatever ``translate()`` returns for the given input.
    """
    js = Py4Js()
    translate_result = translate(js, word, tl)
    if debug:
        print("word== %s, tl== %s" % (word, tl))
        print(translate_result)
    return translate_result
if __name__ == '__main__':
    # Demo run: translate an English sample sentence into Simplified Chinese.
    debug = False
    content= ' I also use this method to crack the sign. For the string length less than 30, the obtained sign is correct, but when the long text (string length greater than 30), the obtained sign is wrong'
    results = get_translate(content, 'zh-CN')
    print(results)
谷歌 首先感谢楼上朋友分享谷歌翻译与有道翻译代码
百度翻译遇到的问题也已经解决了,不是因为传入的字符太长引起的,
是因为python执行js的模块execjs生成的sign与浏览器生成的sign不一样
解决方法: 使用nodejs 执行JS文件,生成的sign就是正常的
页:
[1]