爬虫 某翻译网站 中文翻译
```python
# 该代码主要供交流学习使用,请勿利用其进行不当行为!
import re
from pprint import pprint
import requests
import execjs
# Default HTTP headers sent with every request in this script: a desktop
# Chrome user-agent string and a fixed BAIDUID cookie.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/84.0.4147.105 Safari/537.36"
)
_COOKIE = "BAIDUID=1EF58568F9751E80F2BFDA80A797BBE4:FG=1"

headers = {"user-agent": _USER_AGENT, "cookie": _COOKIE}
def get_token():
    """Fetch the landing page and extract the ``token`` and ``gtk`` values.

    Both values are embedded in the page source as single-quoted string
    literals (``token: '...'`` and ``window.gtk = '...'``). The first
    occurrence of each is used.

    Returns:
        tuple: ``(token, gtk)``, each a plain string. Returning strings
        rather than ``re.findall`` lists matters because ``gtk`` is split on
        ``"."`` by the signing routine and ``token`` is sent as a form field.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        ValueError: if either value cannot be found in the page source.
    """
    # Placeholder host: the address is redacted in this listing and has to
    # be replaced with the real site before the request can succeed.
    url = "https://XXXXX.XXXXX.XXXXX/"
    # A timeout keeps the script from hanging forever on a stalled connection.
    rest = requests.get(url=url, headers=headers, timeout=10)
    rest.raise_for_status()

    token_match = re.search(r"token: '(.*?)',", rest.text)
    gtk_match = re.search(r"window\.gtk = '(.*?)';", rest.text)
    if token_match is None or gtk_match is None:
        # Fail here with a clear message instead of sending an unsigned
        # request further down the pipeline.
        raise ValueError("token/gtk not found in the page source")
    return token_match.group(1), gtk_match.group(1)
_UINT32_MASK = 0xFFFFFFFF


def _js_number(text):
    """Parse *text* as an integer, yielding 0 when it is not one."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return 0


def _to_int32(value):
    """Reinterpret *value* as a signed 32-bit integer (two's complement)."""
    value &= _UINT32_MASK
    return value - 0x100000000 if value & 0x80000000 else value


def _mix(value, ops):
    """Apply the three-character mixing instructions in *ops* to *value*.

    Each instruction is ``<combine><direction><amount>``:

    * ``amount`` is a shift count: a digit, or a letter where ``a`` is 10.
    * ``direction`` ``+`` means unsigned right shift, anything else left shift.
    * ``combine`` ``+`` adds the shifted value (mod 2**32), anything else XORs.

    The value is kept as an unsigned 32-bit integer throughout.
    """
    value &= _UINT32_MASK
    for pos in range(0, len(ops) - 2, 3):
        amount_char = ops[pos + 2]
        if amount_char >= "a":
            amount = ord(amount_char) - 87
        else:
            amount = int(amount_char)
        if ops[pos + 1] == "+":
            operand = value >> amount
        else:
            operand = (value << amount) & _UINT32_MASK
        if ops[pos] == "+":
            value = (value + operand) & _UINT32_MASK
        else:
            value ^= operand
    return value


def get_sign(query, gtk):
    """Compute the ``sign`` request parameter for *query*.

    This is a pure-Python port of the JavaScript signing routine that this
    function used to evaluate through ``execjs``. The embedded JavaScript had
    lost its array subscripts (``e[C]``, ``d[0]``, ``d[1]``, ``S[c++]``,
    ``S[b]``) and called an undefined helper ``a``, so it could not produce a
    valid signature. Porting the routine also removes the need for a
    JavaScript runtime.

    Algorithm:

    1. A query longer than 30 characters is reduced to its first 10, the 10
       around the middle, and its last 10 characters.
    2. The result is UTF-8 encoded.
    3. Starting from the first ``gtk`` component, every byte is added and
       mixed with ``"+-a^+6"``; the total is mixed with ``"+-3^+b+-f"``.
    4. The value is XORed with the second ``gtk`` component, taken as an
       unsigned 32-bit number and reduced modulo 1,000,000.

    Args:
        query: Text to be translated.
        gtk: Seed string of the form ``"<int>.<int>"``. A list or tuple (for
            example a raw ``re.findall`` result) is accepted; its first item
            is used.

    Returns:
        str: ``"<p>.<p ^ m>"`` where ``p`` is the reduced hash and ``m`` the
        first ``gtk`` component.

    Example:
        >>> get_sign("hello", "320305.131321201")
        '54706.276099'
    """
    if isinstance(gtk, (list, tuple)):
        gtk = gtk[0] if gtk else ""

    # Python strings are sequences of code points, so characters outside the
    # BMP already count as one element each; no surrogate-pair special case
    # is needed for the truncation step.
    chars = list(query)
    count = len(chars)
    if count > 30:
        middle = count // 2
        chars = chars[:10] + chars[middle - 5:middle + 5] + chars[-10:]

    # "surrogatepass" encodes a lone surrogate as three bytes instead of
    # raising, which is what the original byte loop did.
    payload = "".join(chars).encode("utf-8", "surrogatepass")

    seed_parts = str(gtk).split(".")
    first_seed = _js_number(seed_parts[0])
    second_seed = _js_number(seed_parts[1]) if len(seed_parts) > 1 else 0

    acc = first_seed
    for byte in payload:
        acc = _mix(acc + byte, "+-a^+6")
    acc = _mix(acc, "+-3^+b+-f")

    acc = ((acc ^ second_seed) & _UINT32_MASK) % 1000000
    return "{}.{}".format(acc, _to_int32(acc ^ first_seed))
def get_j(sign, token, query):
    """POST the translation request, pretty-print the reply and return it.

    Args:
        sign: Signature for *query*, as produced by ``get_sign``.
        token: Page token, as produced by ``get_token``.
        query: Text to translate; the request asks for ``zh`` -> ``en``.

    Returns:
        The decoded JSON reply. It is also pretty-printed to stdout, as
        before; returning it lets callers use the result programmatically.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the response body is not valid JSON (for example when
            the placeholder URL was not replaced with the real endpoint).
    """
    # Placeholder endpoint: the address is redacted in this listing and has
    # to be replaced with the real one before the request can succeed.
    url = "https://XXXXX.XXXXX.XXXXX/XXXXX?XXXXX=zh&to=en"
    data = {
        "from": "zh",
        "to": "en",
        "query": query,
        "simple_means_flag": "3",
        "sign": sign,
        "token": token,
        "domain": "common",
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    rest = requests.post(url=url, headers=headers, data=data, timeout=10)
    rest.raise_for_status()
    result = rest.json()
    pprint(result)
    return result
def main():
    """Run one sample translation: fetch credentials, sign the text, send it."""
    text = "字典"
    token, gtk = get_token()
    get_j(get_sign(text, gtk), token, text)


# Only run the sample when executed as a script, not when imported.
if __name__ == "__main__":
    main()
```
cyansto 发表于 2020-12-8 11:36
你好我想问下,js_str的js函数是bd的加密函数?
嗯嗯是的,这是 sign 参数的 js 代码。
我来学习我来学习
用心讨论,共获提升!
来学习我来学习,感谢楼主分享
rest.json() 报错:
pprint(rest.json())
File "C:\Users\huhao\Anaconda3\lib\site-packages\requests\models.py", line 898, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Users\huhao\Anaconda3\lib\json\__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "C:\Users\huhao\Anaconda3\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Users\huhao\Anaconda3\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
JSONDecodeError: Expecting value
who_unknown 发表于 2020-12-7 12:07
rest.json() 报错:
pprint(rest.json())
翻译网站你找对了吗,这个是bd翻译的,我这运行正常,你看看网站地址对不 一条游泳的鱼 发表于 2020-12-8 10:10
翻译网站你找对了吗,这个是bd翻译的,我这运行正常,你看看网站地址对不
请问您隐藏的两处网址是什么。 who_unknown 发表于 2020-12-8 10:16
请问您隐藏的两处网址是什么。
url = "https://fanyi.baidu.com"
url = "https://fanyi.baidu.com/v2transapi?from=zh&to=en"
你好我想问下,js_str的js函数是bd的加密函数? 一条游泳的鱼 发表于 2020-12-8 10:30
url = "https://fanyi.baidu.com"
url = "https://fanyi.baidu.com/v2transapi?from=zh&to=en"
谢谢,返回正确的json格式数据了
页:
[1]