一条游泳的鱼 发表于 2020-12-2 15:00

爬虫 某翻译网站 中文翻译

```python
# 该code主要提供交流学习使用,请勿利用其进行不当行为!
import re
from pprint import pprint

import requests
import execjs

# Request headers passed to every HTTP call in this script.
headers = {
    # Desktop Chrome user-agent string.
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/84.0.4147.105 Safari/537.36"
    ),
    # Hard-coded cookie value; presumably captured from a browser session --
    # NOTE(review): confirm it is still accepted by the site.
    "cookie": "BAIDUID=1EF58568F9751E80F2BFDA80A797BBE4:FG=1",
}

def get_token():
    """Fetch the site's landing page and extract ``token`` and ``gtk``.

    Both values are embedded in inline JavaScript on the page.  ``gtk`` is
    the seed for the sign computation and ``token`` is posted along with the
    query.

    Note: the URL below is a redacted placeholder; it has to be replaced with
    the real site address before this function can succeed.

    Returns:
        tuple[str, str]: ``(token, gtk)`` as plain strings (the first
        occurrence of each in the page source).

    Raises:
        requests.HTTPError: if the page request returns an error status.
        ValueError: if either value cannot be found in the page source.
    """
    url = "https://XXXXX.XXXXX.XXXXX/"

    # A timeout keeps the script from hanging forever on a stalled connection.
    rest = requests.get(url=url, headers=headers, timeout=10)
    rest.raise_for_status()

    # re.search yields the first occurrence; the previous findall() handed
    # whole lists to callers that expect a single string each.
    token_match = re.search(r"token: '(.*?)',", rest.text)
    gtk_match = re.search(r"window.gtk = '(.*?)';", rest.text)

    if token_match is None or gtk_match is None:
        raise ValueError(
            "could not find token/gtk in the page source; "
            "check the URL and the cookie in `headers`"
        )

    return token_match.group(1), gtk_match.group(1)


_UINT32_MASK = 0xFFFFFFFF


def _js_number(text):
    """Return *text* parsed as an int, or 0 when it is not numeric.

    Mirrors the JavaScript idiom ``Number(x) || 0`` for integer strings.
    """
    try:
        return int(text)
    except (TypeError, ValueError):
        return 0


def _mix(value, ops):
    """Scramble the 32-bit *value* according to the op-string *ops*.

    *ops* is read three characters at a time:
      * 1st char: ``+`` adds the shifted value (mod 2**32), anything else XORs;
      * 2nd char: ``+`` is an unsigned right shift, anything else a left shift;
      * 3rd char: shift amount, a decimal digit or a letter (``a`` == 10).
    """
    value &= _UINT32_MASK
    for i in range(0, len(ops) - 2, 3):
        digit = ops[i + 2]
        amount = ord(digit) - 87 if digit >= "a" else int(digit)
        if ops[i + 1] == "+":
            shifted = value >> amount
        else:
            shifted = (value << amount) & _UINT32_MASK
        if ops[i] == "+":
            value = (value + shifted) & _UINT32_MASK
        else:
            value ^= shifted
    return value


def get_sign(query, gtk):
    """Compute the ``sign`` request parameter for *query*.

    This is a pure-Python port of the JavaScript routine that was previously
    embedded here as a string.  That copy had lost its index subscripts
    (e.g. ``d[0]``, ``S[c++]``) and called an undefined helper, so it could
    not produce a valid sign.  No JavaScript runtime is needed any more.

    Algorithm:
      1. If the text is longer than 30 characters, keep only the first 10,
         the middle 10 and the last 10 characters.
      2. Encode the text as UTF-8 and fold every byte into a 32-bit hash
         seeded with the integer part of *gtk*.
      3. XOR with the fractional part of *gtk*, reduce modulo 1e6 and format
         as ``"<p>.<p ^ seed>"``.

    Args:
        query: Text to be translated.
        gtk: Seed string of the form ``"<int>.<int>"``.  A one-element
            list/tuple (as returned by ``re.findall``) is accepted as well.

    Returns:
        str: The sign, e.g. ``"54706.276099"``.
    """
    # Tolerate the list shape produced by re.findall().
    if isinstance(gtk, (list, tuple)):
        gtk = gtk[0] if gtk else ""

    parts = str(gtk).split(".")
    seed = _js_number(parts[0])
    salt = _js_number(parts[1]) if len(parts) > 1 else 0

    # Python strings index by code point, so one slice covers both the plain
    # and the surrogate-pair branch of the JavaScript original.
    length = len(query)
    if length > 30:
        mid = length // 2
        query = query[:10] + query[mid - 5:mid + 5] + query[-10:]

    acc = seed & _UINT32_MASK
    for byte in query.encode("utf-8"):
        # "+-a^+6": acc += acc << 10; acc ^= acc >>> 6
        acc = _mix((acc + byte) & _UINT32_MASK, "+-a^+6")
    # "+-3^+b+-f": acc += acc << 3; acc ^= acc >>> 11; acc += acc << 15
    acc = _mix(acc, "+-3^+b+-f")

    acc ^= salt & _UINT32_MASK
    acc %= 1000000

    # JavaScript's ^ yields a signed 32-bit integer; reproduce that.
    tail = (acc ^ seed) & _UINT32_MASK
    if tail >= 0x80000000:
        tail -= 0x100000000

    return "{}.{}".format(acc, tail)


def get_j(sign, token, query):
    """POST *query* to the translation endpoint and pretty-print the reply.

    Note: the URL below is a redacted placeholder; it has to be replaced with
    the real endpoint before this function can succeed.

    Args:
        sign: Sign string computed for *query*.
        token: Token extracted from the site's landing page.
        query: Text to translate (Chinese to English).

    Returns:
        The decoded JSON payload (it is also printed with ``pprint``).

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
        ValueError: if the response body is not valid JSON, which is what
            happens when the URL is wrong and an HTML page comes back.
    """
    url = "https://XXXXX.XXXXX.XXXXX/XXXXX?XXXXX=zh&to=en"

    data = {
      "from": "zh",
      "to": "en",
      "query": query,
      "simple_means_flag": "3",
      "sign": sign,
      "token": token,
      "domain": "common",
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    rest = requests.post(url=url, headers=headers, data=data, timeout=10)
    rest.raise_for_status()

    try:
        result = rest.json()
    except ValueError as err:
        # Show the start of the body so a wrong URL is easy to diagnose.
        raise ValueError(
            "response is not JSON (wrong URL?): {!r}".format(rest.text[:200])
        ) from err

    pprint(result)
    return result

def main():
    """Translate a fixed sample word and print the service's JSON reply."""
    text = "字典"
    credentials = get_token()
    token, gtk = credentials
    # The sign is computed from the text and gtk before the request is sent.
    get_j(get_sign(text, gtk), token, text)



# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
```

一条游泳的鱼 发表于 2020-12-8 17:30

cyansto 发表于 2020-12-8 11:36
你好我想问下,js_str的js函数是bd的加密函数?

嗯嗯是的,sign 参数 js 代码

Alfred斯斯 发表于 2020-12-2 16:40

我来学习我来学习

诅咒者之魂 发表于 2020-12-2 17:04

用心讨论,共获提升!

zgz1979clh 发表于 2020-12-6 19:17

来学习我来学习感谢楼主分享

who_unknown 发表于 2020-12-7 12:07

rest.json() 报错:
pprint(rest.json())

File "C:\Users\huhao\Anaconda3\lib\site-packages\requests\models.py", line 898, in json
    return complexjson.loads(self.text, **kwargs)

File "C:\Users\huhao\Anaconda3\lib\json\__init__.py", line 357, in loads
    return _default_decoder.decode(s)

File "C:\Users\huhao\Anaconda3\lib\json\decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())

File "C:\Users\huhao\Anaconda3\lib\json\decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None

JSONDecodeError: Expecting value

一条游泳的鱼 发表于 2020-12-8 10:10

who_unknown 发表于 2020-12-7 12:07
rest.json() 报错:
pprint(rest.json())



翻译网站你找对了吗?这个是针对 bd 翻译的,我这边运行正常,你看看网站地址对不对。

who_unknown 发表于 2020-12-8 10:16

一条游泳的鱼 发表于 2020-12-8 10:10
翻译网站你找对了吗?这个是针对 bd 翻译的,我这边运行正常,你看看网站地址对不对。

请问您隐藏的两处网址是什么。

一条游泳的鱼 发表于 2020-12-8 10:30

who_unknown 发表于 2020-12-8 10:16
请问您隐藏的两处网址是什么。

url = "https://fanyi.baidu.com"
url = "https://fanyi.baidu.com/v2transapi?from=zh&to=en"

cyansto 发表于 2020-12-8 11:36

你好我想问下,js_str的js函数是bd的加密函数?

who_unknown 发表于 2020-12-8 13:39

一条游泳的鱼 发表于 2020-12-8 10:30
url = "https://fanyi.baidu.com"
url = "https://fanyi.baidu.com/v2transapi?from=zh&to=en"

谢谢,返回正确的json格式数据了
页: [1]
查看完整版本: 爬虫 某翻译网站 中文翻译