本帖最后由 Yanpeen 于 2022-8-5 18:03 编辑
请求返回的不是网页源码,而是一段经过混淆的 JS 代码。请问要怎样才能获取到真正的网页源码?请大家帮忙看看。
[Python] 纯文本查看 复制代码 import requests, urllib3
# Silence the InsecureRequestWarning that urllib3 emits for requests made with verify=False.
urllib3.disable_warnings()
# Send a desktop-browser User-Agent instead of the default python-requests one.
head = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"}
url = "https://www.tupianzj.com/"
# verify=False skips TLS certificate verification (kept from the original script).
# timeout bounds the request: requests does not time out by default, so a stalled
# server would otherwise hang the script.
response = requests.get(url, headers=head, verify=False, timeout=10)
# Print the body of this single GET as text.
print(response.text)
以下是返回的结果:
[Python] 纯文本查看 复制代码 <script language="javascript" type="text/javascript">eval(function(p,a,c,k,e,d){e=function(c){return(c<a?"":e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!''.replace(/^/,String)){while(c--)d[e(c)]=k[c]||e(c);k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1;};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p;}('p b(j){1 7=j+"=";1 a=3.4.o(\';\');u(1 i=0;i<a.9;i++){1 c=a[i].s();f(c.q(7)==0)g c.v(7.9,c.9)}g""}1 6=b("6");1 5=B(b("5"));f(6==""||5==""){D("8=8; ",C)}x{1 k=5-y;3.4="6=; d=e, m l n 2:2:2 h;";3.4="5=; d=e, m l n 2:2:2 h;";3.4="t="+6+";";3.4="r="+k+";";A.8.z(w)}',40,40,'|var|00|document|cookie|secret|token|name|location|length|ca|getCookie||expires|Thu|if|return|UTC||cname|random|Jan|01|1970|split|function|indexOf||trim||for|substring|true|else|100|reload|window|parseInt|3000|setTimeout'.split('|'),0,{}))
</script>
使用js格式化工具转化后:
[JavaScript] 纯文本查看 复制代码 function getCookie(cname) {
// Returns the value of the cookie named `cname` from document.cookie,
// or "" when no entry matches.
var name = cname + "=";
// document.cookie is split on ';' into individual entries.
var ca = document.cookie.split(';');
for (var i = 0; i < ca.length; i++) {
// trim() removes the whitespace left around each entry by the split.
var c = ca[i].trim();
// An entry that begins with "<cname>=" matches; the text after the '=' is returned.
if (c.indexOf(name) == 0) return c.substring(name.length, c.length)
}
return ""
}
// Read the two challenge cookies; "secret" is converted to a number with parseInt.
var token = getCookie("token");
var secret = parseInt(getCookie("secret"));
// NOTE(review): a missing "secret" cookie makes parseInt return NaN, and NaN == "" is
// false, so a missing secret alone does not take this branch.
if (token == "" || secret == "") {
// After 3000 ms the string "location=location; " is evaluated, re-assigning the
// location to itself (presumably to reload the page — confirm).
setTimeout("location=location; ", 3000)
} else {
// The value later sent as "r" is the secret minus 100.
var random = secret - 100;
// Overwrite "token" and "secret" with empty values and an expiry date in 1970
// (presumably to delete them — confirm).
document.cookie = "token=; expires=Thu, 01 Jan 1970 00:00:00 UTC;";
document.cookie = "secret=; expires=Thu, 01 Jan 1970 00:00:00 UTC;";
// Store the unchanged token under the name "t" and the reduced secret under "r".
document.cookie = "t=" + token + ";";
document.cookie = "r=" + random + ";";
// Reload the page now that the new cookies are set.
window.location.reload(true)
}
--------------------------------------------------------------------------------------------
| 谢谢大家的热心解答,目前已获取到网页源码 |
--------------------------------------------------------------------------------------------
在之前代码的基础上增加了 cookies 参数。至于为什么要加 cookies、具体要加哪些内容,需要根据第一次请求返回的 JS 代码来确定。根据大家的解答,这段 JS 的逻辑大致如下:
// Return the value of the cookie named `cname`, or "" when document.cookie
// contains no entry for it.
function getCookie(cname) {
    var name = cname + "=";
    // Split the cookie string into its individual "key=value" entries.
    var ca = document.cookie.split(';');
    for (var i = 0; i < ca.length; i++) {
        // Drop the whitespace left around the entry by the split.
        var c = ca[i].trim();
        // An entry that starts with "<cname>=" is the one we want: return the text after the '='.
        if (c.indexOf(name) == 0) return c.substring(name.length, c.length)
    }
    // No entry matched.
    return ""
}
// Value of the "token" cookie ("" when it is absent).
var token = getCookie("token");
// Value of the "secret" cookie parsed as an integer (NaN when it is absent).
var secret = parseInt(getCookie("secret"));
// NOTE(review): NaN == "" is false, so a missing secret alone does not take this branch.
if (token == "" || secret == "") {
    // Evaluate "location=location; " after 3000 ms (presumably to reload the page — confirm).
    setTimeout("location=location; ", 3000)
} else {
    // The value sent back as "r" is the received secret minus 100.
    var random = secret - 100;
    // Overwrite "token" and "secret" with empty values and an expiry date in 1970.
    document.cookie = "token=; expires=Thu, 01 Jan 1970 00:00:00 UTC;";
    document.cookie = "secret=; expires=Thu, 01 Jan 1970 00:00:00 UTC;";
    // "t" carries the token unchanged.
    document.cookie = "t=" + token + ";";
    // "r" carries secret - 100.
    document.cookie = "r=" + random + ";";
    // Reload the page with the new cookies in place.
    window.location.reload(true)
}
以下是能获取网页源码的代码:
[Python] 纯文本查看 复制代码 import requests, urllib3
# Silence the InsecureRequestWarning that urllib3 emits for requests made with verify=False.
urllib3.disable_warnings()
head = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
}
url = "https://www.tupianzj.com/"
# First request: its only purpose is to collect the "token" and "secret" cookies.
# timeout bounds the request; requests does not time out by default.
response = requests.get(url, headers=head, verify=False, timeout=10)
# .get() returns None for a missing cookie instead of raising KeyError.
token = response.cookies.get('token')
secret = response.cookies.get('secret')
if token is not None and secret is not None:
    # Mirror the challenge script: "t" reuses the token unchanged and "r" is the
    # secret minus 100.
    cookies = {'t': token, 'r': str(int(secret) - 100)}
    # Second request: with these cookies the poster reports getting the real page source.
    response = requests.get(url, headers=head, verify=False, cookies=cookies, timeout=10)
# When the challenge cookies were not issued, the first response is printed as-is
# (assumption: no challenge was served in that case — confirm against the site).
print(response.text)
--------------------------------------------------------------------------------------------
| 再次感谢大家的热心解答,共勉!!! |
--------------------------------------------------------------------------------------------
以下附上把返回的 JS 代码还原(解码)成可读代码的方法:
将以下代码转存为html文件
[HTML] 纯文本查看 复制代码 <html>
<body>
<script>
// Radix of the replacement tokens: num() encodes dictionary indexes in base `a`,
// and encode() emits it as the second argument of the generated unpacker call.
a=62;
/**
 * Pack the script in the #code textarea into an
 * eval(function(p,a,c,k,e,d){...}(...)) wrapper and write the result back
 * into the textarea.
 *
 * Every run of word characters in the script is replaced by a short base-`a`
 * token (see num()); the original words travel in a '|'-joined dictionary that
 * the generated unpacker uses to restore them.
 */
function encode() {
    var box = document.getElementById('code');
    var code = box.value;
    // Join the script onto one line so it fits inside a single string literal.
    code = code.replace(/[\r\n]+/g, '');
    // The script is emitted inside '...': escape backslashes first, then single quotes.
    code = code.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
    // Collect every word; match() returns null when there is none.
    var tmp = code.match(/\b\w+\b/g) || [];
    tmp.sort();
    // De-duplicate the sorted words into the dictionary.
    var dict = [];
    var i, t = '';
    for (i = 0; i < tmp.length; i++) {
        if (tmp[i] != t) dict.push(t = tmp[i]);
    }
    var len = dict.length;
    // word -> dictionary index; a prototype-less object so that words such as
    // "constructor" cannot collide with inherited properties.
    var index = Object.create(null);
    for (i = 0; i < len; i++) index[dict[i]] = i;
    // Replace all words in a single pass so that a token written for one word can
    // never be rewritten again as if it were another word.
    code = code.replace(/\b\w+\b/g, function (word) {
        return num(index[word]);
    });
    // A word that equals its own token needs no dictionary entry; the unpacker
    // falls back to the token itself for empty entries.
    for (i = 0; i < len; i++) {
        if (num(i) == dict[i]) dict[i] = '';
    }
    // The unpacker source must itself contain '\\w+' and '\\b', hence the four
    // backslashes inside this double-quoted literal.
    box.value = "eval(function(p,a,c,k,e,d){e=function(c){return(c<a?'':e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!''.replace(/^/,String)){while(c--)d[e(c)]=k[c]||e(c);k=[function(e){return d[e]}];e=function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}("
        + "'" + code + "'," + a + "," + len + ",'" + dict.join('|') + "'.split('|'),0,{}))";
}
// Encode the integer c as a token in base `a`: the leading digits come from a
// recursive call, the last digit uses 0-9a-z for remainders up to 35 and the
// character with code remainder+29 above that (A-Z when a is 62).
function num(c) {
    var head;
    if (c < a) {
        head = '';
    } else {
        head = num(parseInt(c / a));
    }
    var digit = c % a;
    var tail;
    if (digit > 35) {
        tail = String.fromCharCode(digit + 29);
    } else {
        tail = digit.toString(36);
    }
    return head + tail;
}
// Evaluates the current content of the #code textarea as JavaScript.
// NOTE: this is a direct eval of whatever text is in the box — only run scripts you trust.
function run() {
eval(document.getElementById('code').value);
}
/**
 * Unpack an eval(function(p,a,c,k,e,d){...}(...)) script held in the #code
 * textarea and replace the textarea content with the unpacked source.
 *
 * The leading `eval` is stripped so that evaluating the remainder returns the
 * unpacker's result (the source text) instead of executing that source.
 */
function decode() {
    var box = document.getElementById('code');
    var code = box.value;
    // Tolerate whitespace or blank lines pasted in front of `eval`; without this the
    // prefix is not stripped and the packed script itself would be executed.
    var code2 = code.replace(/^\s*eval/, '');
    // NOTE: this still runs the pasted unpacker function — only decode scripts you
    // are willing to evaluate.
    box.value = eval(code2);
}
</script>
<textarea id=code cols=80 rows=20>
</textarea>
<input type=button onclick=encode() value=编码>
<input type=button onclick=run() value=执行>
<input type=button onclick=decode() value=解码>
</body>
</html>
双击打开保存好的 html 文件,
把返回的 JS 代码粘贴到文本框中(需要先去掉首尾的 <script> 标签),
然后点击“解码”按钮,文本框中就会显示还原后的代码。
|