Python 爬取飞卢小说的代码
import hashlib
import json
import random
import time

import requests
from bs4 import BeautifulSoup

try:
    from click._compat import raw_input
except ImportError:
    # Newer Click releases no longer ship this Python 2 shim; on Python 3
    # the builtin input() is the equivalent.
    raw_input = input
# Single HTTP session shared by log() and select().
s = requests.session()

# Headers passed explicitly with every request made through `s`.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
    ),
    'Referer': 'http://b.faloo.com/',
}
def _hash_password(password, timestamp):
    """Return the hex digest this script submits as the login password.

    Computed as md5(SALT_OUTER + md5(SALT_INNER + password + timestamp)),
    with every string UTF-8 encoded before hashing.
    """
    inner = hashlib.md5(
        ('EW234@![#$&]*{,OP}Kd^w349Op+-32_' + password + timestamp).encode('utf-8')
    ).hexdigest()
    return hashlib.md5(
        ('@345Kie(873_dfbKe>d3<.d23432=' + inner).encode('utf-8')
    ).hexdigest()


def log():
    """Log in interactively, then hand over to select().

    Each attempt prompts for the account name and password, downloads a
    captcha image to ``logo.jpg`` in the current directory, prompts for
    the captcha text and posts the hashed credentials through the shared
    session ``s``. Login is considered successful when the message-list
    page contains the marker text checked below; select() is then called
    and this function returns. Otherwise a failure message is printed and
    the whole prompt sequence starts again.
    """
    login_url = 'http://u.faloo.com/regist/login.aspx?backurl=http%3A%2F%2Fb.faloo.com%2F'
    captcha_url = 'http://u.faloo.com/Common/ValidateImage.aspx?u=&height=36&width=106&m='

    # Retry with a loop rather than by calling log() again, so repeated
    # failures do not grow the call stack.
    while True:
        account = input('账号 ')
        password = input("密码")

        # Every captcha request carries a fresh random `m` value; keep
        # requesting until the server answers 200.
        captcha = s.get(url=captcha_url + str(random.random()), headers=header)
        while captcha.status_code != 200:
            captcha = s.get(url=captcha_url + str(random.random()), headers=header)

        # Context manager guarantees the image is flushed and closed
        # before the user is asked to read it.
        with open('logo.jpg', 'wb') as image_file:
            image_file.write(captcha.content)
        captcha_text = input("yzm")

        # The same timestamp is used inside the hash and sent as `ts`.
        timestamp = str(int(time.time()))
        data = {
            'txtUserID': account,
            'txtPwd': _hash_password(password, timestamp),
            'verifyCode': captcha_text,
            'ts': timestamp,
            't': '1'
        }
        s.post(url=login_url, headers=header, data=data)

        # The POST response is not inspected; success is judged from the
        # message-list page fetched with the same session.
        check = s.get(url='http://bbs.faloo.com/MyInfo/MesList.aspx', headers=header)
        check.encoding = 'gbk'
        if '选择全部消息' in check.text:
            print('yes')
            select()
            return

        # Printed for any failed attempt, whatever the actual cause.
        print('验证码不对')
def select():
    """Prompt for an index URL and save every linked chapter to a file.

    The index page is fetched through the shared session ``s``, decoded
    as GBK, and every anchor matched by the ``.centent a`` selector is
    treated as a chapter link. Each chapter is written to
    ``<title>.txt`` in the current directory as two JSON-encoded strings
    (title, then body with full-width spaces and line breaks removed).

    If a chapter body contains the "not subscribed" marker text, a
    message is printed, select() is called again for a new URL, and this
    invocation returns. Any other error while handling a chapter prints a
    message with the error and also calls select() again.
    """
    print('1')
    index_url = input('url')
    index_page = s.get(url=index_url, headers=header)
    index_page.encoding = 'gbk'
    index_soup = BeautifulSoup(index_page.text, 'lxml')
    chapter_links = index_soup.select('.centent a')

    for link in chapter_links:
        try:
            chapter_page = s.get(url=link.get('href'), headers=header)
            chapter_page.encoding = 'gbk'
            chapter_soup = BeautifulSoup(chapter_page.text, 'lxml')
            zw = {
                'title': chapter_soup.select('#title h1')[0].text,
                'zw': chapter_soup.select('#content')[0].text
                      .replace('\u3000', '').replace('\r', '').replace('\n', ''),
            }
            if '您还没有订阅本章节' in zw['zw']:
                print('您还没有订阅本章节')
                select()
                return

            # Context manager closes the file even if the write fails.
            with open(zw['title'] + '.txt', 'w+', encoding='utf-8') as chapter_file:
                # NOTE(review): the trailing "\n" is inside json.dumps, so it
                # is written as an escaped backslash-n; kept as-is to
                # preserve the existing output format.
                chapter_file.write(
                    json.dumps(zw['title'], ensure_ascii=False) + "\n"
                    + json.dumps(zw['zw'] + "\n", ensure_ascii=False)
                )
            print(zw)
        except Exception as exc:
            # Exception (not a bare except) lets Ctrl+C / SystemExit end the
            # program instead of being turned into another URL prompt; the
            # error is shown so real failures are not hidden.
            print('下载结束 ', exc)
            # NOTE(review): the source lost its indentation; this call is
            # assumed to belong to the except handler -- confirm.
            select()
def main():
    """Script entry point: start the interactive login flow."""
    log()
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
http://pan.baidu.com/s/1miP3NnE
可惜实用价值并不大。 为啥后面加个函数专门调用上一个函数? 感谢楼主,评分了。 嗯嗯,学习了。 ila 发表于 2017-11-11 23:51:
为啥后面加个函数专门调用上一个函数?
用来实现循环。 小白问下,VIP章节可以下载不? 糖福禄 发表于 2017-11-12 11:51:
小白问下,VIP章节可以下载不?
不可以的欸。 我用 PyCharm,
默认 #coding:utf-8,这个有啥用?
页:
[1]
2