python 爬飞卢小说代码

寒小凡 · 发表于 2017-11-11 23:43

import json
import requests
import hashlib
import time
import random
from bs4 import BeautifulSoup
from click._compat import raw_input
# One shared HTTP session, reused by every request in this script.
s = requests.Session()

# Headers attached to every request: a desktop Chrome User-Agent plus a
# Referer pointing at the site's own front page.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/61.0.3163.100 Safari/537.36'
    ),
    'Referer': 'http://b.faloo.com/',
}

def _fetch_captcha():
    """Download a captcha image, retrying until the server answers HTTP 200.

    Returns:
        The raw image bytes of the first successful response.
    """
    while True:
        # A fresh random ``m`` value is sent with every attempt.
        captcha_url = ('http://u.faloo.com/Common/ValidateImage.aspx'
                       '?u=&height=36&width=106&m=' + str(random.random()))
        response = s.get(url=captcha_url, headers=header)
        if response.status_code == 200:
            return response.content


def _hash_password(password, timestamp):
    """Return the salted double-MD5 hex digest submitted as ``txtPwd``.

    NOTE(review): the two salt strings presumably mirror the hashing done by
    the site's own login page -- confirm against the site before changing.
    """
    inner = hashlib.md5(
        ('EW234@![#$&]*{,OP}Kd^w349Op+-32_' + password + timestamp).encode('utf-8')
    ).hexdigest()
    return hashlib.md5(
        ('@345Kie(873_dfbKe>d3<.d23432=' + inner).encode('utf-8')
    ).hexdigest()


def log():
    """Interactively log in, then hand over to ``select()``.

    Each attempt prompts for account and password, saves a captcha image to
    ``logo.jpg`` for the user to read, prompts for the captcha text, and
    posts the login form. The login counts as successful when the message
    list page contains the marker text. On failure the whole prompt sequence
    is repeated; a loop is used instead of self-recursion so repeated
    failures do not grow the call stack.
    """
    login_url = 'http://u.faloo.com/regist/login.aspx?backurl=http%3A%2F%2Fb.faloo.com%2F'

    while True:
        # Builtin input() is the Python 3 equivalent of raw_input().
        account = input('账号 ')
        password = input("密码")

        # Fetch first, then write, so a failed download leaves no empty file;
        # the context manager guarantees the handle is closed.
        captcha_bytes = _fetch_captcha()
        with open('logo.jpg', 'wb') as captcha_file:
            captcha_file.write(captcha_bytes)
        captcha_text = input("yzm")

        # The same timestamp goes into the password hash and the ``ts`` field.
        timestamp = str(int(time.time()))
        form = {
            'txtUserID': account,
            'txtPwd': _hash_password(password, timestamp),
            'verifyCode': captcha_text,
            'ts': timestamp,
            't': '1',
        }
        s.post(url=login_url, headers=header, data=form)

        # The login response itself is not inspected; success is judged by
        # whether the message list page shows the marker text.
        page = s.get(url='http://bbs.faloo.com/MyInfo/MesList.aspx', headers=header)
        page.encoding = 'gbk'
        if '选择全部消息' in page.text:
            print('yes')
            select()
            return

        # Printed for every failed attempt, whatever the actual cause.
        print('验证码不对')

def select():
    """Prompt for a chapter-index URL and save each linked chapter to disk.

    Every anchor matched by ``.centent a`` on the index page is fetched. The
    chapter title and body are written to ``<title>.txt`` as two
    JSON-encoded strings. If a chapter body contains the "not subscribed"
    notice, the user is prompted for a new URL and the remaining links of
    the current index are abandoned. If any step for a chapter raises, the
    user is prompted for a new URL and, once that nested run returns, the
    loop carries on with the next link.
    """
    print('1')
    # Builtin input() is the Python 3 equivalent of raw_input().
    index_url = input('url')
    index_page = s.get(url=index_url, headers=header)
    index_page.encoding = 'gbk'
    index_soup = BeautifulSoup(index_page.text, 'lxml')

    for link in index_soup.select('.centent a'):
        try:
            chapter_page = s.get(url=link.get('href'), headers=header)
            chapter_page.encoding = 'gbk'
            chapter_soup = BeautifulSoup(chapter_page.text, 'lxml')
            zw = {
                'title': chapter_soup.select('#title h1')[0].text,
                # Strip ideographic spaces and line breaks from the body.
                'zw': chapter_soup.select('#content')[0].text
                      .replace('\u3000', '').replace('\r', '').replace('\n', ''),
            }
            if '您还没有订阅本章节' in zw['zw']:
                print('您还没有订阅本章节')
                select()
                return

            # Context manager so the file is flushed and closed per chapter.
            with open(zw['title'] + '.txt', 'w', encoding='utf-8') as out:
                out.write(
                    json.dumps(zw['title'], ensure_ascii=False) + "\n"
                    + json.dumps(zw['zw'] + "\n", ensure_ascii=False)
                )
            print(zw)
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C (KeyboardInterrupt)
            # and SystemExit can still stop the program.
            # NOTE(review): each retry nests another select() call; the
            # original control flow is kept on purpose.
            print('下载结束 ')
            select()

def main():
    """Script entry point: start the interactive login flow."""
    log()

# Only start the scraper when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

http://pan.baidu.com/s/1miP3NnE

zhaoxishm · 发表于 2018-1-26 18:55

可惜实用价值并不大

ila · 发表于 2017-11-11 23:51

为啥后面加个函数专门调用上一个函数

娜伤love狠疼 · 发表于 2017-11-11 23:52

感谢楼主，评分了

天空里的云 · 发表于 2017-11-11 23:57

嗯嗯，学习了

寒小凡 · 发表于 2017-11-12 00:00

ila 发表于 2017-11-11 23:51
为啥后面加个函数专门调用上一个函数

用来实现循环

糖福禄 · 发表于 2017-11-12 11:51

小白问下 VIP章节可以下载不

寒小凡 · 发表于 2017-11-12 11:52

糖福禄 发表于 2017-11-12 11:51
小白问下 VIP章节可以下载不

不可以的欸

ila · 发表于 2017-11-30 21:47

我用pycharm，
默认#coding:utf-8

如鱼得金水 · 发表于 2018-1-26 14:34

这个有啥用?

帐号		自动登录	找回密码
密码			注册[Register]

[Python 转载] python 爬飞卢小说代码

免费评分