财才666
Posted on 2020-4-19 01:28
# encoding: utf-8
# author: Michael Zhang
# date: 2020/4/18
# Weekend practice: use XPath to scrape the Qidian fantasy-novel ranking list
from lxml import etree
import requests
import csv
import time
from fake_useragent import UserAgent
# Random Chrome User-Agent so the requests look less like a bot
headers = {
    "User-Agent": UserAgent().chrome
}
csv_header = ["Rank", "Novel_name", "Votes", "Author", "Update_chapter"]
info_list = []  # accumulates one dict per novel across all pages
def get_data(url):
    # Send the request for one ranking page
    response = requests.get(url, headers=headers)
    e = etree.HTML(response.text)
    # Pull out each column of the ranking table with XPath
    ranks = e.xpath("//em[@class='number']/span/text()")
    names = e.xpath("//td/a[@class='name']/text()")
    authors = e.xpath("//td/a[@class='author']/text()")
    votes = e.xpath("//td[@class='month']/text()")
    newchapter = e.xpath("//td/a[@class='chapter']/text()")
    # Zip the parallel lists into one dict per novel and collect them in info_list
    # (loop variable renamed to `vote` so it does not shadow the `votes` list)
    for rank, name, vote, author, chapter in zip(ranks, names, votes, authors, newchapter):
        result = dict(zip(csv_header, [rank, name, vote, author, chapter]))
        # print(result)  # uncomment to inspect each row
        info_list.append(result)
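# Illustrative note (my addition, not from the original post): dict(zip(...))
# pairs the header names with one row of values, e.g.
#     dict(zip(["Rank", "Novel_name"], ["1", "Example Novel"]))
#     -> {"Rank": "1", "Novel_name": "Example Novel"}
# so every ranking row ends up keyed exactly like csv_header.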
# Write everything collected so far to a CSV file
def save_info(filename):
    # "w+" truncates on open, so each call rewrites the full info_list;
    # calling it after every page means partial results survive a crash
    with open(filename, "w+", encoding='utf-8', newline='') as f:
        f_csv = csv.DictWriter(f, csv_header)
        f_csv.writeheader()
        f_csv.writerows(info_list)
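# Illustrative note (my addition, not from the original post): DictWriter emits
# columns in fieldnames order no matter how each dict's keys are ordered, e.g.
#     csv.DictWriter(f, ["a", "b"]).writerow({"b": 2, "a": 1})  # writes "1,2"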
def main():
    filename = "XuanHuan_Novel_April_Rank.csv"
    base_url = "https://www.qidian.com/rank/fengyun?chn=21&style=2&page="
    # Ten pages of data: the top 630 fantasy novels
    for i in range(1, 11):
        url = base_url + str(i)
        get_data(url)
        save_info(filename)
        time.sleep(10)  # pause between requests to go easy on the server

if __name__ == '__main__':
    main()
    print("Done")