PYTHON 爬虫 求 帮忙写段保存代码!
本帖最后由 wang2019 于 2020-8-11 01:51 编辑
研究好几天了,还是没研究出来 {:1_901:}
请大佬帮忙写段代码,把数据保存到EXCEL(一共五列)里面
尽量不要写函数,小弟刚开始学爬虫,感谢!
import requests
from lxml import etree
import xlwt #进行excel操作
# Scrape the bilibili ranking page and collect five parallel lists in
# `datalist`: uploader, title, play count, comment count, overall score.
datalist = []
url = 'https://www.bilibili.com/ranking'
headers = {
    "User-Agent": "Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 80.0.3987.122Safari / 537.36"
}
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops early instead of parsing an HTTP error page.
resp = requests.get(url, headers=headers, timeout=10)
resp.raise_for_status()
html = resp.text
parse_html = etree.HTML(html)
# Parse the data
Upzhu = parse_html.xpath('//*[@id="app"]/div/div/div/div/div/ul/li/div/div/div/a/span/text()')
datalist.append(Upzhu)
title = parse_html.xpath('//*[@id="app"]/div/div/div/div/div/ul/li/div/div/a/text()')
datalist.append(title)
# The play count and the comment count are selected by the very same XPath,
# so querying it twice produced two identical lists. Query once and
# de-interleave the result instead.
# NOTE(review): assumes every ranking item yields exactly two text nodes
# here, play count first and comment count second -- confirm on the live page.
stat_texts = parse_html.xpath('//*[@id="app"]/div/div/div/div/div/ul/li/div/div/div/span/text()')
bofangliang = stat_texts[0::2]
datalist.append(bofangliang)
pinglunshu = stat_texts[1::2]
datalist.append(pinglunshu)
zonghedefen = parse_html.xpath('//*[@id="app"]/div/div/div/div/div/ul/li/div/div/div/div/text()')
datalist.append(zonghedefen)
本帖最后由 pwp 于 2020-8-11 02:26 编辑
import pandas as pd
filename = 'xxxxx.xlsx'
excel['你要在表格显示的名字'] = pd.DataFrame(Upzhu)# 转换成电子表格识别的格式
excel['你要在表格显示的名字'] = pd.DataFrame(title)# 转换成电子表格识别的格式
excel['你要在表格显示的名字'] = pd.DataFrame(bofangliang)# 转换成电子表格识别的格式
excel['你要在表格显示的名字'] = pd.DataFrame(pinglunshu)# 转换成电子表格识别的格式
excel['你要在表格显示的名字'] = pd.DataFrame(zonghedefen)# 转换成电子表格识别的格式
excel.to_excel(filename, index=False)
不知为啥上面好乱,重新定义:
import pandas as pd
# Output workbook path (placeholder file name).
filename = 'xxxxx.xlsx'
# Template line: both the string key and the DataFrame argument are
# placeholders, to be replaced with a column caption and a scraped list.
# NOTE(review): `excel` is never created in this snippet, so running it as-is
# raises NameError -- presumably a DataFrame was meant to be created first.
excel['你要在表格显示的名字'] = pd.DataFrame(你的数据变量名)
excel.to_excel(filename, index=False)
本帖最后由 wang2019 于 2020-8-11 04:15 编辑
pwp 发表于 2020-8-11 02:57
excel.to_excel(filename, index=False)
大佬 先谢谢这么晚回复
按你的方法试了一下会报错,显示没有定义,不知道是不是我的方法不对
import pandas as pd
# Target workbook for the scraped ranking data.
filename = 'Bilibili.xlsx'
# NOTE(review): `excel` has not been assigned anywhere before this line, so
# the item assignment fails with "NameError: name 'excel' is not defined".
excel['test'] = pd.DataFrame(datalist)
# Would write the frame to `filename` without the index column; never reached
# because the line above raises.
excel.to_excel(filename, index=False)
这是报的错误:
Traceback (most recent call last):
File "H:/Pycharm/dou.py", line 39, in <module>
excel['test'] = pd.DataFrame(datalist)
NameError: name 'excel' is not defined
NameError: name 'excel' is not defined
import pandas as pd
filename = 'Bilibili.xlsx'
# `datalist` holds one list per field (five lists in total), so passing it
# straight to DataFrame lays every field out as a ROW. Label the five lists
# and transpose so that each field becomes a column of the sheet; lists that
# are shorter than the longest one are padded with missing values.
excel = pd.DataFrame(
    datalist,
    index=['Upzhu', 'title', 'bofangliang', 'pinglunshu', 'zonghedefen'],
).T
excel.to_excel(filename,sheet_name='Sheet1', index=False)
本帖最后由 baolinguo 于 2020-8-11 15:45 编辑
#encoding:utf-8
import requests
from lxml import etree
import xlwt
import os
def spider():
    """Scrape the bilibili ranking page and return one dict per ranked video.

    Returns:
        A list of dicts with the keys ``rank``, ``videolink``, ``title``,
        ``play``, ``comment``, ``upname``, ``uplink`` and ``hot``. Every
        value is a string extracted from the page ("" when nothing matched,
        except ``uplink`` which always carries the "http://" prefix).

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://www.bilibili.com/ranking?spm_id_from=333.851.b_7072696d61727950616765546162.3"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}
    # Without a timeout a stalled connection would block forever; without the
    # status check an error page would be parsed as if it were the ranking.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    html = etree.HTML(response.text)

    video_list = []
    for item in html.xpath("//li[@class='rank-item']"):
        # The previous code joined both counters into one string and split it
        # on "万"; that yields a list, so `payinfo + "万"` raised TypeError,
        # and a play count without "万" could not be separated at all.
        # Reading the text nodes one by one keeps each counter exactly as the
        # page shows it.
        # NOTE(review): assumes the first non-blank text node is the play
        # count and the second the comment count -- confirm on the live page.
        counts = [
            text.strip()
            for text in item.xpath(".//div[@class='detail']/span/text()")
            if text.strip()
        ]
        play = counts[0] if counts else ""
        comment = counts[1] if len(counts) > 1 else ""

        video_list.append({
            'rank': "".join(item.xpath("./div[@class='num']/text()")),
            'videolink': "".join(item.xpath(".//div[@class='info']/a/@href")),
            'title': "".join(item.xpath(".//div[@class='info']/a/text()")),
            'play': play,
            'comment': comment,
            'upname': "".join(item.xpath(".//div[@class='detail']/a/span/text()")),
            # NOTE(review): the prefix is kept as-is; if the page emits
            # protocol-relative hrefs ("//host/...") this yields extra
            # slashes -- verify the href format.
            'uplink': "http://" + "".join(item.xpath(".//div[@class='detail']/a/@href")),
            'hot': "".join(item.xpath(".//div[@class='pts']/div/text()")),
        })
    return video_list
def _hyperlink_formula(link, label):
    """Build the text of an Excel HYPERLINK formula for *link* shown as *label*."""
    # Inside a formula string literal a double quote is written as two double
    # quotes; unescaped quotes in a title would break the formula.
    safe_link = link.replace('"', '""')
    safe_label = label.replace('"', '""')
    return 'HYPERLINK("' + safe_link + '";"' + safe_label + '")'


def write_Excel():
    """Scrape the ranking with spider() and save it as "b站热门视频信息.xls".

    The sheet gets one header row followed by one row per video. The title
    and uploader cells are HYPERLINK formulas; rank, score, play count and
    comment count are written as plain text. The file is created in the
    current working directory and replaces any previous file of that name.
    """
    video_list = spider()
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet("b站热门视频")

    # Centre every cell horizontally and vertically.
    xstyle = xlwt.XFStyle()
    xstyle.alignment.horz = xlwt.Alignment.HORZ_CENTER
    xstyle.alignment.vert = xlwt.Alignment.VERT_CENTER

    head = ['视频名', 'up主', '排名', '热度', '播放量', '评论数']
    # Write one caption per column (the whole list used to be passed as the
    # cell value, which xlwt rejects).
    for column, caption in enumerate(head):
        sheet.write(0, column, caption, xstyle)

    longest_title = 0
    longest_name = 0
    for row, item in enumerate(video_list, start=1):
        title_data = _hyperlink_formula(item["videolink"], item["title"])
        name_data = _hyperlink_formula(item["uplink"], item["upname"])
        longest_title = max(longest_title, len(title_data))
        longest_name = max(longest_name, len(name_data))

        sheet.write(row, 0, xlwt.Formula(title_data), xstyle)
        sheet.write(row, 1, xlwt.Formula(name_data), xstyle)
        sheet.write(row, 2, item["rank"], xstyle)
        sheet.write(row, 3, item["hot"], xstyle)
        sheet.write(row, 4, item["play"], xstyle)
        sheet.write(row, 5, item["comment"], xstyle)

    # Size the two link columns for their longest entry instead of whatever
    # row happened to be written last. Widths are in 1/256 of a character
    # and must fit in 16 bits.
    if longest_title:
        sheet.col(0).width = min(65535, int(256 * longest_title * 3 / 5))
    if longest_name:
        sheet.col(1).width = min(65535, int(256 * longest_name * 3 / 5))

    # save() opens the target for writing and so replaces an existing file;
    # a separate exists()/remove() step is unnecessary.
    file = "b站热门视频信息.xls"
    workbook.save(file)
if __name__ == '__main__':
    write_Excel()
楼上写的比较规范。Python 操作 .csv 也很方便。想学 Python 私我,给你教程资源,自己去学。
页:
[1]
2