PYTHON 爬虫求帮忙写段保存代码！

wang2019 发表于 2020-8-11 01:48

本帖最后由 wang2019 于 2020-8-11 01:51 编辑

                                                                                                研究好几天了还是没研究出来       {:1_901:}

                                                                                       请大佬帮忙写段代码，把数据保存到EXCEL(一共五列)里面

                                                                                                尽量不要写函数，小弟刚开始学爬虫，感谢！

import requests
from lxml import etree
import xlwt #进行excel操作

# Result container: one list per field, appended in the order
# uploader, title, play count, comment count, overall score.
datalist = []
url = 'https://www.bilibili.com/ranking'

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36"
}
resp = requests.get(url,headers = headers)
html = resp.text

parse_html = etree.HTML(html)

# Parse the data.
# Each XPath is evaluated against the whole document, so every result is a
# flat list of text nodes in document order.

# Uploader names.
Upzhu = parse_html.xpath('//*[@id="app"]/div/div/div/div/div/ul/li/div/div/div/a/span/text()')
datalist.append(Upzhu)

# Video titles.
title = parse_html.xpath('//*[@id="app"]/div/div/div/div/div/ul/li/div/div/a/text()')
datalist.append(title)

# Play count and comment count live in sibling <span> elements under the same
# parent, so an unqualified ".../span/text()" returns both values interleaved.
# The positional predicates pick one span per entry.
# NOTE(review): assumes the first span is the play count and the second the
# comment count -- confirm against the live page.
bofangliang = parse_html.xpath('//*[@id="app"]/div/div/div/div/div/ul/li/div/div/div/span[1]/text()')
datalist.append(bofangliang)

pinglunshu = parse_html.xpath('//*[@id="app"]/div/div/div/div/div/ul/li/div/div/div/span[2]/text()')
datalist.append(pinglunshu)

# Overall score.
zonghedefen = parse_html.xpath('//*[@id="app"]/div/div/div/div/div/ul/li/div/div/div/div/text()')
datalist.append(zonghedefen)

pwp 发表于 2020-8-11 02:21

本帖最后由 pwp 于 2020-8-11 02:26 编辑

import pandas as pd
filename = 'xxxxx.xlsx'
# Build the sheet with one column per scraped list.  Wrapping each list in a
# Series lets columns of different lengths coexist (shorter ones are padded
# with NaN).  The keys are the column headers shown in the spreadsheet; change
# them freely, but keep them distinct or later columns overwrite earlier ones.
excel = pd.DataFrame({
    'UP主': pd.Series(Upzhu),
    '标题': pd.Series(title),
    '播放量': pd.Series(bofangliang),
    '评论数': pd.Series(pinglunshu),
    '综合得分': pd.Series(zonghedefen),
})
# index=False keeps the row numbers out of the file.
excel.to_excel(filename, index=False)

不知为啥上面好乱，重新定义：

import pandas as pd
filename = 'xxxxx.xlsx'
# The frame has to exist before a column can be assigned into it.
excel = pd.DataFrame()
# Repeat this line once per column, each time with a different header string
# and the variable holding that column's data.
excel['你要在表格显示的名字'] = pd.Series(你的数据变量名)

pwp 发表于 2020-8-11 02:57

excel.to_excel(filename, index=False)

wang2019 发表于 2020-8-11 04:14

本帖最后由 wang2019 于 2020-8-11 04:15 编辑

pwp 发表于 2020-8-11 02:57
excel.to_excel(filename, index=False)
大佬先谢谢这么晚回复
按你的方法试了一下会报错，显示没有定义，不知道是不是我的方法不对
import pandas as pd
filename = 'Bilibili.xlsx'
excel['test'] = pd.DataFrame(datalist)
excel.to_excel(filename, index=False)

这是报的错误：
Traceback (most recent call last):
File "H:/Pycharm/dou.py", line 39, in <module>
excel['test'] = pd.DataFrame(datalist)
NameError: name 'excel' is not defined

Menguy 发表于 2020-8-11 07:47

dbu00956 发表于 2020-8-11 07:53

NameError: name 'excel' is not defined

kof21411 发表于 2020-8-11 07:54

import pandas as pd
filename = 'Bilibili.xlsx'
# datalist holds one inner list per field, so the raw frame has one ROW per
# field.  Transpose it so that every field becomes a column instead.
excel = pd.DataFrame(datalist).T
excel.to_excel(filename,sheet_name='Sheet1', index=False)

baolinguo 发表于 2020-8-11 08:29

本帖最后由 baolinguo 于 2020-8-11 15:45 编辑

#encoding:utf-8
import requests
from lxml import etree
import xlwt
import os

def split_play_comment(detail_text):
    """Split the joined detail text into ``(play, comment)``.

    The text is cut at the first "万"; the unit stays attached to the play
    count and everything after it is returned as the comment count, e.g.
    ``"123.4万5678"`` -> ``("123.4万", "5678")``.

    If the text contains no "万" there is nothing to cut on, so the whole
    text is returned as the play count and the comment count is empty.
    """
    play, unit, comment = detail_text.partition("万")
    return play + unit, comment


def spider():
    """Download the ranking page and return one dict per ranked video.

    Each dict has the keys ``rank``, ``videolink``, ``title``, ``play``,
    ``comment``, ``upname``, ``uplink`` and ``hot``; every value is a string
    built by joining the text/attribute nodes matched by the XPath.
    """
    video_list = []
    url = "https://www.bilibili.com/ranking?spm_id_from=333.851.b_7072696d61727950616765546162.3"
    html = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}).text
    html = etree.HTML(html)
    infolist = html.xpath("//li[@class='rank-item']")
    for item in infolist:
        rank = "".join(item.xpath("./div[@class='num']/text()"))
        video_link = "".join(item.xpath(".//div[@class='info']/a/@href"))
        title = "".join(item.xpath(".//div[@class='info']/a/text()"))
        # The detail spans are joined into one string, then separated again.
        detail_text = "".join(item.xpath(".//div[@class='detail']/span/text()"))
        play, comment = split_play_comment(detail_text)
        upname = "".join(item.xpath(".//div[@class='detail']/a/span/text()"))
        uplink = "http://" + "".join(item.xpath(".//div[@class='detail']/a/@href"))
        hot = "".join(item.xpath(".//div[@class='pts']/div/text()"))
        video_list.append({
            'rank': rank,
            'videolink': video_link,
            'title': title,
            'play': play,
            'comment': comment,
            'upname': upname,
            'uplink': uplink,
            'hot': hot,
        })
    # Return only after every item has been collected.
    return video_list

def hyperlink_formula(url, label):
    """Return the text of a HYPERLINK formula that shows *label* and opens *url*.

    A double quote inside *label* would end the formula's string literal
    early, so double quotes are replaced with single quotes.
    """
    safe_label = label.replace('"', "'")
    return 'HYPERLINK("' + url + '";"' + safe_label + '")'


def write_Excel():
    """Scrape the ranking with ``spider()`` and save it as an .xls workbook.

    Row 0 holds the column captions; each following row holds one video.
    The title and uploader cells are HYPERLINK formulas; the remaining cells
    are plain strings.  Any existing output file is replaced.
    """
    video_list = spider()
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet("b站热门视频")
    xstyle = xlwt.XFStyle()
    # NOTE(review): 0x02 / 0x01 are presumably xlwt's "centre" alignment
    # codes -- confirm against xlwt.Alignment.
    xstyle.alignment.horz = 0x02
    xstyle.alignment.vert = 0x01

    # Header row: one caption per column.
    head = ['视频名', 'up主','排名', '热度','播放量','评论数']
    for col, caption in enumerate(head):
        sheet.write(0, col, caption, xstyle)

    # Data rows start right below the header.
    title_width = 0
    name_width = 0
    for row, item in enumerate(video_list, start=1):
        title_data = hyperlink_formula(item["videolink"], item["title"])
        name_data = hyperlink_formula(item["uplink"], item["upname"])
        # Remember the widest entry so the column fits every row, not just
        # the last one written.
        title_width = max(title_width, int(256 * len(title_data) * 3/5))
        name_width = max(name_width, int(256 * len(name_data) * 3/5))
        sheet.write(row, 0, xlwt.Formula(title_data), xstyle)
        sheet.write(row, 1, xlwt.Formula(name_data), xstyle)
        sheet.write(row, 2, item["rank"], xstyle)
        sheet.write(row, 3, item["hot"], xstyle)
        sheet.write(row, 4, item["play"], xstyle)
        sheet.write(row, 5, item["comment"], xstyle)

    if video_list:
        sheet.col(0).width = title_width
        sheet.col(1).width = name_width

    # Drop a stale copy if there is one, then always write the workbook.
    out_path = "b站热门视频信息.xls"
    if os.path.exists(out_path):
        os.remove(out_path)
    workbook.save(out_path)

# Run the export only when this file is executed as a script.
if __name__ == '__main__':
    write_Excel()

rosemaryzed 发表于 2020-8-11 08:57

楼上写得比较规范。另外，用 Python 操作 .csv 文件也很方便。

hyolyn 发表于 2020-8-11 09:02

想学python私我，给你教程资源，自己去学

页: [1] 2

吾爱破解 - 52pojie.cn's Archiver

PYTHON 爬虫 求 帮忙写段保存代码！

PYTHON 爬虫求帮忙写段保存代码！