为啥写不进excel呢 思路出啥问题了
本帖最后由 lihu5841314 于 2021-6-10 18:47 编辑import requests
import pymongo
from copyimportdeepcopy
import time
importpandas as pd
from multiprocessing.dummy import Pool
fromlxmlimportetree
#请求每一页 获取每个电影的详细数据
defresp(url):
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
}
rep1 = requests.get(url=url,headers=headers)
rep1.encoding =rep1.apparent_encoding
tree = etree.HTML(rep1.text)
"""
解析 电影名title评分scroe多少人评价assess和 简介info
1.获取页面所有电影的标签列表
2.遍历所有列表,解析单个标签里面的内容
"""
li_list = tree.xpath('//*[@class="grid_view"]/li')
data = pd.DataFrame()
for liin li_list:
item = {}
title = li.xpath('.//div[@class="pic"]/a/img/@alt')
rep2 =li.xpath('.//div[@class="bd"]/p//text()')
info = "".join()
score =li.xpath('.//div[@class="star"]/span/text()')
assess =li.xpath('.//div[@class="star"]/span/text()')
item['title'] = title
item['info'] = info
item['score'] = score
item['assess'] = assess
# print(item)
# top_col.insert_one(item)
data = data.append(item,ignore_index=True)
print(data)
return data
if __name__ == '__main__':
# 初始url=https://movie.douban.com/top250?start=0&filter=
"""
一共250个电影一共10页构建一个urls全包
"""
# clint = pymongo.MongoClient("mongodb://localhost")
# db = clint.top250
# top_col = db.collect
start = time.time()
urls = []
for i in range(0, 251, 25):
url = f'https://movie.douban.com/top250?start={i}&filter='
urls.append(url)
pool= Pool(4)
db_pf = pd.DataFrame()
datas = pool.map(resp,urls)
fordataindatas:
db_pf = db_pf.append(data,ignore_index=True)
pool.close()
pool.join()
db_pf.to_excel("db.xlsx")
end = time.time()
print("一共耗时",end-start)
import requests
import time
importpandas as pd
from multiprocessing.dummy import Pool
fromlxmlimportetree
#请求每一页 获取每个电影的详细数据
defresp(url):
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
}
rep1 = requests.get(url=url,headers=headers)
rep1.encoding =rep1.apparent_encoding
tree = etree.HTML(rep1.text)
"""
解析 电影名title评分scroe多少人评价assess和 简介info
1.获取页面所有电影的标签列表
2.遍历所有列表,解析单个标签里面的内容
"""
item = {}
li_list = tree.xpath('//*[@class="grid_view"]/li')
for liin li_list:
title = li.xpath('.//div[@class="pic"]/a/img/@alt')
rep2 =li.xpath('.//div[@class="bd"]/p//text()')
info = "".join()
score =li.xpath('.//div[@class="star"]/span/text()')
assess =li.xpath('.//div[@class="star"]/span/text()')
item['title'] = title
item['info'] = info
item['score'] = score
item['assess'] = assess
# print(item)
db_df.append(item,ignore_index=True)
"""
为什么存不进去excel思路是 先建立一个空的dataframe然后db_df append 添加数据最后存入excel
为啥append会存不进去呢刚看完pandas视频 水货一个
"""
if __name__ == '__main__':
# 初始url=https://movie.douban.com/top250?start=0&filter=
"""
一共250个电影一共10页构建一个urls全包
"""
start = time.time()
urls = []
for i in range(0, 251, 25):
url = f'https://movie.douban.com/top250?start={i}&filter='
urls.append(url)
db_df = pd.DataFrame()
pool= Pool(4)
pool.map(resp,urls)
pool.close()
pool.join()
db_df.to_excel("db.xlsx")
end = time.time()
print("一共耗时",end-start)
“”“搞定了 多谢 dataframe用2次才行 ”“” xls才支持吧 你这写法db_df都是空值 Python中定义函数时,若想在函数内部对函数外的变量进行操作,就需要在函数内部声明其为global。函数里加上global db_df kisszju 发表于 2021-6-10 17:26
Python中定义函数时,若想在函数内部对函数外的变量进行操作,就需要在函数内部声明其为global。函数里加上gl ...
db_df 在main 下面已经定义了 为全局变量 咸鱼灭 发表于 2021-6-10 16:52
你这写法db_df都是空值
确实是空值 该怎么写不是空值呢
lihu5841314 发表于 2021-6-10 17:32
确实是空值 该怎么写不是空值呢
db_df = db_df.append(item,ignore_index=True) debugger一下,看一下变量值是不是自己想要的。pandas的引擎是openpyxl? 处女-大龙猫 发表于 2021-6-10 18:43
debugger一下,看一下变量值是不是自己想要的。pandas的引擎是openpyxl?
不是包的问题 函数里面接收不到 main里面的data变量 咸鱼灭 发表于 2021-6-10 17:42
db_df = db_df.append(item,ignore_index=True)
犯了错误 必须要接收 不然就是空值
页:
[1]
2