学习网上的一个案例:
知识点:
while 循环
for 循环
xpath使用
requests 使用
列表推导
[Python] 纯文本查看 复制代码
# -*- coding: utf-8 -*-
import csv
import requests
from lxml import etree
def getWeather(url, timeout=10):
    """Download a weather-history page and extract one record per listed day.

    The page is requested with a browser-like ``User-Agent`` header, parsed
    with ``lxml``, and every ``<li>`` directly under ``<ul class="thrui">``
    is converted into a dictionary.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up
            (passed straight to ``requests.get``).

    Returns:
        A list of dicts in page order. Each dict has, in this order, the keys
        ``data_time`` (first space-separated token of the first cell),
        ``high`` and ``low`` (text of the second / third cell up to the
        ``℃`` sign, or the whole text when no sign is present) and
        ``weaher`` (text of the fourth cell). The key spellings and their
        order are kept unchanged because the CSV export in this file relies
        on them.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        IndexError: If a row lacks text in one of the four expected cells.
    """
    # Browser-like request header
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0'}
    # Fetch the HTML; the timeout keeps a stalled connection from hanging forever
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page into an
    # empty result
    resp.raise_for_status()
    # Parse the markup into an element tree
    resp_html = etree.HTML(resp.text)

    weather_info = []
    # Each <li> under <ul class="thrui"> describes one day
    for li in resp_html.xpath("//ul[@class='thrui']/li"):
        weather_info.append({
            # Date: keep only the part before the first space
            'data_time': li.xpath("./div[1]/text()")[0].split(' ')[0],
            # Highest temperature: partition() keeps the full text when '℃'
            # is missing, whereas slicing with find() == -1 would silently
            # drop the last character
            'high': li.xpath("./div[2]/text()")[0].partition('℃')[0],
            # Lowest temperature, same treatment
            'low': li.xpath("./div[3]/text()")[0].partition('℃')[0],
            # Weather description
            'weaher': li.xpath("./div[4]/text()")[0],
        })
    return weather_info
# Scraped results: one inner list of day records per month
weathers = []
# Visit every month of 2022 (1 through 12)
for month in range(1, 13):
    # Page name is the year followed by the zero-padded month, e.g. 202201
    weather_time = f'2022{month:02d}'
    print(weather_time)
    url = f'https://lishi.tianqi.com/zhongshanqu1/{weather_time}.html'
    # Scrape the month with getWeather and keep its records
    weather = getWeather(url)
    weathers.append(weather)
print(weathers)

# Write everything to the CSV file in one go
with open("weather_uft-8.csv", "w", newline='', encoding='utf-8') as csvsave:
    writer = csv.writer(csvsave)
    # Header row first: date, highest, lowest, weather
    writer.writerow(['日期', '最高天所', '最低天所', '天气'])
    # Each day record looks like
    # {'data_time': '2022-01-01', 'high': '6', 'low': '-7', 'weaher': '晴'};
    # its values, in insertion order, form one CSV row.
    for month_records in weathers:
        writer.writerows(list(day_record.values()) for day_record in month_records)
|