【Python爬虫原创代码】利用python爬取全国各地天气预报
本帖最后由 袅袅系秋风 于 2019-3-23 18:20 编辑
最近在学习python爬虫,写了这个程序练手,有谁想参考的可以看一下,大家共同学习。
程序利用的是python的requests模块以及re模块
主要通过requests.get()获取网页代码保存到response中,然后通过正则从html代码中提取自己想要的数据。
最后将这些数据封装为图形化界面
程序运行截图如下:
(tip: 也可以通过字符查询,默认显示查询的第一个,如果想要显示准确的地方天气建议填完整城市名或完整城市拼音)
下面是代码:
import re
import tkinter as tk
from tkinter import *

import requests
from PIL import ImageTk as itk
class MyFrame(Frame):
    """Tkinter window that looks up a place on weather.com.cn and lists its forecast.

    The window owns its own ``Tk`` root (``self.root``), a text entry
    (``self.city``) and a listbox (``self.result``) that receives one header
    line plus one line per forecast day.

    NOTE(review): the published listing had lost its indentation and every
    subscript (``citys[i][0]`` appeared as ``citys``).  The field order used
    when building the output lines is reconstructed from the regex group
    order and should be confirmed against the live pages.
    """

    SEARCH_URL = 'http://toy1.weather.com.cn/search'
    CITY_URL = 'http://www.weather.com.cn/weathern/%s.shtml'
    TOWN_URL = 'http://forecast.weather.com.cn/town/weathern/%s.shtml'
    TIMEOUT = 10  # seconds; without a timeout a stalled server would hang the request forever

    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        'Cookie': '__guid=182823328.3322839646442213000.1543932524694.901; vjuids=1858d43b6.167798cbdb7.0.8c4d7463d5c5d; vjlast=1543932526.1543932526.30; userNewsPort0=1; f_city=%E5%B9%B3%E9%A1%B6%E5%B1%B1%7C101180501%7C; Hm_lvt_080dabacb001ad3dc8b9b9049b36d43b=1543932526,1543932551,1543932579; Wa_lvt_1=1547464114,1547464115,1547880054,1547983123; defaultCty=101181001; defaultCtyName=%u5546%u4E18; monitor_count=6; Wa_lpvt_1=1547983809'
    }

    # Four fields captured from every "ref" entry of the search response.
    _CITY_REF = r'"ref":"(.*?)~.*?~(.*?)~.*?~(.*?)~.*?~.*?~.*?~.*?~(.*?)"'

    # Eight quoted values inside a JavaScript array literal.
    _TEMPS = r'\["' + '","'.join(['(.*?)'] * 8) + r'"\];'

    # (day pattern, detail pattern, high-temperature pattern, low-temperature pattern)
    _CITY_PATTERNS = (
        r'<li class="date-.*?".*?".*?">(.*?)</.*?"date-i.*?">(.*?)<.*?',
        r'<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">(.*?)</p>',
        r'<script>var eventDay =' + _TEMPS,
        r'var eventNight =' + _TEMPS,
    )
    _TOWN_PATTERNS = (
        r'<li class="date-.*?".*?"da.*?">(.*?)</.*?"date-i.*?">(.*?)<.*?',
        r'<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">\r\n(.*?)\r\n',
        r'var eventDay = ' + _TEMPS,
        r'var eventNight = ' + _TEMPS,
    )

    def __init__(self):
        """Create the root window and lay out background, entry, button and listbox."""
        self.root = Tk()
        # The class derives from Frame, so initialise the widget base as well.
        super().__init__(self.root)
        self.root.title("天气查询")
        self.root.geometry('1200x700+400+220')

        bg = tk.Canvas(self.root, width=1200, height=600, bg='white')
        bg.place(x=100, y=40)
        try:
            # Keep a reference on self: Tk does not hold one, and the image
            # would disappear once garbage-collected.
            self.img = itk.PhotoImage(file="bg.gif")
        except OSError:
            # The background is decoration only; run without it if the file is missing.
            self.img = None
        else:
            bg.create_image(0, 0, anchor=NW, image=self.img)

        self.city = Entry(self.root, width=16, font=("仿宋", 18, "normal"))
        self.city.place(x=200, y=60)
        citylabel = Label(self.root, text='查询城市', font=("仿宋", 18, "normal"))
        citylabel.place(x=80, y=60)

        # Search button
        chaxun = Button(self.root, width=10, height=3, text="查询", bg='#00CCFF', bd=5, font="bold")
        chaxun.bind("<Button-1>", self.search)
        chaxun.place(x=800, y=50)

        # Listbox that displays the forecast lines
        self.result = Listbox(self.root, height=18, width=65, font=("仿宋", 20, "normal"))
        self.result.place(x=125, y=120)

    @classmethod
    def _parse_city_candidates(cls, text):
        """Return the list of 4-tuples captured from each "ref" entry in *text*."""
        return re.findall(cls._CITY_REF, text, re.S)

    @staticmethod
    def _format_rows(days, details, lows, highs):
        """Build one display line per day, stopping at the shortest input sequence."""
        rows = []
        for (date, weekday), (sky, wind_a, wind_b, wind_level), low, high in zip(
                days, details, lows, highs):
            # NOTE(review): order of the three trailing wind fields is a
            # reconstruction (regex group order) -- confirm against the site.
            rows.append(date + ' ' + weekday + ' ' + sky + ' ' + low + '℃~'
                        + high + '℃ ' + wind_a + wind_b + wind_level)
        return rows

    @classmethod
    def _parse_forecast(cls, html, town=False):
        """Extract the forecast display lines from a forecast page.

        *town* selects the patterns for the 12-digit "town" pages instead of
        the 9-digit city pages.  Returns an empty list when the temperature
        arrays cannot be found.
        """
        day_pat, detail_pat, high_pat, low_pat = (
            cls._TOWN_PATTERNS if town else cls._CITY_PATTERNS)
        days = re.findall(day_pat, html, re.S)
        if town:
            # Kept from the original town branch, which applied this
            # substitution after reading the days and before the details.
            html = re.sub('lt;', '<', html)
        details = re.findall(detail_pat, html, re.S)
        highs = re.findall(high_pat, html, re.S)
        lows = re.findall(low_pat, html, re.S)
        if not (highs and lows):
            return []
        return cls._format_rows(days, details, lows[0], highs[0])

    def _fetch(self, url, params=None):
        """GET *url* with the shared headers and return the body decoded as UTF-8."""
        response = requests.get(url, params=params, headers=self.HEADERS,
                                timeout=self.TIMEOUT)
        return response.content.decode('utf-8')

    def tianqiforecast(self, searchcity):
        """Look up *searchcity* and append its forecast lines to the listbox.

        The first search hit is always used.  Problems (network failure, no
        hit, foreign location, unparsable page) are reported as a line in the
        listbox instead of terminating the program.
        """
        try:
            # Passing params lets requests escape the user's text properly.
            search_text = self._fetch(self.SEARCH_URL, params={
                'cityname': searchcity,
                'callback': 'success_jsonpCallback',
                '_': '1548048506469',
            })
        except (requests.RequestException, UnicodeDecodeError) as err:
            self.result.insert(END, '网络请求失败: %s' % err)
            return

        candidates = self._parse_city_candidates(search_text)
        if not candidates:
            print('未查找到该城市')
            self.result.insert(END, '未查找到该城市')
            return
        for number, candidate in enumerate(candidates, 1):
            print(number, ':%14s%14s%14s%14s ' % candidate)

        # NOTE(review): which captured field is which name is inferred from
        # the position in the "ref" string -- confirm.
        code, place, prefecture, province = candidates[0]

        if len(code) == 9:
            # Codes that do not begin with 101 are treated as foreign locations.
            if not code.startswith('101'):
                print('暂时无法查询国外天气')
                self.result.insert(END, '暂时无法查询国外天气')
                return
            url, town = self.CITY_URL % code, False
            header = '查询城市为:' + str(province) + ' ' + str(place)
        elif len(code) == 12:
            url, town = self.TOWN_URL % code, True
            header = ('查询城市为:' + str(province) + ' ' + str(prefecture)
                      + ' ' + str(place))
        else:
            self.result.insert(END, '暂不支持该地区编码: ' + code)
            return

        try:
            page = self._fetch(url)
        except (requests.RequestException, UnicodeDecodeError) as err:
            self.result.insert(END, '网络请求失败: %s' % err)
            return

        self.result.insert(END, header)
        rows = self._parse_forecast(page, town=town)
        if not rows:
            self.result.insert(END, '未能解析天气数据')
            return
        for row in rows:
            self.result.insert(END, row)

    def search(self, event=None):
        """Button handler: clear the widgets and query the entered city name."""
        mycity = self.city.get().strip()
        if not mycity:
            return
        self.result.delete(0, END)
        self.city.delete(0, END)
        # The request runs on the Tk thread, so the window is unresponsive
        # until it returns or times out.
        self.tianqiforecast(mycity)
if __name__ == '__main__':
    # Build the window, then hand control to Tk's event loop until it is closed.
    myframe = MyFrame()
    myframe.root.mainloop()
想下载的也可以在这里下载源代码
from tkinter import *
import tkinter as tk
import requests
from PIL import ImageTk as itk
class MyFrame(Frame):
    """Tkinter window that looks up a place on weather.com.cn and lists its forecast.

    The window owns its own ``Tk`` root (``self.root``), a text entry
    (``self.city``) and a listbox (``self.result``) that receives one header
    line plus one line per forecast day.

    NOTE(review): the published listing had lost its indentation and every
    subscript (``citys[i][0]`` appeared as ``citys``).  The field order used
    when building the output lines is reconstructed from the regex group
    order and should be confirmed against the live pages.
    """

    SEARCH_URL = 'http://toy1.weather.com.cn/search'
    CITY_URL = 'http://www.weather.com.cn/weathern/%s.shtml'
    TOWN_URL = 'http://forecast.weather.com.cn/town/weathern/%s.shtml'
    TIMEOUT = 10  # seconds; without a timeout a stalled server would hang the request forever

    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        'Cookie': '__guid=182823328.3322839646442213000.1543932524694.901; vjuids=1858d43b6.167798cbdb7.0.8c4d7463d5c5d; vjlast=1543932526.1543932526.30; userNewsPort0=1; f_city=%E5%B9%B3%E9%A1%B6%E5%B1%B1%7C101180501%7C; Hm_lvt_080dabacb001ad3dc8b9b9049b36d43b=1543932526,1543932551,1543932579; Wa_lvt_1=1547464114,1547464115,1547880054,1547983123; defaultCty=101181001; defaultCtyName=%u5546%u4E18; monitor_count=6; Wa_lpvt_1=1547983809'
    }

    # Four fields captured from every "ref" entry of the search response.
    _CITY_REF = r'"ref":"(.*?)~.*?~(.*?)~.*?~(.*?)~.*?~.*?~.*?~.*?~(.*?)"'

    # Eight quoted values inside a JavaScript array literal.
    _TEMPS = r'\["' + '","'.join(['(.*?)'] * 8) + r'"\];'

    # (day pattern, detail pattern, high-temperature pattern, low-temperature pattern)
    _CITY_PATTERNS = (
        r'<li class="date-.*?".*?".*?">(.*?)</.*?"date-i.*?">(.*?)<.*?',
        r'<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">(.*?)</p>',
        r'<script>var eventDay =' + _TEMPS,
        r'var eventNight =' + _TEMPS,
    )
    _TOWN_PATTERNS = (
        r'<li class="date-.*?".*?"da.*?">(.*?)</.*?"date-i.*?">(.*?)<.*?',
        r'<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">\r\n(.*?)\r\n',
        r'var eventDay = ' + _TEMPS,
        r'var eventNight = ' + _TEMPS,
    )

    def __init__(self):
        """Create the root window and lay out background, entry, button and listbox."""
        self.root = Tk()
        # The class derives from Frame, so initialise the widget base as well.
        super().__init__(self.root)
        self.root.title("天气查询")
        self.root.geometry('1200x700+400+220')

        bg = tk.Canvas(self.root, width=1200, height=600, bg='white')
        bg.place(x=100, y=40)
        try:
            # Keep a reference on self: Tk does not hold one, and the image
            # would disappear once garbage-collected.
            self.img = itk.PhotoImage(file="bg.gif")
        except OSError:
            # The background is decoration only; run without it if the file is missing.
            self.img = None
        else:
            bg.create_image(0, 0, anchor=NW, image=self.img)

        self.city = Entry(self.root, width=16, font=("仿宋", 18, "normal"))
        self.city.place(x=200, y=60)
        citylabel = Label(self.root, text='查询城市', font=("仿宋", 18, "normal"))
        citylabel.place(x=80, y=60)

        # Search button
        chaxun = Button(self.root, width=10, height=3, text="查询", bg='#00CCFF', bd=5, font="bold")
        chaxun.bind("<Button-1>", self.search)
        chaxun.place(x=800, y=50)

        # Listbox that displays the forecast lines
        self.result = Listbox(self.root, height=18, width=65, font=("仿宋", 20, "normal"))
        self.result.place(x=125, y=120)

    @classmethod
    def _parse_city_candidates(cls, text):
        """Return the list of 4-tuples captured from each "ref" entry in *text*."""
        return re.findall(cls._CITY_REF, text, re.S)

    @staticmethod
    def _format_rows(days, details, lows, highs):
        """Build one display line per day, stopping at the shortest input sequence."""
        rows = []
        for (date, weekday), (sky, wind_a, wind_b, wind_level), low, high in zip(
                days, details, lows, highs):
            # NOTE(review): order of the three trailing wind fields is a
            # reconstruction (regex group order) -- confirm against the site.
            rows.append(date + ' ' + weekday + ' ' + sky + ' ' + low + '℃~'
                        + high + '℃ ' + wind_a + wind_b + wind_level)
        return rows

    @classmethod
    def _parse_forecast(cls, html, town=False):
        """Extract the forecast display lines from a forecast page.

        *town* selects the patterns for the 12-digit "town" pages instead of
        the 9-digit city pages.  Returns an empty list when the temperature
        arrays cannot be found.
        """
        day_pat, detail_pat, high_pat, low_pat = (
            cls._TOWN_PATTERNS if town else cls._CITY_PATTERNS)
        days = re.findall(day_pat, html, re.S)
        if town:
            # Kept from the original town branch, which applied this
            # substitution after reading the days and before the details.
            html = re.sub('lt;', '<', html)
        details = re.findall(detail_pat, html, re.S)
        highs = re.findall(high_pat, html, re.S)
        lows = re.findall(low_pat, html, re.S)
        if not (highs and lows):
            return []
        return cls._format_rows(days, details, lows[0], highs[0])

    def _fetch(self, url, params=None):
        """GET *url* with the shared headers and return the body decoded as UTF-8."""
        response = requests.get(url, params=params, headers=self.HEADERS,
                                timeout=self.TIMEOUT)
        return response.content.decode('utf-8')

    def tianqiforecast(self, searchcity):
        """Look up *searchcity* and append its forecast lines to the listbox.

        The first search hit is always used.  Problems (network failure, no
        hit, foreign location, unparsable page) are reported as a line in the
        listbox instead of terminating the program.
        """
        try:
            # Passing params lets requests escape the user's text properly.
            search_text = self._fetch(self.SEARCH_URL, params={
                'cityname': searchcity,
                'callback': 'success_jsonpCallback',
                '_': '1548048506469',
            })
        except (requests.RequestException, UnicodeDecodeError) as err:
            self.result.insert(END, '网络请求失败: %s' % err)
            return

        candidates = self._parse_city_candidates(search_text)
        if not candidates:
            print('未查找到该城市')
            self.result.insert(END, '未查找到该城市')
            return
        for number, candidate in enumerate(candidates, 1):
            print(number, ':%14s%14s%14s%14s ' % candidate)

        # NOTE(review): which captured field is which name is inferred from
        # the position in the "ref" string -- confirm.
        code, place, prefecture, province = candidates[0]

        if len(code) == 9:
            # Codes that do not begin with 101 are treated as foreign locations.
            if not code.startswith('101'):
                print('暂时无法查询国外天气')
                self.result.insert(END, '暂时无法查询国外天气')
                return
            url, town = self.CITY_URL % code, False
            header = '查询城市为:' + str(province) + ' ' + str(place)
        elif len(code) == 12:
            url, town = self.TOWN_URL % code, True
            header = ('查询城市为:' + str(province) + ' ' + str(prefecture)
                      + ' ' + str(place))
        else:
            self.result.insert(END, '暂不支持该地区编码: ' + code)
            return

        try:
            page = self._fetch(url)
        except (requests.RequestException, UnicodeDecodeError) as err:
            self.result.insert(END, '网络请求失败: %s' % err)
            return

        self.result.insert(END, header)
        rows = self._parse_forecast(page, town=town)
        if not rows:
            self.result.insert(END, '未能解析天气数据')
            return
        for row in rows:
            self.result.insert(END, row)

    def search(self, event=None):
        """Button handler: clear the widgets and query the entered city name."""
        mycity = self.city.get().strip()
        if not mycity:
            return
        self.result.delete(0, END)
        self.city.delete(0, END)
        # The request runs on the Tk thread, so the window is unresponsive
        # until it returns or times out.
        self.tianqiforecast(mycity)
if __name__=='__main__':
myframe=MyFrame()
myframe.root.mainloop()
感谢分享,为新手提供了宝贵的参考资料
感谢,可以好好学习一下
niebaohua 发表于 2019-3-22 21:52
from tkinter import *
import tkinter as tk
import requests
请问下你是怎么发的这样格式的代码的
niebaohua 发表于 2019-3-22 21:52
from tkinter import *
import tkinter as tk
import requests
不用了,我找到了
谢谢分享!
袅袅系秋风 发表于 2019-3-23 18:18
请问下你是怎么发的这样格式的代码的
评论窗口在表情左边,发帖的时候也是一样的
谢谢大佬,研究一下
justFunny 发表于 2019-3-24 13:14
GUI是用什么模块封装的啊?
我用的是tkinter模块