用 Python 爬取主要城市房价工资比排行榜
import itertools
import re

import bs4
import openpyxl
import requests
def open_url(url, timeout=10):
    """Download *url* while presenting a desktop-browser User-Agent.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get`` so a stalled connection cannot
            hang the script forever.

    Returns:
        The ``requests.Response`` object for the page.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/89.0.4389.82 Safari/537.36"
        ),
    }
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail here rather than letting an error page flow into the parser.
    res.raise_for_status()
    return res
def find_data(res):
    """Extract the table rows from the downloaded article page.

    The element with id ``Cnt-Main-Article-QQ`` is searched for
    ``<p style="TEXT-INDENT: 2em">`` paragraphs. A paragraph whose text is
    purely numeric starts a record; the four paragraphs that follow it
    supply the fields: the text inside square brackets, then three values
    taken from the first digit to the end of the paragraph.

    Records that are cut off at the end of the page, or whose fields do not
    match the expected patterns, are skipped instead of aborting the run.

    Args:
        res: Response-like object whose ``text`` attribute holds the HTML.

    Returns:
        A list of four-item lists of strings, in page order. Presumably
        these are city, price, wage and ratio, matching the spreadsheet
        header written elsewhere in this script.

    Raises:
        ValueError: If the article container is not present in the page.
    """
    soup = bs4.BeautifulSoup(res.text, "html.parser")
    content = soup.find(id="Cnt-Main-Article-QQ")
    if content is None:
        raise ValueError(
            'article container "Cnt-Main-Article-QQ" not found in the page'
        )

    # One shared iterator: reading a record's fields also advances the outer
    # loop, so field paragraphs are never mistaken for record markers.
    paragraphs = iter(content.find_all("p", style="TEXT-INDENT: 2em"))
    patterns = (r"\[(.+)\]", r"\d.*", r"\d.*", r"\d.*")

    data = []
    for marker in paragraphs:
        if not marker.text.isnumeric():
            continue
        fields = list(itertools.islice(paragraphs, len(patterns)))
        if len(fields) < len(patterns):
            # The page ended in the middle of a record.
            break
        matches = [
            re.search(pattern, field.text)
            for pattern, field in zip(patterns, fields)
        ]
        if any(match is None for match in matches):
            # Layout differs from what is expected; drop this record only.
            continue
        bracketed, *values = matches
        data.append([bracketed.group(1)] + [match.group() for match in values])
    return data
# Plain integer or decimal text, with an optional sign.
_NUMERIC_TEXT = re.compile(r"[+-]?\d+(?:\.\d+)?")


def _coerce_cell(value):
    """Return *value* as int/float when it is purely numeric text, else as is."""
    if isinstance(value, str):
        text = value.strip()
        if _NUMERIC_TEXT.fullmatch(text):
            return float(text) if "." in text else int(text)
    return value


def to_excel(data, filename="主要城市房价工资比排行榜.xlsx"):
    """Write the scraped rows to an Excel workbook.

    A header row is written first, followed by one row per entry of *data*.
    Cell values that are purely numeric text are stored as numbers so the
    sheet can be sorted and used in formulas; everything else is stored
    unchanged.

    Args:
        data: Iterable of rows, each an iterable of cell values.
        filename: Path of the workbook to create. Defaults to the name the
            script has always used.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(["城市", "平均房价", "平均工资", "房价工资比"])
    for each in data:
        # Setting ``guess_types`` on the workbook has no effect on current
        # openpyxl releases, so the conversion is done explicitly here.
        ws.append([_coerce_cell(value) for value in each])
    wb.save(filename)
def main():
    """Run the pipeline: download the article, dump it, parse it, export it.

    The raw HTML is also written to ``text.txt`` (UTF-8) so the page can be
    inspected by hand.
    """
    page = open_url("https://news.house.qq.com/a/20170702/003985.htm")
    with open("text.txt", mode="w", encoding="utf-8") as dump:
        dump.write(page.text)
    to_excel(find_data(page))


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
{:301_999:} 感谢分享,一天一平米 老哥学习多久了,我也想学习爬虫听说很难就一直没有实践 感谢楼主分享,非常好用 这个是从一个特定网站上爬吗? 感谢楼主分享,房价速度飞涨 这个得支持 这个好 杀伤力不大,侮辱性极强 {:1_918:}{:1_918:} 哇塞,谢谢楼主欸 干货呀,支持一下 真的谢谢楼主了 这个厉害了。看看哪个城市宜居点儿:loveliness: 感谢分享,论坛有你更精彩