用AI生成的获取豆瓣电影Top250的代码
分别用 XPath 和 BeautifulSoup 两种方式解析。AI确实牛掰,我服了。
这是AI生成的代码,我只修改了一点点。
其中 XPath 版本解析后会把结果保存到 CSV 文件!
[Python] 纯文本查看 复制代码
import requests
from bs4 import BeautifulSoup
from lxml import etree
import csv
# Install the dependencies first: pip install requests beautifulsoup4 lxml
def doubantop250beauti():
    """Print the title and link of every Douban Top 250 movie via BeautifulSoup.

    Requests the ten pages of https://movie.douban.com/top250 (the ``start``
    query parameter steps through 0, 25, ..., 225), reads the first ``<span>``
    text and the ``href`` of the link inside each ``<div class="hd">``, and
    prints one ``title link`` line per movie after all pages have been read.

    Returns:
        None. Results are only printed.

    Raises:
        requests.RequestException: if a request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://movie.douban.com/top250'
    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    movies = []
    for i in range(0, 250, 25):
        params = {'start': str(i), 'filter': ''}
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, headers=headers, params=params, timeout=10)
        # Fail loudly on an error response instead of parsing it as "no movies".
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        for movie in soup.find_all('div', class_='hd'):
            title = movie.a.span.text.strip()
            link = movie.a['href']
            movies.append({'title': title, 'link': link})
    for movie in movies:
        print(movie['title'], movie['link'])
def doubantop250xpath():
    """Scrape Douban Top 250 with lxml XPath, print each movie and save it to CSV.

    Requests the ten pages of https://movie.douban.com/top250 (the ``start``
    query parameter steps through 0, 25, ..., 225). For every
    ``<div class="info">`` entry it extracts the first title, the rating and
    the detail-page link, prints them, and writes a ``[title, score, link]``
    row to ``top250.csv`` in the current working directory.

    Returns:
        None. Results are printed and written to ``top250.csv``.

    Raises:
        requests.RequestException: if a request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: if an entry lacks a title, rating or link node.
    """
    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    url = 'https://movie.douban.com/top250'
    # The file is opened once for the whole run. Append mode was only needed
    # while the file was reopened per page; write mode keeps a re-run from
    # duplicating every row. 'utf-8-sig' is the canonical codec name and
    # prepends a BOM (presumably so spreadsheet tools detect UTF-8).
    with open('top250.csv', 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        for i in range(0, 250, 25):
            params = {'start': str(i), 'filter': ''}
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=headers, params=params, timeout=10)
            # Fail loudly on an error response instead of writing nothing.
            response.raise_for_status()
            selector = etree.HTML(response.text)
            for movie in selector.xpath('//div[@class="info"]'):
                # Evaluate each XPath once and reuse it for printing and CSV.
                title = movie.xpath('div[@class="hd"]/a/span[@class="title"]/text()')[0]
                score = movie.xpath('div[@class="bd"]/div[@class="star"]/span[@class="rating_num"]/text()')[0]
                link = movie.xpath('div[@class="hd"]/a/@href')[0]
                print('电影名称:', title)
                print('电影评分:', score)
                print('电影链接:', link)
                print('-----------------------')
                writer.writerow([title, score, link])
if __name__ == '__main__':
    # Default entry point: the XPath scraper, which also writes top250.csv.
    doubantop250xpath()
    # Call doubantop250beauti() here instead for the print-only BeautifulSoup variant.
|