# Forum post; last edited by cdsgg on 2020-12-2 17:52.
# [Python] (plain-text view / copy code)
import requests
from bs4 import BeautifulSoup
import xlsxwriter
# Scrape Maoyan's "classic movies" listing and save it to an Excel workbook.
#
# Flow: ask how many movies to fetch, download the listing fragment, extract
# five parallel lists of fields with CSS selectors, then write one spreadsheet
# row per movie while echoing the same data to stdout.

page = input("请输入爬取电影的数量:")

# The value ends up in the URL query string, so accept only a positive integer
# instead of interpolating arbitrary user text into the request.
try:
    limit = int(page)
except ValueError:
    limit = 0
if limit <= 0:
    raise SystemExit('请输入一个正整数')

url = f'https://m.maoyan.com/ajax/moreClassicList?sortId=1&showType=3&limit={limit}'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.150 Aef/5.53.1.1 Qianniu/7.26.03N Safari/537.36'}

# Fetch and parse BEFORE creating the workbook so that a failed request does
# not leave a header-only spreadsheet behind. The timeout keeps the script from
# hanging forever, and raise_for_status() stops an HTTP error page from being
# silently parsed as an empty result.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()
req = response.content.decode()
soup = BeautifulSoup(req, 'lxml')

# Each selector yields one list of tags; the lists are assumed to line up
# index-by-index (one entry per movie) -- TODO confirm against the live markup.
titles = soup.select('div[class="movie-info"]>div[class="title line-ellipsis"]')  # Chinese title
foreign_titles = soup.select('div[class="movie-info"]>div[class="english-title line-ellipsis"]')  # foreign-language title
# NOTE(review): the "actors" div is what gets stored under the 类型 (genre)
# column, as in the original script -- confirm this is the intended field.
genres = soup.select('div[class="movie-info"]>div[class="actors line-ellipsis"]')
release_dates = soup.select('div[class="movie-info"]>div[class="show-info line-ellipsis"]')  # release date
# Score selector; full path seen in the browser:
# body > a:nth-child(5) > div > div.movie-score > div > span.grade
scores = soup.select('div[class="score line-ellipsis"]>span[class="grade"]')

# The context manager closes (and thereby saves) the workbook even if writing
# a row raises.
with xlsxwriter.Workbook('猫眼电影.xlsx') as workbook:
    worksheet = workbook.add_worksheet('sheet1')

    # Header row (spreadsheet row 1, columns A-E).
    worksheet.write_row(0, 0, ['电影名字', '外文名字', '类型', '上映日期', '评分'])

    # Data starts on spreadsheet row 2, i.e. zero-based row index 1.
    # zip() stops at the shortest list, so a movie missing one field shortens
    # the output rather than raising.
    movies = zip(titles, foreign_titles, genres, release_dates, scores)
    for row, tags in enumerate(movies, start=1):
        name, foreign_name, genre, release_date, score = (tag.text for tag in tags)
        worksheet.write_row(row, 0, [name, foreign_name, genre, release_date, score])
        print(f'电影名字:{name}\n外文名字:{foreign_name}\n类型:{genre}\n上映日期:{release_date}\n评分:{score}分')
        print('-------------我是分界线------------------------------------------------------------------')
# print(req)
# jsdata= req.json()
# title=jsonpath(jsdata,'$..nm')
# for i in title:
# print(i)