本帖最后由 illuminate123 于 2022-9-21 18:51 编辑
一共约2500条数据,爬取结果如下:
import requests
import csv
import parsel
# Scrape the Forbes China list table at `url` and save it to a CSV file
# (UTF-8 with BOM), one CSV row per table row.
#
# Note: mind the letter case of the header names in `headers`.
headers = {
    # NOTE(review): this is a captured browser session cookie (XSRF token and
    # laravel_session). It is presumably expired by now and should not live in
    # source control -- confirm whether the request works without it.
    'Cookie': 'acw_tc=0bc1598f16626875705425258e66473deef35a301a9a96feee4c846eb71426; Hm_lvt_aa8b760f41278f94669da4685a1ce4fa=1662687577; XSRF-TOKEN=eyJpdiI6InRKaUhwTzkrXC81VXJlVVFQT3UzNkZ3PT0iLCJ2YWx1ZSI6IlMydkkxUkt1M2tiQ1FIZ2lTVTRZeUwyS09TNjJqNUJxejVxbjZ4SmcrUnlSZTFWaTFETGlFRERSSXFrbTIwVjIiLCJtYWMiOiIzYWY1ZjczNWVjN2Q4NjE4NjdlYjIyYzk5MzJlYTM0MDQ2YWZhMzM0OGEyMzQ4NjdkMjM1YmExNzg5MTcyZGU5In0=; laravel_session=eyJpdiI6Im1seHhRZGtcL0gxZWdcL1RaVzJaRmJaQT09IiwidmFsdWUiOiJBN2tDM1JMODkxOERvMGZjd1RWUE5kam5Cd2puanBtQWNVWVRTNGZkVzlLWHZMcUdHeHNYMlppQUVpNWlQejdhIiwibWFjIjoiMDkxNDE3MzY4MzI4M2Q0ZWIyZDZjMzI2ZDRhZTJkMDQyMTE1NThkODQwOTczZmRlZGUzNmJmOTBlYWU3MjNmYyJ9; Hm_lpvt_aa8b760f41278f94669da4685a1ce4fa=1662687601',
    'Referer': 'https://cn.bing.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.27',
}

# NOTE(review): TLS certificate verification is switched off (verify=False)
# and the resulting urllib3 warning is silenced. Kept as in the original
# script; confirm whether the site's certificate validates and, if so, drop
# both.
requests.packages.urllib3.disable_warnings()
url = 'https://www.forbeschina.com/lists/1781'

# Fetch and parse BEFORE touching the output file, so a failed request does
# not leave a truncated, header-only CSV behind. The timeout keeps the script
# from hanging forever on an unresponsive server.
response = requests.get(url=url, headers=headers, verify=False, timeout=30)
response.raise_for_status()
selector = parsel.Selector(response.text)

# Extract cell by cell within each <tr>. Selecting each column as one flat
# list over the whole table silently drops cells that have no text node,
# which shifts that column against the others (or raises IndexError).
# A missing/empty cell becomes '' here instead.
records = [
    [
        table_row.xpath('./td[{}]/text()'.format(column)).get(default='')
        for column in range(1, 8)
    ]
    for table_row in selector.xpath('//*[@id="data-view"]/tbody/tr')
]

# Open the file once and write the header plus all rows in a single pass,
# rather than reopening it in append mode for every row.
with open('福布斯富豪榜.csv', 'w', encoding='utf_8_sig', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(['rank', 'english_name', 'chinese_name', 'wealth_value', 'wealth_source', 'country', 'age'])
    csv_writer.writerows(records)
|