# -*- coding: utf-8 -*-
import re
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from sqlalchemy import create_engine
import pandas as pd
# Listing URL template; the page number appears both in the path and in the
# `pg` query parameter.
BESTSELLERS_URL = (
    "https://www.amazon.co.jp/gp/bestsellers/computers/"
    "ref=zg_bs_pg_{page}?ie=UTF8&pg={page}"
)
SITE_ROOT = "https://www.amazon.co.jp/"
LAST_PAGE = 2  # the original loop ran while n < 3, i.e. pages 1 and 2
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs forever


def build_page_url(page):
    """Return the bestseller listing URL for the given page number."""
    return BESTSELLERS_URL.format(page=page)


def parse_products(html):
    """Extract product records from the HTML of one listing page.

    Returns a list of dicts with the keys "name" (the image's ``alt``
    attribute), "link" (site root + the anchor's ``href``) and "photo"
    (the image's ``src`` attribute). Tiles that lack the expected
    span/div/img or anchor structure are skipped instead of aborting the
    whole run.
    """
    soup = BeautifulSoup(html, 'lxml')
    records = []
    for item in soup.find_all("li", {"class": "zg-item-immersion"}):
        for tile in item.find_all("span", {"class": "aok-inline-block zg-item"}):
            try:
                image = tile.span.div.img
                href = tile.a["href"]
            except (AttributeError, TypeError, KeyError):
                # A missing nested tag yields None, and a missing href raises
                # KeyError; either way this tile has nothing usable.
                continue
            if image is None:
                continue
            records.append({
                "name": image.get('alt'),
                "link": SITE_ROOT + href,
                "photo": image.get('src'),
            })
    return records


def main():
    """Scrape every listing page, print each product, and build a DataFrame."""
    datas = []
    for page in range(1, LAST_PAGE + 1):
        # Use the loop's page number; the URL previously hard-coded page 2.
        response = requests.get(build_page_url(page), timeout=REQUEST_TIMEOUT)
        for record in parse_products(response.text):
            # Accumulate across pages; the list is no longer reset per page.
            datas.append(record)
            print(f"商品名:{record['name']}")
            print(f"链接: {record['link']}")
            print(f"图片:{record['photo']}")
            print("----------------------------")
    print(datas)
    try:
        json_data = pd.DataFrame(datas)
        # CSV export is disabled; uncomment to append the rows to a file:
        # json_data.to_csv("/config/workspace/jupyter/666.csv",mode='a',encoding="utf_8_sig")
    except Exception as e:
        print("异常:", e)


if __name__ == '__main__':
    main()
# 这是我之前爬取亚马逊日本的代码,你可以参考一下。