# [Python] plain-text view / copy code  (forum code-widget caption, not part of the script)
import requests,parsel,time
import pandas as pd
# Local date stamp (YYYY-MM-DD); the export step uses it as the Excel sheet name.
today = time.strftime("%Y-%m-%d", time.localtime())

# Cookies sent with the page request.
#
# NOTE(review): the `_login_*` entries look like credentials captured from a
# logged-in browser session. They are hard-coded here, will stop working once
# the session expires, and should presumably be loaded from configuration
# rather than kept in source -- confirm with the owner of the account.
#
# The percent-encoded values below originally contained "^%^" sequences, which
# is how a "Copy as cURL (cmd)" export escapes "%" for cmd.exe. The carets are
# shell escaping, not cookie content, so they have been removed to restore the
# plain percent-encoding.
cookies = {
    'Hm_lvt_1c4432afacfa2301369a5625795031b8': '1650779015',
    '_last_loginuname': '137889107',
    '_login_psd': '52c34b23e5fd4a677f43ed0714fcb88a4',
    '_rememberStatus': 'true',
    '_login_token': '579c51aab4de7885dc6a0a225904cd68',
    '_login_uid': '1687431',
    '_login_mid': '2524876',
    '_login_ip': '116.236.93.204',
    '579c51aab4de7885dc6a0a225904cd68': '1%3D5',
    '_last_ch_r_t': '1650779033232',
    'fingerprint': '9240407d4f8b8d3979184963585431f7',
    'qimo_seosource_5d36a9e0-919c-11e9-903c-ab24dbab411b': '%E5%85%B6%E4%BB%96%E7%BD%91%E7%AB%99',
    'qimo_seokeywords_5d36a9e0-919c-11e9-903c-ab24dbab411b': '%E6%9C%AA%E7%9F%A5',
    'qimo_xstKeywords_5d36a9e0-919c-11e9-903c-ab24dbab411b': '',
    'href': 'https%3A%2F%2Fjiancai.mysteel.com%2Fmarket%2Fpa228a15407aa0aaaaa1.html',
    'accessId': '5d36a9e0-919c-11e9-903c-ab24dbab411b',
    'pageViewNum': '1',
    'Hm_lpvt_1c4432afacfa2301369a5625795031b8': '1650779385',
}

# Browser-like request headers sent with the page request.
headers = {
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'DNT': '1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-User': '?1',
    'Sec-Fetch-Dest': 'document',
    'Referer': 'https://jiancai.mysteel.com/market/pa228a15407aa0aaaaa1.html',
    'Accept-Language': 'zh-CN,zh;q=0.9',
}
# Fetch the price page. A timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() stops early on an HTTP error
# instead of parsing an error page as if it were the price table.
r = requests.get(
    'https://jiancai.mysteel.com/m/22042412/DE9E66348384288D.html',
    headers=headers,
    cookies=cookies,
    timeout=10,
)
r.raise_for_status()

# Select every <tr> whose id attribute contains "ctr".
selector = parsel.Selector(text=r.text)
price_rows = selector.xpath("//tr[contains(@id,'ctr')]")

# One list per table row: a 1-based running number followed by the stripped
# text nodes found directly under each <td> of that row.
data_list = [
    [index] + [cell.strip() for cell in row.xpath('./td/text()').extract()]
    for index, row in enumerate(price_rows, start=1)
]
def wirte_xls(data_list, path='./我的钢铁网南京钢铁价.xlsx', sheet_name=None):
    """Write the scraped rows to an Excel workbook and print a completion banner.

    Args:
        data_list: List of rows; each row is expected to hold nine values
            matching the column headers defined below.
        path: Destination workbook path. Defaults to the file name the
            script has always written.
        sheet_name: Name of the worksheet. When ``None``, the module-level
            ``today`` date stamp is used.

    Raises:
        ValueError: Raised by pandas when the rows do not match the nine
            column headers.
    """
    columns = ["序号", "品名", "规格", "材质", "钢厂/产地", "价格(元/吨)", "建仓线", "涨跌", "备注"]
    if sheet_name is None:
        sheet_name = today
    # Passing the headers to the constructor (rather than assigning
    # df.columns afterwards) means an empty data_list yields a header-only
    # sheet instead of a length-mismatch error.
    df = pd.DataFrame(data_list, columns=columns)
    df.to_excel(path, sheet_name=sheet_name, index=False)
    print("=====================================已全部导出!=====================================")
# Export the scraped rows to the Excel workbook.
wirte_xls(data_list)