利用 Python 爬取二手车之家
import csv

import parsel
import requests

# Request headers that make the request look like it comes from a browser.
headers = {
    # Session information copied from a browser visit.
    # NOTE(review): this cookie is tied to one browser session and will
    # presumably expire - replace it with a fresh one if requests fail.
    'Cookie': 'fvlid=1652681259525EL1JOKlG4jwu; sessionid=00da8547-d1be-4b57-aaff-70118f7a00cc; sessionip=60.210.137.50; area=370705; sessionvisit=e3633597-6c82-4400-931d-26860d0ca426; sessionvisitInfo=00da8547-d1be-4b57-aaff-70118f7a00cc|cn.bing.com|0; Hm_lvt_d381ec2f88158113b9b76f14c497ed48=1652681260; che_sessionid=454B1328-3097-475C-BCF3-7A5F7D5D5762%7C%7C2022-05-16+14%3A07%3A40.049%7C%7Ccn.bing.com; che_sessionvid=BD3C8084-56F8-4FE7-B6F1-E50FC81FB11D; UsedCarBrowseHistory=0%3A43533318; userarea=0; listuserarea=0; ahpvno=13; ahuuid=C077C62F-23F6-406B-89D7-27E5B5AE4DCE; Hm_lpvt_d381ec2f88158113b9b76f14c497ed48=1652682194; v_no=13; visit_info_ad=454B1328-3097-475C-BCF3-7A5F7D5D5762||BD3C8084-56F8-4FE7-B6F1-E50FC81FB11D||-1||-1||13; che_ref=cn.bing.com%7C0%7C0%7C0%7C2022-05-16+14%3A23%3A14.350%7C2022-05-16+14%3A07%3A40.049; showNum=13; sessionuid=00da8547-d1be-4b57-aaff-70118f7a00cc',
    # Browser identification.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.47',
}
url = 'https://www.che168.com/china/list/'


def fetch_html(page_url, request_headers):
    """Download *page_url* and return the response body as text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=page_url, headers=request_headers, timeout=10)
    response.raise_for_status()
    return response.text


def parse_cars(html_data):
    """Extract one ``(card_name, cards_unit, pirce)`` tuple per list item.

    Each value is the text of the first node matching the CSS selector
    inside the ``<li>``, or ``None`` when nothing matches.
    """
    selector = parsel.Selector(html_data)
    rows = []
    for li in selector.css('.viewlist_ul li'):
        card_name = li.css('.card-name::text').get()
        cards_unit = li.css('.cards-unit::text').get()
        # The original re-ran this exact selector when it returned None,
        # which could never produce a different result; a missing price
        # simply stays None (written to the CSV as an empty cell).
        pirce = li.css('.pirce em::text').get()
        rows.append((card_name, cards_unit, pirce))
    return rows


def save_cars(rows, path='汽车之家.csv'):
    """Append *rows* to the CSV file at *path* (change the path as needed)."""
    # Open the file once for all rows instead of once per row.
    with open(path, mode='a', newline='', encoding='utf-8') as csv_file:
        csv.writer(csv_file).writerows(rows)


def scrape_to_csv():
    """Fetch the listing page, print every car found and save it to CSV."""
    rows = parse_cars(fetch_html(url, headers))
    for row in rows:
        print(*row)
    save_cars(rows)


if __name__ == '__main__':
    scrape_to_csv()
自己改文件保存路径
冒个泡 发表于 2022-5-16 16:38
请教一下B列里的4个信息能分类分列保存吗?
Option Explicit

Sub 分列()
    ' Split the first worksheet of this workbook into one worksheet per
    ' distinct value found in a user-chosen column.
    '
    ' The user is asked for the column to split on and for the header row.
    ' A worksheet is created for every distinct value below the header, then
    ' worksheet 1 is filtered by each worksheet name and the visible rows are
    ' copied over.
    ' WARNING: every worksheet from the 2nd onwards is cleared before being
    ' filled, including worksheets that existed before the macro ran.
    Dim wb As Workbook, src As Worksheet, sh As Worksheet
    ' Long instead of Integer: row numbers above 32767 overflow an Integer.
    Dim rowNum As Long, colNum As Long
    Dim i As Long, j As Long
    Dim inputCol As Long, inputRow As Long
    Dim answer As String, key As String
    Dim exists As Boolean

    Set wb = ThisWorkbook
    Set src = wb.Worksheets(1)

    ' Which column holds the values to split on.
    ' Cancel / non-numeric input aborts instead of raising a type mismatch.
    answer = InputBox("要拆分第几列的数据?")
    If Not IsNumeric(answer) Then Exit Sub
    inputCol = CLng(answer)

    ' Header height differs between workbooks, so the user supplies it.
    answer = InputBox("标题行在第几行?")
    If Not IsNumeric(answer) Then Exit Sub
    inputRow = CLng(answer)

    If inputCol < 1 Or inputRow < 1 Then Exit Sub

    ' rowNum: last data row, found by walking down column A from the header.
    ' colNum: last column, found by walking right along the header row
    '         (the original started from Cells(1, inputRow), which swapped
    '         row and column).
    rowNum = src.Cells(inputRow, 1).End(xlDown).Row
    colNum = src.Cells(inputRow, 1).End(xlToRight).Column

    ' End(xlDown) lands on the very last sheet row when nothing follows the
    ' header; there is nothing to split in that case.
    If rowNum >= src.Rows.Count Then Exit Sub

    ' Create one worksheet per distinct value in the chosen column.
    For i = inputRow + 1 To rowNum
        key = CStr(src.Cells(i, inputCol).Value)
        ' A blank cell cannot be used as a worksheet name.
        If Len(key) > 0 Then
            exists = False
            For Each sh In wb.Worksheets
                ' Worksheet names are case-insensitive in Excel.
                If StrComp(sh.Name, key, vbTextCompare) = 0 Then
                    exists = True
                    Exit For
                End If
            Next sh
            If Not exists Then
                wb.Worksheets.Add(After:=wb.Worksheets(wb.Worksheets.Count)).Name = key
            End If
        End If
    Next i

    ' Filter worksheet 1 by each worksheet name and copy the visible rows.
    For i = 2 To wb.Worksheets.Count
        ' Clear old content and force text format so long digit strings
        ' (e.g. ID numbers) are not mangled when pasted.
        With wb.Worksheets(i)
            .Cells.ClearContents
            .Cells.NumberFormat = "@"
        End With

        ' Start from a clean state, then filter the data on the chosen column.
        If src.AutoFilterMode Then src.AutoFilterMode = False
        src.Range(src.Cells(inputRow, 1), src.Cells(rowNum, colNum)).AutoFilter _
            Field:=inputCol, Criteria1:=wb.Worksheets(i).Name

        ' Copying a filtered range transfers only the visible rows.
        src.Range(src.Cells(1, 1), src.Cells(rowNum, colNum)).Copy wb.Worksheets(i).Range("A1")
        src.AutoFilterMode = False
    Next i

    ' Give every generated worksheet the same column widths as worksheet 1.
    For i = 2 To wb.Worksheets.Count
        For j = 1 To colNum
            wb.Worksheets(i).Columns(j).ColumnWidth = src.Columns(j).ColumnWidth
        Next j
    Next i

    ' When everything is done, go back to worksheet 1.
    src.Activate
End Sub
楼主是用parsel选择器,已经抄了一边,但是想用XPATH实现下,把CSDN翻了一圈,实现了根原来一样得效果
# file0 = open("二手车 test.html", "r")
# content = file0.read()
# print(type(content))# <class 'str'>
# print(content)
# file0.close()
import xlwt
from lxml import etree


def load_tree(path='二手车 test.html'):
    """Parse the saved HTML page at *path* and return its lxml root element."""
    # NOTE(review): the file is read with the platform default encoding, as in
    # the original - pass an explicit encoding if the page decodes incorrectly.
    with open(path, 'r') as html_file:
        html_text = html_file.read()
    return etree.HTML(html_text)


def extract_rows(tree):
    """Return ``(name, details, price)`` text tuples taken from *tree*.

    The XPaths were obtained by copying a single listing's path and removing
    the listing-specific id so that every listing matches, e.g.
    //*[@id="shucar_42586534"]/a/div/p  ->  //*/a/div/p
    """
    # Car name, e.g. "宝马X3 2020款 xDrive28i M运动套装".
    r1 = tree.xpath('//*/a/div/h4')
    # Details line, e.g. "4.15万公里/2019-12/淮南/2年黄金商家".
    r2 = tree.xpath('//*/a/div/p')
    # Price (was //*[@id="shucar_42586534"]/a/div/div/span/em).
    r3 = tree.xpath('//*/a/div/div/span/em')
    # The XPath results are lists of elements, so .text must be read from
    # each element, not from the list. zip() stops at the shortest list,
    # which avoids an IndexError when the three lists differ in length.
    # NOTE(review): rows are paired by position only - confirm the three
    # lists line up for the page being parsed.
    return [(name.text, details.text, price.text)
            for name, details, price in zip(r1, r2, r3)]


def export_to_xls(out_path='xpth汽车之家.xls'):
    """Write the extracted listings to an .xls workbook, one row per car."""
    rows = extract_rows(load_tree())
    wb = xlwt.Workbook(encoding="utf-8")
    ws = wb.add_sheet('测试数据')
    for i, (name, details, price) in enumerate(rows):
        ws.write(i, 0, name)
        ws.write(i, 1, details)
        # The original wrote the name a second time here instead of the price.
        ws.write(i, 2, price)
        print(name)
        print(details)
        print(price)
    wb.save(out_path)


if __name__ == '__main__':
    export_to_xls()
请教一下B列里的4个信息能分类分列保存吗?
如果实现实时的话,那这个就厉害了,二手车的信息就太方便了!
学习学习经验,谢谢大佬。
一直不知道怎么制作爬虫。
韩哲 发表于 2022-5-16 17:00
一直不知道怎么制作爬虫
可以去B站看下,有很多。
非常感谢,有用。
Microsoft Windows [版本 6.1.7601]
版权所有 (c) 2009 Microsoft Corporation。保留所有权利。
C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32>python qichezhijia.py
Traceback (most recent call last):
File "qichezhijia.py", line 1, in <module>
import requests
ModuleNotFoundError: No module named 'requests'
怎么运行?菜鸟不懂啊