假期了~无聊中随手写一个避免手生,不喜勿喷,大家共同进步
import requests
import re
from bs4 import BeautifulSoup
import json
def insert_json(lists):
    """Append every record in ``lists`` to ``car.json``.

    Each record is written as one compact JSON document on its own line
    (JSON Lines). Writing the documents back to back with no delimiter
    leaves a file that no standard JSON reader can parse once more than
    one record has been appended.

    Args:
        lists: Iterable of JSON-serializable records.

    Returns:
        True once all records have been written (also for an empty input).
    """
    with open("car.json", "a", encoding="utf-8") as f:
        for info in lists:
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            f.write(json.dumps(info, ensure_ascii=False) + "\n")
    return True
# Patterns applied to the HTML of each listing anchor, in output order:
# name, date, price, preparation service fee, (original price, savings).
# Compiled once here instead of on every loop iteration.
_CAR_FIELD_PATTERNS = (
    re.compile(r'<p class="CarName">(.*?)</p>'),
    re.compile(r'<p class="CarDate">(.*?)</p>'),
    re.compile(r'<span>车价:</span>(.*?)万'),
    re.compile(r'<span>\+整备服务费(.*?)元</span>'),
    re.compile(r'<p class="OldCarprice">原新车价:(.*?)万 为您节省(.*?)万</p>'),
)


def _first_or_blank(matches):
    """Return the first element of ``matches``, or '' when it is empty."""
    return matches[0] if matches else ''


def get_data(num):
    """Fetch one car-list page and append its records via ``insert_json``.

    Every ``.CarList_main > a`` element on the page becomes one record of
    five fields. A field whose pattern does not match is stored as ''.
    The last pattern has two capture groups, so its field is a
    ``(original_price, savings)`` tuple when it matches.

    Args:
        num: Page number inserted into the listing URL.

    Returns:
        The result of ``insert_json`` when the page contained listing
        anchors, otherwise False.
    """
    base_url = 'https://www.akd.cn/carlist/pg' + str(num) + '/'
    page = requests.get(base_url, timeout=10)
    soup = BeautifulSoup(page.content, 'html.parser')
    a_list = soup.select('.CarList_main > a')
    if not a_list:
        # Nothing to store; return an explicit falsy result to the caller.
        return False

    lists = []
    for anchor in a_list:
        html = str(anchor)
        lists.append(
            [_first_or_blank(pattern.findall(html))
             for pattern in _CAR_FIELD_PATTERNS]
        )
    print(lists)
    return insert_json(lists)
def main():
    """Process listing pages 1 through 8, printing a line for each success."""
    for page_no in range(1, 9):
        if not get_data(page_no):
            continue
        print(f'成功完成{page_no}页')
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
侵权删哈,纯属娱乐