# Forum post note: last edited by 苏紫方璇 on 2024-11-17 21:47.
"""
Edit the SKU file contents before use.

TODO: the input file path is configured by ``sku_path``.
"""
import csv
import json
import os
import random
import time
from DrissionPage import WebPage
from DrissionPage.common import Settings
# DrissionPage global setting, applied at import time before any page object
# is created.
# NOTE(review): presumably this allows several tab objects to refer to the same
# browser tab instead of sharing one singleton object — confirm in the
# DrissionPage Settings documentation.
Settings.singleton_tab_obj = False
class IdlefishPCWeb:
    """Price-inquiry automation for the PC web site.

    For every keyword in the SKU file the class opens the search page,
    applies the configured price range, sorts by price and sends an inquiry
    message to the seller of each listed product.

    Expected layout of the SKU file (``sku_path``)::

        {
            "product keyword 1": {
                "lowest_price": 0,
                "highest_price": 9999
            },
            "product keyword 2": {
                "lowest_price": 500,
                "highest_price": 2000
            },
            "product keyword 3": {
                "lowest_price": 666,
                "highest_price": 8888
            }
        }
    """

    # Class attribute: location of the SKU definition file.
    sku_path = r'.\sku.json'

    def __init__(self):
        """Initialise the instance (no instance state is needed)."""
        pass

    # Static methods are used where the operation depends only on its
    # arguments and not on class or instance state.
    @staticmethod
    def create_file(path):
        """Create (or truncate) the file at *path* and return the open handle.

        The handle is opened for writing as UTF-8; the caller is responsible
        for closing it.
        """
        result = open(path, 'w', encoding='utf-8')
        return result

    @staticmethod
    def delete_file(filename):
        """Delete the file *filename*."""
        os.remove(filename)

    @staticmethod
    def read_json_file(path):
        """Load and return the whole JSON document stored at *path*.

        Returns ``None`` (after printing a notice) when the file does not
        exist; JSON decoding errors propagate to the caller.
        """
        try:
            with open(path, 'r', encoding='utf-8') as file:
                return json.load(file)
        except FileNotFoundError:
            print('文件不存在')
            return None

    @staticmethod
    def writer_csv(file_path, data):
        """Append *data* as one row to the CSV file at *file_path*."""
        with open(file=file_path, mode='a', encoding='utf-8', newline='') as f:
            csv.writer(f).writerow(data)

    @staticmethod
    def read_csv(file_path):
        """Read the CSV file at *file_path* into one flat list of cells."""
        data = []
        with open(file=file_path, mode='r', encoding='utf-8') as f:
            for row in csv.reader(f):
                # extend() flattens every row into the single result list.
                data.extend(row)
        return data

    @staticmethod
    def launch_page(url):
        """Open *url* in the browser and return the page object.

        When a login element is found within three seconds the user is asked
        to log in manually before the script continues.
        """
        page = WebPage('d')
        # Alternatives left disabled by the author:
        # page = WebPage('s')
        # page.set.window.max()
        page.set.load_mode.none()
        page.get(url)
        if page.ele('登录', timeout=3):
            input('请手动完成登录~完成后按下Enter键继续执行')
        return page

    @staticmethod
    def _parse_page_number(text):
        """Convert pagination label *text* to an int, falling back to 1."""
        try:
            return max(int(str(text).strip()), 1)
        except ValueError:
            return 1

    def list_page_data(self, products, select_product, page):
        """Filter and sort the result list, then process every result page.

        Args:
            products: mapping of keyword -> price limits loaded from the SKU
                file.
            select_product: the keyword currently being searched.
            page: page object showing the search results for that keyword.
        """
        # Price range configured for this keyword.
        price_limits = products[select_product]
        lowest_price = str(int(price_limits['lowest_price']))
        highest_price = str(int(price_limits['highest_price']))

        # Fill in the two price-range inputs (lowest first, then highest).
        price_range = page.eles('tag:input@@class^search-price-input--')
        price_range[0].input(lowest_price)
        price_range[1].input(highest_price)

        # Hover the price title to reveal the sort options, then pick
        # "price: low to high".
        page.eles('@class=search-select-title--zzthyzLG')[3].hover()
        time.sleep(random.uniform(1, 1.5))
        page.eles('@class=search-select-item--H_AJBURX')[9].click()
        # Optional "brand new only" filter, left disabled by the author:
        # time.sleep(random.uniform(0.5, 1))
        # page.eles('@class^search-checkbox--fULWOSyM ')[4].click()

        # Products on the current list page (at most 30 per page).
        items = page.eles('@class=feeds-item-wrap--rGdH_KoF')
        self.details_page_data(page, items)

        # Highest page number after filtering. The label text is a string, so
        # it must be converted before comparing with the integer counter;
        # a missing or non-numeric label is treated as a single page.
        page_boxes = page.eles('@class=search-pagination-page-box--AbqmJFFp ')
        max_page_num = self._parse_page_number(page_boxes[-1].text) if page_boxes else 1

        # Turn the page once the current one is done; skipped on the last page.
        page_num = 1
        while page_num < max_page_num:
            page.eles('@class=search-page-tiny-arrow-container--tVZE99sy')[1].click()
            items = page.eles('@class=feeds-item-wrap--rGdH_KoF')
            self.details_page_data(page, items)
            page_num += 1

    @staticmethod
    def details_page_data(page, products):
        """Open each product in a new tab and send the inquiry message.

        Args:
            page: page object used to open new tabs.
            products: product elements taken from the list page.
        """
        for product in products:
            product_url = product.attr('href')
            if not product_url:
                # Nothing to open for an element without a link.
                continue
            # product.click()  # alternative: click the element to open it
            detail_tab = page.new_tab(product_url)
            try:
                iwant_url = detail_tab.eles('@class=want--mVAXJTGv')[1].attr('href')
                iwant_tab = page.new_tab(iwant_url)
                try:
                    # Inquiry sentence; can be customised.
                    iwant_tab.ele('@class=ant-input css-apn68 ant-input-outlined textarea-no-border--cIId06_i ').input(
                        '麻烦问一下价格多少')
                    # Confirm sending.
                    iwant_tab.ele(
                        '@class=ant-btn css-apn68 ant-btn-default ant-btn-color-default ant-btn-variant-outlined').click()
                finally:
                    # Close the tab even when a lookup fails so tabs do not pile up.
                    iwant_tab.close()
            finally:
                detail_tab.close()

    def main(self):
        """Run the inquiry workflow for every keyword in the SKU file."""
        products = self.read_json_file(self.sku_path)
        if not products:
            # Missing file (already reported) or no keywords: nothing to do.
            return
        for select_product in products:
            url = f'https://www.goofish.com/search?q={select_product}'
            print(f'当前搜索:{select_product}')
            page = self.launch_page(url)
            self.list_page_data(products, select_product, page)
if __name__ == '__main__':
    # Run the inquiry workflow only when the file is executed as a script.
    web1 = IdlefishPCWeb()
    web1.main()