吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 731|回复: 14
收起左侧

[求助] python报错请求 麻烦帮忙看看

[复制链接]
HEDouble 发表于 2024-3-30 12:40
import json
import time
import openpyxl
import requests
from jsonpath import jsonpath
from selenium import webdriver
from selenium.webdriver.common.by import By
"""

"""

# Request headers passed to requests.get() in search_data().
# NOTE(review): the Cookie value is a hard-coded browser login session that
# names a specific account; presumably it expires and must be re-captured
# before the API call can succeed — confirm. Avoid publishing a live cookie.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    'Cookie': 'thw=cn; ariaDefaultTheme=undefined; _m_h5_tk=e8b578691cdbf72619e2571ff1290233_1703845222215; _m_h5_tk_enc=3f1ecbe0ee0ae145cfdbaaad4f79d8ca; _samesite_flag_=true; cookie2=11d16aa0a97fdbc9c1ee20904e824d18; t=70a4cbfa9d516484a66819c0facef55a; _tb_token_=b33f7edbf651; xlly_s=1; mt=ci=0_0; cna=l7sWHtN+mEICAToULNELmVHd; sgcookie=E100pyHgp%2FS3k56ATKLIzx32byCl7KsbhBGan%2B7061jzyqIBciECvSb2DtDB4HZBUGmmvUHMnEEmrQg1hLoKbkeySXCgtPZyejZXKd1oRXCevR6oQztW8tIkaBBwmruacXDB; unb=2206883529046; uc3=lg2=URm48syIIVrSKA%3D%3D&nk2=F5RFh6bUkulDP3A%3D&vt3=F8dD3CbxhTqJzEWvbGE%3D&id2=UUphzOrCqmq%2BnowGGA%3D%3D; csg=ac21f60b; lgc=tb078603918; cancelledSubSites=empty; cookie17=UUphzOrCqmq%2BnowGGA%3D%3D; dnk=tb078603918; skt=a4ee62e329cb6a26; existShop=MTcwMzkyNDA3Ng%3D%3D; uc4=nk4=0%40FY4O7o%2BW2sUtoZOvRjnQm%2FLlrrWYbQ%3D%3D&id4=0%40U2grF8GKQLKzjVQmoKP4swQldtKSoZBG; tracknick=tb078603918; _cc_=W5iHLLyFfA%3D%3D; _l_g_=Ug%3D%3D; sg=864; _nk_=tb078603918; cookie1=ACznZRRtgBulyxtCJb4u0on6oLlfL6syfEnnWiB3%2FOM%3D; uc1=cookie16=VFC%2FuZ9az08KUQ56dCrZDlbNdA%3D%3D&cookie15=VT5L2FSpMGV7TQ%3D%3D&cookie21=UIHiLt3xTIkz&pas=0&existShop=false&cookie14=UoYelqB%2FyEjSUA%3D%3D; l=fBg5WSU7PoD1i0GYBO5Courza7796IRb4sPzaNbMiIEGa1Ph9Fsl9NCOtJNkWdtjgTCXZetrip0J_dLHR3fRwxDDB3h2q_zr3xv9QaVb5; tfstk=e7ekng9LM7lSJN9_DzH5hgcsMDCYVUMINypKJv3FgquXeXudNyc3JrGUeYE8o2zT-De8pJVXKlZQegMdVYZSdv7OWOe3VuMIYVBAyOQ84vK5WNBOHKff7A7Twxc11idX91nFz8SosVxN_xLIVCYmSuuDIowm4U0GCqvEY-ooZJrro0mun0co7gR9gCRKnpiVv-RBObojmVpIGhkRqG8nAijD_MGrc0gOmiABObojmVIcmC5SamiSW; isg=BPf3m4LaXkFHC9rJhFBVJC91hutBvMse20xsO0mkE0Yt-Bc6UYxbbrXe2limEKOW',
    'Referer': 'https://s.taobao.com/'
}


# 创建一个excel文件
def create_excel(file_name):
    """Create ``<file_name>.xlsx`` holding only the header row.

    Args:
        file_name: Output path without the ``.xlsx`` extension.
    """
    header_row = ['标题', '价格', '店铺', '地区', '销量']
    target_path = f'{file_name}.xlsx'

    book = openpyxl.Workbook()
    # Write the column titles as the first row of the active worksheet.
    book.active.append(header_row)
    book.save(target_path)


# 将抓取到的数据写入到excel文件中
def write_excel(name, shop_info):
    """Append one row to the active worksheet of ``<name>.xlsx`` and save it.

    Args:
        name: Workbook path without the ``.xlsx`` extension; the file must
            already exist because it is loaded before being modified.
        shop_info: Sequence of cell values written as the next row.
    """
    target_path = f'{name}.xlsx'

    book = openpyxl.load_workbook(target_path)
    book.active.append(shop_info)
    # Persist immediately so each call leaves a complete file on disk.
    book.save(target_path)


# 搜索商品
def _unwrap_jsonp(text):
    """Return the text between the first '(' and the last ')' of a JSONP reply.

    Raises:
        ValueError: if *text* contains no parenthesised payload.
    """
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end < start:
        raise ValueError(f'not a JSONP response: {text[:200]!r}')
    return text[start + 1:end]


def _first_match(obj, path, default=''):
    """Return the first jsonpath match for *path* in *obj*, or *default*."""
    # jsonpath() returns False (not an empty list) when nothing matches,
    # so indexing its result directly would raise TypeError.
    matches = jsonpath(obj, path)
    return matches[0] if matches else default


def search_data(word):
    """Log in to Taobao by QR code, search for *word*, and save the API rows.

    Drives the module-level ``driver`` through the login and search UI, then
    fetches a hard-coded search API URL with ``requests`` (sending the
    module-level ``headers``) and appends one row per returned item to
    ``<word>.xlsx`` through ``write_excel``.

    Args:
        word: Keyword typed into the search box; also the Excel file stem.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if a page element
            cannot be located within the implicit wait.
        requests.HTTPError: if the API answers with an error status.
        ValueError: if the reply is not JSONP/JSON or carries no item list.

    NOTE(review): the API URL below embeds a fixed ``t``/``sign`` pair and a
    fixed query (its percent-encoded ``q`` decodes to "华为手机"), so the rows
    fetched do not depend on *word*. Presumably the signature also expires —
    confirm and regenerate it together with the cookie in ``headers``.
    """
    # Let every find_element call poll for up to 10 s instead of failing on a
    # page that is still loading. The original called implicitly_wait only
    # after the last lookup, where it had no effect (it is not a sleep).
    driver.implicitly_wait(10)

    # Open the login page and switch to QR-code login.
    driver.find_element(By.XPATH, '//a[@class="sn-login"]').click()
    driver.find_element(By.XPATH, '//i[@class="iconfont icon-qrcode"]').click()

    # Give the user ten seconds to scan the QR code.
    time.sleep(10)

    # Type the keyword into the search box and submit.
    driver.find_element(By.XPATH, '//input').send_keys(word)
    driver.find_element(By.XPATH, '//div[@class="rax-view-v2 SearchInput--searchButton--1Sz2UIn"]').click()

    # The literal is split in two adjacent pieces only for line length; the
    # pasted original had a raw line break inside the string (a syntax error).
    url = (
        'https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?jsv=2.6.2&appKey=12574478&t=1703923985466&sign=1327afc3c96e32d2c4d37d258d5ccf57&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2234385%22%2C%22params%22%3A%22%7B%5C%22device%5C%22%3A%5C%22HMA-AL00%5C%22%2C%5C%22isBeta%5C%22%3A%5C%22false%5C%22%2C%5C%22grayHair%5C%22%3A%5C%22false%5C%22%2C%5C%22from%5C%22%3A%5C%22nt_history%5C%22%2C%5C%22brand%5C%22%3A%5C%22HUAWEI%5C%22%2C%5C%22info%5C%22%3A%5C%22wifi%5C%22%2C%5C%22index%5C%22%3A%5C%224%5C%22%2C%5C%22rainbow%5C%22%3A%5C%22%5C%22%2C%5C%22schemaType%5C%22%3A%5C%22auction%5C%22%2C%5C%22elderHome%5C%22%3A%5C%22false%5C%22%2C%5C%22isEnterSrpSearch%5C%22%3A%5C%22true%5C%22%2C%5C%22newSearch%5C%22%3A%5C%22false%5C%22%2C%5C%22network%5C%22%3A%5C%22wifi%5C%22%2C%5C%22subtype%5C%22%3A%5C%22%5C%22%2C%5C%22hasPreposeFilter%5C%22%3A%5C%22false%5C%22%2C%5C%22prepositionVersion%5C%22%3A%5C%22v2%5C%22%2C%5C%22client_os%5C%22%3A%5C%22Android%5C%22%2C%5C%22gpsEnabled%5C%22%3A%5C%22false%5C%22%2C%5C%22searchDoorFrom%5C%22%3A%5C%22srp%5C%22%2C%5C%22debug_rerankNewOpenCard%5C%22%3A%5C%22false%5C%22%2C%5C%22homePageVersion%5C%22%3A%5C%22v7%5C%22%2C%5C%22searchElderHomeOpen%5C%22%3A%5C%22false%5C%22%2C%5C%22search_action%5C%22%3A%5C%22initiative%5C%22%2C%5C%22sugg%5C%22%3A%5C%22_4_1%5C%22%2C%5C%22sversion%5C%22%3A%5C%2213.6%5C%22%2C%5C%22style%5C%22%3A%5C%22list%5C%22%2C%5C%22ttid%5C%22%3A%5C%22600000%40taobao_pc_10.7.0%5C%22%2C%5C%22needTabs%5C%22%3A%5C%22true%5C%22%2C%5C%22areaCode%5C%22%3A%5C%22CN%5C%22%2C%5C%22vm%5C%22%3A%5C%22nw%5C%22%2C%5C%22countryNum%5C%22%3A%5C%22156%5C%22%2C%5C%22m%5C%22%3A%5C%22pc%5C%22%2C%5C%22page%5C%22%3A1%2C%5C%22n%5C%22%3A48%2C%5C%22q%5C%22%3A%5C%22%25E5%258D%258E%25E4%25B8%25BA%25E6%2589%258B%25E6%259C%25BA%5C%22%2C%5C%22tab%5C%22%3A%5C%22mall%5C%22%2C%5C%22pageSize%5C%22%3A48%2C%5C%22totalPage%5C%22%3A100%2C%5C%22totalResults%5C'
        '%22%3A4800%2C%5C%22sourceS%5C%22%3A%5C%220%5C%22%2C%5C%22sort%5C%22%3A%5C%22_coefp%5C%22%2C%5C%22bcoffset%5C%22%3A%5C%22%5C%22%2C%5C%22ntoffset%5C%22%3A%5C%22%5C%22%2C%5C%22filterTag%5C%22%3A%5C%22%5C%22%2C%5C%22service%5C%22%3A%5C%22%5C%22%2C%5C%22prop%5C%22%3A%5C%22%5C%22%2C%5C%22loc%5C%22%3A%5C%22%5C%22%2C%5C%22start_price%5C%22%3Anull%2C%5C%22end_price%5C%22%3Anull%2C%5C%22startPrice%5C%22%3Anull%2C%5C%22endPrice%5C%22%3Anull%2C%5C%22itemIds%5C%22%3Anull%2C%5C%22p4pIds%5C%22%3Anull%7D%22%7D'
    )
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    # Strip the "mtopjsonp1( ... )" wrapper without depending on surrounding
    # whitespace, then parse the JSON payload.
    json_data = json.loads(_unwrap_jsonp(response.text))

    goods_data_list = _first_match(json_data, '$.data.itemsArray', default=None)
    if goods_data_list is None:
        raise ValueError(
            f'API response has no data.itemsArray: {response.text[:200]!r}'
        )

    for goods_data in goods_data_list:
        goods_title = _first_match(goods_data, '$.title')
        # Remove the highlight markup wrapped around matched keywords.
        goods_title = goods_title.replace('/<span class=H>', '').replace('</span>', '').replace('<span class=H>', '')
        print(goods_title)
        goods_price = _first_match(goods_data, '$.priceWap')
        print(goods_price)
        goods_store = _first_match(goods_data, '$.nick')
        print(goods_store)
        store_addr = _first_match(goods_data, '$.procity')
        print(store_addr)
        buy_count = _first_match(goods_data, '$.realSales')
        if buy_count == '':
            # Store the same text that is printed, so the sheet and the
            # console agree (the original wrote an empty cell here).
            buy_count = '0人付款'
        print(buy_count)

        goods_info = [goods_title, goods_price, goods_store, store_addr, buy_count]

        write_excel(word, goods_info)


if __name__ == '__main__':
    # Ask for the product keyword; it is typed into the Taobao search box and
    # also used as the Excel file name.
    search_word = input('请输入商品名称:')

    # Launch Chrome and open the Taobao home page.
    # `driver` is a module-level global that search_data() reads directly.
    driver = webdriver.Chrome()
    driver.get('https://www.taobao.com/')
    driver.maximize_window()

    # Create the workbook with its header row.
    create_excel(search_word)

    # Log in, search, and write the scraped rows.
    search_data(search_word)

"""
以下是报错
"""
D:\Users\XXXXX\AppData\Local\Programs\Python\Python311\python.exe H:\Users\天猫数据抓取\天猫数据抓取\数据抓取.py
请输入商品名称:铜螺母
Traceback (most recent call last):
  File "H:\Users\天猫数据抓取\天猫数据抓取\数据抓取.py", line 106, in <module>
    search_data(search_word)
  File "H:\Users\天猫数据抓取\天猫数据抓取\数据抓取.py", line 49, in search_data
    driver.find_element(By.XPATH, '//a[@class="sn-login"]').click()
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\Users\XXXXX\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 741, in find_element
    return self.execute(Command.FIND_ELEMENT, {"using": by, "value": value})["value"]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\Users\XXXXX\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 347, in execute
    self.error_handler.check_response(response)
  File "D:\Users\XXXXX\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 229, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@class="sn-login"]"}
  (Session info: chrome=123.0.6312.59); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
        GetHandleVerifier [0x00007FF7693670C2+63090]
        (No symbol) [0x00007FF7692D2D12]
        (No symbol) [0x00007FF76916EC65]
        (No symbol) [0x00007FF7691B499D]
        (No symbol) [0x00007FF7691B4ADC]
        (No symbol) [0x00007FF7691F5B37]
        (No symbol) [0x00007FF7691D701F]
        (No symbol) [0x00007FF7691F3412]
        (No symbol) [0x00007FF7691D6D83]
        (No symbol) [0x00007FF7691A83A8]
        (No symbol) [0x00007FF7691A9441]
        GetHandleVerifier [0x00007FF76976262D+4238301]
        GetHandleVerifier [0x00007FF76979F78D+4488509]
        GetHandleVerifier [0x00007FF769797A6F+4456479]
        GetHandleVerifier [0x00007FF769440606+953270]
        (No symbol) [0x00007FF7692DE5DF]
        (No symbol) [0x00007FF7692D92B4]
        (No symbol) [0x00007FF7692D93EB]
        (No symbol) [0x00007FF7692C9C24]
        BaseThreadInitThunk [0x00007FF9AD65257D+29]
        RtlUserThreadStart [0x00007FF9AEEAAA58+40]


进程已结束,退出代码为 1

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

 楼主| HEDouble 发表于 2024-3-30 12:43
新手,在网上找了一个爬取淘宝数据的 然后就报错了...
dwn6905 发表于 2024-3-30 13:48
微信截图_20240330134603.png
xpath表达式有问题吧,可以用浏览器开发者工具copy full xpath
只有午安 发表于 2024-3-30 13:50
本帖最后由 只有午安 于 2024-3-30 13:52 编辑

selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@class="sn-login"]"}

[Python] 纯文本查看 复制代码
"//a[@class="sn-login"]"

没有找到这个元素,先把这个改一下

改成
[Python] 纯文本查看 复制代码
//*[@id="J_SiteNavLogin"]/div[1]/div[1]/a[1]
   试试
 楼主| HEDouble 发表于 2024-3-30 15:27
只有午安 发表于 2024-3-30 13:50
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate elemen ...

您好,感谢您的讲解。更换代码后还是相同的报错:可以跳转到淘宝扫码登录页面,但之后浏览器就直接关闭并报错了,报错结果也和之前一样。
 楼主| HEDouble 发表于 2024-3-30 15:28
dwn6905 发表于 2024-3-30 13:48
xpath表达式有问题吧,可以用浏览器开发者工具copy full xpath

您好 抱歉我是小白 不太理解您说的 我该如何进行替换或者操作 谢谢
dwn6905 发表于 2024-3-30 15:44
按F12打开开发者工具,或通过在页面上右键,最下面有个检查,点击也能打开。
然后找到左上角一个鼠标样式的按钮。这个可以通过点击真实页面的元素,快速定位到它在网页源码中的位置。

选中元素后,开发者工具会高亮显示选中的元素,你可以再看看你需要的具体到哪个标签,然后按下图操作

另外还有一点就是,双引号里嵌套使用双引号的话需要转义字符,你可以单引号嵌套双引号或者双引号嵌套单引号,在你这里作用是一样的。
2.png
1.png
 楼主| HEDouble 发表于 2024-3-30 15:56
dwn6905 发表于 2024-3-30 15:44
按F12打开开发者工具,或通过在页面上右键,最下面有个检查,点击也能打开。
然后找到左上角一个鼠标样式 ...

您好 我是想爬取淘宝某个商品链接里面的 名字 SKU规格(商品分类)和对应的价格 这种有没有简单方法来实现 或者说您知道有这种教学帖子吗
dwn6905 发表于 2024-3-30 16:25
HEDouble 发表于 2024-3-30 15:56
您好 我是想爬取淘宝某个商品链接里面的 名字 SKU规格(商品分类)和对应的价格 这种有没有简单方法来实 ...


我看你爬虫逻辑写得没啥问题。我这边网络有点卡,响应会超时,程序一直报错。我在你的代码里加了几行,主要是创建 Chrome 对象时加了几个参数,让浏览器在程序结束后不会自动关闭,方便你调试。


import json
import time

import openpyxl
import requests
from jsonpath import jsonpath
from selenium.webdriver import Chrome # 导入Chrome包
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options  # 导入参数设置的包
"""

"""

# Request headers passed to requests.get() in search_data().
# NOTE(review): the Cookie value is a hard-coded browser login session that
# names a specific account; presumably it expires and must be re-captured
# before the API call can succeed — confirm. Avoid publishing a live cookie.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    'Cookie': 'thw=cn; ariaDefaultTheme=undefined; _m_h5_tk=e8b578691cdbf72619e2571ff1290233_1703845222215; _m_h5_tk_enc=3f1ecbe0ee0ae145cfdbaaad4f79d8ca; _samesite_flag_=true; cookie2=11d16aa0a97fdbc9c1ee20904e824d18; t=70a4cbfa9d516484a66819c0facef55a; _tb_token_=b33f7edbf651; xlly_s=1; mt=ci=0_0; cna=l7sWHtN+mEICAToULNELmVHd; sgcookie=E100pyHgp%2FS3k56ATKLIzx32byCl7KsbhBGan%2B7061jzyqIBciECvSb2DtDB4HZBUGmmvUHMnEEmrQg1hLoKbkeySXCgtPZyejZXKd1oRXCevR6oQztW8tIkaBBwmruacXDB; unb=2206883529046; uc3=lg2=URm48syIIVrSKA%3D%3D&nk2=F5RFh6bUkulDP3A%3D&vt3=F8dD3CbxhTqJzEWvbGE%3D&id2=UUphzOrCqmq%2BnowGGA%3D%3D; csg=ac21f60b; lgc=tb078603918; cancelledSubSites=empty; cookie17=UUphzOrCqmq%2BnowGGA%3D%3D; dnk=tb078603918; skt=a4ee62e329cb6a26; existShop=MTcwMzkyNDA3Ng%3D%3D; uc4=nk4=0%40FY4O7o%2BW2sUtoZOvRjnQm%2FLlrrWYbQ%3D%3D&id4=0%40U2grF8GKQLKzjVQmoKP4swQldtKSoZBG; tracknick=tb078603918; _cc_=W5iHLLyFfA%3D%3D; _l_g_=Ug%3D%3D; sg=864; _nk_=tb078603918; cookie1=ACznZRRtgBulyxtCJb4u0on6oLlfL6syfEnnWiB3%2FOM%3D; uc1=cookie16=VFC%2FuZ9az08KUQ56dCrZDlbNdA%3D%3D&cookie15=VT5L2FSpMGV7TQ%3D%3D&cookie21=UIHiLt3xTIkz&pas=0&existShop=false&cookie14=UoYelqB%2FyEjSUA%3D%3D; l=fBg5WSU7PoD1i0GYBO5Courza7796IRb4sPzaNbMiIEGa1Ph9Fsl9NCOtJNkWdtjgTCXZetrip0J_dLHR3fRwxDDB3h2q_zr3xv9QaVb5; tfstk=e7ekng9LM7lSJN9_DzH5hgcsMDCYVUMINypKJv3FgquXeXudNyc3JrGUeYE8o2zT-De8pJVXKlZQegMdVYZSdv7OWOe3VuMIYVBAyOQ84vK5WNBOHKff7A7Twxc11idX91nFz8SosVxN_xLIVCYmSuuDIowm4U0GCqvEY-ooZJrro0mun0co7gR9gCRKnpiVv-RBObojmVpIGhkRqG8nAijD_MGrc0gOmiABObojmVIcmC5SamiSW; isg=BPf3m4LaXkFHC9rJhFBVJC91hutBvMse20xsO0mkE0Yt-Bc6UYxbbrXe2limEKOW',
    'Referer': 'https://s.taobao.com/'
}

# 创建一个excel文件
def create_excel(file_name):
    """Write a fresh ``<file_name>.xlsx`` whose only row is the column titles.

    Args:
        file_name: Destination path, given without the ``.xlsx`` suffix.
    """
    columns = ['标题', '价格', '店铺', '地区', '销量']
    xlsx_path = f'{file_name}.xlsx'

    wb = openpyxl.Workbook()
    sheet = wb.active
    # The titles become row 1 of the active worksheet.
    sheet.append(columns)
    wb.save(xlsx_path)

# 将抓取到的数据写入到excel文件中
def write_excel(name, shop_info):
    """Load ``<name>.xlsx``, append *shop_info* as a new row, and save.

    Args:
        name: Workbook path without the ``.xlsx`` suffix; the file is opened
            first, so it must already exist.
        shop_info: Sequence of cell values for the appended row.
    """
    xlsx_path = f'{name}.xlsx'

    wb = openpyxl.load_workbook(xlsx_path)
    sheet = wb.active
    sheet.append(shop_info)
    # Save back to the same path so the row is on disk after every call.
    wb.save(xlsx_path)

# 搜索商品
def _unwrap_jsonp(text):
    """Return the text between the first '(' and the last ')' of a JSONP reply.

    Raises:
        ValueError: if *text* contains no parenthesised payload.
    """
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end < start:
        raise ValueError(f'not a JSONP response: {text[:200]!r}')
    return text[start + 1:end]


def _first_match(obj, path, default=''):
    """Return the first jsonpath match for *path* in *obj*, or *default*."""
    # jsonpath() returns False (not an empty list) when nothing matches,
    # so indexing its result directly would raise TypeError.
    matches = jsonpath(obj, path)
    return matches[0] if matches else default


def search_data(word):
    """Log in to Taobao by QR code, search for *word*, and save the API rows.

    Drives the module-level ``driver`` through the login and search UI, then
    fetches a hard-coded search API URL with ``requests`` (sending the
    module-level ``headers``) and appends one row per returned item to
    ``<word>.xlsx`` through ``write_excel``.

    Args:
        word: Keyword typed into the search box; also the Excel file stem.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if a page element
            cannot be located within the implicit wait.
        requests.HTTPError: if the API answers with an error status.
        ValueError: if the reply is not JSONP/JSON or carries no item list.

    NOTE(review): the API URL below embeds a fixed ``t``/``sign`` pair and a
    fixed query (its percent-encoded ``q`` decodes to "华为手机"), so the rows
    fetched do not depend on *word*. Presumably the signature also expires —
    confirm and regenerate it together with the cookie in ``headers``.
    """
    # Let every find_element call poll for up to 10 s instead of failing on a
    # page that is still loading. The original called implicitly_wait only
    # after the last lookup, where it had no effect (it is not a sleep).
    driver.implicitly_wait(10)

    # Open the login page and switch to QR-code login.
    # NOTE(review): absolute XPaths break whenever the page layout changes.
    driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div/ul[1]/li[2]/div[1]/div[1]/a[1]').click()
    driver.find_element(By.XPATH, '/html/body/div/div[2]/div[3]/div/div/div/div[1]/i').click()

    # Give the user ten seconds to scan the QR code.
    time.sleep(10)

    # Type the keyword into the search box and submit.
    driver.find_element(By.XPATH, '//input').send_keys(word)
    driver.find_element(By.XPATH, '//div[@class="rax-view-v2 SearchInput--searchButton--1Sz2UIn"]').click()

    # The literal is split in two adjacent pieces only for line length; the
    # pasted original had a raw line break inside the string (a syntax error).
    url = (
        'https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?jsv=2.6.2&appKey=12574478&t=1703923985466&sign=1327afc3c96e32d2c4d37d258d5ccf57&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2234385%22%2C%22params%22%3A%22%7B%5C%22device%5C%22%3A%5C%22HMA-AL00%5C%22%2C%5C%22isBeta%5C%22%3A%5C%22false%5C%22%2C%5C%22grayHair%5C%22%3A%5C%22false%5C%22%2C%5C%22from%5C%22%3A%5C%22nt_history%5C%22%2C%5C%22brand%5C%22%3A%5C%22HUAWEI%5C%22%2C%5C%22info%5C%22%3A%5C%22wifi%5C%22%2C%5C%22index%5C%22%3A%5C%224%5C%22%2C%5C%22rainbow%5C%22%3A%5C%22%5C%22%2C%5C%22schemaType%5C%22%3A%5C%22auction%5C%22%2C%5C%22elderHome%5C%22%3A%5C%22false%5C%22%2C%5C%22isEnterSrpSearch%5C%22%3A%5C%22true%5C%22%2C%5C%22newSearch%5C%22%3A%5C%22false%5C%22%2C%5C%22network%5C%22%3A%5C%22wifi%5C%22%2C%5C%22subtype%5C%22%3A%5C%22%5C%22%2C%5C%22hasPreposeFilter%5C%22%3A%5C%22false%5C%22%2C%5C%22prepositionVersion%5C%22%3A%5C%22v2%5C%22%2C%5C%22client_os%5C%22%3A%5C%22Android%5C%22%2C%5C%22gpsEnabled%5C%22%3A%5C%22false%5C%22%2C%5C%22searchDoorFrom%5C%22%3A%5C%22srp%5C%22%2C%5C%22debug_rerankNewOpenCard%5C%22%3A%5C%22false%5C%22%2C%5C%22homePageVersion%5C%22%3A%5C%22v7%5C%22%2C%5C%22searchElderHomeOpen%5C%22%3A%5C%22false%5C%22%2C%5C%22search_action%5C%22%3A%5C%22initiative%5C%22%2C%5C%22sugg%5C%22%3A%5C%22_4_1%5C%22%2C%5C%22sversion%5C%22%3A%5C%2213.6%5C%22%2C%5C%22style%5C%22%3A%5C%22list%5C%22%2C%5C%22ttid%5C%22%3A%5C%22600000%40taobao_pc_10.7.0%5C%22%2C%5C%22needTabs%5C%22%3A%5C%22true%5C%22%2C%5C%22areaCode%5C%22%3A%5C%22CN%5C%22%2C%5C%22vm%5C%22%3A%5C%22nw%5C%22%2C%5C%22countryNum%5C%22%3A%5C%22156%5C%22%2C%5C%22m%5C%22%3A%5C%22pc%5C%22%2C%5C%22page%5C%22%3A1%2C%5C%22n%5C%22%3A48%2C%5C%22q%5C%22%3A%5C%22%25E5%258D%258E%25E4%25B8%25BA%25E6%2589%258B%25E6%259C%25BA%5C%22%2C%5C%22tab%5C%22%3A%5C%22mall%5C%22%2C%5C%22pageSize%5C%22%3A48%2C%5C%22totalPage%5C%22%3A100%2C%5C%22totalResults%5C'
        '%22%3A4800%2C%5C%22sourceS%5C%22%3A%5C%220%5C%22%2C%5C%22sort%5C%22%3A%5C%22_coefp%5C%22%2C%5C%22bcoffset%5C%22%3A%5C%22%5C%22%2C%5C%22ntoffset%5C%22%3A%5C%22%5C%22%2C%5C%22filterTag%5C%22%3A%5C%22%5C%22%2C%5C%22service%5C%22%3A%5C%22%5C%22%2C%5C%22prop%5C%22%3A%5C%22%5C%22%2C%5C%22loc%5C%22%3A%5C%22%5C%22%2C%5C%22start_price%5C%22%3Anull%2C%5C%22end_price%5C%22%3Anull%2C%5C%22startPrice%5C%22%3Anull%2C%5C%22endPrice%5C%22%3Anull%2C%5C%22itemIds%5C%22%3Anull%2C%5C%22p4pIds%5C%22%3Anull%7D%22%7D'
    )
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    # Strip the "mtopjsonp1( ... )" wrapper without depending on surrounding
    # whitespace, then parse the JSON payload.
    json_data = json.loads(_unwrap_jsonp(response.text))

    goods_data_list = _first_match(json_data, '$.data.itemsArray', default=None)
    if goods_data_list is None:
        raise ValueError(
            f'API response has no data.itemsArray: {response.text[:200]!r}'
        )

    for goods_data in goods_data_list:
        goods_title = _first_match(goods_data, '$.title')
        # Remove the highlight markup wrapped around matched keywords.
        goods_title = goods_title.replace('/<span class=H>', '').replace('</span>', '').replace('<span class=H>', '')
        print(goods_title)
        goods_price = _first_match(goods_data, '$.priceWap')
        print(goods_price)
        goods_store = _first_match(goods_data, '$.nick')
        print(goods_store)
        store_addr = _first_match(goods_data, '$.procity')
        print(store_addr)
        buy_count = _first_match(goods_data, '$.realSales')
        if buy_count == '':
            # Store the same text that is printed, so the sheet and the
            # console agree (the original wrote an empty cell here).
            buy_count = '0人付款'
        print(buy_count)

        goods_info = [goods_title, goods_price, goods_store, store_addr, buy_count]

        write_excel(word, goods_info)

if __name__ == '__main__':
    # Chrome options intended to make debugging easier.
    option = Options()
    option.add_experimental_option('detach', True)  # keep the browser open after the script finishes (by default it closes)
    option.add_experimental_option('excludeSwitches', ['enable-automation'])  # hide the "Chrome is being controlled by automated test software" banner
    # NOTE(review): presumably hides the automation flag from page scripts — confirm.
    option.add_argument('--disable-blink-features=AutomationControlled')
    # Ask for the product keyword; it is typed into the Taobao search box and
    # also used as the Excel file name.
    search_word = input('请输入商品名称:')

    # Launch Chrome with the options above and open the Taobao home page.
    # `driver` is a module-level global that search_data() reads directly.
    driver = Chrome(options=option)
    driver.get('https://www.taobao.com/')
    driver.maximize_window()

    # Create the workbook with its header row.
    create_excel(search_word)

    # Log in, search, and write the scraped rows.
    search_data(search_word)

然后,我上面回复你的copy full XPath可以看看,我改完登录就没问题了,就是登录完,有点卡,网页没加载完,程序等不到就报错了。

我觉得你应该可以写出来的,只爬这些数据的话,很简单的。

只有午安 发表于 2024-3-30 16:35
HEDouble 发表于 2024-3-30 15:27
您好 感谢您的讲解 更换代码后还是相同的报错 可以跳转页面到淘宝扫码登录 但后面就直接关闭浏览器报错了 ...
以跳转页面到淘宝扫码登录 但后面就直接关闭浏览器报错

是可以扫码,然后才关闭报错,还是还没有出现扫码页面
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-11-24 14:20

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表