HEDouble 发表于 2024-3-30 12:40

python报错请求 麻烦帮忙看看

import json
import time
import openpyxl
import requests
from jsonpath import jsonpath
from selenium import webdriver
from selenium.webdriver.common.by import By
"""

"""

# Request headers for the Taobao mtop search API.
# NOTE(review): the Cookie value below is a hard-coded, user-specific session
# (_m_h5_tk token, login cookies, etc.). These expire, after which the API
# call returns an error/empty payload — refresh the cookie from a logged-in
# browser session before running.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    'Cookie': 'thw=cn; ariaDefaultTheme=undefined; _m_h5_tk=e8b578691cdbf72619e2571ff1290233_1703845222215; _m_h5_tk_enc=3f1ecbe0ee0ae145cfdbaaad4f79d8ca; _samesite_flag_=true; cookie2=11d16aa0a97fdbc9c1ee20904e824d18; t=70a4cbfa9d516484a66819c0facef55a; _tb_token_=b33f7edbf651; xlly_s=1; mt=ci=0_0; cna=l7sWHtN+mEICAToULNELmVHd; sgcookie=E100pyHgp%2FS3k56ATKLIzx32byCl7KsbhBGan%2B7061jzyqIBciECvSb2DtDB4HZBUGmmvUHMnEEmrQg1hLoKbkeySXCgtPZyejZXKd1oRXCevR6oQztW8tIkaBBwmruacXDB; unb=2206883529046; uc3=lg2=URm48syIIVrSKA%3D%3D&nk2=F5RFh6bUkulDP3A%3D&vt3=F8dD3CbxhTqJzEWvbGE%3D&id2=UUphzOrCqmq%2BnowGGA%3D%3D; csg=ac21f60b; lgc=tb078603918; cancelledSubSites=empty; cookie17=UUphzOrCqmq%2BnowGGA%3D%3D; dnk=tb078603918; skt=a4ee62e329cb6a26; existShop=MTcwMzkyNDA3Ng%3D%3D; uc4=nk4=0%40FY4O7o%2BW2sUtoZOvRjnQm%2FLlrrWYbQ%3D%3D&id4=0%40U2grF8GKQLKzjVQmoKP4swQldtKSoZBG; tracknick=tb078603918; _cc_=W5iHLLyFfA%3D%3D; _l_g_=Ug%3D%3D; sg=864; _nk_=tb078603918; cookie1=ACznZRRtgBulyxtCJb4u0on6oLlfL6syfEnnWiB3%2FOM%3D; uc1=cookie16=VFC%2FuZ9az08KUQ56dCrZDlbNdA%3D%3D&cookie15=VT5L2FSpMGV7TQ%3D%3D&cookie21=UIHiLt3xTIkz&pas=0&existShop=false&cookie14=UoYelqB%2FyEjSUA%3D%3D; l=fBg5WSU7PoD1i0GYBO5Courza7796IRb4sPzaNbMiIEGa1Ph9Fsl9NCOtJNkWdtjgTCXZetrip0J_dLHR3fRwxDDB3h2q_zr3xv9QaVb5; tfstk=e7ekng9LM7lSJN9_DzH5hgcsMDCYVUMINypKJv3FgquXeXudNyc3JrGUeYE8o2zT-De8pJVXKlZQegMdVYZSdv7OWOe3VuMIYVBAyOQ84vK5WNBOHKff7A7Twxc11idX91nFz8SosVxN_xLIVCYmSuuDIowm4U0GCqvEY-ooZJrro0mun0co7gR9gCRKnpiVv-RBObojmVpIGhkRqG8nAijD_MGrc0gOmiABObojmVIcmC5SamiSW; isg=BPf3m4LaXkFHC9rJhFBVJC91hutBvMse20xsO0mkE0Yt-Bc6UYxbbrXe2limEKOW',
    'Referer': 'https://s.taobao.com/'
}


# Create the output Excel file with its header row.
def create_excel(file_name):
    """Create <file_name>.xlsx containing only the header row."""
    wb = openpyxl.Workbook()
    sheet = wb.active
    # Header columns: title, price, shop, region, sales volume.
    sheet.append(['标题', '价格', '店铺', '地区', '销量'])
    wb.save(f'{file_name}.xlsx')


# Append one scraped record to the Excel file.
def write_excel(name, shop_info):
    """Append `shop_info` as the next row of <name>.xlsx and save."""
    wb = openpyxl.load_workbook(f'{name}.xlsx')
    # The active sheet is the one create_excel() wrote the header into.
    wb.active.append(shop_info)
    wb.save(f'{name}.xlsx')


# Search for a product and scrape the result list.
def search_data(word):
    """Log in to Taobao, search for `word`, then scrape the search API
    response and append each item to <word>.xlsx via write_excel().

    Relies on module-level globals: `driver` (a WebDriver already on the
    Taobao home page) and `headers` (request headers with session cookie).
    """
    # Open the QR-code login panel.
    driver.find_element(By.XPATH, '//a[@class="sn-login"]').click()
    driver.find_element(By.XPATH, '//i[@class="iconfont icon-qrcode"]').click()

    # Give the user ten seconds to scan the QR code and log in.
    time.sleep(10)

    # Type the keyword into the search box and click the search button.
    driver.find_element(By.XPATH, '//input').send_keys(word)
    driver.find_element(By.XPATH, '//div[@class="rax-view-v2 SearchInput--searchButton--1Sz2UIn"]').click()
    # Implicit wait: subsequent element lookups retry for up to 3 seconds.
    driver.implicitly_wait(3)

    # NOTE(review): `t` and `sign` in this URL are a captured timestamp and
    # signature — once they expire the API rejects the request; they must be
    # regenerated per call.  (URL was split over two lines in the original,
    # which was a syntax error; joined here via implicit concatenation.)
    url = ('https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?jsv=2.6.2&appKey=12574478&t=1703923985466&sign=1327afc3c96e32d2c4d37d258d5ccf57&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2234385%22%2C%22params%22%3A%22%7B%5C%22device%5C%22%3A%5C%22HMA-AL00%5C%22%2C%5C%22isBeta%5C%22%3A%5C%22false%5C%22%2C%5C%22grayHair%5C%22%3A%5C%22false%5C%22%2C%5C%22from%5C%22%3A%5C%22nt_history%5C%22%2C%5C%22brand%5C%22%3A%5C%22HUAWEI%5C%22%2C%5C%22info%5C%22%3A%5C%22wifi%5C%22%2C%5C%22index%5C%22%3A%5C%224%5C%22%2C%5C%22rainbow%5C%22%3A%5C%22%5C%22%2C%5C%22schemaType%5C%22%3A%5C%22auction%5C%22%2C%5C%22elderHome%5C%22%3A%5C%22false%5C%22%2C%5C%22isEnterSrpSearch%5C%22%3A%5C%22true%5C%22%2C%5C%22newSearch%5C%22%3A%5C%22false%5C%22%2C%5C%22network%5C%22%3A%5C%22wifi%5C%22%2C%5C%22subtype%5C%22%3A%5C%22%5C%22%2C%5C%22hasPreposeFilter%5C%22%3A%5C%22false%5C%22%2C%5C%22prepositionVersion%5C%22%3A%5C%22v2%5C%22%2C%5C%22client_os%5C%22%3A%5C%22Android%5C%22%2C%5C%22gpsEnabled%5C%22%3A%5C%22false%5C%22%2C%5C%22searchDoorFrom%5C%22%3A%5C%22srp%5C%22%2C%5C%22debug_rerankNewOpenCard%5C%22%3A%5C%22false%5C%22%2C%5C%22homePageVersion%5C%22%3A%5C%22v7%5C%22%2C%5C%22searchElderHomeOpen%5C%22%3A%5C%22false%5C%22%2C%5C%22search_action%5C%22%3A%5C%22initiative%5C%22%2C%5C%22sugg%5C%22%3A%5C%22_4_1%5C%22%2C%5C%22sversion%5C%22%3A%5C%2213.6%5C%22%2C%5C%22style%5C%22%3A%5C%22list%5C%22%2C%5C%22ttid%5C%22%3A%5C%22600000%40taobao_pc_10.7.0%5C%22%2C%5C%22needTabs%5C%22%3A%5C%22true%5C%22%2C%5C%22areaCode%5C%22%3A%5C%22CN%5C%22%2C%5C%22vm%5C%22%3A%5C%22nw%5C%22%2C%5C%22countryNum%5C%22%3A%5C%22156%5C%22%2C%5C%22m%5C%22%3A%5C%22pc%5C%22%2C%5C%22page%5C%22%3A1%2C%5C%22n%5C%22%3A48%2C%5C%22q%5C%22%3A%5C%22%25E5%258D%258E%25E4%25B8%25BA%25E6%2589%258B%25E6%259C%25BA%5C%22%2C%5C%22tab%5C%22%3A%5C%22mall%5C%22%2C%5C%22pageSize%5C%22%3A48%2C%5C%22totalPage%5C%22%3A100%2C%5C%22totalResults%5C'
           '%22%3A4800%2C%5C%22sourceS%5C%22%3A%5C%220%5C%22%2C%5C%22sort%5C%22%3A%5C%22_coefp%5C%22%2C%5C%22bcoffset%5C%22%3A%5C%22%5C%22%2C%5C%22ntoffset%5C%22%3A%5C%22%5C%22%2C%5C%22filterTag%5C%22%3A%5C%22%5C%22%2C%5C%22service%5C%22%3A%5C%22%5C%22%2C%5C%22prop%5C%22%3A%5C%22%5C%22%2C%5C%22loc%5C%22%3A%5C%22%5C%22%2C%5C%22start_price%5C%22%3Anull%2C%5C%22end_price%5C%22%3Anull%2C%5C%22startPrice%5C%22%3Anull%2C%5C%22endPrice%5C%22%3Anull%2C%5C%22itemIds%5C%22%3Anull%2C%5C%22p4pIds%5C%22%3Anull%7D%22%7D')
    response = requests.get(url, headers=headers)

    # Strip the JSONP wrapper "mtopjsonp1(...)" to get the raw JSON text.
    data = response.text.replace(' mtopjsonp1(', '')[:-1]
    json_data = json.loads(data)

    # jsonpath() returns a list of matches, or False when nothing matched.
    matched = jsonpath(json_data, '$.data.itemsArray')
    if not matched:
        # Likely an expired cookie / stale sign in the hard-coded URL.
        print('未获取到商品数据')
        return
    # BUG FIX: jsonpath wraps its result — `matched` is [itemsArray], so the
    # original code iterated over a one-element wrapper instead of the items.
    goods_data_list = matched[0]

    for goods_data in goods_data_list:
        # BUG FIX: every jsonpath() call returns a list (or False); the
        # original called str.replace() on that list -> AttributeError.
        goods_title = _first_match(goods_data, '$.title')
        # Strip the <span class=H> keyword-highlight markup from the title.
        goods_title = goods_title.replace('/<span class=H>', '').replace('</span>', '').replace('<span class=H>', '')
        print(goods_title)
        goods_price = _first_match(goods_data, '$.priceWap')
        print(goods_price)
        goods_store = _first_match(goods_data, '$.nick')
        print(goods_store)
        store_addr = _first_match(goods_data, '$.procity')
        print(store_addr)
        buy_count = _first_match(goods_data, '$.realSales')
        if not buy_count:
            # No sales figure present: report "0 buyers".
            buy_count = '0人付款'
        print(buy_count)

        # BUG FIX: the original `goods_info =` had no right-hand side
        # (SyntaxError). Build the row in header order:
        # 标题 / 价格 / 店铺 / 地区 / 销量.
        goods_info = [goods_title, goods_price, goods_store, store_addr, buy_count]
        write_excel(word, goods_info)


def _first_match(obj, expr, default=''):
    """Return the first jsonpath match of `expr` on `obj`, or `default`
    when nothing matched (jsonpath returns False in that case)."""
    result = jsonpath(obj, expr)
    return result[0] if result else default


if __name__ == '__main__':
    # Prompt for the product keyword; it is used both as the search term
    # and as the Excel file name.
    search_word = input('请输入商品名称:')

    # Launch Chrome and open the Taobao home page.
    # NOTE: `driver` is a module-level global that search_data() relies on.
    driver = webdriver.Chrome()
    driver.get('https://www.taobao.com/')
    driver.maximize_window()

    # Create the output Excel file with its header row.
    create_excel(search_word)

    # Log in, search and scrape the results into the Excel file.
    search_data(search_word)

"""
以下是报错
"""
D:\Users\XXXXX\AppData\Local\Programs\Python\Python311\python.exe H:\Users\天猫数据抓取\天猫数据抓取\数据抓取.py
请输入商品名称:铜螺母
Traceback (most recent call last):
File "H:\Users\天猫数据抓取\天猫数据抓取\数据抓取.py", line 106, in <module>
    search_data(search_word)
File "H:\Users\天猫数据抓取\天猫数据抓取\数据抓取.py", line 49, in search_data
    driver.find_element(By.XPATH, '//a[@class="sn-login"]').click()
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Users\XXXXX\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 741, in find_element
    return self.execute(Command.FIND_ELEMENT, {"using": by, "value": value})["value"]
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Users\XXXXX\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 347, in execute
    self.error_handler.check_response(response)
File "D:\Users\XXXXX\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 229, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@class="sn-login"]"}
(Session info: chrome=123.0.6312.59); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
        GetHandleVerifier
        (No symbol)
        (No symbol)
        (No symbol)
        (No symbol)
        (No symbol)
        (No symbol)
        (No symbol)
        (No symbol)
        (No symbol)
        (No symbol)
        GetHandleVerifier
        GetHandleVerifier
        GetHandleVerifier
        GetHandleVerifier
        (No symbol)
        (No symbol)
        (No symbol)
        (No symbol)
        BaseThreadInitThunk
        RtlUserThreadStart


进程已结束,退出代码为 1

HEDouble 发表于 2024-3-30 12:43

新手,在网上找了一个爬取淘宝数据的 然后就报错了...

dwn6905 发表于 2024-3-30 13:48


xpath表达式有问题吧,可以用浏览器开发者工具copy full xpath

只有午安 发表于 2024-3-30 13:50

本帖最后由 只有午安 于 2024-3-30 13:52 编辑

selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@class="sn-login"]"}

"//a[@class="sn-login"]"
没有找到这个元素,先把这个改一下

改成 //*[@id="J_SiteNavLogin"]/div/div/a   试试

HEDouble 发表于 2024-3-30 15:27

只有午安 发表于 2024-3-30 13:50
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate elemen ...

您好 感谢您的讲解 更换代码后还是相同的报错 可以跳转页面到淘宝扫码登录 但后面就直接关闭浏览器报错了 报错结果也是和之前一样

HEDouble 发表于 2024-3-30 15:28

dwn6905 发表于 2024-3-30 13:48
xpath表达式有问题吧,可以用浏览器开发者工具copy full xpath

您好 抱歉我是小白 不太理解您说的 我该如何进行替换或者操作 谢谢

dwn6905 发表于 2024-3-30 15:44

按F12打开开发者工具,或通过在页面上右键,最下面有个检查,点击也能打开。
然后找到左上角一个鼠标样式的按钮。这个可以通过点击真实页面的元素,快速定位到它在网页源码中的位置。
https://attach.52pojie.cn//forum/202403/30/153742psqippqeornbbru4.png?l
选中元素后,开发者工具会高亮显示选中的元素,你可以再看看你需要的具体到哪个标签,然后按下图操作
https://attach.52pojie.cn//forum/202403/30/153744e1lhk5r5lrtlturr.png?l
另外还有一点就是,双引号里嵌套使用双引号的话需要转义字符,你可以单引号嵌套双引号或者双引号嵌套单引号,在你这里作用是一样的。

HEDouble 发表于 2024-3-30 15:56

dwn6905 发表于 2024-3-30 15:44
按F12打开开发者工具,或通过在页面上右键,最下面有个检查,点击也能打开。
然后找到左上角一个鼠标样式 ...

您好 我是想爬取淘宝某个商品链接里面的 名字 SKU规格(商品分类)和对应的价格 这种有没有简单方法来实现 或者说您知道有这种教学帖子吗

dwn6905 发表于 2024-3-30 16:25

HEDouble 发表于 2024-3-30 15:56
您好 我是想爬取淘宝某个商品链接里面的 名字 SKU规格(商品分类)和对应的价格 这种有没有简单方法来实 ...



我看你爬虫逻辑写得没啥问题。我这边网有点卡,会响应超时导致程序报错。我给你的代码加了几行,主要是获取Chrome对象时加了几个参数,让浏览器在程序结束后不会自动关闭,方便你调试。
```python

import json
import time


import openpyxl
import requests
from jsonpath import jsonpath
from selenium.webdriver import Chrome # 导入Chrome包
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options# 导入参数设置的包
"""

"""

# Request headers for the Taobao mtop search API.
# NOTE(review): the Cookie value below is a hard-coded, user-specific session
# (_m_h5_tk token, login cookies, etc.). These expire, after which the API
# call returns an error/empty payload — refresh the cookie from a logged-in
# browser session before running.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    'Cookie': 'thw=cn; ariaDefaultTheme=undefined; _m_h5_tk=e8b578691cdbf72619e2571ff1290233_1703845222215; _m_h5_tk_enc=3f1ecbe0ee0ae145cfdbaaad4f79d8ca; _samesite_flag_=true; cookie2=11d16aa0a97fdbc9c1ee20904e824d18; t=70a4cbfa9d516484a66819c0facef55a; _tb_token_=b33f7edbf651; xlly_s=1; mt=ci=0_0; cna=l7sWHtN+mEICAToULNELmVHd; sgcookie=E100pyHgp%2FS3k56ATKLIzx32byCl7KsbhBGan%2B7061jzyqIBciECvSb2DtDB4HZBUGmmvUHMnEEmrQg1hLoKbkeySXCgtPZyejZXKd1oRXCevR6oQztW8tIkaBBwmruacXDB; unb=2206883529046; uc3=lg2=URm48syIIVrSKA%3D%3D&nk2=F5RFh6bUkulDP3A%3D&vt3=F8dD3CbxhTqJzEWvbGE%3D&id2=UUphzOrCqmq%2BnowGGA%3D%3D; csg=ac21f60b; lgc=tb078603918; cancelledSubSites=empty; cookie17=UUphzOrCqmq%2BnowGGA%3D%3D; dnk=tb078603918; skt=a4ee62e329cb6a26; existShop=MTcwMzkyNDA3Ng%3D%3D; uc4=nk4=0%40FY4O7o%2BW2sUtoZOvRjnQm%2FLlrrWYbQ%3D%3D&id4=0%40U2grF8GKQLKzjVQmoKP4swQldtKSoZBG; tracknick=tb078603918; _cc_=W5iHLLyFfA%3D%3D; _l_g_=Ug%3D%3D; sg=864; _nk_=tb078603918; cookie1=ACznZRRtgBulyxtCJb4u0on6oLlfL6syfEnnWiB3%2FOM%3D; uc1=cookie16=VFC%2FuZ9az08KUQ56dCrZDlbNdA%3D%3D&cookie15=VT5L2FSpMGV7TQ%3D%3D&cookie21=UIHiLt3xTIkz&pas=0&existShop=false&cookie14=UoYelqB%2FyEjSUA%3D%3D; l=fBg5WSU7PoD1i0GYBO5Courza7796IRb4sPzaNbMiIEGa1Ph9Fsl9NCOtJNkWdtjgTCXZetrip0J_dLHR3fRwxDDB3h2q_zr3xv9QaVb5; tfstk=e7ekng9LM7lSJN9_DzH5hgcsMDCYVUMINypKJv3FgquXeXudNyc3JrGUeYE8o2zT-De8pJVXKlZQegMdVYZSdv7OWOe3VuMIYVBAyOQ84vK5WNBOHKff7A7Twxc11idX91nFz8SosVxN_xLIVCYmSuuDIowm4U0GCqvEY-ooZJrro0mun0co7gR9gCRKnpiVv-RBObojmVpIGhkRqG8nAijD_MGrc0gOmiABObojmVIcmC5SamiSW; isg=BPf3m4LaXkFHC9rJhFBVJC91hutBvMse20xsO0mkE0Yt-Bc6UYxbbrXe2limEKOW',
    'Referer': 'https://s.taobao.com/'
}


# Create the output Excel file with its header row.
def create_excel(file_name):
    """Create <file_name>.xlsx containing only the header row."""
    wb = openpyxl.Workbook()
    sheet = wb.active
    # Header columns: title, price, shop, region, sales volume.
    sheet.append(['标题', '价格', '店铺', '地区', '销量'])
    wb.save(f'{file_name}.xlsx')


# Append one scraped record to the Excel file.
def write_excel(name, shop_info):
    """Append `shop_info` as the next row of <name>.xlsx and save."""
    wb = openpyxl.load_workbook(f'{name}.xlsx')
    # The active sheet is the one create_excel() wrote the header into.
    wb.active.append(shop_info)
    wb.save(f'{name}.xlsx')


# Search for a product and scrape the result list.
def search_data(word):
    """Log in to Taobao, search for `word`, then scrape the search API
    response and append each item to <word>.xlsx via write_excel().

    Relies on module-level globals: `driver` (a WebDriver already on the
    Taobao home page) and `headers` (request headers with session cookie).
    """
    # Open the QR-code login panel (full XPaths copied from dev tools).
    driver.find_element(By.XPATH, '/html/body/div/div/div/ul/li/div/div/a').click()
    driver.find_element(By.XPATH, '/html/body/div/div/div/div/div/div/div/i').click()

    # Give the user ten seconds to scan the QR code and log in.
    time.sleep(10)

    # Type the keyword into the search box and click the search button.
    driver.find_element(By.XPATH, '//input').send_keys(word)
    driver.find_element(By.XPATH, '//div[@class="rax-view-v2 SearchInput--searchButton--1Sz2UIn"]').click()
    # Implicit wait: subsequent element lookups retry for up to 3 seconds.
    driver.implicitly_wait(3)

    # NOTE(review): `t` and `sign` in this URL are a captured timestamp and
    # signature — once they expire the API rejects the request; they must be
    # regenerated per call.  (URL was split over two lines in the original,
    # which was a syntax error; joined here via implicit concatenation.)
    url = ('https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?jsv=2.6.2&appKey=12574478&t=1703923985466&sign=1327afc3c96e32d2c4d37d258d5ccf57&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2234385%22%2C%22params%22%3A%22%7B%5C%22device%5C%22%3A%5C%22HMA-AL00%5C%22%2C%5C%22isBeta%5C%22%3A%5C%22false%5C%22%2C%5C%22grayHair%5C%22%3A%5C%22false%5C%22%2C%5C%22from%5C%22%3A%5C%22nt_history%5C%22%2C%5C%22brand%5C%22%3A%5C%22HUAWEI%5C%22%2C%5C%22info%5C%22%3A%5C%22wifi%5C%22%2C%5C%22index%5C%22%3A%5C%224%5C%22%2C%5C%22rainbow%5C%22%3A%5C%22%5C%22%2C%5C%22schemaType%5C%22%3A%5C%22auction%5C%22%2C%5C%22elderHome%5C%22%3A%5C%22false%5C%22%2C%5C%22isEnterSrpSearch%5C%22%3A%5C%22true%5C%22%2C%5C%22newSearch%5C%22%3A%5C%22false%5C%22%2C%5C%22network%5C%22%3A%5C%22wifi%5C%22%2C%5C%22subtype%5C%22%3A%5C%22%5C%22%2C%5C%22hasPreposeFilter%5C%22%3A%5C%22false%5C%22%2C%5C%22prepositionVersion%5C%22%3A%5C%22v2%5C%22%2C%5C%22client_os%5C%22%3A%5C%22Android%5C%22%2C%5C%22gpsEnabled%5C%22%3A%5C%22false%5C%22%2C%5C%22searchDoorFrom%5C%22%3A%5C%22srp%5C%22%2C%5C%22debug_rerankNewOpenCard%5C%22%3A%5C%22false%5C%22%2C%5C%22homePageVersion%5C%22%3A%5C%22v7%5C%22%2C%5C%22searchElderHomeOpen%5C%22%3A%5C%22false%5C%22%2C%5C%22search_action%5C%22%3A%5C%22initiative%5C%22%2C%5C%22sugg%5C%22%3A%5C%22_4_1%5C%22%2C%5C%22sversion%5C%22%3A%5C%2213.6%5C%22%2C%5C%22style%5C%22%3A%5C%22list%5C%22%2C%5C%22ttid%5C%22%3A%5C%22600000%40taobao_pc_10.7.0%5C%22%2C%5C%22needTabs%5C%22%3A%5C%22true%5C%22%2C%5C%22areaCode%5C%22%3A%5C%22CN%5C%22%2C%5C%22vm%5C%22%3A%5C%22nw%5C%22%2C%5C%22countryNum%5C%22%3A%5C%22156%5C%22%2C%5C%22m%5C%22%3A%5C%22pc%5C%22%2C%5C%22page%5C%22%3A1%2C%5C%22n%5C%22%3A48%2C%5C%22q%5C%22%3A%5C%22%25E5%258D%258E%25E4%25B8%25BA%25E6%2589%258B%25E6%259C%25BA%5C%22%2C%5C%22tab%5C%22%3A%5C%22mall%5C%22%2C%5C%22pageSize%5C%22%3A48%2C%5C%22totalPage%5C%22%3A100%2C%5C%22totalResults%5C'
           '%22%3A4800%2C%5C%22sourceS%5C%22%3A%5C%220%5C%22%2C%5C%22sort%5C%22%3A%5C%22_coefp%5C%22%2C%5C%22bcoffset%5C%22%3A%5C%22%5C%22%2C%5C%22ntoffset%5C%22%3A%5C%22%5C%22%2C%5C%22filterTag%5C%22%3A%5C%22%5C%22%2C%5C%22service%5C%22%3A%5C%22%5C%22%2C%5C%22prop%5C%22%3A%5C%22%5C%22%2C%5C%22loc%5C%22%3A%5C%22%5C%22%2C%5C%22start_price%5C%22%3Anull%2C%5C%22end_price%5C%22%3Anull%2C%5C%22startPrice%5C%22%3Anull%2C%5C%22endPrice%5C%22%3Anull%2C%5C%22itemIds%5C%22%3Anull%2C%5C%22p4pIds%5C%22%3Anull%7D%22%7D')
    response = requests.get(url, headers=headers)

    # Strip the JSONP wrapper "mtopjsonp1(...)" to get the raw JSON text.
    data = response.text.replace(' mtopjsonp1(', '')[:-1]
    json_data = json.loads(data)

    # jsonpath() returns a list of matches, or False when nothing matched.
    matched = jsonpath(json_data, '$.data.itemsArray')
    if not matched:
        # Likely an expired cookie / stale sign in the hard-coded URL.
        print('未获取到商品数据')
        return
    # BUG FIX: jsonpath wraps its result — `matched` is [itemsArray], so the
    # original code iterated over a one-element wrapper instead of the items.
    goods_data_list = matched[0]

    for goods_data in goods_data_list:
        # BUG FIX: every jsonpath() call returns a list (or False); the
        # original called str.replace() on that list -> AttributeError.
        goods_title = _first_match(goods_data, '$.title')
        # Strip the <span class=H> keyword-highlight markup from the title.
        goods_title = goods_title.replace('/<span class=H>', '').replace('</span>', '').replace('<span class=H>', '')
        print(goods_title)
        goods_price = _first_match(goods_data, '$.priceWap')
        print(goods_price)
        goods_store = _first_match(goods_data, '$.nick')
        print(goods_store)
        store_addr = _first_match(goods_data, '$.procity')
        print(store_addr)
        buy_count = _first_match(goods_data, '$.realSales')
        if not buy_count:
            # No sales figure present: report "0 buyers".
            buy_count = '0人付款'
        print(buy_count)

        # BUG FIX: the original `goods_info =` had no right-hand side
        # (SyntaxError). Build the row in header order:
        # 标题 / 价格 / 店铺 / 地区 / 销量.
        goods_info = [goods_title, goods_price, goods_store, store_addr, buy_count]
        write_excel(word, goods_info)


def _first_match(obj, expr, default=''):
    """Return the first jsonpath match of `expr` on `obj`, or `default`
    when nothing matched (jsonpath returns False in that case)."""
    result = jsonpath(obj, expr)
    return result[0] if result else default


if __name__ == '__main__':
    option = Options()
    option.add_experimental_option('detach', True)  # keep the browser window open after the script exits (selenium closes it by default) — easier to debug
    option.add_experimental_option('excludeSwitches', ['enable-automation'])  # hide the "Chrome is being controlled by automated test software" banner
    option.add_argument('--disable-blink-features=AutomationControlled')  # disable Blink's AutomationControlled feature flag
    # Prompt for the product keyword; it is used both as the search term
    # and as the Excel file name.
    search_word = input('请输入商品名称:')

    # Launch Chrome with the options above and open the Taobao home page.
    # NOTE: `driver` is a module-level global that search_data() relies on.
    driver = Chrome(options=option)
    driver.get('https://www.taobao.com/')
    driver.maximize_window()

    # Create the output Excel file with its header row.
    create_excel(search_word)

    # Log in, search and scrape the results into the Excel file.
    search_data(search_word)
```

然后,我上面回复你的`copy full XPath`可以看看,我改完登录就没问题了,就是登录完,有点卡,网页没加载完,程序等不到就报错了。

我觉得你应该可以写出来的,只爬这些数据的话,很简单的。

只有午安 发表于 2024-3-30 16:35

HEDouble 发表于 2024-3-30 15:27
您好 感谢您的讲解 更换代码后还是相同的报错 可以跳转页面到淘宝扫码登录 但后面就直接关闭浏览器报错了 ...

可以跳转页面到淘宝扫码登录,但后面就直接关闭浏览器报错
是可以扫码,然后才关闭报错,还是还没有出现扫码页面
页: [1] 2
查看完整版本: python报错请求 麻烦帮忙看看