吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 2624|回复: 9
收起左侧

[求助] Python 爬虫有时会出现这个错误,请大佬帮忙看下

  [复制链接]
molinchz 发表于 2021-10-24 12:00
timeout                                   Traceback (most recent call last)
E:\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
    168         try:
--> 169             conn = connection.create_connection(
    170                 (self._dns_host, self.port), self.timeout, **extra_kw

E:\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
     95     if err is not None:
---> 96         raise err
     97

E:\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
     85                 sock.bind(source_address)
---> 86             sock.connect(sa)
     87             return sock

timeout: timed out

During handling of the above exception, another exception occurred:

ConnectTimeoutError                       Traceback (most recent call last)
E:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    698             # Make the request on the httplib connection object.
--> 699             httplib_response = self._make_request(
    700                 conn,

E:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    393             else:
--> 394                 conn.request(method, url, **httplib_request_kw)
    395

E:\Anaconda3\lib\site-packages\urllib3\connection.py in request(self, method, url, body, headers)
    233             headers["User-Agent"] = _get_default_user_agent()
--> 234         super(HTTPConnection, self).request(method, url, body=body, headers=headers)
    235

E:\Anaconda3\lib\http\client.py in request(self, method, url, body, headers, encode_chunked)
   1254         """Send a complete request to the server."""
-> 1255         self._send_request(method, url, body, headers, encode_chunked)
   1256

E:\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1300             body = _encode(body, 'body')
-> 1301         self.endheaders(body, encode_chunked=encode_chunked)
   1302

E:\Anaconda3\lib\http\client.py in endheaders(self, message_body, encode_chunked)
   1249             raise CannotSendHeader()
-> 1250         self._send_output(message_body, encode_chunked=encode_chunked)
   1251

E:\Anaconda3\lib\http\client.py in _send_output(self, message_body, encode_chunked)
   1009         del self._buffer[:]
-> 1010         self.send(msg)
   1011

E:\Anaconda3\lib\http\client.py in send(self, data)
    949             if self.auto_open:
--> 950                 self.connect()
    951             else:

E:\Anaconda3\lib\site-packages\urllib3\connection.py in connect(self)
    199     def connect(self):
--> 200         conn = self._new_conn()
    201         self._prepare_conn(conn)

E:\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
    173         except SocketTimeout:
--> 174             raise ConnectTimeoutError(
    175                 self,

ConnectTimeoutError: (<urllib3.connection.HTTPConnection object at 0x0000000004E7BF70>, 'Connection to vip.stock.finance.sina.com.cn timed out. (connect timeout=3)')

During handling of the above exception, another exception occurred:

MaxRetryError                             Traceback (most recent call last)
E:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    438             if not chunked:
--> 439                 resp = conn.urlopen(
    440                     method=request.method,

E:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    754
--> 755             retries = retries.increment(
    756                 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]

E:\Anaconda3\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    573         if new_retry.is_exhausted():
--> 574             raise MaxRetryError(_pool, url, error or ResponseError(cause))
    575

MaxRetryError: HTTPConnectionPool(host='vip.stock.finance.sina.com.cn', port=80): Max retries exceeded with url: /quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=+1&num=40&sort=symbol&asc=1&node=chgn_730454&symbol=&_s_r_a=+init (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000000004E7BF70>, 'Connection to vip.stock.finance.sina.com.cn timed out. (connect timeout=3)'))

During handling of the above exception, another exception occurred:

ConnectTimeout                            Traceback (most recent call last)
<ipython-input-60-7da0076a46de> in <module>
     85 # sw2_url='http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=1&num=40&sort=symbol&asc=1&node=sw2_730200&symbol=&_s_r_a=init'
     86
---> 87 gg_json=requests.get(url=url,params=params,headers=headers,timeout=(3,5))    #取JSON数据,类型为list
     88 gg_data=gg_json.json()          #转换成字典
     89 for dict in gg_data:          #遍历字典取代码和名称

E:\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
     74
     75     kwargs.setdefault('allow_redirects', True)
---> 76     return request('get', url, params=params, **kwargs)
     77
     78

E:\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
     59     # cases, and look like a memory leak in others.
     60     with sessions.Session() as session:
---> 61         return session.request(method=method, url=url, **kwargs)
     62
     63

E:\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    540         }
    541         send_kwargs.update(settings)
--> 542         resp = self.send(prep, **send_kwargs)
    543
    544         return resp

E:\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
    653
    654         # Send the request
--> 655         r = adapter.send(request, **kwargs)
    656
    657         # Total elapsed time of the request (approximately)

E:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    502                 # TODO: Remove this in 3.0.0: see #2811
    503                 if not isinstance(e.reason, NewConnectionError):
--> 504                     raise ConnectTimeout(e, request=request)
    505
    506             if isinstance(e.reason, ResponseError):

ConnectTimeout: HTTPConnectionPool(host='vip.stock.finance.sina.com.cn', port=80): Max retries exceeded with url: /quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=+1&num=40&sort=symbol&asc=1&node=chgn_730454&symbol=&_s_r_a=+init (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000000004E7BF70>, 'Connection to vip.stock.finance.sina.com.cn timed out. (connect timeout=3)'))

免费评分

参与人数 1吾爱币 +1 热心值 +1 收起 理由
yan182 + 1 + 1 我很赞同!

查看全部评分

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

wkfy 发表于 2021-10-24 12:05
1.降低进程并发数
2.延长超时时间
3.增加延时间隔
4.求解决问题贴代码,不是贴错误信息。
ForGot_227 发表于 2021-10-24 12:19
该说的楼上都说了,可能是你请求太快被拉黑之类的。

timeout: timed out
 楼主| molinchz 发表于 2021-10-24 12:30
wkfy 发表于 2021-10-24 12:05
1.降低进程并发数
2.延长超时时间
3.增加延时间隔

代码如下。我是在 Jupyter Notebook 网页上测试的,多点几次运行这段代码,就有一定几率出现这种超时错误:


import requests
import json
import random

def get_user_agent_pc():
    """Return a randomly chosen desktop-browser User-Agent string.

    One entry is picked uniformly at random from a fixed pool of Chrome
    User-Agent strings, so repeated calls may return different values.

    Returns:
        str: A well-formed User-Agent header value beginning with
        ``Mozilla/5.0 (``.
    """
    # Chrome
    # NOTE: the first three entries previously carried a stray ".html"
    # (e.g. "Mozilla/5.0.html", "Chrome/39.0.html.2171.71"), which made them
    # malformed product tokens; they are restored to the real version strings.
    user_agent_pc = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
    )
    return random.choice(user_agent_pc)

hy_id = "chgn_730454"

# Request parameters for the per-industry constituent-stock page.
# The value must be a plain string: the previous `{hy_id}` built a
# one-element set, which only worked because requests iterates over it.
params = {
    "node": hy_id,
}

# The server intermittently fails to accept the connection within the
# 3-second connect timeout, so retry a few times before giving up instead of
# letting the first timeout abort the run.
MAX_ATTEMPTS = 3
for attempt in range(1, MAX_ATTEMPTS + 1):
    try:
        # timeout=(connect, read) in seconds.
        gg_json = requests.get(url=url, params=params, headers=headers, timeout=(3, 5))
        break
    except requests.exceptions.Timeout:
        if attempt == MAX_ATTEMPTS:
            raise
        print("request timed out, retrying (%d/%d)" % (attempt, MAX_ATTEMPTS))

# The `with` block closes the response even if decoding or iteration raises.
with gg_json:
    # Fail loudly on 4xx/5xx instead of trying to decode an error page.
    gg_json.raise_for_status()
    gg_data = gg_json.json()
    # Each element describes one stock; `stock` avoids shadowing builtin `dict`.
    for stock in gg_data:
        print(stock['name'])
茫茫狐 发表于 2021-10-24 13:28
连接超时,换个请求头试试
 楼主| molinchz 发表于 2021-10-24 13:39
茫茫狐 发表于 2021-10-24 13:28
连接超时,换个请求头试试

# HTTP request headers sent with the quote query; filled in key by key so the
# insertion order matches the order the server previously saw.
headers = {}
headers["Host"] = "vip.stock.finance.sina.com.cn"
# A User-Agent is picked once, when this mapping is built.
headers["user-agent"] = get_user_agent_pc()
headers["Referer"] = "http://vip.stock.finance.sina.com.cn/mkt/"
headers["Content-type"] = "application/x-www-form-urlencoded"

这些请求头都换过了
Teachers 发表于 2021-10-24 15:13
爬的是新浪股票啊,打小就看你"刑"。你这个问题把超时时间加大一些,再用 try/except 捕获超时异常并重试就行了。
emir 发表于 2021-10-24 18:52
多弄些代理 IP 啊
Establish 发表于 2021-10-24 19:15
molinchz 发表于 2021-10-24 12:30
代码 如下,我是在Jupyter Notebook 网页上测试,点几下运行这个代码 ,就有几率出现这种

timeout 加大点试试,然后加个代理池跑。
 楼主| molinchz 发表于 2021-10-24 22:03
谢谢各位大佬
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-11-25 21:48

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表