molinchz 发表于 2021-10-24 12:00

python爬虫有时出现这个错误 ,请大佬帮忙看下

timeout                                 Traceback (most recent call last)
E:\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
    168         try:
--> 169             conn = connection.create_connection(
    170               (self._dns_host, self.port), self.timeout, **extra_kw

E:\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
   95   if err is not None:
---> 96         raise err
   97

E:\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
   85               sock.bind(source_address)
---> 86             sock.connect(sa)
   87             return sock

timeout: timed out

During handling of the above exception, another exception occurred:

ConnectTimeoutError                     Traceback (most recent call last)
E:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    698             # Make the request on the httplib connection object.
--> 699             httplib_response = self._make_request(
    700               conn,

E:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    393             else:
--> 394               conn.request(method, url, **httplib_request_kw)
    395

E:\Anaconda3\lib\site-packages\urllib3\connection.py in request(self, method, url, body, headers)
    233             headers["User-Agent"] = _get_default_user_agent()
--> 234         super(HTTPConnection, self).request(method, url, body=body, headers=headers)
    235

E:\Anaconda3\lib\http\client.py in request(self, method, url, body, headers, encode_chunked)
   1254         """Send a complete request to the server."""
-> 1255         self._send_request(method, url, body, headers, encode_chunked)
   1256

E:\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1300             body = _encode(body, 'body')
-> 1301         self.endheaders(body, encode_chunked=encode_chunked)
   1302

E:\Anaconda3\lib\http\client.py in endheaders(self, message_body, encode_chunked)
   1249             raise CannotSendHeader()
-> 1250         self._send_output(message_body, encode_chunked=encode_chunked)
   1251

E:\Anaconda3\lib\http\client.py in _send_output(self, message_body, encode_chunked)
   1009         del self._buffer[:]
-> 1010         self.send(msg)
   1011

E:\Anaconda3\lib\http\client.py in send(self, data)
    949             if self.auto_open:
--> 950               self.connect()
    951             else:

E:\Anaconda3\lib\site-packages\urllib3\connection.py in connect(self)
    199   def connect(self):
--> 200         conn = self._new_conn()
    201         self._prepare_conn(conn)

E:\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
    173         except SocketTimeout:
--> 174             raise ConnectTimeoutError(
    175               self,

ConnectTimeoutError: (<urllib3.connection.HTTPConnection object at 0x0000000004E7BF70>, 'Connection to vip.stock.finance.sina.com.cn timed out. (connect timeout=3)')

During handling of the above exception, another exception occurred:

MaxRetryError                           Traceback (most recent call last)
E:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    438             if not chunked:
--> 439               resp = conn.urlopen(
    440                     method=request.method,

E:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    754
--> 755             retries = retries.increment(
    756               method, url, error=e, _pool=self, _stacktrace=sys.exc_info()

E:\Anaconda3\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    573         if new_retry.is_exhausted():
--> 574             raise MaxRetryError(_pool, url, error or ResponseError(cause))
    575

MaxRetryError: HTTPConnectionPool(host='vip.stock.finance.sina.com.cn', port=80): Max retries exceeded with url: /quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=+1&num=40&sort=symbol&asc=1&node=chgn_730454&symbol=&_s_r_a=+init (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000000004E7BF70>, 'Connection to vip.stock.finance.sina.com.cn timed out. (connect timeout=3)'))

During handling of the above exception, another exception occurred:

ConnectTimeout                            Traceback (most recent call last)
<ipython-input-60-7da0076a46de> in <module>
   85 # sw2_url='http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=1&num=40&sort=symbol&asc=1&node=sw2_730200&symbol=&_s_r_a=init'
   86
---> 87 gg_json=requests.get(url=url,params=params,headers=headers,timeout=(3,5))    #取JSON数据,类型为list
   88 gg_data=gg_json.json()          #转换成字典
   89 for dict in gg_data:          #遍历字典取代码和名称

E:\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
   74
   75   kwargs.setdefault('allow_redirects', True)
---> 76   return request('get', url, params=params, **kwargs)
   77
   78

E:\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
   59   # cases, and look like a memory leak in others.
   60   with sessions.Session() as session:
---> 61         return session.request(method=method, url=url, **kwargs)
   62
   63

E:\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    540         }
    541         send_kwargs.update(settings)
--> 542         resp = self.send(prep, **send_kwargs)
    543
    544         return resp

E:\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
    653
    654         # Send the request
--> 655         r = adapter.send(request, **kwargs)
    656
    657         # Total elapsed time of the request (approximately)

E:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    502               # TODO: Remove this in 3.0.0: see #2811
    503               if not isinstance(e.reason, NewConnectionError):
--> 504                     raise ConnectTimeout(e, request=request)
    505
    506             if isinstance(e.reason, ResponseError):

ConnectTimeout: HTTPConnectionPool(host='vip.stock.finance.sina.com.cn', port=80): Max retries exceeded with url: /quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=+1&num=40&sort=symbol&asc=1&node=chgn_730454&symbol=&_s_r_a=+init (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000000004E7BF70>, 'Connection to vip.stock.finance.sina.com.cn timed out. (connect timeout=3)'))

wkfy 发表于 2021-10-24 12:05

1.降低进程并发数
2.延长超时时间
3.增加延时间隔
4.求解决问题贴代码,不是贴错误信息。

ForGot_227 发表于 2021-10-24 12:19

该说的楼上都说了,可能是你请求太快被拉黑之类的。

timeout: timed out

molinchz 发表于 2021-10-24 12:30

wkfy 发表于 2021-10-24 12:05
1.降低进程并发数
2.延长超时时间
3.增加延时间隔


代码 如下,我是在Jupyter Notebook 网页上测试,点几下运行这个代码 ,就有几率出现这种


import requests
import json
import random

def get_user_agent_pc():
    """Return a randomly chosen desktop (PC) User-Agent string.

    Rotating the User-Agent between requests makes the crawler look less
    like a single automated client, which lowers the risk of the server
    throttling or blocking it.
    """
    # FIX: the original strings were corrupted by a copy/paste artifact that
    # inserted ".html" into the version numbers (e.g. "Chrome/39.0.html.2171.71");
    # servers may treat such malformed User-Agents as bot traffic.
    user_agent_pc = [
        # Chrome on various platforms
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
    ]
    return random.choice(user_agent_pc)

hy_id = "chgn_730454"  # Sina Finance industry node id (concept board)

# Endpoint for the Sina Finance "industry constituent stocks" JSON API.
# FIX: `url` was undefined in the pasted snippet (lived in another cell);
# reconstructed from the URL visible in the traceback.
url = ("http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php"
      "/Market_Center.getHQNodeData")

# Request parameters for one page of constituents of industry `hy_id`.
# FIX: the node value must be the plain string `hy_id`, not a set literal
# `{hy_id}` — the set serialized badly (the traceback URL shows `page=+1`
# and `_s_r_a=+init` artifacts from stray whitespace/containers).
params = {
    "page": 1,
    "num": 40,
    "sort": "symbol",
    "asc": 1,
    "node": hy_id,
    "symbol": "",
    "_s_r_a": "init",
}

# FIX: `headers` was also undefined here; defined locally so the cell is
# self-contained. A fresh random User-Agent is drawn per request.
headers = {
    "Host": "vip.stock.finance.sina.com.cn",
    "user-agent": get_user_agent_pc(),
    "Referer": "http://vip.stock.finance.sina.com.cn/mkt/",
    "Content-type": "application/x-www-form-urlencoded",
}

try:
    # timeout=(connect, read) in seconds; the original 3 s connect timeout
    # intermittently fired on a slow link — lengthened per the thread's advice.
    gg_json = requests.get(url=url, params=params, headers=headers, timeout=(10, 15))
    gg_json.raise_for_status()
    gg_data = gg_json.json()  # endpoint returns a JSON list of per-stock dicts
    for stock in gg_data:     # renamed: `dict` shadowed the builtin
        print(stock['name'])
    gg_json.close()
except requests.exceptions.RequestException as exc:
    # ConnectTimeout here is intermittent (rate limiting / flaky network);
    # report and let the notebook cell finish instead of raising a traceback.
    print(f"request failed: {exc}")

茫茫狐 发表于 2021-10-24 13:28

连接超时,换个请求头试试

molinchz 发表于 2021-10-24 13:39

茫茫狐 发表于 2021-10-24 13:28
连接超时,换个请求头试试

# HTTP request headers for the Sina Finance quote API; the User-Agent is
# randomized on each evaluation via get_user_agent_pc() to reduce the
# chance of the server flagging the crawler.
headers={
   "Host": "vip.stock.finance.sina.com.cn",
   "user-agent": get_user_agent_pc(),  # random desktop UA string
   "Referer": "http://vip.stock.finance.sina.com.cn/mkt/",
    "Content-type": "application/x-www-form-urlencoded"
}

都 换过了

Teachers 发表于 2021-10-24 15:13

爬的新浪股票啊,打小就看你刑,你这问题加下超时就行了还有try

emir 发表于 2021-10-24 18:52

IP多弄些代理啊

Establish 发表于 2021-10-24 19:15

molinchz 发表于 2021-10-24 12:30
代码 如下,我是在Jupyter Notebook 网页上测试,点几下运行这个代码 ,就有几率出现这种




timeout加大点试试,然后加个代理池跑

molinchz 发表于 2021-10-24 22:03

谢谢各位大佬
页: [1]
查看完整版本: python爬虫有时出现这个错误 ,请大佬帮忙看下