Python 爬虫有时出现这个错误,请大佬帮忙看下:
timeout Traceback (most recent call last)E:\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
168 try:
--> 169 conn = connection.create_connection(
170 (self._dns_host, self.port), self.timeout, **extra_kw
E:\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
95 if err is not None:
---> 96 raise err
97
E:\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
85 sock.bind(source_address)
---> 86 sock.connect(sa)
87 return sock
timeout: timed out
During handling of the above exception, another exception occurred:
ConnectTimeoutError Traceback (most recent call last)
E:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
698 # Make the request on the httplib connection object.
--> 699 httplib_response = self._make_request(
700 conn,
E:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
393 else:
--> 394 conn.request(method, url, **httplib_request_kw)
395
E:\Anaconda3\lib\site-packages\urllib3\connection.py in request(self, method, url, body, headers)
233 headers["User-Agent"] = _get_default_user_agent()
--> 234 super(HTTPConnection, self).request(method, url, body=body, headers=headers)
235
E:\Anaconda3\lib\http\client.py in request(self, method, url, body, headers, encode_chunked)
1254 """Send a complete request to the server."""
-> 1255 self._send_request(method, url, body, headers, encode_chunked)
1256
E:\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers, encode_chunked)
1300 body = _encode(body, 'body')
-> 1301 self.endheaders(body, encode_chunked=encode_chunked)
1302
E:\Anaconda3\lib\http\client.py in endheaders(self, message_body, encode_chunked)
1249 raise CannotSendHeader()
-> 1250 self._send_output(message_body, encode_chunked=encode_chunked)
1251
E:\Anaconda3\lib\http\client.py in _send_output(self, message_body, encode_chunked)
1009 del self._buffer[:]
-> 1010 self.send(msg)
1011
E:\Anaconda3\lib\http\client.py in send(self, data)
949 if self.auto_open:
--> 950 self.connect()
951 else:
E:\Anaconda3\lib\site-packages\urllib3\connection.py in connect(self)
199 def connect(self):
--> 200 conn = self._new_conn()
201 self._prepare_conn(conn)
E:\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
173 except SocketTimeout:
--> 174 raise ConnectTimeoutError(
175 self,
ConnectTimeoutError: (<urllib3.connection.HTTPConnection object at 0x0000000004E7BF70>, 'Connection to vip.stock.finance.sina.com.cn timed out. (connect timeout=3)')
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
E:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
438 if not chunked:
--> 439 resp = conn.urlopen(
440 method=request.method,
E:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
754
--> 755 retries = retries.increment(
756 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()
E:\Anaconda3\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
573 if new_retry.is_exhausted():
--> 574 raise MaxRetryError(_pool, url, error or ResponseError(cause))
575
MaxRetryError: HTTPConnectionPool(host='vip.stock.finance.sina.com.cn', port=80): Max retries exceeded with url: /quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=+1&num=40&sort=symbol&asc=1&node=chgn_730454&symbol=&_s_r_a=+init (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000000004E7BF70>, 'Connection to vip.stock.finance.sina.com.cn timed out. (connect timeout=3)'))
During handling of the above exception, another exception occurred:
ConnectTimeout Traceback (most recent call last)
<ipython-input-60-7da0076a46de> in <module>
85 # sw2_url='http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=1&num=40&sort=symbol&asc=1&node=sw2_730200&symbol=&_s_r_a=init'
86
---> 87 gg_json=requests.get(url=url,params=params,headers=headers,timeout=(3,5)) #取JSON数据,类型为list
88 gg_data=gg_json.json() #转换成字典
89 for dict in gg_data: #遍历字典取代码和名称
E:\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
74
75 kwargs.setdefault('allow_redirects', True)
---> 76 return request('get', url, params=params, **kwargs)
77
78
E:\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
59 # cases, and look like a memory leak in others.
60 with sessions.Session() as session:
---> 61 return session.request(method=method, url=url, **kwargs)
62
63
E:\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
540 }
541 send_kwargs.update(settings)
--> 542 resp = self.send(prep, **send_kwargs)
543
544 return resp
E:\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
653
654 # Send the request
--> 655 r = adapter.send(request, **kwargs)
656
657 # Total elapsed time of the request (approximately)
E:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
502 # TODO: Remove this in 3.0.0: see #2811
503 if not isinstance(e.reason, NewConnectionError):
--> 504 raise ConnectTimeout(e, request=request)
505
506 if isinstance(e.reason, ResponseError):
ConnectTimeout: HTTPConnectionPool(host='vip.stock.finance.sina.com.cn', port=80): Max retries exceeded with url: /quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=+1&num=40&sort=symbol&asc=1&node=chgn_730454&symbol=&_s_r_a=+init (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000000004E7BF70>, 'Connection to vip.stock.finance.sina.com.cn timed out. (connect timeout=3)'))
1.降低进程并发数
2.延长超时时间
3.增加延时间隔
4.求解决问题请贴代码,而不是只贴错误信息。 该说的楼上都说了,可能是你请求太快被拉黑之类的。
timeout: timed out wkfy 发表于 2021-10-24 12:05
1.降低进程并发数
2.延长超时时间
3.增加延时间隔
代码如下,我是在 Jupyter Notebook 网页上测试,点几下运行这个代码,就有几率出现这种错误
import json
import random
import time

import requests
def get_user_agent_pc():
    """Return a randomly chosen desktop Chrome User-Agent string.

    Returns:
        str: One entry of the list below, selected by ``random.choice``.
    """
    user_agent_pc = [
        # Google Chrome
        # Product tokens must be plain "name/version" pairs; stray ".html"
        # fragments inside them (e.g. "Mozilla/5.0.html") yield a string no
        # real browser sends, so the entries are kept free of them.
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
    ]
    return random.choice(user_agent_pc)
hy_id = "chgn_730454"
# Request parameters for the per-industry constituent-stock page.
params = {
    # Pass the node id as a plain string, not the one-element set ``{hy_id}``.
    "node": hy_id,
}
# NOTE(review): `url` and `headers` are not defined in this snippet; they are
# presumably set elsewhere in the notebook -- confirm before running.
# The connect phase intermittently times out (requests raises ConnectTimeout),
# so retry a few times with a growing pause instead of failing on the first miss.
max_attempts = 3
for attempt in range(1, max_attempts + 1):
    try:
        # timeout=(connect, read), both in seconds.
        gg_json = requests.get(url=url, params=params, headers=headers, timeout=(3, 5))
        break
    except requests.exceptions.ConnectTimeout:
        if attempt == max_attempts:
            raise  # out of attempts: surface the original error to the caller
        time.sleep(attempt)  # wait 1s, then 2s, before the next attempt
# Decode the JSON body; it is iterated below as a sequence of records.
gg_data = gg_json.json()
# Loop variable is `stock`, not `dict`, so the builtin type is not shadowed.
for stock in gg_data:
    print(stock['name'])
gg_json.close()
连接超时,换个请求头试试 茫茫狐 发表于 2021-10-24 13:28
连接超时,换个请求头试试
# HTTP request headers sent with the Sina quotes request, built key by key
# in the same order as a single dict literal would produce.
headers = {}
headers["Host"] = "vip.stock.finance.sina.com.cn"
# The User-Agent is picked at random by get_user_agent_pc() each time this runs.
headers["user-agent"] = get_user_agent_pc()
headers["Referer"] = "http://vip.stock.finance.sina.com.cn/mkt/"
headers["Content-type"] = "application/x-www-form-urlencoded"
都换过了 爬的新浪股票啊,打小就看你刑,你这问题加下超时就行了,还有加 try,IP 多弄些代理啊 molinchz 发表于 2021-10-24 12:30
代码如下,我是在 Jupyter Notebook 网页上测试,点几下运行这个代码,就有几率出现这种错误
timeout 加大点试试,然后加个代理池跑 谢谢各位大佬
页:
[1]