抓取失败,该怎么修改?
import json
import time
from datetime import datetime
from datetime import timedelta
from urllib import request
def get_data(url):
    """Fetch the resource at ``url`` and return its raw body.

    The request is sent with a desktop Chrome ``User-Agent`` header.

    Args:
        url: Address to request.

    Returns:
        The response body as ``bytes`` when the reported status code is
        200, otherwise ``None``.

    Raises:
        Nothing is caught here: any exception raised by
        ``urllib.request.urlopen`` (for example ``urllib.error.HTTPError``
        or ``urllib.error.URLError``) propagates to the caller.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
    }
    req = request.Request(url, headers=headers)
    # The context manager closes the response on every path, including the
    # non-200 branch, so the underlying connection is never leaked.
    with request.urlopen(req) as response:
        if response.getcode() == 200:
            return response.read()
    return None
if __name__ == '__main__':
    # Download the search-results page and print whatever get_data returns
    # (the raw body, or None when the status code is not 200).
    html = get_data('http://www.nnedu.com/App.Search/index.php?m=Search&c=Resource&a=index&ph=03&s=01&pb=01&b=0101010101-412&from=ew')
    print(html)
抓取这个网址时就会报错,换成其他网站就可以正常显示内容。
可能是它做了反爬,我也不清楚。你试试加个浏览器标识,可能是服务器拒绝访问吧。或者是 302 重定向?
import requests
import json
import time
from datetime import datetime
from datetime import timedelta
def get_data(url):
    """Download ``url`` with ``requests`` and return the page text.

    The request carries a desktop Chrome ``User-Agent`` header.

    Args:
        url: Address to request.

    Returns:
        The decoded response text when the status code is 200,
        otherwise ``None``.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
    }
    response = requests.get(url, headers=browser_headers)
    # Guard clause: anything other than 200 yields no content.
    if response.status_code != 200:
        return None
    return response.text
if __name__ == '__main__':
    # Search-results page to download; the result of get_data is printed as-is.
    target_url = 'http://www.nnedu.com/App.Search/index.php?m=Search&c=Resource&a=index&ph=03&s=01&pb=01&b=0101010101-412&from=ew'
    print(get_data(target_url))
用 requests 模块不香吗?非要用啥 urllib{:301_971:}
页:
[1]