#得到指定的一个URL的网页内容
def askURL(url):
    """Fetch the page at ``url`` and return its body decoded as UTF-8 text.

    The request is sent with a desktop-browser ``User-Agent`` header.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body. If the request fails with
        ``urllib.error.URLError``, the error's ``code`` and/or ``reason``
        are printed and an empty string is returned.
    """
    # Browser-like headers: the User-Agent tells the server what kind of
    # client (machine / browser) is making the request.
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.46"
    }
    request = urllib.request.Request(url, headers=head)
    html = ""
    try:
        # The context manager closes the response even when read() or
        # decode() raises; previously the response was never closed.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        # Not every URLError has both attributes, so each is checked
        # independently before printing.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html
#保存数据
def saveData(datalist, savepath):
    """Write the collected rows into an Excel workbook saved at ``savepath``.

    Row 0 of the sheet holds the two column titles; each entry of
    ``datalist`` is written to the following rows, one entry per row.

    Args:
        datalist: Sequence of rows. Each row is indexed with ``row[0]`` and
            ``row[1]``; judging by the column titles these are presumably
            the detail link and the atlas name -- verify against the caller.
        savepath: Destination path handed to ``Workbook.save``.

    Raises:
        IndexError: If a row has fewer items than there are columns.
    """
    print("save.....")
    book = xlwt.Workbook(encoding="utf-8", style_compression=0)  # create the workbook
    sheet = book.add_sheet('工程图集', cell_overwrite_ok=True)  # create the worksheet
    col = ("详情连接", "图集名称")
    # Header row: one title per cell. The previous code passed the whole
    # tuple to every header cell instead of the individual title.
    for j, title in enumerate(col):
        sheet.write(0, j, title)
    # Write every row of datalist. The previous code was hard-coded to two
    # rows and indexed the whole list instead of the current row.
    for i, data in enumerate(datalist):
        print("第%d" % (i + 1))
        for j in range(len(col)):
            sheet.write(i + 1, j, data[j])  # +1 skips the header row
    book.save(savepath)
# Script entry point: the body runs only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()  # main() is defined outside this excerpt
    print("爬取完毕!")  # completion message ("crawling finished")