好友
阅读权限10
听众
最后登录1970-1-1
|
执_念
发表于 2023-7-24 08:51
# python 爬虫爬取歪歪会员CDK
import requests
# Fetch the source of a web page.
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request is made with a 30-second timeout, and the response encoding
    is set from ``apparent_encoding`` before the body is decoded.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or the sentinel string "产生异常" when the
        request fails or the server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # raises HTTPError for a 4xx/5xx response
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures are folded into the sentinel; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        return "产生异常"
# Parse the page source.
def parsePage(ilt, html, max_items=99):
    """Extract ``<div class="c-abstract">`` snippets from ``html`` into ``ilt``.

    A header row is printed first. Each snippet found is then printed as a
    numbered row and appended to ``ilt`` as ``[snippet[:8], snippet[8:]]``:
    the first 8 characters go under the "会员等级" column and the remainder
    under the "CDK" column.

    Args:
        ilt: List that receives one two-element list per snippet (mutated
            in place).
        html: Page source to scan. A non-string value yields no entries.
        max_items: Upper bound on the number of snippets taken from one page.

    Returns:
        None.
    """
    open_tag = '<div class="c-abstract">'
    close_tag = '</div>'
    plt = "{:4}\t{:8}\t{:16}"
    print(plt.format("序号", "会员等级", "CDK"))

    if not isinstance(html, str):
        return

    pos = 0  # search cursor; avoids re-slicing the page after every hit
    for count in range(1, max_items + 1):
        start = html.find(open_tag, pos)
        if start == -1:
            break
        body_start = start + len(open_tag)
        end = html.find(close_tag, body_start)
        if end == -1:
            # Opening tag without a closing tag: the snippet is incomplete,
            # so stop rather than record a truncated entry.
            break
        info = html[body_start:end]
        pos = end + len(close_tag)
        print(plt.format(count, info[0:8], info[8:]))
        ilt.append([info[0:8], info[8:]])
# Print the parsed data.
def printGoodsList(ilt):
    """Print ``ilt`` as a tab-separated table with a header row.

    Args:
        ilt: Iterable of two-element sequences; element 0 is printed under
            the "会员等级" column and element 1 under the "CDK" column.

    Returns:
        None. Rows are numbered from 1 in iteration order.
    """
    plt = "{:4}\t{:8}\t{:16}"
    print(plt.format("序号", "会员等级", "CDK"))
    for count, g in enumerate(ilt, start=1):
        print(plt.format(count, g[0], g[1]))
# Entry point.
def main():
    """Search Baidu for the keyword, collect snippets, and print them.

    Fetches ``depth`` result pages for the fixed keyword, passes each page
    to ``parsePage`` to accumulate entries, then prints the combined list
    with ``printGoodsList``. A page that raises is reported and skipped.

    Returns:
        None.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlencode

    goods = "歪歪会员CDK"
    depth = 2
    infoList = []
    for i in range(depth):
        # urlencode percent-escapes the keyword so characters such as "&"
        # or spaces cannot corrupt the query string.
        # NOTE(review): the offset advances by 50 per page; confirm this
        # matches the number of results the search page actually returns.
        url = "https://www.baidu.com/s?" + urlencode({"wd": goods, "pn": 50 * i})
        try:
            html = getHTMLText(url)
            parsePage(infoList, html)
        except Exception as exc:
            # Best effort: one bad page must not abort the whole run, but
            # the failure is reported instead of being silently dropped.
            print("skipping page {}: {}".format(i, exc))
            continue
    printGoodsList(infoList)
# Run the crawler only when executed as a script, so importing this module
# does not trigger network requests.
if __name__ == "__main__":
    main()
|
免费评分
-
查看全部评分
|
发帖前要善用【论坛搜索】功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。 |
|
|
|
|