刚学 Python,写了一个随着网页链接乱跳的爬虫,效果不是很理想,老哥们看看哪里能改进,学习学习。
运行后输入网址就行了,网址要打全(例如 http://xxxx)。
import requests
import re
import time
from bs4 import BeautifulSoup
import lxml
from random import randint,choice
# URLs the crawler has already chosen as hop targets. get_link() appends to
# this list and picks a random entry from it when a page cannot be processed.
# (A `global` statement is a no-op at module level, so none is needed here.)
url_list = []

# Request headers sent with every fetch; the User-Agent imitates a desktop
# browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50'
    ),
}
#uu = input(':-)')
# Matches bare "www.<host>.com" names. The dots are escaped so they only match
# literal dots, and the host part is limited to characters valid in a hostname
# so a match cannot run across unrelated markup.
_WWW_COM_RE = re.compile(r'www\.[\w.-]+?\.com\b')


def get_link(url, *, max_pages=None, timeout=10, max_failures=20):
    """Hop randomly from page to page, printing a short summary of each one.

    Starting at *url*, each step fetches the page, prints the text of its
    first <title>, <h1>, <h2> and <p> elements (those that exist), then picks
    one of the first six "www.*.com" host names found in the page source as
    the next page. Chosen targets are recorded in the module-level
    ``url_list``. When a page cannot be fetched or contains no usable link,
    the walk continues from a random entry of ``url_list``.

    Args:
        url: Full URL (including the scheme) of the page to start from.
        max_pages: Maximum number of fetch attempts, or None for no limit.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.
        max_failures: Stop after this many consecutive steps that produced
            no new link, so a dead network does not loop forever.

    Returns:
        None. Results are printed; visited targets accumulate in ``url_list``.
    """
    pages = 0
    failures = 0
    # Iterate instead of recursing: the walk has no natural end, so recursion
    # would eventually hit the interpreter's recursion limit.
    while max_pages is None or pages < max_pages:
        pages += 1
        next_url = None
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Covers connection errors, timeouts and malformed URLs only;
            # KeyboardInterrupt and programming errors still propagate.
            print('request failed:', url, exc)
        else:
            soup = BeautifulSoup(response.text, 'lxml')
            # Pages often lack one of these tags; skip what is missing rather
            # than abandoning the page.
            for tag_name in ('title', 'h1', 'h2', 'p'):
                tag = soup.find(tag_name)
                if tag is not None:
                    print(tag.get_text(), '\n')
            print('*' * 80)

            hosts = _WWW_COM_RE.findall(response.text)
            if hosts:
                # Choose among at most the first six matches; slicing keeps
                # this safe when the page has fewer than six.
                next_url = 'http://' + choice(hosts[:6])
                if next_url not in url_list:
                    url_list.append(next_url)

        if next_url is not None:
            failures = 0
        else:
            failures += 1
            if not url_list or failures >= max_failures:
                print('no further link to follow, stopping')
                return
            # Fall back to a page target that was picked earlier.
            next_url = choice(url_list)
        url = next_url
if __name__ == '__main__':
    # Ask for the start page and begin the random walk. The URL must include
    # its scheme (e.g. http://...).
    get_link(input('输入url:'))