本帖最后由 double07 于 2021-5-10 09:38 编辑
直入主题,上代码,请问这样错在哪里,怎样修改?[Python] 纯文本查看 复制代码 import chardet
import requests
from bs4 import BeautifulSoup
import re
response = requests.get("https://itemcdn.tmall.com/desc/icoss37757230056cf740963d9fe136?var=desc")
encodingInfo = chardet.detect(response.content)
r_response = response.content.decode(encodingInfo['encoding'], 'ignore')
a = response.text.strip('var desc=')
soup = BeautifulSoup(a, features="lxml")
lst = []
for l in soup.find_all('span'):
if l.text != '\xa0':
lst.append(l.text)
concat="".join(lst) #合并列表字符串
try:
use_righ = re.findall(r'出让', concat)[0] # 土地类型
except Exception as e:
print(str(e), "执行B计划,取划拨")
use_righ = re.findall(r'划拨', concat)[0] # 土地类型
except Exception as e:
print(str(e), "执行C计划,取集体")
use_righ = re.findall(r'集体', concat)[0] # 土地类型
except Exception as e:
print(str(e), "执行D计划,取转让")
use_righ = re.findall(r'转让', concat)[0] # 土地类型
except Exception as e:
print(str(e), "非出让")
use_righ = ' '
print(use_righ) |