akon007 发表于 2020-3-23 17:29

python 爬取网站单选题-问题已解决

本帖最后由 akon007 于 2020-3-23 17:36 编辑

from bs4 import BeautifulSoup
import requests

# Headers sent with every request: a desktop Chrome User-Agent string, so the
# request identifies itself as a regular browser rather than as python-requests.
hds = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
}
# Disabled first step, kept inside a bare string literal so that it is never
# executed. If enabled it would fetch the detail page, collect every
# <a class="subject-tit"> element, and print the text and absolute URL of each
# link whose href contains 'paper'.
'''

url = 'https://xxx/xxxx/detail?id=3996'
r = requests.get(url=url,headers=hds)
sources = r.content
soup = BeautifulSoup(sources,'lxml')
# print(soup.find('div',class_='J_SubjectList'))
soup_new = soup.findAll('a',class_= 'subject-tit')
for chapters in soup_new:
    if 'paper' in chapters.attrs['href']:
      print(chapters.text)
      print(f"http://xxxx/{chapters.attrs['href']}")
'''
# Second step: download one exam page and print every question followed by
# its own answer options.
url2 = 'http://xxxx/view-2570724.shtml'
# A timeout keeps the script from hanging forever on an unresponsive server.
r2 = requests.get(url=url2, headers=hds, timeout=10)
# Stop with an HTTPError on a 4xx/5xx response instead of parsing an error page.
r2.raise_for_status()
sources = r2.content
soup2 = BeautifulSoup(sources, 'lxml')
# Question stems live in <div class="q-tit">, option lists in <div class="exam-s">.
questions = soup2.find_all('div', attrs={'class': 'q-tit'})
options = soup2.find_all('div', attrs={'class': 'exam-s'})
# Walk both lists in lockstep so each question is printed exactly once with the
# option block at the same position. The previous nested loops printed every
# question once per option block (a cartesian product).
# NOTE(review): assumes the page has one 'exam-s' block per 'q-tit' block, in
# the same order -- confirm against the real markup.
for question, option in zip(questions, options):
    print(question.text.replace('\n', ''))
    print(option.text)

# Sample output posted with the script:
#   1.   Students in Grade Nine ____________a maths exam at this time yesterday.A . takeB . are takingC . were takingD . have taken
页: [1]
查看完整版本: python 爬取网站单选题-问题已解决