# Note: the formatting of the previous post was broken, so the code is posted again.
from urllib.parse import urlencode
import requests
from lxml import etree
import re
import time
from selenium import webdriver
# Scrape the question text from quiz pages 01.html .. 10.html and append it
# to D:/test2.txt.
#
# For each page the script drives Edge to open the page, click the form
# button and accept the alert that follows, then downloads the same URL with
# requests and pulls the question text out of the HTML with lxml.
# NOTE(review): the requests download is a separate HTTP request, so the
# click performed in the browser does not affect the HTML parsed below. If
# the post-click page is what is wanted, parse driver.page_source instead --
# confirm the intent.

# Browser-like User-Agent string; nothing in this section sends it yet.
ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240"

for uuu in range(1, 11):
    # Page files use two-digit names, so 1..9 need a leading zero.
    url = "http://www.henanjianan.com/tiku/dianhan/" + str(uuu).zfill(2) + ".html"

    # Simulate the click in a real browser.
    driver = webdriver.Edge()
    try:
        # Open the page for this iteration (the earlier version always
        # opened 10.html here, whatever the loop index was).
        driver.get(url)
        time.sleep(10)  # wait for the page to finish loading
        # find_element("xpath", ...) works on Selenium 3 and 4; the
        # find_element_by_xpath shortcut no longer exists in Selenium 4.3+.
        driver.find_element("xpath", "/html/body/form/div[5]/input").click()
        time.sleep(3)  # give the alert time to appear
        alertObject = driver.switch_to.alert
        print(alertObject.text)  # show the alert message
        time.sleep(1)
        alertObject.accept()  # press the alert's OK button
    finally:
        # Always close the browser so one Edge process per page is not leaked.
        driver.quit()

    # Fetch the page HTML and parse it into an element tree.
    r = requests.get(url)
    r.encoding = "utf-8"
    content = r.text
    print(content)
    html = etree.HTML(content)
    print(html)

    # Open the output once per page; an explicit encoding keeps the write
    # from failing on locales whose default codec cannot represent the text.
    with open("D:/test2.txt", "a+", encoding="utf-8") as f:
        for i in range(2, 32):
            # Text of the multiple-choice question held in the i-th div.
            matches = html.xpath("/html/body/form/div[2]/div[" + str(i) + "]/div[1]/text()")
            if not matches:
                # The page has no text node at this position; skip it
                # rather than abort the whole run with an IndexError.
                continue
            orders = matches[0]
            f.write(orders)
|