Python用Xpath获取title无法显示
本帖最后由 qqilin1213 于 2020-11-15 10:09 编辑
import requests
from lxml import etree
import os
import re
# //div[@Class = 'list clearfix']//h3
url = 'https://www.dpm.org.cn/lights/royal.html'
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/86.0.4240.193 '
'Safari/537.36 '
}
response_data = requests.get(url, headers=headers).text
html = etree.HTML(response_data)
images_coup = html.xpath("//div[@class = 'pic']//@href")
image_title = html.xpath("//div//img/@title/text()")
print(image_title)
for i in images_coup:
url = i
image_url = "https://www.dpm.org.cn" + url
# print(image_url)
response_images = requests.get(image_url, headers=headers).text
# print(response_images)
html1 = etree.HTML(response_images)
image_data = html1.xpath("//img[@style ='visibility: visible;width: 100%;']/@src")
# print(image_data)
image_url = requests.get(image_data, headers=headers)
# print(image_url)
save = './壁纸/'
address_save = str(save)
# 判断文件夹是否存在,然后自己创建
count = 1
if not os.path.exists(address_save):
os.makedirs('./壁纸/')
else:
with open(address_save + '/.png', 'wb') as f:
f.write(image_url.content) http://imgs.bizha.top/bizha/20201114/lDIsDb34fKSN.png?imageslim 有的响应里的和浏览器中的不一样,以响应中的为准 import requests
from lxml import etree
# Fetch the list page and select the <h3> elements under the element whose
# id is "lights".
url = 'https://www.dpm.org.cn/lights/royal.html'
xpath = '//*[@id="lights"]/div/div/h3'
# The header value must not repeat the header name; the original value began
# with "User-Agent: ", so that prefix was sent as part of the value.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
response_data = requests.get(url, headers=headers)
# Decode the raw bytes explicitly as UTF-8, ignoring undecodable bytes,
# instead of relying on the encoding requests guesses for `.text`.
html = etree.HTML(response_data.content.decode('utf-8', 'ignore'))
# xpath() returns a list of matching elements; index it (for example
# image_title[0].text) to read the text of one element.
image_title = html.xpath(xpath)
In: image_title[0].text
Out: '清雍正 孔雀绿釉菊瓣式撇口尊'
//div//img/@title
改成这样就行 //div/a/img/@title/text() 这样是完整的
//div/h3/text() 这样也能取到
本帖最后由 wanwfy 于 2020-11-15 02:45 编辑
# Alternative XPath expressions for collecting the titles. Each assignment
# overwrites the previous one, so only one of them is needed.
# Text of the <h3> elements that are following siblings of <div class="pic">.
image_titles = html.xpath("//div[@class='pic']/following-sibling::h3/text()")
# Text of every <h3> that is a direct child of any <div>.
image_titles = html.xpath("//div/h3/text()")
# title attribute of <img> inside <a> directly under <div class="pic">.
image_titles = html.xpath("//div[@class='pic']/a/img/@title")
# title attribute of every <img> that is a direct child of an <a>.
image_titles = html.xpath("//a/img/@title")
# Same selection, with an explicit predicate that the <img> has a title.
image_titles = html.xpath("//a/img[@title]/@title")
楼主,@title 是获取元素属性,后面不需要加 /text()。
yjn866y 发表于 2020-11-14 22:49
//div/a/img/@title/text() 这样是完整的
测试都没有测试都回复,
//div/a/img/@title
@title 是获取属性的,后面还加个毛线text(),多此一举.
wanwfy 发表于 2020-11-15 02:43
测试都没有测试都回复,
@title 是获取属性 ...
批评的对
页:
[1]