import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class ZxSpider(CrawlSpider):
    """Crawl zzxrjc.com and emit one item per book page.

    Links matching ``/sort/<digits>/`` are followed to discover more pages;
    links matching ``/txt/<digits>/`` are handed to :meth:`parse_item`.
    """

    name = 'zx'
    allowed_domains = ['zzxrjc.com']
    start_urls = ['http://zzxrjc.com/']

    rules = (
        # Book pages: scrape them. ``follow`` is left at its default here.
        Rule(LinkExtractor(allow=r'(/txt/\d+/)'), callback='parse_item'),
        # Category listing pages: only used to discover further links.
        Rule(LinkExtractor(allow=r'(/sort/\d+/)'), follow=True),
    )

    def parse_item(self, response) -> dict:
        """Extract cover image URL, title and description from a book page.

        Args:
            response: The response for a page matched by the ``/txt/``
                rule.

        Returns:
            A dict with the keys ``book_img``, ``book_name`` and
            ``book_detail``. A value is ``None`` when its XPath matches
            nothing.
        """
        # The cover <img> carries both the image URL (@src) and the title
        # (@alt), so select it once and read both attributes from it.
        cover = response.xpath('//div[@class="novel_info_main"]/img')

        item = {
            'book_img': cover.xpath('@src').extract_first(),
            'book_name': cover.xpath('@alt').extract_first(),
            # NOTE(review): extract_first() keeps only the first text node
            # under the first child div of #info; if the description spans
            # several nodes, the rest is dropped - confirm this is intended.
            'book_detail': response.xpath(
                '//div[@id="info"]/div[1]//text()'
            ).extract_first(),
        }

        # Use the spider's logger rather than print() so the output goes
        # through Scrapy's logging configuration.
        self.logger.debug('Parsed item: %r', item)
        return item