import scrapy
class QuotesSpider(scrapy.Spider):
    """Scrape promotion cards from a gmei.com promotion list, page by page.

    Crawling starts at page 1 of the ``province_268`` promotion list. Every
    ``a.commodity`` element on a page becomes one item dict, and the spider
    keeps following the ``li.next-page`` link until none is present.
    """

    name = "quotes"
    start_urls = [
        'http://www.gmei.com/promotion_list/province_268?page=1',
    ]

    def parse(self, response):
        """Yield one dict per commodity card, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            dict: One item per ``a.commodity`` element with the keys
                ``title``, ``yuyue``, ``riji``, ``xianjia``, ``yuanjia``,
                ``address`` and ``url``. ``xianjia`` / ``yuanjia`` are single
                strings, or ``None`` when the price element does not contain
                enough numbers; the remaining values are lists of strings.
            scrapy.Request: A request for the next listing page, handled by
                this same method, when a next-page link exists.
        """
        number_pattern = r'[1-9]\d*'

        for quote in response.css('a.commodity'):
            # Run the price regex once and reuse the matches for both prices.
            # NOTE(review): ``.re()`` is applied to the element's serialized
            # markup, so match 0 is presumably the "3" of the ``<h3`` tag
            # itself, which would explain why the prices are read from
            # indexes 1 and 2 -- confirm against the live page.
            price_numbers = quote.css('h3.commodity-price').re(number_pattern)

            # Guard the indexing: a card with fewer matches must not raise
            # IndexError, which would abort the generator and lose both the
            # remaining cards and the pagination request below.
            current_price = price_numbers[1] if len(price_numbers) > 1 else None
            original_price = price_numbers[2] if len(price_numbers) > 2 else None

            yield {
                'title': quote.css('p.commodity-title::text').extract(),
                # Key names look like pinyin (presumably: yuyue = bookings,
                # riji = diaries, xianjia = current price, yuanjia = original
                # price) -- verify with the data consumer before renaming.
                'yuyue': quote.css('div.commodity-num span').re(number_pattern),
                'riji': quote.css('span.diary-num').re(number_pattern),
                'xianjia': current_price,
                'yuanjia': original_price,
                'address': quote.css('p.commodity-address::text').extract(),
                'url': quote.css('a.commodity::attr(href)').extract(),
            }

        # Follow pagination once per page, after all cards have been emitted.
        next_page = response.css('li.next-page a::attr(href)').extract_first()
        if next_page is not None:
            # The href may be relative; resolve it against the current URL.
            next_page = response.urljoin(next_page)
            yield scrapy.Request(next_page, callback=self.parse)
# Source (please credit the original address when reposting): https://ju.6miu.com/read-20544.html