Issue
When I run this code, the spider only crawls 3 pages and then stops. It doesn't go on to the next page.
I have tried changing the code in several different ways, but I still can't get it to move to the third page.
# -*- coding: utf-8 -*-
import scrapy
from realstatedata.items import RealstatedataItem
class RsdataSpider(scrapy.Spider):
    """Spider that collects listing descriptions from vivareal.com.br.

    Each response is scraped for property cards and, when the page contains
    a "Próxima página" link, that link is requested with ``parse`` as the
    callback.
    """

    name = 'rsdata'
    allowed_domains = ['vivareal.com.br']
    start_urls = [
        'https://www.vivareal.com.br/aluguel/sp/sao-jose-dos-campos/apartamento_residencial/#preco-ate=2000',
    ]

    def parse(self, response):
        """Yield the items found on ``response``, then a request for the next page."""
        next_links = response.xpath('//a[@title="Próxima página"]/@href')
        yield from self.scrape(response)

        # No "next page" anchor on this page: nothing more to follow.
        if not next_links:
            return

        href = next_links.extract_first()
        target = response.urljoin(href)
        print("Found url: {}".format(target))
        yield scrapy.Request(target, callback=self.parse)

    def scrape(self, response):
        """Yield one ``RealstatedataItem`` per property card in ``response``."""
        # The trailing "/.." selects the parent element of each card <article>.
        cards = response.xpath('//article[@class="property-card__container js-property-card"]/..')
        for card in cards:
            listing = RealstatedataItem()
            listing['description'] = card.xpath(
                './/h2/span[@class="property-card__title js-cardLink js-card-title"]/text()'
            ).extract_first()
            # Further fields, left disabled as in the original listing:
            #listing['address'] = card.xpath('.//span[@class="property-card__address"]/text()').extract_first()
            #listing['prop_area'] = card.xpath('.//span[@class="property-card__detail-value js-property-card-value property-card__detail-area js-property-card-detail-area"]/text()').extract_first()
            #listing['prop_rooms'] = card.xpath('.//span[@class="property-card__detail-value js-property-card-value"]/text()').extract_first()
            #listing['prop_bath'] = card.xpath('.//span[@class="property-card__detail-value js-property-card-value"]/text()').extract_first()
            #listing['prop_parking'] = card.xpath('.//ul/li[4]/span[@class="property-card__detail-value js-property-card-value"]/text()').extract_first()
            #listing['price_rent'] = card.xpath('.//p[@style="display: block;"]/text()').extract_first()
            #listing['price_cond'] = card.xpath('.//strong[@class="js-condo-rice"]/text()').extract_first()
            #listing['realstate_name'] = card.xpath('.//picture/img/@alt').extract_first()
            yield listing
Solution
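Replace the leading '#' with '?' in 'path' (notice that the "next page" button doesn't work as-is): the extracted href is only a fragment such as '#pagina=2', which has to become the query string '?pagina=2' before it is requested: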
import scrapy
from realstatedata.items import RealstatedataItem
class RsdataSpider(scrapy.Spider):
    """Crawl vivareal.com.br rental listings and follow the pagination links.

    Every response is scraped for property cards. When the page has a
    "Próxima página" link, its href is turned into a query string and the
    resulting URL is requested with ``parse`` as the callback.
    """

    name = 'rsdata'
    allowed_domains = ['vivareal.com.br']
    # NOTE(review): '#preco-ate=2000' is a URL fragment; presumably it is not
    # sent to the server, so the price filter may not apply to the crawled
    # pages -- confirm against the site before relying on it.
    start_urls = ['https://www.vivareal.com.br/aluguel/sp/sao-jose-dos-campos/apartamento_residencial/#preco-ate=2000']

    def parse(self, response):
        """Yield the items on ``response``, then a request for the next page.

        Args:
            response: the Scrapy response for a listing page.

        Yields:
            ``RealstatedataItem`` objects produced by ``scrape``, followed by
            at most one ``scrapy.Request`` for the next listing page.
        """
        nextpageurl = response.xpath('//a[contains(@title,"Próxima página")]/@href')
        yield from self.scrape(response)

        path = nextpageurl.extract_first()
        if not path:
            # No "next page" anchor, or an empty href: nothing to follow.
            return

        # The link comes back as a fragment ("#pagina=2"). Joined as-is it
        # would point at the same document, so turn it into a query string
        # ("?pagina=2"). Only rewrite when the href really is a fragment, so
        # that any other href form is passed to urljoin untouched.
        if path.startswith('#'):
            path = '?' + path[1:]

        nextpage = response.urljoin(path)
        print("Found url: {}".format(nextpage))
        yield scrapy.Request(nextpage, callback=self.parse)

    def scrape(self, response):
        """Yield one ``RealstatedataItem`` per property card in ``response``.

        Only the ``description`` field is populated; the remaining fields are
        left disabled below.
        """
        # The trailing "/.." selects the parent element of each card <article>.
        for resource in response.xpath('//article[@class="property-card__container js-property-card"]/..'):
            item = RealstatedataItem()
            item['description'] = resource.xpath('.//h2/span[@class="property-card__title js-cardLink js-card-title"]/text()').extract_first()
            #item['address'] = resource.xpath('.//span[@class="property-card__address"]/text()').extract_first()
            #item['prop_area'] = resource.xpath('.//span[@class="property-card__detail-value js-property-card-value property-card__detail-area js-property-card-detail-area"]/text()').extract_first()
            #item['prop_rooms'] = resource.xpath('.//span[@class="property-card__detail-value js-property-card-value"]/text()').extract_first()
            #item['prop_bath'] = resource.xpath('.//span[@class="property-card__detail-value js-property-card-value"]/text()').extract_first()
            #item['prop_parking'] = resource.xpath('.//ul/li[4]/span[@class="property-card__detail-value js-property-card-value"]/text()').extract_first()
            #item['price_rent'] = resource.xpath('.//p[@style="display: block;"]/text()').extract_first()
            #item['price_cond'] = resource.xpath('.//strong[@class="js-condo-rice"]/text()').extract_first()
            #item['realstate_name'] = resource.xpath('.//picture/img/@alt').extract_first()
            yield item
Part of output:
{'description': ' Apartamento com 2 Quartos para Aluguel, 82m² '}
{'description': ' Apartamento com 4 Quartos à Venda/Aluguel 280m² '}
{'description': ' Apartamento com 2 Quartos para Aluguel, 70m² '}
{'description': ' Apartamento com 3 Quartos para Aluguel, 113m² '}
{'description': ' Apartamento com 2 Quartos para Venda/Aluguel 50m² '}
{'description': ' Apartamento com 2 Quartos para Venda/Aluguel 50m² '}
Found url: https://www.vivareal.com.br/aluguel/sp/sao-jose-dos-campos/apartamento_residencial/?pagina=27
Answered By - SuperUser
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.