Issue
This code is giving the items for the first page but is not going to the next page. First function calls the second function runs through the products of the first page but after finishing the loop in the first function it should call itself for next page the but its not. A help will be appreciated
from gc import callbacks
from subprocess import call
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from lxml import html
class EbaySpider(scrapy.Spider):
name = 'ebay'
allowed_domains = ['ebay.co.uk']
start_urls = ['https://www.ebay.co.uk/sch/i.html?_dmd=2&_dkr=1&
iconV2Request=true&_ssn=jessicasmith2022&store_name=jesssuperstoreenterprise&
_sop=10&_oac=1&_ipg=240&_fcid=3&_pgn=1'
]
for url in start_urls:
def parse(self, response):
for link in (response.css('.s-item__info.clearfix > a::attr(href)')
.getall()):
yield response.follow(link , callback = self.productlinks)
next_page = response.xpath('//*[contains(concat( " ",
@class, " " ), concat( " ", "icon-link", " " ))]/@href')
.extract_first()
if next_page:
next_page_link = response.urljoin(next_page)
yield scrapy.Request(url=next_page_link , callback=self.parse)
Solution
You get two results for the pagination: the first is the previous page, and the second is the next page. Since you use extract_first()
you get the previous page, and it doesn't exists (because you're already in the first page).
Also your code can be improved in a lot of ways. Please read the documentation again, and maybe use an xpath cheat sheet or something.
import scrapy
class EbaySpider(scrapy.Spider):
name = 'ebay'
allowed_domains = ['ebay.co.uk']
start_urls = ['https://www.ebay.co.uk/sch/i.html?_dmd=2&_dkr=1&iconV2Request=true&_ssn=jessicasmith2022&store_name=jesssuperstoreenterprise&_sop=10&_oac=1&_ipg=240&_fcid=3&_pgn=1']
custom_settings = {'DOWNLOAD_DELAY': 0.5}
def parse(self, response):
for link in (response.css('.s-item__info.clearfix > a::attr(href)').getall()):
yield response.follow(link, callback=self.productlinks)
next_page = response.xpath('//a[contains(@class, "pagination__next")]/@href').get()
if next_page:
next_page_link = response.urljoin(next_page)
yield scrapy.Request(url=next_page_link, callback=self.parse)
def productlinks(self, response):
yield {
'ITEM_Name': response.xpath('//h1//span//text()').get(),
'TIMES_Sold': response.xpath('//div[contains(@class, "quantity")]//a/text()').get(default='Unknown'),
'ITEM_Price £': response.xpath('//span[@id="prcIsum"]/text()').get(default='').replace('£', '')
}
Answered By - SuperUser
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.