Issue
I am trying to scrape items from a website whose pagination has no "next page" href, only page numbers. My spider collects only one page instead of all of them. How can I modify my code, and why doesn't it get all the items? Can anyone explain?
import scrapy
from scrapy_splash import SplashRequest


class Hepsips4Spider(scrapy.Spider):
    # you could automate the page transitions here
    name = 'hepsips4'
    allowed_domains = ['www.hepsiburada.com']

    script = '''
    function main(splash, args)
        assert(splash:go(args.url))
        assert(splash:wait(2))
        return {
            html = splash:html(),
        }
    end
    '''

    def start_requests(self):
        yield SplashRequest(
            url="https://www.hepsiburada.com/playstation-4-oyunlari-c-60003893?siralama=enyeni&sayfa=1",
            callback=self.parse,
            endpoint="execute",
            args={'lua_source': self.script},
        )

    def parse(self, response):
        for i in range(1, 25):
            yield SplashRequest(
                url=f"https://www.hepsiburada.com/playstation-4-oyunlari-c-60003893?siralama=enyeni&sayfa={i}",
                callback=self.parse_item,
                endpoint="execute",
                args={'lua_source': self.script},
            )

    def parse_item(self, response):
        for row in response.xpath("//ul[@class='product-list results-container do-flex list']/li/div"):
            name = row.xpath(".//a/div/h3/@title").get()
            img = row.xpath(".//a/figure/div/img/@data-src").get()
            company = "hepsiburada"
            link = row.xpath(".//a/@href").get()
            fulllink = f"https://www.hepsiburada.com{link}"
            price = row.xpath(".//a/div/div[2]/span/text()").get()
            # try to fix the cart-discount price later
            platform = 'ps4'
            yield {
                'name': name,
                'image': img,
                'company': company,
                'full_link': fulllink,
                'price': price,
                'platform': platform,
            }
Solution
script = '''
function main(splash, args)
    assert(splash:go(args.url))
    assert(splash:wait(2))
    return {
        html = splash:html(),
    }
end
'''
Setting the wait duration to 2 seconds in the Lua script fixed my problem: the pages needed time to render before Splash returned the HTML.
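If the total page count is not known in advance, one possible alternative (not part of the original answer) is to keep requesting the next numbered page until a page comes back with no products, instead of hard-coding range(1, 25). The sketch below is hypothetical; it assumes the same product-list XPath, URL pattern, and Lua script as the spider above, and tracks the current page number through request meta.

    def parse_item(self, response):
        rows = response.xpath("//ul[@class='product-list results-container do-flex list']/li/div")
        if not rows:
            # empty page: assume we have gone past the last page, so stop
            return
        for row in rows:
            yield {
                'name': row.xpath(".//a/div/h3/@title").get(),
                'link': response.urljoin(row.xpath(".//a/@href").get()),
                'price': row.xpath(".//a/div/div[2]/span/text()").get(),
            }
        # request the next numbered page; 'sayfa' is the page parameter in the site URL
        next_page = response.meta.get('page', 1) + 1
        yield SplashRequest(
            url=f"https://www.hepsiburada.com/playstation-4-oyunlari-c-60003893?siralama=enyeni&sayfa={next_page}",
            callback=self.parse_item,
            endpoint="execute",
            args={'lua_source': self.script},
            meta={'page': next_page},
        )

This avoids guessing an upper bound on the page count, at the cost of crawling the pages sequentially rather than in parallel.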
Answered By - cvsrt