Issue
I want to scrape some categories in https://www.johnlewis.com/. I am able to fetch the url of the categories but I can't scrape data from the website
# -*- coding: utf-8 -*-
import scrapy
from ..items import ExperimentItem
class ExperimenSpider(scrapy.Spider):
name = 'experimen'
allowed_domains = ['www.johnlewis.com']
start_urls = [
# firsttest
'https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/android/_/N-5nlvZ1z0ww04',
'https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/windows-10/_/N-5nlvZ1z0i0qv',
'https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/fire-os/_/N-5nlvZ1z0w8jv'
]
def parse(self, response):
global productCategory
global productSubCategory
products = response.css('.product-list-heading::text').extract_first()
currentUrl = response.request.url
items = ExperimentItem()
androidTablets = 'https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/android/_/N-5nlvZ1z0ww04',
windowTablets = 'https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/windows-10/_/N-5nlvZ1z0i0qv',
fireOs = 'https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/fire-os/_/N-5nlvZ1z0w8jv'
for product in products:
if androidTablets in currentUrl:
productCategory = 'Electric'
productCategory = 'tablets'
elif windowTablets in currentUrl:
productCategory = 'Electric'
productCategory = 'tablets'
elif fireOs in currentUrl:
productCategory = 'Electric'
productCategory = 'tablets'
productCountry = 'uk'
productSeller = 'John Lewis'
productLink = 'https://www.johnlewis.com' + response.css('.product-card__wrap-link::attr(href)').extract_first()
productTitle = response.css('.product-card__title-inner::text').extract_first()
productImage = response.css('.product-card__image::attr(src)').extract_first()
productStandardPrice = response.css('.product-card__price-span::text').extract_first()
productSalePrice = response.css('.product-card__price-span--discounted::text').extract_first()
items['productCategory'] = productCategory
items['productSubCategory'] = productSubCategory
items['productCountry'] = productCountry
items['productSeller'] = productSeller
items['productLink'] = productLink
items['productTitle'] = productTitle
items['productImage'] = productImage
items['productStandardPrice'] = productStandardPrice
items['productSalePrice'] = productSalePrice
yield items
The error I get:
2019-10-11 14:50:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/fire-os/_/N-5nlvZ1z0w8jv> (referer: None)
2019-10-11 14:50:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/android/_/N-5nlvZ1z0ww04> (referer: None)
2019-10-11 14:50:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/windows-10/_/N-5nlvZ1z0i0qv> (referer: None)
2019-10-11 14:50:37 [scrapy.core.scraper] ERROR: Spider error processing <GET https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/fire-os/_/N-5nlvZ1z0w8jv> (referer: None)
Traceback (most recent call last):
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
yield next(it)
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
for x in result:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 339, in <genexpr>
return (_set_referer(r) for r in result or ())
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "C:\Users\Tobi Fafowora\Desktop\scrapy\experiment\experiment\spiders\experimen.py", line 29, in parse
if androidTablets in currentUrl:
TypeError: 'in <string>' requires string as left operand, not tuple
2019-10-11 14:50:37 [scrapy.core.scraper] ERROR: Spider error processing <GET https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/android/_/N-5nlvZ1z0ww04> (referer: None)
Traceback (most recent call last):
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
yield next(it)
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
for x in result:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 339, in <genexpr>
return (_set_referer(r) for r in result or ())
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "C:\Users\Tobi Fafowora\Desktop\scrapy\experiment\experiment\spiders\experimen.py", line 29, in parse
if androidTablets in currentUrl:
TypeError: 'in <string>' requires string as left operand, not tuple
2019-10-11 14:50:37 [scrapy.core.scraper] ERROR: Spider error processing <GET https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/windows-10/_/N-5nlvZ1z0i0qv> (referer: None)
Traceback (most recent call last):
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
yield next(it)
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
for x in result:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 339, in <genexpr>
return (_set_referer(r) for r in result or ())
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "c:\users\tobi fafowora\anaconda3\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "C:\Users\Tobi Fafowora\Desktop\scrapy\experiment\experiment\spiders\experimen.py", line 29, in parse
if androidTablets in currentUrl:`
TypeError: 'in <string>' requires string as left operand, not tuple
2019-10-11 14:50:37 [scrapy.core.engine] INFO: Closing spider (finished)
Solution
It's a simple solution, you have commas at the end of these lines:
androidTablets = 'https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/android/_/N-5nlvZ1z0ww04',
windowTablets = 'https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/windows-10/_/N-5nlvZ1z0i0qv',
fireOs = 'https://www.johnlewis.com/browse/electricals/ipad-tablets/view-all-tablets/fire-os/_/N-5nlvZ1z0w8jv'
this makes them into tuples
instead of strings. Your error is telling you that you are asking if a tuple
in in a string
. Remove those two commas and that problem will be solved.
Answered By - tomjn
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.