Issue
I have written the following code, and it works most of the time. However, on consecutive runs I sometimes get this seemingly random error:
AttributeError: 'NoneType' object has no attribute 'find_all'
The traceback mostly points to line 13, the find_all call for products.
What could be happening here, and how can I fix it?
import requests
import bs4
import pandas as pd
# Scrape the first page of Holland & Barrett search results for "prebiotic"
# and write one row per product card to hollandandbarrett_products.csv.
url = 'https://www.hollandandbarrett.com/search/?query=prebiotic&page=1'
# Without a timeout, requests.get() can block indefinitely on a stalled server.
resp = requests.get(url, timeout=30)
html = bs4.BeautifulSoup(resp.content, 'html.parser')

# find() returns None when the response holds no matching element, so every
# lookup result below is checked before it is dereferenced.
# NOTE(review): the intermittent failures are presumably runs where the site
# serves a page without the product list markup -- confirm by inspecting
# resp.status_code / resp.text on a failing run.
catalog = html.find(
    'div',
    attrs={'class': 'ProductListContainer-module__list___yHwue', 'data-test': True},
)  # 'list-Products'
if catalog is not None:
    products = catalog.find_all(
        'a',
        attrs={'class': 'ProductCard-module__link___FyAjR', 'data-test': True},
    )  # 'product-card'
    data = []
    for product in products:
        t = product.find(
            'div',
            attrs={
                'class': 'ProductCard-module__title___dPGk8 Typography-module_base__h-bPx Typography-module_brandon__Es-DX Typography-module_bold__NNV5t',
                'data-test': 'product-card-title',
            },
        )
        title = t.text.strip() if t else None

        r = product.find('div', attrs={'class': 'RatingStars-module_star__3j5m8'})
        # Use .get() so a star element without a title attribute yields None
        # instead of raising KeyError, like every other optional field here.
        rating_text = r.get('title') if r else None
        rating = rating_text.strip() if rating_text is not None else None

        rr = product.find(
            'div',
            attrs={
                'class': 'RatingStars-module_reviewCount__H8VBI Typography-module_base__h-bPx Typography-module_helvetica__-8F7V',
            },
        )
        rating_review = rr.text.strip().replace('(', '').replace(')', '') if rr else None

        # ('product-card-price' or 'product-card-final-price') #'ProductCard-module__price___Sbmvg Typography-module_base__h-bPx Typography-module_helvetica__-8F7V Typography-module_bold__NNV5t'
        # Look the price block up once and guard it: calling a method on the
        # None returned for a card without a price block would raise
        # AttributeError.
        price_block = product.find('div', attrs={'class': 'ProductCard-module__priceBlock___5GV3W'})
        if price_block is not None:
            # find() is the current name for the legacy findChild() alias.
            p = price_block.find(attrs={'data-test': True})
            sp = price_block.find(attrs={'data-test': 'product-card-sale-price'})
        else:
            p = sp = None
        price = p.text.strip() if p else None
        sale_price = sp.text.strip() if sp else None

        pu = product.find(
            'div',
            attrs={
                'class': 'ProductCard-module__pricePerUnit___Lewj6 Typography-module_base__h-bPx Typography-module_helvetica__-8F7V',
                'data-test': 'price-per-unit',
            },
        )
        price_per_unit = pu.text.strip() if pu else None

        row = {
            'title': title,
            'rating': rating,
            'rating_review': rating_review,
            'price': price,
            'sale_price': sale_price,
            'price_per_unit': price_per_unit,
        }
        data.append(row)

    # One DataFrame row per product card; the dict keys become the columns.
    df = pd.DataFrame.from_dict(data, orient='columns')
    # print(df)
    df.to_csv('hollandandbarrett_products.csv', encoding='utf-8', index=True, header=True)
else:
    print('Catalog not found!')
Solution
The error is in the `if` statement's condition, which uses `is not`:
...
catalog = html.find('div', attrs = {'class':'ProductListContainer-module__list___yHwue','data-test':True}) #'list-Products'
if catalog is not None:
products = catalog.find_all('a', attrs = {'class':'ProductCard-module__link___FyAjR','data-test':True}) #'product-card'
The `is not` operator checks identity, i.e. whether two names refer to the same object in memory. It returns True if they refer to different objects and False otherwise.
The `!=` operator compares only the values of the objects. It returns True if the values differ and False otherwise.
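Hence the error; do this instead. (Note: when the other operand is None, `!= None` and `is not None` give the same result for what `find()` returns, and PEP 8 recommends `is not None`, so if the error persists, check the other unguarded lookups in the script.)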
catalog = html.find('div', attrs = {'class':'ProductListContainer-module__list___yHwue','data-test':True}) #'list-Products'
if catalog != None:
products = catalog.find_all('a', attrs = {'class':'ProductCard-module__link___FyAjR','data-test':True}) #'product-card'
I hope this works and helps you.
Answered By - AbhaySakariya
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.