Issue
I'm trying to scrape a page so I can see what is in stock and what is not.
When something is in stock this element appears: class_='cta primary'
But when something is out of stock this element disappears and instead shows: class_='cta nostock'
Can someone show me what to add to get both in stock and out of stock?
Many thanks
#--Web scraping packages
from bs4 import BeautifulSoup
import requests
#Pandas/numpy for data manipulation
import pandas as pd
import numpy as np
# Rows collected from the listing page; each row is [title, price, outstock].
final_list=[]
# NOTE: the '{}' page placeholder is never filled in below, because the
# page loop is commented out and no formatting is applied to the string.
url = 'https://www.hillsidehanddryers.com/dryers/?page={}'
#for pg in range(1, 3):
pg = url #+ '?page=' + str(pg)
r=requests.get(pg)
soup = BeautifulSoup(r.text, 'lxml')
# Walk every <div class="innermargin"> found in the parsed page.
for paragraph in soup.find_all('div',class_='innermargin'): # <section class='review_content'>
    try:
        title=paragraph.find('h4').text.strip()
        price=paragraph.find('p',class_='price').text.strip()
        # find() returns None when nothing matches, so `.text` raises
        # AttributeError for whichever of the two links below is absent.
        # Per the description above, a product shows either 'cta nostock'
        # or 'cta primary' but not both, so one of these lookups is expected
        # to fail for every product.
        outstock=paragraph.find('a',class_='cta nostock').text.strip()
        instock=paragraph.find('a', class_='cta primary').text.strip()
        # Only `outstock` is stored; `instock` is assigned but never used.
        final_list.append([title,price,outstock])
    except AttributeError:
        # Any missing element silently drops the whole row.
        pass
# Build the table from whatever rows were appended and show the first 100.
df = pd.DataFrame(final_list,columns=['Title','price','outstock'])
df.head(100)
Solution
Just check whether an `<a>` element with the class `cta nostock` exists inside the item: if it does, set the value to `Out Of Stock`; otherwise set it to `In Stock`:
stock = 'Out Of Stock' if item.find('a',class_='cta nostock') else 'In Stock'
Example
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Scrape one listing page and tabulate title, price and stock status for
# every product tile (<div class="part">).

# Browser-like User-Agent sent with the request.
# NOTE(review): presumably the site treats the default client UA differently
# -- confirm whether this header is actually required.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
}

# URL template; the placeholder takes the page number.
url = 'https://www.hillsidehanddryers.com/dryers/?page={}'

# Fill in the page number (the literal '{}' used to be sent as-is) and pass
# the headers (they used to be defined but never used). The timeout keeps the
# script from hanging on an unresponsive server, and raise_for_status() turns
# an HTTP error into an exception instead of an empty table.
r = requests.get(url.format(1), headers=headers, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'lxml')

data = []
for item in soup.select('div.part'):
    title = item.find('h4').text.strip()
    price = item.find('p', class_='price').text.strip()
    # A tile is out of stock exactly when it carries the 'cta nostock' link;
    # find() returns None (falsy) when that link is absent.
    stock = 'Out Of Stock' if item.find('a', class_='cta nostock') else 'In Stock'
    data.append({
        'title': title,
        'price': price,
        'stock': stock,
    })

# One row per product; display the first 100.
df = pd.DataFrame(data)
df.head(100)
Answered By - HedgeHog
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.