Issue
I am unable to parse the Yelp review rating.
I made a quick video of the problem. https://streamable.com/42oj4
# Parse the previously downloaded page (page_html) with the built-in
# "html.parser" backend. `soup` is presumably an alias for BeautifulSoup
# imported outside this snippet -- TODO confirm.
bs = soup(page_html, "html.parser")
# Collect every search-result container <div> by matching its exact class string.
yelp_containers = bs.findAll("div", {"class": "lemon--div__373c0__1mboc searchResult__373c0__1yggB border-color--default__373c0__2xHhl"})
print(len(yelp_containers))
# Everything below inspects only the second container (index 1).
print(yelp_containers[1].text)
# Business name: text of the businessName <div> inside the container.
restaurant_title = yelp_containers[1].find("div", {"class": "lemon--div__373c0__1mboc businessName__373c0__1fTgn border-color--default__373c0__2xHhl"}).get_text()
print("restaurant_title: ", restaurant_title)
# Street address: text of the <address> element.
restaurant_address = yelp_containers[1].find("address", {"class": 'lemon--address__373c0__2sPac'}).get_text()
print("restaurant_address: " , restaurant_address)
# Review count: text of the reviewCount <span>.
restaurant_numReview = yelp_containers[1].find("span", {"class": "lemon--span__373c0__3997G text__373c0__2pB8f reviewCount__373c0__2r4xT text-color--mid__373c0__3G312 text-align--left__373c0__2pnx_"}).get_text()
print("restaurant_numReview: ", restaurant_numReview)
# Star rating: the class string contains "i-stars--regular-4", so this lookup
# can only match that one rating variant (presumably 4 stars -- TODO confirm).
# NOTE(review): get_text() is reported to come back empty for this element;
# the rating text appears to live in the div's aria-label attribute rather
# than in its text content -- verify against the page markup.
restaurant_starCount = yelp_containers[1].find("div", {"class": "lemon--div__373c0__1mboc i-stars__373c0__30xVZ i-stars--regular-4__373c0__2R5IO border-color--default__373c0__2oFDT overflow--hidden__373c0__8Jq2I"}).get_text()
print("restaurant_starCount: ", restaurant_starCount)
I expect to get "4 star rating", but I get nothing.
Solution
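To scrape the star count, read it from the `aria-label` attribute of the rating `div` instead of calling `get_text()` on it (the question's code gets an empty string from that element's text). For example: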
import re
from bs4 import BeautifulSoup as soup
# Import urlopen under one name on both major Python versions.
# Only ImportError is caught so that unrelated failures (e.g. KeyboardInterrupt
# or a broken installation raising something else) are not silently swallowed.
try:  # Python 3
    from urllib.request import urlopen as uReq
except ImportError:  # Python 2
    from urllib2 import urlopen as uReq
# Yelp search-results page to scrape.
my_url = 'https://www.yelp.com/search?find_desc=Restaurants&find_loc=San%20Francisco%2C%20CA'

# Patterns are compiled once here instead of on every loop iteration.
RANK_RE = re.compile(r'^\d+\.')                  # organic results are titled "1.Name", "2.Name", ...
PHONE_RE = re.compile(r'(\(\d+\) \d+-\d+)\s*')   # e.g. "(415) 421-2442"
REVIEW_COUNT_RE = re.compile(r'\d+')
# The dot is escaped so that "4 star rating" yields "4" and not "4 "
# (an unescaped "." would also swallow the following space).
STAR_RE = re.compile(r'\d+(?:\.\d+)?')
SECONDARY_CLASS_RE = re.compile(r'secondaryAttributes')
REVIEW_CLASS_RE = re.compile(r'reviewCount')


def split_contact(text):
    """Split secondary-attribute text into ``(telephone, address)``.

    The address is whatever follows the first phone number; any text in
    front of the phone number is discarded. When no phone number is
    present, the telephone is ``''`` and the whole text is the address.
    """
    match = PHONE_RE.search(text)
    if match is None:
        return '', text
    return match.group(1), text[match.end():]


def parse_review_count(text):
    """Return the first run of digits in *text*, or ``''`` if there is none."""
    match = REVIEW_COUNT_RE.search(text)
    return match.group(0) if match else ''


def parse_star_count(label):
    """Return the leading rating number ("4", "4.5") from an aria-label string.

    Returns ``''`` when the label contains no number.
    """
    match = STAR_RE.search(label)
    return match.group(0) if match else ''


def has_aria_label(tag):
    """Tell whether *tag* is a ``<div>`` that carries an ``aria-label`` attribute."""
    return tag.name == 'div' and 'aria-label' in tag.attrs


def fetch_page(url):
    """Download *url* and return the raw response body."""
    uClient = uReq(url)
    try:
        return uClient.read()
    finally:
        # Close the connection even if read() raises.
        uClient.close()


def scrape_rows(bs):
    """Extract ``[title, address, telephone, num_reviews, star_count]`` rows.

    *bs* is the parsed search-results page. Only ``<h3>`` headings whose
    text starts with a rank number ("1.", "2.", ...) are treated as results.
    A field whose element cannot be found is reported as ``''`` instead of
    aborting the whole scrape.
    """
    rows = []
    for h3 in bs.select('h3'):
        title = h3.get_text(strip=True)
        if not RANK_RE.match(title):
            continue

        attrs_tag = h3.find_next(class_=SECONDARY_CLASS_RE)
        attrs_text = ''
        if attrs_tag is not None:
            attrs_text = attrs_tag.get_text(separator=' ', strip=True)
        telephone, address = split_contact(attrs_text)

        review_tag = h3.find_next(class_=REVIEW_CLASS_RE)
        num_reviews = parse_review_count(review_tag.text) if review_tag is not None else ''

        # The rating is exposed through the aria-label attribute, not as element text.
        star_tag = h3.find_next(has_aria_label)
        star_count = parse_star_count(star_tag['aria-label']) if star_tag is not None else ''

        rows.append([title, address, telephone, num_reviews, star_count])
    return rows


def main():
    """Fetch the search page, scrape it, and print the results as a table."""
    # Open the connection and grab the page.
    page_html = fetch_page(my_url)
    bs = soup(page_html, "html.parser")
    rows = scrape_rows(bs)

    print('{: ^40}{: ^60}{: ^20}{: ^15}{: ^10}'.format('Title', 'Address', 'Telephone', 'Num. reviews', 'Star Count'))
    for row in rows:
        print('{: <40}{: <60}{: ^20}{: ^15}{: ^10}'.format(*row))


if __name__ == "__main__":
    main()
Prints:
Title Address Telephone Num. reviews Star Count
1.Fog Harbor Fish House Pier 39 Fisherman's Wharf (415) 421-2442 5476 4
2.Marufuku Ramen SF Located in Japan Center Lower Pacific Heights (415) 872-9786 2345 4.5
3.The House 1230 Grant Ave North Beach/Telegraph Hill (415) 986-8612 4634 4.5
4.Zazie 941 Cole St Cole Valley (415) 564-5332 4256 4
5.Farmhouse Kitchen Thai Cuisine 710 Florida St Mission (415) 814-2920 1996 4
6.Burma Superstar 309 Clement St Inner Richmond (415) 387-2147 6605 4
7.Pier Market Seafood Restaurant Pier 39 Fisherman's Wharf (415) 989-7437 1879 4
8.Suppenküche 525 Laguna St Hayes Valley (415) 830-4716 3080 4
9.um.ma 1220 9th Ave Inner Sunset (415) 566-5777 54 4
10.Surisan 505 Beach St Fisherman's Wharf (415) 771-8449 1508 4.5
11.The New Spot On Polk 2401 Polk St Russian Hill (415) 913-7775 532 4.5
12.San Tung 1031 Irving St Inner Sunset (415) 242-0828 6596 4
13.Daeho Kalbijjim & Beef Soup 1620 Post St Japantown (415) 563-1388 330 4.5
14.Kitchen Story 3499 16th St Castro (415) 525-4905 3081 4
15.Liholiho Yacht Club 871 Sutter St Lower Nob Hill (415) 440-5446 2186 4.5
16.The Codmother Fish & Chips 496 Beach St Fisherman's Wharf (415) 606-9349 2892 4.5
17.Beretta 1199 Valencia St Mission (415) 695-1199 3495 4
18.Loló 974 Valencia St Mission (415) 643-5656 2061 4
19.Noosh 2001 Fillmore St Lower Pacific Heights (415) 231-5985 161 4
20.Hog Island Oyster Co Located in Ferry Building Marketplace Embarcadero (415) 391-7117 5718 4.5
21.Z & Y Restaurant 655 Jackson St Chinatown (415) 981-8988 1570 3.5
22.The Front Porch 65 29th St Bernal Heights (415) 695-7800 2708 4
23.Delarosa 2175 Chestnut St Marina/Cow Hollow (415) 673-7100 1446 4
24.Brenda’s French Soul Food 652 Polk St Tenderloin (415) 345-8100 10586 4
25.Dumpling Kitchen 1935 Taraval St Parkside (415) 682-8938 1419 4
26.Scoma’s Restaurant 1965 Al Scoma Way Fisherman's Wharf (415) 771-4383 3548 4
27.Causwells 2346 Chestnut St Marina/Cow Hollow (415) 447-6081 696 4
28.Mr Szechuan 890 Taraval St Parkside (415) 753-8788 128 4.5
29.Dumpling Time 11 Division St (415) 525-4797 1494 4
30.Tuba Authentic Turkish Restaurant 1007 Guerrero St Mission (415) 826-8822 983 4.5
Answered By - Andrej Kesely
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.