Issue
The above is the HTML code I'm accessing; I need to gather the data stored in "data-vehicle". Below is my code. It doesn't print the data I want; it only prints the content_list variable.
# Parse the fetched page. `response` is not defined in this snippet;
# presumably it is the result of an earlier HTTP request.
html_soup = BeautifulSoup(response.text, 'html.parser')
# Collect the div elements carrying the "vehicle vehicle-grid-item" class string.
content_list = html_soup.find_all('div', attrs={'class': 'vehicle vehicle-grid-item'})
print(content_list)
basic_info = []
for item in content_list:
# Looks for nested divs whose *class* is "data-vehicle" and appends the result list.
basic_info.append(item.find_all('div', attrs={'class': 'data-vehicle'}))
print(basic_info)
It won't print the basic_info variable.
Solution
data-vehicle is an attribute of the div element (not a class), so it can be accessed with item.get("data-vehicle").
import json
from bs4 import BeautifulSoup
import pandas as pd
html = '''
<html>
<div class="vehicle vehicle-grid-item" data-auto5-vehicle-vin="3N1AB7AP1KY453039"
data-vehicle='{"year":"2019","make":"Nissan","model":"Sentra","trim":"SV CVT","bodyType":["Sedan"],"fuelType":"Gasoline","vin":"3N1AB7AP1KY453039","exterior_color":"Blue","drivetrain":"Front Wheel Drive","transmission":"1-Speed CVT w\/OD","zip":"76210","conditions":["Used"],"condition":"used","mileage":62818,"price":"17,999","stockNumber":"7TW77W","location":{"name":"Enterprise Car Sales Corinth"}}'/>
<div class="vehicle vehicle-grid-item" data-vehicle='{"year":"2019","make":"Ford","model":"Fiesta","bodyType":["Sedan"],"fuelType":"Gasoline","condition":"used"}'/>
</html>
'''
# Parse the sample markup and collect every vehicle card by its class string.
html_soup = BeautifulSoup(html, 'html.parser')
content_list = html_soup.find_all('div', attrs={'class': 'vehicle vehicle-grid-item'})
# print(content_list)

# One flat dictionary per vehicle, suitable for one DataFrame row each.
basic_info = []
for item in content_list:
    # data-vehicle is an attribute of the div itself; .get() returns None
    # when the attribute is missing, so such elements are skipped.
    raw = item.get("data-vehicle")
    if not raw:
        continue
    # Decode the JSON string first: the attribute value is a str, and the
    # dictionary operations below (pop / item assignment) require a dict.
    data = json.loads(raw)
    # location is a nested dictionary; flatten it into "location.<key>"
    # entries so each field becomes its own DataFrame column.
    if isinstance(data.get("location"), dict):
        location = data.pop("location")
        for k, v in location.items():
            data[f"location.{k}"] = v
    basic_info.append(data)
print(basic_info)

# convert to a Pandas dataframe
df = pd.DataFrame(basic_info)
print(df)
Output:
[{'year': '2019', 'make': 'Nissan', 'model': 'Sentra', 'trim': 'SV CVT',
'bodyType': ['Sedan'], 'fuelType': 'Gasoline', 'vin': '3N1AB7AP1KY453039',
'exterior_color': 'Blue', 'drivetrain': 'Front Wheel Drive',
'transmission': '1-Speed CVT w/OD', 'zip': '76210', 'conditions': ['Used'],
'condition': 'used', 'mileage': 62818, 'price': '17,999', 'stockNumber': '7TW77W',
'location.name': 'Enterprise Car Sales Corinth'}...]
year make model trim bodyType fuelType vin ... zip conditions condition mileage price stockNumber location.name
0 2019 Nissan Sentra SV CVT [Sedan] Gasoline 3N1AB7AP1KY453039 ... 76210 [Used] used 62818.0 17,999 7TW77W Enterprise Car Sales Corinth
1 2019 Ford Fiesta NaN [Sedan] Gasoline NaN ... NaN NaN used NaN NaN NaN NaN
[2 rows x 17 columns]
You can also create the dataframe with pd.DataFrame.from_records().
Answered By - CodeMonkey
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.