Issue
I'm currently learning Python, so apologies for the basic question, but for some reason I can't seem to extract the £13.50 price from the following website:
"https://www.asos.com/the-north-face/the-north-face-norm-cotton-cap-in-black/prd/204512287?clr=black&colourWayId=204512317&cid=50056"
Below is the code I tried, but it prints `None`:
import requests
import bs4
# Product page whose price the script tries to read.
url = "https://www.asos.com/the-north-face/the-north-face-norm-cotton-cap-in-black/prd/204512287?clr=black&colourWayId=204512317&cid=50056"

# Browser-like User-Agent sent along with the request.
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"}

# Download the page and parse the returned HTML with the lxml parser.
response = requests.get(url, headers=headers)
document = bs4.BeautifulSoup(response.text, "lxml")

# Look up the <div> carrying the price's CSS class. find() yields None when
# the parsed HTML has no matching element, which is the result reported here.
price_tag = document.find('div', class_='a0j7k')
print(price_tag)
I can see the tag exists when I inspect the website:
I can also grab one of the above tags no problem but the price always returns none.
Please can someone point out what I am doing wrong?
Solution
The price data you see is inside a `<script>` element, so `beautifulsoup` doesn't see it. To parse it, use `re`/`json`:
import json
import re
import requests
def extract_stock_price_data(html_text):
    """Return the decoded ``stockPriceResponse`` payload embedded in *html_text*.

    The page assigns the price data to ``stockPriceResponse`` as a
    single-quoted JSON string inside a ``<script>`` element, so it is pulled
    out with a regular expression and then decoded with ``json``.

    Args:
        html_text: Raw HTML of the product page.

    Returns:
        The parsed JSON value (for this page, a list with one product dict).

    Raises:
        ValueError: If the ``stockPriceResponse`` assignment is not present
            in the HTML, or (as ``json.JSONDecodeError``) if its contents are
            not valid JSON.
    """
    match = re.search(r"stockPriceResponse = '(.*?)';", html_text)
    if match is None:
        # re.search returns None on no match; report that clearly instead of
        # failing with "'NoneType' object has no attribute 'group'".
        raise ValueError("stockPriceResponse not found in the page HTML")
    return json.loads(match.group(1))


def main():
    """Download the product page and print its current price text."""
    url = "https://www.asos.com/the-north-face/the-north-face-norm-cotton-cap-in-black/prd/204512287?clr=black&colourWayId=204512317&cid=50056"
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0"
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    data = extract_stock_price_data(response.text)

    # uncomment this to print all data
    # print(data)

    price = data[0]["productPrice"]["current"]["text"]
    print(price)


if __name__ == "__main__":
    main()
Prints:
£13.50
The `data` variable contains:
[
{
"productId": 204512287,
"productPrice": {
"current": {
"value": 13.5,
"text": "£13.50",
"versionId": "PRMP000002000200000008090911220231009020000",
"conversionId": "0",
},
"previous": {
"value": 24,
"text": "£24.00",
"versionId": "PRMP000002000200000008090911220231009020000",
"conversionId": "0",
},
"rrp": {
"value": None,
"text": None,
"versionId": "PRMP000002000200000008090911220231009020000",
"conversionId": "0",
},
"xrp": {
"value": 13.5,
"text": "£13.50",
"versionId": "PRMP000002000200000008090911220231009020000",
"conversionId": "0",
},
"currency": "GBP",
"isMarkedDown": True,
"isOutletPrice": False,
"startDateTime": "2023-10-09T01:00:00Z",
"previousEndDate": "2023-09-30T23:00:00Z",
"lowestPriceInLast30DaysValue": 9,
"lowestPriceInLast30DaysText": "£9.00",
"lowestPriceInLast30DaysEndDate": "2023-10-06T07:00:59Z",
"lowestPriceInLast30DaysPercentage": -50,
"discountPercentage": 43,
},
"hasMultipleColoursInStock": False,
"hasMultiplePricesInStock": False,
"isInStock": True,
"variants": [
{
"id": 204512325,
"isInStock": True,
"isLowInStock": False,
"stockLastUpdatedDate": "2023-06-02T13:24:15.661Z",
"warehouse": None,
"source": None,
"seller": None,
"price": {
"current": {
"value": 13.5,
"text": "£13.50",
"versionId": "PRMP000002000200000008090911220231009020000",
"conversionId": "0",
},
"previous": {
"value": 24,
"text": "£24.00",
"versionId": "PRMP000002000200000008090911220231009020000",
"conversionId": "0",
},
"rrp": {
"value": None,
"text": None,
"versionId": "PRMP000002000200000008090911220231009020000",
"conversionId": "0",
},
"xrp": {
"value": 13.5,
"text": "£13.50",
"versionId": "PRMP000002000200000008090911220231009020000",
"conversionId": "0",
},
"currency": "GBP",
"isMarkedDown": True,
"isOutletPrice": False,
"startDateTime": "2023-10-09T01:00:00Z",
"previousEndDate": "2023-09-30T23:00:00Z",
"lowestPriceInLast30DaysValue": 9,
"lowestPriceInLast30DaysText": "£9.00",
"lowestPriceInLast30DaysEndDate": "2023-10-06T07:00:59Z",
"lowestPriceInLast30DaysPercentage": -50,
"discountPercentage": 43,
},
}
],
}
]
Answered By - Andrej Kesely
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.