Issue
I am trying to scrape this site to get the list of offers.
The problem is that we need to fill 2 forms (2 POST queries) before receiving the final result.
This is what I have done so far:
import requests as rs
from form_data import form_data1, form_data2
# Root of the comparison site's JSON API.
base_url = "https://compare.energy.vic.gov.au/api"

with rs.Session() as s:
    # Fetch the server cache id returned by get-psb-details.
    url_ = f"{base_url}/get-psb-details?serverCacheId=null"
    r = s.get(url_)
    serverCacheId = r.json()["serverCacheId"]

    # Submit the two forms in order (form-encoded, as in the original attempt).
    r = s.post(f"{base_url}/save-form-data", data=form_data1)
    r = s.post(f"{base_url}/save-form-data", data=form_data2)
Then I am trying to retrieve the offers after the second POST query:
# Request the offers, identifying the submission by its server cache id.
url_ = "https://compare.energy.vic.gov.au/api/get-offers"
body = dict(
    serverCacheId=str(serverCacheId),
    loopBack="false",
    selectedEnergy="/offer",
)
r = s.get(url_, params=body)
offers = r.json()
print(offers)
But unfortunately I get a message indicating a redirection:
{'status': 'redirect', 'message': 'no data'}
The 2 POSTs use the following data:
# Payload of the first save-form-data POST ("energyConfigData" page).
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "hasConcession": "0",
    # Nested distributor record.
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All",
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    "loopBack": "true",
}
and
# Payload of the second save-form-data POST ("energyProfileData" page).
form_data2 = {
    # Household and solar basics.
    "pvCapacity": "0",
    "pvCapacityCap": "null",
    "hhSize": "1",
    "totalRooms": "1",
    "fridgeCount": "0",
    "gasConnection": "4",
    # Pool heating.
    "poolHeating": "0",
    "poolHeatingSolar": "false",
    "poolHeatingGas": "false",
    "poolHeatingElectric": "false",
    "poolHeatingNone": "false",
    # Space heating.
    "spaceHeatingElectricDucted": "false",
    "spaceHeatingSplitSystem": "false",
    "spaceHeatingElectricUnderfloor": "false",
    "spaceHeatingElectricIndividual": "false",
    "spaceHeatingGasDucted": "false",
    "spaceHeatingGasUnderfloor": "false",
    "spaceHeatingGasIndividual": "false",
    "spaceHeatingOther": "false",
    "spaceHeatingNone": "true",
    # Space cooling.
    "spaceCoolingRoomAC": "false",
    "spaceCoolingSplitSystem": "false",
    "spaceCoolingDuctedReverse": "false",
    "spaceCoolingDuctedEvaporative": "false",
    "spaceCoolingPortableRef": "false",
    "spaceCoolingPortableEvap": "false",
    "spaceCoolingOther": "false",
    "spaceCoolingNone": "true",
    # Appliances.
    "seaDistance": "",
    "clothesDryer": "0",
    "clothesDryerWeekday": "",
    "clothesDryerWeekend": "",
    "dishwasherWeekday": "",
    "dishwasherWeekend": "",
    # Water heating.
    "waterHeatingElectric": "false",
    "waterHeatingElectricSolar": "false",
    "waterHeatingGasStorage": "false",
    "waterHeatingGasInstant": "false",
    "waterHeatingGasSolar": "false",
    "waterHeatingOther": "true",
    # Remaining usage fields.
    "controlledLoad": "",
    "tvTotal": "",
    "turnOffAtPowerShort": "",
    "ovensElectric": "",
    "ovensGas": "",
    "washingMachineUsage": "",
    "washingMachineWeekday": "",
    "washingMachineWeekend": "",
    "televisionUsageWeekday": "",
    "televisionUsageWeekend": "",
    "heatingUsageMethod": "",
    "gasUsageWinter": "0",
    "hhSize51": "",
    # Fields shared with the first form, plus page marker.
    "energyType": "Electricity",
    "hasSolar": "0",
    "pageDataType": "energyProfileData",
    "loopBack": "false",
}
Expected result
The expected result is a JSON object containing offers. Here is its structure:
{
"selectedEnergyType": "Electricity",
"energyTypeCount": 1,
"offers": {
"Electricity": {
"offersList": [{...}]
}
}
}
Solution
The site has some requirements and restrictions on the form data.
`form_data1`:
- Add the required fields `"solarCapacity"` and `"feedInTariff"`:
"hasSolar": "0",
"solarCapacity": "",  # Add this
"hasConcession": "0",
"feedInTariff": "",  # Add this
- Change `"loopBack": "true"` to `"loopBack": False` (sent as JSON `false`):
# "loopBack": "true"
"loopBack": False
- Set `"serverCacheId"` and change `data=` to `json=`:
# r = s.post(f"{base_url}/save-form-data", data=form_data1)
r = s.post(f"{base_url}/save-form-data", json=dict(form_data1, serverCacheId=str(serverCacheId)))
`form_data2`:
- Set `"serverCacheId"` and change `data=` to `json=`:
# r = s.post(f"{base_url}/save-form-data", data=form_data2)
r = s.post(f"{base_url}/save-form-data", json=dict(form_data2, serverCacheId=str(serverCacheId)))
- (Optional, for consistency) Change `"loopBack": "false"` to `"loopBack": False` (sent as JSON `false`):
# "loopBack": "false"
"loopBack": False
The combined code:
import requests as rs
# Payload of the first save-form-data POST ("energyConfigData" page),
# including the "solarCapacity" and "feedInTariff" fields.
form_data1 = {
    "showSolarSelection": "true",
    "energyType": "Electricity",
    "userType": "Residential",
    "bill": "no bill",
    "postcode": "3000",
    "usageProfile": "0",
    "averageDailyConsumption": "0",
    "skipNMI": "true",
    "smartMeter": "1",
    "disclaimer": "true",
    "hasSolar": "0",
    "solarCapacity": "",
    "hasConcession": "0",
    "feedInTariff": "",
    # Nested distributor record.
    "distributor": {
        "id": "4",
        "name": "Citipower",
        "display": "Citipower",
        "phone": "1300 301 101 / 13 12 80",
        "distribution_zone_id": "11",
        "distribution_zone_name": "All",
    },
    "distributorDerived": "0",
    "distributorSubmit": "true",
    "pageDataType": "energyConfigData",
    # A real boolean, not the string "true"/"false".
    "loopBack": False,
}
# Payload of the second save-form-data POST ("energyProfileData" page).
form_data2 = {
    # Household and solar basics.
    "pvCapacity": "0",
    "pvCapacityCap": "null",
    "hhSize": "1",
    "totalRooms": "1",
    "fridgeCount": "0",
    "gasConnection": "4",
    # Pool heating.
    "poolHeating": "0",
    "poolHeatingSolar": "false",
    "poolHeatingGas": "false",
    "poolHeatingElectric": "false",
    "poolHeatingNone": "false",
    # Space heating.
    "spaceHeatingElectricDucted": "false",
    "spaceHeatingSplitSystem": "false",
    "spaceHeatingElectricUnderfloor": "false",
    "spaceHeatingElectricIndividual": "false",
    "spaceHeatingGasDucted": "false",
    "spaceHeatingGasUnderfloor": "false",
    "spaceHeatingGasIndividual": "false",
    "spaceHeatingOther": "false",
    "spaceHeatingNone": "true",
    # Space cooling.
    "spaceCoolingRoomAC": "false",
    "spaceCoolingSplitSystem": "false",
    "spaceCoolingDuctedReverse": "false",
    "spaceCoolingDuctedEvaporative": "false",
    "spaceCoolingPortableRef": "false",
    "spaceCoolingPortableEvap": "false",
    "spaceCoolingOther": "false",
    "spaceCoolingNone": "true",
    # Appliances.
    "seaDistance": "",
    "clothesDryer": "0",
    "clothesDryerWeekday": "",
    "clothesDryerWeekend": "",
    "dishwasherWeekday": "",
    "dishwasherWeekend": "",
    # Water heating.
    "waterHeatingElectric": "false",
    "waterHeatingElectricSolar": "false",
    "waterHeatingGasStorage": "false",
    "waterHeatingGasInstant": "false",
    "waterHeatingGasSolar": "false",
    "waterHeatingOther": "true",
    # Remaining usage fields.
    "controlledLoad": "",
    "tvTotal": "",
    "turnOffAtPowerShort": "",
    "ovensElectric": "",
    "ovensGas": "",
    "washingMachineUsage": "",
    "washingMachineWeekday": "",
    "washingMachineWeekend": "",
    "televisionUsageWeekday": "",
    "televisionUsageWeekend": "",
    "heatingUsageMethod": "",
    "gasUsageWinter": "0",
    "hhSize51": "",
    # Fields shared with the first form, plus page marker.
    "energyType": "Electricity",
    "hasSolar": "0",
    "pageDataType": "energyProfileData",
    # A real boolean, not the string "false".
    "loopBack": False,
}
# Root of the comparison site's JSON API.
base_url = "https://compare.energy.vic.gov.au/api"

# Run the whole flow inside one session block.
with rs.Session() as s:
    # Step 1: fetch the server cache id that the later calls must carry.
    url_ = f"{base_url}/get-psb-details?serverCacheId=null"
    r = s.get(url_)
    serverCacheId = r.json()["serverCacheId"]

    # Step 2: submit both forms as JSON bodies, each tagged with the cache id.
    # dict(form, serverCacheId=...) builds a copy, leaving the templates as-is.
    r = s.post(f"{base_url}/save-form-data", json=dict(form_data1, serverCacheId=str(serverCacheId)))
    r = s.post(f"{base_url}/save-form-data", json=dict(form_data2, serverCacheId=str(serverCacheId)))

    # Step 3: request the offers computed for the submitted forms.
    url_ = "https://compare.energy.vic.gov.au/api/get-offers"
    body = {
        "serverCacheId": str(serverCacheId),
        "loopBack": "false",
        "selectedEnergy": "/offer",
    }
    r = s.get(url_, params=body)
    print(r.json())
Answered By - aaron
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.