Issue
I'm trying to read this local JSON file using pandas, but when I use the JSON NORMALIZE library to extract data from the JSON it throws me errors, I've tried many ways to do it but I don't know if it's a code problem or a JSON structure problem
I need the transportEvents part of this file to save and upload to bigquery project
[
{
"event": {
"eventPublishTime": 0,
"eventSubmissionTime": 0,
"correlationId": "string",
"eventName": "string",
"senderOrgName": "string",
"senderOrgTypes": [
"string"
],
"originatorId": "string",
"eventOccurrenceTime": 1589574600000,
"eventOccurrenceTime8601": "2020-05-15T15:30:00.000-05:00",
"fromOceanAggregator": "false",
"originatorName": "string",
"containerTransportId": "string",
"physicalId": "string",
"eventSubmissionGpsLocation": "string",
"action": "string",
"doc": {
"description": "string",
"contentUrl": "string",
"docId": "string",
"docVersion": 0,
"docType": "string",
"docReferences": [
{
"referenceName": "string",
"referenceValue": "string"
}
]
},
"destinationOrgName": "string",
"documentType": "string",
"vehicleId": "string",
"vehicleName": "string",
"voyageId": "string",
"transportationPhase": "Import",
"eventTransactionId": "string",
"shipmentRef": "string",
"carrierBookingNumber": "string",
"containerTransportRef": "string",
"firstPortOfEntryCountry": "AD",
"cargoRemainingOnboardCountries": [
"AD"
],
"transitImportCountries": [
"AD"
],
"transitExportCountries": [
"AD"
],
"transshipmentPorts": [
"string"
],
"transshipmentPortDescriptions": [
"string"
],
"entryPort": "string",
"exitPort": "string",
"terminal": "string",
"terminalDescription": "string",
"consignmentId": "string",
"consignmentRef": "string",
"transportEquipmentId": "string",
"transportEquipmentRef": "string",
"equipmentNumber": "string",
"equipmentType": "string",
"transportPlanIssuanceTime8601": "2020-05-15T15:30:00.000-05:00",
"transportPlanIssuanceTime": 1589574600000,
"transportPlanSequenceNumber": 0,
"transportPlanTotalNumber": 0,
"subscriptionId": "string",
"documentId": "string",
"documentVersion": 0,
"dangerousGoodsType": "string",
"packingListNumber": "string",
"billOfLadingNumber": "string",
"consigneeId": "string",
"consigneeIdType": "string",
"shipperId": "string",
"shipperIdType": "string",
"operatorCode": "string",
"operatorName": "string",
"sealType": "Carrier",
"sealNumber": "string",
"seals": [
{
"sealNumber": "string",
"sealType": "Carrier"
}
],
"reasonForSurvey": "string",
"verifiedGrossMass": "string",
"netWeight": "string",
"certificateType": "string",
"waypointId": "string",
"newSlotNumber": "string",
"fullStatus": "Full",
"equipmentNumbers": [
"string"
],
"reasonCode": "string",
"cutOffType": "Cargo",
"cutOffDateTime8601": "2020-05-15T15:30:00.000-05:00",
"cutOffDateTime": 1589574600000,
"cutOffEventType": "string",
"harmonizedCodes": [
"string"
],
"eventType": "string",
"associatedConsignmentIds": [
"string"
],
"associatedCarrierBookingNumbers": [
"string"
],
"associatedBillOfLadingNumbers": [
"string"
],
"eBLProvider": "string",
"consignmentsToAccess": [
{
"transactionRef": {
"reference": "string",
"type": "string"
},
"carrierIdentifier": "string",
"consignmentId": "string",
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"equipmentNumbers": [
"string"
]
}
],
"data": [
{
"type": "Barge",
"value": "string"
}
],
"document": {
"description": "string",
"uri": "string",
"hash": "string",
"hashEncodingType": "string",
"platformDocumentId": "string",
"platformDocumentVersion": 0
},
"documents": [
{
"description": "string",
"uri": "string",
"hash": "string",
"hashEncodingType": "string",
"platformDocumentId": "string",
"platformDocumentVersion": 0
}
],
"location": {
"unlocode": "string",
"address": {
"address1": "string",
"address2": "string",
"city": "string",
"stateProvince": "string",
"zipPostal": "string",
"country": "US"
},
"smdgTerminal": "string",
"geoCoord": {
"latitude": 0,
"longitude": 0
},
"splc": "string",
"gln": "string",
"type": "UN/Locode",
"value": "string",
"description": "string",
"locationKey": "string"
},
"bookingData": {
"originLocation": {
"unlocode": "string",
"address": {
"address1": "string",
"address2": "string",
"city": "string",
"stateProvince": "string",
"zipPostal": "string",
"country": "US"
},
"smdgTerminal": "string",
"geoCoord": {
"latitude": 0,
"longitude": 0
},
"splc": "string",
"gln": "string",
"type": "UN/Locode",
"value": "string",
"description": "string",
"locationKey": "string"
},
"destinationLocation": {
"unlocode": "string",
"address": {
"address1": "string",
"address2": "string",
"city": "string",
"stateProvince": "string",
"zipPostal": "string",
"country": "US"
},
"smdgTerminal": "string",
"geoCoord": {
"latitude": 0,
"longitude": 0
},
"splc": "string",
"gln": "string",
"type": "UN/Locode",
"value": "string",
"description": "string",
"locationKey": "string"
},
"contractCarriageCondition": "SD_SD",
"departureTime8601": "2020-05-15T15:30:00.000-05:00",
"departureDateEpoch": 1589574600000,
"vehicleId": "string",
"vehicleName": "string",
"voyageId": "string",
"transportEquipmentDetails": [
{
"equipmentType": "string",
"equipmentQuantity": 0,
"description": "string"
}
],
"commodities": [
{
"itemNumber": 0,
"commodityDescription": "string",
"commodityHarmonizedCode": "string",
"commodityQuantity": 0,
"commodityWeight": "string"
}
]
},
"subcontractId": "string",
"subcontractCBN": "string",
"subcontractBOL": "string",
"messages": [
"string"
],
"consignmentStatus": "Active",
"transferFromConsignment": {
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"consignmentId": "string"
},
"partLoadWithConsignment": {
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"consignmentId": "string"
},
"transferFromEquipment": "string",
"splitFromConsignment": {
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"consignmentId": "string"
},
"combinedCarrierBookingNumbers": [
"string"
],
"partBill": "false",
"partBillOf": {
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"consignmentId": "string"
},
"shipmentIdentifier": {
"reference": "string",
"type": "string",
"label": "string"
},
"plannedCompletionDate": 1589574600000,
"plannedCompletionDate8601": "2020-05-15T15:30:00.000-05:00",
"plannedDeliveryDate": 1589574600000,
"plannedDeliveryDate8601": "2020-05-15T15:30:00.000-05:00",
"consignmentIdentifiers": [
{
"consignmentId": "string",
"carrierBookingNumber": "string",
"billOfLadingNumber": "string"
}
],
"references": [
{
"reference": "string",
"type": "string",
"label": "string"
}
],
"goodsDescription": "string",
"issuanceTime8601": "2020-05-15T15:30:00.000-05:00",
"callersPartyRole": "SELLER",
"parties": [
{
"partyRef": "string",
"partyRole": "BUYER"
}
],
"consignments": [
"string"
],
"transportEquipment": [
"string"
],
"shipments": [
"string"
],
"transportEquipmentDetails": [
{
"equipmentType": "string",
"equipmentQuantity": 0
}
],
"equipmentPickUpComments": "string",
"minimumTemperature": 0,
"maximumTemperature": 0,
"unit": "degreeCelsius",
"temperatureInstructions": "string",
"eventClassifier": "Planned",
"transportMode": "Vessel",
"eventAction": "Arrival",
"facilityType": "PortTerminal",
"emptyIndicator": "Empty",
"eventPriority": 0,
"releaseParty": {
"partyRef": "string",
"orgName": "string",
"address": {
"address1": "string",
"address2": "string",
"city": "string",
"stateProvince": "string",
"zipPostal": "string",
"country": "US"
}
},
"controlBy": "CUSTOMS",
"controlEventAction": "CONTROL_START",
"nationalControlCode": "string",
"procedure": "IMPORTATION",
"conditionalRelease": "string",
"releaseObject": "FULL_CONTAINER",
"releasedBy": "CUSTOMS",
"declarationRef": "string",
"nationalProcedureDetails": {
"procedureCode": "string",
"procedureDescription": "string",
"procedureType": "string",
"procedureStatus": "string",
"procedureNumber": "string",
"createTime8601": "2018-03-10T11:30:00.000-05:00",
"closeTime8601": "2018-03-10T11:30:00.000-05:00",
"carrierIdentification": "string",
"procedureLocation": "string"
},
"transportEvents": [
{
"eventAction": "Arrival",
"transportMode": "Rail",
"eventOccurrenceTime8601": "2018-03-13T11:30:00.000-05:00",
"location": {
"unlocode": "NLRTM"
},
"transportPlanSequenceNumber": 1,
"transportationPhase": "Import",
"vehicleId": "JEV4568",
"vehicleName": "Vehicle Name",
"voyageId": "1234",
"emptyIndicator": "Laden"
}
]
}
}
]
and this is the python code I am using
import pandas as pd
import json
from pandas.io.json import json_normalize
ruta_archivo_json = 'response_1660310720193.json'
data = pd.read_json(ruta_archivo_json)
df = pd.json_normalize(data, 'transportEvents')
print(df)
df.head(10)
Solution
I'm not sure what you would like to achieve in the final outcome. But if you run below code you should get something in return
import pandas as pd
import json
# from pandas.io.json import json_normalize #no need this
pd.options.display.max_colwidth = 100 #default=50
pd.options.display.max_rows = 250 #default=60
data = pd.read_json('response_1660310720193.json')
df = pd.json_normalize(data['event']).T
print(df)
Output:
eventPublishTime 0
eventSubmissionTime 0
correlationId string
eventName string
senderOrgName string
senderOrgTypes [string]
originatorId string
eventOccurrenceTime 1589574600000
eventOccurrenceTime8601 2020-05-15T15:30:00.000-05:00
fromOceanAggregator false
originatorName string
<truncated>
declarationRef string
transportEvents [{'eventAction': 'Arrival', 'transpo...
doc.description string
<truncated>
You should see 'transportEvents' in the df
, so if you drill deeper you can get the data
df1 = pd.json_normalize(df.T['transportEvents'].explode().tolist()).T
df1.columns = ['transportEvents']
print(df1)
transportEvents
eventAction Arrival
transportMode Rail
eventOccurrenceTime8601 2018-03-13T11:30:00.000-05:00
transportPlanSequenceNumber 1
transportationPhase Import
vehicleId JEV4568
vehicleName Vehicle Name
voyageId 1234
emptyIndicator Laden
location.unlocode NLRTM
After the above exploration, you would then know the correct keys to apply the function pd.json_normalize()
, with the first key event
and nested with second key transportEvents
, same output as above
df1 = pd.json_normalize(data, ['event', 'transportEvents'])
df1.columns = ['transportEvents']
print(df1)
Answered By - perpetualstudent
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.