Issue
I'm trying to build a plot where I can examine bird mortality against chemicals in the air. The challenge I'm facing is with drawing the plots on top of one another.
Below is the code I wrote. Basically, given six chemicals, I have six separate axes; in each one I plot the relationship between that chemical and bird mortality, with the week number on the x-axis.
Even though both graphs come from the same dataframe (so the x-axis data is identical), Seaborn draws the two graphs separately instead of overlaying them.
I tried using `bird_chem_df.plot()` and vanilla matplotlib to see if this was limited to Seaborn, but no luck.
I'm curious to know exactly what causes the x-axes not to agree, even though the data comes from the same source and both plots share the same independent variable.
In case you want to play with the data: https://drive.google.com/file/d/1KKkSoy3xQno_vE_-LSoviClmqC3UDcW6/view?usp=sharing
# Import the CSV in the link above
# bird_chem_df = pd.read_csv('...')
# NOTE: this snippet assumes pandas (`pd`), matplotlib.pyplot (`plt`) and
# seaborn (`sb`) have already been imported under those aliases.
# This is the code from the question: the bars and the line are both given
# x="week_number", yet they do not line up on the shared x-axis.

# Using `target_chemicals_chart` as our array, we will loop through each
# and plot it against bird mortality
target_chemicals_chart = ['Toluene', 'o-Xylene', 'm,p-Xylene','Ethylbenzene', 'Benzene', 'PM2.5']

# y-axis limits [min, max] to use for each chemical's bar plot
target_chemicals_display = {
    'Toluene': [0, 2.5],
    'o-Xylene': [0, 2.5],
    'm,p-Xylene': [0, 2.5],
    'Benzene': [0, 2.5],
    'Ethylbenzene': [0, 8],
    'PM2.5': [0, 25]
}
chemical_index = 0

# Setup the plot
fig, axs = plt.subplots(2, 3, figsize=(12, 7))
fig.subplots_adjust(left=7, bottom=7, right=9, top=9, wspace=0.2, hspace=0.5)

# Plot each axis: `axs` is a 2x3 array, so walk it row by row
for row in axs:
    for col in row:
        target_chemical = target_chemicals_chart[chemical_index]
        col.set_title(f"{target_chemical} vs. Bird Mortality")

        # Bars: weekly value of the chemical on the left y-axis
        sb.barplot(ax=col, x="week_number", y=target_chemical, data=bird_chem_df, ci=None, color='lightsteelblue')
        col.set_ylim(target_chemicals_display[target_chemical])

        # Line: bird deaths on a secondary y-axis sharing the same x-axis
        bird = col.twinx()
        sb.lineplot(ax=bird, x="week_number", y="total_bird_deaths", data=bird_chem_df, color='red')

        col.set_xlabel("Weeks since Spill")
        col.set_ylabel("Average Result for Chemical - ug/m3")

        # Increase font size due to fig configurations
        for item in ([col.title, col.xaxis.label, col.yaxis.label] +
                     col.get_xticklabels() + bird.get_yticklabels() + col.get_yticklabels()):
            item.set_fontsize(20)

        chemical_index += 1
Solution
- Bar plot tick locations are 0-indexed (0, 1, 2, …), whereas line plot ticks are placed at the actual x values (here, the week numbers).
- Convert the multi-dimensional array of `axs` to 1-D for easier access and iteration.
- In this case, the x-axis for the bar plot was `'week_number'`, the same as for the line plot, so there are the same number of tick locations in the two plots. Therefore, instead of plotting with `x='week_number'`, plot the line plot at the same tick locations as the bar plot by specifying `x=ax.get_xticks()`.
- Another option is to use `x=bird_chem_df.index` for both plots (because the index is a `RangeIndex` starting at 0), and then change the xtick labels to `bird_chem_df.week_number`.
- Tested in `python 3.8.11`, `pandas 1.3.2`, `matplotlib 3.4.3`, `seaborn 0.11.2`.
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Sample of the question's data. float('nan') marks the missing Ethylbenzene
# reading so that no numpy import is needed just for one missing value.
data = {
    'week_number': [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
    'Benzene': [0.62, 0.459, 0.542, 0.631, 0.56, 0.61, 0.691, 1.109, 0.524, 0.87, 0.896, 0.665, 0.898, 0.862, 0.611, 0.37],
    'Ethylbenzene': [0.364, 0.204, 0.372, 0.36, 0.225, 0.412, 0.332, 0.659, 0.241, 1.7, float('nan'), 1.2, 7.35, 0.352, 0.267, 0.154],
    'PM2.5': [14.621, 12.561, 11.174, 18.307, 11.285, 20.202, 16.111, 13.057, 11.301, 12.214, 18.173, 21.308, 14.009, 14.111, 9.805, 7.818],
    'Toluene': [1.339, 0.999, 1.18, 2.019, 1.217, 1.797, 1.478, 1.896, 1.552, 2.201, 1.101, 1.416, 1.215, 1.598, 1.356, 0.877],
    'm,p-Xylene': [1.059, 0.842, 1.186, 1.116, 0.693, 1.372, 0.972, 2.103, 0.568, 1.783, 1.736, 1.486, 1.983, 1.082, 0.909, 0.354],
    'o-Xylene': [0.525, 0.328, 0.356, 0.409, 0.265, 0.396, 0.32, 0.634, 0.266, 1.543, 0.74, 2.0, 0.93, 0.374, 0.328, 0.15],
    'total_bird_deaths': [13, 14, 224, 87, 120, 165, 224, 252, 303, 416, 580, 537, 592, 713, 442, 798],
}
bird_chem_df = pd.DataFrame(data)

# Order in which the chemicals are laid out across the subplots
target_chemicals_chart = ['Toluene', 'o-Xylene', 'm,p-Xylene','Ethylbenzene', 'Benzene', 'PM2.5']
# y-axis limits [min, max] for each chemical's bar plot
target_chemicals_display = {'Toluene': [0, 2.5], 'o-Xylene': [0, 2.5], 'm,p-Xylene': [0, 2.5], 'Benzene': [0, 2.5], 'Ethylbenzene': [0, 8], 'PM2.5': [0, 25]}

# Setup the plot
fig, axs = plt.subplots(2, 3, figsize=(15, 7))

# convert axs to a 1-D array
axs = axs.ravel()

# Pair each subplot with its own chemical so every panel shows a different one
for ax, target_chemical in zip(axs, target_chemicals_chart):
    ax.set_title(f"{target_chemical} vs. Bird Mortality")
    sns.barplot(ax=ax, x="week_number", y=target_chemical, data=bird_chem_df, ci=None, color='lightsteelblue')
    ax.set_ylim(target_chemicals_display[target_chemical])

    bird = ax.twinx()
    # plot against the same tick values as the bar plot, with x=ax.get_xticks()
    sns.lineplot(ax=bird, x=ax.get_xticks(), y="total_bird_deaths", data=bird_chem_df, color='red', marker='o')

    ax.set_xlabel("Weeks since Spill")
    ax.set_ylabel("Average Result for Chemical - ug/m3")

fig.tight_layout()
- To plot all the chemical columns using the `x=bird_chem_df.index` option:
# Sample of the question's data. float('nan') marks the missing Ethylbenzene
# reading so that no numpy import is needed just for one missing value.
# NOTE: `pd`, `plt` and `sns` are expected to be imported already.
data = {
    'week_number': [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
    'Benzene': [0.62, 0.459, 0.542, 0.631, 0.56, 0.61, 0.691, 1.109, 0.524, 0.87, 0.896, 0.665, 0.898, 0.862, 0.611, 0.37],
    'Ethylbenzene': [0.364, 0.204, 0.372, 0.36, 0.225, 0.412, 0.332, 0.659, 0.241, 1.7, float('nan'), 1.2, 7.35, 0.352, 0.267, 0.154],
    'PM2.5': [14.621, 12.561, 11.174, 18.307, 11.285, 20.202, 16.111, 13.057, 11.301, 12.214, 18.173, 21.308, 14.009, 14.111, 9.805, 7.818],
    'Toluene': [1.339, 0.999, 1.18, 2.019, 1.217, 1.797, 1.478, 1.896, 1.552, 2.201, 1.101, 1.416, 1.215, 1.598, 1.356, 0.877],
    'm,p-Xylene': [1.059, 0.842, 1.186, 1.116, 0.693, 1.372, 0.972, 2.103, 0.568, 1.783, 1.736, 1.486, 1.983, 1.082, 0.909, 0.354],
    'o-Xylene': [0.525, 0.328, 0.356, 0.409, 0.265, 0.396, 0.32, 0.634, 0.266, 1.543, 0.74, 2.0, 0.93, 0.374, 0.328, 0.15],
    'total_bird_deaths': [13, 14, 224, 87, 120, 165, 224, 252, 303, 416, 580, 537, 592, 713, 442, 798],
}
bird_chem_df = pd.DataFrame(data)

# y-axis limits [min, max] for each chemical's bar plot
target_chemicals_display = {'Toluene': [0, 2.5], 'o-Xylene': [0, 2.5], 'm,p-Xylene': [0, 2.5], 'Benzene': [0, 2.5], 'Ethylbenzene': [0, 8], 'PM2.5': [0, 25]}
# Order in which the chemicals are laid out across the subplots
target_chemicals_chart = ['Toluene', 'o-Xylene', 'm,p-Xylene','Ethylbenzene', 'Benzene', 'PM2.5']

# Setup the plot: one figure, one panel per chemical
fig, axs = plt.subplots(2, 3, figsize=(15, 7))
axs = axs.ravel()

for ax, target_chemical in zip(axs, target_chemicals_chart):
    ax.set_title(f"{target_chemical} vs. Bird Mortality")

    # Both plots use the dataframe index as x, so they share the same
    # 0-based positions on the x-axis.
    sns.barplot(ax=ax, x=bird_chem_df.index, y=target_chemical, data=bird_chem_df, ci=None, color='lightsteelblue')
    ax.set_ylim(target_chemicals_display[target_chemical])

    bird = ax.twinx()
    sns.lineplot(ax=bird, x=bird_chem_df.index, y="total_bird_deaths", data=bird_chem_df, color='red', marker='o')

    # set the x-axis tick label to be the week numbers
    ax.set_xticks(ax.get_xticks())
    ax.set_xticklabels(bird_chem_df.week_number)

    ax.set_xlabel("Weeks since Spill")
    ax.set_ylabel("Average Result for Chemical - ug/m3")

fig.tight_layout()
plt.show()
Answered By - Trenton McKinney
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.