Issue
I have created this plot where I have "observed E. coli" on the the left side "y axis", "modelled E. coli" on the right side "y axis" and "dates" on the "x axis".
The code is this
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
source = "Sample_table.csv"
df = pd.read_csv(source, encoding = 'unicode_escape')
x = df['Date_1']
y1 = df['Obs_Ec']
y2 = df['Rain']
y3 = df['Mod_Ec']
# Plot Line1 (Left Y Axis)
fig, ax1 = plt.subplots(1,1,figsize=(10,6), dpi= 80)
# Plot Line2 (Right Y Axis)
ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis
ax2.plot(x, y2, color='tab:blue', linewidth=2.0)
# Plot Line2 (Right Y Axis)
ax3 = ax1.twinx() # instantiate a second axes that shares the same x-axis
ax3.scatter(x, y3)
# Control limits of the y Axis
a,b = 0,80000
c,d = 0,80000
e,f = 0,35
ax1.set_ylim(a,b)
ax3.set_ylim(c,d)
ax2.set_ylim(e,f)
# Decorations
# ax1 (left Y axis)
ax1.set_xlabel('Date', fontsize=20)
ax1.set_ylabel('E. coli - cfu ml-1', color='tab:red', fontsize=20)
ax1.tick_params(axis='y',rotation=0, labelcolor='tab:red')
ax1.grid(alpha=.0)
ax1.tick_params(axis='both', labelsize=14)
# Plot the scatter points
ax1.scatter(x, y1,
color="red", # Color of the dots
s=50, # Size of the dots
alpha=0.5, # Alpha of the dots
linewidths=0.5) # Size of edge around the dots
ax1.scatter(0**np.arange(5), 0**np.arange(5))
ax1.legend(['Observed E. coli'], loc='right',fontsize=14, bbox_to_anchor=(0.2, -0.20))
ax3.scatter(x, y3,
color="green", # Color of the dots
s=50, # Size of the dots
alpha=0.5, # Alpha of the dots
linewidths=0.5) # Size of edge around the dots
ax3.scatter(0**np.arange(5), 0**np.arange(5))
ax3.legend(['Modelled E. coli'], loc='right',fontsize=14, bbox_to_anchor=(0.48, -0.20))
# ax2 (right Y axis)
ax2.set_ylabel("Rainfall - mm", color='tab:blue', fontsize=20)
ax2.tick_params(axis='y', labelcolor='tab:blue')
ax2.tick_params(axis='both', labelsize=15)
ax2.set_xticks(np.arange(1, len(x), 4))
ax2.set_xticklabels(x[0::4], rotation=15, fontdict={'fontsize':10})
ax2.set_title("SP051 - without SR (validation 2018-2020)", fontsize=22)
ax2.legend(['rainfall'], loc='right',fontsize=14, bbox_to_anchor=(1.05, -0.20))
fig.tight_layout()
plt.show()
But this code is giving me this plot below:
I want to change three things in this plot:
- First, transform the blue line plot into a bars plot.
- Second, and more important, I want to make the bar plot representing rainfall to be displayed on the top of the plot
- Third, I need to get rid of the tick marks in black on the right "y axis" by making the "ax3 scatter plot" simply share the "y axis" on the left side.
An example of the plot I want to create is the one below, but instead of the lines I will be using a scatter plot as shown in the previous figure:
Data
The data can be downloaded here: link for the data
data = {'Date_1': ['1/17/2018', '2/21/2018', '3/21/2018', '4/18/2018', '5/17/2018', '6/20/2018', '7/18/2018', '8/8/2018', '9/19/2018', '10/24/2018', '11/21/2018', '12/19/2018', '1/16/2019', '2/20/2019', '3/20/2019', '4/29/2019', '5/30/2019', '6/19/2019', '7/19/2019', '8/21/2019', '9/18/2019', '10/16/2019', '1/22/2020', '2/19/2020'],
'FLOW_OUTcms': [0.00273, 0.01566, 0.02071, 0.00511, 0.00777, 0.00581, 0.00599, 0.00309, 0.00204, 0.04024, 0.00456, 0.0376, 0.00359, 0.00301, 0.01515, 0.02796, 0.00443, 0.03602, 0.0071, 0.00255, 0.00159, 0.00319, 0.04443, 0.04542],
'Rain': [0.0, 30.4, 2.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.7, 0.0, 0.0, 0.1, 0.1, 0.0, 0.0, 0.1, 0.0, 1.1, 0.1, 33.3, 0.0],
'Mod_Ec': [10840, 212, 1953, 2616, 2715, 2869, 3050, 2741, 5479, 1049, 2066, 146, 6618, 7444, 992, 2374, 6602, 82, 5267, 3560, 4845, 1479, 58, 760],
'Obs_Ec': [2500, 69000, 13000, 3300, 1600, 2400, 2300, 1400, 1600, 1300, 10000, 20000, 2000, 2500, 2900, 1500, 280, 260, 64, 59, 450, 410, 3900, 870]}
df = pd.DataFrame(data)
Solution
- It will be better to plot directly with
pandas.DataFrame.plot
- It's better to plot the rain as a scatter plot, and then add vertical lines, than to use a barplot. This is the case because barplot ticks are 0 indexed, not indexed with a date range, so it will be difficult to align data points between the two types of tick locations.
- Cosmetically, I think it will look better to only add points where rain is greater than 0, so the dataframe can be filtered to only plot those points.
- Plot the primary plot for x and y to and assign it to axes
ax
- Create a secondary x-axis from
ax
and assign it toax2
- Plot the secondary y-axis onto
ax2
and assign the plot to a third axes,ax3
, which is used to customize the secondary axes.
- Tested in
python 3.8.11
,pandas 1.3.3
,matplotlib 3.4.3
import pandas as pd
# starting with the sample dataframe, convert Date_1 to a datetime dtype
df.Date_1 = pd.to_datetime(df.Date_1)
# plot E coli data
ax = df.plot(x='Date_1', y=['Mod_Ec', 'Obs_Ec'], figsize=(12, 8), rot=0, color=['blue', 'red'])
# center the xtick labels on the ticks
xticklabels = [t.get_text() for t in ax.get_xticklabels()]
xticks = ax.get_xticks()
ax.set_xticks(xticks)
ax.set_xticklabels(xticklabels, ha='center')
# cosmetics
ax.set_xlim(df.Date_1.min(), df.Date_1.max())
ax.set_ylim(0, 70000)
ax.set_ylabel('E. coli')
ax.set_xlabel('Date')
ax.legend(['Observed E. coli', 'Modelled E. coli'], loc='upper left', ncol=2, bbox_to_anchor=(-.01, 1.09))
# create twinx for rain
ax2 = ax.twinx()
# filter the rain column to only show points greater than 0
df_filtered = df[df.Rain.gt(0)]
# plot data with on twinx with secondary y as a scatter plot
ax3 = df_filtered.plot(kind='scatter', x='Date_1', y='Rain', marker='d', ax=ax2, color='deepskyblue', secondary_y=True, legend=False)
# add vlines to the scatter points
ax3.vlines(x=df_filtered.Date_1, ymin=0, ymax=df_filtered.Rain, color='deepskyblue')
# cosmetics
ax3.set_ylim(0, 60)
ax3.invert_yaxis() # reverse the secondary y axis so it starts at the top
ax3.set_ylabel('Rain (mm)')
ax3.legend(['Rainfall'], loc='upper right', ncol=1, bbox_to_anchor=(1.01, 1.09))
Answered By - Trenton McKinney
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.