Issue
Based on another thread, I got this code:
# Imports come first: the sample data below needs `np` and `pd` to be defined.
import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt

# 2000 draws from the standard normal distribution, wrapped in a one-column
# DataFrame (the column label is the integer 0).
data = np.random.normal(loc=0.0, scale=1.0, size=2000)
df_data = pd.DataFrame(data)
def _plot(df):
    """Plot a density histogram with a normal curve and sigma markers.

    For every column of ``df`` a new figure is created that shows the
    density histogram, the normal PDF computed from the column's mean and
    standard deviation, blue dash-dot lines at the mean and at 1, 2 and 3
    standard deviations on either side of it, and red dash-dot lines at the
    column's minimum and maximum. The mean and standard deviation of each
    column are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; each column gets its own figure.
    """
    for col in df.columns:
        # Use the column being plotted (not the global df_data) so the
        # marker lines always match the histogram that is drawn.
        values = df[col]
        n_bins = 50
        fig, axes = plt.subplots(figsize=(12, 6))
        _, bins, _ = axes.hist(values, n_bins, density=True, alpha=.1, edgecolor='black')
        mu = values.mean()
        sigma = values.std()
        pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
        # probability density function
        axes.plot(bins, pdf, color='green', alpha=.6)
        # dashed lines at mu + k*sigma for k = -3 .. 3 (k = 0 is the mean)
        for k in range(-3, 4):
            axes.axvline(mu + k*sigma, color='b', linestyle='-.')
        # dashed lines at the extremes of the data
        axes.axvline(values.min(), color='r', linestyle='-.')
        axes.axvline(values.max(), color='r', linestyle='-.')
        axes.set_ylabel('Probability Density')
        axes.set_xlabel('Values')
        print(mu)
        print(sigma)
# Draw the figure for the sample data created above.
_plot(df_data)
Which returns me this nice plot:
As you can see the blue vertical lines indicate borders set by multiples of standard deviations. I would like to add the following information and color coding, which I now quickly placed in powerpoint:
I tried to mess with the plt.fill_between function but didn't really get anything useful. Also, I do not know how to write something like the mu+l*sigma here above the plot. How can I achieve the second picture based on what I have?
EDIT: solved by @Trenton McKinney
Putting new boxes inside the colored boxes:
# Relies on `locs`, `axes` and `y1` as defined inside the solution's _plot.
# Shade each span between neighbouring vertical lines and label it twice:
# once with the stage name and once, in a white box, with its sigma multiple.
for i, (x, c) in enumerate(locs[:-1]):
    x_next = locs[i + 1][0]
    axes.axvspan(x, x_next, alpha=0.2, color=c)
    tx = (x + x_next) / 2  # horizontal centre of the span
    axes.text(tx, y1/2, f'Zustand {i + 1}', {'ha': 'center', 'va': 'center'}, rotation=90)
    # The first four spans lie below the mean (mu-4 .. mu-1), the rest above (mu+1 ..).
    if i < 4:
        sign, k = '-', 4 - i
    else:
        sign, k = '+', i - 3
    # Raw string: "\m", "\c" and "\s" are mathtext commands, not Python escapes.
    label = rf"$\mu${sign}{k}$\cdot$$\sigma$"
    axes.text(tx, y1/1.25, label, {'ha': 'center', 'va': 'center'}, rotation=90,
              bbox=dict(facecolor='white', alpha=0.8, edgecolor='black'))
Solution
- It will be easier to create a container with all of the values for the vertical lines, because those values will be reused for placing the lines and for determining the `axvspan` and `text` placement. In this case, a dictionary is used.
- See the inline notation for explanations.
- Use `matplotlib.axes.Axes.axvspan` to fill between vertical positions.
- See "How do I merge two dictionaries in a single expression (take union of dictionaries)?"
- Add text to the plot with `matplotlib.axes.Axes.text`.
- Tested in python 3.10, matplotlib 3.5.1.
# extra imports
from collections import OrderedDict
from itertools import zip_longest
# Seed NumPy's global RNG so the sample, and therefore the figure, is reproducible.
np.random.seed(2022)
# 2000 standard-normal draws, wrapped in a one-column DataFrame (column label 0).
data = np.random.normal(size=2000, scale=1.0, loc=0.0)
df_data = pd.DataFrame(data=data)
def _plot(df):
    """Plot a density histogram with a normal curve and shaded sigma stages.

    For every column of ``df`` a new figure is created that shows the
    density histogram, the normal PDF computed from the column's mean and
    standard deviation, dash-dot vertical lines (blue at the mean and at
    1, 2 and 3 standard deviations on either side, red at the minimum and
    maximum), and a shaded, labelled span between each pair of neighbouring
    lines. The mean and standard deviation of each column are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; each column gets its own figure.
    """
    for col in df.columns:
        # Use the column being plotted (not the global df_data) so the lines
        # and spans always match the histogram that is drawn.
        values = df[col]
        n_bins = 50
        fig, axes = plt.subplots(figsize=(12, 6))
        _, bins, _ = axes.hist(values, n_bins, density=True, alpha=.1, edgecolor='black')
        mu = values.mean()
        sigma = values.std()
        pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
        # probability density function
        axes.plot(bins, pdf, color='green', alpha=.6)
        # get the upper y limit to position the text
        _, y1 = axes.get_ylim()
        # create a dict for all the x values for vertical lines with the line color
        muu = {mu: 'b'}
        mm = {values.min(): 'r', values.max(): 'r'}
        mun = {mu - v*sigma: 'b' for v in range(1, 4)}
        mup = {mu + v*sigma: 'b' for v in range(1, 4)}
        # combine the dicts: | requires python 3.9+. See linked SO answer for additional options to combine the dicts
        vals = muu | mm | mun | mup
        # order the keys (x values) from smallest to largest
        vals = OrderedDict(sorted(vals.items()))
        # plot the dashed lines
        for x, c in vals.items():
            axes.axvline(x, color=c, linestyle='-.')
        # combine the x values with colors of the stages; with nine x values
        # and eight colors the last x is paired with None, which is fine
        # because the last x is only ever used as the right edge of a span
        locs = list(zip_longest(vals.keys(), ['blue', 'brown']*4))
        # iterate through all but the last value, and add the vspan and the text
        for i, (x, c) in enumerate(locs[:-1]):
            x_next = locs[i + 1][0]
            axes.axvspan(x, x_next, alpha=0.2, color=c)
            tx = (x + x_next) / 2  # horizontal centre of the span
            axes.text(tx, y1/2, f'Stage {i + 1}', {'ha': 'center', 'va': 'center'}, rotation=90)
        axes.set_ylabel('Probability Density')
        axes.set_xlabel('Values')
        print(mu)
        print(sigma)
# Draw the annotated figure for the seeded sample data.
_plot(df_data)
Update for additional annotations
# extra annotations: one label per span, ordered from left to right
below_mean = [f'µ - {v}σ' for v in range(4, 0, -1)]
above_mean = [f'µ + {v}σ' for v in range(1, 5)]
anno = below_mean + above_mean
# pair every boundary with its right-hand neighbour, then add the vspan and the text
for i, ((x, c), (x_next, _)) in enumerate(zip(locs, locs[1:])):
    axes.axvspan(x, x_next, alpha=0.2, color=c)
    centre = (x + x_next) / 2
    axes.text(centre, y1/2, f'Stage {i + 1}: {anno[i]}', {'ha': 'center', 'va': 'center'}, rotation=90)
Answered By - Trenton McKinney
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.