Issue
I need to draw graphs from 5 CSV files (one per algorithm) and compare them to find the better algorithm.
Each CSV file contains only floating-point numbers in 100 rows × 4 columns. I have plotted a kdeplot comparing the 1st column of the 5 CSV files,
so I coded the problem like this:
from cProfile import label
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Plot the kernel density estimate (KDE) of the first column of five CSV
# files (one per algorithm) on a single set of axes for comparison.
plt.style.use("fivethirtyeight")
sns.set_theme()
# set_palette() installs the palette as the default colour cycle.
# color_palette() merely returns the colours, so calling it and discarding
# the result has no effect on the plot.
sns.set_palette("bright")

# One CSV per algorithm directory (alg1 .. alg5). Only the first 100 rows and
# the first four columns are read; the files have no header row.
base_dir = "D:/C++/Programs/Python/Input/appendicitis"
datasets = [
    pd.read_csv(
        f"{base_dir}/alg{i}/AverageIter1000.csv",
        on_bad_lines='skip', nrows=100, usecols=[0, 1, 2, 3], header=None,
    )
    for i in range(1, 6)
]

# Overlay the KDE of column 0 of every file.
for i, data in enumerate(datasets, start=1):
    # fill= replaces the deprecated shade= argument of kdeplot.
    sns.kdeplot(np.array(data[0]), fill=True, linewidth=2, label=f'arg{i}')

plt.xlabel("Accuracy")
plt.ylabel("Accuracy-Density")
plt.title("Accuracy graph visualisation")
plt.legend()
plt.show()
It does the job of plotting the graphs, but what I mainly need is to highlight the average (mean) point on each curve. How can I do this? Please help.
Solution
You could apply the approach from "How to plot a mean line on a distplot between 0 and the y value of the mean?" to each of the 5 curves:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib.colors import to_rgba
# Draw one KDE curve per algorithm and mark each curve's mean with a dotted
# vertical line that reaches from the x-axis up to the curve.
plt.style.use("fivethirtyeight")
sns.set_theme()
sns.set_palette("bright")

directories = [f'alg{i}' for i in range(1, 6)]
# To read the real files instead of the generated test data below:
# all_data = [pd.read_csv(f"D:/C++/Programs/Python/Input/appendicitis/{directory}/AverageIter1000.csv",
#                         on_bad_lines='skip', nrows=100, usecols=[0, 1, 2, 3], header=None)
#             for directory in directories]

# creates some test data, a bit similar to what could be in the files
all_data = [pd.DataFrame(np.random.normal(0.1, 1, (np.random.randint(100, 300), 1)).cumsum()) for _ in directories]

fig, ax = plt.subplots(figsize=(12, 5))

# First pass: draw only the unfilled KDE outlines (fill= replaces the
# deprecated shade= argument). The second pass pairs each dataset with the
# line drawn for it, in order, via ax.lines.
for data_i in all_data:
    sns.kdeplot(np.array(data_i[0]), fill=False, linewidth=2, ax=ax)

# Snapshot the curves before adding more artists to the axes.
kde_lines = list(ax.lines)

# Second pass: for every curve, mark the mean and add the shaded area.
for data_i, kdeline_i, directory in zip(all_data, kde_lines, directories):
    mean = data_i[0].mean()
    xs = kdeline_i.get_xdata()
    ys = kdeline_i.get_ydata()
    # Height of the KDE curve at the mean, interpolated from the plotted points.
    height = np.interp(mean, xs, ys)
    color = kdeline_i.get_color()
    ax.vlines(mean, 0, height, color=color, ls=':', lw=3)
    # The fill carries the legend label, so the legend shows the shaded patch.
    ax.fill_between(xs, ys, facecolor=to_rgba(color, alpha=0.2), edgecolor=color, lw=2, label=directory)

ax.set_xlabel("Accuracy")
ax.set_ylabel("Accuracy-Density")
ax.set_title("Accuracy graph visualisation")
ax.legend()
plt.tight_layout()
plt.show()
Answered By - JohanC
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.