Issue
This is how I have generated 2 circles with classes 0 and 1. Can anyone help me in generating 3 circles with different classes?. I have used the default function from scikit learn to create 2 circles. Is there any possible way to create 3 circles in scikit learn?
from sklearn.datasets import make_moons, make_circles, make_classification
from matplotlib import pyplot as plt
from pandas import DataFrame
from bokeh.colors.groups import blue
import numpy as np
from sklearn.datasets import make_blobs
# generate 2d classification dataset
#X, y = make_moons(n_samples=1000, noise=0.1)
#X, y = make_circles(n_samples=1000, noise=0.1, factor=0.3, random_state=2)
# make the circles linearly separable
#X[:,1] = (np.multiply(X[:,0], X[:,0]) + np.multiply(X[:,1], X[:,1]))
# make blobs
X, y = make_circles(n_samples=1000, random_state=3, noise=0.04)
#X, y = make_classification(n_samples=1000, n_features=2, n_classes=2, n_redundant=0, n_informative=2,
# n_clusters_per_class=1, random_state=1)
# scatter plot, dots colored by class value
df = DataFrame(dict(x1=X[:,0], x2=X[:,1], label=y))
colors = {0:'red', 1:'blue'} #, 2:'green'} # Add another color for 3 classes
fig, ax = plt.subplots()
grouped = df.groupby('label')
for key, group in grouped:
group.plot(ax=ax, kind='scatter', x='x1', y='x2', label=key, color=colors[key])
# group.plot(ax=ax, kind='scatter', x='x1', y='x2', label=key, color=colors[key], xlim=[-3,3]) #for nonlinear mapping
export_csv = df.to_csv(r'blobs.csv', index = None, header=True)
print(df)
plt.savefig("blobs.png")
plt.show()
Solution
In your make_circles
function, you can control the ratio of the inner circle to outer circle using factor=
. A simple solution is to make 2 datasets with different ratios and combining them, for example:
from sklearn.datasets import make_circles
import seaborn as sns
import pandas as pd
import numpy as np
X_small, y_small = make_circles(n_samples=(250,500), random_state=3,
noise=0.04, factor = 0.3)
X_large, y_large = make_circles(n_samples=(250,500), random_state=3,
noise=0.04, factor = 0.7)
y_large[y_large==1] = 2
Combine them:
df = pd.DataFrame(np.vstack([X_small,X_large]),columns=['x1','x2'])
df['label'] = np.hstack([y_small,y_large])
df.label.value_counts()
1 500
0 500
2 500
Plot:
sns.scatterplot(data=df,x='x1',y='x2',hue='label')
Answered By - StupidWolf
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.