Issue
I fear I have the same problem as in this post:
getting a warning when using sklearn.neighbors about keepdims
I am trying to use KNN as part of an ensemble classifier, but every time I get the following warning:
FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
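For reference, this is what the warning asks for at the SciPy level. The offending stats.mode call sits inside scikit-learn's predict code rather than in my own, so the snippet below (assuming SciPy >= 1.9, where the keepdims argument was added) is only illustrative of the requested fix:

import numpy as np
from scipy import stats

a = np.array([[1, 1, 2],
              [3, 3, 3]])
# Passing keepdims explicitly is exactly what the FutureWarning asks for;
# with keepdims=True the reduced axis is kept, giving a (2, 1) result
# holding the per-row modes 1 and 3.
mode, count = stats.mode(a, axis=1, keepdims=True)
print(mode)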
I know that one way to solve this issue is to suppress future warnings, but since that might mask real errors later, I would rather fix it now. Is there a way to do this? I tried simply calling KNeighborsClassifier(keepdims=True), but this argument was not accepted.
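A quick way I checked that the parameter simply isn't exposed on the estimator (get_params() lists every constructor argument the estimator accepts):

from sklearn.neighbors import KNeighborsClassifier
# get_params() returns all accepted constructor parameters;
# there is no 'keepdims' key among them.
print(KNeighborsClassifier().get_params())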
Also, adding

from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

does not suppress the message for me.
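My guess as to why the filter has no effect, which may or may not be the whole story: GridSearchCV below runs with n_jobs=-1, so the fitting happens in joblib worker processes, and a warnings filter set with simplefilter in the parent process does not reach them. A sketch of a workaround under that assumption, since child processes do inherit environment variables:

import os
# PYTHONWARNINGS is read at interpreter startup, so worker processes
# spawned after this point start with the FutureWarning filter in place.
# (Set it before GridSearchCV creates its workers.)
os.environ["PYTHONWARNINGS"] = "ignore::FutureWarning"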
Here is the full code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(style='darkgrid', font_scale=1.4)
from imblearn.over_sampling import SMOTE
import itertools
#import warnings
#warnings.filterwarnings('ignore')
import plotly.express as px
import time
# Sklearn
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, f1_score
from sklearn.metrics import roc_auc_score, plot_confusion_matrix, plot_roc_curve, roc_curve
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, LabelEncoder
from sklearn.feature_selection import mutual_info_classif
from sklearn.decomposition import PCA
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import eli5
from eli5.sklearn import PermutationImportance
from sklearn.utils import resample
from sklearn.metrics import cohen_kappa_score, make_scorer
from sklearn import metrics
# Models
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.naive_bayes import GaussianNB
# Train-validation split
X_train, X_valid, y_train, y_valid = train_test_split(X, y, stratify=y, train_size=0.8, test_size=0.2, random_state=0)
oversample = SMOTE(random_state=0)
X_train_Smot, Y_train_Smot = oversample.fit_resample(X_train, y_train)
# Classifiers
classifiers = {
    "LogisticRegression": LogisticRegression(random_state=0, solver='lbfgs'),
    "KNN": KNeighborsClassifier(),
    "SVC": SVC(random_state=0, probability=True),
    "RandomForest": RandomForestClassifier(random_state=0),
    "XGBoost": XGBClassifier(random_state=0, use_label_encoder=False, eval_metric='logloss'),  # XGBoost takes too long
    "LGBM": LGBMClassifier(random_state=0),
    #"CatBoost": CatBoostClassifier(random_state=0, verbose=False),
    "NaiveBayes": GaussianNB()
}
# Grids for grid search
LR_grid = {'C': [0.25, 0.5, 0.75, 1, 1.25, 1.5],
           'max_iter': [50, 100, 150]}
KNN_grid = {'n_neighbors': [3, 5, 7, 9],
            'p': [1, 2]}
SVC_grid = {'C': [0.25, 0.5, 0.75, 1, 1.25, 1.5],
            'kernel': ['linear', 'rbf'],
            'gamma': ['scale', 'auto']}
RF_grid = {'n_estimators': [50, 100, 150, 200, 250, 300],
           'max_depth': [4, 6, 8, 10, 12]}
boosted_grid = {'n_estimators': [50, 100, 150, 200],
                'max_depth': [4, 8, 12],
                'learning_rate': [0.05, 0.1, 0.15]}
NB_grid = {'var_smoothing': [1e-10, 1e-9, 1e-8, 1e-7]}
# Dictionary of all grids
grid = {
    "LogisticRegression": LR_grid,
    "KNN": KNN_grid,
    "SVC": SVC_grid,
    "RandomForest": RF_grid,
    "XGBoost": boosted_grid,
    "LGBM": boosted_grid,
    "CatBoost": boosted_grid,
    "NaiveBayes": NB_grid
}
i = 0
clf_best_params = classifiers.copy()
valid_scores = pd.DataFrame({'Classifier': classifiers.keys(),
                             'Validation accuracy': np.zeros(len(classifiers)),
                             'Training time': np.zeros(len(classifiers))})
for key, classifier in classifiers.items():
    start = time.time()
    clf = GridSearchCV(estimator=classifier, param_grid=grid[key], n_jobs=-1, cv=None)
    # Train and score
    clf.fit(X_train_Smot, Y_train_Smot)
    #valid_scores.iloc[i, 1] = clf.score(X_valid, y_valid)
    y_pred = clf.predict(X_valid)
    valid_scores.iloc[i, 1] = metrics.cohen_kappa_score(y_pred, y_valid, weights='quadratic')
    # Save best parameters
    clf_best_params[key] = clf.best_params_
    # Print iteration and training time
    stop = time.time()
    valid_scores.iloc[i, 2] = np.round((stop - start)/60, 2)
    print('Model:', key)
    print('Training time (mins):', valid_scores.iloc[i, 2])
    print('')
    i += 1
Solution
This is a warning generated when the predict function in sklearn internally calls scipy.stats.mode. It was fixed here - I suggest you update scikit-learn to the latest release and try again.
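In case it helps to verify before and after the upgrade, a minimal check of the installed versions (the answer above does not name the exact release containing the fix, so compare against the current releases of both libraries):

import scipy
import sklearn
# The warning comes from the scipy/scikit-learn combination,
# so note both versions before and after upgrading.
print("scikit-learn:", sklearn.__version__)
print("scipy:", scipy.__version__)

Then upgrade from a terminal or notebook cell with pip install --upgrade scikit-learn. One caveat when upgrading: plot_confusion_matrix and plot_roc_curve, imported in the question's code, were removed in scikit-learn 1.2 in favour of ConfusionMatrixDisplay and RocCurveDisplay, so those imports will need updating as well.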
Answered By - Jagadeesh