Issue
I am currently working on the "Bank Scoring Case" Kaggle competition (https://www.kaggle.com/competitions/bank-scoring-case). I am trying to optimize the hyperparameters of my LightGBM classifier with Optuna, using a custom score to reduce false negatives. Unfortunately, I get a "NotFittedError: All estimators failed to fit" error whenever I use Optuna, and I cannot figure out why. Any help is much appreciated.
import optuna
from optuna.distributions import IntDistribution as IntUniDist
from optuna.distributions import UniformDistribution as UniDist
from optuna.distributions import LogUniformDistribution as LogUniDist
from optuna.distributions import CategoricalDistribution as CatDist
from lightgbm import LGBMClassifier
from sklearn.pipeline import Pipeline
from optuna.distributions import FloatDistribution as fl
# Stand-alone expression: the value returned by to_internal_repr is discarded.
LogUniDist(1e-5, 1e0).to_internal_repr(1e-4)
# Hyperparameter search space handed to OptunaSearchCV. The 'clf__' prefix
# targets the pipeline step named 'clf' defined further down.
# NOTE(review): every entry is a FloatDistribution (`fl`), so n_estimators,
# num_leaves and max_depth are sampled as floats as well (the trial log shows
# e.g. 'clf__n_estimators': 1152.99...). LightGBM presumably requires integers
# for these three -- confirm against the LightGBM parameter docs.
param_distributions = {
'clf__n_estimators': fl(10, 2000),
'clf__num_leaves': fl(10, 100),
'clf__reg_alpha': fl(1e-5, 1e0),
'clf__reg_lambda': fl(1e-3, 1e0),
'clf__learning_rate': fl(0.01,0.1),
'clf__max_depth': fl(-1,20),
}
# Base classifier with a fixed random seed.
model=LGBMClassifier(random_state=8)#,class_weight='balanced'
# Single-step pipeline wrapping the classifier as step 'clf'; the preprocessing
# step is commented out.
model_optuna=Pipeline(steps=[#('prepro',preprocessor_ord),
('clf',model)])
from sklearn.metrics import confusion_matrix
from sklearn.metrics import fbeta_score, make_scorer
def customScore(y_test, y_pred):
    """Return a misclassification cost that penalises false negatives.

    The cost is ``10 * FN + FP``: each false negative counts ten times as
    much as a false positive. It is meant to be minimised (the commented-out
    scorer in this file wraps it with ``greater_is_better=False``).

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels.

    Returns
    -------
    int
        The weighted error count ``10 * FN + FP``.

    Raises
    ------
    ValueError
        If the confusion matrix does not flatten to exactly four cells,
        i.e. the problem is not binary for the data passed in.
    """
    # Flatten the confusion matrix into its four cells; only the two error
    # counts are needed for the cost.
    _tn, fp, fn, _tp = confusion_matrix(y_test, y_pred).ravel()
    return 10 * fn + fp
#scorer = make_scorer(customScore ,greater_is_better = False)
# Build our own scorer with make_scorer.
# Hyperparameter search over the pipeline using the search space defined in
# param_distributions.
opt_search_hp = optuna.integration.OptunaSearchCV(
model_optuna,
param_distributions,
# No trial-count limit is passed; the timeout below is the only stopping
# criterion given here.
n_trials=None,
verbose=3,
refit=True,
# Presumably seconds (30 minutes) -- confirm against the Optuna docs.
timeout=1800,
# NOTE(review): metric_custom_perte is not defined in the visible code (the
# function defined above is customScore) -- confirm which metric is intended.
scoring= make_scorer(metric_custom_perte,needs_proba=True),
cv=4,
return_train_score=False
)
# x_train / y_train are defined outside the visible code.
opt_search_hp.fit(x_train,y_train)
The error I get is as follows:
[I 2023-02-20 16:17:29,106] A new study created in memory with name: no-name-55879412-2d54-486c-af2b-3c4d74911d05
[I 2023-02-20 16:17:29,108] Searching the best hyperparameters using 128290 samples...
[W 2023-02-20 16:17:33,370] Trial 0 failed with parameters: {'clf__n_estimators': 1152.9940273025156, 'clf__num_leaves': 78.72037192055771, 'clf__reg_alpha': 0.16664156644372716, 'clf__reg_lambda': 0.7704390387651396, 'clf__learning_rate': 0.0994775554864861, 'clf__max_depth': 2.3794054350476657} because of the following error: NotFittedError('All estimators failed to fit').
Traceback (most recent call last):
File "/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
value_or_values = func(trial)
File "/opt/anaconda3/lib/python3.7/site-packages/optuna/integration/sklearn.py", line 239, in __call__
scoring=self.scoring,
File "/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 292, in cross_validate
_insert_error_scores(results, error_score)
File "/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 331, in _insert_error_scores
raise NotFittedError("All estimators failed to fit")
sklearn.exceptions.NotFittedError: All estimators failed to fit
[W 2023-02-20 16:17:33,376] Trial 0 failed with value None.
---------------------------------------------------------------------------
NotFittedError Traceback (most recent call last)
<ipython-input-113-2e14a5b0857d> in <module>
----> 1 opt_search_hp.fit(x_train,y_train)
/opt/anaconda3/lib/python3.7/site-packages/optuna/integration/sklearn.py in fit(self, X, y, groups, **fit_params)
903 n_trials=self.n_trials,
904 timeout=self.timeout,
--> 905 callbacks=self.callbacks,
906 )
907
/opt/anaconda3/lib/python3.7/site-packages/optuna/study/study.py in optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
432 callbacks=callbacks,
433 gc_after_trial=gc_after_trial,
--> 434 show_progress_bar=show_progress_bar,
435 )
436
/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
74 reseed_sampler_rng=False,
75 time_start=None,
---> 76 progress_bar=progress_bar,
77 )
78 else:
/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
161
162 try:
--> 163 frozen_trial = _run_trial(study, func, catch)
164 finally:
165 # The following line mitigates memory problems that can be occurred in some
/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py in _run_trial(study, func, catch)
249 and not isinstance(func_err, catch)
250 ):
--> 251 raise func_err
252 return frozen_trial
253
/opt/anaconda3/lib/python3.7/site-packages/optuna/study/_optimize.py in _run_trial(study, func, catch)
198 with get_heartbeat_thread(trial._trial_id, study._storage):
199 try:
--> 200 value_or_values = func(trial)
201 except exceptions.TrialPruned as e:
202 # TODO(mamu): Handle multi-objective cases.
/opt/anaconda3/lib/python3.7/site-packages/optuna/integration/sklearn.py in __call__(self, trial)
237 groups=self.groups,
238 return_train_score=self.return_train_score,
--> 239 scoring=self.scoring,
240 )
241
/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
290 # the correct key.
291 if callable(scoring):
--> 292 _insert_error_scores(results, error_score)
293
294 results = _aggregate_score_dicts(results)
/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _insert_error_scores(results, error_score)
329
330 if successful_score is None:
--> 331 raise NotFittedError("All estimators failed to fit")
332
333 if isinstance(successful_score, dict):
# NotFittedError: All estimators failed to fit
Solution
Some of your parameters (n_estimators, num_leaves and max_depth) should be of type int but are defined as float. You should use IntDistribution for them instead:
from optuna.distributions import IntDistribution as intd
# Corrected search space for the 'clf' pipeline step: the integer-valued
# hyperparameters are drawn from IntDistribution (`intd`), the continuous
# ones from FloatDistribution (`fl`).
param_distributions = {
    "clf__n_estimators": intd(low=10, high=2000),
    "clf__num_leaves": intd(low=10, high=100),
    "clf__reg_alpha": fl(low=1e-5, high=1.0),
    "clf__reg_lambda": fl(low=1e-3, high=1.0),
    "clf__learning_rate": fl(low=0.01, high=0.1),
    "clf__max_depth": intd(low=-1, high=20),
}
Answered By - Mattravel
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.