Issue
I'm trying to use RFE (recursive feature elimination) from scikit-learn with a statsmodels GLM that uses the NegativeBinomial family.
So I created my own class:
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.base import BaseEstimator
import statsmodels.api as sm
# Question's attempt: a BaseEstimator subclass wrapping a statsmodels formula
# GLM so that it can be passed to RFE. Kept as posted, because it is the code
# that produces the error quoted below.
# NOTE(review): indentation appears to have been lost in this paste; the three
# `def`s below are presumably methods of the class.
class MyEstimator(BaseEstimator):
# Builds the GLM from a formula string, a data frame and a family.
def __init__(self, formula_, data_, family_):
# NOTE(review): `formula` is referenced here but the parameter is named
# `formula_`.
self.model = sm.formula.glm(formula, data=data_, family=family_)
# Accepts only keyword arguments; there are no positional X / y parameters.
def fit(self, **kwargs):
# The return value of fit() is discarded.
self.model.fit()
# NOTE(review): reads `params` from the model object, not from the value
# returned by fit() above - confirm this attribute exists there.
self.coef_ = self.model.params.values
# Returns the model's predictions for X converted to a NumPy array.
def predict(self, X):
result = self.model.predict(X)
# NOTE(review): `np` is not imported in the snippet as shown.
return np.array(result)
# Driver script from the question, kept as posted.
# Generate 50 samples with 10 features.
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
# Only the first three feature columns are placed in the data frame.
# NOTE(review): `pd` is not imported in the snippet as shown.
dataset = pd.DataFrame({'X1':X[:,0], 'X2':X[:,1], 'X3':X[:,2], 'y':y})
estimator = MyEstimator("y ~ X1 + X2 + X3", dataset, sm.families.NegativeBinomial())
# Asks for 5 features although the formula above names only 3.
selector = RFE(estimator, n_features_to_select=5, step=1)
# fit() is called with no arguments; the error quoted below reports the
# missing 'X' and 'y'.
selector = selector.fit()
But I get this error:
TypeError: fit() missing 2 required positional arguments: 'X' and 'y'
Does anyone have an idea?
Solution
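RFE calls the estimator's fit method with the feature matrix and the target (fit(X, y)), so both the selector and your estimator must accept them. You can modify your code so that fit takes the exog (features) and endog (target) variables directly, instead of using the formula API: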
import numpy as np
import pandas as pd
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.base import BaseEstimator
import statsmodels.api as sm
class MyEstimator(BaseEstimator):
    """Scikit-learn style wrapper around a statsmodels GLM.

    Exposes ``fit(X, y)``, ``predict(X)`` and a ``coef_`` attribute so the
    model can be driven by scikit-learn utilities such as ``RFE``.
    """

    def __init__(self, family_):
        """Store the GLM family (e.g. ``sm.families.NegativeBinomial()``)."""
        # Kept under the same name as the constructor argument so that
        # BaseEstimator.get_params() / clone() can rebuild the estimator.
        self.family_ = family_

    def fit(self, exog, endog):
        """Fit the GLM and expose its coefficients as ``coef_``.

        Parameters follow the scikit-learn ``fit(X, y)`` order: ``exog`` is
        the feature matrix and ``endog`` is the target.

        Returns the estimator itself, as scikit-learn estimators do.
        """
        # statsmodels takes the target first, hence the swapped order here.
        self.model = sm.GLM(endog, exog, family=self.family_)
        # Keep the fitted results: predictions must come from them, because
        # the unfitted model's predict() expects the parameter vector as its
        # first argument rather than the data.
        self.results_ = self.model.fit()
        self.coef_ = self.results_.params
        return self

    def predict(self, X):
        """Return predictions of the fitted GLM for ``X`` as a NumPy array.

        ``fit`` must have been called first.
        """
        return np.array(self.results_.predict(X))
# Synthetic regression data: 50 samples, 10 features, fixed seed.
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)

# Wrap a negative-binomial GLM so that RFE can drive it.
nb_family = sm.families.NegativeBinomial()
estimator = MyEstimator(nb_family)

# The target is handed over as a single-column 2-D array.
y_column = y.reshape(-1, 1)
selector = RFE(estimator, n_features_to_select=5, step=1).fit(X, y_column)

# One rank per input feature.
print(selector.ranking_)
# [1 1 3 1 1 5 1 6 4 2]
Answered By - AlexK
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.