Issue
How can I preserve the polynomial degree when saving a polynomial regression model, since this information is not saved with the model?
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Synthetic data set: 1000 rows whose first two columns ("a", "b") serve as
# features and whose third column ("c") serves as the regression target.
# Each column is drawn independently from a uniform distribution.
df = pd.DataFrame(
    {
        "a": np.random.uniform(low=0.0, high=1.0, size=1000),
        "b": np.random.uniform(low=10.0, high=14.0, size=1000),
        "c": np.random.uniform(low=100.0, high=1000.0, size=1000),
    }
)
def data():
    """Split the module-level ``df`` into training and validation sets.

    The first two columns of ``df`` are used as features and the third
    column as the target. 20% of the rows are held out for validation, and
    the fixed ``random_state`` makes the split reproducible for a given
    ``df``.

    Returns:
        A tuple ``(X_train, X_val, y_train, y_val)``.
    """
    features = df.iloc[:, :2].values
    target = df.iloc[:, 2].values
    train_x, val_x, train_y, val_y = train_test_split(
        features,
        target,
        test_size=0.2,
        random_state=1340,
    )
    return train_x, val_x, train_y, val_y
# Split the data, expand the two raw features into degree-2 polynomial terms,
# and fit a linear regression on the expanded training matrix.
X_train, X_val, y_train, y_val = data()
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(X_train)
poly_reg_model = LinearRegression().fit(X_poly, y_train)

# Only the regressor is written to disk here; the fitted PolynomialFeatures
# step is not part of the saved file.
# NOTE: joblib.dump returns the list of file names it wrote, so `poly_model`
# holds file names rather than a model object.
poly_model = joblib.dump(poly_reg_model, 'themodel')

# Reuse the transformer fitted on the training data: call transform(), not
# fit_transform(), so the validation set never refits the preprocessing.
y_pred = poly_reg_model.predict(poly_reg.transform(X_val))

themodel = joblib.load('themodel')
Now, if I try to predict:
# Raises ValueError: X_val still holds the raw feature columns, whereas the
# model was fitted on the polynomial-expanded matrix (size 6 vs. 2 in the
# error message below).
themodel.predict(X_val)
I receive the following error:
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 6 is different from 2)
I have to do:
# Workaround: rebuild the degree-2 polynomial expansion by hand and feed the
# expanded validation matrix to the loaded model.
pol_feat = PolynomialFeatures(degree=2)
X_val_expanded = pol_feat.fit_transform(X_val)
themodel.predict(X_val_expanded)
for the prediction to work. So, how can I store this information so that the saved model can be used for prediction directly?
Solution
You also have to pickle the fitted PolynomialFeatures transformer:
# Train: fit the polynomial expansion and the regressor on the training data.
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(X_train)
poly_reg_model = LinearRegression()
poly_reg_model.fit(X_poly, y_train)

# Pickle both fitted objects; the transformer is needed again when predicting.
joblib.dump(poly_reg_model, 'themodel')
joblib.dump(poly_reg, 'poilynomia_features_model')

# Load both objects back and predict: transform the raw validation features
# with the restored transformer before passing them to the restored model.
themodel = joblib.load('themodel')
poilynomia_features_model = joblib.load('poilynomia_features_model')
X_val_prep = poilynomia_features_model.transform(X_val)
predictions = themodel.predict(X_val_prep)
A better approach is to wrap all the steps in a single pipeline, which can then be saved and loaded as one object:
# Pipeline is not imported alongside the other scikit-learn names used here,
# so bring it into scope; without this the snippet fails with a NameError.
from sklearn.pipeline import Pipeline

# Chaining the polynomial expansion and the regressor means a single object
# carries the degree together with the fitted coefficients, and predict()
# accepts the raw features directly.
pipeline = Pipeline(steps=[
    ('poilynomia', PolynomialFeatures(degree=2)),  # explicit; 2 is also the default
    ('lr', LinearRegression()),
])
pipeline.fit(X_train, y_train)
pipeline.predict(X_val)
Answered By - Danylo Baibak
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.