Issue
I am developing a credit card fraud detection project in machine learning. I downloaded the code from GitHub (https://github.com/devrajkataria/Credit-Card-Fraud-Detection-ML-WebApp), but I am getting the following error:
AttributeError: 'SVC' object has no attribute '_probA'
I tried to update the package, but that did not work. I am using Python 3.6. Here is code.py:
import numpy as np
import sklearn
from flask import Flask, request, jsonify, render_template
import pickle
from sklearn.svm import SVC

app = Flask(__name__)

# Prediction function: reshape the submitted values and score them with the pickled model
def ValuePredictor(to_predict_list):
    to_predict = np.array(to_predict_list).reshape(1, 7)
    loaded_model = pickle.load(open("model.pkl", "rb"))
    result = loaded_model.predict(to_predict)
    return result[0]

@app.route('/')
def home():
    return render_template("index.html")

@app.route('/predict', methods=['POST', 'GET'])
def predict():
    if request.method == 'POST':
        # Collect the form fields, convert them to floats, and predict
        to_predict_list = request.form.to_dict()
        to_predict_list = list(to_predict_list.values())
        to_predict_list = list(map(float, to_predict_list))
        result = ValuePredictor(to_predict_list)
        if int(result) == 1:
            prediction = 'Given transaction is fraudulent'
        else:
            prediction = 'Given transaction is NOT fraudulent'
        return render_template("result.html", prediction=prediction)

if __name__ == "__main__":
    app.run(debug=True)
Here is the classifier code.
# -*- coding: utf-8 -*-
"""Project_Final.ipynb
Automatically generated by Colaboratory.
# Data Preprocessing and Visualisation
"""
# Commented out IPython magic to ensure Python compatibility.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline
# Commented out IPython magic to ensure Python compatibility.
import sklearn
import random
from sklearn.utils import shuffle
# %matplotlib inline
from zipfile import ZipFile
with ZipFile('creditcardfraud.zip', 'r') as zip:
    zip.printdir()
    zip.extractall()
data = pd.read_csv('creditcard.csv')
d = data  # alias; later cells use both names
sns.distplot(data['Amount'])
sns.distplot(data['Time'])
data.hist(figsize=(20,20))
plt.show()
sns.jointplot(x= 'Time', y= 'Amount', data= d)
class0 = d[d['Class']==0]
len(class0)
class1 = d[d['Class']==1]
len(class1)
class0
temp = shuffle(class0)
d1 = temp.iloc[:2000,:]
d1
frames = [d1, class1]
df_temp = pd.concat(frames)
df_temp.info()
df= shuffle(df_temp)
df.to_csv('creditcardsampling.csv')
sns.countplot('Class', data=df)
"""# SMOTE"""
!pip install --user imblearn
import imblearn
from imblearn.over_sampling import SMOTE
oversample=SMOTE()
X=df.iloc[ : ,:-1]
Y=df.iloc[: , -1]
X,Y=oversample.fit_resample(X,Y)
X=pd.DataFrame(X)
X.shape
Y=pd.DataFrame(Y)
Y.head()
names=['Time','V1','V2','V3','V4','V5','V6','V7','V8','V9','V10','V11','V12','V13','V14','V15','V16','V17','V18','V19','V20','V21','V22','V23','V24','V25','V26','V27','V28','Amount','Class']
data=pd.concat([X,Y],axis=1)
d=data.values
data=pd.DataFrame(d,columns=names)
sns.countplot('Class', data=data)
data.describe()
data.info()
plt.figure(figsize=(12,10))
sns.heatmap(data.corr())
!pip install --user lightgbm
!pip install --user utils
import math
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score , classification_report, confusion_matrix, precision_recall_curve, f1_score, auc
X_train, X_test, y_train, y_test = train_test_split(data.drop('Class', axis=1), data['Class'], test_size=0.3, random_state=42)
"""# Feature Scaling"""
cols= ['V22', 'V24', 'V25', 'V26', 'V27', 'V28']
scaler = StandardScaler()
frames= ['Time', 'Amount']
x= data[frames]
d_temp = data.drop(frames, axis=1)
temp_col=scaler.fit_transform(x)
scaled_col = pd.DataFrame(temp_col, columns=frames)
scaled_col.head()
d_scaled = pd.concat([scaled_col, d_temp], axis =1)
d_scaled.head()
y = data['Class']
d_scaled.head()
"""# Dimensionality Reduction"""
from sklearn.decomposition import PCA
pca = PCA(n_components=7)
X_temp_reduced = pca.fit_transform(d_scaled)
pca.explained_variance_ratio_
pca.explained_variance_
names=['Time','Amount','Transaction Method','Transaction Id','Location','Type of Card','Bank']
X_reduced= pd.DataFrame(X_temp_reduced,columns=names)
X_reduced.head()
Y=d_scaled['Class']
new_data=pd.concat([X_reduced,Y],axis=1)
new_data.head()
new_data.shape
new_data.to_csv('finaldata.csv')
X_train, X_test, y_train, y_test= train_test_split(X_reduced, d_scaled['Class'], test_size = 0.30, random_state = 42)
X_train.shape, X_test.shape
"""# Logistic Regression"""
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression()
lr.fit(X_train,y_train)
y_pred_lr=lr.predict(X_test)
y_pred_lr
from sklearn.metrics import classification_report,confusion_matrix
print(confusion_matrix(y_test,y_pred_lr))
#Hyperparamter tuning
from sklearn.model_selection import GridSearchCV
lr_model = LogisticRegression()
lr_params = {'penalty': ['l1', 'l2'],'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}
grid_lr= GridSearchCV(lr_model, param_grid = lr_params)
grid_lr.fit(X_train, y_train)
grid_lr.best_params_
y_pred_lr3=grid_lr.predict(X_test)
print(classification_report(y_test,y_pred_lr3))
"""# Support Vector Machine"""
from sklearn.svm import SVC
svc=SVC(kernel='rbf')
svc.fit(X_train,y_train)
y_pred_svc=svc.predict(X_test)
y_pred_svc
print(classification_report(y_test,y_pred_svc))
print(confusion_matrix(y_test,y_pred_svc))
from sklearn.model_selection import GridSearchCV
parameters = [ {'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.1, 1, 0.01, 0.0001 ,0.001]}]
grid_search = GridSearchCV(estimator=svc,
                           param_grid=parameters,
                           scoring='accuracy',
                           n_jobs=-1)
grid_search = grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)
svc_param=SVC(kernel='rbf',gamma=0.01,C=100)
svc_param.fit(X_train,y_train)
y_pred_svc2=svc_param.predict(X_test)
print(classification_report(y_test,y_pred_svc2))
"""# Decision Tree"""
from sklearn.tree import DecisionTreeClassifier
dtree=DecisionTreeClassifier()
dtree.fit(X_train,y_train)
y_pred_dtree=dtree.predict(X_test)
print(classification_report(y_test,y_pred_dtree))
Here is the model code:
# Commented out IPython magic to ensure Python compatibility.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline
# Commented out IPython magic to ensure Python compatibility.
import sklearn
import random
from sklearn.utils import shuffle
# %matplotlib inline
data=pd.read_csv('creditcard.csv')
sns.distplot(data['Amount'])
sns.distplot(data['Time'])
data.hist(figsize=(20,20))
plt.show()
d = data
sns.jointplot(x='Time', y='Amount', data=d)
class0 = d[d['Class']==0]
len(class0)
class1 = d[d['Class']==1]
len(class1)
class0
temp = shuffle(class0)
d1 = temp.iloc[:2000,:]
d1
frames = [d1, class1]
df_temp = pd.concat(frames)
df_temp.info()
df= shuffle(df_temp)
df.to_csv('creditcardsampling.csv')
sns.countplot('Class', data=df)
"""# SMOTE"""
#!pip install --user imblearn
import imblearn
from imblearn.over_sampling import SMOTE
oversample=SMOTE()
X=df.iloc[ : ,:-1]
Y=df.iloc[: , -1]
X,Y=oversample.fit_resample(X,Y)
X=pd.DataFrame(X)
X.shape
Y=pd.DataFrame(Y)
Y.head()
names=['Time','V1','V2','V3','V4','V5','V6','V7','V8','V9','V10','V11','V12','V13','V14','V15','V16','V17','V18','V19','V20','V21','V22','V23','V24','V25','V26','V27','V28','Amount','Class']
data=pd.concat([X,Y],axis=1)
d=data.values
data=pd.DataFrame(d,columns=names)
sns.countplot('Class', data=data)
data.describe()
data.info()
plt.figure(figsize=(12,10))
sns.heatmap(data.corr())
import math
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score , classification_report, confusion_matrix, precision_recall_curve, f1_score, auc
X_train, X_test, y_train, y_test = train_test_split(data.drop('Class', axis=1), data['Class'], test_size=0.3, random_state=42)
"""# Feature Scaling"""
cols= ['V22', 'V24', 'V25', 'V26', 'V27', 'V28']
scaler = StandardScaler()
frames= ['Time', 'Amount']
x= data[frames]
d_temp = data.drop(frames, axis=1)
temp_col=scaler.fit_transform(x)
scaled_col = pd.DataFrame(temp_col, columns=frames)
scaled_col.head()
d_scaled = pd.concat([scaled_col, d_temp], axis =1)
d_scaled.head()
y = data['Class']
d_scaled.head()
"""# Dimensionality Reduction"""
from sklearn.decomposition import PCA
pca = PCA(n_components=7)
X_temp_reduced = pca.fit_transform(d_scaled)
pca.explained_variance_ratio_
pca.explained_variance_
names=['Time','Amount','Transaction Method','Transaction Id','Location','Type of Card','Bank']
X_reduced= pd.DataFrame(X_temp_reduced,columns=names)
X_reduced.head()
Y=d_scaled['Class']
new_data=pd.concat([X_reduced,Y],axis=1)
new_data.head()
new_data.shape
new_data.to_csv('finaldata.csv')
X_train, X_test, y_train, y_test= train_test_split(X_reduced, d_scaled['Class'], test_size = 0.30, random_state = 42)
X_train.shape, X_test.shape
from sklearn.metrics import classification_report,confusion_matrix
"""# Support Vector Machine"""
from sklearn.svm import SVC
svc=SVC(kernel='rbf',probability=True)
svc.fit(X_train,y_train)
y_pred_svc=svc.predict(X_test)
y_pred_svc
type(X_test)
X_test.to_csv('testing.csv')
from sklearn.model_selection import GridSearchCV
parameters = [ {'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.1, 1, 0.01, 0.0001 ,0.001]}]
grid_search = GridSearchCV(estimator=svc,
                           param_grid=parameters,
                           scoring='accuracy',
                           n_jobs=-1)
grid_search = grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)
svc_param=SVC(kernel='rbf',gamma=0.01,C=100,probability=True)
svc_param.fit(X_train,y_train)
import pickle
# Saving model to disk
pickle.dump(svc_param, open('model.pkl','wb'))
model=pickle.load(open('model.pkl','rb'))
Here is the project structure.
Here is a screenshot of what happens when I run the project in the browser.
Solution
This is very likely a versioning problem. Your scikit-learn version is probably the latest one, while the model.pkl you downloaded from the repo you mentioned was pickled with an older, incompatible version. scikit-learn does not guarantee that pickled models remain compatible across versions, so internal attributes such as _probA may simply not exist when a model saved under one version is loaded under another.
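You can confirm which version you are running; if the model was trained under a different one, the mismatch explains the error:

import sklearn
print(sklearn.__version__)  # compare with the version that produced model.pkl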
To avoid this kind of problem, we should stick to best practices such as using a requirements.txt file that pins exactly the versions used during development. The same versions can then be installed in production.
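For illustration, a pinned requirements.txt for this project might look like the sketch below. The version numbers are placeholders (the repo does not state which versions it was developed with), so treat them as assumptions to be adjusted:

flask==1.1.2             # placeholder version
numpy==1.19.5            # placeholder version
pandas==1.1.5            # placeholder version
scikit-learn==0.22.2     # must match the version that produced model.pkl
imbalanced-learn==0.7.0  # placeholder version

Running pip install -r requirements.txt then reproduces the original environment, and pip freeze > requirements.txt is the usual way to capture it in the first place.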
If you really need to use this model.pkl from the repo, I'd suggest creating a GitHub issue asking for a requirements.txt file. Alternatively, you could try different scikit-learn versions until you get it working, but this can be very cumbersome.
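Since the question already includes the training script, another option is to retrain and re-pickle the model under the scikit-learn version you actually have installed, which sidesteps the incompatibility entirely. A minimal sketch, assuming the finaldata.csv written by the notebook above is available and contains the seven PCA features plus the Class column:

import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Rebuild the model with the locally installed scikit-learn,
# so the resulting pickle matches the runtime that loads it.
data = pd.read_csv('finaldata.csv', index_col=0)
X = data.drop('Class', axis=1)
y = data['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Same hyperparameters the notebook selected via grid search
svc = SVC(kernel='rbf', gamma=0.01, C=100, probability=True)
svc.fit(X_train, y_train)

# Overwrite model.pkl with a pickle created by the current version
pickle.dump(svc, open('model.pkl', 'wb'))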
Answered By - Tulio Casagrande