# rookie / modeling.py
# kirchoof's picture
# Create modeling.py
# 0e8f5c4
import pickle

import pandas
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
# Train a logistic-regression classifier on RSL_copy.csv, report its accuracy
# on the training split and on a held-out test split, print the test confusion
# matrix, and pickle the fitted model to MWmodel.sav.

# Load the dataset and show the dtype pandas inferred for each column.
df = pandas.read_csv('RSL_copy.csv')
print(df.dtypes)
data = df.to_numpy()

# The first two columns are used as features; the third column is the target.
X_array = data[:, 0:2]
Y_array = data[:, 2]

# Hold out 20% of the rows for testing.
# NOTE: no random_state is passed, so the split (and therefore the reported
# accuracies) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_array, Y_array, test_size=0.2
)

# Create the model and fit it on the training split.
lrmodel = LogisticRegression(solver='newton-cg')
lrmodel.fit(X_train, y_train)

# Accuracy on the rows the model was fitted on (an optimistic figure).
# scikit-learn metrics take (y_true, y_pred) in that order.
train_prediction = lrmodel.predict(X_train)
accuracy = accuracy_score(y_train, train_prediction)
print('train prediction is', accuracy*100, '%')

# Accuracy on the held-out rows.
prediction = lrmodel.predict(X_test)
accuracy = accuracy_score(y_test, prediction)
print('test predcition:', accuracy*100, '%')

# Confusion matrix for the test split: rows are true labels, columns are
# predicted labels. Computed once and printed.
print(confusion_matrix(y_test, prediction))

# Save the fitted model. The context manager guarantees the file is flushed
# and closed even if pickling fails part-way.
# NOTE: only unpickle this file from a trusted location; loading a pickle
# executes arbitrary code.
filename = 'MWmodel.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(lrmodel, model_file)