import pandas
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Load the dataset and show the dtype pandas assigned to each column.
df = pandas.read_csv('RSL_copy.csv')
print(df.dtypes)
data = df.values

# Features are the first two columns; the target is the third column.
X_array, Y_array = data[:, :2], data[:, 2]

# Hold out 20% of the rows as the test set. No random_state is passed,
# so the split (and the scores computed from it) varies between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_array,
    Y_array,
    test_size=0.2,
)

# Create the logistic-regression classifier using the Newton-CG solver.
lrmodel = LogisticRegression(solver='newton-cg')
# Fit the classifier on the training split.
lrmodel.fit(X_train, y_train)

# Predict on the training data itself to measure training accuracy.
train_prediction = lrmodel.predict(X_train)
# accuracy_score is documented as (y_true, y_pred): true labels go first.
accuracy = accuracy_score(y_train, train_prediction)
print('train prediction is',accuracy*100,'%')

# Predict on the held-out test data to measure test accuracy.
prediction = lrmodel.predict(X_test)
accuracy = accuracy_score(y_test, prediction)
print('test predcition:', accuracy*100,'%')

# Confusion matrix for the test set: rows are the true labels, columns are
# the predicted labels. It supports binary and multiclass targets alike.
# Computed once and printed (the earlier discarded duplicate call is gone).
print(confusion_matrix(y_test, prediction))


# Save the trained model to disk with pickle.
# NOTE: only unpickle this file from a trusted location; loading a pickle
# can execute arbitrary code.
import pickle
filename = 'MWmodel.sav'
# The context manager guarantees the file is flushed and closed even if
# pickle.dump raises, instead of leaking the open handle.
with open(filename, 'wb') as model_file:
    pickle.dump(lrmodel, model_file)