Create modeling.py
Browse files- modeling.py +49 -0
modeling.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas
|
2 |
+
import sklearn
|
3 |
+
from sklearn.linear_model import LogisticRegression
|
4 |
+
from sklearn.model_selection import train_test_split
|
5 |
+
from sklearn.metrics import accuracy_score
|
6 |
+
from sklearn.metrics import confusion_matrix
|
7 |
+
|
8 |
+
# Load the dataset; the CSV is looked up relative to the current working
# directory.
df = pandas.read_csv('RSL_copy.csv')
print(df.dtypes)  # show the dtype pandas inferred for each column

# Whole table as a single array, kept under its original name.
# NOTE: when the columns have different dtypes, DataFrame.values upcasts
# everything to one common dtype, so it is not used for the split below.
data = df.values

# Split into features and target by column position.
# Slicing the DataFrame (rather than `data`) lets each part keep its own
# dtype, so the target column is not upcast together with the features.
# NOTE(review): assumes columns 0-1 are the features and column 2 is the
# class label -- confirm against the CSV layout.
X_array = df.iloc[:, 0:2].to_numpy()
Y_array = df.iloc[:, 2].to_numpy()
|
16 |
+
|
17 |
+
# Split the data into a training set (80%) and a test set (20%).
# A fixed random_state makes the shuffle -- and therefore the reported
# accuracies and the saved model -- reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_array, Y_array, test_size=0.2, random_state=42
)

# Create an instance of the model.
lrmodel = LogisticRegression(solver='newton-cg')

# Fit the model on the training split only; the test split stays unseen
# until evaluation.
lrmodel.fit(X_train, y_train)
|
25 |
+
# Predict on the training data and compare with y_train to see how well
# the model fits the data it was trained on.
train_prediction = lrmodel.predict(X_train)

# accuracy_score expects (y_true, y_pred) in that order.
train_accuracy = accuracy_score(y_train, train_prediction)
print('train prediction is', train_accuracy * 100, '%')

# Predict on the held-out test data and measure accuracy there.
prediction = lrmodel.predict(X_test)
accuracy = accuracy_score(y_test, prediction)
print('test predcition:', accuracy * 100, '%')

# Confusion matrix: rows are the true classes, columns the predicted
# classes. It works for binary and multiclass targets alike.
# Computed once and printed.
print(confusion_matrix(y_test, prediction))
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
# Save the trained model to disk.
import pickle

filename = 'MWmodel.sav'
# The context manager closes (and flushes) the file even if pickling
# raises, instead of leaving an open handle behind.
# NOTE: only unpickle this file from a trusted location -- loading a
# pickle can execute arbitrary code.
with open(filename, 'wb') as model_file:
    pickle.dump(lrmodel, model_file)