Sumpoko2 committed on
Commit
899e5dc
1 Parent(s): 2e20b98

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -0
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### ----------------------------- ###
2
+ ### libraries ###
3
+ ### ----------------------------- ###
4
+
5
+ import gradio as gr
6
+ import pandas as pd
7
+ import numpy as np
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.linear_model import LogisticRegression
10
+ from sklearn import metrics
11
+ from reader import get_article
12
+
13
+
14
+ ### ------------------------------ ###
15
+ ### data transformation ###
16
+ ### ------------------------------ ###
17
+
18
# Load the raw dataset and drop its first column in one step
# (the first column is expected to be a timestamp).
uncleaned_data = pd.read_csv('data.csv').iloc[:, 1:]

# Empty frame that receives the numeric version of every column.
data = pd.DataFrame()

# Value mappings for the categorical columns, keyed by column name.
# structure: {colname1: {...}, colname2: {...} }
cat_value_dicts = {}

# The last remaining column is treated as the prediction target.
final_colname = uncleaned_data.columns[-1]
30
+
31
# Encode every column of the raw dataset as numbers and store it in `data`.
# `DataFrame.items()` is used because `iteritems()` was removed in pandas 2.0.
for colname, colval in uncleaned_data.items():

    # A column whose first value is already a number is copied over as-is.
    if isinstance(colval.values[0], (np.integer, float)):
        data[colname] = uncleaned_data[colname].copy()
        continue

    # structure while encoding: {"lilac": 0, "blue": 1, ...}
    new_dict = {}
    transformed_col_vals = []  # numeric code for each row of this column

    for item in colval.values:
        # The first time a value is seen it receives the next unused code
        # (the current dict size); later occurrences reuse that code.
        transformed_col_vals.append(new_dict.setdefault(item, len(new_dict)))

    # Only the final (target) column is stored reversed, code -> value,
    # so a predicted code can be turned back into its original value.
    if colname == final_colname:
        new_dict = {code: label for label, code in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
62
+
63
+
64
+ ### -------------------------------- ###
65
+ ### model training ###
66
+ ### -------------------------------- ###
67
+
68
# Select features and prediction: every column except the last is a feature,
# and the last column is the prediction target.
cols = len(data.columns)
num_features = cols - 1
x, y = data.iloc[:, :num_features], data.iloc[:, num_features:]

# Hold out a quarter of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# Fit a logistic regression with default parameters (the target frame is
# flattened to 1-D for fitting), then predict the held-out rows.
model = LogisticRegression()
model.fit(x_train, y_train.values.ravel())
y_pred = model.predict(x_test)
81
+
82
+
83
+ ### -------------------------------- ###
84
+ ### article generation ###
85
+ ### -------------------------------- ###
86
+ # borrow file reading function from reader.py
87
+
88
def get_feat():
    """Return the name of the column with the largest-magnitude coefficient.

    Takes the absolute value of each entry in the first row of
    ``model.coef_`` and looks up the position of the largest one in
    ``data.columns``. When several entries tie, the earliest position wins.
    """
    magnitudes = [abs(weight) for weight in model.coef_[0]]
    best_idx, _ = max(enumerate(magnitudes), key=lambda pair: pair[1])
    return data.columns[best_idx]
93
+
94
# Accuracy on the held-out rows, as a percentage rounded to one decimal.
accuracy_pct = round(metrics.accuracy_score(y_test, y_pred) * 100, 1)

# Both strings end in '**' (presumably closing bold markup that the article
# template opens -- confirm against reader.py).
acc = str(accuracy_pct) + '%**'
most_imp_feat = get_feat() + "**"

# Build the article/metadata for the interface from the two strings.
info = get_article(acc, most_imp_feat)
97
+
98
+
99
+
100
+ ### ------------------------------- ###
101
+ ### interface creation ###
102
+ ### ------------------------------- ###
103
+
104
+
105
# predictor for generic number of features
def general_predictor(*args):
    """Predict the target value for a single datapoint.

    Args:
        *args: One value per feature column, in the order of ``data.columns``.
            Values for categorical columns are the original category values;
            they are translated to numeric codes through ``cat_value_dicts``.
            Values for other columns are passed to the model unchanged.

    Returns:
        The original value of the target column that corresponds to the
        predicted code. If the target column was already numeric (so it has
        no entry in ``cat_value_dicts``), the raw prediction is returned as
        text instead.

    Raises:
        KeyError: If a categorical argument is not a known value of its
            column.
    """
    # zip stops at the shorter sequence, so the target column (last in
    # data.columns) is never paired with an argument.
    features = [
        cat_value_dicts[colname][arg] if colname in cat_value_dicts else arg
        for colname, arg in zip(data.columns, args)
    ]

    # predict single datapoint
    result = model.predict([features])
    prediction = result[0]

    # A numeric target column has no reverse mapping; indexing
    # cat_value_dicts directly would raise KeyError on every call.
    label_map = cat_value_dicts.get(final_colname)
    if label_map is None:
        return str(prediction)
    return label_map[prediction]
120
+
121
# Build one labelled input widget per feature column; the labels replace the
# column names that are lost when values arrive via star-args.
inputls = []
for colname in data.columns:
    # The last (target) column gets no input widget.
    if colname == final_colname:
        continue

    # Columns without a category mapping take a plain number input.
    if colname not in cat_value_dicts:
        inputls.append(gr.inputs.Number(label=colname))
        continue

    # Categorical columns offer their known values as radio options.
    radio_options = list(cat_value_dicts[colname])
    inputls.append(gr.inputs.Radio(choices=radio_options, type="value", label=colname))
136
+
137
# Assemble the gradio interface around the predictor; the article, css, title
# and description all come from the `info` mapping.
interface = gr.Interface(
    general_predictor,
    inputs=inputls,
    outputs="text",
    article=info['article'],
    css=info['css'],
    theme="grass",
    title=info['title'],
    allow_flagging='never',
    description=info['description'],
)

# Start serving the interface.
interface.launch()