elaldana committed on
Commit
fe3dffd
1 Parent(s): 55e0d9a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## ----------------------------- ###
2
+ ### libraries ###
3
+ ### ----------------------------- ###
4
+ import gradio as gr
5
+ import pandas as pd
6
+ import numpy as np
7
+ import os
8
+ import warnings
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.linear_model import LogisticRegression
11
+ from sklearn import metrics
12
+ from reader import get_article
13
+
14
+ warnings.filterwarnings("ignore")
15
+
16
+
17
### ------------------------------ ###
###       data transformation      ###
### ------------------------------ ###
# load dataset
uncleaned_data = pd.read_csv('data.csv')

# remove timestamp from dataset (only checked for in the first column)
if uncleaned_data.columns[0].lower() == 'timestamp':
    uncleaned_data = uncleaned_data.iloc[:, 1:]
data = pd.DataFrame()

# keep track of which columns are categorical and what
# those columns' value mappings are
# structure: {colname1: {...}, colname2: {...} }
cat_value_dicts = {}
final_colname = uncleaned_data.columns[-1]

# for each column...
# NOTE: DataFrame.iteritems() was removed in pandas 2.0; items() is the
# equivalent iterator and is available in older pandas releases as well.
for (colname, colval) in uncleaned_data.items():
    # a column whose first value is already a number is copied over as-is
    if isinstance(colval.values[0], (np.integer, float)):
        data[colname] = uncleaned_data[colname].copy()
        continue

    # otherwise encode every distinct value as an integer, numbered in
    # order of first appearance
    # structure: {"lilac": 0, "blue": 1, ...}
    new_dict = {}
    transformed_col_vals = []  # new numeric datapoints
    for item in colval.values:
        # setdefault assigns the next free code the first time a value is
        # seen and returns the existing code on every later occurrence
        transformed_col_vals.append(new_dict.setdefault(item, len(new_dict)))

    # reverse dictionary only for final col (0, 1) => (vals), so that a
    # predicted class code can be turned back into its original label
    if colname == final_colname:
        new_dict = {code: label for (label, code) in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
63
+
64
+
65
### -------------------------------- ###
###           model training         ###
### -------------------------------- ###
# the last column of `data` is the prediction target; every column before
# it is treated as a feature
num_features = len(data.columns) - 1
x = data.iloc[:, :num_features]
y = data.iloc[:, num_features:]

# hold out a quarter of the rows for testing
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# multinomial logistic regression with no penalty term, solved with newton-cg
model = LogisticRegression(
    solver='newton-cg',
    penalty='none',
    multi_class='multinomial',
)

# y_train is a one-column frame, so flatten it to 1-D before fitting
model.fit(x_train, y_train.values.ravel())
y_pred = model.predict(x_test)


### -------------------------------- ###
###           file reading           ###
### -------------------------------- ###
# get_article is imported from reader.py; its result is indexed by the
# interface code below for 'article', 'css', 'title' and 'description'
info = get_article()
87
+
88
+
89
+ ### ------------------------------- ###
90
+ ### interface creation ###
91
+ ### ------------------------------- ###
92
+ # predictor for generic number of features
93
def general_predictor(*args):
    """Predict the target value for a single datapoint.

    Args:
        *args: One value per feature column, in the same order as the
            columns of ``data``. A value for a column listed in
            ``cat_value_dicts`` is looked up there and replaced by its
            integer code; any other value is passed to the model unchanged.

    Returns:
        The original label of the predicted class when the target column
        has an entry in ``cat_value_dicts``; otherwise the raw prediction
        converted to a string.

    Raises:
        KeyError: If a value given for a categorical column is not one of
            the categories recorded for that column.
    """
    features = []

    # transform categorical input
    for colname, arg in zip(data.columns, args):
        category_codes = cat_value_dicts.get(colname)
        if category_codes is None:
            features.append(arg)
        else:
            features.append(category_codes[arg])

    # predict single datapoint (the model expects a 2-D, one-row input)
    prediction = model.predict([features])[0]

    # A target column that was already numeric has no entry in
    # cat_value_dicts; indexing it unconditionally raised KeyError on every
    # prediction, so fall back to the raw predicted value in that case.
    label_by_code = cat_value_dicts.get(final_colname)
    if label_by_code is None:
        return str(prediction)
    return label_by_code[prediction]
107
+
108
def _input_widget(column):
    """Return the gradio input component for one feature column.

    Columns listed in ``cat_value_dicts`` get a radio button group offering
    the recorded categories; every other column gets a number input.
    """
    if column in cat_value_dicts:
        return gr.inputs.Radio(
            choices=list(cat_value_dicts[column]),
            type="value",
            label=column,
        )
    return gr.inputs.Number(label=column)


# one labelled input per feature column; the final (target) column is skipped.
# the labels replace the names lost by passing values through star-args
inputls = [
    _input_widget(column)
    for column in data.columns
    if column != final_colname
]

# generate gradio interface
interface = gr.Interface(
    general_predictor,
    inputs=inputls,
    outputs="text",
    article=info['article'],
    css=info['css'],
    theme='huggingface',
    title=info['title'],
    allow_flagging=False,
    description=info['description'],
)

# show the interface
interface.launch(share=True)