Aaryamann1709 committed on
Commit
396f55d
1 Parent(s): 20f043f

Delete app.py

Files changed (1)
  1. app.py +0 -172
app.py DELETED
@@ -1,172 +0,0 @@
- ### ----------------------------- ###
- ### libraries ###
- ### ----------------------------- ###
-
- import gradio as gr
- import pandas as pd
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.linear_model import LogisticRegression
- from sklearn import metrics
-
-
- ### ------------------------------ ###
- ### data transformation ###
- ### ------------------------------ ###
-
- # load dataset
- uncleaned_data = pd.read_csv('data.csv')
-
- # remove timestamp from dataset (always first column)
- uncleaned_data = uncleaned_data.iloc[: , 1:]
- data = pd.DataFrame()
-
- # keep track of which columns are categorical and what
- # those columns' value mappings are
- # structure: {colname1: {...}, colname2: {...} }
- cat_value_dicts = {}
- final_colname = uncleaned_data.columns[len(uncleaned_data.columns) - 1]
-
- # for each column...
- for (colname, colval) in uncleaned_data.iteritems():
-
-     # check if col is already a number; if so, add col directly
-     # to new dataframe and skip to next column
-     if isinstance(colval.values[0], (np.integer, float)):
-         data[colname] = uncleaned_data[colname].copy()
-         continue
-
-     # structure: {0: "lilac", 1: "blue", ...}
-     new_dict = {}
-     val = 0 # first index per column
-     transformed_col_vals = [] # new numeric datapoints
-
-     # if not, for each item in that column...
-     for (row, item) in enumerate(colval.values):
-
-         # if item is not in this col's dict...
-         if item not in new_dict:
-             new_dict[item] = val
-             val += 1
-
-         # then add numerical value to transformed dataframe
-         transformed_col_vals.append(new_dict[item])
-
-     # reverse dictionary only for final col (0, 1) => (vals)
-     if colname == final_colname:
-         new_dict = {value : key for (key, value) in new_dict.items()}
-
-     cat_value_dicts[colname] = new_dict
-     data[colname] = transformed_col_vals
-
-
- ### -------------------------------- ###
- ### model training ###
- ### -------------------------------- ###
-
- # select features and predicton; automatically selects last column as prediction
- cols = len(data.columns)
- num_features = cols - 1
- x = data.iloc[: , :num_features]
- y = data.iloc[: , num_features:]
-
- # split data into training and testing sets
- x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
-
- # instantiate the model (using default parameters)
- model = LogisticRegression()
- model.fit(x_train, y_train.values.ravel())
- y_pred = model.predict(x_test)
-
-
- ### -------------------------------- ###
- ### article generation ###
- ### -------------------------------- ###
- # borrow file reading function from reader.py
-
- def get_feat():
-     feats = [abs(x) for x in model.coef_[0]]
-     max_val = max(feats)
-     idx = feats.index(max_val)
-     return data.columns[idx]
-
- acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
- most_imp_feat = get_feat()
- # info = get_article(acc, most_imp_feat)
-
-
-
- ### ------------------------------- ###
- ### interface creation ###
- ### ------------------------------- ###
-
-
- # predictor for generic number of features
- def general_predictor(*args):
-     features = []
-
-     # transform categorical input
-     for colname, arg in zip(data.columns, args):
-         if (colname in cat_value_dicts):
-             features.append(cat_value_dicts[colname][arg])
-         else:
-             features.append(arg)
-
-     # predict single datapoint
-     new_input = [features]
-     result = model.predict(new_input)
-     return cat_value_dicts[final_colname][result[0]]
-
- # add data labels to replace those lost via star-args
-
-
- block = gr.Blocks()
-
- with open('info.md') as f:
-     with block:
-         gr.Markdown(f.readline())
-         gr.Markdown('Take the quiz to get a personalized recommendation using AI.')
-
-         with gr.Row():
-             with gr.Box():
-                 inputls = []
-                 for colname in data.columns:
-                     # skip last column
-                     if colname == final_colname:
-                         continue
-
-                     # access categories dict if data is categorical
-                     # otherwise, just use a number input
-                     if colname in cat_value_dicts:
-                         radio_options = list(cat_value_dicts[colname].keys())
-                         inputls.append(gr.inputs.Dropdown(choices=radio_options, type="value", label=colname))
-                     else:
-                         # add numerical input
-                         inputls.append(gr.inputs.Number(label=colname))
-                 gr.Markdown("<br />")
-
-                 submit = gr.Button("Click to see your personalized result!", variant="primary")
-                 gr.Markdown("<br />")
-                 output = gr.Textbox(label="Your recommendation:", placeholder="your recommendation will appear here")
-
-                 submit.click(fn=general_predictor, inputs=inputls, outputs=output)
-                 gr.Markdown("<br />")
-
-         with gr.Row():
-             with gr.Box():
-                 gr.Markdown(f"<h3>Accuracy: </h3>{acc}")
-             with gr.Box():
-                 gr.Markdown(f"<h3>Most important feature: </h3>{most_imp_feat}")
-
-         gr.Markdown("<br />")
-
-         with gr.Box():
-             gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy and most important feature can be helpful for understanding how the model works, but <em>should not be considered absolute facts about the real world</em>.''')
-
-         with gr.Box():
-             with open('info.md') as f:
-                 f.readline()
-                 gr.Markdown(f.read())
-
- # show the interface
- block.launch()
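
A side note for anyone browsing this commit: the deleted data-transformation block iterates with DataFrame.iteritems(), which was removed in pandas 2.0. Below is a minimal, self-contained sketch of the same label-encoding idea against current pandas (DataFrame.items(), pd.unique); the toy frame and its column names are illustrative only and are not taken from data.csv.

import pandas as pd

# Illustrative frame only -- the deleted app read its columns from data.csv.
raw = pd.DataFrame({
    "color": ["lilac", "blue", "lilac"],   # categorical feature
    "age": [21, 34, 29],                   # already numeric, kept as-is
    "label": ["yes", "no", "yes"],         # last column = prediction target
})

cat_value_dicts = {}                       # {colname: mapping} for categorical columns
data = pd.DataFrame()
final_colname = raw.columns[-1]

for colname, colval in raw.items():        # items() replaces the removed iteritems()
    # numeric columns pass through unchanged
    if pd.api.types.is_numeric_dtype(colval):
        data[colname] = colval.copy()
        continue

    # map each distinct category to the order in which it first appears
    codes = {cat: i for i, cat in enumerate(pd.unique(colval))}
    data[colname] = colval.map(codes)

    # keep the reverse map for the target column so a numeric prediction
    # can be translated back into its original label
    cat_value_dicts[colname] = (
        {v: k for k, v in codes.items()} if colname == final_colname else codes
    )

print(data)
print(cat_value_dicts)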