bbutzin committed on
Commit
eb530ce
•
1 Parent(s): babd8c3

Upload 4 files

Browse files
Files changed (4) hide show
  1. app (1).py +172 -0
  2. data.csv +22 -0
  3. info (2).md +16 -0
  4. requirements (1).txt +4 -0
app (1).py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### ----------------------------- ###
2
+ ### libraries ###
3
+ ### ----------------------------- ###
4
+
5
+ import gradio as gr
6
+ import pandas as pd
7
+ import numpy as np
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.linear_model import LogisticRegression
10
+ from sklearn import metrics
11
+
12
+
13
+ ### ------------------------------ ###
14
+ ### data transformation ###
15
+ ### ------------------------------ ###
16
+
17
# load dataset
uncleaned_data = pd.read_csv('data.csv')

# remove timestamp from dataset (always first column)
uncleaned_data = uncleaned_data.iloc[:, 1:]
data = pd.DataFrame()

# keep track of which columns are categorical and what
# those columns' value mappings are
# structure: {colname1: {...}, colname2: {...} }
cat_value_dicts = {}
final_colname = uncleaned_data.columns[-1]

# for each column...
# (DataFrame.items() is used rather than iteritems(), which newer pandas
# releases deprecated and then removed; items() yields the same pairs)
for (colname, colval) in uncleaned_data.items():

    # numeric columns are added directly to the new dataframe.
    # The column dtype is inspected instead of only the first value, so an
    # empty column does not raise and a mixed column is not mistaken for a
    # number column. Booleans are excluded so they are still label-encoded.
    if (pd.api.types.is_numeric_dtype(colval)
            and not pd.api.types.is_bool_dtype(colval)):
        data[colname] = colval.copy()
        continue

    # structure while encoding: {"lilac": 0, "blue": 1, ...}
    new_dict = {}
    transformed_col_vals = []  # new numeric datapoints

    # otherwise, give each distinct item the next unused integer code
    # (in order of first appearance) and record the code for every row
    for item in colval.values:
        if item not in new_dict:
            new_dict[item] = len(new_dict)
        transformed_col_vals.append(new_dict[item])

    # reverse dictionary only for final col, (vals) => (0, 1) becomes
    # (0, 1) => (vals), so predictions can be decoded back to labels
    if colname == final_colname:
        new_dict = {value: key for (key, value) in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
61
+
62
+
63
+ ### -------------------------------- ###
64
+ ### model training ###
65
+ ### -------------------------------- ###
66
+
67
# select features and prediction; the last column is always the prediction
# target and every column before it is a feature
cols = data.shape[1]
num_features = cols - 1
x = data.iloc[:, :num_features]
y = data.iloc[:, num_features:]

# hold out a quarter of the rows for testing
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
)

# fit a logistic regression with default parameters; the single-column
# target frame is flattened to a 1-D array before fitting
model = LogisticRegression()
train_labels = y_train.values.ravel()
model.fit(x_train, train_labels)

# predictions for the held-out rows
y_pred = model.predict(x_test)
80
+
81
+
82
+ ### -------------------------------- ###
83
+ ### article generation ###
84
+ ### -------------------------------- ###
85
+ # borrow file reading function from reader.py
86
+
87
def get_feat():
    """Return the name of the feature column with the largest coefficient magnitude.

    ``model.coef_`` holds one row of coefficients per fitted class (a single
    row for a binary target). The absolute coefficients are summed over all
    rows so that every class contributes to the ranking, rather than only the
    first row. For a single-row ``coef_`` this is the same as ranking by
    ``abs(model.coef_[0])``.

    Returns:
        The label from ``data.columns`` at the position of the largest
        summed absolute coefficient (the first such position on ties).
    """
    importance = np.abs(np.asarray(model.coef_)).sum(axis=0)
    idx = int(np.argmax(importance))
    return data.columns[idx]
92
+
93
# test-set accuracy as a percentage string with one decimal place, e.g. "85.7%"
acc = "%s%%" % round(metrics.accuracy_score(y_test, y_pred) * 100, 1)

# name of the feature the model weights most heavily
most_imp_feat = get_feat()
# info = get_article(acc, most_imp_feat)
96
+
97
+
98
+
99
+ ### ------------------------------- ###
100
+ ### interface creation ###
101
+ ### ------------------------------- ###
102
+
103
+
104
+ # predictor for generic number of features
105
def general_predictor(*args):
    """Predict the target value for a single set of quiz answers.

    Args:
        *args: one answer per feature column, in the same order as
            ``data.columns``. Answers for columns listed in
            ``cat_value_dicts`` are translated to the integer codes used
            during training; all other answers are passed to the model as-is.

    Returns:
        The decoded label from ``cat_value_dicts[final_colname]`` when the
        target column was label-encoded, otherwise the raw model prediction.
        If an answer is missing or is not a known category, a short message
        naming the unanswered question is returned instead of raising.
    """
    features = []

    # transform categorical input
    for colname, arg in zip(data.columns, args):
        if colname in cat_value_dicts:
            mapping = cat_value_dicts[colname]
            # an unselected dropdown arrives as None, which is not a key
            if arg not in mapping:
                return f'Please choose an answer for "{colname}".'
            features.append(mapping[arg])
        elif arg is None:
            # an empty numeric field cannot be fed to the model
            return f'Please enter a number for "{colname}".'
        else:
            features.append(arg)

    # predict single datapoint
    result = model.predict([features])

    # decode the prediction when a label mapping exists; a numeric target
    # column has no mapping, so the raw prediction is returned
    label_lookup = cat_value_dicts.get(final_colname, {})
    return label_lookup.get(result[0], result[0])
119
+
120
+ # add data labels to replace those lost via star-args
121
+
122
+
123
block = gr.Blocks()

# Read info.md once, explicitly as UTF-8 (the file contains emoji, which the
# platform default encoding may not decode): the first line is used as the
# page title and the remainder as the article shown beside the quiz.
with open('info.md', encoding='utf-8') as f:
    info_title = f.readline()
    info_body = f.read()

# NOTE(review): the nesting of the layout containers below was reconstructed
# from a listing that had lost its indentation - confirm against the deployed app.
with block:
    gr.Markdown(info_title)
    gr.Markdown('Take the quiz to get a personalized recommendation using AI.')

    with gr.Row():
        with gr.Group():
            inputls = []
            for colname in data.columns:
                # skip last column (the prediction target is not an input)
                if colname == final_colname:
                    continue

                # access categories dict if data is categorical
                # otherwise, just use a number input
                if colname in cat_value_dicts:
                    radio_options = list(cat_value_dicts[colname].keys())
                    inputls.append(gr.Dropdown(radio_options, type="value", label=colname))
                else:
                    # add numerical input
                    inputls.append(gr.Number(label=colname))
                gr.Markdown("<br />")

            submit = gr.Button("Click to see your personalized result!", variant="primary")
            gr.Markdown("<br />")
            output = gr.Textbox(label="Your recommendation:", placeholder="your recommendation will appear here")

            # inputs are passed positionally to general_predictor in column order
            submit.click(fn=general_predictor, inputs=inputls, outputs=output)
            gr.Markdown("<br />")

            with gr.Row():
                with gr.Group():
                    gr.Markdown(f"<h3>Accuracy: </h3>{acc}")
                with gr.Group():
                    gr.Markdown(f"<h3>Most important feature: </h3>{most_imp_feat}")

            gr.Markdown("<br />")

            with gr.Group():
                gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy and most important feature can be helpful for understanding how the model works, but <em>should not be considered absolute facts about the real world</em>.''')

        with gr.Group():
            # everything in info.md after the title line
            gr.Markdown(info_body)

# show the interface
block.launch()
data.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Timestamp,Are you a fan of monsters and the macabre?,Where do you mostly consume content?,How would you describe your style?,How old are you?,What scares you the most?
2
+ 2024/06/22 10:45:55 AM CST,Yes,The internet,Other,21-30 years old,Spiders
3
+ 2024/06/22 10:46:15 AM CST,No,Video Games,Gothic,11-20 years old,A creepy tomb
4
+ 2024/06/22 10:47:13 AM CST,Yes,The internet,Grunge,21-30 years old,Spiders
5
+ 2024/06/22 10:48:31 AM CST,Yes,The internet,Preppy,31-40 years old,A creepy tomb
6
+ 2024/06/22 10:48:50 AM CST,Yes,Movies,Gothic,41-50 years old,The deep ocean
7
+ 2024/06/22 10:49:46 AM CST,Yes,The internet,Gothic,21-30 years old,Spiders
8
+ 2024/06/22 10:50:16 AM CST,Yes,Video Games,Grunge,61+ years old,The deep ocean
9
+ 2024/06/22 10:50:18 AM CST,Yes,Movies,Minimalist,31-40 years old,Darkness
10
+ 2024/06/22 10:50:47 AM CST,No,Television,Preppy,11-20 years old,The unknown
11
+ 2024/06/22 10:51:28 AM CST,Yes,Movies,Casual,21-30 years old,The deep ocean
12
+ 2024/06/22 10:51:49 AM CST,Yes,Movies,Sporty,21-30 years old,The unknown
13
+ 2024/06/22 10:52:17 AM CST,Yes,The internet,Sporty,21-30 years old,Darkness
14
+ 2024/06/22 11:17:01 AM CST,Yes,The internet,Casual,21-30 years old,The unknown
15
+ 2024/06/22 11:17:16 AM CST,No,Television,Grunge,11-20 years old,A haunted house
16
+ 2024/06/22 11:17:46 AM CST,Yes,Books,Minimalist,61+ years old,The deep ocean
17
+ 2024/06/22 11:31:24 AM CST,No,Television,Preppy,31-40 years old,A creepy tomb
18
+ 2024/06/22 11:44:20 AM CST,No,The internet,Other,31-40 years old,The deep ocean
19
+ 2024/06/22 12:04:39 PM CST,No,Books,Casual,11-20 years old,The deep ocean
20
+ 2024/06/22 12:05:14 PM CST,Yes,Out in the world,Other,21-30 years old,Mythical creatures
21
+ 2024/06/22 1:10:04 PM CST,Yes,The internet,Casual,61+ years old,The unknown
22
+ 2024/06/22 2:49:34 PM CST,Yes,Movies,Gothic,31-40 years old,The deep ocean
info (2).md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 😌 [Edit info.md - Your app's title here]
2
+
3
+ ### 🧐 Problem Statement and Research Summary
4
+ [add info about your problem statement and your research here!]
5
+
6
+ ### 🎣 Data Collection Plan
7
+ [Edit info.md - add info about what data you collected and why here!]
8
+
9
+ ### 💥 Ethical Considerations (Data Privacy and Bias)
10
+ * Data privacy: [Edit info.md - add info about how you considered users' privacy here!]
11
+ * Bias: [Edit info.md - add info about how you considered bias here!]
12
+
13
+ ### 👻 Our Team
14
+ [Edit info.md - add info about your team members here!]
15
+
16
+ ![aiEDU logo](https://images.squarespace-cdn.com/content/v1/5e4efdef6d10420691f02bc1/5db5a8a3-1761-4fce-a096-bd5f2515162f/aiEDU+_black+logo+stacked.png?format=100w)
requirements (1).txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pip>=23.2.1
2
+ pandas==1.3.4
3
+ scikit-learn>=1.0.1
4
+ numpy==1.21.4