Vacation111 committed on
Commit
2b95f4a
•
1 Parent(s): d401de1

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +172 -0
  2. data +69 -0
  3. info.md +16 -0
  4. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### ----------------------------- ###
2
+ ### libraries ###
3
+ ### ----------------------------- ###
4
+
5
+ import gradio as gr
6
+ import pandas as pd
7
+ import numpy as np
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.linear_model import LogisticRegression
10
+ from sklearn import metrics
11
+
12
+
13
+ ### ------------------------------ ###
14
+ ### data transformation ###
15
+ ### ------------------------------ ###
16
+
17
# Load the survey responses.
# NOTE(review): this commit's file list shows the dataset uploaded as `data`
# (no extension) while the loader expects `data.csv` -- confirm the file name.
uncleaned_data = pd.read_csv('data.csv')

# Drop the timestamp, which is always the first column.
uncleaned_data = uncleaned_data.iloc[:, 1:]
data = pd.DataFrame()

# Value mappings of every categorical column.
# structure: {colname1: {...}, colname2: {...}}
cat_value_dicts = {}
final_colname = uncleaned_data.columns[-1]

# `DataFrame.items()` is used instead of `iteritems()`: the latter was
# deprecated in pandas 1.5 and removed in 2.0, while `items()` is available
# on old and new versions alike.
for colname, colval in uncleaned_data.items():

    # A column whose first value is already a number is copied unchanged.
    if isinstance(colval.values[0], (np.integer, float)):
        data[colname] = uncleaned_data[colname].copy()
        continue

    # Otherwise encode each distinct value with an integer code assigned in
    # order of first appearance.
    # structure: {"lilac": 0, "blue": 1, ...}
    new_dict = {}
    transformed_col_vals = []  # numeric codes, one per row
    for item in colval.values:
        if item not in new_dict:
            new_dict[item] = len(new_dict)
        transformed_col_vals.append(new_dict[item])

    # Only the final (target) column is stored reversed, code -> label, so a
    # predicted class can be translated back: {0: "lilac", 1: "blue", ...}
    if colname == final_colname:
        new_dict = {value: key for key, value in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
61
+
62
+
63
+ ### -------------------------------- ###
64
+ ### model training ###
65
+ ### -------------------------------- ###
66
+
67
# Separate features from the prediction target; the last column of `data`
# is always treated as the target.
cols = data.shape[1]
num_features = cols - 1
x = data.iloc[:, :num_features]
y = data.iloc[:, num_features:]

# Hold out a quarter of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# Fit a logistic regression with default parameters (fit returns the
# estimator itself), then predict the held-out rows.
model = LogisticRegression().fit(x_train, y_train.to_numpy().ravel())
y_pred = model.predict(x_test)
80
+
81
+
82
+ ### -------------------------------- ###
83
+ ### article generation ###
84
+ ### -------------------------------- ###
85
+ # borrow file reading function from reader.py
86
+
87
def get_feat():
    """Return the name of the feature with the largest coefficient magnitude.

    Reads the module-level ``model`` (a fitted estimator exposing ``coef_``)
    and ``data`` (whose leading columns are the features, in training order).

    Returns:
        The label in ``data.columns`` at the position of the largest summed
        absolute coefficient; ties resolve to the earliest column.
    """
    # coef_ holds one row of coefficients per class for a multi-class target
    # and a single row for a binary one. Looking only at row 0 would rank
    # features by their effect on the first class alone, so magnitudes are
    # summed over every row; for a single-row model this is unchanged.
    importance = np.abs(np.asarray(model.coef_)).sum(axis=0)
    return data.columns[int(np.argmax(importance))]
92
+
93
# Accuracy on the held-out rows as a percentage string with one decimal,
# followed by the name of the most influential feature.
acc = f"{round(metrics.accuracy_score(y_test, y_pred) * 100, 1)!s}%"
most_imp_feat = get_feat()
95
+ # info = get_article(acc, most_imp_feat)
96
+
97
+
98
+
99
+ ### ------------------------------- ###
100
+ ### interface creation ###
101
+ ### ------------------------------- ###
102
+
103
+
104
+ # predictor for generic number of features
105
def general_predictor(*args):
    """Predict the recommendation for one set of quiz answers.

    Args:
        *args: One answer per feature column, in the order of
            ``data.columns``. Answers for columns listed in
            ``cat_value_dicts`` are translated to their numeric codes;
            all other answers are passed through as given.

    Returns:
        The label stored for the predicted class in
        ``cat_value_dicts[final_colname]``.

    Raises:
        KeyError: If the answer for a categorical column is missing or is
            not one of the values recorded for that column.
    """
    feature_names = []
    features = []

    # zip stops at the shorter sequence, so when one answer per feature is
    # supplied the trailing target column is never visited.
    for colname, arg in zip(data.columns, args):
        mapping = cat_value_dicts.get(colname)
        if mapping is None:
            # numeric column: use the answer as-is
            features.append(arg)
        else:
            try:
                features.append(mapping[arg])
            except KeyError:
                # same exception type as a bare lookup, but names the column
                raise KeyError(
                    f"no valid answer for {colname!r} (got {arg!r})"
                ) from None
        feature_names.append(colname)

    # Predict a single datapoint. The input is labelled with the feature
    # names so it matches a model fitted on a DataFrame; scikit-learn >= 1.0
    # warns when a model fitted with feature names receives unnamed input.
    new_input = pd.DataFrame([features], columns=feature_names)
    result = model.predict(new_input)
    return cat_value_dicts[final_colname][result[0]]
119
+
120
+ # add data labels to replace those lost via star-args
121
+
122
+
123
# Read info.md once, up front: its first line is the app title and the rest
# is the article shown at the bottom of the page. Reading it in a single
# pass avoids keeping one handle open for the whole UI build while a second,
# shadowing handle re-opens the same file. The encoding is given explicitly
# because the file contains emoji.
with open('info.md', encoding='utf-8') as f:
    info_title = f.readline()
    info_body = f.read()

block = gr.Blocks()

with block:
    gr.Markdown(info_title)
    gr.Markdown('Take the quiz to get a personalized recommendation using AI.')

    with gr.Row():
        with gr.Group():
            # one input component per feature column, in column order, so the
            # positional answers line up with general_predictor's *args
            inputls = []
            for colname in data.columns:
                # skip last column (the prediction target)
                if colname == final_colname:
                    continue

                # access categories dict if data is categorical
                # otherwise, just use a number input
                if colname in cat_value_dicts:
                    radio_options = list(cat_value_dicts[colname].keys())
                    inputls.append(gr.Dropdown(radio_options, type="value", label=colname))
                else:
                    # add numerical input
                    inputls.append(gr.Number(label=colname))
                gr.Markdown("<br />")

            submit = gr.Button("Click to see your personalized result!", variant="primary")
            gr.Markdown("<br />")
            output = gr.Textbox(label="Your recommendation:", placeholder="your recommendation will appear here")

            submit.click(fn=general_predictor, inputs=inputls, outputs=output)
            gr.Markdown("<br />")

    with gr.Row():
        with gr.Group():
            gr.Markdown(f"<h3>Accuracy: </h3>{acc}")
        with gr.Group():
            gr.Markdown(f"<h3>Most important feature: </h3>{most_imp_feat}")

    gr.Markdown("<br />")

    with gr.Group():
        gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy and most important feature can be helpful for understanding how the model works, but <em>should not be considered absolute facts about the real world</em>.''')

    with gr.Group():
        gr.Markdown(info_body)

# show the interface
block.launch()
data ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,What's your budget?,Are you a extrovert or introvert? (Shy or bold?),Do you prefer hot or cold weather?,Can you swim?,Are you scared of heights?,Do you want to travel within the US or internationally?,Preferred flight length?,Preferred travel time? (If driving),How many people are you traveling with?,Would you prefer sightseeing or doing activities?,What's the age range?,Possible locations
2
+ ,"$500-1,000",Extrovert,Hot,Yes,Mildly,US,6-9 hrs,6-9 hrs,plus 1,Activities,12-17,"Honolulu, Hi"
3
+ ,"$500-1,000",Introvert,Hot,Yes,Mildly,International,3-6 hrs,6-9 hrs,plus 2,Activities,12-17,"Kyoto, Japan"
4
+ ,"$1,000-5,000",Introvert,Hot,No,Mildly,US,3-6 hrs,3-6 hrs,plus 3,Activities,12-17,"Honolulu, Hi"
5
+ ,"$1,000-5,000",Introvert,Hot,Yes,Mildly,International,6-9 hrs,6-9 hrs,5+ people,Activities,12-17,"Gold Coast, Australia"
6
+ ,"$1,000-5,000",Introvert,Hot,Yes,Not at all,International,1-3 hrs,1-3 hrs,plus 1,Activities,12-17,"Honolulu, Hi"
7
+ ,"$500-1,000",Extrovert,Cold,No,Mildly,International,3-6 hrs,3-6 hrs,plus 1,Activities,12-17,"Gold Coast, Australia"
8
+ ,"$1,000-5,000",Introvert,Hot,Yes,Not at all,International,9-12 hrs,12+ hrs,plus 3,Sightseeing,25+,"Madrid, Spain"
9
+ ,"$500-1,000",Extrovert,Hot,Yes,Mildly,US,3-6 hrs,3-6 hrs,plus 3,Activities,18-25,"San Francisco, CA"
10
+ ,"$5,000+",Introvert,Hot,Yes,Mildly,International,3-6 hrs,6-9 hrs,plus 1,Sightseeing,12-17,"Madrid, Spain"
11
+ ,"$5,000+",Extrovert,Hot,Yes,Not at all,International,12+ hrs,12+ hrs,plus 2,Activities,18-25,"Sierra Leone, Africa"
12
+ ,"$1,000-5,000",Introvert,Hot,Yes,Mildly,International,6-9 hrs,9-12 hrs,plus 2,Activities,12-17,"Gold Coast, Australia"
13
+ ,"$5,000+",Extrovert,Hot,Yes,Mildly,International,12+ hrs,12+ hrs,5+ people,Activities,12-17,"San Francisco, CA"
14
+ ,"$1,000-5,000",Extrovert,Cold,No,Mildly,International,6-9 hrs,6-9 hrs,plus 2,Activities,12-17,"Kyoto, Japan"
15
+ ,$0-500,Introvert,Cold,No,Not at all,International,1-3 hrs,1-3 hrs,plus 1,Sightseeing,12-17,"Honolulu, Hi"
16
+ ,"$1,000-5,000",Introvert,Hot,Yes,Mildly,US,3-6 hrs,6-9 hrs,plus 3,Activities,25+,"Cannon Beach, OR"
17
+ ,"$5,000+",Extrovert,Cold,No,Yes,US,6-9 hrs,1-3 hrs,5+ people,Activities,12-17,"Honolulu, Hi"
18
+ ,$0-500,Introvert,Hot,No,Yes,International,3-6 hrs,1-3 hrs,plus 2,Activities,12-17,"Kyoto, Japan"
19
+ ,"$1,000-5,000",Extrovert,Hot,No,Mildly,US,1-3 hrs,1-3 hrs,plus 2,Activities,12-17,"San Francisco, CA"
20
+ ,$0-500,Introvert,Hot,Yes,Not at all,International,6-9 hrs,1-3 hrs,plus 1,Activities,12-17,"Outer Banks, NC"
21
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Mildly,US,3-6 hrs,1-3 hrs,plus 2,Activities,12-17,"Honolulu, Hi"
22
+ ,"$500-1,000",Extrovert,Cold,Yes,Mildly,International,6-9 hrs,9-12 hrs,plus 4,Activities,12-17,"Cannon Beach, OR"
23
+ ,"$5,000+",Extrovert,Hot,Yes,Not at all,International,12+ hrs,6-9 hrs,5+ people,Sightseeing,25+,"Honolulu, Hi"
24
+ ,"$1,000-5,000",Extrovert,Cold,Yes,Mildly,US,3-6 hrs,3-6 hrs,plus 1,Sightseeing,12-17,"London, England"
25
+ ,"$500-1,000",Extrovert,Cold,Yes,Yes,International,3-6 hrs,6-9 hrs,plus 3,Sightseeing,12-17,"Honolulu, Hi"
26
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Not at all,International,3-6 hrs,1-3 hrs,plus 1,Activities,12-17,"Honolulu, Hi"
27
+ ,"$500-1,000",Extrovert,Hot,Yes,Mildly,International,6-9 hrs,9-12 hrs,plus 2,Activities,12-17,"Outer Banks, NC"
28
+ ,"$500-1,000",Introvert,Hot,Yes,Yes,International,3-6 hrs,1-3 hrs,plus 2,Sightseeing,18-25,"Gold Coast, Australia"
29
+ ,$0-500,Introvert,Hot,Yes,Not at all,US,1-3 hrs,3-6 hrs,plus 2,Activities,12-17,"Honolulu, Hi"
30
+ ,"$500-1,000",Introvert,Hot,Yes,Mildly,International,6-9 hrs,1-3 hrs,plus 1,Activities,12-17,"Outer Banks, NC"
31
+ ,"$500-1,000",Introvert,Hot,Yes,Not at all,International,3-6 hrs,3-6 hrs,plus 2,Sightseeing,12-17,"Honolulu, Hi"
32
+ ,"$1,000-5,000",Extrovert,Cold,Yes,Mildly,International,3-6 hrs,3-6 hrs,plus 3,Activities,18-25,"Gold Coast, Australia"
33
+ ,$0-500,Introvert,Hot,No,Not at all,International,9-12 hrs,9-12 hrs,5+ people,Activities,12-17,"Seoul, South Korea"
34
+ ,$0-500,Extrovert,Hot,Yes,Mildly,International,6-9 hrs,12+ hrs,plus 3,Activities,12-17,"Gold Coast, Australia"
35
+ ,"$1,000-5,000",Introvert,Hot,Yes,Mildly,US,6-9 hrs,9-12 hrs,plus 2,Activities,12-17,"Gold Coast, Australia"
36
+ ,"$1,000-5,000",Extrovert,Cold,Yes,Mildly,US,3-6 hrs,1-3 hrs,plus 1,Activities,12-17,"Honolulu, Hi"
37
+ ,$0-500,Introvert,Hot,Yes,Not at all,International,1-3 hrs,1-3 hrs,5+ people,Activities,12-17,"Seoul, South Korea"
38
+ ,"$1,000-5,000",Introvert,Hot,Yes,Yes,International,6-9 hrs,6-9 hrs,plus 3,Activities,25+,"Gold Coast, Australia"
39
+ ,$0-500,Extrovert,Hot,Yes,Mildly,International,1-3 hrs,1-3 hrs,5+ people,Activities,12-17,"Seoul, South Korea"
40
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Mildly,International,9-12 hrs,3-6 hrs,plus 2,Activities,12-17,"Gold Coast, Australia"
41
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Not at all,International,1-3 hrs,1-3 hrs,Just by myself,Activities,12-17,"Honolulu, Hi"
42
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Mildly,International,6-9 hrs,6-9 hrs,plus 3,Activities,25+,"Seoul, South Korea"
43
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Not at all,US,3-6 hrs,3-6 hrs,plus 1,Activities,12-17,"Seoul, South Korea"
44
+ ,"$500-1,000",Introvert,Hot,Yes,Mildly,US,3-6 hrs,6-9 hrs,plus 3,Activities,12-17,"San Francisco, CA"
45
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Mildly,International,6-9 hrs,9-12 hrs,plus 2,Activities,12-17,"Gold Coast, Australia"
46
+ ,$0-500,Introvert,Cold,Yes,Mildly,US,6-9 hrs,6-9 hrs,plus 2,Sightseeing,25+,"Honolulu, Hi"
47
+ ,"$5,000+",Introvert,Hot,Yes,Not at all,International,12+ hrs,9-12 hrs,5+ people,Activities,12-17,"Honolulu, Hi"
48
+ ,"$5,000+",Extrovert,Hot,Yes,Not at all,US,6-9 hrs,6-9 hrs,Just by myself,Activities,25+,"Boulder, CO"
49
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Mildly,International,6-9 hrs,6-9 hrs,plus 1,Sightseeing,25+,"London, England"
50
+ ,"$1,000-5,000",Introvert,Hot,Yes,Not at all,International,3-6 hrs,9-12 hrs,plus 2,Activities,12-17,"Honolulu, Hi"
51
+ ,"$1,000-5,000",Extrovert,Hot,No,Mildly,US,3-6 hrs,3-6 hrs,plus 1,Activities,12-17,"Outer Banks, NC"
52
+ ,$0-500,Introvert,Cold,Yes,Not at all,US,3-6 hrs,3-6 hrs,plus 1,Activities,25+,"London, England"
53
+ ,"$500-1,000",Introvert,Hot,Yes,Mildly,US,1-3 hrs,3-6 hrs,plus 1,Activities,25+,"Victoria, Canada"
54
+ ,"$1,000-5,000",Introvert,Hot,Yes,Mildly,International,3-6 hrs,3-6 hrs,plus 1,Sightseeing,25+,"Victoria, Canada"
55
+ ,"$1,000-5,000",Introvert,Hot,Yes,Mildly,US,1-3 hrs,1-3 hrs,plus 1,Sightseeing,25+,"Cannon Beach, OR"
56
+ ,"$500-1,000",Extrovert,Hot,Yes,Mildly,International,6-9 hrs,12+ hrs,plus 3,Activities,12-17,"Kyoto, Japan"
57
+ ,$0-500,Extrovert,Cold,No,Mildly,International,3-6 hrs,12+ hrs,Just by myself,Activities,12-17,"Honolulu, Hi"
58
+ ,$0-500,Introvert,Cold,No,Yes,International,3-6 hrs,3-6 hrs,plus 1,Activities,12-17,"Madrid, Spain"
59
+ ,"$1,000-5,000",Introvert,Cold,Yes,Mildly,International,3-6 hrs,9-12 hrs,plus 1,Activities,18-25,"Madrid, Spain"
60
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Yes,International,3-6 hrs,1-3 hrs,plus 3,Activities,12-17,"Kyoto, Japan"
61
+ ,"$1,000-5,000",Introvert,Hot,Yes,Mildly,International,6-9 hrs,3-6 hrs,Just by myself,Activities,18-25,"Honolulu, Hi"
62
+ ,$0-500,Introvert,Hot,No,Mildly,US,1-3 hrs,3-6 hrs,plus 1,Activities,12-17,"San Francisco, CA"
63
+ ,"$1,000-5,000",Introvert,Hot,Yes,Not at all,International,6-9 hrs,9-12 hrs,plus 1,Sightseeing,25+,"Sierra Leone, Africa"
64
+ ,"$500-1,000",Introvert,Hot,Yes,Not at all,International,6-9 hrs,6-9 hrs,Just by myself,Activities,12-17,"Honolulu, Hi"
65
+ ,"$1,000-5,000",Extrovert,Hot,Yes,Mildly,International,12+ hrs,9-12 hrs,plus 1,Activities,12-17,"Kyoto, Japan"
66
+ ,$0-500,Introvert,Hot,Yes,Yes,International,9-12 hrs,6-9 hrs,plus 1,Activities,12-17,"Kyoto, Japan"
67
+ ,$0-500,Extrovert,Hot,Yes,Not at all,International,9-12 hrs,3-6 hrs,plus 1,Activities,12-17,"Gold Coast, Australia"
68
+ ,"$5,000+",Introvert,Hot,Yes,Not at all,International,12+ hrs,12+ hrs,Just by myself,Activities,18-25,"Kyoto, Japan"
69
+ 5/31/2024 8:30:02,"$1,000-5,000",Extrovert,Hot,Yes,Yes,International,12+ hrs,3-6 hrs,plus 1,Sightseeing,25+,"Seoul, South Korea"
info.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 😌 [Edit info.md - Your app's title here]
2
+
3
+ ### 🧐 Problem Statement and Research Summary
4
+ [add info about your problem statement and your research here!]
5
+
6
+ ### 🎣 Data Collection Plan
7
+ [Edit info.md - add info about what data you collected and why here!]
8
+
9
+ ### 💥 Ethical Considerations (Data Privacy and Bias)
10
+ * Data privacy: [Edit info.md - add info about how you considered users' privacy here!]
11
+ * Bias: [Edit info.md - add info about how you considered bias here!]
12
+
13
+ ### 👻 Our Team
14
+ [Edit info.md - add info about your team members here!]
15
+
16
+ ![aiEDU logo](https://images.squarespace-cdn.com/content/v1/5e4efdef6d10420691f02bc1/5db5a8a3-1761-4fce-a096-bd5f2515162f/aiEDU+_black+logo+stacked.png?format=100w)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pip>=23.2.1
2
+ pandas==1.3.4
3
+ scikit-learn>=1.0.1
4
+ numpy==1.21.4