Nick Canu committed on
Commit
a79e4c0
1 Parent(s): abb14f6

report button and tab update

Browse files
.vscode/launch.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- // Use IntelliSense to learn about possible attributes.
3
- // Hover to view descriptions of existing attributes.
4
- // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
- "version": "0.2.0",
6
- "configurations": [
7
- {
8
- "name": "Python: Module",
9
- "type": "python",
10
- "request": "launch",
11
- "module": "streamlit",
12
- "args": ["run", "Home.py"],
13
- "justMyCode": true
14
- }
15
- ]
16
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Alternate Class Files for Appendix/Community Aggregation - Input Manager.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Alternative input manager for description generator
2
+ class input_manager:
3
+ #initialize key dictionary from vector data frame and set community top N
4
+ def __init__(self,key_df, slim_df, search_tokens, top_n=10):
5
+ self.key_df = key_df
6
+ self.slim_df = slim_df
7
+ self.search_tokens = search_tokens
8
+ self.key = dict(zip(list(key_df.columns),np.zeros(len(key_df.columns))))
9
+ self.top_n = top_n
10
+ self.nlp = spacy.load("en_core_web_md")
11
+ #translate input text to vector
12
+ def set_input(self,input_cats):
13
+
14
+ #need setup to apply correct group tag to values
15
+ #separate known/unknown features
16
+ k_flags = [cat for cat in input_cats if cat in list(self.key.keys())]
17
+ unk_flags = [cat for cat in input_cats if cat not in list(self.key.keys())]
18
+
19
+ #process within feature class similarity for each unknown input
20
+ if len(unk_flags)>0:
21
+ outs = []
22
+
23
+ for word in unk_flags:
24
+ if re.match(r"game_type_",word):
25
+ tok = self.nlp(word.split("_")[-1])
26
+ mtch = max([(key,key.similarity(tok)) for key in self.search_tokens[0]],key=itemgetter(1))
27
+ #if no known match is found (model doesn't recognize input word), we're going to discard - other solutions performance prohibitive
28
+ if mtch[1]>0:
29
+ outs.append("game_type_"+mtch[0])
30
+ elif re.match(r"mechanic_",word):
31
+ tok = self.nlp(word.split("_")[-1])
32
+ mtch = max([(key,key.similarity(tok)) for key in self.search_tokens[1]],key=itemgetter(1))
33
+ if mtch[1]>0:
34
+ outs.append("mechanic_"+mtch[0])
35
+ elif re.match(r"category_",word):
36
+ tok = self.nlp(word.split("_")[-1])
37
+ mtch=max([(key,key.similarity(tok)) for key in self.search_tokens[2]],key=itemgetter(1))
38
+ if mtch[1]>0:
39
+ outs.append("category_"+mtch[0])
40
+ elif re.match(r"family_",word):
41
+ tok = self.nlp(word.split("_")[-1])
42
+ mtch=max([(key,key.similarity(tok)) for key in self.search_tokens[3]],key=itemgetter(1))
43
+ if mtch[1]>0:
44
+ outs.append("family_"+str(mtch[0]))
45
+
46
+ #if unks are processed, rejoin nearest match to known.
47
+ k_flags = list(set(k_flags+outs))
48
+
49
+ #preserve global key and ouput copy w/input keys activated to 1
50
+ d = self.key.copy()
51
+ for cat in k_flags:
52
+ d[cat] = 1.0
53
+ return d
54
+
55
+ def input_parser(self,in_vec):
56
+ #extracting keys from processed vector
57
+ ks = [k for k,v in in_vec.items() if v == 1]
58
+
59
+ #finding raw "total" match score - how many of the how input columns are hot in each existing vector
60
+ inter = self.key_df[ks].sum(axis=1)
61
+
62
+ #performing operation on each df seems to be slightly quicker than transforming the df here - may refactor though
63
+
64
+ #dropping any row without 3 matches (minimum match check)
65
+ cand_vec = self.key_df.iloc[list(inter[inter>=3].index)]
66
+ #if parsing returns less ranked matches than specificed top n, reduce threshold to 1 match and check again
67
+ if len(cand_vec) < self.top_n:
68
+ cand_vec = self.key_df.iloc[list(inter[inter>=1].index)]
69
+
70
+ cand_slim = self.slim_df.iloc[list(inter[inter>=3].index)]
71
+ if len(cand_slim) < self.top_n:
72
+ cand_slim = self.key_df.iloc[list(inter[inter>=1].index)]
73
+
74
+ return ks,cand_slim,in_vec.values()
75
+
76
+ #calculating per community vector pairwise jaccard similarity to input split by feature class
77
+ def ret_jaccard(self,in_vec,t_vec):
78
+ gt_score = sklearn.metrics.jaccard_score(in_vec[1:9],t_vec[1:9],zero_division=0)
79
+ cat_score = sklearn.metrics.jaccard_score(in_vec[192:276],t_vec[192:276],zero_division=0)
80
+ mech_score = sklearn.metrics.jaccard_score(in_vec[9:192],t_vec[9:192],zero_division=0)
81
+ fam_score = sklearn.metrics.jaccard_score(in_vec[276:3901],t_vec[276:3901],zero_division=0)
82
+ if in_vec[0] == t_vec[0]:
83
+ coop_score = 1
84
+ else:
85
+ coop_score = 0
86
+
87
+ #initial weighting treats all feature classes as equal - looking into updating this as a feedback mechanism
88
+ return np.mean([gt_score,cat_score,mech_score,fam_score,coop_score])
89
+
90
+ #function to actually return community neighbors
91
+ def n_neighbors(self,in_data):
92
+ #applies jaccard func to each row using vectors and maps to "full" df w/text
93
+ slim, vec, in_vec = in_data
94
+ vec['score']=vec.apply(lambda x: self.ret_jaccard(in_vec,x),raw=True,axis=1)
95
+ slim['score']=vec['score']
96
+
97
+ #converts to rank - this avoids splitting equal scoring groups inappropriately
98
+ slim['rank'] = slim['score'].rank(ascending=False)
99
+ return slim[slim['rank']<self.top_n].sort_values(by=['rank'])
100
+
101
+ def query_score(self,outframe, gen_text):
102
+ #requires text processing function, nearest neighbor community dataframe, and piece of generated text
103
+ query = doc_text_preprocessing(pd.Series(gen_text))
104
+ desc_tokens = pd.concat([outframe['cleaned_descriptions'],pd.Series(query)])
105
+ desc_dict = corpora.Dictionary()
106
+ desc_corpus = [desc_dict.doc2bow(doc, allow_update=True) for doc in desc_tokens]
107
+ temp_index = get_tmpfile("index")
108
+ index = similarities.Similarity(temp_index, desc_corpus, num_features=len(desc_dict.token2id))
109
+
110
+ sim_stack = []
111
+ for sims in index:
112
+ sim_stack.append(sims)
113
+
114
+ return (gen_text,np.mean(np.multiply(out['score'],sim_stack[-1][:-1])))
Home.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
 
3
  st.set_page_config(page_title='Auto-BG: The Game Concept Generator', layout='wide')
4
 
 
 
5
  def application():
6
  ###Imports
7
  import pandas as pd
@@ -14,6 +16,7 @@ def application():
14
  from title_generator import Title_Generator
15
  import gzip
16
  import io
 
17
  from description_generator import input_manager, model_control
18
  from pathlib import Path
19
 
@@ -39,7 +42,7 @@ def application():
39
  if 'coop_d' not in st.session_state:
40
  st.session_state.coop_d = 0
41
 
42
- #non-ui helper functions
43
  #reader code extended from https://gist.github.com/thearn/5424244 for alternate load format
44
  def reader(path):
45
  f = gzip.GzipFile(filename=path)
@@ -81,7 +84,7 @@ def application():
81
  inter_pair = Tgen.candidate_generator(clean_desc)
82
  out = Tgen.candidate_score(inter_pair,ex_check)
83
  descs.append(out)
84
- st.sidebar.success("Prompt " +str(status+1)+ " generated!")
85
  st.session_state.output_dict = {0:descs[0],1:descs[1],2:descs[2]}
86
 
87
  def title_check(next=0):
@@ -131,8 +134,23 @@ def application():
131
  st.session_state.title_iter = 0
132
  show_title(0)
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
-
136
  ###Variables
137
 
138
  ###Data
@@ -161,8 +179,6 @@ def application():
161
 
162
  Tgen, iman, mctrl = setup_models()
163
 
164
-
165
-
166
  #UI
167
 
168
  #Application
@@ -336,7 +352,9 @@ def application():
336
 
337
  with d_col2:
338
  st.button("See Next Description", on_click=ND_button_clicked, use_container_width=True)
339
-
 
 
340
  def blog():
341
  """
342
  Blog describing the Auto-BG project
@@ -386,12 +404,11 @@ def about_us():
386
  *MADS (Master of Applied Data Science)*\n
387
  """)
388
 
389
- page_names_to_funcs = {
390
- "Application": application,
391
- "Blog": blog,
392
- "About Us": about_us,
393
- }
394
 
395
- demo_name = st.sidebar.selectbox("Choose a page:", page_names_to_funcs.keys())
396
- page_names_to_funcs[demo_name]()
397
 
 
 
 
2
 
3
  st.set_page_config(page_title='Auto-BG: The Game Concept Generator', layout='wide')
4
 
5
+ tab1, tab2, tab3 = st.tabs(['App', 'Blog', 'About Us'])
6
+
7
  def application():
8
  ###Imports
9
  import pandas as pd
 
16
  from title_generator import Title_Generator
17
  import gzip
18
  import io
19
+ from datetime import date
20
  from description_generator import input_manager, model_control
21
  from pathlib import Path
22
 
 
42
  if 'coop_d' not in st.session_state:
43
  st.session_state.coop_d = 0
44
 
45
+ #helper functions
46
  #reader code extended from https://gist.github.com/thearn/5424244 for alternate load format
47
  def reader(path):
48
  f = gzip.GzipFile(filename=path)
 
84
  inter_pair = Tgen.candidate_generator(clean_desc)
85
  out = Tgen.candidate_score(inter_pair,ex_check)
86
  descs.append(out)
87
+ results.success("Prompt " +str(status+1)+ "/3 Generated!")
88
  st.session_state.output_dict = {0:descs[0],1:descs[1],2:descs[2]}
89
 
90
  def title_check(next=0):
 
134
  st.session_state.title_iter = 0
135
  show_title(0)
136
 
137
+ def report():
138
+ inputs = '|'.join(str(x) for x in st.session_state.inputs)
139
+ data = {'rprtd': date.today(),'inpts': inputs, 'title': st.session_state.output_dict[st.session_state.desc_iter]['titles'][st.session_state.title_iter][0], 'desc':st.session_state.output_dict[st.session_state.desc_iter]['text']}
140
+ try:
141
+ r_df = pd.DataFrame(data, index=[0])
142
+ r_p = pd.read_pickle(Path(__file__).parent / "Persistent_Data/reported_df.PICKLE")
143
+ w_p = pd.concat([r_df, r_p])
144
+ w_p = w_p.drop_duplicates()
145
+ print('try')
146
+ print(w_p)
147
+ w_p.to_pickle(Path(__file__).parent / "Persistent_Data/reported_df.PICKLE")
148
+ except:
149
+ print('except')
150
+ print(r_df)
151
+ r_df.to_pickle(Path(__file__).parent / "Persistent_Data/reported_df.PICKLE")
152
 
153
+
154
  ###Variables
155
 
156
  ###Data
 
179
 
180
  Tgen, iman, mctrl = setup_models()
181
 
 
 
182
  #UI
183
 
184
  #Application
 
352
 
353
  with d_col2:
354
  st.button("See Next Description", on_click=ND_button_clicked, use_container_width=True)
355
+
356
+ st.button('Report', on_click=report, use_container_width=True)
357
+
358
  def blog():
359
  """
360
  Blog describing the Auto-BG project
 
404
  *MADS (Master of Applied Data Science)*\n
405
  """)
406
 
407
+ with tab1:
408
+ application()
 
 
 
409
 
410
+ with tab2:
411
+ blog()
412
 
413
+ with tab3:
414
+ about_us()