Nick Canu committed on
Commit
a79e4c0
1 Parent(s): abb14f6

report button and tab update

Browse files
.vscode/launch.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- // Use IntelliSense to learn about possible attributes.
3
- // Hover to view descriptions of existing attributes.
4
- // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
- "version": "0.2.0",
6
- "configurations": [
7
- {
8
- "name": "Python: Module",
9
- "type": "python",
10
- "request": "launch",
11
- "module": "streamlit",
12
- "args": ["run", "Home.py"],
13
- "justMyCode": true
14
- }
15
- ]
16
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Alternate Class Files for Appendix/Community Aggregation - Input Manager.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Alternative input manager for description generator
2
+ class input_manager:
3
+ #initialize key dictionary from vector data frame and set community top N
4
+ def __init__(self,key_df, slim_df, search_tokens, top_n=10):
5
+ self.key_df = key_df
6
+ self.slim_df = slim_df
7
+ self.search_tokens = search_tokens
8
+ self.key = dict(zip(list(key_df.columns),np.zeros(len(key_df.columns))))
9
+ self.top_n = top_n
10
+ self.nlp = spacy.load("en_core_web_md")
11
+ #translate input text to vector
12
+ def set_input(self,input_cats):
13
+
14
+ #need setup to apply correct group tag to values
15
+ #separate known/unknown features
16
+ k_flags = [cat for cat in input_cats if cat in list(self.key.keys())]
17
+ unk_flags = [cat for cat in input_cats if cat not in list(self.key.keys())]
18
+
19
+ #process within feature class similarity for each unknown input
20
+ if len(unk_flags)>0:
21
+ outs = []
22
+
23
+ for word in unk_flags:
24
+ if re.match(r"game_type_",word):
25
+ tok = self.nlp(word.split("_")[-1])
26
+ mtch = max([(key,key.similarity(tok)) for key in self.search_tokens[0]],key=itemgetter(1))
27
+ #if no known match is found (model doesn't recognize input word), we're going to discard - other solutions performance prohibitive
28
+ if mtch[1]>0:
29
+ outs.append("game_type_"+mtch[0])
30
+ elif re.match(r"mechanic_",word):
31
+ tok = self.nlp(word.split("_")[-1])
32
+ mtch = max([(key,key.similarity(tok)) for key in self.search_tokens[1]],key=itemgetter(1))
33
+ if mtch[1]>0:
34
+ outs.append("mechanic_"+mtch[0])
35
+ elif re.match(r"category_",word):
36
+ tok = self.nlp(word.split("_")[-1])
37
+ mtch=max([(key,key.similarity(tok)) for key in self.search_tokens[2]],key=itemgetter(1))
38
+ if mtch[1]>0:
39
+ outs.append("category_"+mtch[0])
40
+ elif re.match(r"family_",word):
41
+ tok = self.nlp(word.split("_")[-1])
42
+ mtch=max([(key,key.similarity(tok)) for key in self.search_tokens[3]],key=itemgetter(1))
43
+ if mtch[1]>0:
44
+ outs.append("family_"+str(mtch[0]))
45
+
46
+ #if unks are processed, rejoin nearest match to known.
47
+ k_flags = list(set(k_flags+outs))
48
+
49
+ #preserve global key and ouput copy w/input keys activated to 1
50
+ d = self.key.copy()
51
+ for cat in k_flags:
52
+ d[cat] = 1.0
53
+ return d
54
+
55
+ def input_parser(self,in_vec):
56
+ #extracting keys from processed vector
57
+ ks = [k for k,v in in_vec.items() if v == 1]
58
+
59
+ #finding raw "total" match score - how many of the how input columns are hot in each existing vector
60
+ inter = self.key_df[ks].sum(axis=1)
61
+
62
+ #performing operation on each df seems to be slightly quicker than transforming the df here - may refactor though
63
+
64
+ #dropping any row without 3 matches (minimum match check)
65
+ cand_vec = self.key_df.iloc[list(inter[inter>=3].index)]
66
+ #if parsing returns less ranked matches than specificed top n, reduce threshold to 1 match and check again
67
+ if len(cand_vec) < self.top_n:
68
+ cand_vec = self.key_df.iloc[list(inter[inter>=1].index)]
69
+
70
+ cand_slim = self.slim_df.iloc[list(inter[inter>=3].index)]
71
+ if len(cand_slim) < self.top_n:
72
+ cand_slim = self.key_df.iloc[list(inter[inter>=1].index)]
73
+
74
+ return ks,cand_slim,in_vec.values()
75
+
76
+ #calculating per community vector pairwise jaccard similarity to input split by feature class
77
+ def ret_jaccard(self,in_vec,t_vec):
78
+ gt_score = sklearn.metrics.jaccard_score(in_vec[1:9],t_vec[1:9],zero_division=0)
79
+ cat_score = sklearn.metrics.jaccard_score(in_vec[192:276],t_vec[192:276],zero_division=0)
80
+ mech_score = sklearn.metrics.jaccard_score(in_vec[9:192],t_vec[9:192],zero_division=0)
81
+ fam_score = sklearn.metrics.jaccard_score(in_vec[276:3901],t_vec[276:3901],zero_division=0)
82
+ if in_vec[0] == t_vec[0]:
83
+ coop_score = 1
84
+ else:
85
+ coop_score = 0
86
+
87
+ #initial weighting treats all feature classes as equal - looking into updating this as a feedback mechanism
88
+ return np.mean([gt_score,cat_score,mech_score,fam_score,coop_score])
89
+
90
+ #function to actually return community neighbors
91
+ def n_neighbors(self,in_data):
92
+ #applies jaccard func to each row using vectors and maps to "full" df w/text
93
+ slim, vec, in_vec = in_data
94
+ vec['score']=vec.apply(lambda x: self.ret_jaccard(in_vec,x),raw=True,axis=1)
95
+ slim['score']=vec['score']
96
+
97
+ #converts to rank - this avoids splitting equal scoring groups inappropriately
98
+ slim['rank'] = slim['score'].rank(ascending=False)
99
+ return slim[slim['rank']<self.top_n].sort_values(by=['rank'])
100
+
101
+ def query_score(self,outframe, gen_text):
102
+ #requires text processing function, nearest neighbor community dataframe, and piece of generated text
103
+ query = doc_text_preprocessing(pd.Series(gen_text))
104
+ desc_tokens = pd.concat([outframe['cleaned_descriptions'],pd.Series(query)])
105
+ desc_dict = corpora.Dictionary()
106
+ desc_corpus = [desc_dict.doc2bow(doc, allow_update=True) for doc in desc_tokens]
107
+ temp_index = get_tmpfile("index")
108
+ index = similarities.Similarity(temp_index, desc_corpus, num_features=len(desc_dict.token2id))
109
+
110
+ sim_stack = []
111
+ for sims in index:
112
+ sim_stack.append(sims)
113
+
114
+ return (gen_text,np.mean(np.multiply(out['score'],sim_stack[-1][:-1])))
Home.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
 
3
  st.set_page_config(page_title='Auto-BG: The Game Concept Generator', layout='wide')
4
 
 
 
5
  def application():
6
  ###Imports
7
  import pandas as pd
@@ -14,6 +16,7 @@ def application():
14
  from title_generator import Title_Generator
15
  import gzip
16
  import io
 
17
  from description_generator import input_manager, model_control
18
  from pathlib import Path
19
 
@@ -39,7 +42,7 @@ def application():
39
  if 'coop_d' not in st.session_state:
40
  st.session_state.coop_d = 0
41
 
42
- #non-ui helper functions
43
  #reader code extended from https://gist.github.com/thearn/5424244 for alternate load format
44
  def reader(path):
45
  f = gzip.GzipFile(filename=path)
@@ -81,7 +84,7 @@ def application():
81
  inter_pair = Tgen.candidate_generator(clean_desc)
82
  out = Tgen.candidate_score(inter_pair,ex_check)
83
  descs.append(out)
84
- st.sidebar.success("Prompt " +str(status+1)+ " generated!")
85
  st.session_state.output_dict = {0:descs[0],1:descs[1],2:descs[2]}
86
 
87
  def title_check(next=0):
@@ -131,8 +134,23 @@ def application():
131
  st.session_state.title_iter = 0
132
  show_title(0)
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
-
136
  ###Variables
137
 
138
  ###Data
@@ -161,8 +179,6 @@ def application():
161
 
162
  Tgen, iman, mctrl = setup_models()
163
 
164
-
165
-
166
  #UI
167
 
168
  #Application
@@ -336,7 +352,9 @@ def application():
336
 
337
  with d_col2:
338
  st.button("See Next Description", on_click=ND_button_clicked, use_container_width=True)
339
-
 
 
340
  def blog():
341
  """
342
  Blog describing the Auto-BG project
@@ -386,12 +404,11 @@ def about_us():
386
  *MADS (Master of Applied Data Science)*\n
387
  """)
388
 
389
- page_names_to_funcs = {
390
- "Application": application,
391
- "Blog": blog,
392
- "About Us": about_us,
393
- }
394
 
395
- demo_name = st.sidebar.selectbox("Choose a page:", page_names_to_funcs.keys())
396
- page_names_to_funcs[demo_name]()
397
 
 
 
 
2
 
3
  st.set_page_config(page_title='Auto-BG: The Game Concept Generator', layout='wide')
4
 
5
+ tab1, tab2, tab3 = st.tabs(['App', 'Blog', 'About Us'])
6
+
7
  def application():
8
  ###Imports
9
  import pandas as pd
 
16
  from title_generator import Title_Generator
17
  import gzip
18
  import io
19
+ from datetime import date
20
  from description_generator import input_manager, model_control
21
  from pathlib import Path
22
 
 
42
  if 'coop_d' not in st.session_state:
43
  st.session_state.coop_d = 0
44
 
45
+ #helper functions
46
  #reader code extended from https://gist.github.com/thearn/5424244 for alternate load format
47
  def reader(path):
48
  f = gzip.GzipFile(filename=path)
 
84
  inter_pair = Tgen.candidate_generator(clean_desc)
85
  out = Tgen.candidate_score(inter_pair,ex_check)
86
  descs.append(out)
87
+ results.success("Prompt " +str(status+1)+ "/3 Generated!")
88
  st.session_state.output_dict = {0:descs[0],1:descs[1],2:descs[2]}
89
 
90
  def title_check(next=0):
 
134
  st.session_state.title_iter = 0
135
  show_title(0)
136
 
137
+ def report():
138
+ inputs = '|'.join(str(x) for x in st.session_state.inputs)
139
+ data = {'rprtd': date.today(),'inpts': inputs, 'title': st.session_state.output_dict[st.session_state.desc_iter]['titles'][st.session_state.title_iter][0], 'desc':st.session_state.output_dict[st.session_state.desc_iter]['text']}
140
+ try:
141
+ r_df = pd.DataFrame(data, index=[0])
142
+ r_p = pd.read_pickle(Path(__file__).parent / "Persistent_Data/reported_df.PICKLE")
143
+ w_p = pd.concat([r_df, r_p])
144
+ w_p = w_p.drop_duplicates()
145
+ print('try')
146
+ print(w_p)
147
+ w_p.to_pickle(Path(__file__).parent / "Persistent_Data/reported_df.PICKLE")
148
+ except:
149
+ print('except')
150
+ print(r_df)
151
+ r_df.to_pickle(Path(__file__).parent / "Persistent_Data/reported_df.PICKLE")
152
 
153
+
154
  ###Variables
155
 
156
  ###Data
 
179
 
180
  Tgen, iman, mctrl = setup_models()
181
 
 
 
182
  #UI
183
 
184
  #Application
 
352
 
353
  with d_col2:
354
  st.button("See Next Description", on_click=ND_button_clicked, use_container_width=True)
355
+
356
+ st.button('Report', on_click=report, use_container_width=True)
357
+
358
  def blog():
359
  """
360
  Blog describing the Auto-BG project
 
404
  *MADS (Master of Applied Data Science)*\n
405
  """)
406
 
407
+ with tab1:
408
+ application()
 
 
 
409
 
410
+ with tab2:
411
+ blog()
412
 
413
+ with tab3:
414
+ about_us()