Spaces:

GroNLP
/

agalma

Sleeping

Mark7549 committed on May 11, 2024

Commit

7088ca8

•

1 Parent(s): 27b85d5

For the nearest neighbours tab, the program checks whether the word exists in a model before continuing

Files changed (2) hide show

app.py CHANGED Viewed

@@ -27,15 +27,19 @@ if active_tab == "Nearest neighbours":
     # Load the compressed word list
     compressed_word_list_filename = 'corpora/compass_filtered.pkl.gz'
     all_words = load_compressed_word_list(compressed_word_list_filename)
     with st.container():
         with col1:
             word = st.multiselect("Enter a word", all_words, max_selections=1)
             if len(word) > 0:
                 word = word[0]
         with col2:
-            time_slice = st.selectbox("Time slice", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
         models = st.multiselect(
             "Select models to search for neighbours",

     # Load the compressed word list
     compressed_word_list_filename = 'corpora/compass_filtered.pkl.gz'
     all_words = load_compressed_word_list(compressed_word_list_filename)
+    eligible_models = ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"]
     with st.container():
         with col1:
             word = st.multiselect("Enter a word", all_words, max_selections=1)
             if len(word) > 0:
                 word = word[0]
+                # Check which models contain the word
+                eligible_models = check_word_in_models(word)
         with col2:
+            time_slice = st.selectbox("Time slice", eligible_models)
         models = st.multiselect(
             "Select models to search for neighbours",

word2vec.py CHANGED Viewed

@@ -187,6 +187,16 @@ def convert_time_name_to_model(time_name):
         return 'hellen_cbow'
     elif time_name == 'Late Roman':
         return 'late_roman_cbow'
 def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
     '''
@@ -297,6 +307,24 @@ def store_df_in_temp_file(df):
     return temp_file_path
 def main():
     # model = load_word2vec_model('models/archaic_cbow.model')
     # archaic_cbow_dict = model_dictionary(model)

         return 'hellen_cbow'
     elif time_name == 'Late Roman':
         return 'late_roman_cbow'
+    elif time_name == 'classical':
+        return 'Classical'
+    elif time_name == 'early_roman':
+        return 'Early Roman'
+    elif time_name == 'hellen':
+        return 'Hellenistic'
+    elif time_name == 'late_roman':
+        return 'Late Roman'
+    elif time_name == 'archaic':
+        return 'Archaic'
 def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
     '''
     return temp_file_path
+def check_word_in_models(word):  # word: the vocabulary entry to look for in every loaded model
+    """
+        Check in which models a word occurs.
+    """
+    all_models = load_all_models()  # NOTE(review): fetched on every call; presumably load_all_models() is cheap or cached - confirm
+    eligible_models = []  # converted names of the models containing `word`, in the order all_models yields them
+    for model in all_models:  # each entry is indexed with [0] and [1]; presumably a (name, model) pair - confirm
+        model_name = convert_time_name_to_model(model[0])  # map the entry's name through the shared name-conversion helper
+        model = model[1]  # rebind the loop variable to the model object itself
+        if word in model.wv.key_to_index:  # membership test against the model's key-to-index vocabulary mapping
+            eligible_models.append(model_name)
+    return eligible_models  # empty list when no model contains `word`
 def main():
     # model = load_word2vec_model('models/archaic_cbow.model')
     # archaic_cbow_dict = model_dictionary(model)