Mark7549 committed on
Commit
7088ca8
1 Parent(s): 27b85d5

For the nearest neighbours tab, the program checks whether the word exists in a model before continuing

Browse files
Files changed (2) hide show
  1. app.py +5 -1
  2. word2vec.py +28 -0
app.py CHANGED
@@ -27,15 +27,19 @@ if active_tab == "Nearest neighbours":
27
  # Load the compressed word list
28
  compressed_word_list_filename = 'corpora/compass_filtered.pkl.gz'
29
  all_words = load_compressed_word_list(compressed_word_list_filename)
 
30
 
31
  with st.container():
32
  with col1:
33
  word = st.multiselect("Enter a word", all_words, max_selections=1)
34
  if len(word) > 0:
35
  word = word[0]
 
 
 
36
 
37
  with col2:
38
- time_slice = st.selectbox("Time slice", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
39
 
40
  models = st.multiselect(
41
  "Select models to search for neighbours",
 
27
  # Load the compressed word list
28
  compressed_word_list_filename = 'corpora/compass_filtered.pkl.gz'
29
  all_words = load_compressed_word_list(compressed_word_list_filename)
30
+ eligible_models = ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"]
31
 
32
  with st.container():
33
  with col1:
34
  word = st.multiselect("Enter a word", all_words, max_selections=1)
35
  if len(word) > 0:
36
  word = word[0]
37
+
38
+ # Check which models contain the word
39
+ eligible_models = check_word_in_models(word)
40
 
41
  with col2:
42
+ time_slice = st.selectbox("Time slice", eligible_models)
43
 
44
  models = st.multiselect(
45
  "Select models to search for neighbours",
word2vec.py CHANGED
@@ -187,6 +187,16 @@ def convert_time_name_to_model(time_name):
187
  return 'hellen_cbow'
188
  elif time_name == 'Late Roman':
189
  return 'late_roman_cbow'
 
 
 
 
 
 
 
 
 
 
190
 
191
  def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
192
  '''
@@ -297,6 +307,24 @@ def store_df_in_temp_file(df):
297
  return temp_file_path
298
 
299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  def main():
301
  # model = load_word2vec_model('models/archaic_cbow.model')
302
  # archaic_cbow_dict = model_dictionary(model)
 
187
  return 'hellen_cbow'
188
  elif time_name == 'Late Roman':
189
  return 'late_roman_cbow'
190
+ elif time_name == 'classical':
191
+ return 'Classical'
192
+ elif time_name == 'early_roman':
193
+ return 'Early Roman'
194
+ elif time_name == 'hellen':
195
+ return 'Hellenistic'
196
+ elif time_name == 'late_roman':
197
+ return 'Late Roman'
198
+ elif time_name == 'archaic':
199
+ return 'Archaic'
200
 
201
  def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
202
  '''
 
307
  return temp_file_path
308
 
309
 
310
+
311
+ def check_word_in_models(word):
312
+ """
313
+ Check in which models a word occurs.
314
+ """
315
+ all_models = load_all_models()
316
+ eligible_models = []
317
+
318
+ for model in all_models:
319
+ model_name = convert_time_name_to_model(model[0])
320
+ model = model[1]
321
+
322
+ if word in model.wv.key_to_index:
323
+ eligible_models.append(model_name)
324
+
325
+ return eligible_models
326
+
327
+
328
  def main():
329
  # model = load_word2vec_model('models/archaic_cbow.model')
330
  # archaic_cbow_dict = model_dictionary(model)