For the nearest neighbours tab, the program checks whether the word exists in a model before continuing
Browse files- app.py +5 -1
- word2vec.py +28 -0
app.py
CHANGED
@@ -27,15 +27,19 @@ if active_tab == "Nearest neighbours":
|
|
27 |
# Load the compressed word list
|
28 |
compressed_word_list_filename = 'corpora/compass_filtered.pkl.gz'
|
29 |
all_words = load_compressed_word_list(compressed_word_list_filename)
|
|
|
30 |
|
31 |
with st.container():
|
32 |
with col1:
|
33 |
word = st.multiselect("Enter a word", all_words, max_selections=1)
|
34 |
if len(word) > 0:
|
35 |
word = word[0]
|
|
|
|
|
|
|
36 |
|
37 |
with col2:
|
38 |
-
time_slice = st.selectbox("Time slice",
|
39 |
|
40 |
models = st.multiselect(
|
41 |
"Select models to search for neighbours",
|
|
|
27 |
# Load the compressed word list
|
28 |
compressed_word_list_filename = 'corpora/compass_filtered.pkl.gz'
|
29 |
all_words = load_compressed_word_list(compressed_word_list_filename)
|
30 |
+
eligible_models = ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"]
|
31 |
|
32 |
with st.container():
|
33 |
with col1:
|
34 |
word = st.multiselect("Enter a word", all_words, max_selections=1)
|
35 |
if len(word) > 0:
|
36 |
word = word[0]
|
37 |
+
|
38 |
+
# Check which models contain the word
|
39 |
+
eligible_models = check_word_in_models(word)
|
40 |
|
41 |
with col2:
|
42 |
+
time_slice = st.selectbox("Time slice", eligible_models)
|
43 |
|
44 |
models = st.multiselect(
|
45 |
"Select models to search for neighbours",
|
word2vec.py
CHANGED
@@ -187,6 +187,16 @@ def convert_time_name_to_model(time_name):
|
|
187 |
return 'hellen_cbow'
|
188 |
elif time_name == 'Late Roman':
|
189 |
return 'late_roman_cbow'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
|
191 |
def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
|
192 |
'''
|
@@ -297,6 +307,24 @@ def store_df_in_temp_file(df):
|
|
297 |
return temp_file_path
|
298 |
|
299 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
def main():
|
301 |
# model = load_word2vec_model('models/archaic_cbow.model')
|
302 |
# archaic_cbow_dict = model_dictionary(model)
|
|
|
187 |
return 'hellen_cbow'
|
188 |
elif time_name == 'Late Roman':
|
189 |
return 'late_roman_cbow'
|
190 |
+
elif time_name == 'classical':
|
191 |
+
return 'Classical'
|
192 |
+
elif time_name == 'early_roman':
|
193 |
+
return 'Early Roman'
|
194 |
+
elif time_name == 'hellen':
|
195 |
+
return 'Hellenistic'
|
196 |
+
elif time_name == 'late_roman':
|
197 |
+
return 'Late Roman'
|
198 |
+
elif time_name == 'archaic':
|
199 |
+
return 'Archaic'
|
200 |
|
201 |
def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
|
202 |
'''
|
|
|
307 |
return temp_file_path
|
308 |
|
309 |
|
310 |
+
|
311 |
+
def check_word_in_models(word):
    """
    Return the names of the models whose vocabulary contains ``word``.

    Every entry yielded by ``load_all_models()`` is read by index:
    position 0 is passed through ``convert_time_name_to_model`` to obtain
    the name that is reported, and position 1 is the model object whose
    ``wv.key_to_index`` mapping is checked for ``word``.

    The result keeps the iteration order of ``load_all_models()`` and is
    an empty list when no model contains the word.
    """
    matching_names = []

    for entry in load_all_models():
        # Resolve the reported name first, then inspect the vocabulary.
        reported_name = convert_time_name_to_model(entry[0])
        vocabulary = entry[1].wv.key_to_index

        if word in vocabulary:
            matching_names.append(reported_name)

    return matching_names
|
326 |
+
|
327 |
+
|
328 |
def main():
|
329 |
# model = load_word2vec_model('models/archaic_cbow.model')
|
330 |
# archaic_cbow_dict = model_dictionary(model)
|