Spaces:

MarkdenOuden
/

Ancient_Greek_Word2Vec

Runtime error

Mark7549 committed on Mar 10, 2024

Commit

4e0c8c4

1 Parent(s): ce435c2

Added 'find nearest neighbours' functionality

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import gradio as gr
-from word2vec import get_cosine_similarity, get_cosine_similarity_one_word
 def greet(name, name2, name3):
@@ -10,7 +10,20 @@ with gr.Blocks() as demo:
     # Tab 1
     with gr.Tab("Find nearest neighbours"):
-        gr.Markdown("## Find nearest neighbours")
     # Tab 2

 import gradio as gr
+from word2vec import get_cosine_similarity, get_cosine_similarity_one_word, get_nearest_neighbours, load_all_models
 def greet(name, name2, name3):
     # Tab 1
     with gr.Tab("Find nearest neighbours"):
+        gr.Markdown("## Find nearest neighbours of a word")
+        iface = gr.Interface(
+            fn=get_nearest_neighbours,
+            inputs=[
+                gr.Textbox(label='Word 1 (required)', placeholder='χρηστήριον'),
+                gr.Radio(label='Time slice (required)', choices=["archaic_cbow", "classical_cbow", "early_roman_cbow", "hellen_cbow", "late_roman_cbow"]),
+                gr.Slider(label='Number of neighbours', minimum=1, maximum=50, step=1, value=10)
+            ],
+            outputs=gr.DataFrame(
+                label="Result",
+                headers=["Word", "Time slice", "Cosine similarity"]
+                ),
+            submit_btn='Calculate'
+        )
     # Tab 2

word2vec.py CHANGED Viewed

@@ -3,6 +3,20 @@ from collections import defaultdict
 import os
 def load_word2vec_model(model_path):
     '''
         Load a word2vec model from a file
@@ -104,18 +118,19 @@ def get_cosine_similarity_one_word(word, time_slice1, time_slice2):
     return cosine_similarity(dict1[word], dict2[word])
-def get_nearest_neighbours(word, time_slice_model, models, n):
     '''
         Return the nearest neighbours of a word
         word: the word for which the nearest neighbours are calculated
         time_slice_model: the word2vec model of the time slice of the input word
-        models: list of tuples with the name of the time slice and the word2vec model
-        n: the number of nearest neighbours to return
         Return: list of tuples with the word, the time slice and
                 the cosine similarity of the nearest neighbours
-    '''
     vector_1 = get_word_vector(time_slice_model, word)
     nearest_neighbours = []

 import os
+def load_all_models():
+    '''
+        Load all word2vec models
+    '''
+    archaic = ('archaic', load_word2vec_model('models/archaic_cbow.model'))
+    classical = ('classical', load_word2vec_model('models/classical_cbow.model'))
+    early_roman = ('early_roman', load_word2vec_model('models/early_roman_cbow.model'))
+    hellen = ('hellen', load_word2vec_model('models/hellen_cbow.model'))
+    late_roman = ('late_roman', load_word2vec_model('models/late_roman_cbow.model'))
+    return [archaic, classical, early_roman, hellen, late_roman]
 def load_word2vec_model(model_path):
     '''
         Load a word2vec model from a file
     return cosine_similarity(dict1[word], dict2[word])
+def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
     '''
         Return the nearest neighbours of a word
         word: the word for which the nearest neighbours are calculated
         time_slice_model: the word2vec model of the time slice of the input word
+        models: list of tuples with the name of the time slice and the word2vec model (default: all in ./models)
+        n: the number of nearest neighbours to return (default: 10)
         Return: list of tuples with the word, the time slice and
                 the cosine similarity of the nearest neighbours
+    '''
+    time_slice_model = load_word2vec_model(f'models/{time_slice_model}.model')
     vector_1 = get_word_vector(time_slice_model, word)
     nearest_neighbours = []