Mark7549 committed on
Commit
4e0c8c4
·
1 Parent(s): ce435c2

Added 'find nearest neighbours' functionality

Browse files
Files changed (2) hide show
  1. app.py +15 -2
  2. word2vec.py +19 -4
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from word2vec import get_cosine_similarity, get_cosine_similarity_one_word
3
 
4
 
5
  def greet(name, name2, name3):
@@ -10,7 +10,20 @@ with gr.Blocks() as demo:
10
 
11
  # Tab 1
12
  with gr.Tab("Find nearest neighbours"):
13
- gr.Markdown("## Find nearest neighbours")
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
  # Tab 2
 
1
  import gradio as gr
2
+ from word2vec import get_cosine_similarity, get_cosine_similarity_one_word, get_nearest_neighbours, load_all_models
3
 
4
 
5
  def greet(name, name2, name3):
 
10
 
11
  # Tab 1
12
  with gr.Tab("Find nearest neighbours"):
13
+ gr.Markdown("## Find nearest neighbours of a word")
14
+ iface = gr.Interface(
15
+ fn=get_nearest_neighbours,
16
+ inputs=[
17
+ gr.Textbox(label='Word 1 (required)', placeholder='χρηστήριον'),
18
+ gr.Radio(label='Time slice (required)', choices=["archaic_cbow", "classical_cbow", "early_roman_cbow", "hellen_cbow", "late_roman_cbow"]),
19
+ gr.Slider(label='Number of neighbours', minimum=1, maximum=50, step=1, value=10)
20
+ ],
21
+ outputs=gr.DataFrame(
22
+ label="Result",
23
+ headers=["Word", "Time slice", "Cosine similarity"]
24
+ ),
25
+ submit_btn='Calculate'
26
+ )
27
 
28
 
29
  # Tab 2
word2vec.py CHANGED
@@ -3,6 +3,20 @@ from collections import defaultdict
3
  import os
4
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def load_word2vec_model(model_path):
7
  '''
8
  Load a word2vec model from a file
@@ -104,18 +118,19 @@ def get_cosine_similarity_one_word(word, time_slice1, time_slice2):
104
  return cosine_similarity(dict1[word], dict2[word])
105
 
106
 
107
- def get_nearest_neighbours(word, time_slice_model, models, n):
108
  '''
109
  Return the nearest neighbours of a word
110
 
111
  word: the word for which the nearest neighbours are calculated
112
  time_slice_model: the word2vec model of the time slice of the input word
113
- models: list of tuples with the name of the time slice and the word2vec model
114
- n: the number of nearest neighbours to return
115
 
116
  Return: list of tuples with the word, the time slice and
117
  the cosine similarity of the nearest neighbours
118
- '''
 
119
  vector_1 = get_word_vector(time_slice_model, word)
120
  nearest_neighbours = []
121
 
 
3
  import os
4
 
5
 
6
def load_all_models():
    '''
    Load the word2vec model of every time slice

    Return: list of tuples with the name of the time slice and the
    loaded word2vec model, in the order archaic, classical,
    early_roman, hellen, late_roman
    '''
    # (time slice name, model file) pairs; the order here is the order
    # in which the models are loaded and returned
    slice_paths = (
        ('archaic', 'models/archaic_cbow.model'),
        ('classical', 'models/classical_cbow.model'),
        ('early_roman', 'models/early_roman_cbow.model'),
        ('hellen', 'models/hellen_cbow.model'),
        ('late_roman', 'models/late_roman_cbow.model'),
    )

    return [
        (slice_name, load_word2vec_model(model_path))
        for slice_name, model_path in slice_paths
    ]
18
+
19
+
20
  def load_word2vec_model(model_path):
21
  '''
22
  Load a word2vec model from a file
 
118
  return cosine_similarity(dict1[word], dict2[word])
119
 
120
 
121
+ def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
122
  '''
123
  Return the nearest neighbours of a word
124
 
125
  word: the word for which the nearest neighbours are calculated
126
  time_slice_model: the word2vec model of the time slice of the input word
127
+ models: list of tuples with the name of the time slice and the word2vec model (default: all in ./models)
128
+ n: the number of nearest neighbours to return (default: 10)
129
 
130
  Return: list of tuples with the word, the time slice and
131
  the cosine similarity of the nearest neighbours
132
+ '''
133
+ time_slice_model = load_word2vec_model(f'models/{time_slice_model}.model')
134
  vector_1 = get_word_vector(time_slice_model, word)
135
  nearest_neighbours = []
136