from gensim.models import KeyedVectors
import gradio as gr
# File holding the saved KeyedVectors used by every lookup below.
word_vectors_path = "word2vec.wordvectors"

# Loaded once at import time; mmap="r" asks gensim to memory-map the stored
# arrays read-only instead of reading them fully into memory.
wv = KeyedVectors.load(
    str(word_vectors_path),
    mmap="r",
)
def format_to_html(sim_output):
    """Render ``(word, similarity)`` pairs as an HTML table.

    The template previously contained no ``{}`` placeholder, so the rows
    built here were dropped by ``str.format``; the table markup is spelled
    out explicitly so every pair ends up in the returned string.

    Args:
        sim_output: Iterable of ``(word, similarity)`` pairs.

    Returns:
        A string holding a ``<table>`` with one header row followed by one
        row per pair. Similarities are rounded to 4 decimal places.
    """
    # Local import keeps this block self-contained.
    from html import escape

    table = (
        "<table>\n"
        "<tr><th>Word</th><th>Similarity</th></tr>\n"
        "{}"
        "</table>\n"
    )
    # Escape the word so markup-like characters cannot break the table.
    rows = "".join(
        f"<tr><td>{escape(str(word))}</td><td>{round(sim, 4)}</td></tr>\n"
        for word, sim in sim_output
    )
    return table.format(rows)
def get_oov_word(error, words):
    """Work out which of the two query words a lookup error refers to.

    The key is taken from the quoted part of ``str(error)``. It is matched
    against ``words`` directly first; the labels ``"A"`` / ``"B"`` are still
    accepted and mapped to ``words[0]`` / ``words[1]`` for compatibility with
    the previous behaviour.

    Args:
        error: The exception raised by the failed lookup.
        words: Two-item sequence ``[wordA, wordB]``.

    Returns:
        The offending word, or the extracted key itself when it matches
        neither a word nor a label.
    """
    # Local import keeps this block self-contained.
    import re

    message = str(error)
    # Greedy match: takes everything between the first and last single quote,
    # so keys containing spaces or apostrophes are captured whole.
    quoted = re.search(r"'(.*)'", message)
    key = quoted.group(1) if quoted else message

    if key in words:
        return key
    labelled = {"A": words[0], "B": words[1]}
    return labelled.get(key, key)
def find_most_similar(word):
    """Return the ``wv.most_similar`` result for ``word`` as HTML.

    The result is passed straight to ``format_to_html``.
    """
    return format_to_html(wv.most_similar(word))
def find_similarity(wordA, wordB):
    """Return the similarity between two words, or an OOV error message.

    Args:
        wordA: First word to look up in ``wv``.
        wordB: Second word to look up in ``wv``.

    Returns:
        The similarity as a ``float``, with negative values clamped to
        ``0.0``; or a ``str`` of the form ``"[Error] <word> is an OOV"``
        when the lookup raises ``KeyError``.
    """
    try:
        sim = wv.similarity(wordA, wordB)
    # Only a missing key is an OOV condition; any other failure should
    # surface instead of being reported as an OOV word.
    except KeyError as e:
        oov_word = get_oov_word(e, [wordA, wordB])
        return f"[Error] {oov_word} is an OOV"
    return max(float(sim), 0.0)
title = "Word Similarity With Literary Word2Vec"
examples = [["བླ་མ་", "སློབ་མ་"], ["སྟོབས་", "ཤུགས་"]]

# Start-up smoke checks on the two return types of find_similarity.
# The first call passes the same word twice, so it only exercises the
# in-vocabulary (float) path; the second expects the OOV (str) path.
assert isinstance(find_similarity(examples[0][1], examples[0][1]), float)
assert isinstance(find_similarity("ར་", "B"), str)

# The label must be passed by keyword: the first positional parameter of
# gr.inputs.Textbox is `lines`, not `label`.
demo = gr.Interface(
    fn=find_similarity,
    inputs=[
        gr.inputs.Textbox(label="Word A"),
        gr.inputs.Textbox(label="Word B"),
    ],
    outputs="text",
    title=title,
    examples=examples,
)
# Launch separately so `demo` stays bound to the Interface rather than to
# whatever launch() returns.
demo.launch()
# NOTE: everything below is a commented-out draft of a gr.Blocks layout; none of it runs.
# demo = gr.Blocks()
# with demo:
# # Find top 10 most similar words
# print(gr.__version__)
# word = gr.inputs.Textbox("Word")
# top_10_similar_words = gr.inputs.HTML()
# b1 = gr.Button("Find similar words")
# b1.click(find_most_similar, inputs=word, outputs=top_10_similar_words)
# wordA = gr.inputs.Textbox("Word A")
# wordB = gr.inputs.Textbox("Word B")
# similarity = gr.Number()
# b2 = gr.Button("Find similarity")
# b2.click(find_similarity, inputs=[wordA, wordB], outputs=similarity)
# demo.launch()