|
from gensim.models import KeyedVectors |
|
import gradio as gr |
|
|
|
# File the word vectors are loaded from; the relative path is resolved
# against the current working directory.
word_vectors_path = "word2vec.wordvectors"

# NOTE(review): mmap="r" presumably makes gensim memory-map the stored arrays
# read-only instead of copying them into memory -- confirm against gensim docs.
wv = KeyedVectors.load(word_vectors_path, mmap="r")
|
|
|
def format_to_html(sim_output):
    """Render ``(word, similarity)`` pairs as an HTML table.

    Args:
        sim_output: Iterable of ``(word, similarity)`` pairs, such as the
            value ``find_most_similar`` passes in from ``wv.most_similar``.

    Returns:
        A string containing a ``<table>`` with one header row followed by one
        row per pair. Each similarity is rounded to 4 decimal places.
    """
    # Imported locally so this helper carries its own dependency.
    from html import escape

    table = """
    <table>
        <tr>
            <th>Word</th>
            <th>Similarity (0-1)</th>
        </tr>
        {}
    </table>
    """
    # Escape every word: an entry containing "<" or "&" would otherwise be
    # interpreted as markup and break the table.
    rows = "".join(
        f"<tr><td>{escape(str(word))}</td>"
        f"<td style='text-align: right'>{round(sim, 4)}</td></tr>"
        for word, sim in sim_output
    )
    return table.format(rows)
|
|
|
def get_oov_word(error, words):
    """Work out which of two words a lookup error is complaining about.

    The key is taken from the text between the first and last single quote of
    ``str(error)`` (for example ``Key 'foo' not present``), so keys containing
    spaces or quotes are extracted whole.

    Args:
        error: The exception raised by the failed lookup.
        words: Two-item sequence ``[word_a, word_b]`` that was looked up.

    Returns:
        The quoted key itself when it is one of ``words``; the matching entry
        of ``words`` when the key is the placeholder ``"A"`` or ``"B"``;
        otherwise the quoted key unchanged. If the message contains no quoted
        key, the raw message text is returned so the caller can still report
        something meaningful instead of crashing.
    """
    message = str(error)
    start = message.find("'")
    end = message.rfind("'")
    if start == -1 or end <= start:
        # Nothing quoted to extract; surface the message rather than raise.
        return message

    key = message[start + 1:end]
    # Check the real words first, so a word literally spelled "A" or "B" is
    # not mistaken for a positional placeholder.
    if key in words:
        return key

    labelled = {"A": words[0], "B": words[1]}
    return labelled.get(key, key)
|
|
|
def find_most_similar(word):
    """Return an HTML table of the entries ``wv.most_similar`` gives for *word*.

    The result of ``wv.most_similar(word)`` is passed straight to
    ``format_to_html``; any exception raised by the lookup propagates.
    """
    return format_to_html(wv.most_similar(word))
|
|
|
def find_similarity(wordA, wordB):
    """Return the similarity of two words, or an error message if one is OOV.

    Args:
        wordA: First word to compare.
        wordB: Second word to compare.

    Returns:
        The value of ``wv.similarity(wordA, wordB)`` as a ``float``, floored
        at ``0.0`` (negative similarities are reported as ``0.0``); or the
        string ``"[Error] <word> is an OOV"`` when the lookup fails because a
        word is out of vocabulary.
    """
    try:
        sim = wv.similarity(wordA, wordB)
    except KeyError:
        # Only a failed key lookup is treated as "out of vocabulary"; any
        # other exception is a real error and is allowed to propagate.
        # The missing word is found by a membership test rather than by
        # parsing the exception text, which is fragile.
        oov_word = next(
            (word for word in (wordA, wordB) if word not in wv),
            wordA,
        )
        return f"[Error] {oov_word} is an OOV"

    return max(float(sim), 0.0)
|
|
|
title = "Word Similarity With Literary Word2Vec"

# Example input pairs offered in the UI.
examples = [["བླ་མ་", "སློབ་མ་"], ["སྟོབས་", "ཤུགས་"]]

# Start-up smoke checks. The first compares an example word with itself and
# expects a float; the second expects the error-string path for the pair
# ("ར་", "B").
assert isinstance(find_similarity(examples[0][1], examples[0][1]), float)
assert isinstance(find_similarity("ར་", "B"), str)

# Keep `demo` bound to the Interface itself (not to launch()'s return value)
# so the app object stays reachable after start-up.
demo = gr.Interface(
    fn=find_similarity,
    # The caption must be passed as `label=`: the first positional parameter
    # of the legacy gr.inputs.Textbox is `lines`, not the label.
    inputs=[
        gr.inputs.Textbox(label="Word A"),
        gr.inputs.Textbox(label="Word B"),
    ],
    outputs="text",
    title=title,
    examples=examples,
)
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|