|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
# Fill-mask pipeline used to guess which word best replaces the masked homonym.
# The model weights and the tokenizer are loaded from the same checkpoint id.
_CHECKPOINT = 'davmel/ka_homonym_disambiguation_FM'
fill_mask = pipeline(
    'fill-mask',
    model=_CHECKPOINT,
    tokenizer=_CHECKPOINT,
)
|
|
|
|
|
# Tokens the model may predict for the masked homonym, keyed by sense index.
# `predict` maps index 0 to "Shovel", index 1 to "Lowland" and anything else
# to "Cafe".
# NOTE(review): these literals look mis-encoded in this copy of the file --
# confirm they match the tokens the model actually emits.
masker = dict(enumerate(('αα', 'ααα', 'ααα€α')))
|
|
|
|
|
# Inflected surface forms of the homonym that `getMasked` recognises.
# Stored as a frozenset so it is built once at import time and membership
# tests are O(1) instead of scanning a list on every word.
HOMONYM_FORMS = frozenset([
    'ααα αα', 'ααα ααααα‘', 'ααα αα', 'ααα ααα', 'ααα αα‘αααα‘ααͺ', 'ααα αααα', 'ααα ααα‘ααͺ', 'ααα αααα¨α', 'ααα αααααͺ',
    'ααα αααα',
    'ααα ααͺαα', 'ααα ααααα', 'ααα ααͺ', 'ααα ααααα', 'ααα ααααͺ', 'ααα αααα', 'ααα αα‘αααͺααα', 'ααα ααα¨α', 'ααα ααα‘',
    'ααα αααα‘α', 'ααα ααααα', 'ααα αα‘ααα£α α', 'ααα αααα‘αααα‘α', 'ααα αα‘ααααα‘', 'ααα ααα', 'ααα ααα', 'ααα α¨ααα',
    'ααα αα‘αα',
    'ααα αα‘ααα', 'ααα αααααα', 'ααα ααα', 'ααα ααα', 'ααα α‘', 'ααα αα', 'ααα ααααα‘', 'ααα αααα', 'ααα αααα', 'ααα αααα‘',
    'ααα αα‘ααα', 'ααα α¨αα', 'ααα αααα‘ααα', 'ααα αααααα', 'ααα ααααα', 'ααα αααα‘α', 'ααα ααα‘ααα', 'ααα ααα‘ααα',
    'ααα αααααα',
    'ααα ααααͺ', 'ααα αααααα', 'ααα α¨ααα', 'ααα αααα', 'ααα ααα', 'ααα ααααααͺ', 'ααα αααα‘', 'ααα ααααα‘', 'ααα ααα‘',
    'ααα αααααα', 'ααα αααα', 'ααα ααα', 'ααα αααα‘ααα', 'ααα ααα‘αα‘', 'ααα ααα¨αα', 'ααα αααα', 'ααα ααα', 'ααα αααα‘',
    'ααα ααααα¨α', 'ααα ααͺα', 'ααα αααα', 'ααα α¨αα', 'ααα αααα', 'ααα ααα', 'ααα α', 'ααα αααα‘α', 'ααα αααα',
    'ααα αααα‘ααααα‘',
    'ααα αααα', 'ααα ααα¨αα', 'ααα αα‘', 'ααα αα‘αααα‘α', 'ααα αααα', 'ααα α¨αααα', 'ααα ααα‘ααͺ', 'ααα αααααα', 'ααα αααα',
    'ααα αα', 'ααα αααααα', 'ααα αα‘αα', 'ααα αααͺ', 'ααα α¨ααͺ', 'ααα αααααα', 'ααα αα', 'ααα ααααααͺ', 'ααα αα‘αααααα ',
    'ααα α¨α',
    'ααα αααα‘αααα‘ααͺ', 'ααα αααα', 'ααα α¨ααͺαα', 'ααα αα‘αα', 'ααα αα‘αααα‘α', 'ααα αααͺ', 'ααα αααα', 'ααα αα', 'ααα ααααααα',
    'ααα αααααα', 'ααα αα‘ααͺ', 'ααα αα‘ααα£α ', 'ααα αααααα‘', 'ααα αααα', 'ααα αα‘α', 'ααα αααααͺ', 'ααα αααα', 'ααα ααααα',
    'ααα αααα‘αααα‘', 'ααα ααα‘', 'ααα ', 'ααα ααααͺ', 'ααα αααααͺ', 'ααα αααα‘αααα‘', 'ααα ααα‘ααͺ', 'ααα ααααα', 'ααα αα‘αα',
    'ααα αα‘ααα αα', 'ααα αααααα', 'ααα αααααͺ', 'ααα α', 'ααα αα‘αααα', 'ααα αααα', 'ααα αα', 'ααα αααα', 'ααα ααααα',
    'ααα αααα‘ααααα‘', 'ααα ααααα', 'ααα ααα¨α', 'ααα ααα', 'ααα ααααα', 'ααα αα‘αααα', 'ααα ααααααα', 'ααα αα‘ααα αα‘',
    'ααα αααα',
    'ααα ααα', 'ααα ααα¨αααα', 'ααα ααα‘ααα', 'ααα αααααααͺ', 'ααα ααααα', 'ααα αα‘α', 'ααα ααα¨ααͺ', 'ααα αααααα', 'ααα αα‘α',
    'ααα ααααα', 'ααα ααα‘', 'ααα αααα', 'ααα αα', 'ααα αααααͺ', 'ααα ααα', 'ααα αα‘αααα‘', 'ααα ααααα', 'ααα ααα‘α',
    'ααα αα‘αα',
    'ααα ααααα', 'ααα ααα‘', 'ααα αα‘αααααα α', 'ααα ααααα', 'ααα ααααα', 'ααα α‘α', 'ααα α£αα', 'ααα α‘ααͺ', 'α‘αααα α',
    'ααα ααααα',
    'ααα α¨αααͺ', 'ααα ααα', 'ααα αα‘αα‘',
])


def getMasked(sent, homonym_forms=None):
    """Replace the first homonym form in *sent* with the ``[MASK]`` token.

    The sentence is split on single spaces and each piece is compared
    verbatim against the known forms, so a form with punctuation attached
    to it is not recognised.

    Args:
        sent: The input sentence.
        homonym_forms: Optional container of surface forms to look for.
            Defaults to ``HOMONYM_FORMS``.

    Returns:
        The sentence with at most one word replaced by ``"[MASK]"``; the
        sentence is returned unchanged when no form is present.
    """
    forms = HOMONYM_FORMS if homonym_forms is None else homonym_forms

    new_sent = []
    masked = False
    for word in sent.split(" "):
        if not masked and word in forms:
            new_sent.append("[MASK]")
            # Only the first occurrence is masked: the caller reads a single
            # prediction, so later occurrences are left in place as context.
            masked = True
        else:
            new_sent.append(word)
    return " ".join(new_sent)
|
|
|
|
|
def predict(text):
    """Report which sense of the homonym is used in *text*.

    The homonym is masked with ``getMasked``, the sentence is passed to the
    ``fill_mask`` pipeline, and the first predicted token is compared with
    the entries of ``masker``.

    Args:
        text: A sentence expected to contain a form of the homonym.

    Returns:
        A human-readable message naming the detected sense, or a message
        saying that no homonym form was found.
    """
    text_with_mask = getMasked(text)

    # Without a mask token the fill-mask pipeline raises, so answer with a
    # readable message instead of surfacing an error in the UI.
    if "[MASK]" not in text_with_mask:
        return "No form of the homonym was found in the sentence."

    predictions = fill_mask(text_with_mask)

    # For input with several mask tokens the pipeline returns one candidate
    # list per mask; fall back to the first mask's first candidate then.
    best = predictions[0]
    if isinstance(best, list):
        best = best[0]
    context = best["token_str"]

    if context == masker[0]:
        meaning = "Shovel"
    elif context == masker[1]:
        meaning = "Lowland"
    else:
        # Any other predicted token is reported as the third sense.
        meaning = "Cafe"
    return f'Homonym is used as a "{meaning}"'
|
|
|
|
|
|
|
|
# Single text-in / text-out Gradio interface wired to `predict`.
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="Homonym disambiguation in Georgian",
    description=(
        'Enter a sentence with the homonym "ααα α" '
        "(for the current purposes, please include the homonym once in the sentence)."
    ),
)

# Start the web app as soon as the script runs, with sharing enabled.
iface.launch(share=True)