import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Text2TextGenerationPipeline

# Hub identifier used for both the model weights and the tokenizer.
_WSD_CHECKPOINT = "jpwahle/t5-large-word-sense-disambiguation"

# Load the seq2seq model first, then its tokenizer, and wrap the pair in a
# text-to-text generation pipeline that the rest of the script calls as `pipe`.
_wsd_model = AutoModelForSeq2SeqLM.from_pretrained(_WSD_CHECKPOINT)
_wsd_tokenizer = AutoTokenizer.from_pretrained(_WSD_CHECKPOINT)
pipe = Text2TextGenerationPipeline(model=_wsd_model, tokenizer=_wsd_tokenizer)

def wsd_gen(word, context, d1, d2, d3):
    """Ask the model which of three descriptions fits ``word`` in ``context``.

    Builds one text prompt from the target word, the three candidate
    descriptions and the context, sends it through the module-level ``pipe``
    and returns the text generated for the first result.

    Args:
        word: The word to disambiguate.
        context: The text in which the word is used.
        d1: First candidate description.
        d2: Second candidate description.
        d3: Third candidate description.

    Returns:
        The ``'generated_text'`` value of the first item returned by ``pipe``.
    """
    # The prompt text is kept character-for-character as this function has
    # always sent it. The backslash before "descriptions" is now written as
    # "\\" because "\d" is not a valid escape sequence: Python keeps the
    # backslash but emits a warning for it.
    # NOTE(review): the doubled "question:" prefix, that backslash and the
    # trailing apostrophe after the context look accidental -- confirm against
    # the model's documented prompt format before changing any of them.
    prompt = (
        f'question: question: which description describes the word " {word} " '
        f'best in the following context? \\descriptions:[  " {d1}" , " {d2} " , '
        f'or " {d3} " ] '
        f"context: {context}'"
    )
    return pipe(prompt)[0]['generated_text']

# Pre-filled rows for the demo; each row is ordered as
# [word, context, description 1, description 2, description 3].
examples = [
    [
        "beat",
        'The underdog team "beat" the reigning champion.',
        " A main accent or rhythmic unit in music or poetry. ",
        " To strike repeatedly and violently so as to hurt or injure.",
        " To defeat (someone) in a game or other competitive situation. ",
    ],
    [
        "shell",
        'The first "shell" exploded in mid air taking out an enemy plane.',
        "The hard protective outer case of a mollusk or crustacean.",
        "An explosive artillery projectile or bomb.",
        "Something resembling or likened to a shell because of its shape or its function as an outer case.",
    ],
]

# Single-line text inputs, created in the order they are passed to the
# interface: the target word, its context, then the three descriptions.
word_mask = gr.Textbox(
    lines=1,
    placeholder="Enter word to disambiguate",
    label="Based on the context, which description best matches this word: ",
)
input_context = gr.Textbox(lines=1, placeholder="Enter context", label="context: ")

# The three description boxes differ only in their label.
input_desc1, input_desc2, input_desc3 = [
    gr.Textbox(lines=1, placeholder="Enter description", label=desc_label)
    for desc_label in ("description 1: ", "description 2: ", "description 3: ")
]

# Wire the five text inputs to `wsd_gen`, show the returned text in a textbox,
# and launch the app. The article text below is displayed to users, so its
# grammar was corrected (sentence capitalisation, a doubled "used", and
# "this spaces").
demo = gr.Interface(
    wsd_gen,
    inputs=[word_mask, input_context, input_desc1, input_desc2, input_desc3],
    outputs="textbox",
    examples=examples,
    title="T5-Word Sense Disambiguation",
    description="Determines which 'sense' (meaning) of a word is activated by the use of the word in a particular context given three different descriptions.",
    theme="seafoam",
    article="This is an implementation of Google's T5-large model applied to Word Sense Disambiguation (WSD) and trained on the SemCor dataset. The SemCor dataset is a corpus made up of 352 documents for a total of 226,040 manually sense-annotated annotations used specifically to train supervised WSD systems. The model used in this space was uploaded by Jan Philip Wahle (jpelhaw) in huggingface.",
    allow_flagging="never",
)
demo.launch(inbrowser=True)