"""Gradio demo: T5-based word sense disambiguation over three candidate senses."""

import gradio as gr
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    Text2TextGenerationPipeline,
)

# Single source of truth for the checkpoint used by both model and tokenizer.
MODEL_NAME = "jpwahle/t5-large-word-sense-disambiguation"

# Loaded once at import time; every request reuses this pipeline.
pipe = Text2TextGenerationPipeline(
    model=AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME),
    tokenizer=AutoTokenizer.from_pretrained(MODEL_NAME),
)


def wsd_gen(word: str, context: str, d1: str, d2: str, d3: str) -> str:
    """Return the text generated by the model for a disambiguation prompt.

    Args:
        word: The word to disambiguate.
        context: The sentence in which ``word`` is used.
        d1: First candidate description of the word's meaning.
        d2: Second candidate description of the word's meaning.
        d3: Third candidate description of the word's meaning.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    # The prompt is reproduced character-for-character from the original
    # string concatenation, including the literal backslash before
    # "descriptions" (now written as an explicit "\\" instead of the invalid
    # escape "\d") and the uneven spacing around the quoted descriptions.
    # NOTE(review): the doubled "question: question:" prefix and the trailing
    # apostrophe after the context look unintentional, but changing them
    # alters the model input -- confirm against the model card before editing.
    raw_input = (
        "question: question: which description describes the word"
        f' " {word} " '
        "best in the following context? \\descriptions:[ "
        f'" {d1}" , " {d2} " , or " {d3} " ] '
        f"context: {context}'"
    )
    return pipe(raw_input)[0]["generated_text"]


# Each example row is: word, context, description 1, description 2, description 3.
examples = [
    [
        "beat",
        'The underdog team "beat" the reigning champion.',
        " A main accent or rhythmic unit in music or poetry. ",
        " To strike repeatedly and violently so as to hurt or injure.",
        " To defeat (someone) in a game or other competitive situation. ",
    ],
    [
        "shell",
        'The first "shell" exploded in mid air taking out an enemy plane.',
        "The hard protective outer case of a mollusk or crustacean.",
        "An explosive artillery projectile or bomb.",
        "Something resembling or likened to a shell because of its shape"
        " or its function as an outer case.",
    ],
]

word_mask = gr.Textbox(
    lines=1,
    placeholder="Enter word to disambiguate",
    label="Based on the context, which description best matches this word: ",
)
input_context = gr.Textbox(
    lines=1,
    placeholder="Enter context",
    label="context: ",
)
input_desc1 = gr.Textbox(
    lines=1,
    placeholder="Enter description",
    label="description 1: ",
)
input_desc2 = gr.Textbox(
    lines=1,
    placeholder="Enter description",
    label="description 2: ",
)
input_desc3 = gr.Textbox(
    lines=1,
    placeholder="Enter description",
    label="description 3: ",
)

# Input order must match the positional parameters of wsd_gen.
gr.Interface(
    wsd_gen,
    inputs=[word_mask, input_context, input_desc1, input_desc2, input_desc3],
    outputs="textbox",
    examples=examples,
    title="T5-Word Sense Disambiguation",
    description=(
        "Determines which 'sense' (meaning) of a word is activated by the use"
        " of the word in a particular context given three different"
        " descriptions."
    ),
    theme="seafoam",
    article=(
        "This is an implementation of Google's T5-large model applied to Word"
        " Sense Disambiguation (WSD) and trained on the SemCor dataset. the"
        " SemCor dataset is a corpus made up of 352 documents for a total of"
        " 226,040 manually sense-annotated annotations used specifically used"
        " to train supervised WSD systems. The model used in this spaces was"
        " uploaded by Jan Philip Wahle (jpelhaw) in huggingface."
    ),
    allow_flagging="never",
).launch(inbrowser=True)