File size: 2,277 Bytes
c79091a
 
7cbe3b6
 
 
c79091a
 
 
 
 
 
 
7cbe3b6
 
 
 
 
 
 
 
 
 
c79091a
 
 
 
7cbe3b6
 
c79091a
 
7cbe3b6
c79091a
 
 
 
7cbe3b6
c79091a
 
 
 
 
 
 
 
 
 
 
 
 
 
7cbe3b6
 
c79091a
 
 
 
 
7cbe3b6
 
 
c79091a
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from transformers import pipeline, set_seed
from transformers import BioGptTokenizer, BioGptForCausalLM
from multilingual_translation import translate
from utils import lang_ids

import gradio as gr

# Model ids offered in the "Model ID" dropdown and passed to from_pretrained().
model_list = ["microsoft/biogpt", "microsoft/BioGPT-Large-PubMedQA"]

# Choices for the "Base Language" dropdown: the keys of lang_ids, in order.
lang_list = [*lang_ids.keys()]

def translate_to_english(text, base_lang):
    """Return *text* in English.

    When ``base_lang`` is ``"en"`` the text is handed back untouched.
    Otherwise it is passed to ``translate`` with the
    ``facebook/m2m100_418M`` model, using ``base_lang`` as the source
    language and ``"en"`` as the target.
    """
    if base_lang != "en":
        return translate("facebook/m2m100_418M", text, base_lang, "en")
    return text
    

def biogpt(
    prompt: str,
    model_id: str,
    max_length: int = 25,
    num_return_sequences: int = 5,
    base_lang: str = "en"
):
    """Generate biomedical text continuations for *prompt* with a BioGPT model.

    The prompt is first translated to English (a no-op when ``base_lang`` is
    ``"en"``), then fed to a ``text-generation`` pipeline built from
    ``model_id``. Sampling is enabled and the seed is fixed to 42 before every
    generation.

    Args:
        prompt: Text to continue, written in ``base_lang``.
        model_id: Model id handed to ``from_pretrained`` for both the model
            and the tokenizer.
        max_length: ``max_length`` forwarded to the generator.
        num_return_sequences: Number of sequences requested from the generator.
        base_lang: Language code of ``prompt``.

    Returns:
        All generated texts, in the order the pipeline returned them,
        separated by a blank line.
    """
    # The values arrive from UI sliders and may be floats; the generator is
    # given plain integers.
    max_length = int(max_length)
    num_return_sequences = int(num_return_sequences)

    en_prompt = translate_to_english(prompt, base_lang)

    # NOTE: model and tokenizer are loaded again on every call.
    model = BioGptForCausalLM.from_pretrained(model_id)
    tokenizer = BioGptTokenizer.from_pretrained(model_id)
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

    set_seed(42)
    output = generator(
        en_prompt,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        do_sample=True,
    )

    # Join whatever came back rather than indexing exactly five results:
    # fewer than five sequences used to raise IndexError, and more than five
    # were silently discarded.
    return "\n\n".join(item['generated_text'] for item in output)


# Input widgets, listed in the same order as the parameters of biogpt().
# The third positional argument of gr.inputs.Slider is the step size, not the
# initial value. It is passed by keyword as 1 so that every integer in the
# range, including the declared default, can be selected.
inputs = [
    gr.inputs.Textbox(label="Prompt", lines=5, default="COVID-19 is"),
    gr.Dropdown(model_list, value="microsoft/biogpt", label="Model ID"),
    gr.inputs.Slider(minimum=5, maximum=100, step=1, default=25, label="Max Length"),
    gr.inputs.Slider(minimum=1, maximum=10, step=1, default=5, label="Num Return Sequences"),
    gr.Dropdown(lang_list, value="en", label="Base Language"),
]

outputs = gr.outputs.Textbox(label="Output")

# Each example row supplies one value per input widget, in order.
examples = [
    ["COVID-19 is", "microsoft/biogpt", 25, 5, "en"],
    ["Kanser", "microsoft/BioGPT-Large-PubMedQA", 25, 5, "tr"],
    ["Covid-19 est", "microsoft/biogpt", 25, 5, "fr"],
]

title = " BioGPT: Generative Pre-trained Transformer for Biomedical Text Generation and Mining"
demo_app = gr.Interface(
    biogpt,
    inputs,
    outputs,
    title=title,
    examples=examples,
    cache_examples=True,
)
demo_app.launch(debug=True, enable_queue=True)