import torch
import gradio as gr
from transformers import BioGptTokenizer, BioGptForCausalLM

# Checkpoints with known Hugging Face Hub mappings. The other BioGPT
# fine-tuned checkpoints (BioGPT-QA-PubMedQA-BioGPT, BioGPT-RE-BC5CDR,
# BioGPT-RE-DDI, BioGPT-RE-DTI, BioGPT-DC-HoC) do not appear to be
# published on the Hub as transformers checkpoints, so they are not
# exposed in the dropdown; selecting them would raise a KeyError below.
model_name_map = {
    "BioGPT": "microsoft/biogpt",
    "BioGPT-Large": "microsoft/BioGPT-Large",
    "BioGPT-QA-PubMedQA-BioGPT-Large": "microsoft/BioGPT-Large-PubMedQA",
}
model_names = list(model_name_map.keys())

# Cache tokenizer/model pairs so each checkpoint is downloaded and
# instantiated once, not on every generation request.
_model_cache = {}


def load_model(model_name="BioGPT"):
    if model_name not in _model_cache:
        tokenizer = BioGptTokenizer.from_pretrained(model_name_map[model_name])
        model = BioGptForCausalLM.from_pretrained(model_name_map[model_name])
        model.eval()
        _model_cache[model_name] = (tokenizer, model)
    return _model_cache[model_name]


def get_beam_output(sentence, selected_model, min_len=100, max_len=512, n_beams=1):
    tokenizer, model = load_model(selected_model)
    inputs = tokenizer(sentence, return_tensors="pt")
    with torch.no_grad():
        beam_output = model.generate(
            **inputs,
            min_length=min_len,
            max_length=max_len,
            num_beams=n_beams,
            early_stopping=True,
        )
    return tokenizer.decode(beam_output[0], skip_special_tokens=True)


inputs = [
    gr.Textbox(label="prompt", lines=5, value="Bicalutamide"),
    gr.Dropdown(choices=model_names, value="BioGPT", label="selected_model"),
    gr.Slider(minimum=1, maximum=500, step=1, value=100, label="min_len"),
    # BioGPT's learned position embeddings cover 1024 tokens, so the
    # maximum generation length is capped there rather than at 2048.
    gr.Slider(minimum=1, maximum=1024, step=1, value=1024, label="max_len"),
    gr.Slider(minimum=1, maximum=10, step=1, value=5, label="num_beams"),
]
outputs = gr.Textbox(label="output")

examples = [
    ["Bicalutamide", "BioGPT", 25, 100, 5],
    ["Janus kinase 3 (JAK-3)", "BioGPT", 25, 100, 5],
    ["Apricitabine", "BioGPT", 25, 100, 5],
]

iface = gr.Interface(
    fn=get_beam_output,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title="BioGPT: generative pre-trained transformer for biomedical text generation and mining",
)
# Queue requests so long-running beam-search generations don't time out.
iface.queue().launch(debug=True)