"""Gradio web app serving a medical Q&A seq2seq model hosted on HuggingFace Hub."""
import gradio
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Change to your HuggingFace username; model/tokenizer repos live under it.
USERNAME = "jputhalath"
MODEL_PATH = USERNAME + "/MedQnA_Model"
TOKENIZER_PATH = USERNAME + "/MedQnA_Tokenizer"

# Loaded once at startup so every request reuses the same weights.
# NOTE: AutoModelWithLMHead is deprecated in transformers; this repo is a
# seq2seq model, so AutoModelForSeq2SeqLM (already imported above) is the
# correct replacement.
loaded_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH)
loaded_tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)


def generate_query_response(prompt, max_length=200):
    """Generate a model answer for *prompt*.

    Args:
        prompt: User question as plain text.
        max_length: Maximum number of tokens in the generated answer.

    Returns:
        The decoded answer string with special tokens stripped.
    """
    # Tokenizer call returns both input_ids and a matching attention_mask,
    # so there is no need to build the mask by hand with torch.ones_like.
    encoded = loaded_tokenizer(prompt, return_tensors="pt")  # 'pt' -> PyTorch tensors
    output = loaded_model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=max_length,
        num_return_sequences=1,
        # Use EOS as padding; some tokenizers define no dedicated pad token.
        pad_token_id=loaded_tokenizer.eos_token_id,
    )
    return loaded_tokenizer.decode(output[0], skip_special_tokens=True)


iface = gradio.Interface(
    fn=generate_query_response,
    inputs="text",
    outputs="text",
    title="MedQnA Application",
)

# Guard the server launch so importing this module (e.g. for testing)
# does not start the web app as a side effect.
if __name__ == "__main__":
    iface.launch()