import torch
import gradio
from transformers import AutoModelWithLMHead, AutoTokenizer


def generate_response(model, tokenizer, prompt, max_length=200):
    # 'pt' returns a PyTorch tensor
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Create the attention mask and pad token id
    attention_mask = torch.ones_like(input_ids)
    pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        input_ids,
        max_length=max_length,
        num_return_sequences=1,
        attention_mask=attention_mask,
        pad_token_id=pad_token_id,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)


# Load your model from the Hub
username = "vsen7"  # change this to your Hugging Face username
checkpoint = username + "/Medical_Summary"
loaded_model = AutoModelWithLMHead.from_pretrained(checkpoint)

# Load your tokenizer from the Hub
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


# Function for response generation
def generate_query_response(prompt):
    return generate_response(loaded_model, tokenizer, prompt)


# Gradio interface to generate the UI link
iface = gradio.Interface(
    fn=generate_query_response,
    inputs="textbox",
    outputs="textbox",
    title="Medical Summary",
    description="via gradio",
    allow_flagging="never",
)

iface.launch()
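# Note on the model class: AutoModelWithLMHead is deprecated in recent
# transformers releases and may be removed entirely. A minimal sketch of the
# modern equivalent is below, assuming the Medical_Summary checkpoint is a
# seq2seq (T5/BART-style) summarization model; if it is decoder-only
# (GPT-2 style), use AutoModelForCausalLM instead:
#
#   from transformers import AutoModelForSeq2SeqLM
#
#   loaded_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
#
# Quick sanity check to run before iface.launch() (the prompt here is
# hypothetical; any short clinical note works):
#
#   print(generate_query_response("Patient presents with fever and a dry cough."))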