import gradio as gr
import torch
from PIL import Image
from transformers import (
    pipeline,
    CLIPProcessor,
    CLIPModel,
    BartTokenizer,
    BartForConditionalGeneration,
    GPT2LMHeadModel,
    GPT2Tokenizer,
    T5ForConditionalGeneration,
    T5Tokenizer,
)

# Load the RoBERTa question-answering pipeline
roberta_model = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Define the interface function for RoBERTa
def answer_question_roberta(context, question):
    result = roberta_model(question=question, context=context)
    return result["answer"]

# Create the Gradio interface for RoBERTa
roberta_interface = gr.Interface(
    fn=answer_question_roberta,
    inputs=["text", "text"],
    outputs="text",
    title="Question Answering with RoBERTa",
    description="Ask a question about the given context.",
)

# Placeholder interface for a future model
placeholder_interface1 = gr.Interface(
    fn=lambda x: x,  # Placeholder function
    inputs="text",
    outputs="text",
    title="Model 1",
    description="Placeholder for Model 1.",
)

# Load the CLIP model and processor. Each model below gets its own variable
# name; reusing `model`/`tokenizer` for every section would silently overwrite
# earlier models before their interface functions run.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

# Define the interface function for zero-shot image classification
def classify_image(image: Image.Image, labels: str):
    # CLIPProcessor performs its own resizing, cropping, and normalization,
    # so the raw PIL image is passed in directly (manually normalizing first
    # would normalize the pixels twice)
    label_list = [label.strip() for label in labels.split(",")]
    inputs = clip_processor(text=label_list, images=image, return_tensors="pt", padding=True)
    outputs = clip_model(**inputs)
    logits_per_image = outputs.logits_per_image
    # Pick the label with the highest image-text similarity score
    predicted_label = label_list[torch.argmax(logits_per_image).item()]
    return predicted_label

patch16_interface = gr.Interface(
    fn=classify_image,
    inputs=[gr.Image(type="pil"), "text"],  # PIL image to match classify_image
    outputs="text",
    title="Image Classification with CLIP",
    description="Upload an image and enter a list of labels (comma-separated). "
                "The model will predict the label that best matches the image.",
)

# Repeat for other placeholder interfaces...
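# A variant of classify_image that returns per-label probabilities instead of
# a single label, via a softmax over the image-text logits. This is a sketch;
# the helper name classify_image_with_scores is not part of the original script.
def classify_image_with_scores(image: Image.Image, labels: str):
    label_list = [label.strip() for label in labels.split(",")]
    inputs = clip_processor(text=label_list, images=image, return_tensors="pt", padding=True)
    outputs = clip_model(**inputs)
    # Softmax turns the similarity logits (shape: 1 x num_labels) into a
    # probability for each candidate label
    probs = outputs.logits_per_image.softmax(dim=1).squeeze(0)
    return {label: probs[i].item() for i, label in enumerate(label_list)}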
# Load the BART model and tokenizer for summarization
bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

# Define the interface function for summarization
def summarize_text(input_text: str):
    # Encode the text, truncating anything past BART's 1024-token limit
    inputs = bart_tokenizer([input_text], max_length=1024, truncation=True, return_tensors="pt")
    # Generate the summary with beam search
    summary_ids = bart_model.generate(
        inputs["input_ids"], num_beams=4, max_length=150, early_stopping=True
    )
    # Decode the output
    summary = bart_tokenizer.decode(
        summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return summary

# Create the Gradio interface for summarization
bart_large_cnn_interface = gr.Interface(
    fn=summarize_text,
    inputs="text",
    outputs="text",
    title="Text Summarization with BART",
    description="Enter a long piece of text. The model will generate a summary.",
)

# Object detection with Faster R-CNN (disabled; uncomment to enable). Note the
# model is Faster R-CNN, not DETR as the original title claimed, and PIL images
# have no `.draw` attribute, so drawing goes through ImageDraw instead.
# from torchvision.models.detection import fasterrcnn_resnet50_fpn
# from torchvision.transforms import functional as F
# from PIL import ImageDraw
#
# detection_model = fasterrcnn_resnet50_fpn(pretrained=True)
# detection_model.eval()
#
# def detect_objects(input_image: Image.Image):
#     # Convert the image to a tensor and add a batch dimension
#     input_tensor = F.to_tensor(input_image).unsqueeze(0)
#     with torch.no_grad():
#         output = detection_model(input_tensor)
#     # Draw the predicted bounding boxes on the image
#     draw = ImageDraw.Draw(input_image)
#     for box in output[0]["boxes"]:
#         draw.rectangle(box.tolist(), outline="red")
#     return input_image
#
# fasterrcnn_interface = gr.Interface(
#     fn=detect_objects,
#     inputs=gr.Image(type="pil"),
#     outputs="image",
#     title="Object Detection with Faster R-CNN",
#     description="Upload an image. The model will detect objects in the image.",
# )

# Load the GPT-2 model and tokenizer
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Define the interface function for text generation
def generate_text(prompt: str):
    # Encode the prompt
    inputs = gpt2_tokenizer.encode(prompt, return_tensors="pt")
    # do_sample=True is required for temperature to have any effect;
    # pad_token_id silences the missing-pad-token warning
    outputs = gpt2_model.generate(
        inputs,
        max_length=150,
        do_sample=True,
        temperature=0.7,
        num_return_sequences=1,
        pad_token_id=gpt2_tokenizer.eos_token_id,
    )
    # Decode the tokens into a string
    return gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)

# Create the Gradio interface for text generation
gpt2_interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="Text Generation with GPT-2",
    description="Enter a prompt and the model will generate a continuation of the text.",
)

# Load the T5 model and tokenizer for grammar correction
t5_model = T5ForConditionalGeneration.from_pretrained("vennify/t5-base-grammar-correction")
t5_tokenizer = T5Tokenizer.from_pretrained("vennify/t5-base-grammar-correction")

# Define the interface function for grammar correction
def correct_grammar(input_text: str):
    # Encode the text with the task prefix the model was trained on
    inputs = t5_tokenizer.encode("correct: " + input_text, return_tensors="pt")
    # Generate a corrected sequence with beam search
    outputs = t5_model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
    # Decode the tokens into a string
    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)

# Use a distinct name so the interface does not shadow the function above
correct_grammar_interface = gr.Interface(
    fn=correct_grammar,
    inputs="text",
    outputs="text",
)

# Define the combined function that runs four models on the same input.
# It reuses the interface functions above instead of re-calling the models
# with the undefined `*_tokenizer`/`*_model` names the original referenced.
def multi_model_interface(input_text: str):
    # RoBERTa: the loaded model is a question-answering pipeline, so it needs
    # a question and a context rather than classification logits; the input
    # text serves as the context with a generic placeholder question
    roberta_result = roberta_model(
        question="What is this text about?", context=input_text
    )["answer"]

    # BART summarization
    bart_summary = summarize_text(input_text)

    # GPT-2 generation
    gpt2_text = generate_text(input_text)

    # T5 grammar correction
    t5_corrected_text = correct_grammar(input_text)

    return {
        "RoBERTa Answer": roberta_result,
        "BART Summary": bart_summary,
        "GPT-2 Generation": gpt2_text,
        "T5 Correction": t5_corrected_text,
    }
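# Quick offline check of the combined function (a sketch; the sample sentence
# is illustrative only and assumes the model downloads above succeeded):
#
#   results = multi_model_interface("Gradio makes it easy to demo ML models.")
#   for name, output in results.items():
#       print(f"{name}: {output}")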
"GPT-2 Generation": gpt2_text, "T5 Correction": t5_corrected_text} # Create the Gradio interface iface = gr.Interface( fn=multi_model_interface, inputs="text", outputs="text", title="Multi-Model Interface", description="Enter a text and the interface will display the output from each of the four models.", ) # Combine interfaces into a tabbed interface demo = gr.TabbedInterface( [roberta_interface, patch16_interface, bart_large_cnn_interface, gpt2_interface, correct_grammar], ["Single-Model: Question Answering", "Single-Model: Image Classification", "Single-Model: Text Summarization", "Single-Model: Text Generation", "Single-Model: Correct Grammar", "Computer Vision: Object Detection"] ) # Launch the tabbed interface if __name__ == "__main__": demo.launch() # Launch the interface iface.launch()