import gradio as gr
import torch
from PIL import Image
from transformers import (
    pipeline,
    CLIPProcessor, CLIPModel,
    BartTokenizer, BartForConditionalGeneration,
    GPT2LMHeadModel, GPT2Tokenizer,
    T5ForConditionalGeneration, T5Tokenizer,
)
# Only needed by the (currently disabled) object-detection code below
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
# Load the RoBERTa model
roberta_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
# Define the interface function for RoBERTa
def answer_question_roberta(context, question):
    result = roberta_model(question=question, context=context)
    return result["answer"]

# Create the Gradio interface for RoBERTa
roberta_interface = gr.Interface(
    fn=answer_question_roberta,
    inputs=["text", "text"],
    outputs="text",
    title="Question Answering with RoBERTa",
    description="Ask a question about the given context.",
)
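
# A minimal sketch (not wired into the UI): the QA pipeline also returns a
# confidence "score", which can be surfaced alongside the answer.
def answer_question_with_score(context, question):
    result = roberta_model(question=question, context=context)
    return f'{result["answer"]} (confidence: {result["score"]:.2f})'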
# Placeholder interfaces for other models
placeholder_interface1 = gr.Interface(
    fn=lambda x: x,  # Placeholder function
    inputs="text",
    outputs="text",
    title="Model 1",
    description="Placeholder for Model 1.",
)
# Load the CLIP model and processor (distinct names keep later models from clobbering them)
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
# Define the interface function
def classify_image(image: Image.Image, labels: str):
    # CLIPProcessor resizes and normalizes the image itself, so the raw PIL
    # image is passed straight through (pre-normalizing it would distort the input)
    label_list = [label.strip() for label in labels.split(",")]
    inputs = clip_processor(text=label_list, images=image, return_tensors="pt", padding=True)
    # Get the model's output
    with torch.no_grad():
        outputs = clip_model(**inputs)
    logits_per_image = outputs.logits_per_image
    # The highest image-text similarity logit picks the predicted label
    predicted_label = label_list[torch.argmax(logits_per_image).item()]
    return predicted_label
patch16_interface = gr.Interface(
    fn=classify_image,
    inputs=["image", "text"],
    outputs="text",
    title="Image Classification with CLIP",
    description="Upload an image and enter a list of labels (comma-separated). The model will predict the label that best matches the image.",
)
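
# A minimal sketch (not wired into the UI): expose the full label-to-probability
# mapping instead of only the argmax, reusing clip_model/clip_processor above.
def classify_image_with_scores(image: Image.Image, labels: str):
    label_list = [label.strip() for label in labels.split(",")]
    inputs = clip_processor(text=label_list, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = clip_model(**inputs).logits_per_image
    probs = logits.softmax(dim=1).squeeze(0)
    return {label: round(float(p), 4) for label, p in zip(label_list, probs)}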
# Repeat for other placeholder interfaces...
# Load the BART model and tokenizer
bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
# Define the interface function
def summarize_text(input_text: str):
    # Truncate to BART's 1024-token limit so very long inputs don't crash the model
    inputs = bart_tokenizer([input_text], max_length=1024, truncation=True, return_tensors="pt")
    # Get the model's output
    summary_ids = bart_model.generate(inputs["input_ids"], num_beams=4, max_length=150, early_stopping=True)
    # Decode the output
    summary = bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    return summary
# Create the Gradio interface
bart_large_cnn_interface = gr.Interface(
    fn=summarize_text,
    inputs="text",
    outputs="text",
    title="Text Summarization with BART",
    description="Enter a long piece of text. The model will generate a summary.",
)
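
# A minimal sketch (not wired into the UI): the same summarizer with a tunable
# output length, in case the fixed 150-token cap is too long or too short.
def summarize_text_with_length(input_text: str, min_len: int = 30, max_len: int = 150):
    inputs = bart_tokenizer([input_text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = bart_model.generate(
        inputs["input_ids"], num_beams=4, min_length=min_len, max_length=max_len, early_stopping=True
    )
    return bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)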
# Object detection (currently disabled). Note the loaded model is Faster R-CNN,
# not DETR, and PIL images have no .draw attribute; boxes are drawn via ImageDraw.
# detection_model = fasterrcnn_resnet50_fpn(pretrained=True)
# detection_model.eval()
#
# def detect_objects(input_image: Image.Image):
#     # Convert the image to a batched tensor
#     input_tensor = F.to_tensor(input_image).unsqueeze(0)
#     # Get the model's output
#     with torch.no_grad():
#         output = detection_model(input_tensor)
#     boxes = output[0]["boxes"]
#     # Draw the bounding boxes on the image
#     from PIL import ImageDraw
#     draw = ImageDraw.Draw(input_image)
#     for box in boxes:
#         draw.rectangle(list(box.detach().numpy()), outline="red")
#     return input_image
#
# detection_interface = gr.Interface(
#     fn=detect_objects,
#     inputs="image",
#     outputs="image",
#     title="Object Detection with Faster R-CNN",
#     description="Upload an image. The model will detect objects in the image.",
# )
# Load the GPT-2 model and tokenizer
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Define the interface function
def generate_text(prompt: str):
    inputs = gpt2_tokenizer.encode(prompt, return_tensors="pt")
    # temperature only takes effect when sampling is enabled; pad_token_id
    # silences GPT-2's missing-pad-token warning
    outputs = gpt2_model.generate(
        inputs, max_length=150, do_sample=True, temperature=0.7,
        num_return_sequences=1, pad_token_id=gpt2_tokenizer.eos_token_id,
    )
    return gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
# Create the Gradio interface
gpt2_interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="Text Generation with GPT-2",
    description="Enter a prompt and the model will generate a continuation of the text.",
)
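
# A minimal sketch (not wired into the UI): a deterministic beam-search variant,
# useful when reproducible continuations matter more than variety.
def generate_text_deterministic(prompt: str):
    inputs = gpt2_tokenizer.encode(prompt, return_tensors="pt")
    outputs = gpt2_model.generate(
        inputs, max_length=150, num_beams=4, early_stopping=True,
        pad_token_id=gpt2_tokenizer.eos_token_id,
    )
    return gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)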
# Load the T5 grammar-correction model and tokenizer
t5_model = T5ForConditionalGeneration.from_pretrained("vennify/t5-base-grammar-correction")
t5_tokenizer = T5Tokenizer.from_pretrained("vennify/t5-base-grammar-correction")
# Define the interface function
def correct_grammar(input_text: str):
    # The vennify model is trained with a "grammar: " task prefix
    inputs = t5_tokenizer.encode("grammar: " + input_text, return_tensors="pt")
    outputs = t5_model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
    corrected_text = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return corrected_text
# Create the Gradio interface (a distinct name avoids shadowing the function above)
grammar_interface = gr.Interface(
    fn=correct_grammar,
    inputs="text",
    outputs="text",
    title="Grammar Correction with T5",
    description="Enter a sentence. The model will return a grammatically corrected version.",
)
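
# A minimal sketch (not wired into the UI): correct several sentences in one
# batched call, reusing t5_model/t5_tokenizer above.
def correct_grammar_batch(sentences):
    inputs = t5_tokenizer(["grammar: " + s for s in sentences], return_tensors="pt", padding=True)
    outputs = t5_model.generate(
        inputs["input_ids"], attention_mask=inputs["attention_mask"],
        max_length=512, num_beams=4, early_stopping=True,
    )
    return [t5_tokenizer.decode(o, skip_special_tokens=True) for o in outputs]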
# Run the same input through all four models and collect their outputs
def multi_model_interface(input_text: str):
    # RoBERTa is a QA pipeline, so it needs a question; a generic one is used here
    roberta_result = roberta_model(question="What is this text about?", context=input_text)["answer"]
    # BART summarization
    bart_inputs = bart_tokenizer([input_text], max_length=1024, truncation=True, return_tensors="pt")
    bart_summary_ids = bart_model.generate(bart_inputs["input_ids"], num_beams=4, max_length=150, early_stopping=True)
    bart_summary = bart_tokenizer.decode(bart_summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    # GPT-2 generation
    gpt2_inputs = gpt2_tokenizer.encode(input_text, return_tensors="pt")
    gpt2_outputs = gpt2_model.generate(
        gpt2_inputs, max_length=150, do_sample=True, temperature=0.7,
        num_return_sequences=1, pad_token_id=gpt2_tokenizer.eos_token_id,
    )
    gpt2_text = gpt2_tokenizer.decode(gpt2_outputs[0], skip_special_tokens=True)
    # T5 grammar correction
    t5_inputs = t5_tokenizer.encode("grammar: " + input_text, return_tensors="pt")
    t5_outputs = t5_model.generate(t5_inputs, max_length=512, num_beams=4, early_stopping=True)
    t5_corrected_text = t5_tokenizer.decode(t5_outputs[0], skip_special_tokens=True)
    return {
        "RoBERTa Answer": roberta_result,
        "BART Summary": bart_summary,
        "GPT-2 Generation": gpt2_text,
        "T5 Correction": t5_corrected_text,
    }
# Create the Gradio interface
iface = gr.Interface(
    fn=multi_model_interface,
    inputs="text",
    outputs="json",  # the function returns a dict of per-model results
    title="Multi-Model Interface",
    description="Enter a text and the interface will display the output from each of the four models.",
)
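
# A minimal sketch (not wired into the UI): split the combined results into four
# separate textboxes instead of one JSON blob, e.g. with
# gr.Interface(fn=multi_model_tuple, inputs="text", outputs=["text"] * 4)
def multi_model_tuple(input_text: str):
    results = multi_model_interface(input_text)
    return (results["RoBERTa Answer"], results["BART Summary"],
            results["GPT-2 Generation"], results["T5 Correction"])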
# Combine the interfaces into a tabbed app; tab labels must match the interface
# list one-to-one (object detection is disabled above, so it gets no tab)
demo = gr.TabbedInterface(
    [roberta_interface, patch16_interface, bart_large_cnn_interface, gpt2_interface, grammar_interface, iface],
    ["Single-Model: Question Answering", "Single-Model: Image Classification", "Single-Model: Text Summarization", "Single-Model: Text Generation", "Single-Model: Grammar Correction", "Multi-Model"],
)

# Launch the tabbed interface (demo.launch() blocks, so a second launch call
# after it would never run; the multi-model interface is included as a tab instead)
if __name__ == "__main__":
    demo.launch()