import gradio as gr
from transformers import pipeline
from PIL import Image
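# Note: requires the gradio, transformers, and Pillow packages, plus a
# deep-learning backend for the transformers pipelines (assumed here: PyTorch).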

# Image-captioning pipeline: BLIP (large) generates a text caption for an image.
caption_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

# Image-classification pipeline fine-tuned for facial emotion recognition.
emotion_pipeline = pipeline("image-classification", model="RickyIG/emotion_face_image_classification_v3")

# Object-detection pipeline: DETR with a ResNet-50 backbone.
object_pipeline = pipeline("object-detection", model="facebook/detr-resnet-50")


def generate_caption_emotion_and_objects(image):
    # Generate a caption for the image.
    caption_result = caption_pipeline(image)
    caption = caption_result[0]["generated_text"]

    # Classify facial emotions; format each label with its confidence score.
    emotion_result = emotion_pipeline(image)
    emotions = ", ".join([f"{res['label']}: {res['score']:.2f}" for res in emotion_result])

    # Detect objects; each detection also includes a bounding 'box', unused here.
    object_result = object_pipeline(image)
    objects = ", ".join([f"{obj['label']}: {obj['score']:.2f}" for obj in object_result])

    # Combine all three results into a single text output.
    combined_result = f"Caption: {caption}\nEmotions: {emotions}\nObjects: {objects}"
    return combined_result


# Build and launch the Gradio UI: an image upload in, a text box out.
interface = gr.Interface(
    fn=generate_caption_emotion_and_objects,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs=gr.Textbox(label="Generated Caption, Emotions, and Objects Detected"),
)
interface.launch()
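
# Quick local smoke test without the UI (illustrative; "example.jpg" is a
# placeholder filename, not part of the original app):
#
#     print(generate_caption_emotion_and_objects(Image.open("example.jpg")))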