# Requires: gradio, transformers, torch, pillow, and timm (needed by the DETR object-detection model)
import gradio as gr
from transformers import pipeline
from PIL import Image

# Initialize the pipeline with the image captioning model
caption_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

# Initialize the pipeline for emotion classification
emotion_pipeline = pipeline("image-classification", model="RickyIG/emotion_face_image_classification_v3")

# Initialize the pipeline for object detection
object_pipeline = pipeline("object-detection", model="facebook/detr-resnet-50")


def generate_caption_emotion_and_objects(image: Image.Image) -> str:
    # Process the image for captioning
    caption_result = caption_pipeline(image)
    caption = caption_result[0]["generated_text"]

    # Process the image for emotion classification
    emotion_result = emotion_pipeline(image)
    emotions = ", ".join(f"{res['label']}: {res['score']:.2f}" for res in emotion_result)

    # Process the image for object detection
    object_result = object_pipeline(image)
    objects = ", ".join(f"{obj['label']}: {obj['score']:.2f}" for obj in object_result)

    # Combine all three results into a single text block
    combined_result = f"Caption: {caption}\nEmotions: {emotions}\nObjects: {objects}"
    return combined_result


# Set up the Gradio interface
interface = gr.Interface(
    fn=generate_caption_emotion_and_objects,
    inputs=gr.components.Image(type="pil", label="Upload an Image"),
    outputs=gr.components.Textbox(label="Generated Caption, Emotions, and Objects Detected"),
)

interface.launch()