import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP processor and the caption-generation model once at import
# time so every request handled by generate_caption() reuses the same objects.
# Both are loaded from the same checkpoint identifier so they stay matched.
model_name = "Salesforce/blip-image-captioning-large"
# The processor is used below both to turn images into model inputs and to
# decode generated token ids back into text.
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name)

def generate_caption(image) -> str:
    """Generate a text caption for an image with the BLIP model.

    Args:
        image: The image supplied by the Gradio image input, or ``None``
            when the form was submitted without an uploaded image.

    Returns:
        The decoded caption with special tokens stripped, or an empty
        string when no image was provided.
    """
    # Gradio passes None when nothing was uploaded; there is nothing to
    # caption, so return an empty caption instead of failing in the processor.
    if image is None:
        return ""

    # Convert the image into PyTorch tensors for the model. Named
    # ``model_inputs`` so it does not shadow the module-level ``inputs``.
    model_inputs = processor(images=image, return_tensors="pt")

    # generate() yields a batch of token-id sequences; only one image was
    # passed in, so the caption is the sequence at index 0.
    token_ids = model.generate(**model_inputs)

    return processor.decode(token_ids[0], skip_special_tokens=True)

# Define the Gradio input/output components.
# The ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated in Gradio 3.x
# and removed in 4.x, so use the top-level component classes instead.
inputs = gr.Image(label="Upload an image")
outputs = gr.Textbox(label="Generated Caption")

# Build the app around generate_caption and start serving it.
demo = gr.Interface(fn=generate_caption, inputs=inputs, outputs=outputs)
demo.launch()