"""Gradio demo that captions an uploaded image with the BLIP large model."""

import gradio as gr
from transformers import BlipForConditionalGeneration, BlipProcessor

# Load the processor and model once at import time so every request reuses
# the same instances instead of reloading the weights per call.
model_name = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name)


def generate_caption(image):
    """Return a text caption for *image* generated by the BLIP model.

    Args:
        image: The image handed over by the Gradio ``Image`` input component
            (a PIL image, given ``type="pil"`` on the component below).

    Returns:
        The decoded caption string with special tokens stripped.

    Raises:
        ValueError: If ``image`` is ``None``, e.g. the form was submitted
            without an upload.
    """
    if image is None:
        # Fail with a clear message rather than an opaque processor error.
        raise ValueError("No image provided; please upload an image.")

    # Preprocess the image into PyTorch tensors for the model.
    model_inputs = processor(images=image, return_tensors="pt")

    # Generate caption token ids using the model.
    token_ids = model.generate(**model_inputs)

    # generate() returns a batch; a single image was passed, so decode row 0.
    return processor.decode(token_ids[0], skip_special_tokens=True)


# Define the Gradio interface components. The legacy ``gr.inputs`` /
# ``gr.outputs`` namespaces were deprecated in Gradio 3 and removed in
# Gradio 4, so components are created from the top-level namespace.
inputs = gr.Image(type="pil", label="Upload an image")
outputs = gr.Textbox(label="Generated Caption")

# Create the Gradio app; bound to a name so it can be imported or reloaded
# without starting a server.
demo = gr.Interface(fn=generate_caption, inputs=inputs, outputs=outputs)

if __name__ == "__main__":
    demo.launch()