"""Gradio demo that captions an uploaded image with a BLIP model."""

import gradio as gr
from transformers import pipeline

# Define the task and model for the pipeline.
task = 'image-to-text'
model = 'Salesforce/blip-image-captioning-base'

# Built once at import time so every request reuses the same pipeline object.
caption_pipeline = pipeline(task=task, model=model, max_new_tokens=100)


def get_caption(image) -> str:
    """Return the generated caption for *image*.

    Args:
        image: The uploaded picture. The interface below configures its
            image input with ``type="pil"``, so this is a PIL image, or
            ``None`` when the form is submitted without an upload.

    Returns:
        The ``generated_text`` field of the first pipeline prediction, or an
        empty string when no image was provided.
    """
    # Submitting without an upload yields None; skip the model call rather
    # than handing the pipeline an input it cannot process.
    if image is None:
        return ""

    # Directly pass the PIL image to the pipeline.
    predictions = caption_pipeline(image)
    return predictions[0]['generated_text']


# Create a Gradio interface.
result = gr.Interface(
    fn=get_caption,
    inputs=[gr.Image(label="Upload Image", type="pil")],
    outputs="text",
    examples=['./images/cow.jpeg', './images/christmas_dog.jpeg'],
)

# Launch the interface.
result.launch(inline=False)