import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP processor and captioning model
model_name = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name)

def generate_caption(image):
    # Preprocess the image
    inputs = processor(images=image, return_tensors="pt")

    # Generate caption using the model
    caption = model.generate(**inputs)

    # Decode the output caption
    decoded_caption = processor.decode(caption[0], skip_special_tokens=True)
    return decoded_caption
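
# Optional sanity check (a sketch, not part of the app): caption a local file without the UI.
# "example.jpg" is a placeholder path; replace it with any image on disk.
# from PIL import Image
# print(generate_caption(Image.open("example.jpg")))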

# Define the Gradio interface components
# (the old gr.inputs/gr.outputs namespaces were removed in Gradio 4; use the top-level components)
inputs = gr.Image(type="pil", label="Upload an image")
outputs = gr.Textbox(label="Generated Caption")

# Create the Gradio app 
gr.Interface(fn=generate_caption, inputs=inputs, outputs=outputs).launch()
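
# To run the demo locally (assuming gradio, transformers, torch, and Pillow are installed,
# e.g. `pip install gradio transformers torch pillow`), execute this script with Python.
# Gradio prints a local URL (http://127.0.0.1:7860 by default) where the interface is served.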