File size: 942 Bytes
b69f781
4e130e4
e45e6f2
 
 
a74e498
e45e6f2
 
b69f781
a74e498
 
 
 
e45e6f2
a74e498
e45e6f2
 
a74e498
4e130e4
a74e498
b69f781
a74e498
254e3a9
a74e498
b69f781
a74e498
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import gradio as gr
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Hugging Face model ID for the BLIP image-captioning checkpoint.
# Named once so the processor and the model are guaranteed to load
# from the same checkpoint and cannot drift apart.
MODEL_ID = "Salesforce/blip-image-captioning-large"

# Load the BLIP processor (image preprocessing + tokenizer) and the
# conditional-generation model. NOTE: downloads weights on first run.
processor = BlipProcessor.from_pretrained(MODEL_ID)
model = BlipForConditionalGeneration.from_pretrained(MODEL_ID)

# Define function for generating captions
def generate_caption(image):
    """Generate a natural-language caption for an image using BLIP.

    Args:
        image: Either a ``PIL.Image.Image`` (what Gradio delivers when the
            input component is configured with ``type='pil'``) or a
            path / file-like object that PIL can open.

    Returns:
        str: The generated caption with special tokens stripped.
    """
    # BUG FIX: the Gradio input component is configured with type='pil',
    # so `image` is already a PIL Image — calling Image.open() on it
    # raises. Accept both a ready PIL image and a path/file input.
    if isinstance(image, Image.Image):
        raw_image = image.convert('RGB')
    else:
        raw_image = Image.open(image).convert('RGB')

    # Preprocess into model tensors and run autoregressive generation.
    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(**inputs)

    # Decode token IDs back to text, dropping special tokens like [SEP].
    caption = processor.decode(out[0], skip_special_tokens=True)

    return caption

# Create and launch the Gradio interface.
# FIX: gr.inputs.* / gr.outputs.* were deprecated in Gradio 3.x and
# removed entirely in 4.x — use the top-level component classes instead.
image_input = gr.Image(type='pil')  # deliver the upload as a PIL image
caption_output = gr.Textbox()

gr.Interface(fn=generate_caption, inputs=image_input, outputs=caption_output).launch()