# caption_it / app.py
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from PIL import Image
import requests
import torch
import gradio as gr

# Load the pretrained BLIP-2 (OPT-2.7B) processor and captioning model.
# (The randomly initialized Blip2Config/Blip2VisionModel instances the script
# previously built here were unused and immediately overwritten, so they are removed.)
processor = Blip2Processor.from_pretrained('Salesforce/blip2-opt-2.7b')
model = Blip2ForConditionalGeneration.from_pretrained('Salesforce/blip2-opt-2.7b')
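# Optional sketch: move the model to a GPU when one is available (standard
# PyTorch API); generation also works on CPU, just more slowly.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)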
def captioning(image):
    # Preprocess the image and keep the tensors on the same device as the model.
    inputs = processor(images=image, return_tensors='pt').to(model.device)
    # Generate token ids for the caption and decode them to plain text.
    generated_ids = model.generate(**inputs)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    return image, generated_text
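# A minimal local smoke test, assuming network access; the COCO demo image URL
# below is the one used in the transformers BLIP-2 docs and is an assumption here.
if __name__ == '__main__':
    url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
    test_image = Image.open(requests.get(url, stream=True).raw)
    print(captioning(test_image)[1])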
demo = gr.Interface(
    fn=captioning,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="pil"), gr.Textbox(label='Caption')],
)
demo.launch()