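# Gradio demo: caption an uploaded image with GIT and BLIP, and label its
# environment/scene via the helpers in label.py.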
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForCausalLM, BlipForConditionalGeneration

from label import predict
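
# Load the GIT and BLIP captioning checkpoints from the Hugging Face Hub.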
git_processor = AutoProcessor.from_pretrained("microsoft/git-large-r-textcaps")
git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-r-textcaps")
blip_processor = AutoProcessor.from_pretrained("jaimin/Imagecap")
blip_model = BlipForConditionalGeneration.from_pretrained("jaimin/Imagecap")
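
# Run inference on the GPU when one is available.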
device = "cuda" if torch.cuda.is_available() else "cpu"
git_model.to(device)
blip_model.to(device)

def generate_caption(processor, model, image, use_float_16=False):
    """Preprocess the image, run model.generate(), and decode one caption."""
    inputs = processor(images=image, return_tensors="pt").to(device)
    if use_float_16:
        inputs = inputs.to(torch.float16)
    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=50)
    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_caption
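
# A minimal usage sketch for generate_caption (the file name "example.jpg" is
# hypothetical; any PIL image works):
#
#   from PIL import Image
#   image = Image.open("example.jpg")
#   print(generate_caption(git_processor, git_model, image))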

def generate_captions(image):
    """Produce scene labels plus GIT and BLIP captions for a single image."""
    caption_git = generate_caption(git_processor, git_model, image)
    caption_blip = generate_caption(blip_processor, blip_model, image)
    env, scene = predict(image)
    return env, scene, caption_git, caption_blip

outputs = [
    gr.Textbox(label="Environment"),
    gr.Textbox(label="Objects detected"),
    gr.Textbox(label="Caption generated by GIT"),
    gr.Textbox(label="Caption generated by BLIP"),
]
title = "Image Cap with Scene"
description = "Image caption with scene"

interface = gr.Interface(fn=generate_captions,
                         inputs=gr.Image(type="pil"),
                         outputs=outputs,
                         title=title,
                         description=description)
interface.queue()  # replaces the deprecated enable_queue=True argument
interface.launch(debug=True)