medapp / app.py
hubsnippetai's picture
Update app.py
c6c3ebd verified
raw
history blame contribute delete
No virus
1.64 kB
import requests
import asyncio
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq
# Both the model weights and the matching processor come from the same
# Hugging Face checkpoint, so the identifier is spelled out only once.
KOSMOS2_CHECKPOINT = "microsoft/kosmos-2-patch14-224"

model = AutoModelForVision2Seq.from_pretrained(KOSMOS2_CHECKPOINT)
processor = AutoProcessor.from_pretrained(KOSMOS2_CHECKPOINT)
# The original Kosmos-2 demo saves the image first and then reloads it. For some images, this gives a slightly different image input and changes the generation outputs.
#prompt = "{question}"
def describe_image(image_path, question: str) -> str:
    """Generate a text answer about an image using the Kosmos-2 model.

    Args:
        image_path: The image to describe. Despite the name, the Gradio
            interface in this file is configured with ``type='pil'``, so the
            value received is an image object rather than a filesystem path.
            It is forwarded unchanged to the processor's ``images`` argument.
        question: Text prompt handed to the processor together with the image.

    Returns:
        The post-processed generated text. The entities that the
        post-processing step also returns are discarded.

    Raises:
        ValueError: If no image was supplied (``image_path`` is ``None``).
    """
    # Gradio passes None when the user submits without uploading an image;
    # fail with a readable message instead of an error deep in the processor.
    if image_path is None:
        raise ValueError("Please upload an image before asking a question.")

    inputs = processor(text=question, images=image_path, return_tensors="pt")

    # `generate` is an ordinary blocking call, not a coroutine, so it must not
    # be awaited (and `await` is a syntax error inside a non-async function).
    generated_ids = model.generate(
        pixel_values=inputs["pixel_values"],
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        image_embeds=None,
        image_embeds_position_mask=inputs["image_embeds_position_mask"],
        use_cache=True,
        max_new_tokens=128,
    )
    generated_text = processor.batch_decode(
        generated_ids, skip_special_tokens=True
    )[0]

    # Pass `cleanup_and_extract=False` to `post_process_generation` instead if
    # you want to inspect the raw model generation.
    processed_text, _entities = processor.post_process_generation(generated_text)
    return processed_text
import gradio as gr
# Web UI: an image upload plus a free-text question go in, and the text
# returned by `describe_image` is shown in a single text box.
gr_app = gr.Interface(
    fn=describe_image,
    inputs=[
        gr.Image(label="Upload an image for description", type="pil"),
        gr.Textbox(label="Ask a question about the image"),
    ],
    outputs=[
        gr.Textbox(label="Image description"),
    ],
    title="App for image description",
)
# Start the Gradio server only when this file is executed as a script;
# `show_error=True` is passed so launch is called with error display enabled.
if __name__ == "__main__":
    gr_app.launch(show_error=True)