Spaces:
Build error
Build error
File size: 1,636 Bytes
77ef807 1309060 77ef807 0ab4270 77ef807 1a15f55 77ef807 c6c3ebd cdac2b5 77ef807 d4bb996 77ef807 cdac2b5 77ef807 cdac2b5 77ef807 cdac2b5 77ef807 1a15f55 77ef807 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import requests
import asyncio
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq
# Instantiate the Kosmos-2 model and its processor once, at module import,
# so every request handled by the app reuses the same objects.
_KOSMOS2_CHECKPOINT = "microsoft/kosmos-2-patch14-224"
model = AutoModelForVision2Seq.from_pretrained(_KOSMOS2_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_KOSMOS2_CHECKPOINT)
# The original Kosmos-2 demo saves the image first and then reloads it. For some images, this gives a slightly different image input and changes the generation outputs.
#prompt = "{question}"
def describe_image(image_path, question: str) -> str:
    """Answer ``question`` about an image with the module-level Kosmos-2 model.

    Args:
        image_path: The image to describe. Despite the name, the value is
            forwarded unchanged to the processor as ``images=``; the Gradio
            interface in this file declares its image input with
            ``type='pil'``.
        question: Text prompt forwarded to the processor as ``text=``.

    Returns:
        The decoded generation after ``processor.post_process_generation``
        has been applied with its default settings.

    Raises:
        ValueError: If ``image_path`` is ``None`` (for example, the form was
            submitted without an image).
    """
    # Fail early with a readable message instead of an opaque error from
    # deep inside the processor.
    if image_path is None:
        raise ValueError("Please upload an image before asking a question.")

    inputs = processor(text=question, images=image_path, return_tensors="pt")

    # `generate` is a regular synchronous call: it must not be awaited, and
    # `await` is a SyntaxError inside a non-async function anyway.
    generated_ids = model.generate(
        pixel_values=inputs["pixel_values"],
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        image_embeds=None,
        image_embeds_position_mask=inputs["image_embeds_position_mask"],
        use_cache=True,
        max_new_tokens=128,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Pass `cleanup_and_extract=False` here instead to see the raw model
    # generation. The extracted entities are not used by this app.
    processed_text, _entities = processor.post_process_generation(generated_text)
    return processed_text
import gradio as gr
# Gradio UI: one image upload plus one free-text question in, one text box out.
_image_input = gr.Image(label="Upload an image for description", type="pil")
_question_input = gr.Textbox(label="Ask a question about the image")
_description_output = gr.Textbox(label="Image description")

gr_app = gr.Interface(
    fn=describe_image,
    inputs=[_image_input, _question_input],
    outputs=[_description_output],
    title="App for image description",
)
# Start the Gradio server only when this file is executed as a script;
# show_error=True is passed so that errors are shown in the UI.
if __name__ == "__main__":
    gr_app.launch(show_error=True)