Spaces:

hubsnippetai
/

medapp

Build error

File size: 1,636 Bytes

77ef807
1309060
77ef807
 
 
 
 
0ab4270
 
77ef807
 
 
1a15f55
77ef807
c6c3ebd
cdac2b5
77ef807
d4bb996
77ef807
 
 
 
 
 
 
 
cdac2b5
77ef807
 
cdac2b5
77ef807
cdac2b5
77ef807
 
 
 
 
1a15f55
77ef807

import requests
import asyncio

from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq


# Hugging Face Hub checkpoint id shared by the model weights and the matching processor.
_CHECKPOINT = "microsoft/kosmos-2-patch14-224"

model = AutoModelForVision2Seq.from_pretrained(_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_CHECKPOINT)

# The original Kosmos-2 demo saves the image first and then reloads it. For some images, this gives a slightly different image input and changes the generation output.

#prompt = "{question}"

def describe_image(image_path, question: str):
  """Generate text about an image in response to a question.

  Args:
    image_path: The image to describe. Despite the name, the value is passed
      straight to the processor as `images=`; the Gradio interface in this
      file supplies it as a PIL image (`type='pil'`).
    question: Text prompt passed to the processor together with the image.

  Returns:
    The generated text after `processor.post_process_generation` has been
    applied with its default settings.

  Raises:
    ValueError: If no image was supplied.
  """
  # Fail early with a clear message instead of an opaque processor error.
  if image_path is None:
    raise ValueError("An image is required to generate a description.")

  inputs = processor(text=question, images=image_path, return_tensors="pt")

  # `generate` is an ordinary blocking call, so it is invoked directly
  # (the previous `await` inside a non-async function was a syntax error).
  generated_ids = model.generate(
    pixel_values=inputs["pixel_values"],
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    image_embeds=None,
    image_embeds_position_mask=inputs["image_embeds_position_mask"],
    use_cache=True,
    max_new_tokens=128,
  )
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

  # Pass `cleanup_and_extract=False` to `post_process_generation` in order to
  # see the raw model generation. The default call returns the processed text
  # together with the extracted entities; only the text is returned here.
  processed_text, _entities = processor.post_process_generation(generated_text)

  return processed_text

import gradio as gr

# Input widgets: the uploaded image (delivered as a PIL object) and the question text.
image_input = gr.Image(label="Upload an image for description", type='pil')
question_input = gr.Textbox(label="Ask a question about the image")

# Output widget that shows the text returned by describe_image.
description_output = gr.Textbox(label="Image description")

gr_app = gr.Interface(
  fn=describe_image,
  inputs=[image_input, question_input],
  outputs=[description_output],
  title="App for image description",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
  gr_app.launch(show_error=True)