# image_to_text / app.py
# Author: Greatmonkey
# Commit: "Create app.py" (ea026b7, verified)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
# Checkpoint to load, pinned to a fixed revision so that the weights and the
# accompanying model code do not change underneath the app.
model_id = "vikhyatk/moondream2"
revision = "2024-03-06"

# The tokenizer and the model are both loaded once, at import time, from the
# same pinned revision and are shared by every request.
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# NOTE: trust_remote_code=True lets transformers run the Python modelling code
# shipped inside the model repository; keep the revision pinned for that reason.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
)
def describe_image(image):
    """Return the model's answer to "Describe this image." for an image.

    Args:
        image: Array-like pixel data accepted by ``PIL.Image.fromarray``,
            or ``None`` when the request was submitted without an image.

    Returns:
        Whatever ``model.answer_question`` returns for the encoded image
        (presumably the description text), or a short notice string when
        ``image`` is ``None``.
    """
    # The UI hands over None when nothing was uploaded; Image.fromarray would
    # raise on it, so answer with a readable notice instead of a traceback.
    if image is None:
        return "No image provided."

    pil_image = Image.fromarray(image)
    enc_image = model.encode_image(pil_image)
    return model.answer_question(enc_image, "Describe this image.", tokenizer)
# Build the web UI: one image input wired to describe_image, one text output.
# The top-level gr.Image / gr.Textbox components are used because the
# gr.inputs / gr.outputs namespaces were deprecated in Gradio 3 and removed in
# Gradio 4, where accessing them raises AttributeError.
# type="numpy" is spelled out because describe_image feeds the value straight
# into Image.fromarray, which needs array data.
input_image = gr.Image(type="numpy")
output_text = gr.Textbox()

demo = gr.Interface(
    fn=describe_image,
    inputs=input_image,
    outputs=output_text,
    title="Image Description",
    description="Enter an image and get its description.",
)
# Start the server at import/run time, exactly as the original script did.
demo.launch()