"""Gradio demo that asks the moondream2 model to describe an uploaded image."""

import gradio as gr
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# The model and the tokenizer are both loaded from the same pinned revision.
model_id = "vikhyatk/moondream2"
revision = "2024-03-06"

# trust_remote_code=True lets transformers run the modelling code shipped in
# the model repository; encode_image/answer_question below come from there.
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)


def describe_image(image):
    """Return the model's answer to "Describe this image." for *image*.

    Args:
        image: Array-like pixel data accepted by ``PIL.Image.fromarray``
            (Gradio's ``Image`` component passes a NumPy array by default),
            or ``None`` when the form is submitted without an image.

    Returns:
        Whatever ``model.answer_question`` returns for the fixed prompt
        (presumably a text description; defined by the remote model code).

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of a traceback from ``Image.fromarray``.
    """
    if image is None:
        raise gr.Error("Please provide an image to describe.")

    pil_image = Image.fromarray(image)
    enc_image = model.encode_image(pil_image)
    return model.answer_question(enc_image, "Describe this image.", tokenizer)


# Top-level components: the legacy gr.inputs / gr.outputs namespaces are
# deprecated in Gradio 3.x and no longer exist in Gradio 4.x.
input_image = gr.Image()
output_text = gr.Textbox()

demo = gr.Interface(
    fn=describe_image,
    inputs=input_image,
    outputs=output_text,
    title="Image Description",
    description="Enter an image and get its description.",
)
demo.launch()