John Ho
change output to JSON format
dc42813
import gradio as gr
import spaces, torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
from typing import Literal
@spaces.GPU
def load_model():
    """
    Load the moondream2 checkpoint used by this app.

    Returns:
        The object produced by AutoModelForCausalLM.from_pretrained for "vikhyatk/moondream2" at revision "2025-04-14", with all modules mapped to "cuda".
    """
    checkpoint = "vikhyatk/moondream2"
    load_options = {
        # Pinned revision so the downloaded weights/code do not change underneath the app.
        "revision": "2025-04-14",
        # NOTE(review): presumably needed because the checkpoint ships its own
        # modelling code -- confirm against the model card.
        "trust_remote_code": True,
        # The empty-string key maps the whole model onto the CUDA device.
        "device_map": {"": "cuda"},
    }
    return AutoModelForCausalLM.from_pretrained(checkpoint, **load_options)
@spaces.GPU
def detect(
    im: Image.Image, object_name: str, mode: Literal["point", "object_detection"]
):
    """
    Open Vocabulary Detection using moondream2

    Args:
        im: Pillow Image
        object_name: the object you would like to detect
        mode: point or object_detection

    Returns:
        list: a list of bounding boxes (xyxy) or points (xy) coordinates that are normalized

    Raises:
        gr.Error: if mode is neither "point" nor "object_detection"
    """
    # Validate before loading the model: an unsupported mode (e.g. None or a
    # typo from an API caller) used to fall through and silently return None
    # after the model had already been loaded.
    if mode not in ("point", "object_detection"):
        raise gr.Error(
            f"Unsupported mode {mode!r}; expected 'point' or 'object_detection'."
        )

    model = load_model()
    if mode == "point":
        # "points" entry of the result returned by model.point
        return model.point(im, object_name)["points"]
    # mode == "object_detection": "objects" entry of the result of model.detect
    return model.detect(im, object_name)["objects"]
# Input widgets, listed in the same order as detect()'s positional parameters.
image_input = gr.Image(label="Input Image", type="pil")  # type="pil" matches detect()'s Image.Image parameter
target_input = gr.Textbox(label="Object to Detect")
mode_input = gr.Dropdown(label="Mode", choices=["point", "object_detection"])

# Whatever detect() returns is rendered in a JSON output component.
demo = gr.Interface(
    fn=detect,
    inputs=[image_input, target_input, mode_input],
    outputs=gr.JSON(label="Output JSON"),
)

# mcp_server=True enables Gradio's MCP server option for this app;
# docs_url="/docs" adds the FastAPI Swagger API docs.
demo.launch(mcp_server=True, app_kwargs={"docs_url": "/docs"})