# Moondream2 open-vocabulary detection demo — runs on Hugging Face Spaces (ZeroGPU).
import functools
from typing import Literal

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
@functools.cache
def load_model():
    """Load moondream2 and memoize it for the lifetime of the process.

    Returns:
        The moondream2 causal LM (pinned to revision 2025-04-14), placed on
        the GPU via ``device_map``.

    The original re-instantiated the model on every call from ``detect`` —
    ``functools.cache`` makes the expensive ``from_pretrained`` run once.
    """
    return AutoModelForCausalLM.from_pretrained(
        "vikhyatk/moondream2",
        revision="2025-04-14",
        # The repo ships custom code (point/detect APIs), so remote code
        # execution must be enabled for this model to work at all.
        trust_remote_code=True,
        device_map={"": "cuda"},
    )
@spaces.GPU  # ZeroGPU Spaces only attach a GPU to functions carrying this decorator
def detect(
    im: Image.Image, object_name: str, mode: Literal["point", "object_detection"]
):
    """
    Open Vocabulary Detection using moondream2

    Args:
        im: Pillow Image
        object_name: the object you would like to detect
        mode: point or object_detection

    Returns:
        list: a list of bounding boxes (xyxy) or points (xy) coordinates that are normalized

    Raises:
        ValueError: if ``mode`` is neither "point" nor "object_detection".
    """
    model = load_model()
    if mode == "point":
        return model.point(im, object_name)["points"]
    elif mode == "object_detection":
        return model.detect(im, object_name)["objects"]
    # Previously an unknown mode fell through and returned None silently;
    # fail loudly instead (the UI dropdown normally prevents this path).
    raise ValueError(f"Unsupported mode: {mode!r}")
# Wire the detector into a minimal Gradio UI: an image, a free-text prompt,
# and a mode selector; the raw model output is shown as JSON.
_detector_inputs = [
    gr.Image(label="Input Image", type="pil"),
    gr.Textbox(label="Object to Detect"),
    gr.Dropdown(label="Mode", choices=["point", "object_detection"]),
]
demo = gr.Interface(
    fn=detect,
    inputs=_detector_inputs,
    outputs=gr.JSON(label="Output JSON"),
)

# Expose the app as an MCP server and surface FastAPI's Swagger docs at /docs.
demo.launch(mcp_server=True, app_kwargs={"docs_url": "/docs"})