John Ho
change output to JSON format
dc42813
raw
history blame
1.35 kB
import gradio as gr
import spaces, torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
from typing import Literal
@spaces.GPU
def load_model():
    """Load the moondream2 model at a pinned revision and place it on CUDA.

    Returns:
        The object returned by ``AutoModelForCausalLM.from_pretrained`` for
        the ``vikhyatk/moondream2`` checkpoint.
    """
    load_options = {
        # Pin the checkpoint so upstream updates do not change behaviour.
        "revision": "2025-04-14",
        # The checkpoint ships its own modelling code.
        "trust_remote_code": True,
        # Map the whole model (root module "") onto the GPU.
        "device_map": {"": "cuda"},
    }
    return AutoModelForCausalLM.from_pretrained("vikhyatk/moondream2", **load_options)
@spaces.GPU
def detect(
    im: Image.Image, object_name: str, mode: Literal["point", "object_detection"]
):
    """
    Open Vocabulary Detection using moondream2

    Args:
        im: Pillow Image
        object_name: the object you would like to detect
        mode: point or object_detection (any other value raises ValueError)
    Returns:
        list: a list of bounding boxes (xyxy) or points (xy) coordinates that are normalized
    """
    # Validate the mode before loading the model: an unsupported value used to
    # fall through both branches and silently return None, and only after the
    # model load had already been paid for.
    if mode not in ("point", "object_detection"):
        raise ValueError(
            f"Unsupported mode {mode!r}; expected 'point' or 'object_detection'"
        )

    model = load_model()
    if mode == "point":
        return model.point(im, object_name)["points"]
    return model.detect(im, object_name)["objects"]
# Input widgets, listed in the same order as detect()'s positional parameters.
_image_input = gr.Image(type="pil", label="Input Image")
_object_input = gr.Textbox(label="Object to Detect")
_mode_input = gr.Dropdown(choices=["point", "object_detection"], label="Mode")

# The detection result is rendered as JSON.
_json_output = gr.JSON(label="Output JSON")

demo = gr.Interface(
    fn=detect,
    inputs=[_image_input, _object_input, _mode_input],
    outputs=_json_output,
)

# Launch with the MCP server enabled; docs_url adds the FastAPI Swagger API docs.
_launch_options = {
    "mcp_server": True,
    "app_kwargs": {"docs_url": "/docs"},
}
demo.launch(**_launch_options)