# Spaces: Sleeping
| import gradio as gr | |
| import spaces, torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from PIL import Image | |
| from typing import Literal | |
# Process-wide cache for the loaded model; filled by the first load_model() call.
_MODEL = None


def load_model():
    """Return the moondream2 model, loading it only on the first call.

    The checkpoint "vikhyatk/moondream2" at revision "2025-04-14" is loaded
    with every module placed on the "cuda" device. Later calls return the
    instance that was loaded first instead of reading the checkpoint again,
    so each request does not pay the full model-loading cost.

    Returns:
        The object returned by ``AutoModelForCausalLM.from_pretrained``.
    """
    global _MODEL
    if _MODEL is None:
        # NOTE(review): trust_remote_code=True runs Python code shipped in the
        # model repository; keep the revision pinned so that code stays fixed.
        # NOTE(review): there is no lock here; two simultaneous first calls
        # could both load the model. Confirm how requests are scheduled.
        _MODEL = AutoModelForCausalLM.from_pretrained(
            "vikhyatk/moondream2",
            revision="2025-04-14",
            trust_remote_code=True,
            device_map={"": "cuda"},
        )
    return _MODEL
def detect(
    im: Image.Image, object_name: str, mode: Literal["point", "object_detection"]
):
    """
    Open Vocabulary Detection using moondream2
    Args:
        im: Pillow Image
        object_name: the object you would like to detect
        mode: point or object_detection
    Returns:
        list: a list of bounding boxes (xyxy) or points (xy) coordinates that are normalized
    """
    # NOTE(review): the docstring wording is left as it was because the app is
    # launched with mcp_server=True and Gradio presumably exposes this text as
    # the tool description -- confirm before rewording it.

    # Validate the request before loading the model, so bad input fails fast
    # with a clear ValueError instead of silently returning None (unknown
    # mode) or failing somewhere inside the model code (missing image).
    if mode not in ("point", "object_detection"):
        raise ValueError(
            f"mode must be 'point' or 'object_detection', got {mode!r}"
        )
    if im is None:
        raise ValueError("an input image is required")

    model = load_model()
    if mode == "point":
        return model.point(im, object_name)["points"]
    # Only "object_detection" can reach this point after the check above.
    return model.detect(im, object_name)["objects"]
# UI components, created in the same order as detect()'s parameters
# (image, object name, mode), followed by the JSON result viewer.
_image_input = gr.Image(label="Input Image", type="pil")
_object_input = gr.Textbox(label="Object to Detect")
_mode_input = gr.Dropdown(label="Mode", choices=["point", "object_detection"])
_json_output = gr.JSON(label="Output JSON")

# Wire detect() to the components above.
demo = gr.Interface(
    fn=detect,
    inputs=[_image_input, _object_input, _mode_input],
    outputs=_json_output,
)

# mcp_server=True asks Gradio to start its MCP server alongside the web UI;
# docs_url adds FastAPI Swagger API Docs at /docs.
_launch_options = {"mcp_server": True, "app_kwargs": {"docs_url": "/docs"}}
demo.launch(**_launch_options)