Spaces:

GF-John
/

moondream-pointer

Sleeping

John Ho

change output to JSON format

dc42813 7 months ago

1.35 kB

	import gradio as gr
	import spaces, torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from PIL import Image
	from typing import Literal


	@spaces.GPU
	def load_model():
	    """Load the moondream2 causal-LM checkpoint onto the CUDA device.

	    The checkpoint revision is pinned, and ``trust_remote_code`` is enabled
	    for the load.

	    Returns:
	        The model object produced by ``AutoModelForCausalLM.from_pretrained``.
	    """
	    load_options = {
	        "revision": "2025-04-14",
	        "trust_remote_code": True,
	        # An empty-string key maps the whole model to a single device.
	        "device_map": {"": "cuda"},
	    }
	    return AutoModelForCausalLM.from_pretrained(
	        "vikhyatk/moondream2", **load_options
	    )


	@spaces.GPU
	def detect(
	    im: Image.Image, object_name: str, mode: Literal["point", "object_detection"]
	):
	    """
	    Open Vocabulary Detection using moondream2. An unsupported mode is rejected with an error.

	    Args:
	        im: Pillow Image
	        object_name: the object you would like to detect
	        mode: point or object_detection
	    Returns:
	        list: a list of bounding boxes (xyxy) or points (xy) coordinates that are normalized
	    """
	    # Reject unknown modes (including None from an unselected dropdown) up
	    # front: previously they fell through both branches and returned None,
	    # and only after the model had already been loaded.
	    if mode not in ("point", "object_detection"):
	        raise gr.Error(
	            f"Unsupported mode {mode!r}; expected 'point' or 'object_detection'."
	        )

	    model = load_model()
	    if mode == "point":
	        return model.point(im, object_name)["points"]
	    return model.detect(im, object_name)["objects"]


	# Input widgets, in the same order as the parameters of `detect`.
	input_components = [
	    gr.Image(label="Input Image", type="pil"),
	    gr.Textbox(label="Object to Detect"),
	    gr.Dropdown(label="Mode", choices=["point", "object_detection"]),
	]
	# The detection result is rendered as JSON.
	output_component = gr.JSON(label="Output JSON")

	demo = gr.Interface(fn=detect, inputs=input_components, outputs=output_component)

	# Launch with the MCP server enabled.
	demo.launch(
	    mcp_server=True,
	    app_kwargs={"docs_url": "/docs"},  # add FastAPI Swagger API Docs
	)