import numpy as np
import cv2
from PIL import Image

import scripts.r_masking.core as core
from reactor_utils import tensor_to_pil

try:
    from ultralytics import YOLO
except Exception as e:
    print(e)
def load_yolo(model_path: str):
    try:
        return YOLO(model_path)
    except ModuleNotFoundError:
        # https://github.com/ultralytics/ultralytics/issues/3856
        # Instantiating a stock model first works around the loading failure,
        # after which the requested model loads normally.
        YOLO("yolov8n.pt")
        return YOLO(model_path)
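
# Illustrative sketch (not part of the original module): how load_yolo might be
# called to obtain a detection model. The path below is a hypothetical example;
# actual model files are installed separately.
def _example_load_yolo():
    model = load_yolo("models/ultralytics/bbox/face_yolov8m.pt")  # hypothetical path
    return model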
def inference_bbox(
    model,
    image: Image.Image,
    confidence: float = 0.3,
    device: str = "",
):
    pred = model(image, conf=confidence, device=device)

    bboxes = pred[0].boxes.xyxy.cpu().numpy()
    cv2_image = np.array(image)
    if len(cv2_image.shape) == 3:
        cv2_image = cv2_image[:, :, ::-1].copy()  # Convert RGB to BGR for cv2 processing
    else:
        # Grayscale input: convert to a 3-channel image for consistency
        cv2_image = cv2.cvtColor(cv2_image, cv2.COLOR_GRAY2BGR)
    cv2_gray = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2GRAY)

    # Build a filled boolean rectangle mask for each detected box
    segms = []
    for x0, y0, x1, y1 in bboxes:
        cv2_mask = np.zeros(cv2_gray.shape, np.uint8)
        cv2.rectangle(cv2_mask, (int(x0), int(y0)), (int(x1), int(y1)), 255, -1)
        cv2_mask_bool = cv2_mask.astype(bool)
        segms.append(cv2_mask_bool)

    n = bboxes.shape[0]
    if n == 0:
        return [[], [], [], []]

    # Parallel lists: [labels, bboxes (xyxy), boolean masks, confidences]
    results = [[], [], [], []]
    for i in range(n):
        results[0].append(pred[0].names[int(pred[0].boxes[i].cls.item())])
        results[1].append(bboxes[i])
        results[2].append(segms[i])
        results[3].append(pred[0].boxes[i].conf.cpu().numpy())

    return results
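
# Illustrative sketch (not part of the original module): running inference_bbox
# on a PIL image and unpacking its parallel-list result. The model path and
# "face.jpg" are hypothetical placeholders.
def _example_inference_bbox():
    model = load_yolo("models/ultralytics/bbox/face_yolov8m.pt")  # hypothetical path
    image = Image.open("face.jpg").convert("RGB")  # hypothetical input
    labels, boxes, masks, confs = inference_bbox(model, image, confidence=0.5)
    for label, box, conf in zip(labels, boxes, confs):
        print(f"{label}: bbox={box}, confidence={conf.item():.3f}")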
class UltraBBoxDetector:
    bbox_model = None

    def __init__(self, bbox_model):
        self.bbox_model = bbox_model

    def detect(self, image, threshold, dilation, crop_factor, drop_size=1, detailer_hook=None):
        drop_size = max(drop_size, 1)
        detected_results = inference_bbox(self.bbox_model, tensor_to_pil(image), threshold)
        segmasks = core.create_segmasks(detected_results)

        if dilation > 0:
            segmasks = core.dilate_masks(segmasks, dilation)

        items = []
        h = image.shape[1]
        w = image.shape[2]

        for x, label in zip(segmasks, detected_results[0]):
            item_bbox = x[0]
            item_mask = x[1]

            y1, x1, y2, x2 = item_bbox

            if x2 - x1 > drop_size and y2 - y1 > drop_size:  # minimum dimension must be (2,2) to avoid squeeze issue
                crop_region = core.make_crop_region(w, h, item_bbox, crop_factor)

                if detailer_hook is not None:
                    crop_region = detailer_hook.post_crop_region(w, h, item_bbox, crop_region)

                cropped_image = core.crop_image(image, crop_region)
                cropped_mask = core.crop_ndarray2(item_mask, crop_region)
                confidence = x[2]
                # bbox_size = (item_bbox[2]-item_bbox[0], item_bbox[3]-item_bbox[1])  # (w,h)

                item = core.SEG(cropped_image, cropped_mask, confidence, crop_region, item_bbox, label, None)
                items.append(item)

        shape = image.shape[1], image.shape[2]
        segs = shape, items

        if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
            segs = detailer_hook.post_detection(segs)

        return segs

    def detect_combined(self, image, threshold, dilation):
        detected_results = inference_bbox(self.bbox_model, core.tensor2pil(image), threshold)
        segmasks = core.create_segmasks(detected_results)

        if dilation > 0:
            segmasks = core.dilate_masks(segmasks, dilation)

        return core.combine_masks(segmasks)

    def setAux(self, x):
        pass
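
# Illustrative sketch (not part of the original module): wiring the pieces
# together the way a ComfyUI node might. Assumes `image` is a ComfyUI-style
# image tensor of shape (batch, height, width, channels); the model path is
# a hypothetical placeholder.
def _example_detect(image):
    detector = UltraBBoxDetector(load_yolo("models/ultralytics/bbox/face_yolov8m.pt"))  # hypothetical path
    # Per-detection segments: ((h, w), [core.SEG, ...])
    segs = detector.detect(image, threshold=0.5, dilation=8, crop_factor=2.0, drop_size=10)
    # Single merged mask covering every detection
    combined_mask = detector.detect_combined(image, threshold=0.5, dilation=8)
    return segs, combined_mask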