Source code for detectools.formats.base

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Dict, Generator, List, Literal, Sequence, Tuple, Union

import torch
from torch import Tensor
from torchvision.ops import nms, remove_small_boxes
from torchvision.transforms.v2 import ConvertBoundingBoxFormat
from torchvision.tv_tensors import BoundingBoxes
from detectools.formats.detect_mask import DetectMask


class BaseAnnotation(ABC):
    """Abstract container describing a single annotated object.

    Concrete subclasses provide the constructor and the COCO export.

    Attributes:
    -----------

    Attributes:
        boxe (``BoundingBoxes``): Box coordinates in XYWH format.
        label (``Tensor``): Class label of the object.
        spatial_size (``Tuple[int, int]``): Size of the corresponding image (H, W).
        score (``Tensor``): Confidence score of the object (for predictions).
    """

    boxe: BoundingBoxes
    label: Tensor
    spatial_size: Tuple[int, int]
    score: Tensor

    @abstractmethod
    def __init__(self):
        """Build the annotation; every concrete subclass must define this."""

    @abstractmethod
    def object_to_coco(
        self, annotation_id: int = 1, image_id: int = 1
    ) -> Dict[str, Any]:
        """Export the annotation as a COCO-like dictionary.

        Args:
            annotation_id (``int``, **optional**): Id of the annotation. Defaults to 1.
            image_id (``int``, **optional**): Id of the corresponding image. Defaults to 1.

        Returns:
            ``Dict[str, Any]``:
                - COCO-like dict holding the data of this annotation.
        """
class BaseFormat(ABC):
    """Abstract class for detection data container classes in detectools.

    Stores target or prediction data for one image and offers basic and
    advanced operations on it (padding, cropping, NMS, etc.).

    Attributes:
    -----------

    Attributes:
        box_format (``Literal["XYWH", "XYXY", "CXCYWH"]``): Format of bounding boxes.
        spatial_size (``Tuple[int, int]``): Size of the corresponding image (H, W).
        size (``int``): Number of objects in BaseFormat.
        data (``Dict[str, Tensor]``): Data dict that holds the objects' information
            under its keys (labels, boxes, scores, masks).
    """

    # (H, W) size of the image the stored boxes/masks refer to.
    spatial_size: Tuple[int, int]
    # All per-object values (labels and boxes/masks at least) for one image.
    data: Dict[str, Tensor]
    # Number of objects in the image.
    size: int
    # Coordinate format of the stored bounding boxes.
    box_format: Literal["XYWH", "XYXY", "CXCYWH"]

    ### Class methods that return a BaseFormat

    @classmethod
    def from_coco(
        cls, coco_annotations: List[Dict[str, Any]], spatial_size: Tuple[int, int]
    ) -> BaseFormat:
        """Return a BaseFormat built from the COCO annotations of one image.

        The base implementation is a stub that returns ``None``; concrete
        formats are expected to override it.

        Args:
            coco_annotations (``List[Dict[str, Any]]``): COCO annotation dictionaries.
            spatial_size (``Tuple[int, int]``): Size (H, W) of the corresponding image.

        Returns:
            ``BaseFormat``:
                - BaseFormat instance.
        """

    @staticmethod
    @abstractmethod
    def empty(spatial_size: Tuple[int, int]) -> BaseFormat:
        """Return an empty instance of the concrete format.

        Declared static because it takes no ``self``/``cls`` and is invoked
        through the class (see ``clone``).

        Args:
            spatial_size (``Tuple[int, int]``): Size (H, W) of the corresponding image.

        Returns:
            ``BaseFormat``:
                - BaseFormat instance.
        """

    def clone(self) -> BaseFormat:
        """Return a detached deep copy of this format.

        Returns:
            ``BaseFormat``:
                - Cloned BaseFormat.
        """
        clone = type(self).empty(spatial_size=self.spatial_size)
        clone.size = self.size
        # Keep the declared box format in sync with the boxes copied below.
        if hasattr(self, "box_format"):
            clone.box_format = self.box_format
        for key, value in self.data.items():
            if isinstance(value, DetectMask):
                # DetectMask wraps a tensor in ``_mask``; copy it and re-wrap.
                clone.data[key] = DetectMask(value._mask.clone().detach())
            else:
                clone.data[key] = value.clone().detach()
        return clone

    ### Magic methods

    def __getitem__(self, indexes: Union[int, Sequence[int]]) -> BaseFormat:
        """Return a subset BaseFormat keeping only the objects at ``indexes``.

        Every value of the data dict is indexed with ``indexes``, so labels,
        scores, masks and boxes stay aligned.

        Args:
            indexes (``Union[int, Sequence[int]]``): Indexes to slice objects data.

        Returns:
            ``BaseFormat``:
                - BaseFormat with n objects for n indexes in indexes.
        """
        sliced = self.clone()
        for key, value in self.data.items():
            if isinstance(value, BoundingBoxes):
                # Indexing a BoundingBoxes may yield a plain Tensor (depending on
                # torchvision's return-type setting), so re-wrap the subset with
                # the same format and canvas size.
                sliced.data[key] = BoundingBoxes(
                    value[indexes],
                    format=value.format,
                    canvas_size=value.canvas_size,
                )
            else:
                # general case
                sliced.data[key] = value[indexes]
        # set size and box_format of sliced format
        sliced.size = sliced.get("labels").nelement()
        sliced.set_boxes_format(self.box_format)
        return sliced

    def __contains__(self, key: str) -> bool:
        """Return True if key is in the data dict, else False.

        Args:
            key (``str``): Data dictionary key.

        Returns:
            ``bool``:
                - True if key in self.data else False.
        """
        return key in self.data

    def __iter__(self) -> Generator[BaseAnnotation, None, None]:
        """Iterate over the objects, yielding one BaseAnnotation per position.

        Yields:
            ``BaseAnnotation``: Annotation holding all information of the object
            at the current position.
        """
        for object_id in range(self.size):
            yield self.get_object(object_id)

    # Accessibility methods: get or set objects into BaseFormat

    def get_device(self) -> torch.device:
        """Verify that all values of the data dict are on the same device and return it.

        Returns:
            ``torch.device``:
                - Device that holds the tensor values of the data dict.
        """
        devices_set = {value.device for value in self.data.values()}
        assert (
            len(devices_set) == 1
        ), f"All tensors on data dict should be on the same device, got {len(devices_set)} devices : {devices_set}."
        return next(iter(devices_set))

    def set_device(self, device: Union[torch.device, Literal["cuda", "cpu"]]):
        """Send all values of the data dict to ``device``.

        Args:
            device (``Union[torch.device, Literal['cuda', 'cpu']]``): Device to send tensors on.
        """
        for key, value in self.data.items():
            self.data[key] = value.to(device)

    def get(self, *keys: str) -> Union[Tensor, Tuple[Tensor]]:
        """Return the data dict values stored under each key in ``keys``.

        Args:
            keys (``str``): Key(s) of data dict.

        Returns:
            ``Union[Tensor, Tuple[Tensor]]``:
                - A single value when one key is given, otherwise a tuple of
                  values in the order of ``keys``.

        .. highlight:: python
        .. code-block:: python

            >>> format: BaseFormat
            >>> labels, boxes = format.get("labels", "boxes")
        """
        outputs = []
        for key in keys:
            assert (
                key in self
            ), f"{key} should be in self.data, got only {list(self.data.keys())}."
            outputs.append(self.data[key])
        if len(outputs) > 1:
            return tuple(outputs)
        return outputs[0]  # single element

    def set(self, key: str, value: Tensor):
        """Set a new key/value pair. Value should be of shape (N, ...) with N == self.size.

        Args:
            key (``str``): Key of value to set.
            value (``Tensor``): Data as tensor.
        """
        # An element-less value counts as 0 objects, whatever its shape.
        data_size = value.size()[0] if value.nelement() else 0
        assert (
            data_size == self.size
        ), f"New value size should be equal to self.size, got {data_size} and {self.size}."
        # Store the value on the device already used by the other entries.
        device = self.get_device()
        self.data[key] = value.to(device)

    @abstractmethod
    def get_object(self, indice: int) -> BaseAnnotation:
        """Return a BaseAnnotation object at position indice.

        Args:
            indice (``int``): Position of object to gather.

        Returns:
            ``BaseAnnotation``:
                - Annotation instance.
        """

    # Methods that change the internal state of Formats

    @abstractmethod
    def crop(self, top: int, left: int, height: int, width: int):
        """Crop boxes and masks from the top-left corner pixel and update spatial size.

        Args:
            top (``int``): Position to crop from top border.
            left (``int``): Position to crop from left border.
            height (``int``): Height of the crop.
            width (``int``): Width of the crop.
        """

    @abstractmethod
    def pad(self, left: int, top: int, right: int, bottom: int):
        """Pad boxes and masks and update spatial size.

        Args:
            left (``int``): Pad value on left border.
            top (``int``): Pad value on top border.
            right (``int``): Pad value on right border.
            bottom (``int``): Pad value on bottom border.
        """

    def set_boxes_format(self, box_format: Literal["XYWH", "XYXY", "CXCYWH"]):
        """Convert the stored boxes to ``box_format`` and record it.

        Args:
            box_format (``Literal['XYWH', 'XYXY', 'CXCYWH']``): Format to set for boxes.
        """
        assert box_format in [
            "XYWH",
            "XYXY",
            "CXCYWH",
        ], f"box_format should be one of these [XYWH, XYXY, CXCYWH], got {box_format}."
        converter = ConvertBoundingBoxFormat(box_format)
        boxes = self.get("boxes")
        self.data["boxes"] = converter(boxes)
        self.box_format = box_format

    def convert_labels(self, convert_labels_dict: Dict[int, int]):
        """Convert labels of Format.

        All replacements are decided from the labels as they were before the
        call, so a mapping such as ``{0: 1, 1: 2}`` turns 0 into 1 (not into 2).

        Args:
            convert_labels_dict (``Dict[int, int]``): Dict of conversion {old_label: new_label}.
        """
        labels = self.get("labels")
        # Write into a copy so the masks below always compare the original labels.
        new_labels = labels.clone()
        for old_label, new_label in convert_labels_dict.items():
            new_labels[labels == old_label] = new_label
        self.set("labels", new_labels)

    def normalize(self):
        """Normalize boxes values between 0 & 1 by dividing by spatial_size."""
        height, width = self.spatial_size
        boxes = self.get("boxes")
        # Coordinates are x-like, y-like, x-like, y-like in every supported
        # format, while spatial_size is (H, W): divide by (W, H, W, H).
        scale = torch.tensor([width, height, width, height], device=boxes.device)
        boxes = boxes / scale
        # recreate BoundingBoxes with normalized values
        boxes = BoundingBoxes(
            boxes,
            canvas_size=self.spatial_size,
            format=self.box_format,
            device=self.get_device(),
        )
        self.set("boxes", boxes)

    def rescale(self):
        """Rescale normalized boxes to true scale with spatial size."""
        boxes: BoundingBoxes = self.get("boxes")
        height, width = self.spatial_size
        # Same axis order as in normalize: (W, H, W, H) for x/y-like coordinates.
        scale = torch.tensor([width, height, width, height], device=boxes.device)
        boxes = (boxes * scale).int()
        boxes = BoundingBoxes(
            boxes,
            canvas_size=self.spatial_size,
            format=self.box_format,
            device=self.get_device(),
        )
        self.set("boxes", boxes)

    # Methods to process objects selection

    def sanitize(self, min_box_sides: float) -> BaseFormat:
        """Remove objects whose box has a side smaller than min_box_sides.

        Args:
            min_box_sides (``float``): Minimum side length for a box to be kept.

        Returns:
            ``BaseFormat``:
                - BaseFormat without small boxes.
        """
        # remove_small_boxes is fed boxes converted to XYXY.
        format_boxes = ConvertBoundingBoxFormat("XYXY")
        boxes = format_boxes(self.get("boxes"))
        safe_objects_indexes = remove_small_boxes(boxes, min_box_sides)
        return self[safe_objects_indexes]

    def sort_by_scores(self, descending: bool = True) -> BaseFormat:
        """Sort objects by scores.

        Args:
            descending (``bool``, **optional**): Sort objects with decreasing score
                order. Defaults to True.

        Returns:
            ``BaseFormat``:
                - Sorted BaseFormat.
        """
        assert "scores" in self, "Format should contain scores to run sort_by_scores."
        indexes = torch.argsort(self.get("scores"), descending=descending)
        return self[indexes]

    def max_detections(self, maximum_objects: int) -> BaseFormat:
        """Retrieve the N (maximum_objects) objects with highest scores.

        Args:
            maximum_objects (``int``): Number of objects to keep.

        Returns:
            ``BaseFormat``:
                - Format with the N objects with highest scores.
        """
        assert "scores" in self, "Format should contain scores to run max_detection."
        return self.sort_by_scores()[:maximum_objects]

    def confidence(self, confidence_threshold: float = 0.5) -> BaseFormat:
        """Keep only objects whose score is at least confidence_threshold.

        Args:
            confidence_threshold (``float``, **optional**): Minimum confidence to keep
                an object. Defaults to 0.5.

        Returns:
            ``BaseFormat``:
                - Format with only objects with scores >= confidence_threshold.
                  An empty format is returned as is.
        """
        assert "scores" in self, "Format should contain scores to run confidence."
        if self.size == 0:
            return self
        scores = self.get("scores")
        indexes = scores >= confidence_threshold
        return self[indexes]

    def nms(self, iou_threshold: float = 0.5) -> BaseFormat:
        """Apply the non maximum suppression algorithm to the format.

        Args:
            iou_threshold (``float``, **optional**): Threshold to consider boxes as
                overlapping. Defaults to 0.5.

        Returns:
            ``BaseFormat``:
                - Format with objects selected by NMS. An empty format is
                  returned as is.
        """
        assert "scores" in self, "Format should contain scores to run nms."
        if self.size == 0:
            return self
        boxes, scores = self.get("boxes", "scores")
        # torchvision's nms is fed float boxes converted to XYXY.
        format_convert = ConvertBoundingBoxFormat("XYXY")
        boxes: BoundingBoxes = format_convert(boxes)
        kept = nms(
            boxes.float(),
            scores=scores,
            iou_threshold=iou_threshold,
        )
        # Sort the kept indexes so the surviving objects keep their original order.
        indexes = kept.to(boxes.device).sort()[0]
        return self[indexes]

    # Methods to export Formats objects

    def coco(self, image_id: int = 1, annotation_id: int = 1) -> List[Dict[str, Any]]:
        """Export data as COCO annotations.

        Args:
            image_id (``int``, **optional**): Id to write for the "image_id" field of
                each annotation dict. Defaults to 1.
            annotation_id (``int``, **optional**): Id to write in the "id" field of the
                first annotation dict. Following ones are incremented from this one.
                Defaults to 1.

        Returns:
            ``List[Dict[str, Any]]``:
                - COCO annotations list for the image this Format corresponds to.
        """
        coco_annotations = []
        for detection_object in self:
            coco_annotations.append(
                detection_object.object_to_coco(
                    image_id=image_id, annotation_id=annotation_id
                )
            )
            annotation_id += 1
        return coco_annotations

    # Protection methods

    def match(format1: BaseFormat, format2: BaseFormat) -> bool:
        """Check if 2 Formats match for combinations.

        - check if both contain the same keys in their data dictionary.
        - check if their spatial sizes are equal.

        Args:
            format1 (``BaseFormat``): BaseFormat 1.
            format2 (``BaseFormat``): BaseFormat 2.

        Returns:
            ``bool``:
                - True if the BaseFormats match, else False.
        """
        keys1 = set(format1.data)
        keys2 = set(format2.data)
        return (keys1 == keys2) and format1.spatial_size == format2.spatial_size