diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d0bfad4116..f6573d58f3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,16 @@ All notable changes to this project will be documented in this file. () - Add GPU memory monitor hook () +- Add YOLOv9 model for Object Detection + (, ) +- Add OV inference for keypoint detection + () +- Add tiling for semantic segmentation + () +- Add 3D Object Detection task with MonoDETR model + () +- Add OpenVINO inference for 3D Object Detection task + () ### Enhancements @@ -29,29 +39,6 @@ All notable changes to this project will be documented in this file. () - Improve FMetric computation () - -### Bug fixes - -- Fix MaskDINO, MonoDETR recipes - () - -## \[2.3.0\] - -### New features - -- Add YOLOv9 model for Object Detection - (, ) -- Add OV inference for keypoint detection - () -- Add tiling for semantic segmentation - () -- Add 3D Object Detection task with MonoDETR model - () -- Add OpenVINO inference for 3D Object Detection task - () - -### Enhancements - - Upgrade OV, MAPI, and NNCF dependencies () - Instance Segmentation Model refactoring @@ -63,6 +50,8 @@ All notable changes to this project will be documented in this file. ### Bug fixes +- Fix MaskDINO, MonoDETR recipes + () - Fix a wrong HPO log () - Update model name in rotated detection recipes @@ -70,6 +59,35 @@ All notable changes to this project will be documented in this file. 
- Fix SupCon flag (https://github.com/openvinotoolkit/training_extensions/pull/4076) +## \[2.2.2\] + +### Bug fixes + +- BC improvement + () + +## \[2.2.1\] + +### Bug fixes + +- Fix empty annotation in tiling + () +- Fix patching early stopping in tools/converter.py, update headers in templates, change training schedule for classification + () +- Fix tensor type compatibility in dynamic soft label assigner and RTMDet head + () +- Ensure DETR target class indices are of type long in loss calculations + () +- Fix arrow format reader for multiclass ROI case + () + +### Enhancements + +- Decouple DinoV2 for semantic segmentation task + () +- Update Label Info handling + () + ## \[2.2.0\] ### New features @@ -194,6 +212,8 @@ All notable changes to this project will be documented in this file. () - Disable tiling classifier toggle in configurable parameters () +- Fix Ellipse shapes for Instance Segmentation + () ## \[v2.1.0\] diff --git a/docs/source/guide/release_notes/index.rst b/docs/source/guide/release_notes/index.rst index e0b8dc86383..6a826256dcf 100644 --- a/docs/source/guide/release_notes/index.rst +++ b/docs/source/guide/release_notes/index.rst @@ -4,6 +4,35 @@ Releases ..
toctree:: :maxdepth: 1 +v2.2.2 (2024.12) +---------------- + +Enhancements +^^^^^^^^^^^^ + +Bug fixes +^^^^^^^^^ + +- BC improvement + +v2.2.1 (2024.12) +---------------- + +Enhancements +^^^^^^^^^^^^ + +- Decouple DinoV2 for semantic segmentation task +- Update Label Info handling + +Bug fixes +^^^^^^^^^ + +- Fix empty annotation in tiling +- Fix patching early stopping in tools/converter.py, update headers in templates, change training schedule for classification +- Fix tensor type compatibility in dynamic soft label assigner and RTMDet head +- Ensure DETR target class indices are of type long in loss calculations +- Fix arrow format reader for multiclass ROI case + v2.2.0 (2024.10) ---------------- @@ -22,6 +51,8 @@ New features - Add Semi-SL MeanTeacher algorithm for Semantic Segmentation - Update head and h-label format for hierarchical label classification - Support configurable input size +- Revert the old workaround for detection confidence threshold +- Add Keypoint Detection legacy template Enhancements ^^^^^^^^^^^^ @@ -40,21 +71,38 @@ Enhancements - Change sematic segmentation to consider bbox only annotations - Relieve memory usage criteria on batch size 2 during adaptive batch size - Remove background label from RT Info for segmentation task +- Enable export of the feature vectors for semantic segmentation task - Prevent using too low confidence thresholds in detection +- Update HPO interface +- Bump onnx to 1.17.0 to address CVE-2024-5187 Bug fixes ^^^^^^^^^ +- Update anomaly base transforms to use square resizing - Fix Combined Dataloader & unlabeled warmup loss in Semi-SL - Revert #3579 to fix issues with replacing coco_instance with a different format in some dataset - Add num_devices in Engine for multi-gpu training - Add missing tile recipes and various tile recipe changes - Change categories mapping logic - Fix config converter for tiling +- Fix `BboxOverlaps2D` handling of empty ground-truth annotations in datasets.
- Fix num_trials calculation on dataset length less than num_class - Fix out_features in HierarchicalCBAMClsHead - Fix multilabel_accuracy of MixedHLabelAccuracy - Fix wrong indices setting in HLabelInfo +- Add legacy template LiteHRNet_18 template +- Model templates: rename model_status value 'DISCONTINUED' to 'OBSOLETE' +- Update MRCNN model export to include feature vector and saliency map +- Upgrade MAPI in 2.2 +- Fix applying model's hparams when loading model from checkpoint +- Fix incorrect all_groups order configuration in HLabelInfo +- Fix RTDETR recipes +- Fix wrong model name in converter & template +- Fix RTMDet Inst Explain Mode +- Fix RTDETR Explain Mode +- Fix classification and semantic segmentation tasks, when ROI provided for images +- Disable tiling classifier toggle in configurable parameters v2.1.0 (2024.07) ---------------- diff --git a/src/otx/algo/callbacks/adaptive_early_stopping.py b/src/otx/algo/callbacks/adaptive_early_stopping.py index 0f95e3c277d..754c6cf7c36 100644 --- a/src/otx/algo/callbacks/adaptive_early_stopping.py +++ b/src/otx/algo/callbacks/adaptive_early_stopping.py @@ -20,7 +20,7 @@ def __init__( self, monitor: str, min_delta: float = 0.0, - patience: int = 3, + patience: int = 10, verbose: bool = False, mode: str = "min", strict: bool = True, diff --git a/src/otx/algo/classification/backbones/vision_transformer.py b/src/otx/algo/classification/backbones/vision_transformer.py index c60f2ded49e..1255abff0d1 100644 --- a/src/otx/algo/classification/backbones/vision_transformer.py +++ b/src/otx/algo/classification/backbones/vision_transformer.py @@ -5,6 +5,7 @@ """Copy from mmpretrain/models/backbones/vision_transformer.py.""" from __future__ import annotations +import math from functools import partial from typing import TYPE_CHECKING, Any, Callable, Literal @@ -46,6 +47,7 @@ "vit-huge", "dinov2-s", "dinov2-small", + "dinov2-small-seg", "dinov2-b", "dinov2-base", "dinov2-l", @@ -87,6 +89,7 @@ class 
VisionTransformer(BaseModule): norm_layer: Normalization layer. act_layer: MLP activation layer. block_fn: Transformer block layer. + interpolate_offset: work-around offset to apply when interpolating positional embeddings lora: Enable LoRA training. """ @@ -147,6 +150,17 @@ class VisionTransformer(BaseModule): "num_heads": 6, "reg_tokens": 4, "no_embed_class": True, + }, + ), + **dict.fromkeys( + ["dinov2-small-seg"], # segmentation + { + "patch_size": 14, + "embed_dim": 384, + "depth": 12, + "num_heads": 6, + "reg_tokens": 0, + "no_embed_class": False, "init_values": 1e-5, }, ), @@ -193,9 +207,9 @@ class VisionTransformer(BaseModule): def __init__( # noqa: PLR0913 self, - arch: VIT_ARCH_TYPE = "vit-base", + arch: VIT_ARCH_TYPE | str = "vit-base", img_size: int | tuple[int, int] = 224, - patch_size: int | tuple[int, int] | None = None, + patch_size: int | None = None, in_chans: int = 3, num_classes: int = 1000, embed_dim: int | None = None, @@ -221,6 +235,7 @@ def __init__( # noqa: PLR0913 mlp_layer: nn.Module | None = None, act_layer: LayerType | None = None, norm_layer: LayerType | None = None, + interpolate_offset: float = 0.1, lora: bool = False, ) -> None: super().__init__() @@ -231,7 +246,7 @@ def __init__( # noqa: PLR0913 arch_settings: dict[str, Any] = self.arch_zoo[arch] self.img_size: int | tuple[int, int] = img_size - self.patch_size: int | tuple[int, int] = patch_size or arch_settings.get("patch_size", 16) + self.patch_size: int = patch_size or arch_settings.get("patch_size", 16) self.embed_dim = embed_dim or arch_settings.get("embed_dim", 768) depth = depth or arch_settings.get("depth", 12) num_heads = num_heads or arch_settings.get("num_heads", 12) @@ -251,6 +266,7 @@ def __init__( # noqa: PLR0913 self.no_embed_class = no_embed_class # don't embed prefix positions (includes reg) self.dynamic_img_size = dynamic_img_size self.grad_checkpointing = False + self.interpolate_offset = interpolate_offset embed_args = {} if dynamic_img_size: @@ -353,15 
+369,17 @@ def resize_positional_embeddings(pos_embed: torch.Tensor, new_shape: tuple[int, # convert dinov2 pretrained weights state_dict = torch.load(checkpoint_path) state_dict.pop("mask_token", None) - state_dict["reg_token"] = state_dict.pop("register_tokens") + if "register_tokens" in state_dict: # rename the source key only when the checkpoint actually has it + state_dict["reg_token"] = state_dict.pop("register_tokens") state_dict["cls_token"] = state_dict.pop("cls_token") + state_dict["pos_embed"][:, 0] img_size = (self.img_size, self.img_size) if isinstance(self.img_size, int) else self.img_size - patch_size = (self.patch_size, self.patch_size) if isinstance(self.patch_size, int) else self.patch_size - state_dict["pos_embed"] = resize_positional_embeddings( - state_dict.pop("pos_embed")[:, 1:], - (img_size[0] // patch_size[0], img_size[1] // patch_size[1]), - ) + patch_size = (self.patch_size, self.patch_size) + if state_dict["pos_embed"].shape != self.pos_embed.shape: + state_dict["pos_embed"] = resize_positional_embeddings( + state_dict.pop("pos_embed")[:, 1:], + (img_size[0] // patch_size[0], img_size[1] // patch_size[1]), + ) self.load_state_dict(state_dict, strict=False) else: msg = f"Unsupported `checkpoint_extension` {checkpoint_ext}, please choose from 'npz' or 'pth'." @@ -401,6 +419,137 @@ def _pos_embed(self, x: torch.Tensor) -> torch.Tensor: return self.pos_drop(x) + def interpolate_pos_encoding(self, x: torch.Tensor, w: int, h: int) -> torch.Tensor: + """Interpolates the positional encoding to match the input dimensions. + + Args: + x (torch.Tensor): Input tensor. + w (int): Width of the input image. + h (int): Height of the input image. + + Returns: + torch.Tensor: Tensor with interpolated positional encoding.
+ """ + previous_dtype = x.dtype + npatch = x.shape[1] - 1 # patch tokens only; x already carries the class token + n = self.pos_embed.shape[1] - 1 # stored patch positions, excluding the class slot at index 0 + if npatch == n and w == h: + return self.pos_embed + pos_embed = self.pos_embed.float() + class_pos_embed = pos_embed[:, 0] + patch_pos_embed = pos_embed[:, 1:] + dim = x.shape[-1] + w0 = w // self.patch_size + h0 = h // self.patch_size + m = int(math.sqrt(n)) # Recover the number of patches in each dimension + if m * m != n: + msg = f"Expected m * m to equal n, but got m={m}, n={n}" + raise ValueError(msg) + kwargs = {} + if self.interpolate_offset: + # fix float error by introducing small offset + sx = float(w0 + self.interpolate_offset) / m + sy = float(h0 + self.interpolate_offset) / m + kwargs["scale_factor"] = (sx, sy) + else: + # Simply specify an output size instead of a scale factor + kwargs["size"] = (w0, h0) + patch_pos_embed = nn.functional.interpolate( + patch_pos_embed.reshape(1, m, m, dim).permute(0, 3, 1, 2), + mode="bicubic", + **kwargs, + ) + patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) + return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype) + + def prepare_tokens_with_masks(self, x: torch.Tensor, masks: torch.Tensor | None = None) -> torch.Tensor: + """Prepare tokens with optional masks. + + Args: + x (torch.Tensor): Input tensor. + masks (torch.Tensor | None): Optional masks tensor. + + Returns: + torch.Tensor: Tensor with prepared tokens.
+ """ + _, _, w, h = x.shape + x = self.patch_embed(x) + if masks is not None: + x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x) + + x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1) + x = x + self.interpolate_pos_encoding(x, w, h) + + if self.reg_token is not None: + x = torch.cat( + ( + x[:, :1], + self.reg_token.expand(x.shape[0], -1, -1), + x[:, 1:], + ), + dim=1, + ) + + return x + + def _get_intermediate_layers_not_chunked(self, x: torch.Tensor, n: int | list[int] = 1) -> list[torch.Tensor]: + """Get intermediate layers without chunking. + + Args: + x (torch.Tensor): Input tensor. + n (int | list[int]): Number of last blocks to take, or a list of block indices to take. + + Returns: + list[torch.Tensor]: List of intermediate layer outputs. + """ + x = self.prepare_tokens_with_masks(x) + # If n is an int, take the n last blocks. If it's a list, take them + output, total_block_len = [], len(self.blocks) + blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n + for i, blk in enumerate(self.blocks): + x = blk(x) + if i in blocks_to_take: + output.append(x) + if len(output) != len(blocks_to_take): + msg = f"only {len(output)} / {len(blocks_to_take)} blocks found" + raise RuntimeError(msg) + return output + + def get_intermediate_layers( + self, + x: torch.Tensor, + n: int | list[int] = 1, # Layers or n last layers to take + reshape: bool = False, + return_class_token: bool = False, + norm: bool = True, + ) -> tuple: + """Get intermediate layers of the VisionTransformer. + + Args: + x (torch.Tensor): Input tensor. + n (int | list[int]): Number of last blocks to take, or a list of block indices to take. + reshape (bool): Whether to reshape the output feature maps. + return_class_token (bool): Whether to return the class token. + norm (bool): Whether to apply normalization to the outputs. + + Returns: + tuple: A tuple containing the intermediate layer outputs.
+ """ + outputs = self._get_intermediate_layers_not_chunked(x, n) + if norm: + outputs = [self.norm(out) for out in outputs] + class_tokens = [out[:, 0] for out in outputs] + outputs = [out[:, 1 + self.num_reg_tokens :] for out in outputs] + if reshape: + b, _, w, h = x.shape + outputs = [ + out.reshape(b, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous() + for out in outputs + ] + if return_class_token: + return tuple(zip(outputs, class_tokens)) + return tuple(outputs) + def forward( self, x: torch.Tensor, diff --git a/src/otx/algo/common/losses/cross_focal_loss.py b/src/otx/algo/common/losses/cross_focal_loss.py index e6311dd0ae0..bfec15c0c84 100644 --- a/src/otx/algo/common/losses/cross_focal_loss.py +++ b/src/otx/algo/common/losses/cross_focal_loss.py @@ -7,9 +7,8 @@ import torch import torch.nn.functional -from otx.utils.device import get_available_device from torch import Tensor, nn -from torch.amp import custom_fwd +from torch.cuda.amp import custom_fwd from .focal_loss import py_sigmoid_focal_loss @@ -80,7 +79,7 @@ def __init__( self.cls_criterion = cross_sigmoid_focal_loss - @custom_fwd(device_type=get_available_device(), cast_inputs=torch.float32) + @custom_fwd(cast_inputs=torch.float32) def forward( self, pred: Tensor, diff --git a/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py b/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py index 4807e5b4a36..e12b1d1b678 100644 --- a/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py +++ b/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py @@ -196,7 +196,7 @@ def assign( assigned_labels = assigned_gt_inds.new_full((num_bboxes,), -1) assigned_labels[valid_mask] = gt_labels[matched_gt_inds].long() max_overlaps = assigned_gt_inds.new_full((num_bboxes,), -INF, dtype=torch.float32) - max_overlaps[valid_mask] = matched_pred_ious + max_overlaps[valid_mask] = matched_pred_ious.to(max_overlaps) return AssignResult(num_gt, 
assigned_gt_inds, max_overlaps, labels=assigned_labels) def dynamic_k_matching( diff --git a/src/otx/algo/common/utils/assigners/hungarian_matcher.py b/src/otx/algo/common/utils/assigners/hungarian_matcher.py index a1fcc316f87..4409bc6eb29 100644 --- a/src/otx/algo/common/utils/assigners/hungarian_matcher.py +++ b/src/otx/algo/common/utils/assigners/hungarian_matcher.py @@ -279,7 +279,7 @@ def batch_preparation( "pred_boxes": outputs["pred_boxes"][i], "pred_masks": outputs["pred_masks"][i] if "pred_masks" in outputs else None, "target_boxes": targets[i]["boxes"], - "target_labels": targets[i]["labels"], + "target_labels": targets[i]["labels"].long(), "target_mask": targets[i]["masks"] if "masks" in targets[i] else None, } for i in range(batch_size) diff --git a/src/otx/algo/detection/heads/rtmdet_head.py b/src/otx/algo/detection/heads/rtmdet_head.py index 2a04483dd09..a2ab8a95c82 100644 --- a/src/otx/algo/detection/heads/rtmdet_head.py +++ b/src/otx/algo/detection/heads/rtmdet_head.py @@ -491,7 +491,7 @@ def _get_targets_single( # type: ignore[override] if len(pos_inds) > 0: # point-based pos_bbox_targets = sampling_result.pos_gt_bboxes - bbox_targets[pos_inds, :] = pos_bbox_targets + bbox_targets[pos_inds, :] = pos_bbox_targets.to(bbox_targets) labels[pos_inds] = sampling_result.pos_gt_labels if self.train_cfg["pos_weight"] <= 0: diff --git a/src/otx/algo/detection/losses/rtdetr_loss.py b/src/otx/algo/detection/losses/rtdetr_loss.py index 361dfaa75c0..d71ca7989dd 100644 --- a/src/otx/algo/detection/losses/rtdetr_loss.py +++ b/src/otx/algo/detection/losses/rtdetr_loss.py @@ -77,7 +77,7 @@ def loss_labels_vfl( src_logits = outputs["pred_logits"] target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) target_classes = torch.full(src_logits.shape[:2], self.num_classes, dtype=torch.int64, device=src_logits.device) - target_classes[idx] = target_classes_o + target_classes[idx] = target_classes_o.long() target = 
nn.functional.one_hot(target_classes, num_classes=self.num_classes + 1)[..., :-1] target_score_o = torch.zeros_like(target_classes, dtype=src_logits.dtype) diff --git a/src/otx/algo/segmentation/backbones/__init__.py b/src/otx/algo/segmentation/backbones/__init__.py index 4c7a44cee9b..8b633cc21f8 100644 --- a/src/otx/algo/segmentation/backbones/__init__.py +++ b/src/otx/algo/segmentation/backbones/__init__.py @@ -3,8 +3,7 @@ # """Backbone modules for OTX segmentation model.""" -from .dinov2 import DinoVisionTransformer from .litehrnet import LiteHRNetBackbone from .mscan import MSCAN -__all__ = ["LiteHRNetBackbone", "DinoVisionTransformer", "MSCAN"] +__all__ = ["LiteHRNetBackbone", "MSCAN"] diff --git a/src/otx/algo/segmentation/backbones/dinov2.py b/src/otx/algo/segmentation/backbones/dinov2.py deleted file mode 100644 index ce1d605fe38..00000000000 --- a/src/otx/algo/segmentation/backbones/dinov2.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (C) 2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -"""DINO-V2 model for the OTX classification.""" - -from __future__ import annotations - -import logging -import os -from functools import partial -from pathlib import Path - -import torch -from torch import nn - -from otx.algo.utils.mmengine_utils import load_checkpoint_to_model, load_from_http -from otx.utils.utils import get_class_initial_arguments - -logger = logging.getLogger() - - -class DinoVisionTransformer(nn.Module): - """DINO-v2 Model.""" - - def __init__( - self, - model_name: str, - freeze_backbone: bool, - out_index: list[int], - pretrained_weights: str | None = None, - ): - super().__init__() - self._init_args = get_class_initial_arguments() - - ci_data_root = os.environ.get("CI_DATA_ROOT") - pretrained: bool = True - if ci_data_root is not None and Path(ci_data_root).exists(): - pretrained = False - - self.backbone = torch.hub.load(repo_or_dir="facebookresearch/dinov2", model=model_name, pretrained=pretrained) - - if ci_data_root is not None 
and Path(ci_data_root).exists(): - ckpt_filename = f"{model_name}4_pretrain.pth" - ckpt_path = Path(ci_data_root) / "torch" / "hub" / "checkpoints" / ckpt_filename - if not ckpt_path.exists(): - msg = ( - f"Internal cache was specified but cannot find weights file: {ckpt_filename}. load from torch hub." - ) - logger.warning(msg) - self.backbone = torch.hub.load(repo_or_dir="facebookresearch/dinov2", model=model_name, pretrained=True) - else: - self.backbone.load_state_dict(torch.load(ckpt_path)) - - if freeze_backbone: - self._freeze_backbone(self.backbone) - - # take intermediate layers to preserve spatial dimension - self.backbone.forward = partial( - self.backbone.get_intermediate_layers, - n=out_index, - reshape=True, - ) - - if pretrained_weights is not None: - self.load_pretrained_weights(pretrained_weights) - - def _freeze_backbone(self, backbone: nn.Module) -> None: - """Freeze the backbone.""" - for _, v in backbone.named_parameters(): - v.requires_grad = False - - def init_weights(self) -> None: - """Initialize the weights.""" - # restrict rewriting backbone pretrained weights from torch.hub - # unless weights passed explicitly in config - if self.init_cfg: - return super().init_weights() - return None - - def forward(self, imgs: torch.Tensor) -> torch.Tensor: - """Forward function.""" - return self.backbone(imgs) - - def load_pretrained_weights(self, pretrained: str | None = None, prefix: str = "") -> None: - """Initialize weights.""" - checkpoint = None - if isinstance(pretrained, str) and Path(pretrained).exists(): - checkpoint = torch.load(pretrained, "cpu") - print(f"init weight - {pretrained}") - elif pretrained is not None: - cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints" - checkpoint = load_from_http(filename=pretrained, map_location="cpu", model_dir=cache_dir) - print(f"init weight - {pretrained}") - if checkpoint is not None: - load_checkpoint_to_model(self, checkpoint, prefix=prefix) - - def __reduce__(self): - return 
(DinoVisionTransformer, self._init_args) diff --git a/src/otx/algo/segmentation/dino_v2_seg.py b/src/otx/algo/segmentation/dino_v2_seg.py index cb89a472ed7..70ffe23c9a7 100644 --- a/src/otx/algo/segmentation/dino_v2_seg.py +++ b/src/otx/algo/segmentation/dino_v2_seg.py @@ -5,9 +5,14 @@ from __future__ import annotations +from functools import partial +from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar +from urllib.parse import urlparse -from otx.algo.segmentation.backbones import DinoVisionTransformer +from torch.hub import download_url_to_file + +from otx.algo.classification.backbones.vision_transformer import VisionTransformer from otx.algo.segmentation.heads import FCNHead from otx.algo.segmentation.losses import CrossEntropyLossWithIgnore from otx.algo.segmentation.segmentors import BaseSegmentationModel @@ -21,18 +26,41 @@ class DinoV2Seg(OTXSegmentationModel): """DinoV2Seg Model.""" AVAILABLE_MODEL_VERSIONS: ClassVar[list[str]] = [ - "dinov2_vits14", + "dinov2-small-seg", ] + PRETRAINED_WEIGHTS: ClassVar[dict[str, str]] = { + "dinov2-small-seg": "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth", + } def _build_model(self) -> nn.Module: if self.model_name not in self.AVAILABLE_MODEL_VERSIONS: msg = f"Model version {self.model_name} is not supported." 
raise ValueError(msg) - - backbone = DinoVisionTransformer(model_name=self.model_name, freeze_backbone=True, out_index=[8, 9, 10, 11]) + backbone = VisionTransformer(arch=self.model_name, img_size=self.input_size) + backbone.forward = partial( # type: ignore[method-assign] + backbone.get_intermediate_layers, + n=[8, 9, 10, 11], + reshape=True, + ) decode_head = FCNHead(self.model_name, num_classes=self.num_classes) criterion = CrossEntropyLossWithIgnore(ignore_index=self.label_info.ignore_index) # type: ignore[attr-defined] + backbone.init_weights() + if self.model_name in self.PRETRAINED_WEIGHTS: + print(f"init weight - {self.PRETRAINED_WEIGHTS[self.model_name]}") + parts = urlparse(self.PRETRAINED_WEIGHTS[self.model_name]) + filename = Path(parts.path).name + cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints" + cache_dir.mkdir(parents=True, exist_ok=True) # make sure the cache directory exists before downloading into it + cache_file = cache_dir / filename + if not cache_file.exists(): + download_url_to_file(self.PRETRAINED_WEIGHTS[self.model_name], cache_file, "", progress=True) + backbone.load_pretrained(checkpoint_path=cache_file) + + # freeze backbone + for _, v in backbone.named_parameters(): + v.requires_grad = False + return BaseSegmentationModel( backbone=backbone, decode_head=decode_head, diff --git a/src/otx/algo/segmentation/heads/fcn_head.py b/src/otx/algo/segmentation/heads/fcn_head.py index 67b0fe6fc86..de877b2825a 100644 --- a/src/otx/algo/segmentation/heads/fcn_head.py +++ b/src/otx/algo/segmentation/heads/fcn_head.py @@ -216,7 +216,7 @@ class FCNHead: "aggregator_merge_norm": "None", "aggregator_use_concat": False, }, - "dinov2_vits14": { + "dinov2-small-seg": { "in_channels": [384, 384, 384, 384], "in_index": [0, 1, 2, 3], "input_transform": "resize_concat", @@ -233,7 +233,7 @@ def __new__(cls, model_name: str, num_classes: int) -> FCNHeadModule: normalization = ( partial(build_norm_layer, nn.SyncBatchNorm, requires_grad=True) - if model_name == "dinov2_vits14" + if model_name == "dinov2-small-seg" else partial(build_norm_layer,
nn.BatchNorm2d, requires_grad=True) ) diff --git a/src/otx/core/data/dataset/action_classification.py b/src/otx/core/data/dataset/action_classification.py index 23391984423..4cfa4808487 100644 --- a/src/otx/core/data/dataset/action_classification.py +++ b/src/otx/core/data/dataset/action_classification.py @@ -37,6 +37,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.BGR, stack_images: bool = True, to_tv_image: bool = True, + data_format: str = "", ) -> None: super().__init__( dm_subset, diff --git a/src/otx/core/data/dataset/anomaly.py b/src/otx/core/data/dataset/anomaly.py index 1a0149423c6..091e7b4066d 100644 --- a/src/otx/core/data/dataset/anomaly.py +++ b/src/otx/core/data/dataset/anomaly.py @@ -57,6 +57,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, to_tv_image: bool = True, + data_format: str = "", ) -> None: self.task_type = task_type super().__init__( diff --git a/src/otx/core/data/dataset/base.py b/src/otx/core/data/dataset/base.py index 239a5ded307..5626690d6ca 100644 --- a/src/otx/core/data/dataset/base.py +++ b/src/otx/core/data/dataset/base.py @@ -70,6 +70,7 @@ class OTXDataset(Dataset, Generic[T_OTXDataEntity]): max_refetch: Maximum number of images to fetch in cache image_color_channel: Color channel of images stack_images: Whether or not to stack images in collate function in OTXBatchData entity. + data_format: Source data format, which was originally passed to datumaro (could be arrow for instance). 
""" @@ -83,6 +84,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, to_tv_image: bool = True, + data_format: str = "", ) -> None: self.dm_subset = dm_subset self.transforms = transforms @@ -92,8 +94,11 @@ def __init__( self.image_color_channel = image_color_channel self.stack_images = stack_images self.to_tv_image = to_tv_image + self.data_format = data_format - if self.dm_subset.categories(): + if self.dm_subset.categories() and data_format == "arrow": + self.label_info = LabelInfo.from_dm_label_groups_arrow(self.dm_subset.categories()[AnnotationType.label]) + elif self.dm_subset.categories(): self.label_info = LabelInfo.from_dm_label_groups(self.dm_subset.categories()[AnnotationType.label]) else: self.label_info = NullLabelInfo() diff --git a/src/otx/core/data/dataset/classification.py b/src/otx/core/data/dataset/classification.py index 8f4f5ffc241..40b9c4a99fd 100644 --- a/src/otx/core/data/dataset/classification.py +++ b/src/otx/core/data/dataset/classification.py @@ -39,7 +39,10 @@ def _get_item_impl(self, index: int) -> MulticlassClsDataEntity | None: labels_ids = [ label["label"]["_id"] for label in roi["labels"] if label["label"]["domain"] == "CLASSIFICATION" ] - label_anns = [self.label_info.label_names.index(label_id) for label_id in labels_ids] + if self.data_format == "arrow": + label_anns = [self.label_info.label_ids.index(label_id) for label_id in labels_ids] + else: + label_anns = [self.label_info.label_names.index(label_id) for label_id in labels_ids] else: # extract labels from annotations label_anns = [ann.label for ann in item.annotations if isinstance(ann, Label)] @@ -80,17 +83,21 @@ def _get_item_impl(self, index: int) -> MultilabelClsDataEntity | None: ignored_labels: list[int] = [] # This should be assigned form item img_data, img_shape, _ = self._get_img_data_and_shape(img) - label_anns = [] + label_ids = set() for ann in item.annotations: + # multilabel information stored in 
'multi_label_ids' attribute when the source format is arrow + if "multi_label_ids" in ann.attributes: + for lbl_idx in ann.attributes["multi_label_ids"]: + label_ids.add(lbl_idx) + if isinstance(ann, Label): - label_anns.append(ann) + label_ids.add(ann.label) else: # If the annotation is not Label, it should be converted to Label. # For Chained Task: Detection (Bbox) -> Classification (Label) label = Label(label=ann.label) - if label not in label_anns: - label_anns.append(label) - labels = torch.as_tensor([ann.label for ann in label_anns]) + label_ids.add(label.label) + labels = torch.as_tensor(list(label_ids)) entity = MultilabelClsDataEntity( image=img_data, @@ -128,13 +135,22 @@ def __init__(self, **kwargs) -> None: self.dm_categories = self.dm_subset.categories()[AnnotationType.label] # Hlabel classification used HLabelInfo to insert the HLabelData. - self.label_info = HLabelInfo.from_dm_label_groups(self.dm_categories) + if self.data_format == "arrow": + # arrow format stores label IDs as names, have to deal with that here + self.label_info = HLabelInfo.from_dm_label_groups_arrow(self.dm_categories) + else: + self.label_info = HLabelInfo.from_dm_label_groups(self.dm_categories) + + self.id_to_name_mapping = dict(zip(self.label_info.label_ids, self.label_info.label_names)) + self.id_to_name_mapping[""] = "" + if self.label_info.num_multiclass_heads == 0: msg = "The number of multiclass heads should be larger than 0." raise ValueError(msg) - for dm_item in self.dm_subset: - self._add_ancestors(dm_item.annotations) + if self.data_format != "arrow": + for dm_item in self.dm_subset: + self._add_ancestors(dm_item.annotations) def _add_ancestors(self, label_anns: list[Label]) -> None: """Add ancestors recursively if some label miss the ancestor information. 
@@ -149,7 +165,7 @@ def _add_ancestors(self, label_anns: list[Label]) -> None: """ def _label_idx_to_name(idx: int) -> str: - return self.label_info.label_names[idx] + return self.dm_categories[idx].name def _label_name_to_idx(name: str) -> int: indices = [idx for idx, val in enumerate(self.label_info.label_names) if val == name] @@ -157,6 +173,8 @@ def _label_name_to_idx(name: str) -> int: def _get_label_group_idx(label_name: str) -> int: if isinstance(self.label_info, HLabelInfo): + if self.data_format == "arrow": + return self.label_info.class_to_group_idx[self.id_to_name_mapping[label_name]][0] return self.label_info.class_to_group_idx[label_name][0] msg = f"self.label_info should have HLabelInfo type, got {type(self.label_info)}" raise ValueError(msg) @@ -197,17 +215,22 @@ def _get_item_impl(self, index: int) -> HlabelClsDataEntity | None: ignored_labels: list[int] = [] # This should be assigned form item img_data, img_shape, _ = self._get_img_data_and_shape(img) - label_anns = [] + label_ids = set() for ann in item.annotations: + # in h-cls scenario multilabel information stored in 'multi_label_ids' attribute + if "multi_label_ids" in ann.attributes: + for lbl_idx in ann.attributes["multi_label_ids"]: + label_ids.add(lbl_idx) + if isinstance(ann, Label): - label_anns.append(ann) + label_ids.add(ann.label) else: # If the annotation is not Label, it should be converted to Label. 
# For Chained Task: Detection (Bbox) -> Classification (Label) label = Label(label=ann.label) - if label not in label_anns: - label_anns.append(label) - hlabel_labels = self._convert_label_to_hlabel_format(label_anns, ignored_labels) + label_ids.add(label.label) + + hlabel_labels = self._convert_label_to_hlabel_format([Label(label=idx) for idx in label_ids], ignored_labels) entity = HlabelClsDataEntity( image=img_data, @@ -256,18 +279,18 @@ def _convert_label_to_hlabel_format(self, label_anns: list[Label], ignored_label class_indices[i] = -1 for ann in label_anns: - ann_name = self.dm_categories.items[ann.label].name - ann_parent = self.dm_categories.items[ann.label].parent + if self.data_format == "arrow": + # skips unknown labels for instance, the empty one + if self.dm_categories.items[ann.label].name not in self.id_to_name_mapping: + continue + ann_name = self.id_to_name_mapping[self.dm_categories.items[ann.label].name] + else: + ann_name = self.dm_categories.items[ann.label].name group_idx, in_group_idx = self.label_info.class_to_group_idx[ann_name] - (parent_group_idx, parent_in_group_idx) = ( - self.label_info.class_to_group_idx[ann_parent] if ann_parent else (None, None) - ) if group_idx < num_multiclass_heads: class_indices[group_idx] = in_group_idx - if parent_group_idx is not None and parent_in_group_idx is not None: - class_indices[parent_group_idx] = parent_in_group_idx - elif not ignored_labels or ann.label not in ignored_labels: + elif ann.label not in ignored_labels: class_indices[num_multiclass_heads + in_group_idx] = 1 else: class_indices[num_multiclass_heads + in_group_idx] = -1 diff --git a/src/otx/core/data/dataset/instance_segmentation.py b/src/otx/core/data/dataset/instance_segmentation.py index 27384a3df9d..8b30366a97e 100644 --- a/src/otx/core/data/dataset/instance_segmentation.py +++ b/src/otx/core/data/dataset/instance_segmentation.py @@ -5,13 +5,15 @@ from __future__ import annotations +import warnings +from collections import 
defaultdict from functools import partial from typing import Callable import numpy as np import torch +from datumaro import Bbox, Ellipse, Image, Polygon from datumaro import Dataset as DmDataset -from datumaro import Image, Polygon from torchvision import tv_tensors from otx.core.data.entity.base import ImageInfo @@ -42,23 +44,49 @@ def _get_item_impl(self, index: int) -> InstanceSegDataEntity | None: ignored_labels: list[int] = [] img_data, img_shape, _ = self._get_img_data_and_shape(img) + anno_collection: dict[str, list] = defaultdict(list) + for anno in item.annotations: + anno_collection[anno.__class__.__name__].append(anno) + gt_bboxes, gt_labels, gt_masks, gt_polygons = [], [], [], [] - for annotation in item.annotations: - if isinstance(annotation, Polygon): - bbox = np.array(annotation.get_bbox(), dtype=np.float32) + # TODO(Eugene): https://jira.devtools.intel.com/browse/CVS-159363 + # Temporary solution to handle multiple annotation types. + # Ideally, we should pre-filter annotations during initialization of the dataset. 
+ if Polygon.__name__ in anno_collection: # Polygon for InstSeg has higher priority + for poly in anno_collection[Polygon.__name__]: + bbox = Bbox(*poly.get_bbox()).points gt_bboxes.append(bbox) - gt_labels.append(annotation.label) + gt_labels.append(poly.label) if self.include_polygons: - gt_polygons.append(annotation) + gt_polygons.append(poly) else: - gt_masks.append(polygon_to_bitmap([annotation], *img_shape)[0]) - - # convert xywh to xyxy format - bboxes = np.array(gt_bboxes, dtype=np.float32) if gt_bboxes else np.empty((0, 4)) - bboxes[:, 2:] += bboxes[:, :2] + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + elif Bbox.__name__ in anno_collection: + bboxes = anno_collection[Bbox.__name__] + gt_bboxes = [ann.points for ann in bboxes] + gt_labels = [ann.label for ann in bboxes] + for box in bboxes: + poly = Polygon(box.as_polygon()) + if self.include_polygons: + gt_polygons.append(poly) + else: + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + elif Ellipse.__name__ in anno_collection: + for ellipse in anno_collection[Ellipse.__name__]: + bbox = Bbox(*ellipse.get_bbox()).points + gt_bboxes.append(bbox) + gt_labels.append(ellipse.label) + poly = Polygon(ellipse.as_polygon(num_points=10)) + if self.include_polygons: + gt_polygons.append(poly) + else: + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + else: + warnings.warn(f"No valid annotations found for image {item.id}!", stacklevel=2) + bboxes = np.stack(gt_bboxes, dtype=np.float32, axis=0) if gt_bboxes else np.empty((0, 4)) masks = np.stack(gt_masks, axis=0) if gt_masks else np.zeros((0, *img_shape), dtype=bool) labels = np.array(gt_labels, dtype=np.int64) diff --git a/src/otx/core/data/dataset/keypoint_detection.py b/src/otx/core/data/dataset/keypoint_detection.py index c74b77c9319..47989b653aa 100644 --- a/src/otx/core/data/dataset/keypoint_detection.py +++ b/src/otx/core/data/dataset/keypoint_detection.py @@ -39,6 +39,7 @@ def __init__( image_color_channel: ImageColorChannel 
= ImageColorChannel.RGB, stack_images: bool = True, to_tv_image: bool = True, + data_format: str = "", ) -> None: super().__init__( dm_subset, @@ -49,14 +50,17 @@ def __init__( image_color_channel, stack_images, to_tv_image, + data_format, ) self.dm_subset = self._get_single_bbox_dataset(dm_subset) if self.dm_subset.categories(): + kp_labels = self.dm_subset.categories()[AnnotationType.points][0].labels self.label_info = LabelInfo( - label_names=self.dm_subset.categories()[AnnotationType.points][0].labels, + label_names=kp_labels, label_groups=[], + label_ids=[str(i) for i in range(len(kp_labels))], ) else: self.label_info = NullLabelInfo() diff --git a/src/otx/core/data/dataset/object_detection_3d.py b/src/otx/core/data/dataset/object_detection_3d.py index 4740298ba90..980178ce55a 100644 --- a/src/otx/core/data/dataset/object_detection_3d.py +++ b/src/otx/core/data/dataset/object_detection_3d.py @@ -40,6 +40,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, to_tv_image: bool = False, + data_format: str = "", max_objects: int = 50, ) -> None: super().__init__( @@ -51,6 +52,7 @@ def __init__( image_color_channel, stack_images, to_tv_image, + data_format, ) self.max_objects = max_objects self.subset_type = list(self.dm_subset.get_subset_info())[-1].split(":")[0] diff --git a/src/otx/core/data/dataset/segmentation.py b/src/otx/core/data/dataset/segmentation.py index 0ab803b4f58..5672989e7fd 100644 --- a/src/otx/core/data/dataset/segmentation.py +++ b/src/otx/core/data/dataset/segmentation.py @@ -168,6 +168,7 @@ def __init__( stack_images: bool = True, to_tv_image: bool = True, ignore_index: int = 255, + data_format: str = "", ) -> None: super().__init__( dm_subset, @@ -188,6 +189,7 @@ def __init__( label_names=self.label_info.label_names, label_groups=self.label_info.label_groups, ignore_index=ignore_index, + label_ids=self.label_info.label_ids, ) self.ignore_index = ignore_index diff --git 
a/src/otx/core/data/dataset/tile.py b/src/otx/core/data/dataset/tile.py index d69c94b03e0..8fae8133afa 100644 --- a/src/otx/core/data/dataset/tile.py +++ b/src/otx/core/data/dataset/tile.py @@ -7,6 +7,8 @@ import logging as log import operator +import warnings +from collections import defaultdict from copy import deepcopy from itertools import product from typing import TYPE_CHECKING, Callable @@ -16,7 +18,7 @@ import torch from datumaro import Dataset as DmDataset from datumaro import DatasetItem, Image -from datumaro.components.annotation import AnnotationType, Bbox, ExtractedMask, Polygon +from datumaro.components.annotation import AnnotationType, Bbox, Ellipse, ExtractedMask, Polygon from datumaro.plugins.tiling import Tile from datumaro.plugins.tiling.tile import _apply_offset from datumaro.plugins.tiling.util import ( @@ -97,6 +99,7 @@ def __init__( self._tile_size = tile_size self._tile_ann_func_map[AnnotationType.polygon] = OTXTileTransform._tile_polygon self._tile_ann_func_map[AnnotationType.mask] = OTXTileTransform._tile_masks + self._tile_ann_func_map[AnnotationType.ellipse] = OTXTileTransform._tile_ellipse self.with_full_img = with_full_img @staticmethod @@ -161,6 +164,45 @@ def _tile_masks( attributes=deepcopy(ann.attributes), ) + @staticmethod + def _tile_ellipse( + ann: Ellipse, + roi_box: sg.Polygon, + threshold_drop_ann: float = 0.8, + *args, # noqa: ARG004 + **kwargs, # noqa: ARG004 + ) -> Polygon | None: + polygon = sg.Polygon(ann.get_points(num_points=10)) + + # NOTE: polygon may be invalid, e.g. 
self-intersecting + if not roi_box.intersects(polygon) or not polygon.is_valid: + return None + + # NOTE: intersection may return a GeometryCollection or MultiPolygon + inter = polygon.intersection(roi_box) + if isinstance(inter, (sg.GeometryCollection, sg.MultiPolygon)): + shapes = [(geom, geom.area) for geom in list(inter.geoms) if geom.is_valid] + if not shapes: + return None + + inter, _ = max(shapes, key=operator.itemgetter(1)) + + if not isinstance(inter, sg.Polygon) and not inter.is_valid: + return None + + prop_area = inter.area / polygon.area + + if prop_area < threshold_drop_ann: + return None + + inter = _apply_offset(inter, roi_box) + + return Polygon( + points=[p for xy in inter.exterior.coords for p in xy], + attributes=deepcopy(ann.attributes), + label=ann.label, + ) + def _extract_rois(self, image: Image) -> list[BboxIntCoords]: """Extracts Tile ROIs from the given image. @@ -507,24 +549,51 @@ def _get_item_impl(self, index: int) -> TileInstSegDataEntity: # type: ignore[o img = item.media_as(Image) img_data, img_shape, _ = self._get_img_data_and_shape(img) + anno_collection: dict[str, list] = defaultdict(list) + for anno in item.annotations: + anno_collection[anno.__class__.__name__].append(anno) + gt_bboxes, gt_labels, gt_masks, gt_polygons = [], [], [], [] - for annotation in item.annotations: - if isinstance(annotation, Polygon): - bbox = np.array(annotation.get_bbox(), dtype=np.float32) + # TODO(Eugene): https://jira.devtools.intel.com/browse/CVS-159363 + # Temporary solution to handle multiple annotation types. + # Ideally, we should pre-filter annotations during initialization of the dataset. 
+ + if Polygon.__name__ in anno_collection: # Polygon for InstSeg has higher priority + for poly in anno_collection[Polygon.__name__]: + bbox = Bbox(*poly.get_bbox()).points gt_bboxes.append(bbox) - gt_labels.append(annotation.label) + gt_labels.append(poly.label) if self._dataset.include_polygons: - gt_polygons.append(annotation) + gt_polygons.append(poly) else: - gt_masks.append(polygon_to_bitmap([annotation], *img_shape)[0]) - - # convert xywh to xyxy format - bboxes = np.array(gt_bboxes, dtype=np.float32) - bboxes[:, 2:] += bboxes[:, :2] + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + elif Bbox.__name__ in anno_collection: + boxes = anno_collection[Bbox.__name__] + gt_bboxes = [ann.points for ann in boxes] + gt_labels = [ann.label for ann in boxes] + for box in boxes: + poly = Polygon(box.as_polygon()) + if self._dataset.include_polygons: + gt_polygons.append(poly) + else: + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + elif Ellipse.__name__ in anno_collection: + for ellipse in anno_collection[Ellipse.__name__]: + bbox = Bbox(*ellipse.get_bbox()).points + gt_bboxes.append(bbox) + gt_labels.append(ellipse.label) + poly = Polygon(ellipse.as_polygon(num_points=10)) + if self._dataset.include_polygons: + gt_polygons.append(poly) + else: + gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0]) + else: + warnings.warn(f"No valid annotations found for image {item.id}!", stacklevel=2) - masks = np.stack(gt_masks, axis=0) if gt_masks else np.zeros((0, *img_shape), dtype=bool) + bboxes = np.stack(gt_bboxes, dtype=np.float32) if gt_bboxes else np.empty((0, 4), dtype=np.float32) + masks = np.stack(gt_masks, axis=0) if gt_masks else np.empty((0, *img_shape), dtype=bool) labels = np.array(gt_labels, dtype=np.int64) tile_entities, tile_attrs = self.get_tiles(img_data, item, index) diff --git a/src/otx/core/data/factory.py b/src/otx/core/data/factory.py index fd731109269..1f1e500b0fb 100644 --- a/src/otx/core/data/factory.py +++ 
b/src/otx/core/data/factory.py @@ -73,6 +73,7 @@ def create( # noqa: PLR0911 dm_subset: DmDataset, cfg_subset: SubsetConfig, mem_cache_handler: MemCacheHandlerBase, + data_format: str, mem_cache_img_max_size: tuple[int, int] | None = None, image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, @@ -85,6 +86,7 @@ def create( # noqa: PLR0911 common_kwargs = { "dm_subset": dm_subset, "transforms": transforms, + "data_format": data_format, "mem_cache_handler": mem_cache_handler, "mem_cache_img_max_size": mem_cache_img_max_size, "image_color_channel": image_color_channel, diff --git a/src/otx/core/data/module.py b/src/otx/core/data/module.py index f9b7cac8fd4..6449a07c270 100644 --- a/src/otx/core/data/module.py +++ b/src/otx/core/data/module.py @@ -107,13 +107,6 @@ def __init__( # noqa: PLR0913 self.subsets: dict[str, OTXDataset] = {} self.save_hyperparameters(ignore=["input_size"]) - # TODO (Jaeguk): This is workaround for a bug in Datumaro. - # These lines should be removed after next datumaro release. 
- # https://github.com/openvinotoolkit/datumaro/pull/1223/files - from datumaro.plugins.data_formats.video import VIDEO_EXTENSIONS - - VIDEO_EXTENSIONS.append(".mp4") - dataset = DmDataset.import_from(self.data_root, format=self.data_format) if self.task != "H_LABEL_CLS": dataset = pre_filtering( @@ -195,6 +188,7 @@ def __init__( # noqa: PLR0913 dm_subset=dm_subset.as_dataset(), cfg_subset=config_mapping[name], mem_cache_handler=mem_cache_handler, + data_format=self.data_format, mem_cache_img_max_size=mem_cache_img_max_size, image_color_channel=image_color_channel, stack_images=stack_images, @@ -238,6 +232,7 @@ def __init__( # noqa: PLR0913 include_polygons=include_polygons, ignore_index=ignore_index, vpm_config=vpm_config, + data_format=self.data_format, ) self.subsets[transform_key] = unlabeled_dataset else: @@ -252,6 +247,7 @@ def __init__( # noqa: PLR0913 include_polygons=include_polygons, ignore_index=ignore_index, vpm_config=vpm_config, + data_format=self.data_format, ) self.subsets[name] = unlabeled_dataset diff --git a/src/otx/core/data/pre_filtering.py b/src/otx/core/data/pre_filtering.py index 13fc08c7ebc..90487367d17 100644 --- a/src/otx/core/data/pre_filtering.py +++ b/src/otx/core/data/pre_filtering.py @@ -88,7 +88,7 @@ def remove_unused_labels( used_labels = [0, *used_labels] if data_format == "common_semantic_segmentation_with_subset_dirs" and len(original_categories) < len(used_labels): msg = ( - "There are labeles mismatch in dataset categories and actuall categories comes from semantic masks." + "There are labels mismatch in dataset categories and actual categories comes from semantic masks." "Please, check `dataset_meta.json` file." 
) raise ValueError(msg) diff --git a/src/otx/core/model/base.py b/src/otx/core/model/base.py index a48325ca98c..a190a5f1bc6 100644 --- a/src/otx/core/model/base.py +++ b/src/otx/core/model/base.py @@ -404,6 +404,11 @@ def load_state_dict_incrementally(self, ckpt: dict[str, Any], *args, **kwargs) - msg = "Checkpoint should have `label_info`." raise ValueError(msg, ckpt_label_info) + if not hasattr(ckpt_label_info, "label_ids"): + msg = "Loading checkpoint from OTX < 2.2.1, label_ids are assigned automatically" + logger.info(msg) + ckpt_label_info.label_ids = [str(i) for i, _ in enumerate(ckpt_label_info.label_names)] + if ckpt_label_info != self.label_info: msg = ( "Load model state dictionary incrementally: " @@ -757,7 +762,7 @@ def lr_scheduler_step(self, scheduler: LRSchedulerTypeUnion, metric: Tensor) -> return super().lr_scheduler_step(scheduler=scheduler, metric=metric) if len(warmup_schedulers) != 1: - msg = "No more than two warmup schedulers coexist." + msg = "No more than one warmup schedulers coexist." raise RuntimeError(msg) warmup_scheduler = next(iter(warmup_schedulers)) @@ -822,7 +827,11 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo: if isinstance(label_info, int): return LabelInfo.from_num_classes(num_classes=label_info) if isinstance(label_info, Sequence) and all(isinstance(name, str) for name in label_info): - return LabelInfo(label_names=label_info, label_groups=[label_info]) + return LabelInfo( + label_names=label_info, + label_groups=[label_info], + label_ids=[str(i) for i in range(len(label_info))], + ) if isinstance(label_info, LabelInfo): return label_info @@ -1115,7 +1124,7 @@ def _create_label_info_from_ov_ir(self) -> LabelInfo: ) logger.warning(msg) - return LabelInfo(label_names=label_names, label_groups=[label_names]) + return LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=[]) msg = "Cannot construct LabelInfo from OpenVINO IR. Please check this model is trained by OTX." 
raise ValueError(msg) diff --git a/src/otx/core/model/segmentation.py b/src/otx/core/model/segmentation.py index a22cc15fbc4..eeebba408fe 100644 --- a/src/otx/core/model/segmentation.py +++ b/src/otx/core/model/segmentation.py @@ -93,7 +93,6 @@ def __init__( self.unsupervised_weight = unsupervised_weight self.semisl_start_epoch = semisl_start_epoch self.drop_unreliable_pixels_percent = drop_unreliable_pixels_percent - super().__init__( label_info=label_info, input_size=input_size, @@ -254,7 +253,11 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo: if isinstance(label_info, int): return SegLabelInfo.from_num_classes(num_classes=label_info) if isinstance(label_info, Sequence) and all(isinstance(name, str) for name in label_info): - return SegLabelInfo(label_names=label_info, label_groups=[label_info]) + return SegLabelInfo( + label_names=label_info, + label_groups=[label_info], + label_ids=[str(i) for i in range(len(label_info))], + ) if isinstance(label_info, SegLabelInfo): return label_info diff --git a/src/otx/core/schedulers/warmup_schedulers.py b/src/otx/core/schedulers/warmup_schedulers.py index 6de763bb52b..0b1d12a711e 100644 --- a/src/otx/core/schedulers/warmup_schedulers.py +++ b/src/otx/core/schedulers/warmup_schedulers.py @@ -19,8 +19,9 @@ class LinearWarmupScheduler(LambdaLR): """Linear Warmup scheduler. Args: - num_warmup_steps: Learning rate will linearly increased during the period same as this number. - warmup_interval: If "epoch", count the number of steps for the warmup period. + optimizer (Optimizer): Optimizer to apply the scheduler. + num_warmup_steps (int): Learning rate will linearly increased during the period same as this number. + interval (Literal["step", "epoch"]): If "epoch", count the number of epochs for the warmup period. Otherwise, the iteration step will be the warmup period. 
""" @@ -55,7 +56,7 @@ class LinearWarmupSchedulerCallable: main_scheduler_callable: Callable to create a LR scheduler that will be mainly used. num_warmup_steps: Learning rate will linearly increased during the period same as this number. If it is less than equal to zero, do not create `LinearWarmupScheduler`. - warmup_interval: If "epoch", count the number of steps for the warmup period. + warmup_interval: If "epoch", count the number of epochs for the warmup period. Otherwise, the iteration step will be the warmup period. monitor: If given, override the main scheduler's `monitor` attribute. """ diff --git a/src/otx/core/types/export.py b/src/otx/core/types/export.py index cc9c592f3b9..fc35a39b8f7 100644 --- a/src/otx/core/types/export.py +++ b/src/otx/core/types/export.py @@ -9,6 +9,7 @@ from dataclasses import dataclass, fields from enum import Enum +import otx from otx.core.config.data import TileConfig from otx.core.types.label import HLabelInfo, LabelInfo @@ -102,7 +103,8 @@ def to_metadata(self) -> dict[tuple[str, str], str]: all_label_ids = "" for lbl in self.label_info.label_names: all_labels += lbl.replace(" ", "_") + " " - all_label_ids += lbl.replace(" ", "_") + " " + for lbl_id in self.label_info.label_ids: + all_label_ids += lbl_id + " " metadata = { # Common @@ -112,6 +114,7 @@ def to_metadata(self) -> dict[tuple[str, str], str]: ("model_info", "labels"): all_labels.strip(), ("model_info", "label_ids"): all_label_ids.strip(), ("model_info", "optimization_config"): json.dumps(self.optimization_config), + ("model_info", "otx_version"): otx.__version__, } if isinstance(self.label_info, HLabelInfo): diff --git a/src/otx/core/types/label.py b/src/otx/core/types/label.py index c89f67d7fd6..19c3ece3bb4 100644 --- a/src/otx/core/types/label.py +++ b/src/otx/core/types/label.py @@ -5,10 +5,13 @@ from __future__ import annotations +import copy import json from dataclasses import asdict, dataclass from typing import TYPE_CHECKING, Any +from 
datumaro.components.annotation import GroupType + if TYPE_CHECKING: from datumaro import Label, LabelCategories @@ -27,6 +30,7 @@ class LabelInfo: """Object to represent label information.""" label_names: list[str] + label_ids: list[str] label_groups: list[list[str]] @property @@ -51,10 +55,12 @@ def from_num_classes(cls, num_classes: int) -> LabelInfo: return NullLabelInfo() label_names = [f"label_{idx}" for idx in range(num_classes)] + label_ids = [str(i) for i in range(num_classes)] return cls( label_names=label_names, label_groups=[label_names], + label_ids=label_ids, ) @classmethod @@ -79,6 +85,38 @@ def from_dm_label_groups(cls, dm_label_categories: LabelCategories) -> LabelInfo return LabelInfo( label_names=label_names, label_groups=label_groups, + label_ids=[str(i) for i in range(len(label_names))], + ) + + @classmethod + def from_dm_label_groups_arrow(cls, dm_label_categories: LabelCategories) -> LabelInfo: + """Overload to support datumaro's arrow format.""" + label_names = [] + for item in dm_label_categories.items: + for attr in item.attributes: + if attr.startswith("__name__"): + label_names.append(attr[len("__name__") :]) + break + + if len(label_names) != len(dm_label_categories.items): + msg = "Wrong arrow format: can not extract label names from attributes" + raise ValueError(msg) + + id_to_name_mapping = {item.name: label_names[i] for i, item in enumerate(dm_label_categories.items)} + + for label_group in dm_label_categories.label_groups: + label_group.labels = [id_to_name_mapping.get(label, label) for label in label_group.labels] + + label_groups = [label_group.labels for label_group in dm_label_categories.label_groups] + if len(label_groups) == 0: # Single-label classification + label_groups = [label_names] + + label_ids = [item.name for item in dm_label_categories.items] + + return LabelInfo( + label_names=label_names, + label_groups=label_groups, + label_ids=label_ids, ) def as_dict(self) -> dict[str, Any]: @@ -279,8 +317,60 @@ def 
convert_labels_if_needed( label_to_idx=label_to_idx, label_tree_edges=get_label_tree_edges(dm_label_categories.items), empty_multiclass_head_indices=[], # consider the label removing case + label_ids=[str(i) for i in range(len(label_names))], ) + @classmethod + def from_dm_label_groups_arrow(cls, dm_label_categories: LabelCategories) -> HLabelInfo: + """Generate HLabelData from the Datumaro LabelCategories. Arrow-specific implementation. + + Args: + dm_label_categories (LabelCategories): the label categories of datumaro. + """ + dm_label_categories = copy.deepcopy(dm_label_categories) + + empty_label_name = None + for label_group in dm_label_categories.label_groups: + if label_group.group_type == GroupType.RESTRICTED: + empty_label_name = label_group.labels[0] + + dm_label_categories.label_groups = [ + group for group in dm_label_categories.label_groups if group.group_type != GroupType.RESTRICTED + ] + + empty_label_id = None + label_names = [] + for item in dm_label_categories.items: + for attr in item.attributes: + if attr.startswith("__name__"): + name = attr[len("__name__") :] + if name == empty_label_name: + empty_label_id = item.name + label_names.append(name) + break + + if len(label_names) != len(dm_label_categories.items): + msg = "Wrong arrow file: can not extract label names from attributes" + raise ValueError(msg) + + if empty_label_name is not None: + label_names.remove(empty_label_name) + dm_label_categories.items = [item for item in dm_label_categories.items if item.name != empty_label_id] + label_ids = [item.name for item in dm_label_categories.items] + + id_to_name_mapping = {item.name: label_names[i] for i, item in enumerate(dm_label_categories.items)} + + for i, item in enumerate(dm_label_categories.items): + item.name = label_names[i] + item.parent = id_to_name_mapping.get(item.parent, item.parent) + + for label_group in dm_label_categories.label_groups: + label_group.labels = [id_to_name_mapping.get(label, label) for label in 
label_group.labels] + + obj = cls.from_dm_label_groups(dm_label_categories) + obj.label_ids = label_ids + return obj + def as_head_config_dict(self) -> dict[str, Any]: """Return a dictionary including params needed to configure the HLabel MMPretrained head network.""" return { @@ -326,7 +416,7 @@ def from_num_classes(cls, num_classes: int) -> LabelInfo: if num_classes == 1: # binary segmentation label_names = ["background", "label_0"] - return SegLabelInfo(label_names=label_names, label_groups=[label_names]) + return SegLabelInfo(label_names=label_names, label_groups=[label_names], label_ids=["0", "1"]) return super().from_num_classes(num_classes) @@ -336,7 +426,7 @@ class NullLabelInfo(LabelInfo): """Represent no label information. It is used for Visual Prompting tasks.""" def __init__(self) -> None: - super().__init__(label_names=[], label_groups=[[]]) + super().__init__(label_names=[], label_groups=[[]], label_ids=[]) @classmethod def from_json(cls, _: str) -> LabelInfo: @@ -349,7 +439,7 @@ class AnomalyLabelInfo(LabelInfo): """Represent no label information. 
It is used for Anomaly tasks.""" def __init__(self) -> None: - super().__init__(label_names=["Normal", "Anomaly"], label_groups=[["Normal", "Anomaly"]]) + super().__init__(label_names=["Normal", "Anomaly"], label_groups=[["Normal", "Anomaly"]], label_ids=["0", "1"]) # Dispatching rules: diff --git a/src/otx/recipe/_base_/train.yaml b/src/otx/recipe/_base_/train.yaml index 7dba87f8381..806d09e3e0a 100644 --- a/src/otx/recipe/_base_/train.yaml +++ b/src/otx/recipe/_base_/train.yaml @@ -40,6 +40,8 @@ callbacks: init_args: max_interval: 5 decay: -0.025 + min_earlystop_patience: 5 + min_lrschedule_patience: 3 logger: - class_path: lightning.pytorch.loggers.csv_logs.CSVLogger init_args: diff --git a/src/otx/recipe/anomaly_classification/stfpm.yaml b/src/otx/recipe/anomaly_classification/stfpm.yaml index ec1c6af8ddc..91cf676c201 100644 --- a/src/otx/recipe/anomaly_classification/stfpm.yaml +++ b/src/otx/recipe/anomaly_classification/stfpm.yaml @@ -16,7 +16,7 @@ overrides: precision: 32 max_epochs: 100 callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 5 mode: max diff --git a/src/otx/recipe/anomaly_detection/stfpm.yaml b/src/otx/recipe/anomaly_detection/stfpm.yaml index b13534505a4..25bb7be88bb 100644 --- a/src/otx/recipe/anomaly_detection/stfpm.yaml +++ b/src/otx/recipe/anomaly_detection/stfpm.yaml @@ -21,7 +21,7 @@ overrides: precision: 32 max_epochs: 100 callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 5 mode: max diff --git a/src/otx/recipe/anomaly_segmentation/stfpm.yaml b/src/otx/recipe/anomaly_segmentation/stfpm.yaml index 9a3d9c85d6e..604ff9ba029 100644 --- a/src/otx/recipe/anomaly_segmentation/stfpm.yaml +++ b/src/otx/recipe/anomaly_segmentation/stfpm.yaml @@ -16,7 +16,7 @@ overrides: precision: 32 max_epochs: 100 
callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 5 mode: max diff --git a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml index b36f48e14c9..1191e0e22d3 100644 --- a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml @@ -10,12 +10,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: H_LABEL_CLS @@ -26,11 +30,12 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 - callbacks: - - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup - init_args: - patience: 3 data: task: H_LABEL_CLS data_format: datumaro + + callbacks: + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: + patience: 5 diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml index 4bfbe3fc121..2bb282e4378 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml @@ -11,12 +11,16 @@ model: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: 
lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: H_LABEL_CLS @@ -29,11 +33,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 90 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 90 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml index 500cc168baa..fbc2d11ce21 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml @@ -10,6 +10,18 @@ model: momentum: 0.9 weight_decay: 0.0001 + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy + engine: task: H_LABEL_CLS device: auto @@ -25,8 +37,11 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 warmup_iters: 750 + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: + patience: 5 + data: task: H_LABEL_CLS data_format: datumaro diff --git a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml index 211bc8fa883..c94b7dd16b6 100644 --- a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml @@ -19,7 +19,7 @@ model: init_args: mode: max factor: 0.5 - patience: 1 + patience: 3 monitor: val/accuracy engine: @@ -31,10 +31,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - 
class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml index 2078c98b43b..d36cdfff5b7 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: H_LABEL_CLS @@ -29,10 +33,11 @@ overrides: - data.train_subset.transforms max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml index 0f2d7b60a6a..9bec7e924e6 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: H_LABEL_CLS @@ -29,10 +33,11 @@ overrides: - data.train_subset.transforms max_epochs: 90 + callbacks: - class_path: 
otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml index faab071ff5d..d00a5109f7d 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: H_LABEL_CLS @@ -29,10 +33,11 @@ overrides: - data.train_subset.transforms max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml index f5446d3cca6..cdc06e19f52 100644 --- a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -28,7 +32,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: 
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml index 300091fab8c..a11967f1068 100644 --- a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml @@ -11,12 +11,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -27,7 +31,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml index 428fb89055b..fe6244853d6 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -30,11 +34,12 @@ overrides: reset: - data.train_subset.transforms - 
max_epochs: 90 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 90 data: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml index 2454c0e7094..7c71a8af171 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -30,11 +34,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 90 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 90 data: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml index c4c6946fd6e..fdef97ef9c8 100644 --- a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml @@ -20,7 +20,7 @@ model: init_args: mode: max factor: 0.5 - patience: 1 + patience: 3 monitor: val/accuracy engine: @@ -32,7 +32,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git 
a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml index f06b3b36e32..ccd26a6535e 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_CLASS_CLS @@ -26,7 +30,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml index c72714e9433..06d702e8576 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_CLASS_CLS @@ -26,7 +30,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git 
a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml index 4c6975c241a..9d626812765 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_CLASS_CLS @@ -26,7 +30,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml index afb14dd046f..623bc178f4b 100644 --- a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml @@ -11,12 +11,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -27,14 +31,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 200 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - 
patience: 4 - - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling - init_args: - min_earlystop_patience: 4 - min_lrschedule_patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml index f3625158439..1859e0aa5fa 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0005 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -30,11 +34,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 200 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 200 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml index a304d76542b..ba43011950c 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0005 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + 
patience: 3 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -30,15 +34,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 200 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 4 - - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling - init_args: - min_earlystop_patience: 4 - min_lrschedule_patience: 3 + patience: 5 + + max_epochs: 200 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml index 02021708453..f9322f22f07 100644 --- a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml @@ -20,7 +20,7 @@ model: init_args: mode: max factor: 0.5 - patience: 1 + patience: 3 monitor: val/accuracy engine: @@ -31,11 +31,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: - callbacks: - - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup - init_args: - patience: 3 - data: task: MULTI_LABEL_CLS data_format: datumaro + + callbacks: + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: + patience: 5 diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml index 9579f8e5e57..ebc03324933 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: 
torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_LABEL_CLS @@ -31,7 +35,7 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml index 3003b26eb48..a1992d2b398 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_LABEL_CLS @@ -31,11 +35,7 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 4 - - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling - init_args: - min_earlystop_patience: 4 - min_lrschedule_patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml index 492e835ef62..99ef63b59e1 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + 
main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_LABEL_CLS @@ -31,7 +35,7 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index adabd373f1e..ee8925cfce6 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index 981aae94fdb..6305ed7345e 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 0a6bb28bbde..290c47ab5cf 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/atss_resnext101_tile.yaml b/src/otx/recipe/detection/atss_resnext101_tile.yaml index 8f78195f637..7a99f76a07e 100644 --- 
a/src/otx/recipe/detection/atss_resnext101_tile.yaml +++ b/src/otx/recipe/detection/atss_resnext101_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/rtdetr_101_tile.yaml b/src/otx/recipe/detection/rtdetr_101_tile.yaml index 918a173dedf..1d2bbfdeb3d 100644 --- a/src/otx/recipe/detection/rtdetr_101_tile.yaml +++ b/src/otx/recipe/detection/rtdetr_101_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 5 + num_warmup_steps: 100 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: @@ -35,15 +35,9 @@ overrides: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: max_interval: 1 - decay: -0.025 min_lrschedule_patience: 3 - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - monitor: null - mode: max - patience: 10 - check_on_train_epoch_end: false - min_delta: 0.001 warmup_iters: 100 warmup_epochs: 7 diff --git a/src/otx/recipe/detection/rtdetr_18_tile.yaml b/src/otx/recipe/detection/rtdetr_18_tile.yaml index d79091eb56c..0e58a15d516 100644 --- a/src/otx/recipe/detection/rtdetr_18_tile.yaml +++ b/src/otx/recipe/detection/rtdetr_18_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 5 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: @@ -34,15 +34,9 @@ overrides: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: max_interval: 1 - decay: -0.025 min_lrschedule_patience: 3 - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup 
init_args: - monitor: null - mode: max - patience: 10 - check_on_train_epoch_end: false - min_delta: 0.001 warmup_iters: 100 warmup_epochs: 7 diff --git a/src/otx/recipe/detection/rtdetr_50_tile.yaml b/src/otx/recipe/detection/rtdetr_50_tile.yaml index 4c0bfdb1e64..f5042a46741 100644 --- a/src/otx/recipe/detection/rtdetr_50_tile.yaml +++ b/src/otx/recipe/detection/rtdetr_50_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 5 + num_warmup_steps: 100 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: @@ -35,15 +35,9 @@ overrides: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: max_interval: 1 - decay: -0.025 min_lrschedule_patience: 3 - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - monitor: null - mode: max - patience: 10 - check_on_train_epoch_end: false - min_delta: 0.001 warmup_iters: 100 warmup_epochs: 7 diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 69b6eef9978..73eec5d3016 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/rtmdet_tiny_tile.yaml b/src/otx/recipe/detection/rtmdet_tiny_tile.yaml index 716f5151e8f..ed7f1100a15 100644 --- a/src/otx/recipe/detection/rtmdet_tiny_tile.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau 
init_args: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 5b3ace2b81c..35cc1135d40 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index 620b55107b2..45d57954753 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index ed0ecfeec41..d457e2b9207 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index d9740706813..c5d4bf7210e 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_s.yaml 
b/src/otx/recipe/detection/yolox_s.yaml index 460b8c5b1bd..e547174e4e0 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 967639269b2..e3138139e4b 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 86395c02f0d..fa954fb257c 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index 6b1c012e642..5bea45474f1 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index 4364cde6acf..a254eb755ea 100644 --- 
a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index b3dcf395c96..69751c03db0 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -14,7 +14,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 59302d244b9..a3c074cd585 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -2,10 +2,10 @@ model: class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg init_args: label_info: 2 - model_name: dinov2_vits14 + model_name: dinov2-small-seg input_size: - - 560 - - 560 + - 518 + - 518 optimizer: class_path: torch.optim.AdamW @@ -17,11 +17,15 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.PolynomialLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - total_iters: 150 - power: 0.9 - last_epoch: -1 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 150 + power: 0.9 + last_epoch: -1 engine: task: SEMANTIC_SEGMENTATION @@ -33,8 +37,8 @@ data: ../_base_/data/semantic_segmentation.yaml overrides: data: input_size: - - 560 - - 560 + - 518 + - 518 train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop diff --git 
a/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml b/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml index e645a193bca..4dc476089e8 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg init_args: label_info: 2 - model_name: dinov2_vits14 + model_name: dinov2-small-seg optimizer: class_path: torch.optim.AdamW @@ -30,8 +30,8 @@ data: ../_base_/data/semantic_segmentation_tile.yaml overrides: data: input_size: - - 560 - - 560 + - 518 + - 518 train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize diff --git a/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml b/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml index 721085499db..da9a62fa4be 100644 --- a/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml +++ b/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml @@ -2,11 +2,11 @@ model: class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg init_args: label_info: 2 - model_name: dinov2_vits14 + model_version: dinov2-small-seg train_type: SEMI_SUPERVISED input_size: - - 560 - - 560 + - 518 + - 518 optimizer: class_path: torch.optim.AdamW @@ -34,8 +34,8 @@ data: ../../_base_/data/semisl/semantic_segmentation_semisl.yaml overrides: data: input_size: - - 560 - - 560 + - 518 + - 518 train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop diff --git a/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml b/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml index 5abb0004bac..2c0ca578cb8 100644 --- a/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml +++ b/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml @@ -4,6 +4,7 @@ model: label_info: 2 model_name: lite_hrnet_x train_type: SEMI_SUPERVISED + drop_unreliable_pixels_percent: 80 
optimizer: class_path: torch.optim.Adam diff --git a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml index 377d80b3722..4c8646bab0a 100644 --- a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml @@ -18,10 +18,14 @@ model: lr: 0.00001 scheduler: - class_path: torch.optim.lr_scheduler.ConstantLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - factor: 1 - total_iters: -1 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.ConstantLR + init_args: + factor: 1 + total_iters: -1 engine: task: VISUAL_PROMPTING @@ -35,4 +39,4 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/visual_prompting/sam_vit_b.yaml b/src/otx/recipe/visual_prompting/sam_vit_b.yaml index bc3bf89351a..4493af39562 100644 --- a/src/otx/recipe/visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/visual_prompting/sam_vit_b.yaml @@ -18,10 +18,14 @@ model: lr: 0.00001 scheduler: - class_path: torch.optim.lr_scheduler.ConstantLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - factor: 1 - total_iters: -1 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.ConstantLR + init_args: + factor: 1 + total_iters: -1 engine: task: VISUAL_PROMPTING @@ -35,4 +39,4 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/tools/converter.py b/src/otx/tools/converter.py index d8f26bc72a3..1e670930cf0 100644 --- a/src/otx/tools/converter.py +++ b/src/otx/tools/converter.py @@ -248,6 +248,9 @@ def convert(config_path: str, task: OTXTaskType | None = None) -> dict: task_info = TEMPLATE_ID_DICT[template_config["model_template_id"]] if 
param_dict.get("enable_tiling", None) and not task_info["model_name"].endswith("_tile"): task_info["model_name"] += "_tile" + # classification task type can't be deducted from template name, try to extract from config + if "sub_task_type" in template_config and "_CLS" in task_info["task"]: + task_info["task"] = template_config["sub_task_type"] if task is not None: task_info["task"] = task default_config = ConfigConverter._get_default_config(task_info) @@ -317,13 +320,16 @@ def update_num_workers(param_value: int) -> None: config["data"]["test_subset"]["num_workers"] = param_value def update_enable_early_stopping(param_value: bool) -> None: - idx = ConfigConverter._get_callback_idx(config["callbacks"], "lightning.pytorch.callbacks.EarlyStopping") + idx = ConfigConverter._get_callback_idx( + config["callbacks"], + "otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup", + ) if not param_value and idx > -1: config["callbacks"].pop(idx) def update_early_stop_patience(param_value: int) -> None: for callback in config["callbacks"]: - if callback["class_path"] == "lightning.pytorch.callbacks.EarlyStopping": + if callback["class_path"] == "otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup": callback["init_args"]["patience"] = param_value break diff --git a/src/otx/tools/templates/classification/configuration.yaml b/src/otx/tools/templates/classification/configuration.yaml index ed91ea1cfa3..93f972b7a87 100644 --- a/src/otx/tools/templates/classification/configuration.yaml +++ b/src/otx/tools/templates/classification/configuration.yaml @@ -87,11 +87,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. 
+ Note, if the early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -174,7 +174,7 @@ learning_parameters: visible_in_ui: false early_stop_patience: affects_outcome_of: TRAINING - default_value: 3 + default_value: 5 description: Training will stop if the model does not improve within the number of epochs of patience. editable: true header: Patience for early stopping @@ -186,26 +186,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 8 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 + value: 5 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: @@ -264,7 +245,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. 
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml b/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml index 4db892a3131..001e1e3d995 100644 --- a/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml +++ b/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml @@ -20,7 +20,7 @@ hyper_parameters: parameter_overrides: learning_parameters: batch_size: - default_value: 64 + default_value: 48 auto_hpo_state: POSSIBLE learning_rate: default_value: 0.01 diff --git a/src/otx/tools/templates/detection/detection/configuration.yaml b/src/otx/tools/templates/detection/detection/configuration.yaml index 5cb11d83c9f..9fe02a3d28a 100644 --- a/src/otx/tools/templates/detection/detection/configuration.yaml +++ b/src/otx/tools/templates/detection/detection/configuration.yaml @@ -91,11 +91,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note, if the early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -124,7 +124,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 0 + value: 2 visible_in_ui: true warning: null enable_early_stopping: @@ -173,25 +173,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. 
- early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -232,7 +213,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml index c161471d452..19c6f4c7502 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml index f8ef1d4acd3..ad248a10bf6 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: 
default_value: 0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml index e2e426840ed..f3310cb8138 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.0002 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml index 97f85fed008..30bdfecff16 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml b/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml index 94dd429e1f1..aeed26d6c19 100644 --- a/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml +++ b/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.004 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml b/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml index 3cdde945a08..62943444683 
100644 --- a/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml +++ b/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.01 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml b/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml index cf12454e78d..c6770cc7827 100644 --- a/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml +++ b/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.004 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml index 1394cf44159..7b4bcae96a8 100644 --- a/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml index 7738c65f1b7..88999e071d6 100644 --- a/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml 
b/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml index 408e48cd8fb..79497ab1c79 100644 --- a/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml b/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml index 8b110503b62..a067b186861 100644 --- a/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.0007 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml b/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml index a1c2078ed62..20421f3fd16 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml @@ -91,11 +91,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note, if the early stopping is enabled, the actual number of epochs may be less than this value. 
editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -173,25 +173,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -232,7 +213,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. 
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml index 4b5e21a4f83..f30d4c6f792 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.015 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml index e9e289c6bf6..cf609e3d1c8 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml index bd2248adbcd..31f1a310cab 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/rotated_detection/configuration.yaml 
b/src/otx/tools/templates/detection/rotated_detection/configuration.yaml index b41ea7dda25..524376b9d0a 100644 --- a/src/otx/tools/templates/detection/rotated_detection/configuration.yaml +++ b/src/otx/tools/templates/detection/rotated_detection/configuration.yaml @@ -91,11 +91,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note, if the early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -173,25 +173,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -232,7 +213,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. 
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml b/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml index 21e079c489a..2c5ebee3fc7 100644 --- a/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml b/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml index 4cb51f466eb..8d1bad4640c 100644 --- a/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/keypoint_detection/configuration.yaml b/src/otx/tools/templates/keypoint_detection/configuration.yaml index e745d787c80..1ef84c01919 100644 --- a/src/otx/tools/templates/keypoint_detection/configuration.yaml +++ b/src/otx/tools/templates/keypoint_detection/configuration.yaml @@ -87,11 +87,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. 
+ Note, if the early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -120,7 +120,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 0 + value: 2 visible_in_ui: true warning: null learning_rate_warmup_iters: @@ -189,25 +189,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -248,7 +229,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. 
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/segmentation/configuration.yaml b/src/otx/tools/templates/segmentation/configuration.yaml index 23356c696db..c7df2603ec7 100644 --- a/src/otx/tools/templates/segmentation/configuration.yaml +++ b/src/otx/tools/templates/segmentation/configuration.yaml @@ -54,8 +54,8 @@ learning_parameters: default_value: 100 description: In this periods of initial training iterations, the model will be trained in low learning rate, - which will be increased incrementally up to the expected learning rate setting. - This warm-up phase is known to be helpful to stabilize training, thus result in better performance. + which will be increased linearly up to the expected learning rate setting. + This warm-up phase is known to be helpful to stabilize training, therefore, can lead to increased performance. editable: true header: Number of iterations for learning rate warmup max_value: 10000 @@ -74,11 +74,11 @@ learning_parameters: auto_hpo_state: not_possible auto_hpo_value: null default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note, if the early stopping is enabled, the actual number of epochs may be less than this value. 
editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -109,7 +109,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 0 + value: 2 visible_in_ui: true warning: null enable_early_stopping: @@ -143,7 +143,7 @@ learning_parameters: visible_in_ui: false early_stop_patience: affects_outcome_of: TRAINING - default_value: 7 + default_value: 10 description: Training will stop if the model does not improve within the number of epochs of patience. editable: true header: Patience for early stopping @@ -158,25 +158,6 @@ learning_parameters: value: 5 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. enable_supcon: affects_outcome_of: TRAINING default_value: false @@ -219,7 +200,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. 
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/segmentation/dinov2_small/template.yaml b/src/otx/tools/templates/segmentation/dinov2_small/template.yaml index ac837fa5007..630af3c6b7c 100644 --- a/src/otx/tools/templates/segmentation/dinov2_small/template.yaml +++ b/src/otx/tools/templates/segmentation/dinov2_small/template.yaml @@ -25,10 +25,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml index ef390639238..88f3d5d41a3 100644 --- a/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml +++ b/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml @@ -25,14 +25,8 @@ hyper_parameters: learning_rate: default_value: 0.00006 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 early_stop_start: default_value: 100 - early_stop_patience: - default_value: 10 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml index 9afd2660cf2..0c13b203b84 100644 --- a/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml +++ b/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml @@ -25,14 +25,8 @@ hyper_parameters: learning_rate: default_value: 0.00006 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 early_stop_start: default_value: 100 - early_stop_patience: - default_value: 10 # Training resources. 
max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml index c5879535caa..e23c8c91104 100644 --- a/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml +++ b/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml @@ -25,14 +25,8 @@ hyper_parameters: learning_rate: default_value: 0.00006 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 early_stop_start: default_value: 100 - early_stop_patience: - default_value: 10 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml index 56af9f4b2b2..ce4e4da393f 100644 --- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml +++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml @@ -24,10 +24,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml index 6637673c64e..6c8be01af2a 100644 --- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml +++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml @@ -25,10 +25,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. 
max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml index f4bc011554f..d4f98812586 100644 --- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml +++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml @@ -25,10 +25,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. max_nodes: 1 diff --git a/tests/conftest.py b/tests/conftest.py index 643bf351be8..62ca433376d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -373,6 +373,7 @@ def fxt_seg_label_info() -> SegLabelInfo: label_names, ["class2", "class3"], ], + label_ids=["0", "1", "2"], ) @@ -385,6 +386,7 @@ def fxt_multiclass_labelinfo() -> LabelInfo: label_names, ["class2", "class3"], ], + label_ids=["0", "1", "2"], ) @@ -398,6 +400,7 @@ def fxt_multilabel_labelinfo() -> LabelInfo: [label_names[1]], [label_names[2]], ], + label_ids=["0", "1", "2"], ) @@ -464,6 +467,7 @@ def fxt_hlabel_multilabel_info() -> HLabelInfo: ["Spade_A", "Spade"], ["Spade_King", "Spade"], ], + label_ids=[str(i) for i in range(9)], ) diff --git a/tests/integration/api/test_augmentation.py b/tests/integration/api/test_augmentation.py index cae1b5b8317..2f0b11a64c2 100644 --- a/tests/integration/api/test_augmentation.py +++ b/tests/integration/api/test_augmentation.py @@ -31,11 +31,12 @@ def _test_augmentation( ).config train_config = config["data"]["train_subset"] train_config["input_size"] = (32, 32) + data_format = config["data"]["data_format"] # Load dataset dm_dataset = DmDataset.import_from( target_dataset_per_task[task_name], - format=config["data"]["data_format"], + format=data_format, ) mem_cache_handler = MemCacheHandlerSingleton.create( mode="sinlgeprocessing", @@ -60,6 +61,7 @@ def _test_augmentation( dm_subset=dm_dataset, 
cfg_subset=SubsetConfig(sampler=SamplerConfig(**train_config.pop("sampler", {})), **train_config), mem_cache_handler=mem_cache_handler, + data_format=data_format, ) # Check if all aug combinations are size-compatible diff --git a/tests/unit/algo/classification/conftest.py b/tests/unit/algo/classification/conftest.py index 945c3d0bc4c..a283eff41b1 100644 --- a/tests/unit/algo/classification/conftest.py +++ b/tests/unit/algo/classification/conftest.py @@ -31,6 +31,7 @@ def fxt_hlabel_data() -> HLabelInfo: ["Heart_Queen", "Heart_King"], ["Spade_A", "Spade_King"], ], + label_ids=[str(i) for i in range(6)], num_multiclass_heads=3, num_multilabel_classes=0, head_idx_to_logits_range={"0": (0, 2), "1": (2, 4), "2": (4, 6)}, @@ -80,6 +81,7 @@ def fxt_hlabel_multilabel_info() -> HLabelInfo: "Red_Joker", "Extra_Joker", ], + label_ids=[str(i) for i in range(9)], label_groups=[ ["Heart", "Spade"], ["Heart_Queen", "Heart_King"], @@ -149,6 +151,7 @@ def fxt_hlabel_cifar() -> HLabelInfo: "aquatic_mammals", "fish", ], + label_ids=[str(i) for i in range(12)], label_groups=[ ["beaver", "dolphin", "otter", "seal", "whale"], ["aquarium_fish", "flatfish", "ray", "shark", "trout"], diff --git a/tests/unit/algo/detection/detectors/test_single_stage_detector.py b/tests/unit/algo/detection/detectors/test_single_stage_detector.py index 7d805062ed1..669e4d8c900 100644 --- a/tests/unit/algo/detection/detectors/test_single_stage_detector.py +++ b/tests/unit/algo/detection/detectors/test_single_stage_detector.py @@ -53,7 +53,7 @@ def batch(self): inputs = torch.randn(1, 3, 32, 32) return DetBatchDataEntity( batch_size=1, - imgs_info=[LabelInfo(["a"], [["a"]])], + imgs_info=[LabelInfo(["a"], ["0"], [["a"]])], images=inputs, bboxes=[torch.tensor([[0.5, 0.5, 0.5, 0.5]])], labels=[torch.tensor([0])], diff --git a/tests/unit/algo/detection/test_rtdetr.py b/tests/unit/algo/detection/test_rtdetr.py index d96305dfe13..6d2c92718a7 100644 --- a/tests/unit/algo/detection/test_rtdetr.py +++ 
b/tests/unit/algo/detection/test_rtdetr.py @@ -15,7 +15,7 @@ class TestRTDETR: def test_customize_outputs(self, mocker): - label_info = LabelInfo(["a", "b", "c"], [["a", "b", "c"]]) + label_info = LabelInfo(["a", "b", "c"], ["0", "1", "2"], [["a", "b", "c"]]) mocker.patch("otx.algo.detection.rtdetr.RTDETR._build_model", return_value=mocker.MagicMock()) model = RTDETR(model_name="rtdetr_18", label_info=label_info) model.model.load_from = None diff --git a/tests/unit/algo/segmentation/backbones/test_dinov2.py b/tests/unit/algo/segmentation/backbones/test_dinov2.py deleted file mode 100644 index 45fb2aaf84e..00000000000 --- a/tests/unit/algo/segmentation/backbones/test_dinov2.py +++ /dev/null @@ -1,82 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from unittest.mock import MagicMock - -import pytest -import torch -from otx.algo.segmentation.backbones import dinov2 as target_file -from otx.algo.segmentation.backbones.dinov2 import DinoVisionTransformer - - -class TestDinoVisionTransformer: - @pytest.fixture() - def mock_backbone_named_parameters(self) -> dict[str, MagicMock]: - named_parameter = {} - for i in range(3): - parameter = MagicMock() - parameter.requires_grad = True - named_parameter[f"layer_{i}"] = parameter - return named_parameter - - @pytest.fixture() - def mock_backbone(self, mock_backbone_named_parameters) -> MagicMock: - backbone = MagicMock() - backbone.named_parameters.return_value = list(mock_backbone_named_parameters.items()) - return backbone - - @pytest.fixture(autouse=True) - def mock_torch_hub_load(self, mocker, mock_backbone): - return mocker.patch("otx.algo.segmentation.backbones.dinov2.torch.hub.load", return_value=mock_backbone) - - def test_init(self, mock_backbone, mock_backbone_named_parameters): - dino = DinoVisionTransformer(model_name="dinov2_vits14", freeze_backbone=True, out_index=[8, 9, 10, 11]) - - assert dino.backbone == mock_backbone - for parameter in mock_backbone_named_parameters.values(): - assert 
parameter.requires_grad is False - - @pytest.fixture() - def dino_vit(self) -> DinoVisionTransformer: - return DinoVisionTransformer( - model_name="dinov2_vits14", - freeze_backbone=True, - out_index=[8, 9, 10, 11], - ) - - def test_forward(self, dino_vit, mock_backbone): - tensor = torch.rand(10, 3, 3, 3) - dino_vit.forward(tensor) - - mock_backbone.assert_called_once_with(tensor) - - @pytest.fixture() - def mock_load_from_http(self, mocker) -> MagicMock: - return mocker.patch.object(target_file, "load_from_http") - - @pytest.fixture() - def mock_load_checkpoint_to_model(self, mocker) -> MagicMock: - return mocker.patch.object(target_file, "load_checkpoint_to_model") - - @pytest.fixture() - def pretrained_weight(self, tmp_path) -> str: - weight = tmp_path / "pretrained.pth" - weight.touch() - return str(weight) - - @pytest.fixture() - def mock_torch_load(self, mocker) -> MagicMock: - return mocker.patch("otx.algo.segmentation.backbones.mscan.torch.load") - - def test_load_pretrained_weights(self, dino_vit, pretrained_weight, mock_torch_load, mock_load_checkpoint_to_model): - dino_vit.load_pretrained_weights(pretrained=pretrained_weight) - mock_torch_load.assert_called_once_with(pretrained_weight, "cpu") - mock_load_checkpoint_to_model.assert_called_once() - - def test_load_pretrained_weights_from_url(self, dino_vit, mock_load_from_http, mock_load_checkpoint_to_model): - pretrained_weight = "www.fake.com/fake.pth" - dino_vit.load_pretrained_weights(pretrained=pretrained_weight) - - cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints" - mock_load_from_http.assert_called_once_with(filename=pretrained_weight, map_location="cpu", model_dir=cache_dir) - mock_load_checkpoint_to_model.assert_called_once() diff --git a/tests/unit/algo/segmentation/test_dino_v2_seg.py b/tests/unit/algo/segmentation/test_dino_v2_seg.py index e722254e47f..c9ad5a93946 100644 --- a/tests/unit/algo/segmentation/test_dino_v2_seg.py +++ 
b/tests/unit/algo/segmentation/test_dino_v2_seg.py @@ -10,9 +10,9 @@ class TestDinoV2Seg: - @pytest.fixture(scope="class") + @pytest.fixture() def fxt_dino_v2_seg(self) -> DinoV2Seg: - return DinoV2Seg(label_info=10, model_name="dinov2_vits14", input_size=(560, 560)) + return DinoV2Seg(label_info=10, model_name="dinov2-small-seg", input_size=(518, 518)) def test_dino_v2_seg_init(self, fxt_dino_v2_seg): assert isinstance(fxt_dino_v2_seg, DinoV2Seg) @@ -21,7 +21,7 @@ def test_dino_v2_seg_init(self, fxt_dino_v2_seg): def test_exporter(self, fxt_dino_v2_seg): exporter = fxt_dino_v2_seg._exporter assert isinstance(exporter, OTXModelExporter) - assert exporter.input_size == (1, 3, 560, 560) + assert exporter.input_size == (1, 3, 518, 518) def test_optimization_config(self, fxt_dino_v2_seg): config = fxt_dino_v2_seg._optimization_config @@ -32,7 +32,7 @@ def test_optimization_config(self, fxt_dino_v2_seg): @pytest.mark.parametrize( "model", [ - DinoV2Seg(model_name="dinov2_vits14", label_info=3), + DinoV2Seg(model_name="dinov2-small-seg", label_info=3, input_size=(518, 518)), ], ) def test_compiled_model(self, model): @@ -44,6 +44,6 @@ def test_compiled_model(self, model): model.model = torch.compile(model.model, backend=cnt) # Prepare inputs - x = torch.randn(1, 3, 560, 560) + x = torch.randn(1, 3, 518, 518) model.model(x) assert cnt.frame_count == 1 diff --git a/tests/unit/cli/test_cli.py b/tests/unit/cli/test_cli.py index 3b2501066ce..07aa5d083e2 100644 --- a/tests/unit/cli/test_cli.py +++ b/tests/unit/cli/test_cli.py @@ -188,7 +188,7 @@ def test_print_config_scheduler_override_command(self, fxt_print_config_schedule scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 monitor: val/test_f1 warmup_interval: step main_scheduler_callable: diff --git a/tests/unit/core/data/conftest.py b/tests/unit/core/data/conftest.py index 665bc5a7471..e2821b17281 100644 --- a/tests/unit/core/data/conftest.py +++ 
b/tests/unit/core/data/conftest.py @@ -193,6 +193,7 @@ def fxt_mock_hlabelinfo(): return HLabelInfo( label_names=_LABEL_NAMES, label_groups=[["Non-Rigid", "Rigid"], ["Rectangle", "Triangle"], ["Circle"], ["Lion"], ["Panda"]], + label_ids=_LABEL_NAMES, num_multiclass_heads=2, num_multilabel_classes=3, head_idx_to_logits_range={"0": (0, 2), "1": (2, 4)}, diff --git a/tests/unit/core/data/test_factory.py b/tests/unit/core/data/test_factory.py index 9877739862b..a3a6f92d4af 100644 --- a/tests/unit/core/data/test_factory.py +++ b/tests/unit/core/data/test_factory.py @@ -102,6 +102,7 @@ def test_create( cfg_subset=cfg_subset, vpm_config=vpm_config, image_color_channel=image_color_channel, + data_format="", ), dataset_cls, ) diff --git a/tests/unit/core/metrics/test_accuracy.py b/tests/unit/core/metrics/test_accuracy.py index d3c43a8a087..73486330a3c 100644 --- a/tests/unit/core/metrics/test_accuracy.py +++ b/tests/unit/core/metrics/test_accuracy.py @@ -52,7 +52,7 @@ def test_default_multi_class_cls_metric_callable(self, fxt_multiclass_labelinfo: metric = MultiClassClsMetricCallable(fxt_multiclass_labelinfo) assert isinstance(metric.accuracy, MulticlassAccuracy) - one_class_label_info = LabelInfo(label_names=["class1"], label_groups=[["class1"]]) + one_class_label_info = LabelInfo(label_names=["class1"], label_groups=[["class1"]], label_ids=["0"]) assert one_class_label_info.num_classes == 1 binary_metric = MultiClassClsMetricCallable(one_class_label_info) assert isinstance(binary_metric.accuracy, BinaryAccuracy) diff --git a/tests/unit/core/model/test_detection_3d.py b/tests/unit/core/model/test_detection_3d.py index f46dc212b8d..c070305c091 100644 --- a/tests/unit/core/model/test_detection_3d.py +++ b/tests/unit/core/model/test_detection_3d.py @@ -34,6 +34,7 @@ def label_info(self) -> LabelInfo: return LabelInfo( label_names=["label_0", "label_1"], label_groups=[["label_0", "label_1"]], + label_ids=["0", "1"], ) @pytest.fixture() @@ -61,8 +62,16 @@ def 
test_export_parameters(self, model): ("label_info", "expected_label_info"), [ ( - LabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]), - LabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]), + LabelInfo( + label_names=["label1", "label2", "label3"], + label_groups=[["label1", "label2", "label3"]], + label_ids=["0", "1", "2"], + ), + LabelInfo( + label_names=["label1", "label2", "label3"], + label_groups=[["label1", "label2", "label3"]], + label_ids=["0", "1", "2"], + ), ), (LabelInfo.from_num_classes(num_classes=5), LabelInfo.from_num_classes(num_classes=5)), ], diff --git a/tests/unit/core/model/test_keypoint_detection.py b/tests/unit/core/model/test_keypoint_detection.py index d3cc06fede7..ca32686e769 100644 --- a/tests/unit/core/model/test_keypoint_detection.py +++ b/tests/unit/core/model/test_keypoint_detection.py @@ -34,6 +34,7 @@ def label_info(self) -> LabelInfo: return LabelInfo( label_names=["label_0", "label_1"], label_groups=[["label_0", "label_1"]], + label_ids=["0", "1"], ) @pytest.fixture() @@ -61,8 +62,16 @@ def test_export_parameters(self, model): ("label_info", "expected_label_info"), [ ( - LabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]), - LabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]), + LabelInfo( + label_names=["label1", "label2", "label3"], + label_groups=[["label1", "label2", "label3"]], + label_ids=["0", "1", "2"], + ), + LabelInfo( + label_names=["label1", "label2", "label3"], + label_groups=[["label1", "label2", "label3"]], + label_ids=["0", "1", "2"], + ), ), (LabelInfo.from_num_classes(num_classes=5), LabelInfo.from_num_classes(num_classes=5)), ], diff --git a/tests/unit/core/model/test_segmentation.py b/tests/unit/core/model/test_segmentation.py index b7181ce87cc..9f81fda86ab 100644 --- a/tests/unit/core/model/test_segmentation.py +++ 
b/tests/unit/core/model/test_segmentation.py @@ -34,6 +34,7 @@ def label_info(self): return SegLabelInfo( label_names=["Background", "label_0", "label_1"], label_groups=[["Background", "label_0", "label_1"]], + label_ids=["0", "1", "2"], ) @pytest.fixture() @@ -64,8 +65,16 @@ def test_export_parameters(self, model): ("label_info", "expected_label_info"), [ ( - SegLabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]), - SegLabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]), + SegLabelInfo( + label_names=["label1", "label2", "label3"], + label_groups=[["label1", "label2", "label3"]], + label_ids=["0", "1", "2"], + ), + SegLabelInfo( + label_names=["label1", "label2", "label3"], + label_groups=[["label1", "label2", "label3"]], + label_ids=["0", "1", "2"], + ), ), (SegLabelInfo.from_num_classes(num_classes=5), SegLabelInfo.from_num_classes(num_classes=5)), ], diff --git a/tests/unit/core/types/test_export.py b/tests/unit/core/types/test_export.py index 72add6f3c31..70a4aa1aa2f 100644 --- a/tests/unit/core/types/test_export.py +++ b/tests/unit/core/types/test_export.py @@ -52,3 +52,4 @@ def test_wrap(fxt_label_info, task_type): assert ("model_info", "tile_size") in metadata assert ("model_info", "tiles_overlap") in metadata assert ("model_info", "max_pred_number") in metadata + assert ("model_info", "otx_version") in metadata diff --git a/tests/unit/core/types/test_label.py b/tests/unit/core/types/test_label.py index 3ae1ae1f463..7c6d2359b7a 100644 --- a/tests/unit/core/types/test_label.py +++ b/tests/unit/core/types/test_label.py @@ -4,7 +4,7 @@ from datumaro import LabelCategories from datumaro.components.annotation import GroupType -from otx.core.types.label import HLabelInfo, NullLabelInfo, SegLabelInfo +from otx.core.types.label import HLabelInfo, LabelInfo, NullLabelInfo, SegLabelInfo def test_as_json(fxt_label_info): @@ -13,17 +13,43 @@ def 
test_as_json(fxt_label_info): assert fxt_label_info == deserialized +def test_label_info_from_arrow(): + labels = [ + LabelCategories.Category(name="car", attributes={"__name__car"}), + LabelCategories.Category(name="truck", attributes={"__name__truck"}), + ] + label_groups = [ + LabelCategories.LabelGroup( + name="Detection labels___vehicle", + labels=["car", "truck"], + group_type=GroupType.EXCLUSIVE, + ), + ] + dm_label_categories = LabelCategories(items=labels, label_groups=label_groups) + + label_info = LabelInfo.from_dm_label_groups_arrow(dm_label_categories) + assert len(label_info.label_names) == 2 + assert len(label_info.label_names) == 2 + assert len(label_info.label_groups[0]) == 2 + assert "car" in label_info.label_names + assert "truck" in label_info.label_names + + def test_seg_label_info(): # Automatically insert background label at zero index assert SegLabelInfo.from_num_classes(3) == SegLabelInfo( ["label_0", "label_1", "label_2"], + ["0", "1", "2"], [["label_0", "label_1", "label_2"]], ) - assert SegLabelInfo.from_num_classes(1) == SegLabelInfo(["background", "label_0"], [["background", "label_0"]]) + assert SegLabelInfo.from_num_classes(1) == SegLabelInfo( + ["background", "label_0"], + ["0", "1"], + [["background", "label_0"]], + ) assert SegLabelInfo.from_num_classes(0) == NullLabelInfo() -# Unit test def test_hlabel_info(): labels = [ LabelCategories.Category(name="car", parent="vehicle"), @@ -52,3 +78,38 @@ def test_hlabel_info(): assert list(hlabel_info.class_to_group_idx.keys()) == list( hlabel_info.label_to_idx.keys(), ), "class_to_group_idx and label_to_idx keys do not match" + + +def test_hlabel_info_arrow(): + labels = [ + LabelCategories.Category(name="car", parent="vehicle", attributes={"__name__car"}), + LabelCategories.Category(name="truck", parent="vehicle", attributes={"__name__truck"}), + LabelCategories.Category(name="plush_toy", parent="plush toy", attributes={"__name__plush toy"}), + LabelCategories.Category(name="No class", 
attributes={"__name__No class"}), + ] + label_groups = [ + LabelCategories.LabelGroup( + name="Detection labels___vehicle", + labels=["car", "truck"], + group_type=GroupType.EXCLUSIVE, + ), + LabelCategories.LabelGroup( + name="Detection labels___plush toy", + labels=["plush toy"], + group_type=GroupType.EXCLUSIVE, + ), + LabelCategories.LabelGroup(name="No class", labels=["No class"], group_type=GroupType.RESTRICTED), + ] + dm_label_categories = LabelCategories(items=labels, label_groups=label_groups) + + hlabel_info = HLabelInfo.from_dm_label_groups_arrow(dm_label_categories) + + # Check if class_to_group_idx and label_to_idx have the same keys + assert list(hlabel_info.class_to_group_idx.keys()) == list( + hlabel_info.label_to_idx.keys(), + ), "class_to_group_idx and label_to_idx keys do not match" + + assert len(hlabel_info.label_names) == 3 + assert "No class" not in hlabel_info.label_names + for label in ["car", "truck", "plush toy"]: + assert label in hlabel_info.label_names diff --git a/tests/unit/engine/utils/test_auto_configurator.py b/tests/unit/engine/utils/test_auto_configurator.py index f9ad33b0761..681b1b24639 100644 --- a/tests/unit/engine/utils/test_auto_configurator.py +++ b/tests/unit/engine/utils/test_auto_configurator.py @@ -131,9 +131,9 @@ def test_get_model(self, fxt_task: OTXTaskType) -> None: # With label_info label_names = ["class1", "class2", "class3"] label_info = ( - LabelInfo(label_names=label_names, label_groups=[label_names]) + LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names) if fxt_task != OTXTaskType.SEMANTIC_SEGMENTATION - else SegLabelInfo(label_names=label_names, label_groups=[label_names]) + else SegLabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names) ) model = auto_configurator.get_model(label_info=label_info) assert isinstance(model, OTXModel) @@ -147,7 +147,7 @@ def test_get_model(self, fxt_task: OTXTaskType) -> None: def 
test_get_model_set_input_size(self) -> None: auto_configurator = AutoConfigurator(task=OTXTaskType.MULTI_CLASS_CLS) label_names = ["class1", "class2", "class3"] - label_info = LabelInfo(label_names=label_names, label_groups=[label_names]) + label_info = LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names) input_size = 300 model = auto_configurator.get_model(label_info=label_info, input_size=input_size) diff --git a/tests/unit/tools/test_converter.py b/tests/unit/tools/test_converter.py index 711b92b6bd4..eb35d890a4a 100644 --- a/tests/unit/tools/test_converter.py +++ b/tests/unit/tools/test_converter.py @@ -19,7 +19,7 @@ def test_convert(self): assert config["data"]["train_subset"]["num_workers"] == 8 assert config["data"]["val_subset"]["num_workers"] == 8 assert config["data"]["test_subset"]["num_workers"] == 8 - assert config["callbacks"][0]["init_args"]["patience"] == 10 + assert config["callbacks"][0]["init_args"]["patience"] == 4 assert config["data"]["tile_config"]["enable_tiler"] is True assert config["data"]["tile_config"]["overlap"] == 0.5 @@ -57,6 +57,6 @@ def test_instantiate(self, tmp_path): assert engine.datamodule.tile_config.enable_tiler assert len(train_kwargs["callbacks"]) == len(config["callbacks"]) - assert train_kwargs["callbacks"][0].patience == 10 + assert train_kwargs["callbacks"][0].patience == 4 assert len(train_kwargs["logger"]) == len(config["logger"]) assert train_kwargs["max_epochs"] == 50