diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5d0bfad4116..f6573d58f3c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,16 @@ All notable changes to this project will be documented in this file.
   (<https://github.com/openvinotoolkit/training_extensions/pull/4066>)
 - Add GPU memory monitor hook
   (<https://github.com/openvinotoolkit/training_extensions/pull/4118>)
+- Add YOLOv9 model for Object Detection
+  (<https://github.com/openvinotoolkit/training_extensions/pull/3917>, <https://github.com/openvinotoolkit/training_extensions/pull/4026>)
+- Add OV inference for keypoint detection
+  (<https://github.com/openvinotoolkit/training_extensions/pull/3970>)
+- Add tiling for semantic segmentation
+  (<https://github.com/openvinotoolkit/training_extensions/pull/3954>)
+- Add 3D Object Detection task with MonoDETR model
+  (<https://github.com/openvinotoolkit/training_extensions/pull/3979>)
+- Add OpenVINO inference for 3D Object Detection task
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4017>)
 
 ### Enhancements
 
@@ -29,29 +39,6 @@ All notable changes to this project will be documented in this file.
   (<https://github.com/openvinotoolkit/training_extensions/pull/4123>)
 - Improve FMetric computation
   (<https://github.com/openvinotoolkit/training_extensions/pull/4130>)
-
-### Bug fixes
-
-- Fix MaskDINO, MonoDETR recipes
-  (<https://github.com/openvinotoolkit/training_extensions/pull/4080>)
-
-## \[2.3.0\]
-
-### New features
-
-- Add YOLOv9 model for Object Detection
-  (<https://github.com/openvinotoolkit/training_extensions/pull/3917>, <https://github.com/openvinotoolkit/training_extensions/pull/4026>)
-- Add OV inference for keypoint detection
-  (<https://github.com/openvinotoolkit/training_extensions/pull/3970>)
-- Add tiling for semantic segmentation
-  (<https://github.com/openvinotoolkit/training_extensions/pull/3954>)
-- Add 3D Object Detection task with MonoDETR model
-  (<https://github.com/openvinotoolkit/training_extensions/pull/3979>)
-- Add OpenVINO inference for 3D Object Detection task
-  (<https://github.com/openvinotoolkit/training_extensions/pull/4017>)
-
-### Enhancements
-
 - Upgrade OV, MAPI, and NNCF dependencies
   (<https://github.com/openvinotoolkit/training_extensions/pull/3967>)
 - Instance Segmentation Model refactoring
@@ -63,6 +50,8 @@ All notable changes to this project will be documented in this file.
 
 ### Bug fixes
 
+- Fix MaskDINO, MonoDETR recipes
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4080>)
 - Fix a wrong HPO log
   (<https://github.com/openvinotoolkit/training_extensions/pull/3972>)
 - Update model name in rotated detection recipes
@@ -70,6 +59,35 @@ All notable changes to this project will be documented in this file.
 - Fix SupCon flag
   (https://github.com/openvinotoolkit/training_extensions/pull/4076)
 
+## \[2.2.2\]
+
+### Bug fixes
+
+- BC improvement
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4154>)
+
+## \[2.2.1\]
+
+### Bug fixes
+
+- Fix empty annotation in tiling
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4124>)
+- Fix patching early stopping in tools/converter.py, update headers in templates, change training schedule for classification
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4131>)
+- Fix tensor type compatibility in dynamic soft label assigner and RTMDet head
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4140>)
+- Fix DETR target class indices to be of type long in loss calculations
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4143>)
+- Fix arrow format reader for multiclass ROI case
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4145>)
+
+### Enhancements
+
+- Decouple DinoV2 for semantic segmentation task
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4136>)
+- Update Label Info handling
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4127>)
+
 ## \[2.2.0\]
 
 ### New features
@@ -194,6 +212,8 @@ All notable changes to this project will be documented in this file.
   (<https://github.com/openvinotoolkit/training_extensions/pull/4105>)
 - Disable tiling classifier toggle in configurable parameters
   (<https://github.com/openvinotoolkit/training_extensions/pull/4107>)
+- Fix Ellipse shapes for Instance Segmentation
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4152>)
 
 ## \[v2.1.0\]
 
diff --git a/docs/source/guide/release_notes/index.rst b/docs/source/guide/release_notes/index.rst
index e0b8dc86383..6a826256dcf 100644
--- a/docs/source/guide/release_notes/index.rst
+++ b/docs/source/guide/release_notes/index.rst
@@ -4,6 +4,35 @@ Releases
 .. toctree::
   :maxdepth: 1
 
+v2.2.2 (2024.12)
+----------------
+
+Enhancements
+^^^^^^^^^^^^
+
+Bug fixes
+^^^^^^^^^
+
+- BC improvement
+
+v2.2.1 (2024.12)
+----------------
+
+Enhancements
+^^^^^^^^^^^^
+
+- Decouple DinoV2 for semantic segmentation task
+- Update Label Info handling
+
+Bug fixes
+^^^^^^^^^
+
+- Fix empty annotation in tiling
+- Fix patching early stopping in tools/converter.py, update headers in templates, change training schedule for classification
+- Fix tensor type compatibility in dynamic soft label assigner and RTMDet head
+- Fix DETR target class indices to be of type long in loss calculations
+- Fix arrow format reader for multiclass ROI case
+
 v2.2.0 (2024.10)
 ----------------
 
@@ -22,6 +51,8 @@ New features
 - Add Semi-SL MeanTeacher algorithm for Semantic Segmentation
 - Update head and h-label format for hierarchical label classification
 - Support configurable input size
+- Revert the old workaround for detection confidence threshold
+- Add Keypoint Detection legacy template
 
 Enhancements
 ^^^^^^^^^^^^
@@ -40,21 +71,38 @@ Enhancements
 - Change sematic segmentation to consider bbox only annotations
 - Relieve memory usage criteria on batch size 2 during adaptive batch size
 - Remove background label from RT Info for segmentation task
+- Enable export of the feature vectors for semantic segmentation task
 - Prevent using too low confidence thresholds in detection
+- Update HPO interface
+- Bump onnx to 1.17.0 to omit CVE-2024-5187
 
 Bug fixes
 ^^^^^^^^^
 
+- Update anomaly base transforms to use square resizing
 - Fix Combined Dataloader & unlabeled warmup loss in Semi-SL
 - Revert #3579 to fix issues with replacing coco_instance with a different format in some dataset
 - Add num_devices in Engine for multi-gpu training
 - Add missing tile recipes and various tile recipe changes
 - Change categories mapping logic
 - Fix config converter for tiling
+- Fix `BboxOverlaps2D` handling of empty ground-truth annotations in datasets.
 - Fix num_trials calculation on dataset length less than num_class
 - Fix out_features in HierarchicalCBAMClsHead
 - Fix multilabel_accuracy of MixedHLabelAccuracy
 - Fix wrong indices setting in HLabelInfo
+- Add LiteHRNet_18 legacy template
+- Model templates: rename model_status value 'DISCONTINUED' to 'OBSOLETE'
+- Update MRCNN model export to include feature vector and saliency map
+- Upgrade MAPI in 2.2
+- Fix applying model's hparams when loading model from checkpoint
+- Fix incorrect all_groups order configuration in HLabelInfo
+- Fix RTDETR recipes
+- Fix wrong model name in converter & template
+- Fix RTMDet Inst Explain Mode
+- Fix RTDETR Explain Mode
+- Fix classification and semantic segmentation tasks, when ROI provided for images
+- Disable tiling classifier toggle in configurable parameters
 
 v2.1.0 (2024.07)
 ----------------
diff --git a/src/otx/algo/callbacks/adaptive_early_stopping.py b/src/otx/algo/callbacks/adaptive_early_stopping.py
index 0f95e3c277d..754c6cf7c36 100644
--- a/src/otx/algo/callbacks/adaptive_early_stopping.py
+++ b/src/otx/algo/callbacks/adaptive_early_stopping.py
@@ -20,7 +20,7 @@ def __init__(
         self,
         monitor: str,
         min_delta: float = 0.0,
-        patience: int = 3,
+        patience: int = 10,
         verbose: bool = False,
         mode: str = "min",
         strict: bool = True,
diff --git a/src/otx/algo/classification/backbones/vision_transformer.py b/src/otx/algo/classification/backbones/vision_transformer.py
index c60f2ded49e..1255abff0d1 100644
--- a/src/otx/algo/classification/backbones/vision_transformer.py
+++ b/src/otx/algo/classification/backbones/vision_transformer.py
@@ -5,6 +5,7 @@
 """Copy from mmpretrain/models/backbones/vision_transformer.py."""
 from __future__ import annotations
 
+import math
 from functools import partial
 from typing import TYPE_CHECKING, Any, Callable, Literal
 
@@ -46,6 +47,7 @@
     "vit-huge",
     "dinov2-s",
     "dinov2-small",
+    "dinov2-small-seg",
     "dinov2-b",
     "dinov2-base",
     "dinov2-l",
@@ -87,6 +89,7 @@ class VisionTransformer(BaseModule):
         norm_layer: Normalization layer.
         act_layer: MLP activation layer.
         block_fn: Transformer block layer.
+        interpolate_offset: work-around offset to apply when interpolating positional embeddings
         lora: Enable LoRA training.
     """
 
@@ -147,6 +150,17 @@ class VisionTransformer(BaseModule):
                 "num_heads": 6,
                 "reg_tokens": 4,
                 "no_embed_class": True,
+            },
+        ),
+        **dict.fromkeys(
+            ["dinov2-small-seg"],  # segmentation
+            {
+                "patch_size": 14,
+                "embed_dim": 384,
+                "depth": 12,
+                "num_heads": 6,
+                "reg_tokens": 0,
+                "no_embed_class": False,
                 "init_values": 1e-5,
             },
         ),
@@ -193,9 +207,9 @@ class VisionTransformer(BaseModule):
 
     def __init__(  # noqa: PLR0913
         self,
-        arch: VIT_ARCH_TYPE = "vit-base",
+        arch: VIT_ARCH_TYPE | str = "vit-base",
         img_size: int | tuple[int, int] = 224,
-        patch_size: int | tuple[int, int] | None = None,
+        patch_size: int | None = None,
         in_chans: int = 3,
         num_classes: int = 1000,
         embed_dim: int | None = None,
@@ -221,6 +235,7 @@ def __init__(  # noqa: PLR0913
         mlp_layer: nn.Module | None = None,
         act_layer: LayerType | None = None,
         norm_layer: LayerType | None = None,
+        interpolate_offset: float = 0.1,
         lora: bool = False,
     ) -> None:
         super().__init__()
@@ -231,7 +246,7 @@ def __init__(  # noqa: PLR0913
             arch_settings: dict[str, Any] = self.arch_zoo[arch]
 
         self.img_size: int | tuple[int, int] = img_size
-        self.patch_size: int | tuple[int, int] = patch_size or arch_settings.get("patch_size", 16)
+        self.patch_size: int = patch_size or arch_settings.get("patch_size", 16)
         self.embed_dim = embed_dim or arch_settings.get("embed_dim", 768)
         depth = depth or arch_settings.get("depth", 12)
         num_heads = num_heads or arch_settings.get("num_heads", 12)
@@ -251,6 +266,7 @@ def __init__(  # noqa: PLR0913
         self.no_embed_class = no_embed_class  # don't embed prefix positions (includes reg)
         self.dynamic_img_size = dynamic_img_size
         self.grad_checkpointing = False
+        self.interpolate_offset = interpolate_offset
 
         embed_args = {}
         if dynamic_img_size:
@@ -353,15 +369,17 @@ def resize_positional_embeddings(pos_embed: torch.Tensor, new_shape: tuple[int,
             # convert dinov2 pretrained weights
             state_dict = torch.load(checkpoint_path)
             state_dict.pop("mask_token", None)
-            state_dict["reg_token"] = state_dict.pop("register_tokens")
+            if "register_tokens" in state_dict:  # checkpoints without register tokens do not carry this key
+                state_dict["reg_token"] = state_dict.pop("register_tokens")
             state_dict["cls_token"] = state_dict.pop("cls_token") + state_dict["pos_embed"][:, 0]
 
             img_size = (self.img_size, self.img_size) if isinstance(self.img_size, int) else self.img_size
-            patch_size = (self.patch_size, self.patch_size) if isinstance(self.patch_size, int) else self.patch_size
-            state_dict["pos_embed"] = resize_positional_embeddings(
-                state_dict.pop("pos_embed")[:, 1:],
-                (img_size[0] // patch_size[0], img_size[1] // patch_size[1]),
-            )
+            patch_size = (self.patch_size, self.patch_size)
+            if state_dict["pos_embed"].shape != self.pos_embed.shape:
+                state_dict["pos_embed"] = resize_positional_embeddings(
+                    state_dict.pop("pos_embed")[:, 1:],
+                    (img_size[0] // patch_size[0], img_size[1] // patch_size[1]),
+                )
             self.load_state_dict(state_dict, strict=False)
         else:
             msg = f"Unsupported `checkpoint_extension` {checkpoint_ext}, please choose from 'npz' or 'pth'."
@@ -401,6 +419,137 @@ def _pos_embed(self, x: torch.Tensor) -> torch.Tensor:
 
         return self.pos_drop(x)
 
+    def interpolate_pos_encoding(self, x: torch.Tensor, w: int, h: int) -> torch.Tensor:
+        """Interpolate the patch positional embeddings to the token grid of the current input.
+
+        Args:
+            x (torch.Tensor): Token tensor whose first token along dim 1 is the class token.
+            w (int): Input size along the axis mapped to the first axis of the embedding grid.
+            h (int): Input size along the axis mapped to the second axis of the embedding grid.
+
+        Returns:
+            torch.Tensor: ``self.pos_embed`` itself when the sizes already match, else the resized embedding.
+
+        Raises:
+            ValueError: If the stored patch embeddings do not form a square grid.
+        """
+        previous_dtype = x.dtype
+        npatch = x.shape[1] - 1  # the leading class token is not part of the patch grid
+        n = self.pos_embed.shape[1] - 1  # pos_embed[:, 0] is the class-token embedding, the rest is the grid
+        if npatch == n and w == h:
+            return self.pos_embed
+        pos_embed = self.pos_embed.float()
+        class_pos_embed, patch_pos_embed = pos_embed[:, 0], pos_embed[:, 1:]
+        dim = x.shape[-1]
+        w0, h0 = w // self.patch_size, h // self.patch_size
+        m = int(math.sqrt(n))  # Recover the number of patches in each dimension
+        if m * m != n:
+            msg = f"Expected m * m to equal n, but got m={m}, n={n}"
+            raise ValueError(msg)
+        kwargs = {}
+        if self.interpolate_offset:
+            # fix float error by introducing small offset
+            sx, sy = float(w0 + self.interpolate_offset) / m, float(h0 + self.interpolate_offset) / m
+            kwargs["scale_factor"] = (sx, sy)
+        else:
+            # Simply specify an output size instead of a scale factor
+            kwargs["size"] = (w0, h0)
+        patch_pos_embed = nn.functional.interpolate(
+            patch_pos_embed.reshape(1, m, m, dim).permute(0, 3, 1, 2),
+            mode="bicubic",
+            **kwargs,
+        )
+        patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
+        return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype)
+
+    def prepare_tokens_with_masks(self, x: torch.Tensor, masks: torch.Tensor | None = None) -> torch.Tensor:
+        """Turn an input batch into the token sequence consumed by the transformer blocks.
+
+        Args:
+            x (torch.Tensor): Input tensor with four dimensions; the last two are forwarded as ``w`` and ``h``.
+            masks (torch.Tensor | None): Optional condition marking patch tokens to replace with ``mask_token``.
+
+        Returns:
+            torch.Tensor: Tokens ordered as class token, register tokens (when ``reg_token`` is set), patch tokens.
+        """
+        _, _, w, h = x.shape
+        x = self.patch_embed(x)
+        if masks is not None:
+            x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x)
+
+        x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1)
+        x = x + self.interpolate_pos_encoding(x, w, h)
+
+        if self.reg_token is not None:
+            x = torch.cat(  # register tokens are inserted after the positional encoding has been added
+                (
+                    x[:, :1],
+                    self.reg_token.expand(x.shape[0], -1, -1),
+                    x[:, 1:],
+                ),
+                dim=1,
+            )
+
+        return x
+
+    def _get_intermediate_layers_not_chunked(self, x: torch.Tensor, n: int | list[int] = 1) -> list[torch.Tensor]:
+        """Run the transformer blocks on ``x`` and collect the outputs of the selected ones.
+
+        Args:
+            x (torch.Tensor): Input tensor; it is tokenized with ``prepare_tokens_with_masks`` first.
+            n (int | list[int]): Number of last blocks to take, or the explicit block indices to take.
+
+        Returns:
+            list[torch.Tensor]: Outputs of the selected blocks, in block order.
+        """
+        x = self.prepare_tokens_with_masks(x)
+        # If n is an int, take the n last blocks. If it's a list, take them
+        output, total_block_len = [], len(self.blocks)
+        blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
+        for i, blk in enumerate(self.blocks):
+            x = blk(x)
+            if i in blocks_to_take:
+                output.append(x)
+        if len(output) != len(blocks_to_take):  # a requested index did not match any block
+            msg = f"only {len(output)} / {len(blocks_to_take)} blocks found"
+            raise RuntimeError(msg)
+        return output
+
+    def get_intermediate_layers(
+        self,
+        x: torch.Tensor,
+        n: int | list[int] = 1,  # Layers or n last layers to take
+        reshape: bool = False,
+        return_class_token: bool = False,
+        norm: bool = True,
+    ) -> tuple:
+        """Get intermediate layers of the VisionTransformer.
+
+        Args:
+            x (torch.Tensor): Input tensor.
+            n (int | list[int]): Number of last blocks to take, or the explicit block indices to take.
+            reshape (bool): Whether to reshape the patch tokens of each output into a 2D feature map.
+            return_class_token (bool): Whether to pair each output with its class token.
+            norm (bool): Whether to apply ``self.norm`` to the outputs.
+
+        Returns:
+            tuple: Per-block patch tokens, or ``(patch tokens, class token)`` pairs if ``return_class_token``.
+        """
+        outputs = self._get_intermediate_layers_not_chunked(x, n)
+        if norm:
+            outputs = [self.norm(out) for out in outputs]
+        class_tokens = [out[:, 0] for out in outputs]
+        outputs = [out[:, 1 + self.num_reg_tokens :] for out in outputs]
+        if reshape:
+            b, _, w, h = x.shape
+            outputs = [
+                out.reshape(b, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous()
+                for out in outputs
+            ]
+        if return_class_token:
+            return tuple(zip(outputs, class_tokens))
+        return tuple(outputs)
+
     def forward(
         self,
         x: torch.Tensor,
diff --git a/src/otx/algo/common/losses/cross_focal_loss.py b/src/otx/algo/common/losses/cross_focal_loss.py
index e6311dd0ae0..bfec15c0c84 100644
--- a/src/otx/algo/common/losses/cross_focal_loss.py
+++ b/src/otx/algo/common/losses/cross_focal_loss.py
@@ -7,9 +7,8 @@
 
 import torch
 import torch.nn.functional
-from otx.utils.device import get_available_device
 from torch import Tensor, nn
-from torch.amp import custom_fwd
+from torch.cuda.amp import custom_fwd
 
 from .focal_loss import py_sigmoid_focal_loss
 
@@ -80,7 +79,7 @@ def __init__(
 
         self.cls_criterion = cross_sigmoid_focal_loss
 
-    @custom_fwd(device_type=get_available_device(), cast_inputs=torch.float32)
+    @custom_fwd(cast_inputs=torch.float32)
     def forward(
         self,
         pred: Tensor,
diff --git a/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py b/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py
index 4807e5b4a36..e12b1d1b678 100644
--- a/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py
+++ b/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py
@@ -196,7 +196,7 @@ def assign(
         assigned_labels = assigned_gt_inds.new_full((num_bboxes,), -1)
         assigned_labels[valid_mask] = gt_labels[matched_gt_inds].long()
         max_overlaps = assigned_gt_inds.new_full((num_bboxes,), -INF, dtype=torch.float32)
-        max_overlaps[valid_mask] = matched_pred_ious
+        max_overlaps[valid_mask] = matched_pred_ious.to(max_overlaps)
         return AssignResult(num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
 
     def dynamic_k_matching(
diff --git a/src/otx/algo/common/utils/assigners/hungarian_matcher.py b/src/otx/algo/common/utils/assigners/hungarian_matcher.py
index a1fcc316f87..4409bc6eb29 100644
--- a/src/otx/algo/common/utils/assigners/hungarian_matcher.py
+++ b/src/otx/algo/common/utils/assigners/hungarian_matcher.py
@@ -279,7 +279,7 @@ def batch_preparation(
                 "pred_boxes": outputs["pred_boxes"][i],
                 "pred_masks": outputs["pred_masks"][i] if "pred_masks" in outputs else None,
                 "target_boxes": targets[i]["boxes"],
-                "target_labels": targets[i]["labels"],
+                "target_labels": targets[i]["labels"].long(),
                 "target_mask": targets[i]["masks"] if "masks" in targets[i] else None,
             }
             for i in range(batch_size)
diff --git a/src/otx/algo/detection/heads/rtmdet_head.py b/src/otx/algo/detection/heads/rtmdet_head.py
index 2a04483dd09..a2ab8a95c82 100644
--- a/src/otx/algo/detection/heads/rtmdet_head.py
+++ b/src/otx/algo/detection/heads/rtmdet_head.py
@@ -491,7 +491,7 @@ def _get_targets_single(  # type: ignore[override]
         if len(pos_inds) > 0:
             # point-based
             pos_bbox_targets = sampling_result.pos_gt_bboxes
-            bbox_targets[pos_inds, :] = pos_bbox_targets
+            bbox_targets[pos_inds, :] = pos_bbox_targets.to(bbox_targets)
 
             labels[pos_inds] = sampling_result.pos_gt_labels
             if self.train_cfg["pos_weight"] <= 0:
diff --git a/src/otx/algo/detection/losses/rtdetr_loss.py b/src/otx/algo/detection/losses/rtdetr_loss.py
index 361dfaa75c0..d71ca7989dd 100644
--- a/src/otx/algo/detection/losses/rtdetr_loss.py
+++ b/src/otx/algo/detection/losses/rtdetr_loss.py
@@ -77,7 +77,7 @@ def loss_labels_vfl(
         src_logits = outputs["pred_logits"]
         target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
         target_classes = torch.full(src_logits.shape[:2], self.num_classes, dtype=torch.int64, device=src_logits.device)
-        target_classes[idx] = target_classes_o
+        target_classes[idx] = target_classes_o.long()
         target = nn.functional.one_hot(target_classes, num_classes=self.num_classes + 1)[..., :-1]
 
         target_score_o = torch.zeros_like(target_classes, dtype=src_logits.dtype)
diff --git a/src/otx/algo/segmentation/backbones/__init__.py b/src/otx/algo/segmentation/backbones/__init__.py
index 4c7a44cee9b..8b633cc21f8 100644
--- a/src/otx/algo/segmentation/backbones/__init__.py
+++ b/src/otx/algo/segmentation/backbones/__init__.py
@@ -3,8 +3,7 @@
 #
 """Backbone modules for OTX segmentation model."""
 
-from .dinov2 import DinoVisionTransformer
 from .litehrnet import LiteHRNetBackbone
 from .mscan import MSCAN
 
-__all__ = ["LiteHRNetBackbone", "DinoVisionTransformer", "MSCAN"]
+__all__ = ["LiteHRNetBackbone", "MSCAN"]
diff --git a/src/otx/algo/segmentation/backbones/dinov2.py b/src/otx/algo/segmentation/backbones/dinov2.py
deleted file mode 100644
index ce1d605fe38..00000000000
--- a/src/otx/algo/segmentation/backbones/dinov2.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright (C) 2023 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-#
-"""DINO-V2 model for the OTX classification."""
-
-from __future__ import annotations
-
-import logging
-import os
-from functools import partial
-from pathlib import Path
-
-import torch
-from torch import nn
-
-from otx.algo.utils.mmengine_utils import load_checkpoint_to_model, load_from_http
-from otx.utils.utils import get_class_initial_arguments
-
-logger = logging.getLogger()
-
-
-class DinoVisionTransformer(nn.Module):
-    """DINO-v2 Model."""
-
-    def __init__(
-        self,
-        model_name: str,
-        freeze_backbone: bool,
-        out_index: list[int],
-        pretrained_weights: str | None = None,
-    ):
-        super().__init__()
-        self._init_args = get_class_initial_arguments()
-
-        ci_data_root = os.environ.get("CI_DATA_ROOT")
-        pretrained: bool = True
-        if ci_data_root is not None and Path(ci_data_root).exists():
-            pretrained = False
-
-        self.backbone = torch.hub.load(repo_or_dir="facebookresearch/dinov2", model=model_name, pretrained=pretrained)
-
-        if ci_data_root is not None and Path(ci_data_root).exists():
-            ckpt_filename = f"{model_name}4_pretrain.pth"
-            ckpt_path = Path(ci_data_root) / "torch" / "hub" / "checkpoints" / ckpt_filename
-            if not ckpt_path.exists():
-                msg = (
-                    f"Internal cache was specified but cannot find weights file: {ckpt_filename}. load from torch hub."
-                )
-                logger.warning(msg)
-                self.backbone = torch.hub.load(repo_or_dir="facebookresearch/dinov2", model=model_name, pretrained=True)
-            else:
-                self.backbone.load_state_dict(torch.load(ckpt_path))
-
-        if freeze_backbone:
-            self._freeze_backbone(self.backbone)
-
-        # take intermediate layers to preserve spatial dimension
-        self.backbone.forward = partial(
-            self.backbone.get_intermediate_layers,
-            n=out_index,
-            reshape=True,
-        )
-
-        if pretrained_weights is not None:
-            self.load_pretrained_weights(pretrained_weights)
-
-    def _freeze_backbone(self, backbone: nn.Module) -> None:
-        """Freeze the backbone."""
-        for _, v in backbone.named_parameters():
-            v.requires_grad = False
-
-    def init_weights(self) -> None:
-        """Initialize the weights."""
-        # restrict rewriting backbone pretrained weights from torch.hub
-        # unless weights passed explicitly in config
-        if self.init_cfg:
-            return super().init_weights()
-        return None
-
-    def forward(self, imgs: torch.Tensor) -> torch.Tensor:
-        """Forward function."""
-        return self.backbone(imgs)
-
-    def load_pretrained_weights(self, pretrained: str | None = None, prefix: str = "") -> None:
-        """Initialize weights."""
-        checkpoint = None
-        if isinstance(pretrained, str) and Path(pretrained).exists():
-            checkpoint = torch.load(pretrained, "cpu")
-            print(f"init weight - {pretrained}")
-        elif pretrained is not None:
-            cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints"
-            checkpoint = load_from_http(filename=pretrained, map_location="cpu", model_dir=cache_dir)
-            print(f"init weight - {pretrained}")
-        if checkpoint is not None:
-            load_checkpoint_to_model(self, checkpoint, prefix=prefix)
-
-    def __reduce__(self):
-        return (DinoVisionTransformer, self._init_args)
diff --git a/src/otx/algo/segmentation/dino_v2_seg.py b/src/otx/algo/segmentation/dino_v2_seg.py
index cb89a472ed7..70ffe23c9a7 100644
--- a/src/otx/algo/segmentation/dino_v2_seg.py
+++ b/src/otx/algo/segmentation/dino_v2_seg.py
@@ -5,9 +5,14 @@
 
 from __future__ import annotations
 
+from functools import partial
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, ClassVar
+from urllib.parse import urlparse
 
-from otx.algo.segmentation.backbones import DinoVisionTransformer
+from torch.hub import download_url_to_file
+
+from otx.algo.classification.backbones.vision_transformer import VisionTransformer
 from otx.algo.segmentation.heads import FCNHead
 from otx.algo.segmentation.losses import CrossEntropyLossWithIgnore
 from otx.algo.segmentation.segmentors import BaseSegmentationModel
@@ -21,18 +26,41 @@ class DinoV2Seg(OTXSegmentationModel):
     """DinoV2Seg Model."""
 
     AVAILABLE_MODEL_VERSIONS: ClassVar[list[str]] = [
-        "dinov2_vits14",
+        "dinov2-small-seg",
     ]
+    PRETRAINED_WEIGHTS: ClassVar[dict[str, str]] = {
+        "dinov2-small-seg": "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth",
+    }
 
     def _build_model(self) -> nn.Module:
         if self.model_name not in self.AVAILABLE_MODEL_VERSIONS:
             msg = f"Model version {self.model_name} is not supported."
             raise ValueError(msg)
-
-        backbone = DinoVisionTransformer(model_name=self.model_name, freeze_backbone=True, out_index=[8, 9, 10, 11])
+        backbone = VisionTransformer(arch=self.model_name, img_size=self.input_size)
+        backbone.forward = partial(  # type: ignore[method-assign]
+            backbone.get_intermediate_layers,
+            n=[8, 9, 10, 11],
+            reshape=True,
+        )
         decode_head = FCNHead(self.model_name, num_classes=self.num_classes)
         criterion = CrossEntropyLossWithIgnore(ignore_index=self.label_info.ignore_index)  # type: ignore[attr-defined]
 
+        backbone.init_weights()
+        if self.model_name in self.PRETRAINED_WEIGHTS:
+            print(f"init weight - {self.PRETRAINED_WEIGHTS[self.model_name]}")
+            parts = urlparse(self.PRETRAINED_WEIGHTS[self.model_name])
+            filename = Path(parts.path).name
+
+            cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints"
+            cache_file = cache_dir / filename
+            if not Path.exists(cache_file):
+                download_url_to_file(self.PRETRAINED_WEIGHTS[self.model_name], cache_file, "", progress=True)
+            backbone.load_pretrained(checkpoint_path=cache_file)
+
+        # freeze backbone
+        for _, v in backbone.named_parameters():
+            v.requires_grad = False
+
         return BaseSegmentationModel(
             backbone=backbone,
             decode_head=decode_head,
diff --git a/src/otx/algo/segmentation/heads/fcn_head.py b/src/otx/algo/segmentation/heads/fcn_head.py
index 67b0fe6fc86..de877b2825a 100644
--- a/src/otx/algo/segmentation/heads/fcn_head.py
+++ b/src/otx/algo/segmentation/heads/fcn_head.py
@@ -216,7 +216,7 @@ class FCNHead:
             "aggregator_merge_norm": "None",
             "aggregator_use_concat": False,
         },
-        "dinov2_vits14": {
+        "dinov2-small-seg": {
             "in_channels": [384, 384, 384, 384],
             "in_index": [0, 1, 2, 3],
             "input_transform": "resize_concat",
@@ -233,7 +233,7 @@ def __new__(cls, model_name: str, num_classes: int) -> FCNHeadModule:
 
         normalization = (
             partial(build_norm_layer, nn.SyncBatchNorm, requires_grad=True)
-            if model_name == "dinov2_vits14"
+            if model_name == "dinov2-small-seg"
             else partial(build_norm_layer, nn.BatchNorm2d, requires_grad=True)
         )
 
diff --git a/src/otx/core/data/dataset/action_classification.py b/src/otx/core/data/dataset/action_classification.py
index 23391984423..4cfa4808487 100644
--- a/src/otx/core/data/dataset/action_classification.py
+++ b/src/otx/core/data/dataset/action_classification.py
@@ -37,6 +37,7 @@ def __init__(
         image_color_channel: ImageColorChannel = ImageColorChannel.BGR,
         stack_images: bool = True,
         to_tv_image: bool = True,
+        data_format: str = "",
     ) -> None:
         super().__init__(
             dm_subset,
diff --git a/src/otx/core/data/dataset/anomaly.py b/src/otx/core/data/dataset/anomaly.py
index 1a0149423c6..091e7b4066d 100644
--- a/src/otx/core/data/dataset/anomaly.py
+++ b/src/otx/core/data/dataset/anomaly.py
@@ -57,6 +57,7 @@ def __init__(
         image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
         stack_images: bool = True,
         to_tv_image: bool = True,
+        data_format: str = "",
     ) -> None:
         self.task_type = task_type
         super().__init__(
diff --git a/src/otx/core/data/dataset/base.py b/src/otx/core/data/dataset/base.py
index 239a5ded307..5626690d6ca 100644
--- a/src/otx/core/data/dataset/base.py
+++ b/src/otx/core/data/dataset/base.py
@@ -70,6 +70,7 @@ class OTXDataset(Dataset, Generic[T_OTXDataEntity]):
         max_refetch: Maximum number of images to fetch in cache
         image_color_channel: Color channel of images
         stack_images: Whether or not to stack images in collate function in OTXBatchData entity.
+        data_format: Source data format, which was originally passed to datumaro (could be arrow for instance).
 
     """
 
@@ -83,6 +84,7 @@ def __init__(
         image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
         stack_images: bool = True,
         to_tv_image: bool = True,
+        data_format: str = "",
     ) -> None:
         self.dm_subset = dm_subset
         self.transforms = transforms
@@ -92,8 +94,11 @@ def __init__(
         self.image_color_channel = image_color_channel
         self.stack_images = stack_images
         self.to_tv_image = to_tv_image
+        self.data_format = data_format
 
-        if self.dm_subset.categories():
+        if self.dm_subset.categories() and data_format == "arrow":
+            self.label_info = LabelInfo.from_dm_label_groups_arrow(self.dm_subset.categories()[AnnotationType.label])
+        elif self.dm_subset.categories():
             self.label_info = LabelInfo.from_dm_label_groups(self.dm_subset.categories()[AnnotationType.label])
         else:
             self.label_info = NullLabelInfo()
diff --git a/src/otx/core/data/dataset/classification.py b/src/otx/core/data/dataset/classification.py
index 8f4f5ffc241..40b9c4a99fd 100644
--- a/src/otx/core/data/dataset/classification.py
+++ b/src/otx/core/data/dataset/classification.py
@@ -39,7 +39,10 @@ def _get_item_impl(self, index: int) -> MulticlassClsDataEntity | None:
             labels_ids = [
                 label["label"]["_id"] for label in roi["labels"] if label["label"]["domain"] == "CLASSIFICATION"
             ]
-            label_anns = [self.label_info.label_names.index(label_id) for label_id in labels_ids]
+            if self.data_format == "arrow":
+                label_anns = [self.label_info.label_ids.index(label_id) for label_id in labels_ids]
+            else:
+                label_anns = [self.label_info.label_names.index(label_id) for label_id in labels_ids]
         else:
             # extract labels from annotations
             label_anns = [ann.label for ann in item.annotations if isinstance(ann, Label)]
@@ -80,17 +83,21 @@ def _get_item_impl(self, index: int) -> MultilabelClsDataEntity | None:
         ignored_labels: list[int] = []  # This should be assigned form item
         img_data, img_shape, _ = self._get_img_data_and_shape(img)
 
-        label_anns = []
+        label_ids = set()
         for ann in item.annotations:
+            # multilabel information stored in 'multi_label_ids' attribute when the source format is arrow
+            if "multi_label_ids" in ann.attributes:
+                for lbl_idx in ann.attributes["multi_label_ids"]:
+                    label_ids.add(lbl_idx)
+
             if isinstance(ann, Label):
-                label_anns.append(ann)
+                label_ids.add(ann.label)
             else:
                 # If the annotation is not Label, it should be converted to Label.
                 # For Chained Task: Detection (Bbox) -> Classification (Label)
                 label = Label(label=ann.label)
-                if label not in label_anns:
-                    label_anns.append(label)
-        labels = torch.as_tensor([ann.label for ann in label_anns])
+                label_ids.add(label.label)
+        labels = torch.as_tensor(list(label_ids))
 
         entity = MultilabelClsDataEntity(
             image=img_data,
@@ -128,13 +135,22 @@ def __init__(self, **kwargs) -> None:
         self.dm_categories = self.dm_subset.categories()[AnnotationType.label]
 
         # Hlabel classification used HLabelInfo to insert the HLabelData.
-        self.label_info = HLabelInfo.from_dm_label_groups(self.dm_categories)
+        if self.data_format == "arrow":
+            # arrow format stores label IDs as names, have to deal with that here
+            self.label_info = HLabelInfo.from_dm_label_groups_arrow(self.dm_categories)
+        else:
+            self.label_info = HLabelInfo.from_dm_label_groups(self.dm_categories)
+
+        self.id_to_name_mapping = dict(zip(self.label_info.label_ids, self.label_info.label_names))
+        self.id_to_name_mapping[""] = ""
+
         if self.label_info.num_multiclass_heads == 0:
             msg = "The number of multiclass heads should be larger than 0."
             raise ValueError(msg)
 
-        for dm_item in self.dm_subset:
-            self._add_ancestors(dm_item.annotations)
+        if self.data_format != "arrow":
+            for dm_item in self.dm_subset:
+                self._add_ancestors(dm_item.annotations)
 
     def _add_ancestors(self, label_anns: list[Label]) -> None:
         """Add ancestors recursively if some label miss the ancestor information.
@@ -149,7 +165,7 @@ def _add_ancestors(self, label_anns: list[Label]) -> None:
         """
 
         def _label_idx_to_name(idx: int) -> str:
-            return self.label_info.label_names[idx]
+            return self.dm_categories[idx].name
 
         def _label_name_to_idx(name: str) -> int:
             indices = [idx for idx, val in enumerate(self.label_info.label_names) if val == name]
@@ -157,6 +173,8 @@ def _label_name_to_idx(name: str) -> int:
 
         def _get_label_group_idx(label_name: str) -> int:
             if isinstance(self.label_info, HLabelInfo):
+                if self.data_format == "arrow":
+                    return self.label_info.class_to_group_idx[self.id_to_name_mapping[label_name]][0]
                 return self.label_info.class_to_group_idx[label_name][0]
             msg = f"self.label_info should have HLabelInfo type, got {type(self.label_info)}"
             raise ValueError(msg)
@@ -197,17 +215,22 @@ def _get_item_impl(self, index: int) -> HlabelClsDataEntity | None:
         ignored_labels: list[int] = []  # This should be assigned form item
         img_data, img_shape, _ = self._get_img_data_and_shape(img)
 
-        label_anns = []
+        label_ids = set()
         for ann in item.annotations:
+            # in h-cls scenario multilabel information stored in 'multi_label_ids' attribute
+            if "multi_label_ids" in ann.attributes:
+                for lbl_idx in ann.attributes["multi_label_ids"]:
+                    label_ids.add(lbl_idx)
+
             if isinstance(ann, Label):
-                label_anns.append(ann)
+                label_ids.add(ann.label)
             else:
                 # If the annotation is not Label, it should be converted to Label.
                 # For Chained Task: Detection (Bbox) -> Classification (Label)
                 label = Label(label=ann.label)
-                if label not in label_anns:
-                    label_anns.append(label)
-        hlabel_labels = self._convert_label_to_hlabel_format(label_anns, ignored_labels)
+                label_ids.add(label.label)
+
+        hlabel_labels = self._convert_label_to_hlabel_format([Label(label=idx) for idx in label_ids], ignored_labels)
 
         entity = HlabelClsDataEntity(
             image=img_data,
@@ -256,18 +279,18 @@ def _convert_label_to_hlabel_format(self, label_anns: list[Label], ignored_label
             class_indices[i] = -1
 
         for ann in label_anns:
-            ann_name = self.dm_categories.items[ann.label].name
-            ann_parent = self.dm_categories.items[ann.label].parent
+            if self.data_format == "arrow":
+                # skips unknown labels for instance, the empty one
+                if self.dm_categories.items[ann.label].name not in self.id_to_name_mapping:
+                    continue
+                ann_name = self.id_to_name_mapping[self.dm_categories.items[ann.label].name]
+            else:
+                ann_name = self.dm_categories.items[ann.label].name
             group_idx, in_group_idx = self.label_info.class_to_group_idx[ann_name]
-            (parent_group_idx, parent_in_group_idx) = (
-                self.label_info.class_to_group_idx[ann_parent] if ann_parent else (None, None)
-            )
 
             if group_idx < num_multiclass_heads:
                 class_indices[group_idx] = in_group_idx
-                if parent_group_idx is not None and parent_in_group_idx is not None:
-                    class_indices[parent_group_idx] = parent_in_group_idx
-            elif not ignored_labels or ann.label not in ignored_labels:
+            elif ann.label not in ignored_labels:
                 class_indices[num_multiclass_heads + in_group_idx] = 1
             else:
                 class_indices[num_multiclass_heads + in_group_idx] = -1
diff --git a/src/otx/core/data/dataset/instance_segmentation.py b/src/otx/core/data/dataset/instance_segmentation.py
index 27384a3df9d..8b30366a97e 100644
--- a/src/otx/core/data/dataset/instance_segmentation.py
+++ b/src/otx/core/data/dataset/instance_segmentation.py
@@ -5,13 +5,15 @@
 
 from __future__ import annotations
 
+import warnings
+from collections import defaultdict
 from functools import partial
 from typing import Callable
 
 import numpy as np
 import torch
+from datumaro import Bbox, Ellipse, Image, Polygon
 from datumaro import Dataset as DmDataset
-from datumaro import Image, Polygon
 from torchvision import tv_tensors
 
 from otx.core.data.entity.base import ImageInfo
@@ -42,23 +44,49 @@ def _get_item_impl(self, index: int) -> InstanceSegDataEntity | None:
         ignored_labels: list[int] = []
         img_data, img_shape, _ = self._get_img_data_and_shape(img)
 
+        anno_collection: dict[str, list] = defaultdict(list)
+        for anno in item.annotations:
+            anno_collection[anno.__class__.__name__].append(anno)
+
         gt_bboxes, gt_labels, gt_masks, gt_polygons = [], [], [], []
 
-        for annotation in item.annotations:
-            if isinstance(annotation, Polygon):
-                bbox = np.array(annotation.get_bbox(), dtype=np.float32)
+        # TODO(Eugene): https://jira.devtools.intel.com/browse/CVS-159363
+        # Temporary solution to handle multiple annotation types.
+        # Ideally, we should pre-filter annotations during initialization of the dataset.
+        if Polygon.__name__ in anno_collection:  # Polygon for InstSeg has higher priority
+            for poly in anno_collection[Polygon.__name__]:
+                bbox = Bbox(*poly.get_bbox()).points
                 gt_bboxes.append(bbox)
-                gt_labels.append(annotation.label)
+                gt_labels.append(poly.label)
 
                 if self.include_polygons:
-                    gt_polygons.append(annotation)
+                    gt_polygons.append(poly)
                 else:
-                    gt_masks.append(polygon_to_bitmap([annotation], *img_shape)[0])
-
-        # convert xywh to xyxy format
-        bboxes = np.array(gt_bboxes, dtype=np.float32) if gt_bboxes else np.empty((0, 4))
-        bboxes[:, 2:] += bboxes[:, :2]
+                    gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0])
+        elif Bbox.__name__ in anno_collection:
+            bboxes = anno_collection[Bbox.__name__]
+            gt_bboxes = [ann.points for ann in bboxes]
+            gt_labels = [ann.label for ann in bboxes]
+            for box in bboxes:
+                poly = Polygon(box.as_polygon())
+                if self.include_polygons:
+                    gt_polygons.append(poly)
+                else:
+                    gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0])
+        elif Ellipse.__name__ in anno_collection:
+            for ellipse in anno_collection[Ellipse.__name__]:
+                bbox = Bbox(*ellipse.get_bbox()).points
+                gt_bboxes.append(bbox)
+                gt_labels.append(ellipse.label)
+                poly = Polygon(ellipse.as_polygon(num_points=10))
+                if self.include_polygons:
+                    gt_polygons.append(poly)
+                else:
+                    gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0])
+        else:
+            warnings.warn(f"No valid annotations found for image {item.id}!", stacklevel=2)
 
+        bboxes = np.stack(gt_bboxes, dtype=np.float32, axis=0) if gt_bboxes else np.empty((0, 4))
         masks = np.stack(gt_masks, axis=0) if gt_masks else np.zeros((0, *img_shape), dtype=bool)
         labels = np.array(gt_labels, dtype=np.int64)
 
diff --git a/src/otx/core/data/dataset/keypoint_detection.py b/src/otx/core/data/dataset/keypoint_detection.py
index c74b77c9319..47989b653aa 100644
--- a/src/otx/core/data/dataset/keypoint_detection.py
+++ b/src/otx/core/data/dataset/keypoint_detection.py
@@ -39,6 +39,7 @@ def __init__(
         image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
         stack_images: bool = True,
         to_tv_image: bool = True,
+        data_format: str = "",
     ) -> None:
         super().__init__(
             dm_subset,
@@ -49,14 +50,17 @@ def __init__(
             image_color_channel,
             stack_images,
             to_tv_image,
+            data_format,
         )
 
         self.dm_subset = self._get_single_bbox_dataset(dm_subset)
 
         if self.dm_subset.categories():
+            kp_labels = self.dm_subset.categories()[AnnotationType.points][0].labels
             self.label_info = LabelInfo(
-                label_names=self.dm_subset.categories()[AnnotationType.points][0].labels,
+                label_names=kp_labels,
                 label_groups=[],
+                label_ids=[str(i) for i in range(len(kp_labels))],
             )
         else:
             self.label_info = NullLabelInfo()
diff --git a/src/otx/core/data/dataset/object_detection_3d.py b/src/otx/core/data/dataset/object_detection_3d.py
index 4740298ba90..980178ce55a 100644
--- a/src/otx/core/data/dataset/object_detection_3d.py
+++ b/src/otx/core/data/dataset/object_detection_3d.py
@@ -40,6 +40,7 @@ def __init__(
         image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
         stack_images: bool = True,
         to_tv_image: bool = False,
+        data_format: str = "",
         max_objects: int = 50,
     ) -> None:
         super().__init__(
@@ -51,6 +52,7 @@ def __init__(
             image_color_channel,
             stack_images,
             to_tv_image,
+            data_format,
         )
         self.max_objects = max_objects
         self.subset_type = list(self.dm_subset.get_subset_info())[-1].split(":")[0]
diff --git a/src/otx/core/data/dataset/segmentation.py b/src/otx/core/data/dataset/segmentation.py
index 0ab803b4f58..5672989e7fd 100644
--- a/src/otx/core/data/dataset/segmentation.py
+++ b/src/otx/core/data/dataset/segmentation.py
@@ -168,6 +168,7 @@ def __init__(
         stack_images: bool = True,
         to_tv_image: bool = True,
         ignore_index: int = 255,
+        data_format: str = "",
     ) -> None:
         super().__init__(
             dm_subset,
@@ -188,6 +189,7 @@ def __init__(
             label_names=self.label_info.label_names,
             label_groups=self.label_info.label_groups,
             ignore_index=ignore_index,
+            label_ids=self.label_info.label_ids,
         )
         self.ignore_index = ignore_index
 
diff --git a/src/otx/core/data/dataset/tile.py b/src/otx/core/data/dataset/tile.py
index d69c94b03e0..8fae8133afa 100644
--- a/src/otx/core/data/dataset/tile.py
+++ b/src/otx/core/data/dataset/tile.py
@@ -7,6 +7,8 @@
 
 import logging as log
 import operator
+import warnings
+from collections import defaultdict
 from copy import deepcopy
 from itertools import product
 from typing import TYPE_CHECKING, Callable
@@ -16,7 +18,7 @@
 import torch
 from datumaro import Dataset as DmDataset
 from datumaro import DatasetItem, Image
-from datumaro.components.annotation import AnnotationType, Bbox, ExtractedMask, Polygon
+from datumaro.components.annotation import AnnotationType, Bbox, Ellipse, ExtractedMask, Polygon
 from datumaro.plugins.tiling import Tile
 from datumaro.plugins.tiling.tile import _apply_offset
 from datumaro.plugins.tiling.util import (
@@ -97,6 +99,7 @@ def __init__(
         self._tile_size = tile_size
         self._tile_ann_func_map[AnnotationType.polygon] = OTXTileTransform._tile_polygon
         self._tile_ann_func_map[AnnotationType.mask] = OTXTileTransform._tile_masks
+        self._tile_ann_func_map[AnnotationType.ellipse] = OTXTileTransform._tile_ellipse
         self.with_full_img = with_full_img
 
     @staticmethod
@@ -161,6 +164,45 @@ def _tile_masks(
             attributes=deepcopy(ann.attributes),
         )
 
+    @staticmethod
+    def _tile_ellipse(
+        ann: Ellipse,
+        roi_box: sg.Polygon,
+        threshold_drop_ann: float = 0.8,
+        *args,  # noqa: ARG004
+        **kwargs,  # noqa: ARG004
+    ) -> Polygon | None:
+        """Clip an ellipse annotation to a tile ROI and return the result as a polygon.
+
+        Args:
+            ann: Ellipse annotation; its outline is sampled with ``get_points(num_points=10)``.
+            roi_box: Tile region of interest the annotation is intersected with.
+            threshold_drop_ann: Minimum ratio of clipped area to full outline area required to keep it.
+
+        Returns:
+            Clipped ``Polygon`` (passed through ``_apply_offset``), or ``None`` if the annotation is dropped.
+        """
+        polygon = sg.Polygon(ann.get_points(num_points=10))
+        # NOTE: polygon may be invalid, e.g. self-intersecting
+        if not roi_box.intersects(polygon) or not polygon.is_valid:
+            return None
+        # NOTE: intersection may return a GeometryCollection or MultiPolygon (or a bare line/point on touch)
+        inter = polygon.intersection(roi_box)
+        geoms = inter.geoms if isinstance(inter, (sg.GeometryCollection, sg.MultiPolygon)) else [inter]
+        # Only polygonal parts have an exterior ring; keep the largest valid one.
+        parts = [geom for geom in geoms if isinstance(geom, sg.Polygon) and geom.is_valid]
+        if not parts:
+            return None
+        inter = max(parts, key=operator.attrgetter("area"))
+        if inter.area / polygon.area < threshold_drop_ann:
+            return None
+        inter = _apply_offset(inter, roi_box)
+        return Polygon(
+            points=[p for xy in inter.exterior.coords for p in xy],
+            attributes=deepcopy(ann.attributes),
+            label=ann.label,
+        )
+
     def _extract_rois(self, image: Image) -> list[BboxIntCoords]:
         """Extracts Tile ROIs from the given image.
 
@@ -507,24 +549,51 @@ def _get_item_impl(self, index: int) -> TileInstSegDataEntity:  # type: ignore[o
         img = item.media_as(Image)
         img_data, img_shape, _ = self._get_img_data_and_shape(img)
 
+        anno_collection: dict[str, list] = defaultdict(list)
+        for anno in item.annotations:
+            anno_collection[anno.__class__.__name__].append(anno)
+
         gt_bboxes, gt_labels, gt_masks, gt_polygons = [], [], [], []
 
-        for annotation in item.annotations:
-            if isinstance(annotation, Polygon):
-                bbox = np.array(annotation.get_bbox(), dtype=np.float32)
+        # TODO(Eugene): https://jira.devtools.intel.com/browse/CVS-159363
+        # Temporary solution to handle multiple annotation types.
+        # Ideally, we should pre-filter annotations during initialization of the dataset.
+
+        if Polygon.__name__ in anno_collection:  # Polygon for InstSeg has higher priority
+            for poly in anno_collection[Polygon.__name__]:
+                bbox = Bbox(*poly.get_bbox()).points
                 gt_bboxes.append(bbox)
-                gt_labels.append(annotation.label)
+                gt_labels.append(poly.label)
 
                 if self._dataset.include_polygons:
-                    gt_polygons.append(annotation)
+                    gt_polygons.append(poly)
                 else:
-                    gt_masks.append(polygon_to_bitmap([annotation], *img_shape)[0])
-
-        # convert xywh to xyxy format
-        bboxes = np.array(gt_bboxes, dtype=np.float32)
-        bboxes[:, 2:] += bboxes[:, :2]
+                    gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0])
+        elif Bbox.__name__ in anno_collection:
+            boxes = anno_collection[Bbox.__name__]
+            gt_bboxes = [ann.points for ann in boxes]
+            gt_labels = [ann.label for ann in boxes]
+            for box in boxes:
+                poly = Polygon(box.as_polygon())
+                if self._dataset.include_polygons:
+                    gt_polygons.append(poly)
+                else:
+                    gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0])
+        elif Ellipse.__name__ in anno_collection:
+            for ellipse in anno_collection[Ellipse.__name__]:
+                bbox = Bbox(*ellipse.get_bbox()).points
+                gt_bboxes.append(bbox)
+                gt_labels.append(ellipse.label)
+                poly = Polygon(ellipse.as_polygon(num_points=10))
+                if self._dataset.include_polygons:
+                    gt_polygons.append(poly)
+                else:
+                    gt_masks.append(polygon_to_bitmap([poly], *img_shape)[0])
+        else:
+            warnings.warn(f"No valid annotations found for image {item.id}!", stacklevel=2)
 
-        masks = np.stack(gt_masks, axis=0) if gt_masks else np.zeros((0, *img_shape), dtype=bool)
+        bboxes = np.stack(gt_bboxes, dtype=np.float32) if gt_bboxes else np.empty((0, 4), dtype=np.float32)
+        masks = np.stack(gt_masks, axis=0) if gt_masks else np.empty((0, *img_shape), dtype=bool)
         labels = np.array(gt_labels, dtype=np.int64)
 
         tile_entities, tile_attrs = self.get_tiles(img_data, item, index)
diff --git a/src/otx/core/data/factory.py b/src/otx/core/data/factory.py
index fd731109269..1f1e500b0fb 100644
--- a/src/otx/core/data/factory.py
+++ b/src/otx/core/data/factory.py
@@ -73,6 +73,7 @@ def create(  # noqa: PLR0911
         dm_subset: DmDataset,
         cfg_subset: SubsetConfig,
         mem_cache_handler: MemCacheHandlerBase,
+        data_format: str,
         mem_cache_img_max_size: tuple[int, int] | None = None,
         image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
         stack_images: bool = True,
@@ -85,6 +86,7 @@ def create(  # noqa: PLR0911
         common_kwargs = {
             "dm_subset": dm_subset,
             "transforms": transforms,
+            "data_format": data_format,
             "mem_cache_handler": mem_cache_handler,
             "mem_cache_img_max_size": mem_cache_img_max_size,
             "image_color_channel": image_color_channel,
diff --git a/src/otx/core/data/module.py b/src/otx/core/data/module.py
index f9b7cac8fd4..6449a07c270 100644
--- a/src/otx/core/data/module.py
+++ b/src/otx/core/data/module.py
@@ -107,13 +107,6 @@ def __init__(  # noqa: PLR0913
         self.subsets: dict[str, OTXDataset] = {}
         self.save_hyperparameters(ignore=["input_size"])
 
-        # TODO (Jaeguk): This is workaround for a bug in Datumaro.
-        # These lines should be removed after next datumaro release.
-        # https://github.com/openvinotoolkit/datumaro/pull/1223/files
-        from datumaro.plugins.data_formats.video import VIDEO_EXTENSIONS
-
-        VIDEO_EXTENSIONS.append(".mp4")
-
         dataset = DmDataset.import_from(self.data_root, format=self.data_format)
         if self.task != "H_LABEL_CLS":
             dataset = pre_filtering(
@@ -195,6 +188,7 @@ def __init__(  # noqa: PLR0913
                 dm_subset=dm_subset.as_dataset(),
                 cfg_subset=config_mapping[name],
                 mem_cache_handler=mem_cache_handler,
+                data_format=self.data_format,
                 mem_cache_img_max_size=mem_cache_img_max_size,
                 image_color_channel=image_color_channel,
                 stack_images=stack_images,
@@ -238,6 +232,7 @@ def __init__(  # noqa: PLR0913
                         include_polygons=include_polygons,
                         ignore_index=ignore_index,
                         vpm_config=vpm_config,
+                        data_format=self.data_format,
                     )
                     self.subsets[transform_key] = unlabeled_dataset
             else:
@@ -252,6 +247,7 @@ def __init__(  # noqa: PLR0913
                     include_polygons=include_polygons,
                     ignore_index=ignore_index,
                     vpm_config=vpm_config,
+                    data_format=self.data_format,
                 )
                 self.subsets[name] = unlabeled_dataset
 
diff --git a/src/otx/core/data/pre_filtering.py b/src/otx/core/data/pre_filtering.py
index 13fc08c7ebc..90487367d17 100644
--- a/src/otx/core/data/pre_filtering.py
+++ b/src/otx/core/data/pre_filtering.py
@@ -88,7 +88,7 @@ def remove_unused_labels(
         used_labels = [0, *used_labels]
     if data_format == "common_semantic_segmentation_with_subset_dirs" and len(original_categories) < len(used_labels):
         msg = (
-            "There are labeles mismatch in dataset categories and actuall categories comes from semantic masks."
+            "There are labels mismatch in dataset categories and actual categories comes from semantic masks."
             "Please, check `dataset_meta.json` file."
         )
         raise ValueError(msg)
diff --git a/src/otx/core/model/base.py b/src/otx/core/model/base.py
index a48325ca98c..a190a5f1bc6 100644
--- a/src/otx/core/model/base.py
+++ b/src/otx/core/model/base.py
@@ -404,6 +404,11 @@ def load_state_dict_incrementally(self, ckpt: dict[str, Any], *args, **kwargs) -
             msg = "Checkpoint should have `label_info`."
             raise ValueError(msg, ckpt_label_info)
 
+        if not hasattr(ckpt_label_info, "label_ids"):
+            msg = "Loading checkpoint from OTX < 2.2.1, label_ids are assigned automatically"
+            logger.info(msg)
+            ckpt_label_info.label_ids = [str(i) for i, _ in enumerate(ckpt_label_info.label_names)]
+
         if ckpt_label_info != self.label_info:
             msg = (
                 "Load model state dictionary incrementally: "
@@ -757,7 +762,7 @@ def lr_scheduler_step(self, scheduler: LRSchedulerTypeUnion, metric: Tensor) ->
             return super().lr_scheduler_step(scheduler=scheduler, metric=metric)
 
         if len(warmup_schedulers) != 1:
-            msg = "No more than two warmup schedulers coexist."
+            msg = "No more than one warmup schedulers coexist."
             raise RuntimeError(msg)
 
         warmup_scheduler = next(iter(warmup_schedulers))
@@ -822,7 +827,11 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo:
         if isinstance(label_info, int):
             return LabelInfo.from_num_classes(num_classes=label_info)
         if isinstance(label_info, Sequence) and all(isinstance(name, str) for name in label_info):
-            return LabelInfo(label_names=label_info, label_groups=[label_info])
+            return LabelInfo(
+                label_names=label_info,
+                label_groups=[label_info],
+                label_ids=[str(i) for i in range(len(label_info))],
+            )
         if isinstance(label_info, LabelInfo):
             return label_info
 
@@ -1115,7 +1124,7 @@ def _create_label_info_from_ov_ir(self) -> LabelInfo:
             )
 
             logger.warning(msg)
-            return LabelInfo(label_names=label_names, label_groups=[label_names])
+            return LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=[])
 
         msg = "Cannot construct LabelInfo from OpenVINO IR. Please check this model is trained by OTX."
         raise ValueError(msg)
diff --git a/src/otx/core/model/segmentation.py b/src/otx/core/model/segmentation.py
index a22cc15fbc4..eeebba408fe 100644
--- a/src/otx/core/model/segmentation.py
+++ b/src/otx/core/model/segmentation.py
@@ -93,7 +93,6 @@ def __init__(
         self.unsupervised_weight = unsupervised_weight
         self.semisl_start_epoch = semisl_start_epoch
         self.drop_unreliable_pixels_percent = drop_unreliable_pixels_percent
-
         super().__init__(
             label_info=label_info,
             input_size=input_size,
@@ -254,7 +253,11 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo:
         if isinstance(label_info, int):
             return SegLabelInfo.from_num_classes(num_classes=label_info)
         if isinstance(label_info, Sequence) and all(isinstance(name, str) for name in label_info):
-            return SegLabelInfo(label_names=label_info, label_groups=[label_info])
+            return SegLabelInfo(
+                label_names=label_info,
+                label_groups=[label_info],
+                label_ids=[str(i) for i in range(len(label_info))],
+            )
         if isinstance(label_info, SegLabelInfo):
             return label_info
 
diff --git a/src/otx/core/schedulers/warmup_schedulers.py b/src/otx/core/schedulers/warmup_schedulers.py
index 6de763bb52b..0b1d12a711e 100644
--- a/src/otx/core/schedulers/warmup_schedulers.py
+++ b/src/otx/core/schedulers/warmup_schedulers.py
@@ -19,8 +19,9 @@ class LinearWarmupScheduler(LambdaLR):
     """Linear Warmup scheduler.
 
     Args:
-        num_warmup_steps: Learning rate will linearly increased during the period same as this number.
-        warmup_interval: If "epoch", count the number of steps for the warmup period.
+        optimizer (Optimizer): Optimizer to apply the scheduler.
+        num_warmup_steps (int): Learning rate will be linearly increased over a warmup period of this length.
+        interval (Literal["step", "epoch"]): If "epoch", count the number of epochs for the warmup period.
             Otherwise, the iteration step will be the warmup period.
     """
 
@@ -55,7 +56,7 @@ class LinearWarmupSchedulerCallable:
         main_scheduler_callable: Callable to create a LR scheduler that will be mainly used.
         num_warmup_steps: Learning rate will linearly increased during the period same as this number.
             If it is less than equal to zero, do not create `LinearWarmupScheduler`.
-        warmup_interval: If "epoch", count the number of steps for the warmup period.
+        warmup_interval: If "epoch", count the number of epochs for the warmup period.
             Otherwise, the iteration step will be the warmup period.
         monitor: If given, override the main scheduler's `monitor` attribute.
     """
diff --git a/src/otx/core/types/export.py b/src/otx/core/types/export.py
index cc9c592f3b9..fc35a39b8f7 100644
--- a/src/otx/core/types/export.py
+++ b/src/otx/core/types/export.py
@@ -9,6 +9,7 @@
 from dataclasses import dataclass, fields
 from enum import Enum
 
+import otx
 from otx.core.config.data import TileConfig
 from otx.core.types.label import HLabelInfo, LabelInfo
 
@@ -102,7 +103,8 @@ def to_metadata(self) -> dict[tuple[str, str], str]:
         all_label_ids = ""
         for lbl in self.label_info.label_names:
             all_labels += lbl.replace(" ", "_") + " "
-            all_label_ids += lbl.replace(" ", "_") + " "
+        for lbl_id in self.label_info.label_ids:
+            all_label_ids += lbl_id + " "
 
         metadata = {
             # Common
@@ -112,6 +114,7 @@ def to_metadata(self) -> dict[tuple[str, str], str]:
             ("model_info", "labels"): all_labels.strip(),
             ("model_info", "label_ids"): all_label_ids.strip(),
             ("model_info", "optimization_config"): json.dumps(self.optimization_config),
+            ("model_info", "otx_version"): otx.__version__,
         }
 
         if isinstance(self.label_info, HLabelInfo):
diff --git a/src/otx/core/types/label.py b/src/otx/core/types/label.py
index c89f67d7fd6..19c3ece3bb4 100644
--- a/src/otx/core/types/label.py
+++ b/src/otx/core/types/label.py
@@ -5,10 +5,13 @@
 
 from __future__ import annotations
 
+import copy
 import json
 from dataclasses import asdict, dataclass
 from typing import TYPE_CHECKING, Any
 
+from datumaro.components.annotation import GroupType
+
 if TYPE_CHECKING:
     from datumaro import Label, LabelCategories
 
@@ -27,6 +30,7 @@ class LabelInfo:
     """Object to represent label information."""
 
     label_names: list[str]
+    label_ids: list[str]
     label_groups: list[list[str]]
 
     @property
@@ -51,10 +55,12 @@ def from_num_classes(cls, num_classes: int) -> LabelInfo:
             return NullLabelInfo()
 
         label_names = [f"label_{idx}" for idx in range(num_classes)]
+        label_ids = [str(i) for i in range(num_classes)]
 
         return cls(
             label_names=label_names,
             label_groups=[label_names],
+            label_ids=label_ids,
         )
 
     @classmethod
@@ -79,6 +85,38 @@ def from_dm_label_groups(cls, dm_label_categories: LabelCategories) -> LabelInfo
         return LabelInfo(
             label_names=label_names,
             label_groups=label_groups,
+            label_ids=[str(i) for i in range(len(label_names))],
+        )
+
+    @classmethod
+    def from_dm_label_groups_arrow(cls, dm_label_categories: LabelCategories) -> LabelInfo:
+        """Build label info from datumaro's arrow-format categories without modifying them."""
+        label_names = []
+        for item in dm_label_categories.items:
+            for attr in item.attributes:
+                if attr.startswith("__name__"):
+                    label_names.append(attr[len("__name__") :])
+                    break
+
+        if len(label_names) != len(dm_label_categories.items):
+            msg = "Wrong arrow format: can not extract label names from attributes"
+            raise ValueError(msg)
+
+        label_ids = [item.name for item in dm_label_categories.items]
+        id_to_name_mapping = {item.name: label_names[i] for i, item in enumerate(dm_label_categories.items)}
+
+        # Translate group members from ids to names in new lists; the input groups stay as they are.
+        label_groups = [
+            [id_to_name_mapping.get(label, label) for label in label_group.labels]
+            for label_group in dm_label_categories.label_groups
+        ]
+        if not label_groups:  # Single-label classification
+            label_groups = [label_names]
+
+        return LabelInfo(
+            label_names=label_names,
+            label_groups=label_groups,
+            label_ids=label_ids,
         )
 
     def as_dict(self) -> dict[str, Any]:
@@ -279,8 +317,73 @@ def convert_labels_if_needed(
             label_to_idx=label_to_idx,
             label_tree_edges=get_label_tree_edges(dm_label_categories.items),
             empty_multiclass_head_indices=[],  # consider the label removing case
+            label_ids=[str(i) for i in range(len(label_names))],
         )
 
+    @classmethod
+    def from_dm_label_groups_arrow(cls, dm_label_categories: LabelCategories) -> HLabelInfo:
+        """Generate HLabelData from the Datumaro LabelCategories. Arrow-specific implementation.
+
+        Works on a deep copy, so the given categories are left untouched. Item names are kept as
+        label ids, while the `__name__`-prefixed attribute of each item provides the label name.
+        The first label of the (last) RESTRICTED group is treated as the empty label and excluded.
+
+        Args:
+            dm_label_categories (LabelCategories): the label categories of datumaro.
+
+        Raises:
+            ValueError: if a label name cannot be extracted for every item, or if the empty
+                label does not correspond to any item.
+        """
+        dm_label_categories = copy.deepcopy(dm_label_categories)
+
+        empty_label_name = None
+        for label_group in dm_label_categories.label_groups:
+            if label_group.group_type == GroupType.RESTRICTED:
+                empty_label_name = label_group.labels[0]
+
+        dm_label_categories.label_groups = [
+            group for group in dm_label_categories.label_groups if group.group_type != GroupType.RESTRICTED
+        ]
+
+        empty_label_id = None
+        label_names = []
+        for item in dm_label_categories.items:
+            for attr in item.attributes:
+                if attr.startswith("__name__"):
+                    name = attr[len("__name__") :]
+                    if name == empty_label_name:
+                        empty_label_id = item.name
+                    label_names.append(name)
+                    break
+
+        if len(label_names) != len(dm_label_categories.items):
+            msg = "Wrong arrow file: can not extract label names from attributes"
+            raise ValueError(msg)
+
+        if empty_label_name is not None:
+            if empty_label_id is None:
+                # Fail with a clear message instead of the bare error raised by list.remove().
+                msg = f"Wrong arrow file: empty label '{empty_label_name}' does not match any label item"
+                raise ValueError(msg)
+            label_names.remove(empty_label_name)
+        dm_label_categories.items = [item for item in dm_label_categories.items if item.name != empty_label_id]
+        label_ids = [item.name for item in dm_label_categories.items]
+
+        id_to_name_mapping = {item.name: label_names[i] for i, item in enumerate(dm_label_categories.items)}
+
+        # Rename items (and their parent references) from ids to names so the generic builder can be reused.
+        for i, item in enumerate(dm_label_categories.items):
+            item.name = label_names[i]
+            item.parent = id_to_name_mapping.get(item.parent, item.parent)
+
+        for label_group in dm_label_categories.label_groups:
+            label_group.labels = [id_to_name_mapping.get(label, label) for label in label_group.labels]
+
+        obj = cls.from_dm_label_groups(dm_label_categories)
+        obj.label_ids = label_ids
+        return obj
+
     def as_head_config_dict(self) -> dict[str, Any]:
         """Return a dictionary including params needed to configure the HLabel MMPretrained head network."""
         return {
@@ -326,7 +416,7 @@ def from_num_classes(cls, num_classes: int) -> LabelInfo:
         if num_classes == 1:
             # binary segmentation
             label_names = ["background", "label_0"]
-            return SegLabelInfo(label_names=label_names, label_groups=[label_names])
+            return SegLabelInfo(label_names=label_names, label_groups=[label_names], label_ids=["0", "1"])
 
         return super().from_num_classes(num_classes)
 
@@ -336,7 +426,7 @@ class NullLabelInfo(LabelInfo):
     """Represent no label information. It is used for Visual Prompting tasks."""
 
     def __init__(self) -> None:
-        super().__init__(label_names=[], label_groups=[[]])
+        super().__init__(label_names=[], label_groups=[[]], label_ids=[])
 
     @classmethod
     def from_json(cls, _: str) -> LabelInfo:
@@ -349,7 +439,7 @@ class AnomalyLabelInfo(LabelInfo):
     """Represent no label information. It is used for Anomaly tasks."""
 
     def __init__(self) -> None:
-        super().__init__(label_names=["Normal", "Anomaly"], label_groups=[["Normal", "Anomaly"]])
+        super().__init__(label_names=["Normal", "Anomaly"], label_groups=[["Normal", "Anomaly"]], label_ids=["0", "1"])
 
 
 # Dispatching rules:
diff --git a/src/otx/recipe/_base_/train.yaml b/src/otx/recipe/_base_/train.yaml
index 7dba87f8381..806d09e3e0a 100644
--- a/src/otx/recipe/_base_/train.yaml
+++ b/src/otx/recipe/_base_/train.yaml
@@ -40,6 +40,8 @@ callbacks:
     init_args:
       max_interval: 5
       decay: -0.025
+      min_earlystop_patience: 5
+      min_lrschedule_patience: 3
 logger:
   - class_path: lightning.pytorch.loggers.csv_logs.CSVLogger
     init_args:
diff --git a/src/otx/recipe/anomaly_classification/stfpm.yaml b/src/otx/recipe/anomaly_classification/stfpm.yaml
index ec1c6af8ddc..91cf676c201 100644
--- a/src/otx/recipe/anomaly_classification/stfpm.yaml
+++ b/src/otx/recipe/anomaly_classification/stfpm.yaml
@@ -16,7 +16,7 @@ overrides:
   precision: 32
   max_epochs: 100
   callbacks:
-    - class_path: lightning.pytorch.callbacks.EarlyStopping
+    - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
         patience: 5
         mode: max
diff --git a/src/otx/recipe/anomaly_detection/stfpm.yaml b/src/otx/recipe/anomaly_detection/stfpm.yaml
index b13534505a4..25bb7be88bb 100644
--- a/src/otx/recipe/anomaly_detection/stfpm.yaml
+++ b/src/otx/recipe/anomaly_detection/stfpm.yaml
@@ -21,7 +21,7 @@ overrides:
   precision: 32
   max_epochs: 100
   callbacks:
-    - class_path: lightning.pytorch.callbacks.EarlyStopping
+    - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
         patience: 5
         mode: max
diff --git a/src/otx/recipe/anomaly_segmentation/stfpm.yaml b/src/otx/recipe/anomaly_segmentation/stfpm.yaml
index 9a3d9c85d6e..604ff9ba029 100644
--- a/src/otx/recipe/anomaly_segmentation/stfpm.yaml
+++ b/src/otx/recipe/anomaly_segmentation/stfpm.yaml
@@ -16,7 +16,7 @@ overrides:
   precision: 32
   max_epochs: 100
   callbacks:
-    - class_path: lightning.pytorch.callbacks.EarlyStopping
+    - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
         patience: 5
         mode: max
diff --git a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml
index b36f48e14c9..1191e0e22d3 100644
--- a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml
+++ b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml
@@ -10,12 +10,16 @@ model:
         weight_decay: 0.05
 
     scheduler:
-      class_path: lightning.pytorch.cli.ReduceLROnPlateau
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        mode: max
-        factor: 0.5
-        patience: 1
-        monitor: val/accuracy
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
 
 engine:
   task: H_LABEL_CLS
@@ -26,11 +30,12 @@ callback_monitor: val/accuracy
 data: ../../_base_/data/classification.yaml
 overrides:
   max_epochs: 90
-  callbacks:
-    - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
-      init_args:
-        patience: 3
 
   data:
     task: H_LABEL_CLS
     data_format: datumaro
+
+  callbacks:
+    - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
+      init_args:
+        patience: 5
diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
index 4bfbe3fc121..2bb282e4378 100644
--- a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
+++ b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
@@ -11,12 +11,16 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: lightning.pytorch.cli.ReduceLROnPlateau
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        mode: max
-        factor: 0.5
-        patience: 1
-        monitor: val/accuracy
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
 
 engine:
   task: H_LABEL_CLS
@@ -29,11 +33,12 @@ overrides:
   reset:
     - data.train_subset.transforms
 
-  max_epochs: 90
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
+
+  max_epochs: 90
 
   data:
     task: H_LABEL_CLS
diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
index 500cc168baa..fbc2d11ce21 100644
--- a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
+++ b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
@@ -10,6 +10,18 @@ model:
         momentum: 0.9
         weight_decay: 0.0001
 
+    scheduler:
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
+      init_args:
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
+
 engine:
   task: H_LABEL_CLS
   device: auto
@@ -25,8 +37,9 @@ overrides:
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
         warmup_iters: 750
+
   data:
     task: H_LABEL_CLS
     data_format: datumaro
diff --git a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml
index 211bc8fa883..c94b7dd16b6 100644
--- a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml
+++ b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml
@@ -19,7 +19,7 @@ model:
           init_args:
             mode: max
             factor: 0.5
-            patience: 1
+            patience: 3
             monitor: val/accuracy
 
 engine:
@@ -31,10 +31,11 @@ callback_monitor: val/accuracy
 data: ../../_base_/data/classification.yaml
 overrides:
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
 
   data:
     task: H_LABEL_CLS
diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml
index 2078c98b43b..d36cdfff5b7 100644
--- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml
+++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml
@@ -12,10 +12,14 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        T_max: 100000
-        eta_min: 0
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+          init_args:
+            T_max: 100000
+            eta_min: 0
 
 engine:
   task: H_LABEL_CLS
@@ -29,10 +33,11 @@ overrides:
     - data.train_subset.transforms
 
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
 
   data:
     task: H_LABEL_CLS
diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml
index 0f2d7b60a6a..9bec7e924e6 100644
--- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml
+++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml
@@ -12,10 +12,14 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        T_max: 100000
-        eta_min: 0
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+          init_args:
+            T_max: 100000
+            eta_min: 0
 
 engine:
   task: H_LABEL_CLS
@@ -29,10 +33,11 @@ overrides:
     - data.train_subset.transforms
 
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
 
   data:
     task: H_LABEL_CLS
diff --git a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml
index faab071ff5d..d00a5109f7d 100644
--- a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml
+++ b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml
@@ -12,10 +12,14 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        T_max: 100000
-        eta_min: 0
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+          init_args:
+            T_max: 100000
+            eta_min: 0
 
 engine:
   task: H_LABEL_CLS
@@ -29,10 +33,11 @@ overrides:
     - data.train_subset.transforms
 
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
 
   data:
     task: H_LABEL_CLS
diff --git a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml
index f5446d3cca6..cdc06e19f52 100644
--- a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml
+++ b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml
@@ -12,12 +12,16 @@ model:
         weight_decay: 0.05
 
     scheduler:
-      class_path: lightning.pytorch.cli.ReduceLROnPlateau
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        mode: max
-        factor: 0.5
-        patience: 1
-        monitor: val/accuracy
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
 
 engine:
   task: MULTI_CLASS_CLS
@@ -28,7 +32,8 @@ callback_monitor: val/accuracy
 data: ../../_base_/data/classification.yaml
 overrides:
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
diff --git a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml
index 300091fab8c..a11967f1068 100644
--- a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml
+++ b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml
@@ -11,12 +11,16 @@ model:
         weight_decay: 0.05
 
     scheduler:
-      class_path: lightning.pytorch.cli.ReduceLROnPlateau
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        mode: max
-        factor: 0.5
-        patience: 1
-        monitor: val/accuracy
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
 
 engine:
   task: MULTI_CLASS_CLS
@@ -27,7 +31,8 @@ callback_monitor: val/accuracy
 data: ../../_base_/data/classification.yaml
 overrides:
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml
index 428fb89055b..fe6244853d6 100644
--- a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml
+++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml
@@ -12,12 +12,16 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: lightning.pytorch.cli.ReduceLROnPlateau
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        mode: max
-        factor: 0.5
-        patience: 1
-        monitor: val/accuracy
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
 
 engine:
   task: MULTI_CLASS_CLS
@@ -30,11 +34,12 @@ overrides:
   reset:
     - data.train_subset.transforms
 
-  max_epochs: 90
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
+
+  max_epochs: 90
 
   data:
     train_subset:
diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml
index 2454c0e7094..7c71a8af171 100644
--- a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml
+++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml
@@ -12,12 +12,16 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: lightning.pytorch.cli.ReduceLROnPlateau
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        mode: max
-        factor: 0.5
-        patience: 1
-        monitor: val/accuracy
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
 
 engine:
   task: MULTI_CLASS_CLS
@@ -30,11 +34,12 @@ overrides:
   reset:
     - data.train_subset.transforms
 
-  max_epochs: 90
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
+
+  max_epochs: 90
 
   data:
     train_subset:
diff --git a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml
index c4c6946fd6e..fdef97ef9c8 100644
--- a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml
+++ b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml
@@ -20,7 +20,7 @@ model:
           init_args:
             mode: max
             factor: 0.5
-            patience: 1
+            patience: 3
             monitor: val/accuracy
 
 engine:
@@ -32,7 +32,8 @@ callback_monitor: val/accuracy
 data: ../../_base_/data/classification.yaml
 overrides:
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
diff --git a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml
index f06b3b36e32..ccd26a6535e 100644
--- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml
+++ b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml
@@ -12,10 +12,14 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        T_max: 100000
-        eta_min: 0
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+          init_args:
+            T_max: 100000
+            eta_min: 0
 
 engine:
   task: MULTI_CLASS_CLS
@@ -26,7 +30,8 @@ callback_monitor: val/accuracy
 data: ../../_base_/data/classification.yaml
 overrides:
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
diff --git a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml
index c72714e9433..06d702e8576 100644
--- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml
+++ b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml
@@ -12,10 +12,14 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        T_max: 100000
-        eta_min: 0
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+          init_args:
+            T_max: 100000
+            eta_min: 0
 
 engine:
   task: MULTI_CLASS_CLS
@@ -26,7 +30,8 @@ callback_monitor: val/accuracy
 data: ../../_base_/data/classification.yaml
 overrides:
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
diff --git a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml
index 4c6975c241a..9d626812765 100644
--- a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml
+++ b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml
@@ -12,10 +12,14 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        T_max: 100000
-        eta_min: 0
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+          init_args:
+            T_max: 100000
+            eta_min: 0
 
 engine:
   task: MULTI_CLASS_CLS
@@ -26,7 +30,8 @@ callback_monitor: val/accuracy
 data: ../../_base_/data/classification.yaml
 overrides:
   max_epochs: 90
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
diff --git a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml
index afb14dd046f..623bc178f4b 100644
--- a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml
+++ b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml
@@ -11,12 +11,16 @@ model:
         weight_decay: 0.05
 
     scheduler:
-      class_path: lightning.pytorch.cli.ReduceLROnPlateau
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        mode: max
-        factor: 0.5
-        patience: 1
-        monitor: val/accuracy
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
 
 engine:
   task: MULTI_LABEL_CLS
@@ -27,14 +31,11 @@ callback_monitor: val/accuracy
 data: ../../_base_/data/classification.yaml
 overrides:
   max_epochs: 200
+
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 4
-    - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
-      init_args:
-        min_earlystop_patience: 4
-        min_lrschedule_patience: 3
+        patience: 5
 
   data:
     task: MULTI_LABEL_CLS
diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml
index f3625158439..1859e0aa5fa 100644
--- a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml
+++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml
@@ -12,12 +12,16 @@ model:
         weight_decay: 0.0005
 
     scheduler:
-      class_path: lightning.pytorch.cli.ReduceLROnPlateau
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        mode: max
-        factor: 0.5
-        patience: 1
-        monitor: val/accuracy
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
 
 engine:
   task: MULTI_LABEL_CLS
@@ -30,11 +34,12 @@ overrides:
   reset:
     - data.train_subset.transforms
 
-  max_epochs: 200
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
+
+  max_epochs: 200
 
   data:
     task: MULTI_LABEL_CLS
diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml
index a304d76542b..ba43011950c 100644
--- a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml
+++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml
@@ -12,12 +12,16 @@ model:
         weight_decay: 0.0005
 
     scheduler:
-      class_path: lightning.pytorch.cli.ReduceLROnPlateau
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        mode: max
-        factor: 0.5
-        patience: 1
-        monitor: val/accuracy
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
+          init_args:
+            mode: max
+            factor: 0.5
+            patience: 3
+            monitor: val/accuracy
 
 engine:
   task: MULTI_LABEL_CLS
@@ -30,15 +34,12 @@ overrides:
   reset:
     - data.train_subset.transforms
 
-  max_epochs: 200
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 4
-    - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
-      init_args:
-        min_earlystop_patience: 4
-        min_lrschedule_patience: 3
+        patience: 5
+
+  max_epochs: 200
 
   data:
     task: MULTI_LABEL_CLS
diff --git a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml
index 02021708453..f9322f22f07 100644
--- a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml
+++ b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml
@@ -20,7 +20,7 @@ model:
           init_args:
             mode: max
             factor: 0.5
-            patience: 1
+            patience: 3
             monitor: val/accuracy
 
 engine:
@@ -31,11 +31,11 @@ callback_monitor: val/accuracy
 
 data: ../../_base_/data/classification.yaml
 overrides:
-  callbacks:
-    - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
-      init_args:
-        patience: 3
-
   data:
     task: MULTI_LABEL_CLS
     data_format: datumaro
+
+  callbacks:
+    - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
+      init_args:
+        patience: 5
diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml
index 9579f8e5e57..ebc03324933 100644
--- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml
+++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml
@@ -12,10 +12,14 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        T_max: 100000
-        eta_min: 0
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+          init_args:
+            T_max: 100000
+            eta_min: 0
 
 engine:
   task: MULTI_LABEL_CLS
@@ -31,7 +35,7 @@ overrides:
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
 
   data:
     task: MULTI_LABEL_CLS
diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml
index 3003b26eb48..a1992d2b398 100644
--- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml
+++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml
@@ -12,10 +12,14 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        T_max: 100000
-        eta_min: 0
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+          init_args:
+            T_max: 100000
+            eta_min: 0
 
 engine:
   task: MULTI_LABEL_CLS
@@ -31,11 +35,7 @@ overrides:
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 4
-    - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
-      init_args:
-        min_earlystop_patience: 4
-        min_lrschedule_patience: 3
+        patience: 5
 
   data:
     task: MULTI_LABEL_CLS
diff --git a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml
index 492e835ef62..99ef63b59e1 100644
--- a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml
+++ b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml
@@ -12,10 +12,14 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        T_max: 100000
-        eta_min: 0
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+          init_args:
+            T_max: 100000
+            eta_min: 0
 
 engine:
   task: MULTI_LABEL_CLS
@@ -31,7 +35,7 @@ overrides:
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
 
   data:
     task: MULTI_LABEL_CLS
diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml
index adabd373f1e..ee8925cfce6 100644
--- a/src/otx/recipe/detection/atss_mobilenetv2.yaml
+++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml
index 981aae94fdb..6305ed7345e 100644
--- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml
+++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml
index 0a6bb28bbde..290c47ab5cf 100644
--- a/src/otx/recipe/detection/atss_resnext101.yaml
+++ b/src/otx/recipe/detection/atss_resnext101.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/atss_resnext101_tile.yaml b/src/otx/recipe/detection/atss_resnext101_tile.yaml
index 8f78195f637..7a99f76a07e 100644
--- a/src/otx/recipe/detection/atss_resnext101_tile.yaml
+++ b/src/otx/recipe/detection/atss_resnext101_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/rtdetr_101_tile.yaml b/src/otx/recipe/detection/rtdetr_101_tile.yaml
index 918a173dedf..1d2bbfdeb3d 100644
--- a/src/otx/recipe/detection/rtdetr_101_tile.yaml
+++ b/src/otx/recipe/detection/rtdetr_101_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 5
+        num_warmup_steps: 100
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
@@ -35,15 +35,9 @@ overrides:
     - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
       init_args:
         max_interval: 1
-        decay: -0.025
         min_lrschedule_patience: 3
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        monitor: null
-        mode: max
-        patience: 10
-        check_on_train_epoch_end: false
-        min_delta: 0.001
         warmup_iters: 100
         warmup_epochs: 7
 
diff --git a/src/otx/recipe/detection/rtdetr_18_tile.yaml b/src/otx/recipe/detection/rtdetr_18_tile.yaml
index d79091eb56c..0e58a15d516 100644
--- a/src/otx/recipe/detection/rtdetr_18_tile.yaml
+++ b/src/otx/recipe/detection/rtdetr_18_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 5
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
@@ -34,15 +34,9 @@ overrides:
     - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
       init_args:
         max_interval: 1
-        decay: -0.025
         min_lrschedule_patience: 3
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        monitor: null
-        mode: max
-        patience: 10
-        check_on_train_epoch_end: false
-        min_delta: 0.001
         warmup_iters: 100
         warmup_epochs: 7
 
diff --git a/src/otx/recipe/detection/rtdetr_50_tile.yaml b/src/otx/recipe/detection/rtdetr_50_tile.yaml
index 4c0bfdb1e64..f5042a46741 100644
--- a/src/otx/recipe/detection/rtdetr_50_tile.yaml
+++ b/src/otx/recipe/detection/rtdetr_50_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 5
+        num_warmup_steps: 100
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
@@ -35,15 +35,9 @@ overrides:
     - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
       init_args:
         max_interval: 1
-        decay: -0.025
         min_lrschedule_patience: 3
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        monitor: null
-        mode: max
-        patience: 10
-        check_on_train_epoch_end: false
-        min_delta: 0.001
         warmup_iters: 100
         warmup_epochs: 7
 
diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml
index 69b6eef9978..73eec5d3016 100644
--- a/src/otx/recipe/detection/rtmdet_tiny.yaml
+++ b/src/otx/recipe/detection/rtmdet_tiny.yaml
@@ -13,7 +13,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/rtmdet_tiny_tile.yaml b/src/otx/recipe/detection/rtmdet_tiny_tile.yaml
index 716f5151e8f..ed7f1100a15 100644
--- a/src/otx/recipe/detection/rtmdet_tiny_tile.yaml
+++ b/src/otx/recipe/detection/rtmdet_tiny_tile.yaml
@@ -13,7 +13,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml
index 5b3ace2b81c..35cc1135d40 100644
--- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml
+++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml
index 620b55107b2..45d57954753 100644
--- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml
+++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml
index ed0ecfeec41..d457e2b9207 100644
--- a/src/otx/recipe/detection/yolox_l.yaml
+++ b/src/otx/recipe/detection/yolox_l.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml
index d9740706813..c5d4bf7210e 100644
--- a/src/otx/recipe/detection/yolox_l_tile.yaml
+++ b/src/otx/recipe/detection/yolox_l_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml
index 460b8c5b1bd..e547174e4e0 100644
--- a/src/otx/recipe/detection/yolox_s.yaml
+++ b/src/otx/recipe/detection/yolox_s.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml
index 967639269b2..e3138139e4b 100644
--- a/src/otx/recipe/detection/yolox_s_tile.yaml
+++ b/src/otx/recipe/detection/yolox_s_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml
index 86395c02f0d..fa954fb257c 100644
--- a/src/otx/recipe/detection/yolox_tiny.yaml
+++ b/src/otx/recipe/detection/yolox_tiny.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml
index 6b1c012e642..5bea45474f1 100644
--- a/src/otx/recipe/detection/yolox_tiny_tile.yaml
+++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml
index 4364cde6acf..a254eb755ea 100644
--- a/src/otx/recipe/detection/yolox_x.yaml
+++ b/src/otx/recipe/detection/yolox_x.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml
index b3dcf395c96..69751c03db0 100644
--- a/src/otx/recipe/detection/yolox_x_tile.yaml
+++ b/src/otx/recipe/detection/yolox_x_tile.yaml
@@ -14,7 +14,7 @@ model:
     scheduler:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        num_warmup_steps: 3
+        num_warmup_steps: 0
         main_scheduler_callable:
           class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml
index 59302d244b9..a3c074cd585 100644
--- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml
+++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml
@@ -2,10 +2,10 @@ model:
   class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg
   init_args:
     label_info: 2
-    model_name: dinov2_vits14
+    model_name: dinov2-small-seg
     input_size:
-      - 560
-      - 560
+      - 518
+      - 518
 
     optimizer:
       class_path: torch.optim.AdamW
@@ -17,11 +17,15 @@ model:
         weight_decay: 0.0001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.PolynomialLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        total_iters: 150
-        power: 0.9
-        last_epoch: -1
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.PolynomialLR
+          init_args:
+            total_iters: 150
+            power: 0.9
+            last_epoch: -1
 
 engine:
   task: SEMANTIC_SEGMENTATION
@@ -33,8 +37,8 @@ data: ../_base_/data/semantic_segmentation.yaml
 overrides:
   data:
     input_size:
-      - 560
-      - 560
+      - 518
+      - 518
     train_subset:
       transforms:
         - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
diff --git a/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml b/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml
index e645a193bca..4dc476089e8 100644
--- a/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml
+++ b/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml
@@ -2,7 +2,7 @@ model:
   class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg
   init_args:
     label_info: 2
-    model_name: dinov2_vits14
+    model_name: dinov2-small-seg
 
     optimizer:
       class_path: torch.optim.AdamW
@@ -30,8 +30,8 @@ data: ../_base_/data/semantic_segmentation_tile.yaml
 overrides:
   data:
     input_size:
-      - 560
-      - 560
+      - 518
+      - 518
     train_subset:
       transforms:
         - class_path: otx.core.data.transform_libs.torchvision.Resize
diff --git a/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml b/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml
index 721085499db..da9a62fa4be 100644
--- a/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml
+++ b/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yml
@@ -2,11 +2,11 @@ model:
   class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg
   init_args:
     label_info: 2
-    model_name: dinov2_vits14
+    model_name: dinov2-small-seg
     train_type: SEMI_SUPERVISED
     input_size:
-      - 560
-      - 560
+      - 518
+      - 518
 
     optimizer:
       class_path: torch.optim.AdamW
@@ -34,8 +34,8 @@ data: ../../_base_/data/semisl/semantic_segmentation_semisl.yaml
 overrides:
   data:
     input_size:
-      - 560
-      - 560
+      - 518
+      - 518
     train_subset:
       transforms:
         - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
diff --git a/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml b/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml
index 5abb0004bac..2c0ca578cb8 100644
--- a/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml
+++ b/src/otx/recipe/semantic_segmentation/semisl/litehrnet_x_semisl.yml
@@ -4,6 +4,7 @@ model:
     label_info: 2
     model_name: lite_hrnet_x
     train_type: SEMI_SUPERVISED
+    drop_unreliable_pixels_percent: 80
 
     optimizer:
       class_path: torch.optim.Adam
diff --git a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml
index 377d80b3722..4c8646bab0a 100644
--- a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml
+++ b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml
@@ -18,10 +18,14 @@ model:
         lr: 0.00001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.ConstantLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        factor: 1
-        total_iters: -1
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.ConstantLR
+          init_args:
+            factor: 1
+            total_iters: -1
 
 engine:
   task: VISUAL_PROMPTING
@@ -35,4 +39,4 @@ overrides:
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
diff --git a/src/otx/recipe/visual_prompting/sam_vit_b.yaml b/src/otx/recipe/visual_prompting/sam_vit_b.yaml
index bc3bf89351a..4493af39562 100644
--- a/src/otx/recipe/visual_prompting/sam_vit_b.yaml
+++ b/src/otx/recipe/visual_prompting/sam_vit_b.yaml
@@ -18,10 +18,14 @@ model:
         lr: 0.00001
 
     scheduler:
-      class_path: torch.optim.lr_scheduler.ConstantLR
+      class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
-        factor: 1
-        total_iters: -1
+        num_warmup_steps: 0
+        main_scheduler_callable:
+          class_path: torch.optim.lr_scheduler.ConstantLR
+          init_args:
+            factor: 1
+            total_iters: -1
 
 engine:
   task: VISUAL_PROMPTING
@@ -35,4 +39,4 @@ overrides:
   callbacks:
     - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
-        patience: 3
+        patience: 5
diff --git a/src/otx/tools/converter.py b/src/otx/tools/converter.py
index d8f26bc72a3..1e670930cf0 100644
--- a/src/otx/tools/converter.py
+++ b/src/otx/tools/converter.py
@@ -248,6 +248,9 @@ def convert(config_path: str, task: OTXTaskType | None = None) -> dict:
         task_info = TEMPLATE_ID_DICT[template_config["model_template_id"]]
         if param_dict.get("enable_tiling", None) and not task_info["model_name"].endswith("_tile"):
             task_info["model_name"] += "_tile"
+        # classification task type can't be deduced from the template name; try to extract it from the config
+        if "sub_task_type" in template_config and "_CLS" in task_info["task"]:
+            task_info["task"] = template_config["sub_task_type"]
         if task is not None:
             task_info["task"] = task
         default_config = ConfigConverter._get_default_config(task_info)
@@ -317,13 +320,16 @@ def update_num_workers(param_value: int) -> None:
             config["data"]["test_subset"]["num_workers"] = param_value
 
         def update_enable_early_stopping(param_value: bool) -> None:
-            idx = ConfigConverter._get_callback_idx(config["callbacks"], "lightning.pytorch.callbacks.EarlyStopping")
+            idx = ConfigConverter._get_callback_idx(
+                config["callbacks"],
+                "otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup",
+            )
             if not param_value and idx > -1:
                 config["callbacks"].pop(idx)
 
         def update_early_stop_patience(param_value: int) -> None:
             for callback in config["callbacks"]:
-                if callback["class_path"] == "lightning.pytorch.callbacks.EarlyStopping":
+                if callback["class_path"] == "otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup":
                     callback["init_args"]["patience"] = param_value
                     break
 
diff --git a/src/otx/tools/templates/classification/configuration.yaml b/src/otx/tools/templates/classification/configuration.yaml
index ed91ea1cfa3..93f972b7a87 100644
--- a/src/otx/tools/templates/classification/configuration.yaml
+++ b/src/otx/tools/templates/classification/configuration.yaml
@@ -87,11 +87,11 @@ learning_parameters:
   num_iters:
     affects_outcome_of: TRAINING
     default_value: 200
-    description:
-      Increasing this value causes the results to be more robust but training
-      time will be longer.
+    description: Maximum number of epochs to train a model.
+      Increasing this value may result in longer training, but potentially in a more robust model.
+      Note, if the early stopping is enabled, the actual number of epochs may be less than this value.
     editable: true
-    header: Number of training iterations
+    header: Number of training epochs
     max_value: 1000
     min_value: 1
     type: INTEGER
@@ -174,7 +174,7 @@ learning_parameters:
     visible_in_ui: false
   early_stop_patience:
     affects_outcome_of: TRAINING
-    default_value: 3
+    default_value: 5
     description: Training will stop if the model does not improve within the number of epochs of patience.
     editable: true
     header: Patience for early stopping
@@ -186,26 +186,7 @@ learning_parameters:
       operator: AND
       rules: []
       type: UI_RULES
-    value: 8
-    visible_in_ui: true
-    warning: This is applied exclusively when early stopping is enabled.
-  early_stop_iteration_patience:
-    affects_outcome_of: TRAINING
-    default_value: 0
-    description:
-      Training will stop if the model does not improve within the number of iterations of patience.
-      This ensures the model is trained enough with the number of iterations of patience before early stopping.
-    editable: true
-    header: Iteration patience for early stopping
-    max_value: 1000
-    min_value: 0
-    type: INTEGER
-    ui_rules:
-      action: DISABLE_EDITING
-      operator: AND
-      rules: []
-      type: UI_RULES
-    value: 0
+    value: 5
     visible_in_ui: true
     warning: This is applied exclusively when early stopping is enabled.
   use_adaptive_interval:
@@ -264,7 +245,7 @@ learning_parameters:
   auto_num_workers:
     affects_outcome_of: TRAINING
     default_value: false
-    description: Adapt num_workers according to current hardware status automatically.
+    description: Adapt number of workers according to current hardware status automatically.
     editable: true
     header: Enable auto adaptive num_workers
     type: BOOLEAN
diff --git a/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml b/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml
index 4db892a3131..001e1e3d995 100644
--- a/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml
+++ b/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml
@@ -20,7 +20,7 @@ hyper_parameters:
   parameter_overrides:
     learning_parameters:
       batch_size:
-        default_value: 64
+        default_value: 48
         auto_hpo_state: POSSIBLE
       learning_rate:
         default_value: 0.01
diff --git a/src/otx/tools/templates/detection/detection/configuration.yaml b/src/otx/tools/templates/detection/detection/configuration.yaml
index 5cb11d83c9f..9fe02a3d28a 100644
--- a/src/otx/tools/templates/detection/detection/configuration.yaml
+++ b/src/otx/tools/templates/detection/detection/configuration.yaml
@@ -91,11 +91,11 @@ learning_parameters:
   num_iters:
     affects_outcome_of: TRAINING
     default_value: 200
-    description:
-      Increasing this value causes the results to be more robust but training
-      time will be longer.
+    description: Maximum number of epochs to train a model.
+      Increasing this value may result in longer training, but potentially in a more robust model.
+      Note, if the early stopping is enabled, the actual number of epochs may be less than this value.
     editable: true
-    header: Number of training iterations
+    header: Number of training epochs
     max_value: 1000
     min_value: 1
     type: INTEGER
@@ -124,7 +124,7 @@ learning_parameters:
       operator: AND
       rules: []
       type: UI_RULES
-    value: 0
+    value: 2
     visible_in_ui: true
     warning: null
   enable_early_stopping:
@@ -173,25 +173,6 @@ learning_parameters:
     value: 10
     visible_in_ui: true
     warning: This is applied exclusively when early stopping is enabled.
-  early_stop_iteration_patience:
-    affects_outcome_of: TRAINING
-    default_value: 0
-    description:
-      Training will stop if the model does not improve within the number of iterations of patience.
-      This ensures the model is trained enough with the number of iterations of patience before early stopping.
-    editable: true
-    header: Iteration patience for early stopping
-    max_value: 1000
-    min_value: 0
-    type: INTEGER
-    ui_rules:
-      action: DISABLE_EDITING
-      operator: AND
-      rules: []
-      type: UI_RULES
-    value: 0
-    visible_in_ui: true
-    warning: This is applied exclusively when early stopping is enabled.
   use_adaptive_interval:
     affects_outcome_of: TRAINING
     default_value: true
@@ -232,7 +213,7 @@ learning_parameters:
   auto_num_workers:
     affects_outcome_of: TRAINING
     default_value: false
-    description: Adapt num_workers according to current hardware status automatically.
+    description: Adapt number of workers according to current hardware status automatically.
     editable: true
     header: Enable auto adaptive num_workers
     type: BOOLEAN
diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml
index c161471d452..19c6f4c7502 100644
--- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml
+++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml
@@ -28,7 +28,7 @@ hyper_parameters:
         default_value: 0.001
         auto_hpo_state: POSSIBLE
       learning_rate_warmup_iters:
-        default_value: 3
+        default_value: 0
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml
index f8ef1d4acd3..ad248a10bf6 100644
--- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml
+++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml
@@ -28,7 +28,7 @@ hyper_parameters:
         default_value: 0.001
         auto_hpo_state: POSSIBLE
       learning_rate_warmup_iters:
-        default_value: 3
+        default_value: 0
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml
index e2e426840ed..f3310cb8138 100644
--- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml
+++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml
@@ -28,7 +28,7 @@ hyper_parameters:
         default_value: 0.0002
         auto_hpo_state: POSSIBLE
       learning_rate_warmup_iters:
-        default_value: 3
+        default_value: 0
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml
index 97f85fed008..30bdfecff16 100644
--- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml
+++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml
@@ -28,7 +28,7 @@ hyper_parameters:
         default_value: 0.001
         auto_hpo_state: POSSIBLE
       learning_rate_warmup_iters:
-        default_value: 3
+        default_value: 0
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml b/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml
index 94dd429e1f1..aeed26d6c19 100644
--- a/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml
+++ b/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml
@@ -28,7 +28,7 @@ hyper_parameters:
         default_value: 0.004
         auto_hpo_state: POSSIBLE
       learning_rate_warmup_iters:
-        default_value: 3
+        default_value: 0
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml b/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml
index 3cdde945a08..62943444683 100644
--- a/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml
+++ b/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml
@@ -28,7 +28,7 @@ hyper_parameters:
         default_value: 0.01
         auto_hpo_state: POSSIBLE
       learning_rate_warmup_iters:
-        default_value: 3
+        default_value: 0
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml b/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml
index cf12454e78d..c6770cc7827 100644
--- a/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml
+++ b/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml
@@ -28,7 +28,7 @@ hyper_parameters:
         default_value: 0.004
         auto_hpo_state: POSSIBLE
       learning_rate_warmup_iters:
-        default_value: 3
+        default_value: 0
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml
index 1394cf44159..7b4bcae96a8 100644
--- a/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml
+++ b/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml
@@ -27,8 +27,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.0001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml
index 7738c65f1b7..88999e071d6 100644
--- a/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml
+++ b/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml
@@ -27,8 +27,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.0001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml
index 408e48cd8fb..79497ab1c79 100644
--- a/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml
+++ b/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml
@@ -27,8 +27,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.0001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml b/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml
index 8b110503b62..a067b186861 100644
--- a/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml
+++ b/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml
@@ -28,7 +28,7 @@ hyper_parameters:
         default_value: 0.0007
         auto_hpo_state: POSSIBLE
       learning_rate_warmup_iters:
-        default_value: 3
+        default_value: 0
       num_iters:
         default_value: 200
 
diff --git a/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml b/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml
index a1c2078ed62..20421f3fd16 100644
--- a/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml
+++ b/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml
@@ -91,11 +91,11 @@ learning_parameters:
   num_iters:
     affects_outcome_of: TRAINING
     default_value: 200
-    description:
-      Increasing this value causes the results to be more robust but training
-      time will be longer.
+    description: Maximum number of epochs to train a model.
+      Increasing this value may result in longer training, but potentially in a more robust model.
+      Note that if early stopping is enabled, the actual number of epochs may be less than this value.
     editable: true
-    header: Number of training iterations
+    header: Number of training epochs
     max_value: 1000
     min_value: 1
     type: INTEGER
@@ -173,25 +173,6 @@ learning_parameters:
     value: 10
     visible_in_ui: true
     warning: This is applied exclusively when early stopping is enabled.
-  early_stop_iteration_patience:
-    affects_outcome_of: TRAINING
-    default_value: 0
-    description:
-      Training will stop if the model does not improve within the number of iterations of patience.
-      This ensures the model is trained enough with the number of iterations of patience before early stopping.
-    editable: true
-    header: Iteration patience for early stopping
-    max_value: 1000
-    min_value: 0
-    type: INTEGER
-    ui_rules:
-      action: DISABLE_EDITING
-      operator: AND
-      rules: []
-      type: UI_RULES
-    value: 0
-    visible_in_ui: true
-    warning: This is applied exclusively when early stopping is enabled.
   use_adaptive_interval:
     affects_outcome_of: TRAINING
     default_value: true
@@ -232,7 +213,7 @@ learning_parameters:
   auto_num_workers:
     affects_outcome_of: TRAINING
     default_value: false
-    description: Adapt num_workers according to current hardware status automatically.
+    description: Adapt number of workers according to current hardware status automatically.
     editable: true
     header: Enable auto adaptive num_workers
     type: BOOLEAN
diff --git a/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml
index 4b5e21a4f83..f30d4c6f792 100644
--- a/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml
+++ b/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml
@@ -27,8 +27,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.015
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
       num_iters:
         default_value: 100
 
diff --git a/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml
index e9e289c6bf6..cf609e3d1c8 100644
--- a/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml
+++ b/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml
@@ -27,8 +27,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.0001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
       num_iters:
         default_value: 100
 
diff --git a/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml
index bd2248adbcd..31f1a310cab 100644
--- a/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml
+++ b/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml
@@ -27,8 +27,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.007
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
       num_iters:
         default_value: 100
 
diff --git a/src/otx/tools/templates/detection/rotated_detection/configuration.yaml b/src/otx/tools/templates/detection/rotated_detection/configuration.yaml
index b41ea7dda25..524376b9d0a 100644
--- a/src/otx/tools/templates/detection/rotated_detection/configuration.yaml
+++ b/src/otx/tools/templates/detection/rotated_detection/configuration.yaml
@@ -91,11 +91,11 @@ learning_parameters:
   num_iters:
     affects_outcome_of: TRAINING
     default_value: 200
-    description:
-      Increasing this value causes the results to be more robust but training
-      time will be longer.
+    description: Maximum number of epochs to train a model.
+      Increasing this value may result in longer training, but potentially in a more robust model.
+      Note that if early stopping is enabled, the actual number of epochs may be less than this value.
     editable: true
-    header: Number of training iterations
+    header: Number of training epochs
     max_value: 1000
     min_value: 1
     type: INTEGER
@@ -173,25 +173,6 @@ learning_parameters:
     value: 10
     visible_in_ui: true
     warning: This is applied exclusively when early stopping is enabled.
-  early_stop_iteration_patience:
-    affects_outcome_of: TRAINING
-    default_value: 0
-    description:
-      Training will stop if the model does not improve within the number of iterations of patience.
-      This ensures the model is trained enough with the number of iterations of patience before early stopping.
-    editable: true
-    header: Iteration patience for early stopping
-    max_value: 1000
-    min_value: 0
-    type: INTEGER
-    ui_rules:
-      action: DISABLE_EDITING
-      operator: AND
-      rules: []
-      type: UI_RULES
-    value: 0
-    visible_in_ui: true
-    warning: This is applied exclusively when early stopping is enabled.
   use_adaptive_interval:
     affects_outcome_of: TRAINING
     default_value: true
@@ -232,7 +213,7 @@ learning_parameters:
   auto_num_workers:
     affects_outcome_of: TRAINING
     default_value: false
-    description: Adapt num_workers according to current hardware status automatically.
+    description: Adapt number of workers according to current hardware status automatically.
     editable: true
     header: Enable auto adaptive num_workers
     type: BOOLEAN
diff --git a/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml b/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml
index 21e079c489a..2c5ebee3fc7 100644
--- a/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml
+++ b/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml
@@ -27,8 +27,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.007
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
       num_iters:
         default_value: 100
 
diff --git a/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml b/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml
index 4cb51f466eb..8d1bad4640c 100644
--- a/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml
+++ b/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml
@@ -27,8 +27,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.007
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
       num_iters:
         default_value: 100
 
diff --git a/src/otx/tools/templates/keypoint_detection/configuration.yaml b/src/otx/tools/templates/keypoint_detection/configuration.yaml
index e745d787c80..1ef84c01919 100644
--- a/src/otx/tools/templates/keypoint_detection/configuration.yaml
+++ b/src/otx/tools/templates/keypoint_detection/configuration.yaml
@@ -87,11 +87,11 @@ learning_parameters:
   num_iters:
     affects_outcome_of: TRAINING
     default_value: 200
-    description:
-      Increasing this value causes the results to be more robust but training
-      time will be longer.
+    description: Maximum number of epochs to train a model.
+      Increasing this value may result in longer training, but potentially in a more robust model.
+      Note that if early stopping is enabled, the actual number of epochs may be less than this value.
     editable: true
-    header: Number of training iterations
+    header: Number of training epochs
     max_value: 1000
     min_value: 1
     type: INTEGER
@@ -120,7 +120,7 @@ learning_parameters:
       operator: AND
       rules: []
       type: UI_RULES
-    value: 0
+    value: 2
     visible_in_ui: true
     warning: null
   learning_rate_warmup_iters:
@@ -189,25 +189,6 @@ learning_parameters:
     value: 10
     visible_in_ui: true
     warning: This is applied exclusively when early stopping is enabled.
-  early_stop_iteration_patience:
-    affects_outcome_of: TRAINING
-    default_value: 0
-    description:
-      Training will stop if the model does not improve within the number of iterations of patience.
-      This ensures the model is trained enough with the number of iterations of patience before early stopping.
-    editable: true
-    header: Iteration patience for early stopping
-    max_value: 1000
-    min_value: 0
-    type: INTEGER
-    ui_rules:
-      action: DISABLE_EDITING
-      operator: AND
-      rules: []
-      type: UI_RULES
-    value: 0
-    visible_in_ui: true
-    warning: This is applied exclusively when early stopping is enabled.
   use_adaptive_interval:
     affects_outcome_of: TRAINING
     default_value: true
@@ -248,7 +229,7 @@ learning_parameters:
   auto_num_workers:
     affects_outcome_of: TRAINING
     default_value: false
-    description: Adapt num_workers according to current hardware status automatically.
+    description: Adapt number of workers according to current hardware status automatically.
     editable: true
     header: Enable auto adaptive num_workers
     type: BOOLEAN
diff --git a/src/otx/tools/templates/segmentation/configuration.yaml b/src/otx/tools/templates/segmentation/configuration.yaml
index 23356c696db..c7df2603ec7 100644
--- a/src/otx/tools/templates/segmentation/configuration.yaml
+++ b/src/otx/tools/templates/segmentation/configuration.yaml
@@ -54,8 +54,8 @@ learning_parameters:
     default_value: 100
     description:
       In this periods of initial training iterations, the model will be trained in low learning rate,
-      which will be increased incrementally up to the expected learning rate setting.
-      This warm-up phase is known to be helpful to stabilize training, thus result in better performance.
+      which will be increased linearly up to the expected learning rate setting.
+      This warm-up phase is known to help stabilize training and can therefore lead to better performance.
     editable: true
     header: Number of iterations for learning rate warmup
     max_value: 10000
@@ -74,11 +74,11 @@ learning_parameters:
     auto_hpo_state: not_possible
     auto_hpo_value: null
     default_value: 200
-    description:
-      Increasing this value causes the results to be more robust but training
-      time will be longer.
+    description: Maximum number of epochs to train a model.
+      Increasing this value may result in longer training, but potentially in a more robust model.
+      Note that if early stopping is enabled, the actual number of epochs may be less than this value.
     editable: true
-    header: Number of training iterations
+    header: Number of training epochs
     max_value: 1000
     min_value: 1
     type: INTEGER
@@ -109,7 +109,7 @@ learning_parameters:
       operator: AND
       rules: []
       type: UI_RULES
-    value: 0
+    value: 2
     visible_in_ui: true
     warning: null
   enable_early_stopping:
@@ -143,7 +143,7 @@ learning_parameters:
     visible_in_ui: false
   early_stop_patience:
     affects_outcome_of: TRAINING
-    default_value: 7
+    default_value: 10
     description: Training will stop if the model does not improve within the number of epochs of patience.
     editable: true
     header: Patience for early stopping
@@ -158,25 +158,6 @@ learning_parameters:
     value: 5
     visible_in_ui: true
     warning: This is applied exclusively when early stopping is enabled.
-  early_stop_iteration_patience:
-    affects_outcome_of: TRAINING
-    default_value: 0
-    description:
-      Training will stop if the model does not improve within the number of iterations of patience.
-      This ensures the model is trained enough with the number of iterations of patience before early stopping.
-    editable: true
-    header: Iteration patience for early stopping
-    max_value: 1000
-    min_value: 0
-    type: INTEGER
-    ui_rules:
-      action: DISABLE_EDITING
-      operator: AND
-      rules: []
-      type: UI_RULES
-    value: 0
-    visible_in_ui: true
-    warning: This is applied exclusively when early stopping is enabled.
   enable_supcon:
     affects_outcome_of: TRAINING
     default_value: false
@@ -219,7 +200,7 @@ learning_parameters:
   auto_num_workers:
     affects_outcome_of: TRAINING
     default_value: false
-    description: Adapt num_workers according to current hardware status automatically.
+    description: Adapt number of workers according to current hardware status automatically.
     editable: true
     header: Enable auto adaptive num_workers
     type: BOOLEAN
diff --git a/src/otx/tools/templates/segmentation/dinov2_small/template.yaml b/src/otx/tools/templates/segmentation/dinov2_small/template.yaml
index ac837fa5007..630af3c6b7c 100644
--- a/src/otx/tools/templates/segmentation/dinov2_small/template.yaml
+++ b/src/otx/tools/templates/segmentation/dinov2_small/template.yaml
@@ -25,10 +25,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
-      num_iters:
-        default_value: 200
 
 # Training resources.
 max_nodes: 1
diff --git a/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml
index ef390639238..88f3d5d41a3 100644
--- a/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml
+++ b/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml
@@ -25,14 +25,8 @@ hyper_parameters:
       learning_rate:
         default_value: 0.00006
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
-      num_iters:
-        default_value: 200
       early_stop_start:
         default_value: 100
-      early_stop_patience:
-        default_value: 10
 
 # Training resources.
 max_nodes: 1
diff --git a/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml
index 9afd2660cf2..0c13b203b84 100644
--- a/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml
+++ b/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml
@@ -25,14 +25,8 @@ hyper_parameters:
       learning_rate:
         default_value: 0.00006
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
-      num_iters:
-        default_value: 200
       early_stop_start:
         default_value: 100
-      early_stop_patience:
-        default_value: 10
 
 # Training resources.
 max_nodes: 1
diff --git a/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml
index c5879535caa..e23c8c91104 100644
--- a/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml
+++ b/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml
@@ -25,14 +25,8 @@ hyper_parameters:
       learning_rate:
         default_value: 0.00006
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
-      num_iters:
-        default_value: 200
       early_stop_start:
         default_value: 100
-      early_stop_patience:
-        default_value: 10
 
 # Training resources.
 max_nodes: 1
diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml
index 56af9f4b2b2..ce4e4da393f 100644
--- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml
+++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml
@@ -24,10 +24,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
-      num_iters:
-        default_value: 200
 
 # Training resources.
 max_nodes: 1
diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml
index 6637673c64e..6c8be01af2a 100644
--- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml
+++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml
@@ -25,10 +25,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
-      num_iters:
-        default_value: 200
 
 # Training resources.
 max_nodes: 1
diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml
index f4bc011554f..d4f98812586 100644
--- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml
+++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml
@@ -25,10 +25,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
-      num_iters:
-        default_value: 200
 
 # Training resources.
 max_nodes: 1
diff --git a/tests/conftest.py b/tests/conftest.py
index 643bf351be8..62ca433376d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -373,6 +373,7 @@ def fxt_seg_label_info() -> SegLabelInfo:
             label_names,
             ["class2", "class3"],
         ],
+        label_ids=["0", "1", "2"],
     )
 
 
@@ -385,6 +386,7 @@ def fxt_multiclass_labelinfo() -> LabelInfo:
             label_names,
             ["class2", "class3"],
         ],
+        label_ids=["0", "1", "2"],
     )
 
 
@@ -398,6 +400,7 @@ def fxt_multilabel_labelinfo() -> LabelInfo:
             [label_names[1]],
             [label_names[2]],
         ],
+        label_ids=["0", "1", "2"],
     )
 
 
@@ -464,6 +467,7 @@ def fxt_hlabel_multilabel_info() -> HLabelInfo:
             ["Spade_A", "Spade"],
             ["Spade_King", "Spade"],
         ],
+        label_ids=[str(i) for i in range(9)],
     )
 
 
diff --git a/tests/integration/api/test_augmentation.py b/tests/integration/api/test_augmentation.py
index cae1b5b8317..2f0b11a64c2 100644
--- a/tests/integration/api/test_augmentation.py
+++ b/tests/integration/api/test_augmentation.py
@@ -31,11 +31,12 @@ def _test_augmentation(
     ).config
     train_config = config["data"]["train_subset"]
     train_config["input_size"] = (32, 32)
+    data_format = config["data"]["data_format"]
 
     # Load dataset
     dm_dataset = DmDataset.import_from(
         target_dataset_per_task[task_name],
-        format=config["data"]["data_format"],
+        format=data_format,
     )
     mem_cache_handler = MemCacheHandlerSingleton.create(
         mode="sinlgeprocessing",
@@ -60,6 +61,7 @@ def _test_augmentation(
             dm_subset=dm_dataset,
             cfg_subset=SubsetConfig(sampler=SamplerConfig(**train_config.pop("sampler", {})), **train_config),
             mem_cache_handler=mem_cache_handler,
+            data_format=data_format,
         )
 
         # Check if all aug combinations are size-compatible
diff --git a/tests/unit/algo/classification/conftest.py b/tests/unit/algo/classification/conftest.py
index 945c3d0bc4c..a283eff41b1 100644
--- a/tests/unit/algo/classification/conftest.py
+++ b/tests/unit/algo/classification/conftest.py
@@ -31,6 +31,7 @@ def fxt_hlabel_data() -> HLabelInfo:
             ["Heart_Queen", "Heart_King"],
             ["Spade_A", "Spade_King"],
         ],
+        label_ids=[str(i) for i in range(6)],
         num_multiclass_heads=3,
         num_multilabel_classes=0,
         head_idx_to_logits_range={"0": (0, 2), "1": (2, 4), "2": (4, 6)},
@@ -80,6 +81,7 @@ def fxt_hlabel_multilabel_info() -> HLabelInfo:
             "Red_Joker",
             "Extra_Joker",
         ],
+        label_ids=[str(i) for i in range(9)],
         label_groups=[
             ["Heart", "Spade"],
             ["Heart_Queen", "Heart_King"],
@@ -149,6 +151,7 @@ def fxt_hlabel_cifar() -> HLabelInfo:
             "aquatic_mammals",
             "fish",
         ],
+        label_ids=[str(i) for i in range(12)],
         label_groups=[
             ["beaver", "dolphin", "otter", "seal", "whale"],
             ["aquarium_fish", "flatfish", "ray", "shark", "trout"],
diff --git a/tests/unit/algo/detection/detectors/test_single_stage_detector.py b/tests/unit/algo/detection/detectors/test_single_stage_detector.py
index 7d805062ed1..669e4d8c900 100644
--- a/tests/unit/algo/detection/detectors/test_single_stage_detector.py
+++ b/tests/unit/algo/detection/detectors/test_single_stage_detector.py
@@ -53,7 +53,7 @@ def batch(self):
         inputs = torch.randn(1, 3, 32, 32)
         return DetBatchDataEntity(
             batch_size=1,
-            imgs_info=[LabelInfo(["a"], [["a"]])],
+            imgs_info=[LabelInfo(["a"], ["0"], [["a"]])],
             images=inputs,
             bboxes=[torch.tensor([[0.5, 0.5, 0.5, 0.5]])],
             labels=[torch.tensor([0])],
diff --git a/tests/unit/algo/detection/test_rtdetr.py b/tests/unit/algo/detection/test_rtdetr.py
index d96305dfe13..6d2c92718a7 100644
--- a/tests/unit/algo/detection/test_rtdetr.py
+++ b/tests/unit/algo/detection/test_rtdetr.py
@@ -15,7 +15,7 @@
 
 class TestRTDETR:
     def test_customize_outputs(self, mocker):
-        label_info = LabelInfo(["a", "b", "c"], [["a", "b", "c"]])
+        label_info = LabelInfo(["a", "b", "c"], ["0", "1", "2"], [["a", "b", "c"]])
         mocker.patch("otx.algo.detection.rtdetr.RTDETR._build_model", return_value=mocker.MagicMock())
         model = RTDETR(model_name="rtdetr_18", label_info=label_info)
         model.model.load_from = None
diff --git a/tests/unit/algo/segmentation/backbones/test_dinov2.py b/tests/unit/algo/segmentation/backbones/test_dinov2.py
deleted file mode 100644
index 45fb2aaf84e..00000000000
--- a/tests/unit/algo/segmentation/backbones/test_dinov2.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from __future__ import annotations
-
-from pathlib import Path
-from unittest.mock import MagicMock
-
-import pytest
-import torch
-from otx.algo.segmentation.backbones import dinov2 as target_file
-from otx.algo.segmentation.backbones.dinov2 import DinoVisionTransformer
-
-
-class TestDinoVisionTransformer:
-    @pytest.fixture()
-    def mock_backbone_named_parameters(self) -> dict[str, MagicMock]:
-        named_parameter = {}
-        for i in range(3):
-            parameter = MagicMock()
-            parameter.requires_grad = True
-            named_parameter[f"layer_{i}"] = parameter
-        return named_parameter
-
-    @pytest.fixture()
-    def mock_backbone(self, mock_backbone_named_parameters) -> MagicMock:
-        backbone = MagicMock()
-        backbone.named_parameters.return_value = list(mock_backbone_named_parameters.items())
-        return backbone
-
-    @pytest.fixture(autouse=True)
-    def mock_torch_hub_load(self, mocker, mock_backbone):
-        return mocker.patch("otx.algo.segmentation.backbones.dinov2.torch.hub.load", return_value=mock_backbone)
-
-    def test_init(self, mock_backbone, mock_backbone_named_parameters):
-        dino = DinoVisionTransformer(model_name="dinov2_vits14", freeze_backbone=True, out_index=[8, 9, 10, 11])
-
-        assert dino.backbone == mock_backbone
-        for parameter in mock_backbone_named_parameters.values():
-            assert parameter.requires_grad is False
-
-    @pytest.fixture()
-    def dino_vit(self) -> DinoVisionTransformer:
-        return DinoVisionTransformer(
-            model_name="dinov2_vits14",
-            freeze_backbone=True,
-            out_index=[8, 9, 10, 11],
-        )
-
-    def test_forward(self, dino_vit, mock_backbone):
-        tensor = torch.rand(10, 3, 3, 3)
-        dino_vit.forward(tensor)
-
-        mock_backbone.assert_called_once_with(tensor)
-
-    @pytest.fixture()
-    def mock_load_from_http(self, mocker) -> MagicMock:
-        return mocker.patch.object(target_file, "load_from_http")
-
-    @pytest.fixture()
-    def mock_load_checkpoint_to_model(self, mocker) -> MagicMock:
-        return mocker.patch.object(target_file, "load_checkpoint_to_model")
-
-    @pytest.fixture()
-    def pretrained_weight(self, tmp_path) -> str:
-        weight = tmp_path / "pretrained.pth"
-        weight.touch()
-        return str(weight)
-
-    @pytest.fixture()
-    def mock_torch_load(self, mocker) -> MagicMock:
-        return mocker.patch("otx.algo.segmentation.backbones.mscan.torch.load")
-
-    def test_load_pretrained_weights(self, dino_vit, pretrained_weight, mock_torch_load, mock_load_checkpoint_to_model):
-        dino_vit.load_pretrained_weights(pretrained=pretrained_weight)
-        mock_torch_load.assert_called_once_with(pretrained_weight, "cpu")
-        mock_load_checkpoint_to_model.assert_called_once()
-
-    def test_load_pretrained_weights_from_url(self, dino_vit, mock_load_from_http, mock_load_checkpoint_to_model):
-        pretrained_weight = "www.fake.com/fake.pth"
-        dino_vit.load_pretrained_weights(pretrained=pretrained_weight)
-
-        cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints"
-        mock_load_from_http.assert_called_once_with(filename=pretrained_weight, map_location="cpu", model_dir=cache_dir)
-        mock_load_checkpoint_to_model.assert_called_once()
diff --git a/tests/unit/algo/segmentation/test_dino_v2_seg.py b/tests/unit/algo/segmentation/test_dino_v2_seg.py
index e722254e47f..c9ad5a93946 100644
--- a/tests/unit/algo/segmentation/test_dino_v2_seg.py
+++ b/tests/unit/algo/segmentation/test_dino_v2_seg.py
@@ -10,9 +10,9 @@
 
 
 class TestDinoV2Seg:
-    @pytest.fixture(scope="class")
+    @pytest.fixture()
     def fxt_dino_v2_seg(self) -> DinoV2Seg:
-        return DinoV2Seg(label_info=10, model_name="dinov2_vits14", input_size=(560, 560))
+        return DinoV2Seg(label_info=10, model_name="dinov2-small-seg", input_size=(518, 518))
 
     def test_dino_v2_seg_init(self, fxt_dino_v2_seg):
         assert isinstance(fxt_dino_v2_seg, DinoV2Seg)
@@ -21,7 +21,7 @@ def test_dino_v2_seg_init(self, fxt_dino_v2_seg):
     def test_exporter(self, fxt_dino_v2_seg):
         exporter = fxt_dino_v2_seg._exporter
         assert isinstance(exporter, OTXModelExporter)
-        assert exporter.input_size == (1, 3, 560, 560)
+        assert exporter.input_size == (1, 3, 518, 518)
 
     def test_optimization_config(self, fxt_dino_v2_seg):
         config = fxt_dino_v2_seg._optimization_config
@@ -32,7 +32,7 @@ def test_optimization_config(self, fxt_dino_v2_seg):
     @pytest.mark.parametrize(
         "model",
         [
-            DinoV2Seg(model_name="dinov2_vits14", label_info=3),
+            DinoV2Seg(model_name="dinov2-small-seg", label_info=3, input_size=(518, 518)),
         ],
     )
     def test_compiled_model(self, model):
@@ -44,6 +44,6 @@ def test_compiled_model(self, model):
         model.model = torch.compile(model.model, backend=cnt)
 
         # Prepare inputs
-        x = torch.randn(1, 3, 560, 560)
+        x = torch.randn(1, 3, 518, 518)
         model.model(x)
         assert cnt.frame_count == 1
diff --git a/tests/unit/cli/test_cli.py b/tests/unit/cli/test_cli.py
index 3b2501066ce..07aa5d083e2 100644
--- a/tests/unit/cli/test_cli.py
+++ b/tests/unit/cli/test_cli.py
@@ -188,7 +188,7 @@ def test_print_config_scheduler_override_command(self, fxt_print_config_schedule
         scheduler:
           class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
           init_args:
-            num_warmup_steps: 3
+            num_warmup_steps: 0
             monitor: val/test_f1
             warmup_interval: step
             main_scheduler_callable:
diff --git a/tests/unit/core/data/conftest.py b/tests/unit/core/data/conftest.py
index 665bc5a7471..e2821b17281 100644
--- a/tests/unit/core/data/conftest.py
+++ b/tests/unit/core/data/conftest.py
@@ -193,6 +193,7 @@ def fxt_mock_hlabelinfo():
     return HLabelInfo(
         label_names=_LABEL_NAMES,
         label_groups=[["Non-Rigid", "Rigid"], ["Rectangle", "Triangle"], ["Circle"], ["Lion"], ["Panda"]],
+        label_ids=_LABEL_NAMES,
         num_multiclass_heads=2,
         num_multilabel_classes=3,
         head_idx_to_logits_range={"0": (0, 2), "1": (2, 4)},
diff --git a/tests/unit/core/data/test_factory.py b/tests/unit/core/data/test_factory.py
index 9877739862b..a3a6f92d4af 100644
--- a/tests/unit/core/data/test_factory.py
+++ b/tests/unit/core/data/test_factory.py
@@ -102,6 +102,7 @@ def test_create(
                 cfg_subset=cfg_subset,
                 vpm_config=vpm_config,
                 image_color_channel=image_color_channel,
+                data_format="",
             ),
             dataset_cls,
         )
diff --git a/tests/unit/core/metrics/test_accuracy.py b/tests/unit/core/metrics/test_accuracy.py
index d3c43a8a087..73486330a3c 100644
--- a/tests/unit/core/metrics/test_accuracy.py
+++ b/tests/unit/core/metrics/test_accuracy.py
@@ -52,7 +52,7 @@ def test_default_multi_class_cls_metric_callable(self, fxt_multiclass_labelinfo:
         metric = MultiClassClsMetricCallable(fxt_multiclass_labelinfo)
         assert isinstance(metric.accuracy, MulticlassAccuracy)
 
-        one_class_label_info = LabelInfo(label_names=["class1"], label_groups=[["class1"]])
+        one_class_label_info = LabelInfo(label_names=["class1"], label_groups=[["class1"]], label_ids=["0"])
         assert one_class_label_info.num_classes == 1
         binary_metric = MultiClassClsMetricCallable(one_class_label_info)
         assert isinstance(binary_metric.accuracy, BinaryAccuracy)
diff --git a/tests/unit/core/model/test_detection_3d.py b/tests/unit/core/model/test_detection_3d.py
index f46dc212b8d..c070305c091 100644
--- a/tests/unit/core/model/test_detection_3d.py
+++ b/tests/unit/core/model/test_detection_3d.py
@@ -34,6 +34,7 @@ def label_info(self) -> LabelInfo:
         return LabelInfo(
             label_names=["label_0", "label_1"],
             label_groups=[["label_0", "label_1"]],
+            label_ids=["0", "1"],
         )
 
     @pytest.fixture()
@@ -61,8 +62,16 @@ def test_export_parameters(self, model):
         ("label_info", "expected_label_info"),
         [
             (
-                LabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]),
-                LabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]),
+                LabelInfo(
+                    label_names=["label1", "label2", "label3"],
+                    label_groups=[["label1", "label2", "label3"]],
+                    label_ids=["0", "1", "2"],
+                ),
+                LabelInfo(
+                    label_names=["label1", "label2", "label3"],
+                    label_groups=[["label1", "label2", "label3"]],
+                    label_ids=["0", "1", "2"],
+                ),
             ),
             (LabelInfo.from_num_classes(num_classes=5), LabelInfo.from_num_classes(num_classes=5)),
         ],
diff --git a/tests/unit/core/model/test_keypoint_detection.py b/tests/unit/core/model/test_keypoint_detection.py
index d3cc06fede7..ca32686e769 100644
--- a/tests/unit/core/model/test_keypoint_detection.py
+++ b/tests/unit/core/model/test_keypoint_detection.py
@@ -34,6 +34,7 @@ def label_info(self) -> LabelInfo:
         return LabelInfo(
             label_names=["label_0", "label_1"],
             label_groups=[["label_0", "label_1"]],
+            label_ids=["0", "1"],
         )
 
     @pytest.fixture()
@@ -61,8 +62,16 @@ def test_export_parameters(self, model):
         ("label_info", "expected_label_info"),
         [
             (
-                LabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]),
-                LabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]),
+                LabelInfo(
+                    label_names=["label1", "label2", "label3"],
+                    label_groups=[["label1", "label2", "label3"]],
+                    label_ids=["0", "1", "2"],
+                ),
+                LabelInfo(
+                    label_names=["label1", "label2", "label3"],
+                    label_groups=[["label1", "label2", "label3"]],
+                    label_ids=["0", "1", "2"],
+                ),
             ),
             (LabelInfo.from_num_classes(num_classes=5), LabelInfo.from_num_classes(num_classes=5)),
         ],
diff --git a/tests/unit/core/model/test_segmentation.py b/tests/unit/core/model/test_segmentation.py
index b7181ce87cc..9f81fda86ab 100644
--- a/tests/unit/core/model/test_segmentation.py
+++ b/tests/unit/core/model/test_segmentation.py
@@ -34,6 +34,7 @@ def label_info(self):
         return SegLabelInfo(
             label_names=["Background", "label_0", "label_1"],
             label_groups=[["Background", "label_0", "label_1"]],
+            label_ids=["0", "1", "2"],
         )
 
     @pytest.fixture()
@@ -64,8 +65,16 @@ def test_export_parameters(self, model):
         ("label_info", "expected_label_info"),
         [
             (
-                SegLabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]),
-                SegLabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]),
+                SegLabelInfo(
+                    label_names=["label1", "label2", "label3"],
+                    label_groups=[["label1", "label2", "label3"]],
+                    label_ids=["0", "1", "2"],
+                ),
+                SegLabelInfo(
+                    label_names=["label1", "label2", "label3"],
+                    label_groups=[["label1", "label2", "label3"]],
+                    label_ids=["0", "1", "2"],
+                ),
             ),
             (SegLabelInfo.from_num_classes(num_classes=5), SegLabelInfo.from_num_classes(num_classes=5)),
         ],
diff --git a/tests/unit/core/types/test_export.py b/tests/unit/core/types/test_export.py
index 72add6f3c31..70a4aa1aa2f 100644
--- a/tests/unit/core/types/test_export.py
+++ b/tests/unit/core/types/test_export.py
@@ -52,3 +52,4 @@ def test_wrap(fxt_label_info, task_type):
     assert ("model_info", "tile_size") in metadata
     assert ("model_info", "tiles_overlap") in metadata
     assert ("model_info", "max_pred_number") in metadata
+    assert ("model_info", "otx_version") in metadata
diff --git a/tests/unit/core/types/test_label.py b/tests/unit/core/types/test_label.py
index 3ae1ae1f463..7c6d2359b7a 100644
--- a/tests/unit/core/types/test_label.py
+++ b/tests/unit/core/types/test_label.py
@@ -4,7 +4,7 @@
 
 from datumaro import LabelCategories
 from datumaro.components.annotation import GroupType
-from otx.core.types.label import HLabelInfo, NullLabelInfo, SegLabelInfo
+from otx.core.types.label import HLabelInfo, LabelInfo, NullLabelInfo, SegLabelInfo
 
 
 def test_as_json(fxt_label_info):
@@ -13,17 +13,43 @@ def test_as_json(fxt_label_info):
     assert fxt_label_info == deserialized
 
 
+def test_label_info_from_arrow():
+    """Check ``LabelInfo.from_dm_label_groups_arrow`` on a two-label, single-group ``LabelCategories``."""
+    labels = [
+        LabelCategories.Category(name="car", attributes={"__name__car"}),
+        LabelCategories.Category(name="truck", attributes={"__name__truck"}),
+    ]
+    label_groups = [
+        LabelCategories.LabelGroup(
+            name="Detection labels___vehicle",
+            labels=["car", "truck"],
+            group_type=GroupType.EXCLUSIVE,
+        ),
+    ]
+    dm_label_categories = LabelCategories(items=labels, label_groups=label_groups)
+
+    label_info = LabelInfo.from_dm_label_groups_arrow(dm_label_categories)
+    assert len(label_info.label_names) == 2
+    assert len(label_info.label_ids) == 2  # was a duplicate of the label_names check above
+    assert len(label_info.label_groups[0]) == 2
+    assert set(label_info.label_names) == {"car", "truck"}
+
+
 def test_seg_label_info():
     # Automatically insert background label at zero index
     assert SegLabelInfo.from_num_classes(3) == SegLabelInfo(
         ["label_0", "label_1", "label_2"],
+        ["0", "1", "2"],
         [["label_0", "label_1", "label_2"]],
     )
-    assert SegLabelInfo.from_num_classes(1) == SegLabelInfo(["background", "label_0"], [["background", "label_0"]])
+    assert SegLabelInfo.from_num_classes(1) == SegLabelInfo(
+        ["background", "label_0"],
+        ["0", "1"],
+        [["background", "label_0"]],
+    )
     assert SegLabelInfo.from_num_classes(0) == NullLabelInfo()
 
 
-# Unit test
 def test_hlabel_info():
     labels = [
         LabelCategories.Category(name="car", parent="vehicle"),
@@ -52,3 +78,38 @@ def test_hlabel_info():
     assert list(hlabel_info.class_to_group_idx.keys()) == list(
         hlabel_info.label_to_idx.keys(),
     ), "class_to_group_idx and label_to_idx keys do not match"
+
+
+def test_hlabel_info_arrow():  # exercises HLabelInfo.from_dm_label_groups_arrow on 4 categories in 3 groups
+    labels = [
+        LabelCategories.Category(name="car", parent="vehicle", attributes={"__name__car"}),
+        LabelCategories.Category(name="truck", parent="vehicle", attributes={"__name__truck"}),
+        LabelCategories.Category(name="plush_toy", parent="plush toy", attributes={"__name__plush toy"}),
+        LabelCategories.Category(name="No class", attributes={"__name__No class"}),
+    ]
+    label_groups = [
+        LabelCategories.LabelGroup(
+            name="Detection labels___vehicle",
+            labels=["car", "truck"],
+            group_type=GroupType.EXCLUSIVE,
+        ),
+        LabelCategories.LabelGroup(
+            name="Detection labels___plush toy",
+            labels=["plush toy"],  # NOTE(review): category is named "plush_toy"; confirm mismatch is intended
+            group_type=GroupType.EXCLUSIVE,
+        ),
+        LabelCategories.LabelGroup(name="No class", labels=["No class"], group_type=GroupType.RESTRICTED),
+    ]
+    dm_label_categories = LabelCategories(items=labels, label_groups=label_groups)
+
+    hlabel_info = HLabelInfo.from_dm_label_groups_arrow(dm_label_categories)
+
+    # class_to_group_idx and label_to_idx must expose the same keys in the same order
+    assert list(hlabel_info.class_to_group_idx.keys()) == list(
+        hlabel_info.label_to_idx.keys(),
+    ), "class_to_group_idx and label_to_idx keys do not match"
+
+    assert len(hlabel_info.label_names) == 3  # four categories go in; "No class" must not come out
+    assert "No class" not in hlabel_info.label_names
+    for label in ["car", "truck", "plush toy"]:  # "plush toy" is spelled with a space here, not "plush_toy"
+        assert label in hlabel_info.label_names
diff --git a/tests/unit/engine/utils/test_auto_configurator.py b/tests/unit/engine/utils/test_auto_configurator.py
index f9ad33b0761..681b1b24639 100644
--- a/tests/unit/engine/utils/test_auto_configurator.py
+++ b/tests/unit/engine/utils/test_auto_configurator.py
@@ -131,9 +131,9 @@ def test_get_model(self, fxt_task: OTXTaskType) -> None:
         # With label_info
         label_names = ["class1", "class2", "class3"]
         label_info = (
-            LabelInfo(label_names=label_names, label_groups=[label_names])
+            LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names)
             if fxt_task != OTXTaskType.SEMANTIC_SEGMENTATION
-            else SegLabelInfo(label_names=label_names, label_groups=[label_names])
+            else SegLabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names)
         )
         model = auto_configurator.get_model(label_info=label_info)
         assert isinstance(model, OTXModel)
@@ -147,7 +147,7 @@ def test_get_model(self, fxt_task: OTXTaskType) -> None:
     def test_get_model_set_input_size(self) -> None:
         auto_configurator = AutoConfigurator(task=OTXTaskType.MULTI_CLASS_CLS)
         label_names = ["class1", "class2", "class3"]
-        label_info = LabelInfo(label_names=label_names, label_groups=[label_names])
+        label_info = LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names)
         input_size = 300
 
         model = auto_configurator.get_model(label_info=label_info, input_size=input_size)
diff --git a/tests/unit/tools/test_converter.py b/tests/unit/tools/test_converter.py
index 711b92b6bd4..eb35d890a4a 100644
--- a/tests/unit/tools/test_converter.py
+++ b/tests/unit/tools/test_converter.py
@@ -19,7 +19,7 @@ def test_convert(self):
         assert config["data"]["train_subset"]["num_workers"] == 8
         assert config["data"]["val_subset"]["num_workers"] == 8
         assert config["data"]["test_subset"]["num_workers"] == 8
-        assert config["callbacks"][0]["init_args"]["patience"] == 10
+        assert config["callbacks"][0]["init_args"]["patience"] == 4
         assert config["data"]["tile_config"]["enable_tiler"] is True
         assert config["data"]["tile_config"]["overlap"] == 0.5
 
@@ -57,6 +57,6 @@ def test_instantiate(self, tmp_path):
         assert engine.datamodule.tile_config.enable_tiler
 
         assert len(train_kwargs["callbacks"]) == len(config["callbacks"])
-        assert train_kwargs["callbacks"][0].patience == 10
+        assert train_kwargs["callbacks"][0].patience == 4
         assert len(train_kwargs["logger"]) == len(config["logger"])
         assert train_kwargs["max_epochs"] == 50