Support KITTI 3D format (#1619)

### Summary

- Ticket no.151424
- Support KITTI 3D format

### How to test

- Add unit test

### Checklist

- [X] I have added unit tests to cover my changes.
- [ ] I have added integration tests to cover my changes.
- [X] I have added the description of my changes into [CHANGELOG](https://github.com/openvinotoolkit/datumaro/blob/develop/CHANGELOG.md).
- [ ] I have updated the [documentation](https://github.com/openvinotoolkit/datumaro/tree/develop/docs) accordingly

### License

- [ ] I submit _my code changes_ under the same [MIT License](https://github.com/openvinotoolkit/datumaro/blob/develop/LICENSE) that covers the project. Feel free to contact the maintainers if that's a concern.
- [ ] I have updated the license header for each file (see an example below).

```python
# Copyright (C) 2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
```

---------

Co-authored-by: Wonju Lee <[email protected]>
openvinotoolkit · Sep 25, 2024 · c4d7bb4 · c4d7bb4
1 parent 62b5814
commit c4d7bb4
Show file tree

Hide file tree

Showing 14 changed files with 347 additions and 5 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## \[Unreleased\]
 
 ### New features
+- Support KITTI 3D format
+  (<https://github.com/openvinotoolkit/datumaro/pull/1619>)
 - Add PseudoLabeling transform for unlabeled dataset
   (<https://github.com/openvinotoolkit/datumaro/pull/1594>)
 

diff --git a/src/datumaro/plugins/data_formats/kitti/importer.py b/src/datumaro/plugins/data_formats/kitti/importer.py
@@ -99,7 +99,7 @@ class KittiDetectionImporter(KittiImporter):
     @classmethod
     def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence:
         # left color camera label files
-        context.require_file(f"**/label_2/*{cls._ANNO_EXT}")
+        context.require_file(f"**/label_2/*_*{cls._ANNO_EXT}")
         return cls.DETECT_CONFIDENCE
 
     @classmethod

diff --git a/src/datumaro/plugins/data_formats/kitti_3d/__init__.py b/src/datumaro/plugins/data_formats/kitti_3d/__init__.py
diff --git a/src/datumaro/plugins/data_formats/kitti_3d/base.py b/src/datumaro/plugins/data_formats/kitti_3d/base.py
@@ -0,0 +1,142 @@
+# Copyright (C) 2024 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import glob
+import logging
+import os.path as osp
+from typing import List, Optional, Type, TypeVar
+
+from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories
+from datumaro.components.dataset_base import DatasetItem, SubsetBase
+from datumaro.components.errors import InvalidAnnotationError
+from datumaro.components.importer import ImportContext
+from datumaro.components.media import Image, PointCloud
+from datumaro.util.image import find_images
+
+from .format import Kitti3dPath
+
+T = TypeVar("T")
+
+
+class Kitti3dBase(SubsetBase):
+    # https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d
+
+    def __init__(
+        self,
+        path: str,
+        *,
+        subset: Optional[str] = None,
+        ctx: Optional[ImportContext] = None,
+    ):
+        assert osp.isdir(path), path
+        super().__init__(subset=subset, media_type=PointCloud, ctx=ctx)
+
+        self._path = path
+
+        common_attrs = {"truncated", "occluded", "alpha", "dimensions", "location", "rotation_y"}
+        self._categories = {AnnotationType.label: LabelCategories(attributes=common_attrs)}
+        self._items = self._load_items()
+
+    def _load_items(self) -> List[DatasetItem]:
+        items = []
+        image_dir = osp.join(self._path, Kitti3dPath.IMAGE_DIR)
+        image_path_by_id = {
+            osp.splitext(osp.relpath(p, image_dir))[0]: p
+            for p in find_images(image_dir, recursive=True)
+        }
+
+        ann_dir = osp.join(self._path, Kitti3dPath.LABEL_DIR)
+        label_categories = self._categories[AnnotationType.label]
+
+        for labels_path in sorted(glob.glob(osp.join(ann_dir, "*.txt"), recursive=True)):
+            item_id = osp.splitext(osp.relpath(labels_path, ann_dir))[0]
+            anns = []
+
+            try:
+                with open(labels_path, "r", encoding="utf-8") as f:
+                    lines = f.readlines()
+            except IOError as e:
+                logging.error(f"Error reading file {labels_path}: {e}")
+                continue
+
+            for line_idx, line in enumerate(lines):
+                line = line.split()
+                if len(line) not in [15, 16]:
+                    logging.warning(
+                        f"Unexpected line length {len(line)} in file {labels_path} at line {line_idx + 1}"
+                    )
+                    continue
+
+                label_name = line[0]
+                label_id = label_categories.find(label_name)[0]
+                if label_id is None:
+                    label_id = label_categories.add(label_name)
+
+                try:
+                    x1 = self._parse_field(line[4], float, "bbox left-top x")
+                    y1 = self._parse_field(line[5], float, "bbox left-top y")
+                    x2 = self._parse_field(line[6], float, "bbox right-bottom x")
+                    y2 = self._parse_field(line[7], float, "bbox right-bottom y")
+
+                    attributes = {
+                        "truncated": self._parse_field(line[1], float, "truncated"),
+                        "occluded": self._parse_field(line[2], int, "occluded"),
+                        "alpha": self._parse_field(line[3], float, "alpha"),
+                        "dimensions": [
+                            self._parse_field(line[8], float, "height (in meters)"),
+                            self._parse_field(line[9], float, "width (in meters)"),
+                            self._parse_field(line[10], float, "length (in meters)"),
+                        ],
+                        "location": [
+                            self._parse_field(line[11], float, "x (in meters)"),
+                            self._parse_field(line[12], float, "y (in meters)"),
+                            self._parse_field(line[13], float, "z (in meters)"),
+                        ],
+                        "rotation_y": self._parse_field(line[14], float, "rotation_y"),
+                    }
+                except ValueError as e:
+                    logging.error(f"Error parsing line {line_idx + 1} in file {labels_path}: {e}")
+                    continue
+
+                anns.append(
+                    Bbox(
+                        x=x1,
+                        y=y1,
+                        w=x2 - x1,
+                        h=y2 - y1,
+                        id=line_idx,
+                        attributes=attributes,
+                        label=label_id,
+                    )
+                )
+                self._ann_types.add(AnnotationType.bbox)
+
+            image = image_path_by_id.pop(item_id, None)
+            if image:
+                image = Image.from_file(path=image)
+
+            items.append(
+                DatasetItem(
+                    id=item_id,
+                    subset=self._subset,
+                    media=PointCloud.from_file(
+                        path=osp.join(self._path, Kitti3dPath.PCD_DIR, item_id + ".bin"),
+                        extra_images=[image],
+                    ),
+                    attributes={
+                        "calib_path": osp.join(self._path, Kitti3dPath.CALIB_DIR, item_id + ".txt")
+                    },
+                    annotations=anns,
+                )
+            )
+
+        return items
+
+    def _parse_field(self, value: str, desired_type: Type[T], field_name: str) -> T:
+        try:
+            return desired_type(value)
+        except Exception as e:
+            raise InvalidAnnotationError(
+                f"Can't parse {field_name} from '{value}'. Expected {desired_type}"
+            ) from e
diff --git a/src/datumaro/plugins/data_formats/kitti_3d/format.py b/src/datumaro/plugins/data_formats/kitti_3d/format.py
@@ -0,0 +1,12 @@
+# Copyright (C) 2024 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import os.path as osp
+
+
+class Kitti3dPath:
+    """Names of the sub-directories, relative to the dataset root, used by the KITTI 3D format."""
+
+    # Point cloud directory. A single-argument join returns its argument
+    # unchanged, so the value is the plain string "velodyne".
+    PCD_DIR = osp.join("velodyne")
+    # Camera image directory.
+    IMAGE_DIR = "image_2"
+    # Label (annotation) directory.
+    LABEL_DIR = "label_2"
+    # Calibration file directory.
+    CALIB_DIR = "calib"
diff --git a/src/datumaro/plugins/data_formats/kitti_3d/importer.py b/src/datumaro/plugins/data_formats/kitti_3d/importer.py
@@ -0,0 +1,45 @@
+# Copyright (C) 2024 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from typing import List
+
+from datumaro.components.errors import DatasetImportError
+from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext
+from datumaro.components.importer import Importer
+
+from .format import Kitti3dPath
+
+
+class Kitti3dImporter(Importer):
+    _ANNO_EXT = ".txt"
+
+    @classmethod
+    def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence:
+        context.require_file(f"{Kitti3dPath.PCD_DIR}/*.bin")
+        cls._check_ann_file(context.require_file(f"{Kitti3dPath.LABEL_DIR}/*.txt"), context)
+        return FormatDetectionConfidence.MEDIUM
+
+    @classmethod
+    def _check_ann_file(cls, fpath: str, context: FormatDetectionContext) -> bool:
+        with context.probe_text_file(
+            fpath, "Requirements for the annotation file of Kitti 3D format"
+        ) as fp:
+            for line in fp:
+                fields = line.rstrip("\n").split(" ")
+                if len(fields) == 15 or len(fields) == 16:
+                    return True
+                raise DatasetImportError(
+                    f"Kitti 3D format txt file should have 15 or 16 fields for "
+                    f"each line, but the read line has {len(fields)} fields: "
+                    f"fields={fields}."
+                )
+            raise DatasetImportError("Empty file is not allowed.")
+
+    @classmethod
+    def get_file_extensions(cls) -> List[str]:
+        return [cls._ANNO_EXT]
+
+    @classmethod
+    def find_sources(cls, path):
+        return [{"url": path, "format": "kitti3d"}]
diff --git a/src/datumaro/plugins/specs.json b/src/datumaro/plugins/specs.json
@@ -799,6 +799,21 @@
       ]
     }
   },
+  {
+    "import_path": "datumaro.plugins.data_formats.kitti_3d.base.Kitti3dBase",
+    "plugin_name": "kitti3d",
+    "plugin_type": "DatasetBase"
+  },
+  {
+    "import_path": "datumaro.plugins.data_formats.kitti_3d.importer.Kitti3dImporter",
+    "plugin_name": "kitti3d",
+    "plugin_type": "Importer",
+    "metadata": {
+      "file_extensions": [
+        ".txt"
+      ]
+    }
+  },
   {
     "import_path": "datumaro.plugins.data_formats.kitti_raw.base.KittiRawBase",
     "plugin_name": "kitti_raw",
@@ -1855,13 +1870,13 @@
     "plugin_type": "Transform"
   },
   {
-    "import_path": "datumaro.plugins.transforms.Correct",
-    "plugin_name": "correct",
+    "import_path": "datumaro.plugins.transforms.Clean",
+    "plugin_name": "clean",
     "plugin_type": "Transform"
   },
   {
-    "import_path": "datumaro.plugins.transforms.Clean",
-    "plugin_name": "clean",
+    "import_path": "datumaro.plugins.transforms.Correct",
+    "plugin_name": "correct",
     "plugin_type": "Transform"
   },
   {

diff --git a/tests/assets/kitti_dataset/kitti_3d/training/calib/000001.txt b/tests/assets/kitti_dataset/kitti_3d/training/calib/000001.txt
@@ -0,0 +1,7 @@
+P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00
+P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00
+P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03
+P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03
+R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01
+Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01
+Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01
diff --git a/tests/assets/kitti_dataset/kitti_3d/training/image_2/000001.png b/tests/assets/kitti_dataset/kitti_3d/training/image_2/000001.png
diff --git a/tests/assets/kitti_dataset/kitti_3d/training/label_2/000001.txt b/tests/assets/kitti_dataset/kitti_3d/training/label_2/000001.txt
@@ -0,0 +1,3 @@
+Truck 0.00 0 -1.57 600 150 630 190 2.85 2.63 12.34 0.47 1.49 69.44 -1.56
+Car 0.00 3 -1.65 650 160 700 200 1.86 0.60 2.02 4.59 1.32 45.84 -1.55
+DontCare -1 -1 -10 500 170 590 190 -1 -1 -1 -1000 -1000 -1000 -10
diff --git a/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000001.bin b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000001.bin
diff --git a/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000002.bin b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000002.bin
diff --git a/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000003.bin b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000003.bin
diff --git a/tests/unit/test_kitti_3d_format.py b/tests/unit/test_kitti_3d_format.py
@@ -0,0 +1,116 @@
+import os.path as osp
+from unittest import TestCase
+
+from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories
+from datumaro.components.dataset_base import DatasetItem
+from datumaro.components.environment import Environment
+from datumaro.components.media import Image, PointCloud
+from datumaro.components.project import Dataset
+from datumaro.plugins.data_formats.kitti_3d.importer import Kitti3dImporter
+
+from tests.requirements import Requirements, mark_requirement
+from tests.utils.assets import get_test_asset_path
+from tests.utils.test_utils import compare_datasets_3d
+
+DUMMY_DATASET_DIR = get_test_asset_path("kitti_dataset", "kitti_3d", "training")
+
+
+class Kitti3DImporterTest(TestCase):
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_detect(self):
+        detected_formats = Environment().detect_dataset(DUMMY_DATASET_DIR)
+        self.assertEqual([Kitti3dImporter.NAME], detected_formats)
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_load(self):
+        """
+        <b>Description:</b>
+        Ensure that the dataset can be loaded correctly from the KITTI3D format.
+
+        <b>Expected results:</b>
+        The loaded dataset should have the same number of data items as the expected dataset.
+        The data items in the loaded dataset should have the same attributes and values as the expected data items.
+        The point clouds and images associated with the data items should be loaded correctly.
+
+        <b>Steps:</b>
+        1. Prepare an expected dataset with known data items, point clouds, images, and attributes.
+        2. Load the dataset from the KITTI3D format.
+        3. Compare the loaded dataset with the expected dataset.
+        """
+        pcd1 = osp.join(DUMMY_DATASET_DIR, "velodyne", "000001.bin")
+
+        image1 = Image.from_file(path=osp.join(DUMMY_DATASET_DIR, "image_2", "000001.png"))
+
+        expected_label_cat = LabelCategories(
+            attributes={"occluded", "truncated", "alpha", "dimensions", "location", "rotation_y"}
+        )
+        expected_label_cat.add("Truck")
+        expected_label_cat.add("Car")
+        expected_label_cat.add("DontCare")
+        expected_dataset = Dataset.from_iterable(
+            [
+                DatasetItem(
+                    id="000001",
+                    annotations=[
+                        Bbox(
+                            600,  # x1
+                            150,  # y1
+                            30,  # x2-x1
+                            40,  # y2-y1
+                            label=0,
+                            id=0,
+                            attributes={
+                                "truncated": 0.0,
+                                "occluded": 0,
+                                "alpha": -1.57,
+                                "dimensions": [2.85, 2.63, 12.34],
+                                "location": [0.47, 1.49, 69.44],
+                                "rotation_y": -1.56,
+                            },
+                            z_order=0,
+                        ),
+                        Bbox(
+                            650,  # x1
+                            160,  # y1
+                            50,  # x2-x1
+                            40,  # y2-y1
+                            label=1,
+                            id=1,
+                            attributes={
+                                "truncated": 0.0,
+                                "occluded": 3,
+                                "alpha": -1.65,
+                                "dimensions": [1.86, 0.6, 2.02],
+                                "location": [4.59, 1.32, 45.84],
+                                "rotation_y": -1.55,
+                            },
+                            z_order=0,
+                        ),
+                        Bbox(
+                            500,  # x1
+                            170,  # y1
+                            90,  # x2-x1
+                            20,  # y2-y1
+                            label=2,
+                            id=2,
+                            attributes={
+                                "truncated": -1.0,
+                                "occluded": -1,
+                                "alpha": -10.0,
+                                "dimensions": [-1.0, -1.0, -1.0],
+                                "location": [-1000.0, -1000.0, -1000.0],
+                                "rotation_y": -10.0,
+                            },
+                        ),
+                    ],
+                    media=PointCloud.from_file(path=pcd1, extra_images=[image1]),
+                    attributes={"calib_path": osp.join(DUMMY_DATASET_DIR, "calib", "000001.txt")},
+                ),
+            ],
+            categories={AnnotationType.label: expected_label_cat},
+            media_type=PointCloud,
+        )
+
+        parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, "kitti3d")
+
+        compare_datasets_3d(self, expected_dataset, parsed_dataset, require_point_cloud=True)