openvinotoolkit · wonjuleee · Sep 25, 2024 · Sep 20, 2024 · Sep 20, 2024 · Sep 20, 2024
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## \[Unreleased\]
 
 ### New features
+- Support KITTI 3D format
+  (<https://github.com/openvinotoolkit/datumaro/pull/1619>)
 
 ### Enhancements
 - Raise an appropriate error when exporting a datumaro dataset if its subset name contains path separators.

@@ -99,7 +99,7 @@ class KittiDetectionImporter(KittiImporter):
     @classmethod
     def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence:
         # left color camera label files
-        context.require_file(f"**/label_2/*{cls._ANNO_EXT}")
+        context.require_file(f"**/label_2/*_*{cls._ANNO_EXT}")
         return cls.DETECT_CONFIDENCE
 
     @classmethod

@@ -0,0 +1,142 @@
+# Copyright (C) 2024 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import glob
+import logging
+import os.path as osp
+from typing import List, Optional, Type, TypeVar
+
+from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories
+from datumaro.components.dataset_base import DatasetItem, SubsetBase
+from datumaro.components.errors import InvalidAnnotationError
+from datumaro.components.importer import ImportContext
+from datumaro.components.media import Image, PointCloud
+from datumaro.util.image import find_images
+
+from .format import Kitti3dPath
+
+T = TypeVar("T")
+
+
+class Kitti3dBase(SubsetBase):
+    """Dataset base that loads a KITTI 3D object detection directory.
+
+    Every ``*.txt`` file found directly in ``Kitti3dPath.LABEL_DIR`` becomes one
+    ``DatasetItem`` whose media is the ``.bin`` point cloud of the same name under
+    ``Kitti3dPath.PCD_DIR``. The image of the same name from ``Kitti3dPath.IMAGE_DIR``
+    is attached as an extra image when it exists, and the path of the matching file
+    under ``Kitti3dPath.CALIB_DIR`` is stored in the ``calib_path`` item attribute.
+
+    Label format reference:
+    https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d
+    """
+
+    def __init__(
+        self,
+        path: str,
+        *,
+        subset: Optional[str] = None,
+        ctx: Optional[ImportContext] = None,
+    ):
+        """Load all items below ``path``.
+
+        Args:
+            path: Root directory of the dataset; must be an existing directory.
+            subset: Subset name forwarded to ``SubsetBase`` and to every item.
+            ctx: Import context forwarded to ``SubsetBase``.
+        """
+        assert osp.isdir(path), path
+        super().__init__(subset=subset, media_type=PointCloud, ctx=ctx)
+
+        self._path = path
+
+        # Attribute names shared by every label; label names themselves are
+        # registered lazily while the annotation files are parsed.
+        common_attrs = {"truncated", "occluded", "alpha", "dimensions", "location", "rotation_y"}
+        self._categories = {AnnotationType.label: LabelCategories(attributes=common_attrs)}
+        self._items = self._load_items()
+
+    def _load_items(self) -> List[DatasetItem]:
+        """Build one ``DatasetItem`` per readable label file, in sorted path order.
+
+        Label files that cannot be opened are logged and skipped.
+        """
+        items = []
+        image_dir = osp.join(self._path, Kitti3dPath.IMAGE_DIR)
+        # Image path keyed by its extension-less path relative to the image directory,
+        # i.e. the same key that is derived from the label file path below.
+        image_path_by_id = {
+            osp.splitext(osp.relpath(p, image_dir))[0]: p
+            for p in find_images(image_dir, recursive=True)
+        }
+
+        ann_dir = osp.join(self._path, Kitti3dPath.LABEL_DIR)
+
+        # NOTE(review): the pattern contains no "**", so recursive=True has no effect
+        # and only files directly inside ann_dir are matched - confirm this is intended.
+        for labels_path in sorted(glob.glob(osp.join(ann_dir, "*.txt"), recursive=True)):
+            item_id = osp.splitext(osp.relpath(labels_path, ann_dir))[0]
+
+            try:
+                with open(labels_path, "r", encoding="utf-8") as f:
+                    lines = f.readlines()
+            except IOError as e:
+                logging.error(f"Error reading file {labels_path}: {e}")
+                continue
+
+            anns = self._parse_annotations(lines, labels_path)
+
+            # Attach the camera image only when one exists; never pass [None] on.
+            image_path = image_path_by_id.pop(item_id, None)
+            extra_images = [Image.from_file(path=image_path)] if image_path else []
+
+            items.append(
+                DatasetItem(
+                    id=item_id,
+                    subset=self._subset,
+                    media=PointCloud.from_file(
+                        path=osp.join(self._path, Kitti3dPath.PCD_DIR, item_id + ".bin"),
+                        extra_images=extra_images,
+                    ),
+                    attributes={
+                        "calib_path": osp.join(self._path, Kitti3dPath.CALIB_DIR, item_id + ".txt")
+                    },
+                    annotations=anns,
+                )
+            )
+
+        return items
+
+    def _parse_annotations(self, lines: List[str], labels_path: str) -> List[Bbox]:
+        """Convert the lines of one label file into ``Bbox`` annotations.
+
+        Whitespace-separated field layout read from each line:
+        0 label name, 1 truncated, 2 occluded, 3 alpha, 4-7 bbox (x1, y1, x2, y2),
+        8-10 dimensions (height, width, length), 11-13 location (x, y, z),
+        14 rotation_y. A 16th field is accepted but not read.
+
+        Blank lines are ignored. Lines with a different field count, or with a field
+        that cannot be converted, are logged and skipped. ``labels_path`` is only used
+        in log messages. The annotation ``id`` is the zero-based line index.
+        """
+        label_categories = self._categories[AnnotationType.label]
+        anns = []
+
+        for line_idx, line in enumerate(lines):
+            fields = line.split()
+            if not fields:
+                # Blank (e.g. trailing) lines are not malformed annotations.
+                continue
+            if len(fields) not in (15, 16):
+                logging.warning(
+                    f"Unexpected line length {len(fields)} in file {labels_path} "
+                    f"at line {line_idx + 1}"
+                )
+                continue
+
+            try:
+                x1 = self._parse_field(fields[4], float, "bbox left-top x")
+                y1 = self._parse_field(fields[5], float, "bbox left-top y")
+                x2 = self._parse_field(fields[6], float, "bbox right-bottom x")
+                y2 = self._parse_field(fields[7], float, "bbox right-bottom y")
+
+                attributes = {
+                    "truncated": self._parse_field(fields[1], float, "truncated"),
+                    "occluded": self._parse_field(fields[2], int, "occluded"),
+                    "alpha": self._parse_field(fields[3], float, "alpha"),
+                    "dimensions": [
+                        self._parse_field(fields[8], float, "height (in meters)"),
+                        self._parse_field(fields[9], float, "width (in meters)"),
+                        self._parse_field(fields[10], float, "length (in meters)"),
+                    ],
+                    "location": [
+                        self._parse_field(fields[11], float, "x (in meters)"),
+                        self._parse_field(fields[12], float, "y (in meters)"),
+                        self._parse_field(fields[13], float, "z (in meters)"),
+                    ],
+                    "rotation_y": self._parse_field(fields[14], float, "rotation_y"),
+                }
+            except (ValueError, InvalidAnnotationError) as e:
+                # _parse_field reports failures as InvalidAnnotationError; ValueError
+                # stays in the tuple so the previous handler's coverage is not narrowed.
+                logging.error(f"Error parsing line {line_idx + 1} in file {labels_path}: {e}")
+                continue
+
+            # Register the label only after the line parsed, so that a rejected
+            # line cannot introduce a category on its own.
+            label_name = fields[0]
+            label_id = label_categories.find(label_name)[0]
+            if label_id is None:
+                label_id = label_categories.add(label_name)
+
+            anns.append(
+                Bbox(
+                    x=x1,
+                    y=y1,
+                    w=x2 - x1,
+                    h=y2 - y1,
+                    id=line_idx,
+                    attributes=attributes,
+                    label=label_id,
+                )
+            )
+            self._ann_types.add(AnnotationType.bbox)
+
+        return anns
+
+    def _parse_field(self, value: str, desired_type: Type[T], field_name: str) -> T:
+        """Convert ``value`` with ``desired_type``.
+
+        Args:
+            value: Raw text of the field.
+            desired_type: Callable used for the conversion, e.g. ``int`` or ``float``.
+            field_name: Human-readable field name used in the error message.
+
+        Raises:
+            InvalidAnnotationError: If the conversion raises any exception.
+        """
+        try:
+            return desired_type(value)
+        except Exception as e:
+            raise InvalidAnnotationError(
+                f"Can't parse {field_name} from '{value}'. Expected {desired_type}"
+            ) from e
@@ -0,0 +1,12 @@
+# Copyright (C) 2024 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import os.path as osp
+
+
+class Kitti3dPath:
+    """Sub-directory names, relative to the dataset root, of a KITTI 3D dataset."""
+
+    # Point clouds: one "<item id>.bin" file per item.
+    PCD_DIR = "velodyne"
+    # Camera images, matched to items by file name.
+    IMAGE_DIR = "image_2"
+    # Annotation files: one "<item id>.txt" file per item.
+    LABEL_DIR = "label_2"
+    # Calibration files: one "<item id>.txt" file per item.
+    CALIB_DIR = "calib"
@@ -0,0 +1,45 @@
+# Copyright (C) 2024 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from typing import List
+
+from datumaro.components.errors import DatasetImportError
+from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext
+from datumaro.components.importer import Importer
+
+from .format import Kitti3dPath
+
+
+class Kitti3dImporter(Importer):
+    """Importer that detects a KITTI 3D dataset and exposes it as a ``kitti3d`` source."""
+
+    # Extension of the annotation (label) files.
+    _ANNO_EXT = ".txt"
+
+    @classmethod
+    def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence:
+        """Require a point cloud file and a well-formed annotation file.
+
+        Returns:
+            ``FormatDetectionConfidence.MEDIUM`` when both requirements are met.
+        """
+        context.require_file(f"{Kitti3dPath.PCD_DIR}/*.bin")
+        ann_path = context.require_file(f"{Kitti3dPath.LABEL_DIR}/*{cls._ANNO_EXT}")
+        cls._check_ann_file(ann_path, context)
+        return FormatDetectionConfidence.MEDIUM
+
+    @classmethod
+    def _check_ann_file(cls, fpath: str, context: FormatDetectionContext) -> bool:
+        """Check that the first line of ``fpath`` has 15 or 16 fields.
+
+        Only the first line is inspected. Fields are split on arbitrary whitespace,
+        the same way the dataset loader splits them, so tab-separated files or lines
+        with trailing blanks are not rejected here.
+
+        Returns:
+            ``True`` when the first line has an accepted number of fields.
+
+        Raises:
+            DatasetImportError: If the first line has another number of fields or
+                the file has no lines at all.
+        """
+        # NOTE(review): presumably probe_text_file turns exceptions raised in this
+        # block into an unmet format requirement - confirm against its implementation.
+        with context.probe_text_file(
+            fpath, "Requirements for the annotation file of Kitti 3D format"
+        ) as fp:
+            for line in fp:
+                fields = line.split()
+                if len(fields) in (15, 16):
+                    return True
+                raise DatasetImportError(
+                    f"Kitti 3D format txt file should have 15 or 16 fields for "
+                    f"each line, but the read line has {len(fields)} fields: "
+                    f"fields={fields}."
+                )
+            # Reached only when the loop body never ran, i.e. the file has no lines.
+            raise DatasetImportError("Empty file is not allowed.")
+
+    @classmethod
+    def get_file_extensions(cls) -> List[str]:
+        """Return the annotation file extensions handled by this importer."""
+        return [cls._ANNO_EXT]
+
+    @classmethod
+    def find_sources(cls, path):
+        """Return a single ``kitti3d`` source whose url is ``path`` itself."""
+        return [{"url": path, "format": "kitti3d"}]
@@ -1,13 +1,4 @@
 [
-  {
-    "import_path": "datumaro.plugins.accuracy_checker_plugin.ac_launcher.AcLauncher",
-    "plugin_name": "ac",
-    "plugin_type": "Launcher",
-    "extra_deps": [
-      "tensorflow",
-      "openvino.tools.accuracy_checker"
-    ]
-  },
   {
     "import_path": "datumaro.plugins.configurable_validator.ConfigurableValidator",
     "plugin_name": "configurable",
@@ -799,6 +790,21 @@
       ]
     }
   },
+  {
+    "import_path": "datumaro.plugins.data_formats.kitti_3d.base.Kitti3dBase",
+    "plugin_name": "kitti3d",
+    "plugin_type": "DatasetBase"
+  },
+  {
+    "import_path": "datumaro.plugins.data_formats.kitti_3d.importer.Kitti3dImporter",
+    "plugin_name": "kitti3d",
+    "plugin_type": "Importer",
+    "metadata": {
+      "file_extensions": [
+        ".txt"
+      ]
+    }
+  },
   {
     "import_path": "datumaro.plugins.data_formats.kitti_raw.base.KittiRawBase",
     "plugin_name": "kitti_raw",
@@ -1855,13 +1861,13 @@
     "plugin_type": "Transform"
   },
   {
-    "import_path": "datumaro.plugins.transforms.Correct",
-    "plugin_name": "correct",
+    "import_path": "datumaro.plugins.transforms.Clean",
+    "plugin_name": "clean",
     "plugin_type": "Transform"
   },
   {
-    "import_path": "datumaro.plugins.transforms.Clean",
-    "plugin_name": "clean",
+    "import_path": "datumaro.plugins.transforms.Correct",
+    "plugin_name": "correct",
     "plugin_type": "Transform"
   },
   {

@@ -0,0 +1,7 @@
+P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00
+P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00
+P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03
+P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03
+R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01
+Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01
+Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01
@@ -0,0 +1,3 @@
+Truck 0.00 0 -1.57 600 150 630 190 2.85 2.63 12.34 0.47 1.49 69.44 -1.56
+Car 0.00 3 -1.65 650 160 700 200 1.86 0.60 2.02 4.59 1.32 45.84 -1.55
+DontCare -1 -1 -10 500 170 590 190 -1 -1 -1 -1000 -1000 -1000 -10