diff --git a/CHANGELOG.md b/CHANGELOG.md index 6591a9537c..b6aae7b444 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## \[Unreleased\] ### New features +- Support KITTI 3D format + () - Add PseudoLabeling transform for unlabeled dataset () diff --git a/src/datumaro/plugins/data_formats/kitti/importer.py b/src/datumaro/plugins/data_formats/kitti/importer.py index 2880301901..c1e53fad0c 100644 --- a/src/datumaro/plugins/data_formats/kitti/importer.py +++ b/src/datumaro/plugins/data_formats/kitti/importer.py @@ -99,7 +99,7 @@ class KittiDetectionImporter(KittiImporter): @classmethod def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: # left color camera label files - context.require_file(f"**/label_2/*{cls._ANNO_EXT}") + context.require_file(f"**/label_2/*_*{cls._ANNO_EXT}") return cls.DETECT_CONFIDENCE @classmethod diff --git a/src/datumaro/plugins/data_formats/kitti_3d/__init__.py b/src/datumaro/plugins/data_formats/kitti_3d/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/datumaro/plugins/data_formats/kitti_3d/base.py b/src/datumaro/plugins/data_formats/kitti_3d/base.py new file mode 100644 index 0000000000..340792c14b --- /dev/null +++ b/src/datumaro/plugins/data_formats/kitti_3d/base.py @@ -0,0 +1,142 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import glob +import logging +import os.path as osp +from typing import List, Optional, Type, TypeVar + +from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories +from datumaro.components.dataset_base import DatasetItem, SubsetBase +from datumaro.components.errors import InvalidAnnotationError +from datumaro.components.importer import ImportContext +from datumaro.components.media import Image, PointCloud +from datumaro.util.image import find_images + +from .format import Kitti3dPath + +T = TypeVar("T") + + +class 
class Kitti3dBase(SubsetBase):
    """Reader for a KITTI 3D object detection directory.

    Format reference:
    https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d

    One item is produced per ``*.txt`` file found under ``Kitti3dPath.LABEL_DIR``.
    Its media is the ``<id>.bin`` file under ``Kitti3dPath.PCD_DIR``, with the image
    of the same id from ``Kitti3dPath.IMAGE_DIR`` attached as an extra image when
    one exists. The path of ``<id>.txt`` under ``Kitti3dPath.CALIB_DIR`` is stored
    in the item attribute ``calib_path``; the calibration file itself is not read.
    """

    def __init__(
        self,
        path: str,
        *,
        subset: Optional[str] = None,
        ctx: Optional[ImportContext] = None,
    ):
        """Load every labeled frame below ``path``.

        Args:
            path: Dataset root directory; it must already exist.
            subset: Subset name forwarded to the base class and to each item.
            ctx: Import context forwarded to the base class.
        """
        assert osp.isdir(path), path
        super().__init__(subset=subset, media_type=PointCloud, ctx=ctx)

        self._path = path

        # Label names are not known up front; they are registered while parsing.
        common_attrs = {"truncated", "occluded", "alpha", "dimensions", "location", "rotation_y"}
        self._categories = {AnnotationType.label: LabelCategories(attributes=common_attrs)}
        self._items = self._load_items()

    def _load_items(self) -> List[DatasetItem]:
        """Build one ``DatasetItem`` per readable label file, ordered by label path."""
        items = []
        image_dir = osp.join(self._path, Kitti3dPath.IMAGE_DIR)
        image_path_by_id = {
            osp.splitext(osp.relpath(p, image_dir))[0]: p
            for p in find_images(image_dir, recursive=True)
        }

        ann_dir = osp.join(self._path, Kitti3dPath.LABEL_DIR)

        # ``recursive=True`` only has an effect together with "**"; the pattern still
        # matches files placed directly in ``ann_dir``. This mirrors the recursive
        # image lookup above, so ids of nested files line up on both sides.
        label_paths = sorted(glob.glob(osp.join(ann_dir, "**", "*.txt"), recursive=True))
        for labels_path in label_paths:
            item_id = osp.splitext(osp.relpath(labels_path, ann_dir))[0]

            try:
                with open(labels_path, "r", encoding="utf-8") as f:
                    lines = f.readlines()
            except IOError as e:
                # Best effort: an unreadable label file drops that frame only.
                logging.error(f"Error reading file {labels_path}: {e}")
                continue

            anns = self._parse_annotations(lines, labels_path)

            # Pass an empty list, not ``[None]``, when the frame has no image.
            image_path = image_path_by_id.pop(item_id, None)
            extra_images = [Image.from_file(path=image_path)] if image_path else []

            items.append(
                DatasetItem(
                    id=item_id,
                    subset=self._subset,
                    media=PointCloud.from_file(
                        path=osp.join(self._path, Kitti3dPath.PCD_DIR, item_id + ".bin"),
                        extra_images=extra_images,
                    ),
                    attributes={
                        "calib_path": osp.join(self._path, Kitti3dPath.CALIB_DIR, item_id + ".txt")
                    },
                    annotations=anns,
                )
            )

        return items

    def _parse_annotations(self, lines: List[str], labels_path: str) -> List[Bbox]:
        """Convert the lines of one label file into ``Bbox`` annotations.

        Lines with an unexpected number of fields, or with a field that cannot be
        converted, are logged and skipped. ``labels_path`` is used in log messages only.
        """
        label_categories = self._categories[AnnotationType.label]
        anns = []

        for line_idx, line in enumerate(lines):
            fields = line.split()
            # 15 fields are read below; a 16th field is accepted but ignored
            # (presumably the detection score of KITTI result files - TODO confirm).
            if len(fields) not in (15, 16):
                logging.warning(
                    f"Unexpected line length {len(fields)} in file {labels_path} "
                    f"at line {line_idx + 1}"
                )
                continue

            try:
                bbox = self._parse_bbox(fields)
                attributes = self._parse_attributes(fields)
            except InvalidAnnotationError as e:
                # ``_parse_field`` wraps every conversion failure in this type, so
                # this is the exception to catch to skip a malformed line.
                logging.error(f"Error parsing line {line_idx + 1} in file {labels_path}: {e}")
                continue

            # Register the label only once the line is known to be usable, so a
            # skipped line does not leave an unused category behind.
            label_name = fields[0]
            label_id = label_categories.find(label_name)[0]
            if label_id is None:
                label_id = label_categories.add(label_name)

            x1, y1, x2, y2 = bbox
            anns.append(
                Bbox(
                    x=x1,
                    y=y1,
                    w=x2 - x1,
                    h=y2 - y1,
                    id=line_idx,
                    attributes=attributes,
                    label=label_id,
                )
            )
            self._ann_types.add(AnnotationType.bbox)

        return anns

    def _parse_bbox(self, fields: List[str]) -> List[float]:
        """Return ``[x1, y1, x2, y2]`` of the 2D box stored in fields 4-7."""
        return [
            self._parse_field(fields[4], float, "bbox left-top x"),
            self._parse_field(fields[5], float, "bbox left-top y"),
            self._parse_field(fields[6], float, "bbox right-bottom x"),
            self._parse_field(fields[7], float, "bbox right-bottom y"),
        ]

    def _parse_attributes(self, fields: List[str]) -> dict:
        """Return the per-object attributes stored in fields 1-3 and 8-14."""
        return {
            "truncated": self._parse_field(fields[1], float, "truncated"),
            "occluded": self._parse_field(fields[2], int, "occluded"),
            "alpha": self._parse_field(fields[3], float, "alpha"),
            "dimensions": [
                self._parse_field(fields[8], float, "height (in meters)"),
                self._parse_field(fields[9], float, "width (in meters)"),
                self._parse_field(fields[10], float, "length (in meters)"),
            ],
            "location": [
                self._parse_field(fields[11], float, "x (in meters)"),
                self._parse_field(fields[12], float, "y (in meters)"),
                self._parse_field(fields[13], float, "z (in meters)"),
            ],
            "rotation_y": self._parse_field(fields[14], float, "rotation_y"),
        }

    def _parse_field(self, value: str, desired_type: Type[T], field_name: str) -> T:
        """Convert one raw label field by calling ``desired_type(value)``.

        Args:
            value: Raw text of the field.
            desired_type: Callable used for the conversion, e.g. ``float`` or ``int``.
            field_name: Human-readable field name used in the error message.

        Raises:
            InvalidAnnotationError: If the conversion raises anything; the original
                exception is kept as the cause.
        """
        try:
            return desired_type(value)
        except Exception as e:
            raise InvalidAnnotationError(
                f"Can't parse {field_name} from '{value}'. Expected {desired_type}"
            ) from e
class Kitti3dPath:
    """Names of the sub-directories of a KITTI 3D dataset root."""

    # Point cloud files (``*.bin``).
    PCD_DIR = "velodyne"
    # Images matched to frames by file name.
    IMAGE_DIR = "image_2"
    # Per-frame label files (``*.txt``).
    LABEL_DIR = "label_2"
    # Per-frame calibration files (``*.txt``).
    CALIB_DIR = "calib"
class Kitti3dImporter(Importer):
    """Importer that recognizes a KITTI 3D directory and exposes it as a ``kitti3d`` source."""

    _ANNO_EXT = ".txt"

    @classmethod
    def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence:
        """Require a point cloud file and a well-formed label file below the root.

        The label file path returned by ``context.require_file`` is additionally
        checked with ``_check_ann_file``.

        Returns:
            ``FormatDetectionConfidence.MEDIUM`` when all requirements hold.
        """
        context.require_file(f"{Kitti3dPath.PCD_DIR}/*.bin")
        ann_path = context.require_file(f"{Kitti3dPath.LABEL_DIR}/*{cls._ANNO_EXT}")
        cls._check_ann_file(ann_path, context)
        return FormatDetectionConfidence.MEDIUM

    @classmethod
    def _check_ann_file(cls, fpath: str, context: FormatDetectionContext) -> bool:
        """Check that the first line of a label file has 15 or 16 fields.

        Only the first line is inspected. Fields are separated by any run of
        whitespace, so tabs, repeated spaces and trailing blanks do not change
        the field count.

        Returns:
            ``True`` when the first line has an accepted number of fields.

        Raises:
            DatasetImportError: If the file has no lines or its first line has a
                different number of fields.
                NOTE(review): how ``probe_text_file`` treats this exception is
                not visible here - confirm against ``FormatDetectionContext``.
        """
        with context.probe_text_file(
            fpath, "Requirements for the annotation file of Kitti 3D format"
        ) as fp:
            for line in fp:
                fields = line.split()
                if len(fields) in (15, 16):
                    return True
                raise DatasetImportError(
                    f"Kitti 3D format txt file should have 15 or 16 fields for "
                    f"each line, but the read line has {len(fields)} fields: "
                    f"fields={fields}."
                )
            # The loop body always returns or raises, so this is reached only
            # when the file yields no lines at all.
            raise DatasetImportError("Empty file is not allowed.")

    @classmethod
    def get_file_extensions(cls) -> List[str]:
        """Return the annotation file extensions of this format."""
        return [cls._ANNO_EXT]

    @classmethod
    def find_sources(cls, path):
        """Describe ``path`` itself as the single ``kitti3d`` source."""
        return [{"url": path, "format": "kitti3d"}]
7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 +P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 +R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 +Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 +Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 \ No newline at end of file diff --git a/tests/assets/kitti_dataset/kitti_3d/training/image_2/000001.png b/tests/assets/kitti_dataset/kitti_3d/training/image_2/000001.png new file mode 100644 index 0000000000..e6f3cff877 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d/training/image_2/000001.png differ diff --git a/tests/assets/kitti_dataset/kitti_3d/training/label_2/000001.txt b/tests/assets/kitti_dataset/kitti_3d/training/label_2/000001.txt new file mode 100644 index 0000000000..a2fdc0fa6f --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d/training/label_2/000001.txt @@ -0,0 +1,3 @@ +Truck 0.00 0 -1.57 600 150 630 190 2.85 2.63 12.34 0.47 1.49 69.44 -1.56 +Car 0.00 3 -1.65 650 160 
700 200 1.86 0.60 2.02 4.59 1.32 45.84 -1.55 +DontCare -1 -1 -10 500 170 590 190 -1 -1 -1 -1000 -1000 -1000 -10 diff --git a/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000001.bin b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000001.bin new file mode 100644 index 0000000000..d6089802fb Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000001.bin differ diff --git a/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000002.bin b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000002.bin new file mode 100644 index 0000000000..50a1df582a Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000002.bin differ diff --git a/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000003.bin b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000003.bin new file mode 100644 index 0000000000..1eb847a044 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000003.bin differ diff --git a/tests/unit/test_kitti_3d_format.py b/tests/unit/test_kitti_3d_format.py new file mode 100644 index 0000000000..ed4a8e6220 --- /dev/null +++ b/tests/unit/test_kitti_3d_format.py @@ -0,0 +1,116 @@ +import os.path as osp +from unittest import TestCase + +from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories +from datumaro.components.dataset_base import DatasetItem +from datumaro.components.environment import Environment +from datumaro.components.media import Image, PointCloud +from datumaro.components.project import Dataset +from datumaro.plugins.data_formats.kitti_3d.importer import Kitti3dImporter + +from tests.requirements import Requirements, mark_requirement +from tests.utils.assets import get_test_asset_path +from tests.utils.test_utils import compare_datasets_3d + +DUMMY_DATASET_DIR = get_test_asset_path("kitti_dataset", "kitti_3d", "training") + + +class Kitti3DImporterTest(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + 
class Kitti3DImporterTest(TestCase):
    """Detection and loading checks for the KITTI 3D importer on the dummy dataset."""

    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
    def test_can_detect(self):
        found = Environment().detect_dataset(DUMMY_DATASET_DIR)
        self.assertEqual([Kitti3dImporter.NAME], found)

    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
    def test_can_load(self):
        """
        Description:
        Check that a dataset stored in the KITTI3D format is imported as expected.

        Expected results:
        The imported dataset holds the same items as the hand-built reference:
        the same boxes with the same attributes, the same point cloud with its
        extra image, and the same item attributes.

        Steps:
        1. Build the reference dataset for the single labeled frame of the dummy data.
        2. Import the dummy directory with the "kitti3d" format.
        3. Compare the imported dataset with the reference.
        """
        categories = LabelCategories(
            attributes={"occluded", "truncated", "alpha", "dimensions", "location", "rotation_y"}
        )
        for label_name in ("Truck", "Car", "DontCare"):
            categories.add(label_name)

        point_cloud_path = osp.join(DUMMY_DATASET_DIR, "velodyne", "000001.bin")
        camera_image = Image.from_file(path=osp.join(DUMMY_DATASET_DIR, "image_2", "000001.png"))

        # Bbox positional arguments are x1, y1, x2 - x1, y2 - y1 of the label line.
        truck_box = Bbox(
            600,
            150,
            30,
            40,
            label=0,
            id=0,
            attributes={
                "truncated": 0.0,
                "occluded": 0,
                "alpha": -1.57,
                "dimensions": [2.85, 2.63, 12.34],
                "location": [0.47, 1.49, 69.44],
                "rotation_y": -1.56,
            },
            z_order=0,
        )
        car_box = Bbox(
            650,
            160,
            50,
            40,
            label=1,
            id=1,
            attributes={
                "truncated": 0.0,
                "occluded": 3,
                "alpha": -1.65,
                "dimensions": [1.86, 0.6, 2.02],
                "location": [4.59, 1.32, 45.84],
                "rotation_y": -1.55,
            },
            z_order=0,
        )
        dont_care_box = Bbox(
            500,
            170,
            90,
            20,
            label=2,
            id=2,
            attributes={
                "truncated": -1.0,
                "occluded": -1,
                "alpha": -10.0,
                "dimensions": [-1.0, -1.0, -1.0],
                "location": [-1000.0, -1000.0, -1000.0],
                "rotation_y": -10.0,
            },
        )

        reference_item = DatasetItem(
            id="000001",
            annotations=[truck_box, car_box, dont_care_box],
            media=PointCloud.from_file(path=point_cloud_path, extra_images=[camera_image]),
            attributes={"calib_path": osp.join(DUMMY_DATASET_DIR, "calib", "000001.txt")},
        )
        expected_dataset = Dataset.from_iterable(
            [reference_item],
            categories={AnnotationType.label: categories},
            media_type=PointCloud,
        )

        parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, "kitti3d")

        compare_datasets_3d(self, expected_dataset, parsed_dataset, require_point_cloud=True)