From 8311b810991426924d39e1924a8ed540d8a82981 Mon Sep 17 00:00:00 2001 From: relifest <3380383714@qq.com> Date: Tue, 12 Nov 2024 21:17:40 +0800 Subject: [PATCH 1/4] Modification and testing of stack_converter functionality and resolution of issues in the previous pull request --- pyproject.toml | 4 +- pytdml/convert_utils.py | 93 -------- pytdml/io/stac_converter.py | 120 ++++++++++ pytdml/type/basic_types.py | 6 +- pytdml/yaml_to_tdml.py | 5 - tests/data/UiT_HCD_California_2017.yml | 55 ----- .../{ => json}/UiT_HCD_California_2017.json | 2 + tests/data/{ => json}/WHU-building.json | 2 + tests/data/stac/catalog.json | 43 ++++ tests/data/stac/collection.json | 136 ++++++++++++ tests/data/stac/core-item.json | 125 +++++++++++ tests/data/stac/extended-item.json | 210 ++++++++++++++++++ tests/data/stac/simple-item.json | 81 +++++++ tests/data/yaml/UiT_HCD_California_2017.yml | 57 +++++ tests/test_tdml_io.py | 17 +- 15 files changed, 797 insertions(+), 159 deletions(-) create mode 100644 pytdml/io/stac_converter.py delete mode 100644 tests/data/UiT_HCD_California_2017.yml rename tests/data/{ => json}/UiT_HCD_California_2017.json (96%) rename tests/data/{ => json}/WHU-building.json (96%) create mode 100644 tests/data/stac/catalog.json create mode 100644 tests/data/stac/collection.json create mode 100644 tests/data/stac/core-item.json create mode 100644 tests/data/stac/extended-item.json create mode 100644 tests/data/stac/simple-item.json create mode 100644 tests/data/yaml/UiT_HCD_California_2017.yml diff --git a/pyproject.toml b/pyproject.toml index 515a48b..96f9e32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,6 @@ dependencies = [ "Pillow~=10.4.0", "PyYAML~=6.0", "numpy~=1.26.4", - "pandas~=2.2.2", "opencv-python~=4.10.0.84", "tensorflow~=2.17.0", "jsonschema~=4.23.0", @@ -21,7 +20,8 @@ dependencies = [ "matplotlib~=3.9.1", "minio~=7.2.7", "tqdm~=4.66.4", - "s3fs~=2024.9.0" + "s3fs~=2024.9.0", + "pystac~=1.11.0" ] [project.optional-dependencies] diff 
--git a/pytdml/convert_utils.py b/pytdml/convert_utils.py index cc43e81..17295af 100644 --- a/pytdml/convert_utils.py +++ b/pytdml/convert_utils.py @@ -152,96 +152,3 @@ def convert_coco_to_tdml(coco_dataset_path, output_json_path): write_to_json(dataset, output_json_path) -def convert_stac_to_tdml(stac_dataset_path, output_json_path): - # Reads JSON data in stac format from a given path. - - with open(stac_dataset_path, 'r') as stac_file: - stac_collection_dataset = json.load(stac_file) - # start of timer - start_time = time.time() - dataset_id = stac_collection_dataset.get("id") - dataset_description = stac_collection_dataset.get("description") - dataset_name = stac_collection_dataset.get("title") - dataset_version = stac_collection_dataset.get("version") - - keywords = stac_collection_dataset.get("keywords") - license_str = stac_collection_dataset.get("license") - extents = stac_collection_dataset.get("extent") - - extent = extents.get("spatial").get("bbox")[0] - print(extent, type(extent)) - - providers = [item["name"] for item in stac_collection_dataset.get("providers")] - - created_time = extents.get("temporal").get("interval")[0][0][:-1] - updated_time = extents.get("temporal").get("interval")[0][1] - datas = [item for item in stac_collection_dataset.get("links") if item["rel"] == "item"] - amount_of_training_data = len(datas) - td_list = [] - - task_name = "" - - for data in datas: - item_path = data.get("href") - with open(item_path, 'r') as itemfile: - stac_item = json.load(itemfile) - properties = stac_item.get("properties") - assets = stac_item.get("assets") - task_type = properties.get("label:tasks")[0] # list - label_classes = properties.get("label:classes") # list - label_methods = properties.get("label:methods") # list - item_extent = stac_item.get("bbox") - label_path = assets["labels"].get("href") - label_type = assets["labels"].get("type") - item_id = stac_item.get("id") - img_path = assets["raster"].get("href") - data_url = [] - - if task_type == 
"segmentation": - task_name = "semantic segmentation" - data_url.append(img_path) - label_url = label_path - image_type = label_type - labels = [AI_PixelLabel(confidence=1.0,type="AI_PixelLabel",image_URL=[label_url],image_format=[image_type])] - td_list.append( - AI_EOTrainingData(id=item_id,type="AI_EOTrainingData",training_type="Train", dataset_id=dataset_id,number_of_labels=1,labels=labels,extent=item_extent, - data_URL=data_url)) - - - for class_dict in label_classes: - class_dict['value'] = class_dict.pop('classes') - - tasks = [AI_EOTask(task_type=task_name, - id=str(dataset_id) + "_task", - dataset_id= str(dataset_id), - type='AI_EOTask')] - - # end of timer - end_time = time.time() - # Calculation of total and average time - total_time = end_time - start_time - average_time = total_time / amount_of_training_data - print(f"Total time for {amount_of_training_data} training instances: {total_time:.5f} seconds") - print(f"Average time per training instance: {average_time * 60:.5f} ms") - - dataset = EOTrainingDataset( - id=str(dataset_id), - name=dataset_name, - type="AI_EOTrainingDataset", - description=dataset_description, - tasks=tasks, - version=dataset_version, - amount_of_training_data=amount_of_training_data, - created_time=created_time, - updated_time=updated_time, - providers=providers, - keywords=keywords, - classes=label_classes, - number_of_classes=len(label_classes), - license=license_str, - data=td_list, - extent=extent - ) - # write to json - write_to_json(dataset, output_json_path) - diff --git a/pytdml/io/stac_converter.py b/pytdml/io/stac_converter.py new file mode 100644 index 0000000..13b352d --- /dev/null +++ b/pytdml/io/stac_converter.py @@ -0,0 +1,120 @@ +# ------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ +# +# Project: pytdml +# Authors: Boyi Shangguan, Kaixuan Wang, Zhaoyan Wu +# Created: 2022-05-04 +# Modified: 2023-10-27 +# 
def _format_stac_datetime(value):
    """Normalise one STAC timestamp string to ``YYYY-MM-DDTHH:MM:SS``.

    Accepts ISO 8601 / RFC 3339 strings with or without fractional seconds
    and with a ``Z`` or numeric UTC offset.  Offset-aware values are shifted
    to UTC before the offset is dropped, so ``...Z`` / ``...+00:00`` inputs
    keep their wall-clock value.

    Args:
        value: Timestamp string, or ``None`` for an open-ended interval
            endpoint.

    Returns:
        The formatted string, or ``None`` when ``value`` is ``None``.

    Raises:
        ValueError: If ``value`` is not a parseable ISO 8601 timestamp.
    """
    if value is None:
        return None
    text = value.strip()
    # A literal "Z" suffix is only understood by ``fromisoformat`` on newer
    # Python versions, so spell it as an explicit zero offset.
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    parsed = datetime.fromisoformat(text)
    offset = parsed.utcoffset()
    if offset is not None:
        # Shift to UTC wall-clock time, then drop the tzinfo.
        parsed = (parsed - offset).replace(tzinfo=None)
    return parsed.strftime("%Y-%m-%dT%H:%M:%S")


def convert_stac_to_tdml(stac_dataset_path):
    """Convert a STAC Collection JSON file into an ``EOTrainingDataset``.

    The collection is loaded, round-tripped through ``pystac.Collection``
    and mapped onto the dataset.  Every link with ``rel == "item"`` is opened
    as a STAC Item file and becomes one ``AI_EOTrainingData`` entry whose
    single ``AI_ObjectLabel`` wraps the item as a GeoJSON ``Feature``.

    Args:
        stac_dataset_path: Path of the STAC Collection JSON file.

    Returns:
        The populated ``EOTrainingDataset``.

    Raises:
        OSError: If the collection or one of its item files cannot be opened.
        ValueError: If a temporal interval endpoint is not a valid timestamp.
    """
    # Function-scope import: ``os`` is not among the module-level imports.
    import os

    # Reads JSON data in STAC format from the given path.
    with open(stac_dataset_path, 'r', encoding="utf-8") as file:
        collection_data = json.load(file)
    collection_object = Collection.from_dict(collection_data)
    stac_collection_dataset = collection_object.to_dict(include_self_link=False, transform_hrefs=True)

    # Reads the necessary attributes from the Collection object and maps them to the EOTrainingDataset object
    collection_version = stac_collection_dataset.get("stac_version")
    collection_id = stac_collection_dataset.get("id")
    collection_description = stac_collection_dataset.get("description")
    collection_license = stac_collection_dataset.get("license")
    collection_bbox = stac_collection_dataset.get("extent").get("spatial").get("bbox")
    collection_interval = stac_collection_dataset.get("extent").get("temporal").get("interval")

    # Flatten all interval endpoints; ``None`` marks an open-ended interval
    # and carries no timestamp, so it is skipped instead of crashing.
    data_time = [
        _format_stac_datetime(endpoint)
        for interval in collection_interval
        for endpoint in interval
        if endpoint is not None
    ]

    if len(collection_bbox) == 1:
        collection_extent = collection_bbox[0]
    else:
        # Several bounding boxes are concatenated into one flat list.
        collection_extent = [coordinate for bbox in collection_bbox for coordinate in bbox]

    # Reads the necessary attributes from the item objects and maps them to the data objects
    collection_links = stac_collection_dataset.get("links") or []
    collection_filtered_links = [link for link in collection_links if link.get("rel") == "item"]
    collection_dir = os.path.dirname(os.path.abspath(stac_dataset_path))

    datalist = []
    for link in collection_filtered_links:
        item_path = link.get("href")
        # Keep the historical behaviour (href relative to the working
        # directory) and fall back to resolving it against the collection
        # file's own directory when that does not exist.
        if not os.path.exists(item_path):
            candidate = os.path.join(collection_dir, item_path)
            if os.path.exists(candidate):
                item_path = candidate
        with open(item_path, 'r', encoding="utf-8") as item_file:
            stac_item = json.load(item_file)

        link_id = stac_item.get("id")
        link_rel = link.get("rel")
        feature = Feature(**stac_item)
        link_href = [asset['href'] for asset in (stac_item.get("assets") or {}).values()]

        label = AI_ObjectLabel(
            type="AI_ObjectLabel",
            object=feature,
            label_class=link_rel
        )

        data = AI_EOTrainingData(
            type="AI_EOTrainingData",
            id=link_id,
            labels=[label],
            data_URL=link_href,
            data_time=data_time
        )
        datalist.append(data)

    # Reads the optional attributes from the Collection object and maps them to the EOTrainingDataset object
    collection_name = stac_collection_dataset.get("title")

    tasks = [AI_EOTask(task_type="STAC",
                       id=str(collection_id) + "_task",
                       dataset_id=str(collection_id),
                       type='AI_EOTask')]

    dataset = EOTrainingDataset(
        # necessary attributes
        id=str(collection_id),
        name=collection_name,
        description=collection_description,
        license=collection_license,
        tasks=tasks,
        data=datalist,
        type="AI_EOTrainingDataset",
        # optional attributes
        version=collection_version,
        extent=collection_extent
    )

    return dataset
a/pytdml/yaml_to_tdml.py +++ b/pytdml/yaml_to_tdml.py @@ -321,8 +321,3 @@ def main(): training_datasets = yaml_to_tdml(yaml_path) if training_datasets: write_to_json(training_datasets, json_path) - - -if __name__ == '__main__': - result = yaml_to_eo_tdml("D:\\Project\\pyTDML3\\pytdml\\pytdml\\type\\UiT_HCD_California_2017.yml") - print(result.to_dict()) diff --git a/tests/data/UiT_HCD_California_2017.yml b/tests/data/UiT_HCD_California_2017.yml deleted file mode 100644 index f991cc8..0000000 --- a/tests/data/UiT_HCD_California_2017.yml +++ /dev/null @@ -1,55 +0,0 @@ ---- -type: AI_EOTrainingDataset -id: uit_hcd_california_2017 -name: UiT HCD California 2017 -description: This dataset is composed of two images and a label image. -license: CC BY-SA 4.0 -version: '1.0' -amountOfTrainingData: 1 -createdTime: '2017-01-01' -providers: - - LP DAAC - - ESA -classes: - - key: change - value: 1 - - key: unchanged - value: 0 -numberOfClasses: 2 -bands: - - name: - - code: red - - name: - - code: green - - name: - - code: blue - - name: - - code: VH - - name: - - code: VV - - name: - - code: VV/VH -imageSize: 2000x3500 -tasks: - - type: AI_EOTask - id: uit_hcd_california_2017-task - description: Multi-source images change detection - taskType: http://demo#change_detection -data: - - type: AI_EOTrainingData - id: '0' - dataTime: - - '2017-01-05' - - '2017-02-18' - dataURL: - - t1_L8.png - - t2_SAR.png - dataSources: - - title: Landsat-8 - numberOfLabels: 1 - labels: - - type: AI_PixelLabel - imageURL: - - change_label.png - imageFormat: - - image/png \ No newline at end of file diff --git a/tests/data/UiT_HCD_California_2017.json b/tests/data/json/UiT_HCD_California_2017.json similarity index 96% rename from tests/data/UiT_HCD_California_2017.json rename to tests/data/json/UiT_HCD_California_2017.json index ed0e9b9..e0e9ac7 100644 --- a/tests/data/UiT_HCD_California_2017.json +++ b/tests/data/json/UiT_HCD_California_2017.json @@ -99,6 +99,8 @@ "imageURL": [ "change_label.png" 
], + "confidence": 1.0, + "isNegative": false, "imageFormat": [ "image/png" ] diff --git a/tests/data/WHU-building.json b/tests/data/json/WHU-building.json similarity index 96% rename from tests/data/WHU-building.json rename to tests/data/json/WHU-building.json index 26326f6..a82932e 100644 --- a/tests/data/WHU-building.json +++ b/tests/data/json/WHU-building.json @@ -77,6 +77,8 @@ "labels": [ { "type": "AI_PixelLabel", + "confidence": 1.0, + "isNegative": false, "imageURL": [ "change_label/change_label.tif" ], diff --git a/tests/data/stac/catalog.json b/tests/data/stac/catalog.json new file mode 100644 index 0000000..a9fe3ef --- /dev/null +++ b/tests/data/stac/catalog.json @@ -0,0 +1,43 @@ +{ + "id": "examples", + "type": "Catalog", + "title": "Example Catalog", + "stac_version": "1.1.0", + "description": "This catalog is a simple demonstration of an example catalog that is used to organize a hierarchy of collections and their items.", + "links": [ + { + "rel": "root", + "href": "./catalog.json", + "type": "application/json" + }, + { + "rel": "child", + "href": "./extensions-collection/collection.json", + "type": "application/json", + "title": "Collection Demonstrating STAC Extensions" + }, + { + "rel": "child", + "href": "./collection-only/collection.json", + "type": "application/json", + "title": "Collection with no items (standalone)" + }, + { + "rel": "child", + "href": "./collection-only/collection-with-schemas.json", + "type": "application/json", + "title": "Collection with no items (standalone with JSON Schemas)" + }, + { + "rel": "item", + "href": "./collectionless-item.json", + "type": "application/json", + "title": "Item that does not have a collection (not recommended, but allowed by the spec)" + }, + { + "rel": "self", + "href": "https://raw.githubusercontent.com/radiantearth/stac-spec/v1.1.0/examples/catalog.json", + "type": "application/json" + } + ] +} diff --git a/tests/data/stac/collection.json b/tests/data/stac/collection.json new file mode 
100644 index 0000000..8deee64 --- /dev/null +++ b/tests/data/stac/collection.json @@ -0,0 +1,136 @@ +{ + "id": "simple-collection", + "type": "Collection", + "stac_extensions": [ + "https://stac-extensions.github.io/eo/v2.0.0/schema.json", + "https://stac-extensions.github.io/projection/v2.0.0/schema.json", + "https://stac-extensions.github.io/view/v1.0.0/schema.json" + ], + "stac_version": "1.1.0", + "description": "A simple collection demonstrating core catalog fields with links to a couple of items", + "title": "Simple Example Collection", + "keywords": [ + "simple", + "example", + "collection" + ], + "providers": [ + { + "name": "Remote Data, Inc", + "description": "Producers of awesome spatiotemporal assets", + "roles": [ + "producer", + "processor" + ], + "url": "http://remotedata.io" + } + ], + "extent": { + "spatial": { + "bbox": [ + [ + 172.91173669923782, + 1.3438851951615003, + 172.95469614953714, + 1.3690476620161975 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2020-12-11T22:38:32.125Z", + "2020-12-14T18:02:31.437Z" + ] + ] + } + }, + "license": "CC-BY-4.0", + "summaries": { + "platform": [ + "cool_sat1", + "cool_sat2" + ], + "constellation": [ + "ion" + ], + "instruments": [ + "cool_sensor_v1", + "cool_sensor_v2" + ], + "gsd": { + "minimum": 0.512, + "maximum": 0.66 + }, + "eo:cloud_cover": { + "minimum": 1.2, + "maximum": 1.2 + }, + "proj:cpde": [ + "EPSG:32659" + ], + "view:sun_elevation": { + "minimum": 54.9, + "maximum": 54.9 + }, + "view:off_nadir": { + "minimum": 3.8, + "maximum": 3.8 + }, + "view:sun_azimuth": { + "minimum": 135.7, + "maximum": 135.7 + }, + "statistics": { + "type": "object", + "properties": { + "vegetation": { + "description": "Percentage of pixels that are detected as vegetation, e.g. forests, grasslands, etc.", + "minimum": 0, + "maximum": 100 + }, + "water": { + "description": "Percentage of pixels that are detected as water, e.g. 
rivers, oceans and ponds.", + "minimum": 0, + "maximum": 100 + }, + "urban": { + "description": "Percentage of pixels that detected as urban, e.g. roads and buildings.", + "minimum": 0, + "maximum": 100 + } + } + } + }, + "links": [ + { + "rel": "root", + "href": "tests/data/stac/collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "item", + "href": "tests/data/stac/simple-item.json", + "type": "application/geo+json", + "title": "Simple Item" + }, + { + "rel": "item", + "href": "tests/data/stac/core-item.json", + "type": "application/geo+json", + "title": "Core Item" + }, + { + "rel": "item", + "href": "tests/data/stac/extended-item.json", + "type": "application/geo+json", + "title": "Extended Item" + }, + { + "rel": "self", + "href": "https://raw.githubusercontent.com/radiantearth/stac-spec/v1.1.0/examples/collection.json", + "type": "application/json" + } + ] +} diff --git a/tests/data/stac/core-item.json b/tests/data/stac/core-item.json new file mode 100644 index 0000000..e151b13 --- /dev/null +++ b/tests/data/stac/core-item.json @@ -0,0 +1,125 @@ +{ + "stac_version": "1.1.0", + "stac_extensions": [], + "type": "Feature", + "id": "20201211_223832_CS2", + "bbox": [ + 172.91173669923782, + 1.3438851951615003, + 172.95469614953714, + 1.3690476620161975 + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 172.91173669923782, + 1.3438851951615003 + ], + [ + 172.95469614953714, + 1.3438851951615003 + ], + [ + 172.95469614953714, + 1.3690476620161975 + ], + [ + 172.91173669923782, + 1.3690476620161975 + ], + [ + 172.91173669923782, + 1.3438851951615003 + ] + ] + ] + }, + "properties": { + "title": "Core Item", + "description": "A sample STAC Item that includes examples of all common metadata", + "datetime": null, + "start_datetime": "2020-12-11T22:38:32.125Z", + "end_datetime": "2020-12-11T22:38:32.327Z", + "created": "2020-12-12T01:48:13.725Z", + "updated": "2020-12-12T01:48:13.725Z", + "platform": 
"cool_sat1", + "instruments": [ + "cool_sensor_v1" + ], + "constellation": "ion", + "mission": "collection 5624", + "gsd": 0.512 + }, + "collection": "simple-collection", + "links": [ + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + } + ], + "assets": { + "analytic": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_analytic.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "4-Band Analytic", + "roles": [ + "data" + ] + }, + "thumbnail": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.jpg", + "title": "Thumbnail", + "type": "image/png", + "roles": [ + "thumbnail" + ] + }, + "visual": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "3-Band Visual", + "roles": [ + "visual" + ] + }, + "udm": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_analytic_udm.tif", + "title": "Unusable Data Mask", + "type": "image/tiff; application=geotiff" + }, + "json-metadata": { + "href": "http://remotedata.io/catalog/20201211_223832_CS2/extended-metadata.json", + "title": "Extended Metadata", + "type": "application/json", + "roles": [ + "metadata" + ] + }, + "ephemeris": { + "href": "http://cool-sat.com/catalog/20201211_223832_CS2/20201211_223832_CS2.EPH", + "title": "Satellite Ephemeris Metadata" + } + } +} diff 
--git a/tests/data/stac/extended-item.json b/tests/data/stac/extended-item.json new file mode 100644 index 0000000..b5f3a0a --- /dev/null +++ b/tests/data/stac/extended-item.json @@ -0,0 +1,210 @@ +{ + "stac_version": "1.1.0", + "stac_extensions": [ + "https://stac-extensions.github.io/eo/v2.0.0/schema.json", + "https://stac-extensions.github.io/projection/v2.0.0/schema.json", + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", + "https://stac-extensions.github.io/view/v1.0.0/schema.json", + "https://stac-extensions.github.io/remote-data/v1.0.0/schema.json" + ], + "type": "Feature", + "id": "20201211_223832_CS2", + "bbox": [ + 172.91173669923782, + 1.3438851951615003, + 172.95469614953714, + 1.3690476620161975 + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 172.91173669923782, + 1.3438851951615003 + ], + [ + 172.95469614953714, + 1.3438851951615003 + ], + [ + 172.95469614953714, + 1.3690476620161975 + ], + [ + 172.91173669923782, + 1.3690476620161975 + ], + [ + 172.91173669923782, + 1.3438851951615003 + ] + ] + ] + }, + "properties": { + "title": "Extended Item", + "description": "A sample STAC Item that includes a variety of examples from the stable extensions", + "keywords": [ + "extended", + "example", + "item" + ], + "datetime": "2020-12-14T18:02:31.437000Z", + "created": "2020-12-15T01:48:13.725Z", + "updated": "2020-12-15T01:48:13.725Z", + "platform": "cool_sat2", + "instruments": [ + "cool_sensor_v2" + ], + "gsd": 0.66, + "eo:cloud_cover": 1.2, + "eo:snow_cover": 0, + "statistics": { + "vegetation": 12.57, + "water": 1.23, + "urban": 26.2 + }, + "proj:code": "EPSG:32659", + "proj:shape": [ + 5558, + 9559 + ], + "proj:transform": [ + 0.5, + 0, + 712710, + 0, + -0.5, + 151406, + 0, + 0, + 1 + ], + "view:sun_elevation": 54.9, + "view:off_nadir": 3.8, + "view:sun_azimuth": 135.7, + "rd:type": "scene", + "rd:anomalous_pixels": 0.14, + "rd:earth_sun_distance": 1.014156, + "rd:sat_id": "cool_sat2", + "rd:product_level": 
"LV3A", + "sci:doi": "10.5061/dryad.s2v81.2/27.2" + }, + "collection": "simple-collection", + "links": [ + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "alternate", + "type": "text/html", + "href": "http://remotedata.io/catalog/20201211_223832_CS2/index.html", + "title": "HTML version of this STAC Item" + } + ], + "assets": { + "analytic": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_analytic.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "4-Band Analytic", + "roles": [ + "data" + ], + "bands": [ + { + "name": "band1", + "eo:common_name": "blue", + "eo:center_wavelength": 0.47, + "eo:full_width_half_max": 70 + }, + { + "name": "band2", + "eo:common_name": "green", + "eo:center_wavelength": 0.56, + "eo:full_width_half_max": 80 + }, + { + "name": "band3", + "eo:common_name": "red", + "eo:center_wavelength": 0.645, + "eo:full_width_half_max": 90 + }, + { + "name": "band4", + "eo:common_name": "nir", + "eo:center_wavelength": 0.8, + "eo:full_width_half_max": 152 + } + ] + }, + "thumbnail": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.jpg", + "title": "Thumbnail", + "type": "image/png", + "roles": [ + "thumbnail" + ] + }, + "visual": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "3-Band Visual", + "roles": [ + "visual" + ], + "bands": [ + { + "name": "band3", + "eo:common_name": "red", + "eo:center_wavelength": 0.645, + "eo:full_width_half_max": 90 + }, + { + "name": 
"band2", + "eo:common_name": "green", + "eo:center_wavelength": 0.56, + "eo:full_width_half_max": 80 + }, + { + "name": "band1", + "eo:common_name": "blue", + "eo:center_wavelength": 0.47, + "eo:full_width_half_max": 70 + } + ] + }, + "udm": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_analytic_udm.tif", + "title": "Unusable Data Mask", + "type": "image/tiff; application=geotiff" + }, + "json-metadata": { + "href": "http://remotedata.io/catalog/20201211_223832_CS2/extended-metadata.json", + "title": "Extended Metadata", + "type": "application/json", + "roles": [ + "metadata" + ] + }, + "ephemeris": { + "href": "http://cool-sat.com/catalog/20201211_223832_CS2/20201211_223832_CS2.EPH", + "title": "Satellite Ephemeris Metadata" + } + } +} \ No newline at end of file diff --git a/tests/data/stac/simple-item.json b/tests/data/stac/simple-item.json new file mode 100644 index 0000000..449352e --- /dev/null +++ b/tests/data/stac/simple-item.json @@ -0,0 +1,81 @@ +{ + "stac_version": "1.1.0", + "stac_extensions": [], + "type": "Feature", + "id": "20201211_223832_CS2", + "bbox": [ + 172.91173669923782, + 1.3438851951615003, + 172.95469614953714, + 1.3690476620161975 + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 172.91173669923782, + 1.3438851951615003 + ], + [ + 172.95469614953714, + 1.3438851951615003 + ], + [ + 172.95469614953714, + 1.3690476620161975 + ], + [ + 172.91173669923782, + 1.3690476620161975 + ], + [ + 172.91173669923782, + 1.3438851951615003 + ] + ] + ] + }, + "properties": { + "datetime": "2020-12-11T22:38:32.125000Z" + }, + "collection": "simple-collection", + "links": [ + { + "rel": "collection", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "root", + "href": "./collection.json", + "type": "application/json", + "title": "Simple Example Collection" + }, + { + "rel": "parent", + "href": "./collection.json", + "type": 
"application/json", + "title": "Simple Example Collection" + } + ], + "assets": { + "visual": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "3-Band Visual", + "roles": [ + "visual" + ] + }, + "thumbnail": { + "href": "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2.jpg", + "title": "Thumbnail", + "type": "image/jpeg", + "roles": [ + "thumbnail" + ] + } + } +} diff --git a/tests/data/yaml/UiT_HCD_California_2017.yml b/tests/data/yaml/UiT_HCD_California_2017.yml new file mode 100644 index 0000000..d661dba --- /dev/null +++ b/tests/data/yaml/UiT_HCD_California_2017.yml @@ -0,0 +1,57 @@ +--- +type: AI_EOTrainingDataset +id: uit_hcd_california_2017 +name: UiT HCD California 2017 +description: This dataset is composed of two images and a label image. +license: CC BY-SA 4.0 +version: '1.0' +amountOfTrainingData: 1 +createdTime: '2017-01-01' +providers: +- LP DAAC +- ESA +classes: +- key: change + value: 1 +- key: unchanged + value: 0 +numberOfClasses: 2 +bands: +- name: + - code: red +- name: + - code: green +- name: + - code: blue +- name: + - code: VH +- name: + - code: VV +- name: + - code: VV/VH +imageSize: 2000x3500 +tasks: +- type: AI_EOTask + id: uit_hcd_california_2017-task + description: Multi-source images change detection + taskType: http://demo#change_detection +data: +- type: AI_EOTrainingData + id: '0' + dataTime: + - '2017-01-05' + - '2017-02-18' + dataURL: + - t1_L8.png + - t2_SAR.png + dataSources: + - title: Landsat-8 + numberOfLabels: 1 + labels: + - type: AI_PixelLabel + imageURL: + - change_label.png + confidence: 1.0 + isNegative: false + imageFormat: + - image/png diff --git a/tests/test_tdml_io.py b/tests/test_tdml_io.py index aca405a..359c719 100644 --- a/tests/test_tdml_io.py +++ b/tests/test_tdml_io.py @@ -1,15 +1,30 @@ import pytest import json from pydantic import ValidationError, 
BaseModel, validator +from pytdml.yaml_to_tdml import yaml_to_eo_tdml from pytdml.type.extended_types import EOTrainingDataset from pytdml.io.tdml_writers import write_to_json from pytdml.io.tdml_readers import read_from_json +from pytdml.io.stac_converter import convert_stac_to_tdml def test_read_and_write(): - tdml_path = r"tests/data/WHU-building.json" + tdml_path = r"tests/data/json/WHU-building.json" td = read_from_json(tdml_path) with open(tdml_path, 'r') as f: data = json.load(f) assert td.to_dict() == data + +def test_yaml_to_eo_tdml(): + yaml_path = r"tests/data/yaml/UiT_HCD_California_2017.yml" + tdml_path = r"tests/data/json/UiT_HCD_California_2017.json" + td = yaml_to_eo_tdml(yaml_path) + with open(tdml_path, 'r') as f: + data = json.load(f) + assert td.to_dict() == data + +def test_convert_stac_to_tdml(): + stac_file_path = r"tests/data/stac/collection.json" + td = convert_stac_to_tdml(stac_file_path) + assert td.to_dict().get("type") == "AI_EOTrainingDataset" From 33e7822e29ce93a4c2dff7ec2dcfd8f22749d5ac Mon Sep 17 00:00:00 2001 From: Zhangyang <100121932+Relifest@users.noreply.github.com> Date: Tue, 12 Nov 2024 21:22:25 +0800 Subject: [PATCH 2/4] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 96f9e32..788632d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "minio~=7.2.7", "tqdm~=4.66.4", "s3fs~=2024.9.0", - "pystac~=1.11.0" + "pystac~=1.10.1" ] [project.optional-dependencies] From 7b0f763a5240936e8ca487c1fb3f13d42f11ea8a Mon Sep 17 00:00:00 2001 From: Zhangyang <100121932+Relifest@users.noreply.github.com> Date: Tue, 12 Nov 2024 21:31:32 +0800 Subject: [PATCH 3/4] Update tests.yml --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d68bd34..55bcf06 100644 --- a/.github/workflows/tests.yml +++ 
b/.github/workflows/tests.yml @@ -26,7 +26,7 @@ jobs: ${{ runner.os }}-pip- - name: Install dependencies - run: pip install .[dev] && pip install .[all] + run: pip install pytest && pip install .[all] - name: Run tests run: pytest From ea5d650611e95142265d80fedcb7145a4ae7acab Mon Sep 17 00:00:00 2001 From: relifest <3380383714@qq.com> Date: Fri, 22 Nov 2024 17:40:35 +0800 Subject: [PATCH 4/4] Update README.md --- README.md | 59 ++++++++++++++++++++++++----------------- pytdml/type/__init__.py | 2 +- pytdml/yaml_to_tdml.py | 3 +++ 3 files changed, 39 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 6b15a4e..d5a9376 100644 --- a/README.md +++ b/README.md @@ -46,42 +46,53 @@ The training dataset can also be encoded to TrainingDML-AI JSON format with Pyth ```python from pytdml.io import write_to_json -from pytdml.type import EOTrainingDataset, EOTrainingData, EOTask, EODataSource, SceneLabel +from pytdml.type import EOTrainingDataset, AI_EOTrainingData, AI_EOTask, AI_SceneLabel # generate EO training dataset dataset = EOTrainingDataset( id='...', name='...', description='...', + license='...', + tasks=[ + AI_EOTask( + id='...', + task_type='...'), + ... + ], data=[ - EOTrainingData( + AI_EOTrainingData( id='...', + data_URL='...', labels=[ - SceneLabel( - label_class='...', - data_url='...', - date_time='...'), + AI_SceneLabel( + label_class='...' + ), ... ]), ... ], - version="...", + amount_of_training_data=..., - created_time="...", - updated_time="...", - providers=["..."], - keywords=["...", "..."], - tasks=[EOTask(task_type="...", - description="...")], - data_sources=[EODataSource( - id="...", - data_type="...", - resolution="..." - )], classes=["...", "...", "..."], + classification_scheme='...', + created_time="...", + data_sources=['...'], + doi='...', + keywords=['...', '...'], number_of_classes=..., - bands=["...", "...", "..."], - image_size="..." 
+ providers=['...'], + scope=..., + statistics_info=[...], + updated_time='...', + version='...', + labeling=[...], + metrics_in_LIT=[...], + quality=[...], + changesets=[...], + bands=[...], + extent=[...], + image_size='...' ) # write to json write_to_json(dataset, "dataset.json") @@ -362,14 +373,14 @@ for e in range(100): ### Convert other EO dataset formats to TrainingDML-AI format -- convert coco format to TrainingDMl-AI format: +- convert stac format to TrainingDMl-AI format: ```python -from pytdml.convert_utils import convert_coco_to_tdml,convert_stac_to_tdml +from pytdml.io.stac_converter import convert_stac_to_tdml -coco_path = "/mnt/example/coco_file.json" +stac_path = "/mnt/example/stac_file.json" output_path = "convert_result.json" -convert_coco_to_tdml(coco_path, output_path) +dataset = convert_stac_to_tdml(stac_path) ``` diff --git a/pytdml/type/__init__.py b/pytdml/type/__init__.py index c2ea481..495339f 100644 --- a/pytdml/type/__init__.py +++ b/pytdml/type/__init__.py @@ -43,7 +43,7 @@ from .basic_types import TrainingDataset from .extended_types import AI_PixelLabel from .extended_types import AI_ObjectLabel -from .extended_types import AI_ObjectLabel +from .extended_types import AI_SceneLabel from .extended_types import AI_EOTask from .extended_types import AI_EOTrainingData from .extended_types import EOTrainingDataset diff --git a/pytdml/yaml_to_tdml.py b/pytdml/yaml_to_tdml.py index 8015b23..f289fb9 100644 --- a/pytdml/yaml_to_tdml.py +++ b/pytdml/yaml_to_tdml.py @@ -321,3 +321,6 @@ def main(): training_datasets = yaml_to_tdml(yaml_path) if training_datasets: write_to_json(training_datasets, json_path) + +if __name__ == '__main__': + main()