diff --git a/pyproject.toml b/pyproject.toml
index 580f8c6..a9f4df0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,6 +69,7 @@ module = [
     "pandas.*",
     "pyarrow.*",
     "pypgstac.*",
+    "pyproj.*",
     "rich.*",
     "shapely.*",
     "tqdm.*",
@@ -78,4 +79,4 @@ ignore_missing_imports = true
 
 [[tool.mypy.overrides]]
 module = "stac_geoparquet.*"
-disallow_untyped_defs = true
\ No newline at end of file
+disallow_untyped_defs = true
diff --git a/stac_geoparquet/to_parquet.py b/stac_geoparquet/to_parquet.py
new file mode 100644
index 0000000..3641001
--- /dev/null
+++ b/stac_geoparquet/to_parquet.py
@@ -0,0 +1,69 @@
+import json
+from typing import Any, Dict
+
+import pyarrow as pa
+import pyarrow.parquet as pq
+from pyproj import CRS
+
+WGS84_CRS_JSON = CRS.from_epsg(4326).to_json_dict()
+
+# Sub-fields a root-level "bbox" struct column must expose before it can be
+# declared as the bounding-box covering of the geometry column.
+_BBOX_COVERING_FIELDS = ("xmin", "ymin", "xmax", "ymax")
+
+
+def _has_bbox_struct(schema: pa.Schema) -> bool:
+    """Return True if ``schema`` has a root ``bbox`` struct usable as covering."""
+    # get_field_index returns -1 when the name is absent or ambiguous.
+    index = schema.get_field_index("bbox")
+    if index == -1:
+        return False
+
+    bbox_type = schema.field(index).type
+    if not pa.types.is_struct(bbox_type):
+        return False
+
+    child_names = {child.name for child in bbox_type}
+    return child_names.issuperset(_BBOX_COVERING_FIELDS)
+
+
+def to_parquet(table: pa.Table, where: Any, **kwargs: Any) -> None:
+    """Write an Arrow table with STAC data to GeoParquet.
+
+    This writes metadata compliant with GeoParquet 1.1. Existing schema
+    metadata is kept; any existing ``geo`` entry is replaced.
+
+    The ``bbox`` covering is only declared when the table has a root-level
+    ``bbox`` struct column with ``xmin``, ``ymin``, ``xmax`` and ``ymax``
+    fields, so the metadata never points at columns that do not exist.
+
+    Args:
+        table: The table to write to Parquet.
+        where: The destination for saving.
+        **kwargs: Extra keyword arguments passed to
+            ``pyarrow.parquet.write_table``.
+    """
+    # TODO: include bbox of geometries
+    column_meta: Dict[str, Any] = {
+        "encoding": "WKB",
+        # TODO: specify known geometry types
+        "geometry_types": [],
+        "crs": WGS84_CRS_JSON,
+        "edges": "planar",
+    }
+    if _has_bbox_struct(table.schema):
+        column_meta["covering"] = {
+            "bbox": {name: ["bbox", name] for name in _BBOX_COVERING_FIELDS}
+        }
+
+    geo_meta = {
+        "version": "1.1.0-dev",
+        "columns": {"geometry": column_meta},
+        "primary_column": "geometry",
+    }
+
+    metadata = table.schema.metadata or {}
+    metadata.update({b"geo": json.dumps(geo_meta).encode("utf-8")})
+    table = table.replace_schema_metadata(metadata)
+
+    pq.write_table(table, where, **kwargs)