Skip to content

Commit

Permalink
Write to Parquet with GeoParquet 1.1 metadata (#40)
Browse files Browse the repository at this point in the history
* Write to Parquet with GeoParquet 1.1 metadata
  • Loading branch information
kylebarron authored Apr 19, 2024
1 parent 5ed701e commit 49f678f
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 1 deletion.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ module = [
"pandas.*",
"pyarrow.*",
"pypgstac.*",
"pyproj.*",
"rich.*",
"shapely.*",
"tqdm.*",
Expand All @@ -78,4 +79,4 @@ ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "stac_geoparquet.*"
disallow_untyped_defs = true
disallow_untyped_defs = true
46 changes: 46 additions & 0 deletions stac_geoparquet/to_parquet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import json
from typing import Any

import pyarrow as pa
import pyarrow.parquet as pq
from pyproj import CRS

# JSON-dict form of the EPSG:4326 CRS, built once at import time via pyproj.
# It is embedded as the "crs" entry of the geometry column metadata written
# by `to_parquet`.
WGS84_CRS_JSON = CRS.from_epsg(4326).to_json_dict()


def to_parquet(table: pa.Table, where: Any, **kwargs: Any) -> None:
    """Save an Arrow table of STAC data as GeoParquet.

    A ``geo`` entry laid out per the GeoParquet 1.1 metadata format is merged
    into the table's existing schema metadata, and the resulting table is
    written with ``pyarrow.parquet.write_table``.

    Args:
        table: The Arrow table to write to Parquet.
        where: The destination for saving; passed through unchanged to
            ``pyarrow.parquet.write_table``.
        **kwargs: Additional keyword arguments forwarded to
            ``pyarrow.parquet.write_table``.
    """
    # Each covering entry is the path ["bbox", <field>] into the bbox column.
    bbox_paths = {
        field: ["bbox", field] for field in ("xmin", "ymin", "xmax", "ymax")
    }

    # TODO: include bbox of geometries
    geometry_meta = {
        "encoding": "WKB",
        # TODO: specify known geometry types
        "geometry_types": [],
        "crs": WGS84_CRS_JSON,
        "edges": "planar",
        "covering": {"bbox": bbox_paths},
    }
    geo_payload = json.dumps(
        {
            "version": "1.1.0-dev",
            "columns": {"geometry": geometry_meta},
            "primary_column": "geometry",
        }
    )

    # Keep whatever schema metadata is already present; only the b"geo" key
    # is added or overwritten.
    merged_metadata = {
        **(table.schema.metadata or {}),
        b"geo": geo_payload.encode("utf-8"),
    }
    annotated = table.replace_schema_metadata(merged_metadata)

    pq.write_table(annotated, where, **kwargs)

0 comments on commit 49f678f

Please sign in to comment.