Skip to content

Commit

Permalink
feat: enable usage of csv files with wkt geometries
Browse files Browse the repository at this point in the history
  • Loading branch information
maxnowack committed Aug 7, 2024
1 parent bb4f220 commit b3b52cc
Show file tree
Hide file tree
Showing 6 changed files with 886 additions and 19 deletions.
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,11 @@ csv-test: tippecanoe tippecanoe-decode
./tippecanoe-decode -x generator -x generator_options tests/csv/out.mbtiles > tests/csv/out.mbtiles.json.check
cmp tests/csv/out.mbtiles.json.check tests/csv/out.mbtiles.json
rm -f tests/csv/out.mbtiles.json.check tests/csv/out.mbtiles
# Reading from named CSV with geometry
./tippecanoe -q -zg -f -o tests/csv_geometry/out.mbtiles tests/csv_geometry/ne_110m_populated_places_simple.csv
./tippecanoe-decode -x generator -x generator_options tests/csv_geometry/out.mbtiles > tests/csv_geometry/out.mbtiles.json.check
cmp tests/csv_geometry/out.mbtiles.json.check tests/csv_geometry/out.mbtiles.json
rm -f tests/csv_geometry/out.mbtiles.json.check tests/csv_geometry/out.mbtiles

layer-json-test: tippecanoe tippecanoe-decode
# GeoJSON with description and named layer
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ tippecanoe
Builds [vector tilesets](https://github.com/mapbox/vector-tile-spec/) from large (or small) collections of [GeoJSON](http://geojson.org/), [FlatGeobuf](https://github.com/flatgeobuf/flatgeobuf), or [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) features,
[like these](MADE_WITH.md).

This is the official home of Tippecanoe, developed and actively maintained by [Erica Fischer](https://github.com/e-n-f) at [Felt](https://felt.com).
This is the official home of Tippecanoe, developed and actively maintained by [Erica Fischer](https://github.com/e-n-f) at [Felt](https://felt.com).

Version 2.0.0 is equivalent to [1.36.0](https://github.com/mapbox/tippecanoe/tree/1.36.0) in the original repository. Thank you Mapbox for the many years of early support.

Expand Down Expand Up @@ -317,7 +317,7 @@ If your input is formatted as newline-delimited GeoJSON, use `-P` to make input
tippecanoe -z5 -o world.mbtiles -L'{"file":"ne_10m_admin_0_countries.json", "layer":"countries", "description":"Natural Earth countries"}'
```

CSV input files currently support only Point geometries, from columns named `latitude`, `longitude`, `lat`, `lon`, `long`, `lng`, `x`, or `y`.
CSV input files currently support Point geometries from columns named `latitude`, `longitude`, `lat`, `lon`, `long`, `lng`, `x`, or `y`, and Point, LineString, and Polygon geometries as a WKT string from columns named `geometry` or `wkt`.

### Parallel processing of input

Expand Down
136 changes: 123 additions & 13 deletions geocsv.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#include <sstream>
#include <vector>
#include <string>
#include <cctype>
#include <stdlib.h>
#include <algorithm>
#include "geocsv.hpp"
Expand All @@ -11,6 +15,103 @@
#include "options.hpp"
#include "errors.hpp"

#include <iostream>
#include <vector>
#include <string>
#include <sstream>
#include <cctype>

// Strips leading and trailing space characters (' ' only -- tabs and
// newlines are left alone) from `s`, modifying it in place.
// A string that is empty or consists solely of spaces becomes empty.
void trim(std::string &s) {
	const std::string::size_type head = s.find_first_not_of(' ');
	if (head == std::string::npos) {
		// Nothing but spaces (or nothing at all).
		s.clear();
		return;
	}

	const std::string::size_type tail = s.find_last_not_of(' ');
	s = s.substr(head, tail - head + 1);
}

// Reads the first two numbers in `text` as an X/Y pair.
//
// strtod() skips leading whitespace itself, so any amount of blanks before
// or between the two numbers is accepted. Whatever follows the second
// number (a Z value, a stray ')', ...) is ignored.
//
// Returns false, leaving x/y unspecified, when either number is missing.
// Unlike std::stod() this never throws, so a bad CSV cell cannot abort the
// process with an uncaught exception.
static bool parse_wkt_position(const std::string &text, double &x, double &y) {
	const char *cursor = text.c_str();
	char *end = nullptr;

	x = strtod(cursor, &end);
	if (end == cursor) {
		return false;
	}
	cursor = end;

	y = strtod(cursor, &end);
	return end != cursor;
}

// Prints a diagnostic the first time it is called for a given `warned` flag
// and stays silent afterwards, mirroring the "(additional not reported)"
// convention used for null geometries elsewhere in this file.
static void warn_wkt_once(bool &warned, const char *problem, const std::string &text) {
	if (!warned) {
		fprintf(stderr, "WKT: %s \"%s\" (additional not reported)\n", problem, text.c_str());
		warned = true;
	}
}

// Converts a comma-separated list of "x y" positions into draw operations
// appended to `dv`.
//
//   ring    the positions, without the surrounding parentheses
//   dv      receives VT_MOVETO for the first position and VT_LINETO for
//           every following one; each position is run through the global
//           `projection` first
//   closed  when true (the default, matching the previous behavior) a
//           VT_CLOSEPATH is appended after the positions, as needed for a
//           polygon ring; pass false for an open path such as a LINESTRING
//
// Positions that cannot be parsed are skipped with a one-time warning.
// No VT_CLOSEPATH is emitted if no position was emitted at all.
void processRing(const std::string &ring, drawvec &dv, bool closed = true) {
	std::stringstream coordStream(ring);
	std::string coord;
	bool first = true;

	while (std::getline(coordStream, coord, ',')) {
		double x, y;
		if (!parse_wkt_position(coord, x, y)) {
			static bool warned = false;
			warn_wkt_once(warned, "skipping malformed coordinate", coord);
			continue;
		}

		long long wx, wy;
		projection->project(x, y, 32, &wx, &wy);
		dv.push_back(draw(first ? VT_MOVETO : VT_LINETO, wx, wy));
		first = false;
	}

	if (closed && !first) {
		dv.push_back(draw(VT_CLOSEPATH, 0, 0));
	}
}

// Parses a WKT string of type POINT, LINESTRING or POLYGON.
//
//   wkt            the text, e.g. "POINT (1 2)" or "POLYGON((0 0, 1 0, 1 1, 0 0))";
//                  the type keyword is matched case-insensitively
//   dv             the resulting draw operations are appended here
//   geometry_type  set to VT_POINT, VT_LINE or VT_POLYGON according to the
//                  keyword; left untouched for any other keyword
//
// Returns a copy of `dv` as it stands after parsing.
//
// Unsupported geometry types and unparsable coordinates produce a one-time
// warning on stderr and add nothing to `dv`.
drawvec parse_wkt(const std::string &wkt, drawvec &dv, int &geometry_type) {
	// Everything before the first '(' is the type keyword.
	const std::string::size_type open = wkt.find('(');

	std::string type = wkt.substr(0, open);
	trim(type);
	std::transform(type.begin(), type.end(), type.begin(),
		       [](unsigned char ch) { return static_cast<char>(std::toupper(ch)); });

	// Everything between that '(' and the last ')' is the coordinate body.
	// Cutting at the last ')' rather than dropping the final character keeps
	// this correct when the text has trailing whitespace.
	std::string coordinates;
	if (open != std::string::npos) {
		coordinates = wkt.substr(open + 1);
		const std::string::size_type close = coordinates.rfind(')');
		if (close != std::string::npos) {
			coordinates.erase(close);
		}
		trim(coordinates);
	}

	if (type == "POINT") {
		geometry_type = VT_POINT;

		double x, y;
		if (parse_wkt_position(coordinates, x, y)) {
			long long wx, wy;
			projection->project(x, y, 32, &wx, &wy);
			dv.push_back(draw(VT_MOVETO, wx, wy));
		} else {
			static bool warned = false;
			warn_wkt_once(warned, "skipping malformed coordinate", coordinates);
		}
	} else if (type == "LINESTRING") {
		geometry_type = VT_LINE;
		// A line is an open path: no VT_CLOSEPATH.
		processRing(coordinates, dv, false);
	} else if (type == "POLYGON") {
		geometry_type = VT_POLYGON;

		// The body is a list of parenthesized rings: "(ring), (ring), ...".
		// Track nesting depth so that only text inside a ring is collected;
		// separators between rings (depth 0) are dropped.
		std::string ring;
		int level = 0;
		for (char c : coordinates) {
			if (c == '(') {
				if (level == 0) {
					ring.clear();
				}
				level++;
			} else if (c == ')') {
				level--;
				if (level == 0) {
					// Ring complete: emit it as a closed path.
					processRing(ring, dv);
				}
			}
			if (level > 0 && c != '(') {
				ring += c;
			}
		}
	} else {
		static bool warned = false;
		warn_wkt_once(warned, "unsupported geometry type", type);
	}

	return dv;
}

void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fname, int layer, std::string layername) {
FILE *f;

Expand All @@ -26,7 +127,7 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam

std::string s;
std::vector<std::string> header;
ssize_t latcol = -1, loncol = -1;
ssize_t latcol = -1, loncol = -1, geometrycol = -1;

if ((s = csv_getline(f)).size() > 0) {
std::string err = check_utf8(s);
Expand All @@ -49,11 +150,14 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
if (lower == "x" || lower == "lon" || lower == "lng" || lower == "long" || (lower.find("longitude") != std::string::npos)) {
loncol = i;
}
if (lower == "geometry" || lower == "wkt") {
geometrycol = i;
}
}
}

if (latcol < 0 || loncol < 0) {
fprintf(stderr, "%s: Can't find \"lat\" and \"lon\" columns\n", fname.c_str());
if ((latcol < 0 || loncol < 0) && geometrycol < 0) {
fprintf(stderr, "%s: Can't find \"lat\" and \"lon\" or \"geometry\" columns\n", fname.c_str());
exit(EXIT_CSV);
}

Expand All @@ -73,27 +177,33 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
exit(EXIT_CSV);
}

if (line[loncol].empty() || line[latcol].empty()) {
if ((line[loncol].empty() || line[latcol].empty()) && line[geometrycol].empty()) {
static int warned = 0;
if (!warned) {
fprintf(stderr, "%s:%zu: null geometry (additional not reported)\n", fname.c_str(), seq + 1);
warned = 1;
}
continue;
}
double lon = atof(line[loncol].c_str());
double lat = atof(line[latcol].c_str());

long long x, y;
projection->project(lon, lat, 32, &x, &y);
drawvec dv;
dv.push_back(draw(VT_MOVETO, x, y));
int geometry_type = -1;
if (latcol >= 0 && loncol >= 0) {
double lon = atof(line[loncol].c_str());
double lat = atof(line[latcol].c_str());

long long x, y;
projection->project(lon, lat, 32, &x, &y);
dv.push_back(draw(VT_MOVETO, x, y));
geometry_type = VT_POINT;
} else if (geometrycol >= 0) {
parse_wkt(csv_dequote(line[geometrycol]), dv, geometry_type);
}

std::vector<std::string> full_keys;
std::vector<serial_val> full_values;

for (size_t i = 0; i < line.size(); i++) {
if (i != (size_t) latcol && i != (size_t) loncol) {
if (i != (size_t) latcol && i != (size_t) loncol && i != (size_t) geometrycol) {
line[i] = csv_dequote(line[i]);

serial_val sv;
Expand All @@ -117,13 +227,13 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
sf.layer = layer;
sf.segment = sst[0].segment;
sf.has_id = false;
sf.id = 0;
sf.id = seq;
sf.tippecanoe_minzoom = -1;
sf.tippecanoe_maxzoom = -1;
sf.feature_minzoom = false;
sf.seq = *(sst[0].layer_seq);
sf.geometry = dv;
sf.t = 1; // POINT
sf.t = geometry_type;
sf.full_keys = full_keys;
sf.full_values = full_values;

Expand Down
8 changes: 4 additions & 4 deletions man/tippecanoe.1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Builds vector tilesets \[la]https://github.com/mapbox/vector-tile-spec/\[ra] from large (or small) collections of GeoJSON \[la]http://geojson.org/\[ra], FlatGeobuf \[la]https://github.com/flatgeobuf/flatgeobuf\[ra], or CSV \[la]https://en.wikipedia.org/wiki/Comma-separated_values\[ra] features,
like these \[la]MADE_WITH.md\[ra]\&.
.PP
This is the official home of Tippecanoe, developed and actively maintained by Erica Fischer \[la]https://github.com/e-n-f\[ra] at Felt \[la]https://felt.com\[ra]\&.
This is the official home of Tippecanoe, developed and actively maintained by Erica Fischer \[la]https://github.com/e-n-f\[ra] at Felt \[la]https://felt.com\[ra]\&.
.PP
Version 2.0.0 is equivalent to 1.36.0 \[la]https://github.com/mapbox/tippecanoe/tree/1.36.0\[ra] in the original repository. Thank you Mapbox for the many years of early support.
.SH Intent
Expand Down Expand Up @@ -374,7 +374,7 @@ tippecanoe \-z5 \-o world.mbtiles \-L'{"file":"ne_10m_admin_0_countries.json", "
.fi
.RE
.PP
CSV input files currently support only Point geometries, from columns named \fB\fClatitude\fR, \fB\fClongitude\fR, \fB\fClat\fR, \fB\fClon\fR, \fB\fClong\fR, \fB\fClng\fR, \fB\fCx\fR, or \fB\fCy\fR\&.
CSV input files currently support Point geometries from columns named \fB\fClatitude\fR, \fB\fClongitude\fR, \fB\fClat\fR, \fB\fClon\fR, \fB\fClong\fR, \fB\fClng\fR, \fB\fCx\fR, or \fB\fCy\fR, and Point, LineString, and Polygon geometries as a WKT string from columns named \fB\fCgeometry\fR or \fB\fCwkt\fR\&.
.SS Parallel processing of input
.RS
.IP \(bu 2
Expand Down Expand Up @@ -471,8 +471,8 @@ zoom level precision (ft) precision (m) map scale
\fB\fC\-z18\fR 1.5 in 4 cm 1:1250
\fB\fC\-z19\fR 0.8 in 2 cm 1:600
\fB\fC\-z20\fR 0.4 in 1 cm 1:300
\fB\fC\-z21\fR 0.2 in 0.5 cm 1:150
\fB\fC\-z22\fR 0.1 in 0.25 cm 1:75
\fB\fC\-z21\fR 0.2 in 0.5 cm 1:150
\fB\fC\-z22\fR 0.1 in 0.25 cm 1:75
.TE
.SS Tile resolution
.RS
Expand Down
Loading

0 comments on commit b3b52cc

Please sign in to comment.