Skip to content

Commit

Permalink
fix parse_time | Migrate to pyproject.toml
Browse files Browse the repository at this point in the history
  • Loading branch information
praneethd7 committed Mar 17, 2024
1 parent faf7156 commit c501037
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 98 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ year = {2023}

## Citing stop spacings paper

If you use stop spacings paper in your research please use the following BibTeX entry:
If you use the [stop spacings paper](https://doi.org/10.1016/j.jpubtr.2024.100083) in your research, please use the following BibTeX entry:

```bibtex
@article{Devunuri2024,
Expand All @@ -383,7 +383,6 @@ If you use stop spacings paper in your research please use the following BibTeX
doi = {10.1016/j.jpubtr.2024.100083},
url = {https://www.sciencedirect.com/science/article/pii/S1077291X24000031},
urldate = {2024-03-07},
abstract = {Discussions of bus stop consolidation sometimes refer to average stop spacings, but there are no reliable statistics about spacings, nor methodologies for calculating them. This paper aims to clarify discussions of bus stop spacings by introducing clear definitions, a methodology for creating statistics from General Transit Feed Specification (GTFS) files, and a python package, gtfs-segments, which splits bus networks into isolated `segments.' With the package, we calculate national-level statistics from 539 US transit providers and 83 Canadian providers, as well as agency-level statistics for 30 providers in the US, 10 in Canada, and a sample of 38 providers from other countries. Our estimates of US and Canadian mean spacings are both around 350~m (slightly wider than five stops per mile). US spacings are wider than sometimes claimed but narrower than those in other countries. Finally, the paper gives examples of metrics created by combining GTFS with data from other sources and proposes research ideas and applications to transit planning involving fine-grained stop spacing data.},
keywords = {Bus stop,GTFS,Public Transit,Stop Spacings,Transit Planning}
}
```
Expand Down
3 changes: 2 additions & 1 deletion gtfs_segments/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
The gtfs_segments package main init file.
"""
__version__ = "2.1.5"
import importlib.metadata
from .geom_utils import view_heatmap, view_spacings, view_spacings_interactive
from .gtfs_segments import get_gtfs_segments, pipeline_gtfs, process_feed
from .mobility import (
Expand All @@ -13,6 +13,7 @@
from .route_stats import get_route_stats
from .utils import export_segments, plot_hist, process, summary_stats

# Resolve the version from the installed distribution metadata so that it is
# declared in one place only (pyproject.toml).
try:
    __version__ = importlib.metadata.version("gtfs_segments")
except importlib.metadata.PackageNotFoundError:
    # The package is being imported from a source checkout that has not been
    # installed, so no distribution metadata exists. Fall back to a
    # placeholder instead of making the whole package un-importable.
    __version__ = "0+unknown"
__all__ = [
"__version__",
"get_gtfs_segments",
Expand Down
101 changes: 53 additions & 48 deletions gtfs_segments/partridge_mod/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

# Why 2^18? See https://git.io/vxB2P.
@lru_cache(maxsize=2**18)
def parse_time(val: str) -> int:
def parse_time(val: str) -> np.float32:
"""
The function `parse_time` takes a string representing a time value in the format "hh:mm:ss" and
returns the equivalent time in seconds as a numpy int, or returns the input value if it is
Expand All @@ -28,19 +28,16 @@ def parse_time(val: str) -> int:
val (str): The parameter `val` is a string representing a time value in the format "hh:mm:ss".
Returns:
a value of type np.float64.
a value of type np.float32.
"""
if val is np.nan or isinstance(val, int):
if isinstance(val, float) or (isinstance(val, float) and np.isnan(val)): # Corrected handling for np.nan
return val
val = val.strip()
h, m, s = val.split(":")
ssm = int(h) * 3600 + int(m) * 60 + int(s)
if val == "":
if str(val) == "":
return np.nan
val = str(val).strip()

# pandas doesn't have a NaN int, use floats
# return np.float64(ssm)
return ssm
h, m, s = map(int, val.split(":"))
return np.float32(h * 3600 + m * 60 + s)


@lru_cache(maxsize=2**18)
Expand All @@ -61,14 +58,22 @@ def parse_date(val: str) -> datetime.date:


@lru_cache(maxsize=2**18)
def parse_numeric(val: Any) -> float:
def parse_float(val: Any) -> float:
    """
    Convert `val` to a float, mapping values that cannot be converted to NaN.

    Args:
    val (Any): The raw value to convert.

    Returns:
    `float(val)` when the conversion succeeds, otherwise `np.nan`.
    """
    try:
        return float(val)
    except (TypeError, ValueError):
        # ValueError: text that is not a number, e.g. "" or "abc".
        # TypeError: objects float() does not accept at all, e.g. None.
        return np.nan

@lru_cache(maxsize=2**18)
def parse_integer(val: Any) -> float:
    """
    Convert `val` to an integer, mapping values that cannot be converted to NaN.

    Args:
    val (Any): The raw value to convert. It must be hashable because results
    are memoized with `lru_cache`.

    Returns:
    `int(val)` when the conversion succeeds, otherwise `np.nan`. The return
    annotation stays `float` because NaN has no integer representation; a
    successful conversion still yields a plain `int`.
    """
    try:
        return int(val)
    except (TypeError, ValueError, OverflowError):
        # ValueError: text that is not an integer (e.g. "" or "1.5") or NaN.
        # TypeError: objects int() does not accept at all, e.g. None.
        # OverflowError: infinite floats.
        return np.nan


vparse_numeric = np.vectorize(parse_numeric)
vparse_float = np.vectorize(parse_float)
vparse_int = np.vectorize(parse_integer)
vparse_time = np.vectorize(parse_time)
vparse_date = np.vectorize(parse_date)

Expand Down Expand Up @@ -164,13 +169,13 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
"converters": {
"start_date": vparse_date,
"end_date": vparse_date,
"monday": vparse_numeric,
"tuesday": vparse_numeric,
"wednesday": vparse_numeric,
"thursday": vparse_numeric,
"friday": vparse_numeric,
"saturday": vparse_numeric,
"sunday": vparse_numeric,
"monday": vparse_float,
"tuesday": vparse_float,
"wednesday": vparse_float,
"thursday": vparse_float,
"friday": vparse_float,
"saturday": vparse_float,
"sunday": vparse_float,
},
"required_columns": (
"service_id",
Expand All @@ -189,7 +194,7 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
"usecols": {"service_id": "str", "date": "str", "exception_type": "int8"},
"converters": {
"date": vparse_date,
"exception_type": vparse_numeric,
"exception_type": vparse_float,
},
"required_columns": ("service_id", "date", "exception_type"),
},
Expand All @@ -203,9 +208,9 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
"transfer_duration": "float16",
},
"converters": {
"price": vparse_numeric,
"payment_method": vparse_numeric,
"transfer_duration": vparse_numeric,
"price": vparse_float,
"payment_method": vparse_float,
"transfer_duration": vparse_float,
},
"required_columns": (
"fare_id",
Expand Down Expand Up @@ -252,8 +257,8 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
"exact_times": "bool",
},
"converters": {
"headway_secs": vparse_numeric,
"exact_times": vparse_numeric,
"headway_secs": vparse_float,
"exact_times": vparse_float,
"start_time": vparse_time,
"end_time": vparse_time,
},
Expand All @@ -274,7 +279,7 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
# "route_text_color": "str",
},
"converters": {
"route_type": vparse_numeric,
"route_type": vparse_float,
},
"required_columns": (
"route_id",
Expand All @@ -292,10 +297,10 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
# "shape_dist_traveled":"float32",
},
"converters": {
"shape_pt_lat": vparse_numeric,
"shape_pt_lon": vparse_numeric,
"shape_pt_sequence": vparse_numeric,
# "shape_dist_traveled": vparse_numeric,
"shape_pt_lat": vparse_float,
"shape_pt_lon": vparse_float,
"shape_pt_sequence": vparse_float,
# "shape_dist_traveled": vparse_float,
},
"required_columns": (
"shape_id",
Expand All @@ -316,11 +321,11 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
# "timepoint":"bool",
},
"converters": {
"stop_lat": vparse_numeric,
"stop_lon": vparse_numeric,
# "location_type": vparse_numeric,
"wheelchair_boarding": vparse_numeric,
"timepoint": vparse_numeric,
"stop_lat": vparse_float,
"stop_lon": vparse_float,
# "location_type": vparse_float,
"wheelchair_boarding": vparse_float,
"timepoint": vparse_float,
},
"required_columns": (
"stop_id",
Expand All @@ -336,20 +341,20 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
"arrival_time": "float32",
# "departure_time",
"stop_id": "str",
"stop_sequence": "int16",
"pickup_type": "int8",
"drop_off_type": "int8",
"stop_sequence": vparse_int,
"pickup_type": vparse_int,
"drop_off_type": vparse_int,
# "shape_dist_traveled",
# "timepoint",
},
"converters": {
"arrival_time": vparse_time,
"departure_time": vparse_time,
"pickup_type": vparse_numeric,
"drop_off_type": vparse_numeric,
# "shape_dist_traveled": vparse_numeric,
"stop_sequence": vparse_numeric,
# "timepoint": vparse_numeric,
"pickup_type": vparse_int,
"drop_off_type": vparse_int,
# "shape_dist_traveled": vparse_float,
"stop_sequence": vparse_int,
# "timepoint": vparse_float,
},
"required_columns": (
"trip_id",
Expand All @@ -362,8 +367,8 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
"transfers.txt": {
"usecols": ["from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time"],
"converters": {
"transfer_type": vparse_numeric,
"min_transfer_time": vparse_numeric,
"transfer_type": vparse_float,
"min_transfer_time": vparse_float,
},
"required_columns": ("from_stop_id", "to_stop_id", "transfer_type"),
},
Expand All @@ -378,9 +383,9 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
# "bikes_allowed":"int8",
},
"converters": {
"direction_id": vparse_numeric,
# "wheelchair_accessible": vparse_numeric,
"bikes_allowed": vparse_numeric,
"direction_id": vparse_float,
# "wheelchair_accessible": vparse_float,
"bikes_allowed": vparse_float,
},
"required_columns": ("route_id", "service_id", "trip_id"),
},
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = 'gtfs_segments'
description = "GTFS Segments: A fast and efficient library to generate bus stop spacings"
# Support Python 3.9+.
requires-python = ">=3.9"
version = "2.1.5"
version = "2.1.6"
dependencies = [
"geopandas",
"scipy",
Expand Down
49 changes: 3 additions & 46 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,4 @@
from setuptools import find_packages, setup
import setuptools

__version__ = "0.2.0"
DESCRIPTION = "GTFS segments"
with open("README.md", "r", encoding="utf8") as fh:
LONG_DESCRIPTION = fh.read()

REQUIREMENTS = [
"geopandas",
"scipy",
"shapely",
"numpy",
"pandas",
"matplotlib",
"utm",
"contextily",
"requests",
"isoweek",
"faust-cchardet",
"charset_normalizer",
"folium",
"thefuzz",
]

# Setting up
setup(
name="gtfs_segments",
version=__version__,
author="Saipraneeth Devunuri",
author_email="<[email protected]>",
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
long_description_content_type="text/markdown",
packages=find_packages(),
install_requires=REQUIREMENTS,
url="https://github.com/UTEL-UIUC/gtfs_segments",
keywords=["python", "gtfs", "geodata"],
classifiers=[
"Development Status :: 3 - Alpha",
"Intended Audience :: Education",
"Programming Language :: Python :: 2",
"Programming Language :: Python :: 3",
"Operating System :: MacOS :: MacOS X",
"Operating System :: Microsoft :: Windows",
"License :: OSI Approved :: MIT License",
],
)
if __name__ == "__main__":
setuptools.setup()

0 comments on commit c501037

Please sign in to comment.