fix parse_time | Migrate to pyproject.toml

UTEL-UIUC · Mar 17, 2024 · c501037 · c501037
1 parent faf7156
commit c501037
Show file tree

Hide file tree

Showing 5 changed files with 60 additions and 98 deletions.
diff --git a/README.md b/README.md
@@ -367,7 +367,7 @@ year = {2023}
 
 ## Citing stop spacings paper
 
-If you use stop spacings paper in your research please use the following BibTeX entry:
+If you use [stop spacings paper](https://doi.org/10.1016/j.jpubtr.2024.100083) in your research please use the following BibTeX entry:
 
 ```bibtex
 @article{Devunuri2024,
@@ -383,7 +383,6 @@ If you use stop spacings paper in your research please use the following BibTeX
   doi = {10.1016/j.jpubtr.2024.100083},
   url = {https://www.sciencedirect.com/science/article/pii/S1077291X24000031},
   urldate = {2024-03-07},
-  abstract = {Discussions of bus stop consolidation sometimes refer to average stop spacings, but there are no reliable statistics about spacings, nor methodologies for calculating them. This paper aims to clarify discussions of bus stop spacings by introducing clear definitions, a methodology for creating statistics from General Transit Feed Specification (GTFS) files, and a python package, gtfs-segments, which splits bus networks into isolated `segments.' With the package, we calculate national-level statistics from 539 US transit providers and 83 Canadian providers, as well as agency-level statistics for 30 providers in the US, 10 in Canada, and a sample of 38 providers from other countries. Our estimates of US and Canadian mean spacings are both around 350~m (slightly wider than five stops per mile). US spacings are wider than sometimes claimed but narrower than those in other countries. Finally, the paper gives examples of metrics created by combining GTFS with data from other sources and proposes research ideas and applications to transit planning involving fine-grained stop spacing data.},
   keywords = {Bus stop,GTFS,Public Transit,Stop Spacings,Transit Planning}
 }
 ```

diff --git a/gtfs_segments/__init__.py b/gtfs_segments/__init__.py
@@ -1,7 +1,7 @@
 """
 The gtfs_segments package main init file.
 """
-__version__ = "2.1.5"
+import importlib.metadata
 from .geom_utils import view_heatmap, view_spacings, view_spacings_interactive
 from .gtfs_segments import get_gtfs_segments, pipeline_gtfs, process_feed
 from .mobility import (
@@ -13,6 +13,7 @@
 from .route_stats import get_route_stats
 from .utils import export_segments, plot_hist, process, summary_stats
 
+__version__ = importlib.metadata.version("gtfs_segments")
 __all__ = [
     "__version__",
     "get_gtfs_segments",

diff --git a/gtfs_segments/partridge_mod/parsers.py b/gtfs_segments/partridge_mod/parsers.py
@@ -18,7 +18,7 @@
 
 # Why 2^18? See https://git.io/vxB2P.
 @lru_cache(maxsize=2**18)
-def parse_time(val: str) -> int:
+def parse_time(val: str) -> np.float32:
     """
     The function `parse_time` takes a string representing a time value in the format "hh:mm:ss" and
     returns the equivalent time in seconds as a numpy int, or returns the input value if it is
@@ -28,19 +28,16 @@ def parse_time(val: str) -> int:
       val (str): The parameter `val` is a string representing a time value in the format "hh:mm:ss".
 
     Returns:
-      a value of type np.float64.
+      a value of type np.float32.
     """
-    if val is np.nan or isinstance(val, int):
+    if isinstance(val, float) or (isinstance(val, float) and np.isnan(val)):  # Corrected handling for np.nan
         return val
-    val = val.strip()
-    h, m, s = val.split(":")
-    ssm = int(h) * 3600 + int(m) * 60 + int(s)
-    if val == "":
+    if str(val) == "":
         return np.nan
+    val = str(val).strip()
 
-    # pandas doesn't have a NaN int, use floats
-    # return np.float64(ssm)
-    return ssm
+    h, m, s = map(int, val.split(":"))
+    return np.float32(h * 3600 + m * 60 + s)
 
 
 @lru_cache(maxsize=2**18)
@@ -61,14 +58,22 @@ def parse_date(val: str) -> datetime.date:
 
 
 @lru_cache(maxsize=2**18)
-def parse_numeric(val: Any) -> float:
+def parse_float(val: Any) -> float:
     try:
         return float(val)
     except ValueError:
         return np.nan
+
+@lru_cache(maxsize=2**18)
+def parse_integer(val: Any) -> float:
+    try:
+        return int(val)
+    except ValueError:
+        return np.nan
 
 
-vparse_numeric = np.vectorize(parse_numeric)
+vparse_float = np.vectorize(parse_float)
+vparse_int = np.vectorize(parse_integer)
 vparse_time = np.vectorize(parse_time)
 vparse_date = np.vectorize(parse_date)
 
@@ -164,13 +169,13 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
             "converters": {
                 "start_date": vparse_date,
                 "end_date": vparse_date,
-                "monday": vparse_numeric,
-                "tuesday": vparse_numeric,
-                "wednesday": vparse_numeric,
-                "thursday": vparse_numeric,
-                "friday": vparse_numeric,
-                "saturday": vparse_numeric,
-                "sunday": vparse_numeric,
+                "monday": vparse_float,
+                "tuesday": vparse_float,
+                "wednesday": vparse_float,
+                "thursday": vparse_float,
+                "friday": vparse_float,
+                "saturday": vparse_float,
+                "sunday": vparse_float,
             },
             "required_columns": (
                 "service_id",
@@ -189,7 +194,7 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
             "usecols": {"service_id": "str", "date": "str", "exception_type": "int8"},
             "converters": {
                 "date": vparse_date,
-                "exception_type": vparse_numeric,
+                "exception_type": vparse_float,
             },
             "required_columns": ("service_id", "date", "exception_type"),
         },
@@ -203,9 +208,9 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
                 "transfer_duration": "float16",
             },
             "converters": {
-                "price": vparse_numeric,
-                "payment_method": vparse_numeric,
-                "transfer_duration": vparse_numeric,
+                "price": vparse_float,
+                "payment_method": vparse_float,
+                "transfer_duration": vparse_float,
             },
             "required_columns": (
                 "fare_id",
@@ -252,8 +257,8 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
                 "exact_times": "bool",
             },
             "converters": {
-                "headway_secs": vparse_numeric,
-                "exact_times": vparse_numeric,
+                "headway_secs": vparse_float,
+                "exact_times": vparse_float,
                 "start_time": vparse_time,
                 "end_time": vparse_time,
             },
@@ -274,7 +279,7 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
                 # "route_text_color": "str",
             },
             "converters": {
-                "route_type": vparse_numeric,
+                "route_type": vparse_float,
             },
             "required_columns": (
                 "route_id",
@@ -292,10 +297,10 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
                 # "shape_dist_traveled":"float32",
             },
             "converters": {
-                "shape_pt_lat": vparse_numeric,
-                "shape_pt_lon": vparse_numeric,
-                "shape_pt_sequence": vparse_numeric,
-                # "shape_dist_traveled": vparse_numeric,
+                "shape_pt_lat": vparse_float,
+                "shape_pt_lon": vparse_float,
+                "shape_pt_sequence": vparse_float,
+                # "shape_dist_traveled": vparse_float,
             },
             "required_columns": (
                 "shape_id",
@@ -316,11 +321,11 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
                 # "timepoint":"bool",
             },
             "converters": {
-                "stop_lat": vparse_numeric,
-                "stop_lon": vparse_numeric,
-                # "location_type": vparse_numeric,
-                "wheelchair_boarding": vparse_numeric,
-                "timepoint": vparse_numeric,
+                "stop_lat": vparse_float,
+                "stop_lon": vparse_float,
+                # "location_type": vparse_float,
+                "wheelchair_boarding": vparse_float,
+                "timepoint": vparse_float,
             },
             "required_columns": (
                 "stop_id",
@@ -336,20 +341,20 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
                 "arrival_time": "float32",
                 # "departure_time",
                 "stop_id": "str",
-                "stop_sequence": "int16",
-                "pickup_type": "int8",
-                "drop_off_type": "int8",
+                "stop_sequence": vparse_int,
+                "pickup_type": vparse_int,
+                "drop_off_type": vparse_int,
                 # "shape_dist_traveled",
                 # "timepoint",
             },
             "converters": {
                 "arrival_time": vparse_time,
                 "departure_time": vparse_time,
-                "pickup_type": vparse_numeric,
-                "drop_off_type": vparse_numeric,
-                # "shape_dist_traveled": vparse_numeric,
-                "stop_sequence": vparse_numeric,
-                # "timepoint": vparse_numeric,
+                "pickup_type": vparse_int,
+                "drop_off_type": vparse_int,
+                # "shape_dist_traveled": vparse_float,
+                "stop_sequence": vparse_int,
+                # "timepoint": vparse_float,
             },
             "required_columns": (
                 "trip_id",
@@ -362,8 +367,8 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
         "transfers.txt": {
             "usecols": ["from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time"],
             "converters": {
-                "transfer_type": vparse_numeric,
-                "min_transfer_time": vparse_numeric,
+                "transfer_type": vparse_float,
+                "min_transfer_time": vparse_float,
             },
             "required_columns": ("from_stop_id", "to_stop_id", "transfer_type"),
         },
@@ -378,9 +383,9 @@ def transforms_dict() -> Dict[str, Dict[str, Any]]:
                 # "bikes_allowed":"int8",
             },
             "converters": {
-                "direction_id": vparse_numeric,
-                # "wheelchair_accessible": vparse_numeric,
-                "bikes_allowed": vparse_numeric,
+                "direction_id": vparse_float,
+                # "wheelchair_accessible": vparse_float,
+                "bikes_allowed": vparse_float,
             },
             "required_columns": ("route_id", "service_id", "trip_id"),
         },

diff --git a/pyproject.toml b/pyproject.toml
@@ -3,7 +3,7 @@ name = 'gtfs_segments'
 description = "GTFS Segments: A fast and efficient library to generate bus stop spacings"
 # Support Python 3.9+.
 requires-python = ">=3.9"
-version = "2.1.5"
+version = "2.1.6"
 dependencies = [
     "geopandas",
     "scipy",

diff --git a/setup.py b/setup.py
@@ -1,47 +1,4 @@
-from setuptools import find_packages, setup
+import setuptools
 
-__version__ = "0.2.0"
-DESCRIPTION = "GTFS segments"
-with open("README.md", "r", encoding="utf8") as fh:
-    LONG_DESCRIPTION = fh.read()
-
-REQUIREMENTS = [
-    "geopandas",
-    "scipy",
-    "shapely",
-    "numpy",
-    "pandas",
-    "matplotlib",
-    "utm",
-    "contextily",
-    "requests",
-    "isoweek",
-    "faust-cchardet",
-    "charset_normalizer",
-    "folium",
-    "thefuzz",
-]
-
-# Setting up
-setup(
-    name="gtfs_segments",
-    version=__version__,
-    author="Saipraneeth Devunuri",
-    author_email="<[email protected]>",
-    description=DESCRIPTION,
-    long_description=LONG_DESCRIPTION,
-    long_description_content_type="text/markdown",
-    packages=find_packages(),
-    install_requires=REQUIREMENTS,
-    url="https://github.com/UTEL-UIUC/gtfs_segments",
-    keywords=["python", "gtfs", "geodata"],
-    classifiers=[
-        "Development Status :: 3 - Alpha",
-        "Intended Audience :: Education",
-        "Programming Language :: Python :: 2",
-        "Programming Language :: Python :: 3",
-        "Operating System :: MacOS :: MacOS X",
-        "Operating System :: Microsoft :: Windows",
-        "License :: OSI Approved :: MIT License",
-    ],
-)
+if __name__ == "__main__":
+    setuptools.setup()