Skip to content

Commit

Permalink
try to check static feed URLs in changed DMFR files (#1356)
Browse files Browse the repository at this point in the history
* try to check static feed URLs in changed DMFR files

* test

* again

* again

* again

* more descriptive

* typing and logging

* GH Actions permissions

* cleaner output

* cleaner logging output

* undo the breaking change

* one more intentional break to test

* actually make it break!

* back to normal
  • Loading branch information
drewda authored Dec 18, 2024
1 parent f74e6c7 commit 7ec2d21
Show file tree
Hide file tree
Showing 2 changed files with 211 additions and 0 deletions.
71 changes: 71 additions & 0 deletions .github/workflows/check-feed-urls.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Validates the static feed URLs of every DMFR file changed by a pull request
# and, when validation fails, posts the details as a PR comment and fails the job.
name: Check feed URLs in changed DMFR files

on:
  pull_request:
    paths:
      - 'feeds/*.dmfr.json'
    types: [opened, synchronize]

permissions:
  pull-requests: write # Needed for commenting on PRs
  contents: read # Needed for checking out code

jobs:
  validate:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install transitland-lib
        run: scripts/install-transitland-lib.sh

      - name: Get changed files and validate
        id: validation
        env:
          # Handed over through the environment instead of being interpolated
          # into the script text, so the ref name is never parsed as shell syntax.
          BASE_REF: ${{ github.base_ref }}
        run: |
          # Get list of changed dmfr files using GitHub's base and head refs.
          # --diff-filter=d leaves out deleted files, which cannot be validated.
          changed_files=$(git diff --name-only --diff-filter=d "origin/${BASE_REF}" HEAD -- 'feeds/*.dmfr.json')
          # Exit if no dmfr files changed
          if [ -z "$changed_files" ]; then
            echo "No DMFR files changed"
            exit 0
          fi
          validation_output=""
          has_errors=false
          # Loop through changed files, one path per line
          while IFS= read -r file; do
            echo "Validating $file"
            if ! output=$(python scripts/check-feed-urls.py "$file" 2>&1); then
              # $'\n' produces real newlines; a plain "\n" stays literal in bash
              validation_output+="### Validation issues in ${file}"$'\n\n'"${output}"$'\n\n'
              has_errors=true
            fi
          done <<< "$changed_files"
          if [ "$has_errors" = "true" ]; then
            echo "has_errors=true" >> "$GITHUB_OUTPUT"
          fi
          # Set the validation output as a step output. The delimiter is random
          # so that tool output can never terminate the multiline value early.
          delimiter="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
          {
            echo "message<<${delimiter}"
            echo "$validation_output"
            echo "${delimiter}"
          } >> "$GITHUB_OUTPUT"

      - name: Comment on PR
        if: steps.validation.outputs.has_errors == 'true'
        uses: actions/github-script@v6
        env:
          # The message contains output derived from PR-controlled files, so it
          # is read from the environment rather than pasted into the script source.
          VALIDATION_MESSAGE: ${{ steps.validation.outputs.message }}
        with:
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: process.env.VALIDATION_MESSAGE
            });
            core.setFailed('Validation failed - see PR comments for details');
140 changes: 140 additions & 0 deletions scripts/check-feed-urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import sys
import json
import argparse
import subprocess
from pathlib import Path
from typing import Dict, Any, List, Optional, Union, TypedDict, Literal
import logging
import string

logger = logging.getLogger('dmfr_validator')


def setup_logging() -> None:
    """Send the module logger's INFO-and-above records to stdout, message text only."""
    logger.setLevel(logging.INFO)
    stdout_handler = logging.StreamHandler(sys.stdout)
    # Bare message format: no timestamp, level or logger name in the output.
    stdout_handler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(stdout_handler)

def main() -> None:
    """Parse the command line, validate one DMFR file and exit with its status.

    Exits with status 0 when ``process_dmfr`` reports success, and with
    status 1 when the file does not exist or validation fails.
    """
    parser: argparse.ArgumentParser = argparse.ArgumentParser(
        description='Validate DMFR feed URLs using transitland'
    )
    parser.add_argument('file_path', type=Path, help='Path to the DMFR JSON file')
    args: argparse.Namespace = parser.parse_args()

    if not args.file_path.exists():
        # The path must be part of the message itself: the handler's formatter
        # renders only %(message)s, so data passed through `extra` is never shown.
        logger.error(f"File Not Found\nError: File not found: {args.file_path}")
        sys.exit(1)

    is_valid: bool = process_dmfr(args.file_path)
    sys.exit(0 if is_valid else 1)

class FeedUrls(TypedDict, total=False):
    """Shape of a feed's ``urls`` object.

    Declared with ``total=False``: any subset of these keys may be present.
    """
    # static_* keys: one current URL, lists of URLs for the other variants.
    static_current: str
    static_historic: List[str]
    static_planned: List[str]
    static_hypothetical: List[str]
    # realtime_* keys: a single URL each.
    realtime_vehicle_positions: str
    realtime_trip_updates: str
    realtime_alerts: str
    # Remaining single-URL keys.
    gbfs_auto_discovery: str
    mds_provider: str

class _AuthorizationRequired(TypedDict):
    """Keys that every authorization object must carry."""
    type: Literal["header", "basic_auth", "query_param", "path_segment", "replace_url"]


class Authorization(_AuthorizationRequired, total=False):
    """TypedDict for feed authorization.

    ``type`` is required. ``param_name`` and ``info_url`` are optional keys:
    ``Optional[...]`` alone only permits a ``None`` value, so they are declared
    in a ``total=False`` layer to allow the key to be absent altogether.
    """
    param_name: Optional[str]
    info_url: Optional[str]

class _FeedRequired(TypedDict):
    """Keys declared as mandatory for a DMFR feed entry."""
    id: str
    spec: Literal["gtfs", "gtfs-rt", "gbfs", "mds"]
    urls: Union[FeedUrls, List[str]]  # List[str] for legacy format


class Feed(_FeedRequired, total=False):
    """TypedDict for a DMFR feed entry.

    ``authorization``, ``name`` and ``description`` are optional keys. The
    validator tests for the presence of ``authorization`` with ``in``, so the
    type has to allow the key to be missing rather than merely ``None``.
    """
    authorization: Optional[Authorization]
    name: Optional[str]
    description: Optional[str]

class _DMFRFileRequired(TypedDict):
    """Keys declared as mandatory at the root of a DMFR file."""
    feeds: List[Feed]


class DMFRFile(_DMFRFileRequired, total=False):
    """TypedDict for the root DMFR file structure.

    ``feeds`` is required; ``license_spdx_identifier`` is an optional key and
    is therefore declared in a ``total=False`` layer.
    """
    license_spdx_identifier: Optional[str]

def validate_feed_url(url: str, dmfr_path: str) -> bool:
    """Run ``transitland validate`` on one feed URL and report whether it passed.

    Args:
        url: Feed URL passed to the ``transitland`` command.
        dmfr_path: Path of the DMFR file the URL came from; only used in the
            failure message.

    Returns:
        True when the command exits with status 0. False when it exits with a
        non-zero status or cannot be run at all.
    """
    logger.info(f"Validating feed URL: {url}")
    try:
        # NOTE(review): `url` is taken verbatim from the DMFR file. It is passed
        # as a list element (no shell), but a value starting with "-" would
        # presumably be read as a command-line option -- confirm and guard if needed.
        result = subprocess.run(
            ["transitland", "validate", url],
            capture_output=True,
            text=True,
            encoding='utf-8',
            # Undecodable bytes in the tool output become U+FFFD instead of
            # raising UnicodeDecodeError; they are filtered out below anyway.
            errors='replace',
            check=False
        )
    except (OSError, subprocess.SubprocessError) as e:
        # With check=False the realistic failure is OSError (binary missing or
        # not executable), which is not a SubprocessError subclass.
        logger.error(f"Error running transitland validate\n{str(e)}")
        return False

    if result.returncode != 0:
        # Prefer stdout, fall back to stderr, and keep printable ASCII only.
        raw_output = result.stdout if result.stdout else result.stderr
        printable = set(string.printable)
        output = ''.join(c for c in raw_output if c in printable)
        logger.error(
            f"Validation failed for {url} in {dmfr_path}\n" +
            output
        )
        return False

    logger.info("Validation successful")
    return True

def process_dmfr(file_path: Path) -> bool:
    """
    Process a DMFR JSON file and validate eligible feeds.

    A feed is skipped when it has an ``authorization`` key. Otherwise its
    ``urls.static_current`` URL is validated, or the first entry when ``urls``
    is a list (legacy format). Feeds with neither are ignored.

    Args:
        file_path: Path to the DMFR JSON file.

    Returns:
        True if all validations pass, False if any fail or the file cannot be
        read, parsed or recognised as a DMFR file.
    """
    try:
        # Read as UTF-8 explicitly instead of relying on the locale default.
        with open(file_path, encoding='utf-8') as f:
            data: DMFRFile = json.load(f)

        if not isinstance(data, dict) or 'feeds' not in data:
            logger.error(f"Invalid DMFR file\nError: {file_path} is not a valid DMFR file")
            return False

        feeds: List[Feed] = data['feeds']
        if not isinstance(feeds, list):
            logger.error("Invalid DMFR file\nError: 'feeds' must be a list")
            return False

        all_valid: bool = True
        for feed in feeds:
            # A non-object entry would otherwise hit a substring/membership
            # test and then an AttributeError that aborts the remaining feeds.
            if not isinstance(feed, dict):
                logger.error("Invalid DMFR file\nError: each entry in 'feeds' must be an object")
                all_valid = False
                continue

            # Skip feeds that have authentication
            if 'authorization' in feed:
                logger.info("Skipping feed\nFeed requires authentication")
                continue

            # Check for static_current URL
            urls: Union[FeedUrls, List[str]] = feed.get('urls', {})
            if isinstance(urls, dict) and 'static_current' in urls:
                if not validate_feed_url(urls['static_current'], str(file_path)):
                    all_valid = False
            elif isinstance(urls, list) and urls:
                # Handle legacy format where urls is an array
                if not validate_feed_url(urls[0], str(file_path)):
                    all_valid = False

        return all_valid

    except json.JSONDecodeError as e:
        logger.error(f"JSON Parse Error\nError: Invalid JSON in {file_path}: {str(e)}")
        return False
    except Exception as e:
        # Boundary handler: any other failure is reported as a failed check
        # rather than a traceback.
        logger.error(f"Processing Error\nError processing {file_path}: {str(e)}")
        return False

# Script entry point: configure log output first, then run the CLI.
if __name__ == "__main__":
    setup_logging()
    main()

0 comments on commit 7ec2d21

Please sign in to comment.