# GitHub Actions workflow: check_broken_links (copied from the page for run #68)

# Checks Jupyter notebooks for broken hyperlinks.
name: check_broken_links

# Triggers: a daily schedule, any push or pull request that touches a
# notebook file, and manual dispatch.
on:
  schedule:
    # Every day at 17:30 (GitHub evaluates cron schedules in UTC).
    - cron: '30 17 * * *'
  push:
    paths:
      - '**/*.ipynb'
  pull_request:
    paths:
      - '**/*.ipynb'
  # Trigger the workflow on manual dispatch
  workflow_dispatch:

permissions:
  contents: read
  # NOTE(review): a link check normally needs only `contents: read`.
  # `pages: write` and `id-token: write` are kept unchanged here; confirm
  # whether any job in this workflow actually relies on them and drop them
  # if not (least privilege).
  pages: write
  id-token: write

# Run every `run:` step with bash. Naming the shell explicitly makes GitHub
# invoke it with `-eo pipefail`, so a failing command or pipeline stage
# fails the step.
defaults:
  run:
    shell: bash
jobs:
  # Collects every notebook under ./books into a JSON array and exposes it
  # as the `notebook_list` job output for the link check below.
  get-notebooks:
    runs-on: ubuntu-22.04
    outputs:
      notebook_list: ${{ steps.list_all_notebooks.outputs.notebook_list }}
    steps:
      - uses: actions/checkout@v4
      - name: Get all notebooks
        working-directory: ./books
        id: list_all_notebooks
        run: |
          # Let jq build the array: paths containing spaces, quotes or
          # backslashes stay intact and are escaped as valid JSON. An empty
          # search result yields exactly `[]`, which check-links tests for.
          # (File names containing a newline are not supported.)
          notebook_list=$(
            find . -type f -name '*.ipynb' \
              | sort \
              | jq --raw-input --slurp --compact-output \
                  'split("\n") | map(select(length > 0))'
          )
          echo "notebook_list=${notebook_list}"
          echo "notebook_list=${notebook_list}" >> "${GITHUB_OUTPUT}"

  # Runs pytest-check-links against each notebook found by get-notebooks.
  # Skipped entirely when no notebook exists.
  check-links:
    needs: [get-notebooks]
    if: ${{ needs.get-notebooks.outputs.notebook_list != '[]' }}
    runs-on: ubuntu-latest
    steps:
      # Pinned to the same release as get-notebooks instead of the mutable
      # `master` branch.
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
      - name: Install Python dependencies
        run: |
          python -m pip install pytest-check-links
      - name: Check broken links
        working-directory: ./books
        env:
          # Hand the list over as an environment variable rather than
          # expanding `${{ }}` inside the script: notebook names come from
          # the repository (including pull requests), and a name containing
          # a quote would otherwise be executed as shell code.
          NOTEBOOK_LIST: ${{ needs.get-notebooks.outputs.notebook_list }}
        run: |
          echo "notebook_list=${NOTEBOOK_LIST}"

          # Decode the JSON array into a bash array, one path per line, so
          # paths with spaces are not split into several words.
          notebook_lines=$(jq -r '.[]' <<< "${NOTEBOOK_LIST}")
          mapfile -t notebooks <<< "${notebook_lines}"

          # Check every notebook even if an earlier one has broken links,
          # then fail the step once at the end.
          status=0
          for notebook in "${notebooks[@]}"; do
            [[ -n "${notebook}" ]] || continue
            echo "Checking links in ${notebook}"
            if ! pytest --check-links \
                --check-links-ignore "https://doi.org/.*/" \
                --check-links-ignore "https://www.sciencedirect.com/.*" \
                "${notebook}"; then
              status=1
            fi
          done
          exit "${status}"