Skip to content

check_broken_links

check_broken_links #98

# Checks the published example-notebook pages for broken links.
name: check_broken_links
on:
  schedule:
    # Daily at 17:30 (GitHub evaluates cron schedules in UTC). Quoted so the
    # expression is always read as a single string.
    - cron: '30 17 * * *'
  # Also run whenever a notebook changes on a push or in a pull request.
  push:
    paths:
      - '**/*.ipynb'
  pull_request:
    paths:
      - '**/*.ipynb'
  # Trigger the workflow on manual dispatch
  workflow_dispatch:
# Least privilege: the jobs in this workflow only check out the repository and
# fetch public URLs, so the token needs read access to contents and nothing
# else (no Pages deployment or OIDC token is used here).
permissions:
  contents: read
defaults:
  run:
    shell: bash
jobs:
  # Builds a JSON array holding the .html path of every notebook found under
  # ./books and exposes it as the `notebook_list` job output.
  get-notebooks:
    runs-on: ubuntu-22.04
    outputs:
      notebook_list: ${{ steps.list_all_notebooks.outputs.notebook_list }}
    steps:
      - uses: actions/checkout@v4
      - name: Get all notebooks
        working-directory: ./books
        id: list_all_notebooks
        run: |
          # jq builds the array so that paths containing spaces or quotes are
          # encoded as valid JSON. With no notebooks the result is exactly
          # "[]", which the check-links job condition compares against.
          # Paths are read one per line (newlines in file names unsupported).
          notebook_list=$(
            find . -type f -name '*.ipynb' |
              jq -R -s -c '
                split("\n")
                | map(select(length > 0) | rtrimstr(".ipynb") + ".html")
              '
          )
          echo "notebook_list=${notebook_list}"
          echo "notebook_list=${notebook_list}" >> "$GITHUB_OUTPUT"

  # Runs lychee against the published page of every notebook in the list.
  check-links:
    needs: [get-notebooks]
    # Skip entirely when there is nothing to check.
    if: ${{ needs.get-notebooks.outputs.notebook_list != '[]' }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: gh-pages
      - name: lychee Link Checker
        run: |
          curl --proto '=https' --tlsv1.2 -sSf 'https://sh.rustup.rs' | sh -s -- -y
          # Refresh the package index first so the install does not fail on a
          # stale mirror listing.
          sudo apt-get update
          sudo apt-get install -y gcc pkg-config libc6-dev libssl-dev
          cargo install lychee
      - name: Check broken links
        # The list is derived from file names in the repository, so it is
        # handed to the script through the environment instead of being
        # pasted into the script text, where a crafted name could inject
        # shell commands.
        env:
          NOTEBOOK_LIST: ${{ needs.get-notebooks.outputs.notebook_list }}
        run: |
          echo "notebook_list=${NOTEBOOK_LIST}"
          # One path per line; a jq failure aborts the step here.
          notebook_paths=$(jq -r '.[]' <<< "$NOTEBOOK_LIST")
          mapfile -t notebooks <<< "$notebook_paths"
          # Check every page and fail at the end, so a single run reports
          # all notebooks with broken links rather than only the first.
          failed=0
          for notebook in "${notebooks[@]}"; do
            [ -n "$notebook" ] || continue
            echo "Checking links in $notebook"
            lychee "https://www.neurodesk.org/example-notebooks/${notebook}" \
              --remap "https://neurodesk.github.io/example-notebooks/intro.html https://www.neurodesk.org/example-notebooks" \
              --accept 403 || failed=1
          done
          exit "$failed"