SPARQL Monitor Workflow #17
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: SPARQL Monitor Workflow

on:
  push:
    branches: [ main ]
    paths-ignore:
      - 'monitor_results/**'
  pull_request:
    branches: [ main ]
    paths-ignore:
      - 'monitor_results/**'
  schedule:
    # Intended cadence: 03:00 UTC on the first Monday of each month.
    # Cron cannot express that directly: when both day-of-month and
    # day-of-week are restricted, POSIX cron matches a day that satisfies
    # EITHER field, so '0 3 1-7 * 1' would also fire on every Monday.
    # The schedule therefore fires on days 1-7 and the `gate` job below
    # lets scheduled runs through only when that day is a Monday.
    - cron: '0 3 1-7 * *'

jobs:
  # Decides whether the monitor should run for the triggering event.
  gate:
    runs-on: ubuntu-latest
    outputs:
      should_run: ${{ steps.decide.outputs.should_run }}
    steps:
      - id: decide
        env:
          EVENT_NAME: ${{ github.event_name }}
        run: |
          # push / pull_request always run; scheduled runs only on Monday
          # (`date +%u` prints 1 for Monday).
          if [ "$EVENT_NAME" != "schedule" ] || [ "$(date -u +%u)" = "1" ]; then
            echo "should_run=true" >> "$GITHUB_OUTPUT"
          else
            echo "should_run=false" >> "$GITHUB_OUTPUT"
          fi

  monitor:
    needs: gate
    if: needs.gate.outputs.should_run == 'true'
    runs-on: ubuntu-latest
    steps:
# v4 runs on the Node 20 action runtime; v3 targets the deprecated Node 16.
- name: Check out repository
  uses: actions/checkout@v4
# v5 runs on the Node 20 action runtime; v4 targets the deprecated Node 16.
- name: Set up Python
  uses: actions/setup-python@v5
  with:
    python-version: '3.12'
- name: Install dependencies
  run: |
    # Upgrade pip itself, then install the project requirements.
    python -m pip install --upgrade pip
    python -m pip install -r requirements.txt
    # beautifulsoup4 is needed by the website-generation step of this job.
    python -m pip install beautifulsoup4
- name: Run SPARQL monitor
  # Run the `main` module from inside the monitor/ directory.
  working-directory: monitor
  run: python -m main
- name: Create website from HTML reports
  run: |
    # Export TZ so the Python process started below actually inherits it;
    # a plain assignment only sets a shell-local variable.
    export TZ='Europe/Rome'
    python3 << 'EOF'
import os, time | |
from datetime import datetime | |
from bs4 import BeautifulSoup | |
import glob | |
# Switch the process-local timezone to Europe/Rome so that the naive
# datetime.now() used for the "Last updated" stamp reports Rome time.
os.environ.update(TZ='Europe/Rome')
time.tzset()
def find_latest_html(base_path, pattern):
    """Return the path of the newest report matching *pattern*.

    The sub-directory of *base_path* whose name sorts last is treated as the
    latest run, and within it the matching file whose path sorts last is
    returned.
    NOTE(review): this assumes directory and file names sort
    chronologically (e.g. ISO dates / timestamps) - confirm against the
    monitor's output naming.

    Args:
        base_path: Directory containing one sub-directory per run.
        pattern: Glob pattern of the report files inside a run directory.

    Returns:
        Path of the selected HTML file.

    Raises:
        FileNotFoundError: If *base_path* has no sub-directories, or the
            latest one contains no file matching *pattern*.
    """
    # Only real directories are candidates: a stray file (README, .gitkeep,
    # ...) that happens to sort last must not be picked as the "latest run".
    date_dirs = sorted(
        entry
        for entry in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, entry))
    )
    if not date_dirs:
        raise FileNotFoundError(f"No directories found in {base_path}")

    latest_dir = os.path.join(base_path, date_dirs[-1])
    html_files = glob.glob(os.path.join(latest_dir, pattern))
    if not html_files:
        raise FileNotFoundError(
            f"No HTML files matching {pattern} found in {latest_dir}"
        )
    return max(html_files)
def extract_content(html_file):
    """Extract the summary fields and the results table from a report.

    Args:
        html_file: Path of the HTML report to read.

    Returns:
        A ``(info, table)`` tuple of HTML fragments. ``info`` holds four
        ``<p>`` elements (Collection, Endpoint, Date and Time, Total Running
        Time); a field that is absent from the report is left empty.
        ``table`` is the markup of the first ``<table>`` in the report, or an
        empty string when the report contains no table.
    """
    # Imported locally: the surrounding script later rebinds the module-level
    # name ``html`` to the generated page.
    from html import escape

    # NOTE(review): reports are assumed to be UTF-8 encoded - confirm against
    # the code that writes them.
    with open(html_file, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    # str(None) would otherwise be embedded in the page as the text "None".
    table_tag = soup.find('table')
    table = str(table_tag) if table_tag is not None else ""

    # Each paragraph is attributed to the first label it contains; a later
    # paragraph carrying the same label overrides an earlier one.
    labels = ("Collection:", "Endpoint:", "Date and Time:", "Total Running Time:")
    values = dict.fromkeys(labels, "")
    for p in soup.find_all('p'):
        text = p.get_text()
        for label in labels:
            if label in text:
                values[label] = text.replace(label, "").strip()
                break

    # get_text() returns decoded text, so it must be escaped again before it
    # is placed back into markup (an endpoint URL may contain '&', etc.).
    info = f"""
    <p><strong>Collection:</strong> {escape(values["Collection:"])}</p>
    <p><strong>Endpoint:</strong> {escape(values["Endpoint:"])}</p>
    <p><strong>Date and Time:</strong> {escape(values["Date and Time:"])}</p>
    <p><strong>Total Running Time:</strong> {escape(values["Total Running Time:"])}</p>
    """
    return info, table
# Locate the most recent HTML report of each kind
meta_file, index_file = (
    find_latest_html('monitor_results/meta_reports', 'meta_monitor_vis*.html'),
    find_latest_html('monitor_results/index_reports', 'index_monitor_vis*.html'),
)
print(f"Found meta file: {meta_file}")
print(f"Found index file: {index_file}")

# Pull the summary block and the results table out of each report
(meta_info, meta_table), (index_info, index_table) = (
    extract_content(meta_file),
    extract_content(index_file),
)
# Assemble the landing page from the fragments extracted above. Literal CSS
# braces are doubled because the template is an f-string.
html = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OpenCitations Data Quality Monitor</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }}
        .container {{
            max-width: 1200px;
            margin: 0 auto;
            background-color: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        h1 {{
            color: #333;
            text-align: center;
        }}
        .section {{
            margin: 20px 0;
            padding: 20px;
            border: 1px solid #ddd;
            border-radius: 4px;
        }}
        .info-section {{
            margin-bottom: 20px;
            padding: 15px;
            background-color: #f8f9fa;
            border-radius: 4px;
        }}
        .info-section p {{
            margin: 8px 0;
            color: #444;
        }}
        table {{
            width: 100%;
            border-collapse: collapse;
            margin-top: 10px;
        }}
        th, td {{
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #ddd;
        }}
        th {{
            background-color: #f8f8f8;
        }}
        .passed {{
            color: green;
        }}
        .failed {{
            color: red;
        }}
        .error {{
            color: orange;
        }}
        .null {{
            color: #999;
        }}
        .timestamp {{
            text-align: center;
            color: #666;
            margin-top: 20px;
        }}
    </style>
</head>
<body>
    <div class="container">
        <h1>OpenCitations Data Quality Monitor</h1>
        <div class="section">
            <h2>Meta Monitor Results</h2>
            <div class="info-section">
                {meta_info}
            </div>
            {meta_table}
        </div>
        <div class="section">
            <h2>Index Monitor Results</h2>
            <div class="info-section">
                {index_info}
            </div>
            {index_table}
        </div>
        <div class="timestamp">
            Last updated: {datetime.now().strftime('%d/%m/%Y, %H:%M:%S')}
        </div>
    </div>
</body>
</html>
"""

# The page declares charset UTF-8, so write it as UTF-8 explicitly instead of
# relying on the platform's default encoding.
with open('index.html', 'w', encoding='utf-8') as f:
    f.write(html)
EOF | |
- name: Commit monitoring results to main
  # pull_request runs check out a detached merge commit, so a plain
  # `git push` has no branch to push and would fail the job.
  if: github.event_name != 'pull_request'
  run: |
    git config --local user.email "github-actions[bot]@users.noreply.github.com"
    git config --local user.name "github-actions[bot]"
    git add monitor_results/
    # Commit and push only when something is staged, so that a genuine
    # commit failure is not hidden behind the "nothing to commit" case.
    if git diff --cached --quiet; then
      echo "No changes to commit"
    else
      git commit -m "Add new monitoring results [automated]"
      git push
    fi
- name: Push index.html to gitwebsite branch
  # Publish only for push / scheduled runs: a pull_request run builds
  # unmerged code and should not update the published page.
  if: github.event_name != 'pull_request'
  run: |
    # Configure git
    git config --local user.email "github-actions[bot]@users.noreply.github.com"
    git config --local user.name "github-actions[bot]"
    # Park index.html outside the working tree while switching branches
    mv index.html /tmp/index.html
    # Check out the gitwebsite branch
    git fetch origin gitwebsite
    git checkout gitwebsite
    # Update only index.html
    mv /tmp/index.html index.html
    git add index.html
    # An unchanged page must not fail the job
    if git diff --cached --quiet; then
      echo "No changes to commit"
    else
      git commit -m "Update website [automated]"
      git push origin gitwebsite
    fi
    # Go back to the previously checked-out ref so that later steps
    # (e.g. the artifact upload of monitor_results/) see the original
    # working tree rather than the gitwebsite one.
    git checkout -
# v3 of the artifact actions has been retired by GitHub; v4 is the
# supported release.
- name: Upload monitoring results as artifacts
  uses: actions/upload-artifact@v4
  with:
    name: monitor-results
    path: monitor_results/