# SPARQL Monitor Workflow #15
# Runs the SPARQL monitor, commits its reports to main, and publishes a
# summary page (index.html) to the gitwebsite branch.
name: SPARQL Monitor Workflow

on:
  push:
    branches: [main]
    paths-ignore:
      - 'monitor_results/**'
  pull_request:
    branches: [main]
    paths-ignore:
      - 'monitor_results/**'
  schedule:
    # 03:00 UTC on days 1-7 of every month. Cron cannot express "first Monday"
    # on its own: when both day-of-month and day-of-week are restricted, a
    # match on EITHER field triggers the run. The `gate` job below therefore
    # lets scheduled runs through only when the day is a Monday.
    - cron: '0 3 1-7 * *'

jobs:
  gate:
    runs-on: ubuntu-latest
    outputs:
      should_run: ${{ steps.check.outputs.should_run }}
    steps:
      - name: Allow scheduled runs only on the first Monday of the month
        id: check
        run: |
          # `date +%u` prints the ISO weekday (1 = Monday). Push and
          # pull_request events are never filtered.
          if [ "$GITHUB_EVENT_NAME" = "schedule" ] && [ "$(date -u +%u)" != "1" ]; then
            echo "should_run=false" >> "$GITHUB_OUTPUT"
          else
            echo "should_run=true" >> "$GITHUB_OUTPUT"
          fi

  monitor:
    needs: gate
    if: needs.gate.outputs.should_run == 'true'
    runs-on: ubuntu-latest
    permissions:
      # Required by the two steps that push to main and gitwebsite.
      contents: write
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install beautifulsoup4

      - name: Run SPARQL monitor
        run: |
          cd monitor
          python -m main

      - name: Create website from HTML reports
        env:
          # Makes the "Last updated" timestamp use Italian local time.
          TZ: Europe/Rome
        run: |
          python3 << 'EOF'
          import glob
          import os
          from datetime import datetime
          from html import escape

          from bs4 import BeautifulSoup

          # Labels looked up in the report's <p> elements, in matching priority.
          LABELS = ("Collection:", "Endpoint:", "Date and Time:", "Total Running Time:")


          def find_latest_html(base_path, pattern):
              """Return the last file matching `pattern` in the newest sub-directory.

              "Newest" and "last" are decided by lexicographic order of the names.
              NOTE(review): this presumes sortable names (e.g. ISO dates) - confirm.
              Raises Exception when there is no sub-directory or no matching file.
              """
              # Ignore stray files (e.g. .gitkeep) so they cannot win the sort.
              date_dirs = sorted(
                  name for name in os.listdir(base_path)
                  if os.path.isdir(os.path.join(base_path, name))
              )
              if not date_dirs:
                  raise Exception(f"No directories found in {base_path}")
              latest_dir = os.path.join(base_path, date_dirs[-1])
              html_files = glob.glob(os.path.join(latest_dir, pattern))
              if not html_files:
                  raise Exception(f"No HTML files matching {pattern} found in {latest_dir}")
              return sorted(html_files)[-1]


          def extract_content(html_file):
              """Return (info_html, table_html) extracted from one monitor report.

              info_html holds one <p> per entry of LABELS; a label that is not
              found in the report is rendered with an empty value.
              """
              with open(html_file, 'r', encoding='utf-8') as f:
                  soup = BeautifulSoup(f.read(), 'html.parser')

              # Extract the results table; avoid rendering the string "None".
              table_tag = soup.find('table')
              if table_tag is None:
                  table = '<p>No results table found in the report.</p>'
              else:
                  table = str(table_tag)

              # Extract the summary fields from the report's paragraphs.
              values = dict.fromkeys(LABELS, "")
              for p in soup.find_all('p'):
                  text = p.get_text()
                  for label in LABELS:
                      if label in text:
                          values[label] = text.replace(label, "").strip()
                          break

              # get_text() decodes entities, so re-escape before embedding.
              info = "\n".join(
                  f"<p><strong>{label}</strong> {escape(values[label])}</p>"
                  for label in LABELS
              )
              return info, table


          # Locate the most recent HTML reports.
          meta_file = find_latest_html('monitor_results/meta_reports', 'meta_monitor_vis*.html')
          index_file = find_latest_html('monitor_results/index_reports', 'index_monitor_vis*.html')
          print(f"Found meta file: {meta_file}")
          print(f"Found index file: {index_file}")

          # Extract their contents.
          meta_info, meta_table = extract_content(meta_file)
          index_info, index_table = extract_content(index_file)
          generated_at = datetime.now().strftime('%d/%m/%Y, %H:%M:%S')

          page = f"""<!DOCTYPE html>
          <html lang="en">
          <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>OpenCitations Data Quality Monitor</title>
            <style>
              body {{
                font-family: Arial, sans-serif;
                margin: 0;
                padding: 20px;
                background-color: #f5f5f5;
              }}
              .container {{
                max-width: 1200px;
                margin: 0 auto;
                background-color: white;
                padding: 20px;
                border-radius: 8px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
              }}
              h1 {{
                color: #333;
                text-align: center;
              }}
              .section {{
                margin: 20px 0;
                padding: 20px;
                border: 1px solid #ddd;
                border-radius: 4px;
              }}
              .info-section {{
                margin-bottom: 20px;
                padding: 15px;
                background-color: #f8f9fa;
                border-radius: 4px;
              }}
              .info-section p {{
                margin: 8px 0;
                color: #444;
              }}
              table {{
                width: 100%;
                border-collapse: collapse;
                margin-top: 10px;
              }}
              th, td {{
                padding: 12px;
                text-align: left;
                border-bottom: 1px solid #ddd;
              }}
              th {{
                background-color: #f8f8f8;
              }}
              .passed {{
                color: green;
              }}
              .failed {{
                color: red;
              }}
              .error {{
                color: orange;
              }}
              .null {{
                color: #999;
              }}
              .timestamp {{
                text-align: center;
                color: #666;
                margin-top: 20px;
              }}
            </style>
          </head>
          <body>
            <div class="container">
              <h1>OpenCitations Data Quality Monitor</h1>
              <div class="section">
                <h2>Meta Monitor Results</h2>
                <div class="info-section">
                  {meta_info}
                </div>
                {meta_table}
              </div>
              <div class="section">
                <h2>Index Monitor Results</h2>
                <div class="info-section">
                  {index_info}
                </div>
                {index_table}
              </div>
              <div class="timestamp">
                Last updated: {generated_at}
              </div>
            </div>
          </body>
          </html>
          """

          # The page declares UTF-8, so write it as UTF-8 explicitly.
          with open('index.html', 'w', encoding='utf-8') as f:
              f.write(page)
          EOF

      # Must run before the branch switch below: checking out gitwebsite
      # removes the freshly committed monitor_results/ from the working tree.
      - name: Upload monitoring results as artifacts
        uses: actions/upload-artifact@v4
        with:
          name: monitor-results
          path: monitor_results/

      # Pull-request runs check out a detached merge ref, so there is no
      # branch to push to; results are only persisted for push/schedule runs.
      - name: Commit monitoring results to main
        if: github.event_name != 'pull_request'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add monitor_results/
          git commit -m "Add new monitoring results [automated]" || echo "No changes to commit"
          git push

      - name: Push index.html to gitwebsite branch
        if: github.event_name != 'pull_request'
        run: |
          # Configure git
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          # Park index.html outside the repository during the branch switch
          mv index.html /tmp/index.html
          # Check out the gitwebsite branch
          git fetch origin gitwebsite
          git checkout gitwebsite
          # Update index.html only
          mv /tmp/index.html index.html
          git add index.html
          # Commit only when something is staged, so an unchanged page does
          # not fail the job
          git diff --cached --quiet || git commit -m "Update website [automated]"
          git push origin gitwebsite