diff --git a/README.md b/README.md index f838ade..8874243 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ Celery Exporter is a Prometheus metrics exporter for Celery 4, written in python Here the list of exposed metrics: * `celery_tasks_total` exposes the number of tasks currently known to the queue - labeled by `name`, `state` and `namespace`. + labeled by `name`, `state`, `queue` and `namespace`. * `celery_tasks_runtime_seconds` tracks the number of seconds tasks take - until completed as histogram labeled by `name` and `namespace` + until completed as histogram labeled by `name`, `queue` and `namespace` * `celery_tasks_latency_seconds` exposes a histogram of task latency, i.e. the time until tasks are picked up by a worker * `celery_workers` exposes the number of currently probably alive workers diff --git a/celery_exporter/state.py b/celery_exporter/state.py index 316ce5f..fa0e58d 100644 --- a/celery_exporter/state.py +++ b/celery_exporter/state.py @@ -19,6 +19,15 @@ def __init__(self, max_tasks_in_memory=10000): self._queue_by_task = {} self._mutex = threading.Lock() + @classmethod + def _gen_wildcards(self, name): + chunked = name.split(".") + res = [name] + for elem in reversed(chunked): + chunked.pop() + res.append(".".join(chunked + ["*"])) + return res + @classmethod def get_config(self, app): res = dict() @@ -32,12 +41,17 @@ def get_config(self, app): for conf in confs.values(): default = conf.get("task_default_queue", CELERY_DEFAULT_QUEUE) if task_name in res and res[task_name] != default: - continue + break - try: + task_wildcard_names = self._gen_wildcards(task_name) + if "task_routes" in conf: routes = conf["task_routes"] - res[task_name] = routes[task_name]["queue"] - except KeyError: + res[task_name] = default + for i in task_wildcard_names: + if i in routes and "queue" in routes[i]: + res[task_name] = routes[i]["queue"] + break + else: res[task_name] = default return res diff --git a/dashboard/celery.json b/dashboard/celery.json new file mode 100644 index 0000000..fb4ba54 --- /dev/null +++ b/dashboard/celery.json @@ -0,0 +1,760 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1554977181871, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 17, + "panels": [], + "title": "Celery Summary", + "type": "row" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 5, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 32, + "interval": "", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "SUCCESS", + "color": "#629e51", + "zindex": -3 + }, + { + "alias": "FAILURE", + "color": "#bf1b00" + }, + { + "alias": "REVOKED", + "color": "#bf1b00" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum( rate(celery_tasks_total{namespace=~\"^$Namespace$\",name=~\"^$Name$\",state=~\"^$State$\"}[$Interval]) ) by (state)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{state}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Celery Events by State rate (average $Interval)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 5, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "decimals": 4, + "fill": 1, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum( rate( celery_tasks_total{namespace=~\"^$Namespace$\",name=~\"^$Name$\",state=~\"^$State$\"}[$Interval] ) ) by (name,state)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}/{{state}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Celery Events by Tasks rate (average $Interval)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": 1 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateViridis", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 17 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 4, + "legend": { + "show": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum ( rate( celery_tasks_runtime_seconds_bucket{namespace=~\"^$Namespace$\",name=~\"^$Name$\"}[$Interval] ) ) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Celery Tasks runtime rate (average $Interval)", + "tooltip": { + "show": true, + "showHistogram": false + }, + "tooltipDecimals": 2, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolatePurples", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 26 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 36, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum ( rate( celery_tasks_latency_seconds_bucket{namespace=~\"^$Namespace$\",name=~\"^$Name$\"}[$Interval] ) ) by (le)", + "format": "heatmap", + "interval": "15s", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Celery Task latency rate (by $Interval)", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(avg_over_time(celery_workers{namespace=~\"^$Namespace$\"}[$Interval]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Current Worker Count", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 25, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": 350, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 4, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "State", + "repeatDirection": "h", + "scopedVars": { + "State": { + "selected": true, + "text": "SUCCESS", + "value": "SUCCESS" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(15, sum( irate( celery_tasks_total{namespace=~\"^$Namespace$\",name=~\"^$Name$\",state=~\"$State\"}[1m] ) ) by (name))", + "format": "time_series", + "interval": "$Interval", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Top 15 Tasks $State", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Top Tasks", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 18, + "style": "dark", + "tags": [ + "prometheus", + "celery" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(celery_tasks_total,namespace)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "Namespace", + "options": [], + "query": "label_values(celery_tasks_total,namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(celery_tasks_total,name)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Name", + "options": [], + "query": "label_values(celery_tasks_total,name)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "label_values(celery_tasks_total{queue=\"$tag\"}, name)", + "tags": [ + "$__all" + ], + "tagsQuery": "label_values(celery_tasks_total,queue)", + "type": "query", + "useTags": true + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(celery_tasks_total,state)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "State", + "options": [], + "query": "label_values(celery_tasks_total,state)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": true, + "auto_count": 200, + "auto_min": "30s", + "current": { + "text": "auto", + "value": "$__auto_interval_Interval" + }, + "hide": 0, + "label": null, + "name": "Interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_Interval" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,1m,5m,15m,30m,1h", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Celery Monitoring", + "uid": "wHQ6LNgZz", + "version": 1 +} diff --git a/setup.py b/setup.py index 742cf81..7c02b5b 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ description="Prometheus metrics exporter for Celery", long_description=long_description, long_description_content_type="text/markdown", - version="1.3.1", + version="1.3.2", author="Fabio Todaro", license="MIT", author_email="fbregist@gmail.com", diff --git a/test/test_unit.py b/test/test_unit.py index 8c56b09..87a24e8 100644 --- a/test/test_unit.py +++ b/test/test_unit.py @@ -16,7 +16,7 @@ setup_metrics, ) -from celery_exporter.state import CELERY_MISSING_DATA +from celery_exporter.state import CELERY_MISSING_DATA, CeleryState from celery_test_utils import BaseTest, get_celery_app @@ -33,6 +33,7 @@ def setUp(self): "celery@adsqas78e891": { "task_routes": {"my_task": {}, "trial": {"queue": "deadbeef"}} }, + "celery@12311847jsa2": {}, } registered.return_value = {"celery@d6f95e9e24fc": [self.task, "trial"]} setup_metrics(self.app, self.namespace) # reset metrics @@ -290,6 +291,22 @@ def test_enable_events(self): e.enable_events() mock_enable_events.assert_called_once_with() + def test_gen_wildcards(self): + strings = { + "aaa.bbb.ccc.ddd": [ + "aaa.bbb.ccc.ddd", + "aaa.bbb.ccc.*", + "aaa.bbb.*", + "aaa.*", + "*", + ], + "aaa.bbb": ["aaa.bbb", "aaa.*", "*"], + "aaa": ["aaa", "*"], + } + for case, expectation in strings.items(): + result = CeleryState._gen_wildcards(case) + assert result == expectation + def _assert_task_states(self, states, cnt): for state in states: task_by_name_label = dict(