From 37fcda381783db737cfcd75c8935d378b0fd3ce4 Mon Sep 17 00:00:00 2001 From: m0duspwnens Date: Wed, 21 Jul 2021 14:30:41 -0400 Subject: [PATCH] add cpu row and panels to overview dashboard --- salt/grafana/defaults.yaml | 63 +++++- .../panels/cpu_usage_current_graph.json.jinja | 206 +++++++++--------- ...a => cpu_usage_tasks_all_graph.json.jinja} | 8 +- .../cpu_usage_tasks_blocked_graph.json.jinja | 132 +++++++++++ .../cpu_usage_tasks_paging_graph.json.jinja | 132 +++++++++++ .../cpu_usage_tasks_running_graph.json.jinja | 132 +++++++++++ .../cpu_usage_tasks_sleeping_graph.json.jinja | 132 +++++++++++ .../cpu_usage_tasks_stopped_graph.json.jinja | 132 +++++++++++ .../cpu_usage_tasks_unknown_graph.json.jinja | 132 +++++++++++ .../cpu_usage_tasks_zombies_graph.json.jinja | 132 +++++++++++ salt/grafana/panels/io_wait_graph.json.jinja | 161 ++++++++++++++ salt/telegraf/etc/telegraf.conf | 2 + 12 files changed, 1257 insertions(+), 107 deletions(-) rename salt/grafana/panels/{cpu_usage_tasks_graph.json.jinja => cpu_usage_tasks_all_graph.json.jinja} (94%) create mode 100644 salt/grafana/panels/cpu_usage_tasks_blocked_graph.json.jinja create mode 100644 salt/grafana/panels/cpu_usage_tasks_paging_graph.json.jinja create mode 100644 salt/grafana/panels/cpu_usage_tasks_running_graph.json.jinja create mode 100644 salt/grafana/panels/cpu_usage_tasks_sleeping_graph.json.jinja create mode 100644 salt/grafana/panels/cpu_usage_tasks_stopped_graph.json.jinja create mode 100644 salt/grafana/panels/cpu_usage_tasks_unknown_graph.json.jinja create mode 100644 salt/grafana/panels/cpu_usage_tasks_zombies_graph.json.jinja create mode 100644 salt/grafana/panels/io_wait_graph.json.jinja diff --git a/salt/grafana/defaults.yaml b/salt/grafana/defaults.yaml index 9adaa104e..97e04f63d 100644 --- a/salt/grafana/defaults.yaml +++ b/salt/grafana/defaults.yaml @@ -48,6 +48,57 @@ grafana: y: 1 h: 8 w: 24 + io_wait_graph: + gridPos: + x: 0 + y: 9 + h: 8 + w: 24 + cpu_usage_tasks_running_graph: + gridPos: + x: 0 + y: 17 + h: 8 + w: 24 + cpu_usage_tasks_zombies_graph: + gridPos: + x: 0 + y: 25 + h: 8 + w: 12 + cpu_usage_tasks_stopped_graph: + gridPos: + x: 12 + y: 25 + h: 8 + w: 12 + cpu_usage_tasks_sleeping_graph: + gridPos: + x: 0 + y: 33 + h: 8 + w: 12 + cpu_usage_tasks_blocked_graph: + gridPos: + x: 12 + y: 33 + h: 8 + w: 12 + cpu_usage_tasks_paging_graph: + gridPos: + x: 0 + y: 41 + h: 8 + w: 12 + cpu_usage_tasks_unknown_graph: + gridPos: + x: 12 + y: 41 + h: 8 + w: 12 + + + standalone: templating: @@ -162,7 +213,7 @@ grafana: y: 9 h: 1 w: 24 - cpu_usage_tasks_graph: + cpu_usage_tasks_all_graph: gridPos: x: 0 y: 10 @@ -566,7 +617,7 @@ grafana: y: 9 h: 1 w: 24 - cpu_usage_tasks_graph: + cpu_usage_tasks_all_graph: gridPos: x: 0 y: 10 @@ -895,7 +946,7 @@ grafana: y: 9 h: 1 w: 24 - cpu_usage_tasks_graph: + cpu_usage_tasks_all_graph: gridPos: x: 0 y: 10 @@ -1242,7 +1293,7 @@ grafana: y: 9 h: 1 w: 24 - cpu_usage_tasks_graph: + cpu_usage_tasks_all_graph: gridPos: x: 0 y: 10 @@ -1575,7 +1626,7 @@ grafana: y: 9 h: 1 w: 24 - cpu_usage_tasks_graph: + cpu_usage_tasks_all_graph: gridPos: x: 0 y: 10 @@ -1890,7 +1941,7 @@ grafana: y: 9 h: 1 w: 24 - cpu_usage_tasks_graph: + cpu_usage_tasks_all_graph: gridPos: x: 0 y: 10 diff --git a/salt/grafana/panels/cpu_usage_current_graph.json.jinja b/salt/grafana/panels/cpu_usage_current_graph.json.jinja index 34a29172e..555d23221 100644 --- a/salt/grafana/panels/cpu_usage_current_graph.json.jinja +++ b/salt/grafana/panels/cpu_usage_current_graph.json.jinja @@ -1,6 +1,14 @@ { - "type": "graph", - "title": "CPU Usage", + "aliasColors": {}, + "dashLength": 10, + "datasource": "InfluxDB", + "decimals": 1, + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, "gridPos": { "x": {{ PANELS.cpu_usage_current_graph.gridPos.x }}, "y": {{ PANELS.cpu_usage_current_graph.gridPos.y }}, @@ -8,46 +16,67 @@ "h": {{ PANELS.cpu_usage_current_graph.gridPos.h }} }, "id": 69001, + "interval": "30", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "maxDataPoints": 750, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "pluginVersion": "7.5.4", + "pointradius": 2, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "targets": [ { - "refId": "A", - "queryType": "randomWalk", - "policy": "default", - "resultFormat": "time_series", - "orderByTime": "ASC", - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$servername$/" - }, - { - "condition": "AND", - "key": "cpu", - "operator": "=", - "value": "cpu-total" - } - ], + "alias": "$tag_host $tag_role", "groupBy": [ { - "type": "time", "params": [ "$__interval" - ] + ], + "type": "time" + }, + { + "params": [ + "host" + ], + "type": "tag" }, { "type": "tag", "params": [ - "host" + "role" ] }, { - "type": "fill", "params": [ "null" - ] + ], + "type": "fill" } ], + "measurement": "cpu", + "orderByTime": "ASC", + "policy": "default", + "queryType": "randomWalk", + "refId": "A", + "resultFormat": "time_series", "select": [ [ { @@ -68,93 +97,76 @@ } ] ], - "measurement": "cpu", - "alias": "$tag_host" + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$servername$/" + }, + { + "condition": "AND", + "key": "cpu", + "operator": "=", + "value": "cpu-total" + }, + { + "condition": "AND", + "key": "role", + "operator": "=~", + "value": "/^$role$/" + } + ] } ], - "options": { - "alertThreshold": true + "thresholds": [], + "timeRegions": [], + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" }, - "datasource": "InfluxDB", - "fieldConfig": { - "defaults": { - "unit": "percent" - }, - "overrides": [] - }, - "pluginVersion": "7.5.4", - "renderer": "flot", - "yaxes": [ - { - "label": null, - "show": true, - "logBase": 1, - "min": "0", - "max": null, - "format": "percent", - "$$hashKey": "object:933" - }, - { - "label": null, - "show": true, - "logBase": 1, - "min": null, - "max": null, - "format": "short", - "$$hashKey": "object:934" - } - ], + "type": "graph", "xaxis": { - "show": true, + "buckets": null, "mode": "time", "name": null, - "values": [], - "buckets": null + "show": true, + "values": [] }, + "yaxes": [ + { + "$$hashKey": "object:933", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:934", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], "yaxis": { "align": false, "alignLevel": null }, - "lines": true, - "linewidth": 1, - "dashLength": 10, - "spaceLength": 10, - "pointradius": 2, - "legend": { - "show": true, - "values": true, - "min": false, - "max": true, - "current": true, - "sort": "current", - "sortDesc": true, - "total": false, - "avg": true, - "alignAsTable": true, - "rightSide": true - }, - "nullPointMode": "connected", - "tooltip": { - "value_type": "individual", - "shared": true, - "sort": 2 - }, - "aliasColors": {}, - "seriesOverrides": [], - "thresholds": [], - "timeRegions": [], - "decimals": 1, + "bars": false, + "dashes": false, "fill": 0, "fillGradient": 0, - "dashes": false, "hiddenSeries": false, - "points": false, - "bars": false, - "stack": false, "percentage": false, + "points": false, + "stack": false, "steppedLine": false, "timeFrom": null, - "timeShift": null, - "interval": "30", - "maxDataPoints": 750 + "timeShift": null } diff --git a/salt/grafana/panels/cpu_usage_tasks_graph.json.jinja b/salt/grafana/panels/cpu_usage_tasks_all_graph.json.jinja similarity index 94% rename from salt/grafana/panels/cpu_usage_tasks_graph.json.jinja rename to salt/grafana/panels/cpu_usage_tasks_all_graph.json.jinja index 9f51d6648..e865f446f 100644 --- a/salt/grafana/panels/cpu_usage_tasks_graph.json.jinja +++ b/salt/grafana/panels/cpu_usage_tasks_all_graph.json.jinja @@ -8,10 +8,10 @@ }, "fill": 1, "gridPos": { - "x": {{ PANELS.cpu_usage_tasks_graph.gridPos.x }}, - "y": {{ PANELS.cpu_usage_tasks_graph.gridPos.y }}, - "w": {{ PANELS.cpu_usage_tasks_graph.gridPos.w }}, - "h": {{ PANELS.cpu_usage_tasks_graph.gridPos.h }} + "x": {{ PANELS.cpu_usage_tasks_all_graph.gridPos.x }}, + "y": {{ PANELS.cpu_usage_tasks_all_graph.gridPos.y }}, + "w": {{ PANELS.cpu_usage_tasks_all_graph.gridPos.w }}, + "h": {{ PANELS.cpu_usage_tasks_all_graph.gridPos.h }} }, "id": 61871, "legend": { diff --git a/salt/grafana/panels/cpu_usage_tasks_blocked_graph.json.jinja b/salt/grafana/panels/cpu_usage_tasks_blocked_graph.json.jinja new file mode 100644 index 000000000..7d9020445 --- /dev/null +++ b/salt/grafana/panels/cpu_usage_tasks_blocked_graph.json.jinja @@ -0,0 +1,132 @@ +{ + "type": "graph", + "title": "CPU Tasks Blocked", + "gridPos": { + "x": {{ PANELS.cpu_usage_tasks_blocked_graph.gridPos.x }}, + "y": {{ PANELS.cpu_usage_tasks_blocked_graph.gridPos.y }}, + "w": {{ PANELS.cpu_usage_tasks_blocked_graph.gridPos.w }}, + "h": {{ PANELS.cpu_usage_tasks_blocked_graph.gridPos.h }} + }, + "id": 69005, + "targets": [ + { + "refId": "A", + "queryType": "randomWalk", + "policy": "default", + "resultFormat": "time_series", + "orderByTime": "ASC", + "tags": [], + "groupBy": [ + { + "type": "time", + "params": [ + "$__interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "query": "SELECT mean(blocked) as blocked FROM \"processes\" WHERE host =~ /$servername$/ AND $timeFilter GROUP BY time($__interval), host, role ORDER BY asc", + "rawQuery": true, + "alias": "$tag_host $tag_role: $col" + } + ], + "options": { + "alertThreshold": true + }, + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.4", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:412" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:413" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "nullPointMode": "connected", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 2 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "fill": 0, + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null +} diff --git a/salt/grafana/panels/cpu_usage_tasks_paging_graph.json.jinja b/salt/grafana/panels/cpu_usage_tasks_paging_graph.json.jinja new file mode 100644 index 000000000..ef963eb47 --- /dev/null +++ b/salt/grafana/panels/cpu_usage_tasks_paging_graph.json.jinja @@ -0,0 +1,132 @@ +{ + "type": "graph", + "title": "CPU Tasks Paging", + "gridPos": { + "x": {{ PANELS.cpu_usage_tasks_paging_graph.gridPos.x }}, + "y": {{ PANELS.cpu_usage_tasks_paging_graph.gridPos.y }}, + "w": {{ PANELS.cpu_usage_tasks_paging_graph.gridPos.w }}, + "h": {{ PANELS.cpu_usage_tasks_paging_graph.gridPos.h }} + }, + "id": 69008, + "targets": [ + { + "refId": "A", + "queryType": "randomWalk", + "policy": "default", + "resultFormat": "time_series", + "orderByTime": "ASC", + "tags": [], + "groupBy": [ + { + "type": "time", + "params": [ + "$__interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "query": "SELECT mean(paging) as paging FROM \"processes\" WHERE host =~ /$servername$/ AND $timeFilter GROUP BY time($__interval), host, role ORDER BY asc", + "rawQuery": true, + "alias": "$tag_host $tag_role: $col" + } + ], + "options": { + "alertThreshold": true + }, + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.4", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:412" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:413" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "nullPointMode": "connected", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 2 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "fill": 0, + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null +} diff --git a/salt/grafana/panels/cpu_usage_tasks_running_graph.json.jinja b/salt/grafana/panels/cpu_usage_tasks_running_graph.json.jinja new file mode 100644 index 000000000..fdbc20719 --- /dev/null +++ b/salt/grafana/panels/cpu_usage_tasks_running_graph.json.jinja @@ -0,0 +1,132 @@ +{ + "type": "graph", + "title": "CPU Tasks Running", + "gridPos": { + "x": {{ PANELS.cpu_usage_tasks_running_graph.gridPos.x }}, + "y": {{ PANELS.cpu_usage_tasks_running_graph.gridPos.y }}, + "w": {{ PANELS.cpu_usage_tasks_running_graph.gridPos.w }}, + "h": {{ PANELS.cpu_usage_tasks_running_graph.gridPos.h }} + }, + "id": 69003, + "targets": [ + { + "refId": "A", + "queryType": "randomWalk", + "policy": "default", + "resultFormat": "time_series", + "orderByTime": "ASC", + "tags": [], + "groupBy": [ + { + "type": "time", + "params": [ + "$__interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "query": "SELECT mean(running) as running FROM \"processes\" WHERE host =~ /$servername$/ AND $timeFilter GROUP BY time($__interval), host, role ORDER BY asc", + "rawQuery": true, + "alias": "$tag_host $tag_role: $col" + } + ], + "options": { + "alertThreshold": true + }, + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.4", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:412" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:413" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "nullPointMode": "connected", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 2 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "fill": 0, + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null +} diff --git a/salt/grafana/panels/cpu_usage_tasks_sleeping_graph.json.jinja b/salt/grafana/panels/cpu_usage_tasks_sleeping_graph.json.jinja new file mode 100644 index 000000000..4aadfb2cc --- /dev/null +++ b/salt/grafana/panels/cpu_usage_tasks_sleeping_graph.json.jinja @@ -0,0 +1,132 @@ +{ + "type": "graph", + "title": "CPU Tasks Sleeping", + "gridPos": { + "x": {{ PANELS.cpu_usage_tasks_sleeping_graph.gridPos.x }}, + "y": {{ PANELS.cpu_usage_tasks_sleeping_graph.gridPos.y }}, + "w": {{ PANELS.cpu_usage_tasks_sleeping_graph.gridPos.w }}, + "h": {{ PANELS.cpu_usage_tasks_sleeping_graph.gridPos.h }} + }, + "id": 69006, + "targets": [ + { + "refId": "A", + "queryType": "randomWalk", + "policy": "default", + "resultFormat": "time_series", + "orderByTime": "ASC", + "tags": [], + "groupBy": [ + { + "type": "time", + "params": [ + "$__interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "query": "SELECT mean(sleeping) as sleeping FROM \"processes\" WHERE host =~ /$servername$/ AND $timeFilter GROUP BY time($__interval), host, role ORDER BY asc", + "rawQuery": true, + "alias": "$tag_host $tag_role: $col" + } + ], + "options": { + "alertThreshold": true + }, + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.4", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:412" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:413" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "nullPointMode": "connected", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 2 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "fill": 0, + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null +} diff --git a/salt/grafana/panels/cpu_usage_tasks_stopped_graph.json.jinja b/salt/grafana/panels/cpu_usage_tasks_stopped_graph.json.jinja new file mode 100644 index 000000000..4905d2e6d --- /dev/null +++ b/salt/grafana/panels/cpu_usage_tasks_stopped_graph.json.jinja @@ -0,0 +1,132 @@ +{ + "type": "graph", + "title": "CPU Tasks Stopped", + "gridPos": { + "x": {{ PANELS.cpu_usage_tasks_stopped_graph.gridPos.x }}, + "y": {{ PANELS.cpu_usage_tasks_stopped_graph.gridPos.y }}, + "w": {{ PANELS.cpu_usage_tasks_stopped_graph.gridPos.w }}, + "h": {{ PANELS.cpu_usage_tasks_stopped_graph.gridPos.h }} + }, + "id": 69007, + "targets": [ + { + "refId": "A", + "queryType": "randomWalk", + "policy": "default", + "resultFormat": "time_series", + "orderByTime": "ASC", + "tags": [], + "groupBy": [ + { + "type": "time", + "params": [ + "$__interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "query": "SELECT mean(stopped) as stopped FROM \"processes\" WHERE host =~ /$servername$/ AND $timeFilter GROUP BY time($__interval), host, role ORDER BY asc", + "rawQuery": true, + "alias": "$tag_host $tag_role: $col" + } + ], + "options": { + "alertThreshold": true + }, + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.4", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:412" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:413" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "nullPointMode": "connected", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 2 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "fill": 0, + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null +} diff --git a/salt/grafana/panels/cpu_usage_tasks_unknown_graph.json.jinja b/salt/grafana/panels/cpu_usage_tasks_unknown_graph.json.jinja new file mode 100644 index 000000000..c86a97b72 --- /dev/null +++ b/salt/grafana/panels/cpu_usage_tasks_unknown_graph.json.jinja @@ -0,0 +1,132 @@ +{ + "type": "graph", + "title": "CPU Tasks Unknown", + "gridPos": { + "x": {{ PANELS.cpu_usage_tasks_unknown_graph.gridPos.x }}, + "y": {{ PANELS.cpu_usage_tasks_unknown_graph.gridPos.y }}, + "w": {{ PANELS.cpu_usage_tasks_unknown_graph.gridPos.w }}, + "h": {{ PANELS.cpu_usage_tasks_unknown_graph.gridPos.h }} + }, + "id": 69009, + "targets": [ + { + "refId": "A", + "queryType": "randomWalk", + "policy": "default", + "resultFormat": "time_series", + "orderByTime": "ASC", + "tags": [], + "groupBy": [ + { + "type": "time", + "params": [ + "$__interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "query": "SELECT mean(unknown) as unknown FROM \"processes\" WHERE host =~ /$servername$/ AND $timeFilter GROUP BY time($__interval), host, role ORDER BY asc", + "rawQuery": true, + "alias": "$tag_host $tag_role: $col" + } + ], + "options": { + "alertThreshold": true + }, + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.4", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:412" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:413" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "nullPointMode": "connected", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 2 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "fill": 0, + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null +} diff --git a/salt/grafana/panels/cpu_usage_tasks_zombies_graph.json.jinja b/salt/grafana/panels/cpu_usage_tasks_zombies_graph.json.jinja new file mode 100644 index 000000000..1a4b83aea --- /dev/null +++ b/salt/grafana/panels/cpu_usage_tasks_zombies_graph.json.jinja @@ -0,0 +1,132 @@ +{ + "type": "graph", + "title": "CPU Tasks Zombies", + "gridPos": { + "x": {{ PANELS.cpu_usage_tasks_zombies_graph.gridPos.x }}, + "y": {{ PANELS.cpu_usage_tasks_zombies_graph.gridPos.y }}, + "w": {{ PANELS.cpu_usage_tasks_zombies_graph.gridPos.w }}, + "h": {{ PANELS.cpu_usage_tasks_zombies_graph.gridPos.h }} + }, + "id": 69004, + "targets": [ + { + "refId": "A", + "queryType": "randomWalk", + "policy": "default", + "resultFormat": "time_series", + "orderByTime": "ASC", + "tags": [], + "groupBy": [ + { + "type": "time", + "params": [ + "$__interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "query": "SELECT mean(zombies) as zombies FROM \"processes\" WHERE host =~ /$servername$/ AND $timeFilter GROUP BY time($__interval), host, role ORDER BY asc", + "rawQuery": true, + "alias": "$tag_host $tag_role: $col" + } + ], + "options": { + "alertThreshold": true + }, + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.4", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:412" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:413" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "nullPointMode": "connected", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 2 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "fill": 0, + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null +} diff --git a/salt/grafana/panels/io_wait_graph.json.jinja b/salt/grafana/panels/io_wait_graph.json.jinja new file mode 100644 index 000000000..73057830e --- /dev/null +++ b/salt/grafana/panels/io_wait_graph.json.jinja @@ -0,0 +1,161 @@ +{ + "type": "graph", + "title": "IO Wait", + "gridPos": { + "x": {{ PANELS.io_wait_graph.gridPos.x }}, + "y": {{ PANELS.io_wait_graph.gridPos.y }}, + "w": {{ PANELS.io_wait_graph.gridPos.w }}, + "h": {{ PANELS.io_wait_graph.gridPos.h }} + }, + "id": 69011, + "targets": [ + { + "refId": "A", + "queryType": "randomWalk", + "policy": "default", + "resultFormat": "time_series", + "orderByTime": "ASC", + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/^$servername$/" + }, + { + "condition": "AND", + "key": "cpu", + "operator": "=", + "value": "cpu-total" + }, + { + "condition": "AND", + "key": "role", + "operator": "=~", + "value": "/^$role$/" + } + ], + "groupBy": [ + { + "type": "time", + "params": [ + "$__interval" + ] + }, + { + "type": "tag", + "params": [ + "host" + ] + }, + { + "type": "tag", + "params": [ + "role" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "usage_iowait" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "measurement": "cpu", + "alias": "$tag_host $tag_role" + } + ], + "options": { + "alertThreshold": true + }, + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.4", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": "0", + "max": null, + "format": "percent", + "$$hashKey": "object:1740" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:1741" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "nullPointMode": "connected", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 2 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "fill": 0, + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "decimals": 1, + "description": "" +} diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index d8620a082..b04b1549d 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -27,6 +27,8 @@ # rack = "1a" ## Environment variables can be used as tags, and throughout the config file # user = "$USER" + role = "{{ grains.id.split('_') | last }}" + {% if grains['role'] == 'so-helix' %} meta_cbid = "{{ UNIQUEID }}"