From 76d63bb2ad3111d14149a5ea494e2e925f576040 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 23 Apr 2025 15:00:21 -0500 Subject: [PATCH 1/4] remove unused HOST_PROC env --- salt/telegraf/enabled.sls | 1 - 1 file changed, 1 deletion(-) diff --git a/salt/telegraf/enabled.sls b/salt/telegraf/enabled.sls index 8e3bc9fbe..b64481e19 100644 --- a/salt/telegraf/enabled.sls +++ b/salt/telegraf/enabled.sls @@ -20,7 +20,6 @@ so-telegraf: - user: 939 - group_add: 939,920 - environment: - - HOST_PROC=/host/proc - HOST_ETC=/host/etc - HOST_SYS=/host/sys - HOST_MOUNT_PREFIX=/host From 196e0c14861ff5b4ff088d04504230484d46cc1c Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 23 Apr 2025 15:01:48 -0500 Subject: [PATCH 2/4] change root bind so existing references to 'r[\"path\"] == \"/\"' work as expected --- salt/telegraf/enabled.sls | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/telegraf/enabled.sls b/salt/telegraf/enabled.sls index b64481e19..8c71ecac3 100644 --- a/salt/telegraf/enabled.sls +++ b/salt/telegraf/enabled.sls @@ -37,7 +37,7 @@ so-telegraf: - /opt/so/conf/telegraf/node_config.json:/etc/telegraf/node_config.json:ro - /var/run/utmp:/var/run/utmp:ro - /var/run/docker.sock:/var/run/docker.sock:ro - - /:/host/root:ro + - /:/host:ro - /sys:/host/sys:ro - /proc:/host/proc:ro - /nsm:/host/nsm:ro From 400739736debd8814731c668a173531885b33b63 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 23 Apr 2025 15:02:23 -0500 Subject: [PATCH 3/4] add monitored mounts, ignores docker overlays --- salt/telegraf/etc/telegraf.conf | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 533627bbd..4c2318c02 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -113,7 +113,13 @@ [[inputs.disk]] ## By default stats
will be gathered for all mount points. ## Set mount_points will restrict the stats to only the specified mount points. - #mount_points = ["/", "/host/nsm"] + mount_points = ["/host", + "/host/nsm", + "/host/var", + "/host/var/log", + "/host/var/log/audit", + "/host/var/tmp" + ] ## Ignore mount points by filesystem type. #ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] From 9e4c456eb97dce98fb0f2e53250457d5a7a36225 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 23 Apr 2025 15:02:57 -0500 Subject: [PATCH 4/4] fix nsm influxdb alert --- salt/influxdb/templates/alarm_nsm_disk.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/influxdb/templates/alarm_nsm_disk.json b/salt/influxdb/templates/alarm_nsm_disk.json index 691f8c0e8..a3b10f7f3 100644 --- a/salt/influxdb/templates/alarm_nsm_disk.json +++ b/salt/influxdb/templates/alarm_nsm_disk.json @@ -5,10 +5,10 @@ "name": "alarm-nsm-disk" }, "spec": { - "description": "Percent used space on the root partition of at least one node has exceeded the alarm threshold.", + "description": "Percent used space on the nsm partition of at least one node has exceeded the alarm threshold.", "every": "1m0s", "name": "NSM Disk High Usage", - "query": "from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> aggregateWindow(every: 1m, fn: max, createEmpty: false)\n |> yield(name: \"max\")", + "query": "from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> aggregateWindow(every: 1m, fn: max, createEmpty: false)\n |> yield(name: 
\"max\")", "status": "active", "statusMessageTemplate": "Check: ${ r._check_name } is: ${ r._level }", "thresholds": [