influx upgrade

This commit is contained in:
Jason Ertel
2023-02-13 19:30:10 -05:00
parent e3ca0345a8
commit 0890129c69
8 changed files with 83 additions and 1 deletions

View File

@@ -0,0 +1,18 @@
{
"apiVersion": "influxdata.com/v2alpha1",
"kind": "CheckDeadman",
"metadata": {
"name": "alarm-deadman"
},
"spec": {
"description": "Data has not been received from Telegraf for an extended period.",
"every": "1m0s",
"level": "CRIT",
"name": "Telegraf Data Outage",
"query": "from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")",
"staleTime": "15m0s",
"status": "active",
"statusMessageTemplate": "Check: ${ r._check_name } is: ${ r._level }",
"timeSince": "2m0s"
}
}

View File

@@ -0,0 +1,32 @@
{
"apiVersion": "influxdata.com/v2alpha1",
"kind": "CheckThreshold",
"metadata": {
"name": "alarm-nsm-disk"
},
"spec": {
"description": "Percent used space on the root partition of at least one node has exceeded the alarm threshold.",
"every": "1m0s",
"name": "NSM Disk High Usage",
"query": "from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> aggregateWindow(every: 1m, fn: max, createEmpty: false)\n |> yield(name: \"max\")",
"status": "active",
"statusMessageTemplate": "Check: ${ r._check_name } is: ${ r._level }",
"thresholds": [
{
"level": "CRIT",
"type": "greater",
"value": 95
},
{
"level": "INFO",
"type": "greater",
"value": 85
},
{
"level": "WARN",
"type": "greater",
"value": 90
}
]
}
}

View File

@@ -0,0 +1,32 @@
{
"apiVersion": "influxdata.com/v2alpha1",
"kind": "CheckThreshold",
"metadata": {
"name": "alarm-root-disk"
},
"spec": {
"description": "Percent used space on the root partition of at least one node has exceeded the alarm threshold.",
"every": "1m0s",
"name": "Root Disk High Usage",
"query": "from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> aggregateWindow(every: 1m, fn: max, createEmpty: false)\n |> yield(name: \"max\")",
"status": "active",
"statusMessageTemplate": "Check: ${ r._check_name } is: ${ r._level }",
"thresholds": [
{
"level": "CRIT",
"type": "greater",
"value": 95
},
{
"level": "INFO",
"type": "greater",
"value": 85
},
{
"level": "WARN",
"type": "greater",
"value": 90
}
]
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long