Merge pull request #10396 from Security-Onion-Solutions/2.4/Influxdb_alerts

2.4/influxdb alerts
This commit is contained in:
bryant-treacle
2023-05-23 14:14:48 -04:00
committed by GitHub
7 changed files with 161 additions and 2 deletions

@@ -0,0 +1,28 @@
[{
  "apiVersion": "influxdata.com/v2alpha1",
  "kind": "CheckThreshold",
  "metadata": {
    "name": "high-redis-memory"
  },
  "spec": {
    "description": "Triggers when the average percent of used memory for Redis reaches a defined threshold. To tune this alert, modify the value for the appropriate alert level.",
    "every": "1m",
    "name": "High Redis Memory Usage",
    "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"redisqueue\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"mem_used\")\n |\u003e aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |\u003e yield(name: \"mean\")",
    "status": "active",
"statusMessageTemplate": "The amount of available memory for Redis on the ${r.host} node has reached the ${r._level} threshold. The current percent of used memory is ${r.mem_used}.",
"thresholds": [
{
"level": "WARN",
"type": "greater",
"value": 80
},
{
"level": "CRIT",
"type": "greater",
"value": 90
}
]
}
}]
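
For readability, the Flux embedded in the query string above expands to the sketch below. The mem_used field is the Redis used_memory_peak_perc value emitted by the shell script modified at the end of this change, and v.timeRangeStart / v.timeRangeStop are filled in by the check's 1-minute schedule.

// Mean Redis memory usage (percent), averaged over 1-minute windows
from(bucket: "telegraf/so_short_term")
  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
  |> filter(fn: (r) => r["_measurement"] == "redisqueue")
  |> filter(fn: (r) => r["_field"] == "mem_used")
  |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)
  |> yield(name: "mean")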

@@ -0,0 +1,22 @@
[{
  "apiVersion": "influxdata.com/v2alpha1",
  "kind": "CheckThreshold",
  "metadata": {
    "name": "monitor-interface-traffic"
  },
  "spec": {
    "description": "Triggers when the volume of network traffic (in MBs) received on the monitor interface, per sensor, falls below a defined threshold. To tune this alert, modify the value in MBs for the appropriate alert level.",
    "every": "1m",
    "name": "Low Traffic Volume on Monitor Interface",
    "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"net\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"bytes_recv\")\n |\u003e filter(fn: (r) =\u003e r[\"interface\"] == \"bond0\")\n |\u003e derivative(unit: 1s, nonNegative: true)\n |\u003e map(fn: (r) =\u003e ({r with \"_value\": r._value * 8.0 / 1000000.0}))\n |\u003e yield(name: \"nonnegative derivative\")",
    "status": "active",
    "statusMessageTemplate": "Interface ${r.interface} on node ${r.host} has reached the ${r._level} threshold. The current volume of traffic on interface ${r.interface} is ${r.bytes_recv}MB/s.",
    "thresholds": [
      {
        "level": "CRIT",
        "type": "lesser",
        "value": 5
      }
    ]
  }
}]
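
Unescaped, the query above is the following Flux. The derivative turns the cumulative bytes_recv counter into a per-second rate, and the map scales it by 8 / 1,000,000 (bytes/s to megabits/s), which is the value compared against the threshold.

from(bucket: "telegraf/so_short_term")
  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
  |> filter(fn: (r) => r["_measurement"] == "net")
  |> filter(fn: (r) => r["_field"] == "bytes_recv")
  |> filter(fn: (r) => r["interface"] == "bond0")      // the monitor (sniffing) interface
  |> derivative(unit: 1s, nonNegative: true)           // byte counter -> bytes per second
  |> map(fn: (r) => ({r with "_value": r._value * 8.0 / 1000000.0}))  // bytes/s -> megabits/s
  |> yield(name: "nonnegative derivative")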

@@ -0,0 +1,27 @@
[{
  "apiVersion": "influxdata.com/v2alpha1",
  "kind": "CheckThreshold",
  "metadata": {
    "name": "alarm-pcap-retention"
  },
  "spec": {
    "description": "Triggers when the PCAP retention (in days) falls below the defined threshold. To tune this alert, modify the value for the appropriate alert level.",
    "every": "1m0s",
    "name": "Low PCAP Retention",
    "query": "from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))\n |> map(fn: (r) => ({r with _value: int(v: r._value)}))\n |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
    "status": "active",
    "statusMessageTemplate": "PCAP retention on node ${r.host} has reached the ${r._level} threshold. Node ${r.host} currently has approximately ${r.seconds} days of PCAP data.",
    "thresholds": [
      {
        "level": "CRIT",
        "type": "lesser",
        "value": 1
      },
      {
        "level": "WARN",
        "type": "lesser",
        "value": 3
      }
    ]
  }
}]
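
The embedded query, shown unescaped below, converts the pcapage seconds field into whole days before averaging, so the thresholds (1 and 3) are compared against days of retention.

from(bucket: "telegraf/so_short_term")
  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
  |> filter(fn: (r) => r["_measurement"] == "pcapage")
  |> filter(fn: (r) => r["_field"] == "seconds")
  |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))  // seconds -> days
  |> map(fn: (r) => ({ r with _value: int(v: r._value)}))            // truncate to whole days
  |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)
  |> yield(name: "mean")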

@@ -0,0 +1,27 @@
[{
  "apiVersion": "influxdata.com/v2alpha1",
  "kind": "CheckThreshold",
  "metadata": {
    "name": "steno-packet-loss"
  },
  "spec": {
    "description": "Triggers when the average percent of packet loss is above the defined threshold. To tune this alert, modify the value for the appropriate alert level.",
    "every": "1m",
    "name": "Stenographer Packet Loss",
    "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"stenodrop\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"drop\")\n |\u003e aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |\u003e yield(name: \"mean\")",
    "status": "active",
    "statusMessageTemplate": "Stenographer Packet Loss on node ${r.host} has reached the ${ r._level } threshold. The current packet loss is ${ r.drop }%.",
    "thresholds": [
      {
        "level": "CRIT",
        "type": "greater",
        "value": 5
      },
      {
        "level": "WARN",
        "type": "greater",
        "value": 3
      }
    ]
  }
}]
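
Unescaped, the Stenographer check runs the Flux below. Unlike the Suricata and Zeek checks that follow, there is no x100 conversion, so the stenodrop drop field is presumably already recorded as a percentage.

from(bucket: "telegraf/so_short_term")
  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
  |> filter(fn: (r) => r["_measurement"] == "stenodrop")
  |> filter(fn: (r) => r["_field"] == "drop")
  |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)
  |> yield(name: "mean")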

@@ -0,0 +1,27 @@
[{
  "apiVersion": "influxdata.com/v2alpha1",
  "kind": "CheckThreshold",
  "metadata": {
    "name": "suricata-packet-loss"
  },
  "spec": {
    "description": "Triggers when the average percent of packet loss is above the defined threshold. To tune this alert, modify the value for the appropriate alert level.",
    "every": "1m",
    "name": "Suricata Packet Loss",
    "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"suridrop\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"drop\")\n |\u003e map(fn: (r) =\u003e ({r with \"_value\": r._value * 100.0}))\n |\u003e map(fn: (r) =\u003e ({ r with _value: int(v: r._value) }))\n |\u003e aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |\u003e yield(name: \"mean\")",
    "status": "active",
    "statusMessageTemplate": "Suricata packet loss on node ${r.host} has reached the ${ r._level } threshold. The current packet loss is ${ r.drop }%.",
    "thresholds": [
      {
        "level": "CRIT",
        "type": "greater",
        "value": 5
      },
      {
        "level": "WARN",
        "type": "greater",
        "value": 3
      }
    ]
  }
}]
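
For reference, the Suricata query reads the suridrop drop field (stored as a fraction, given the x100 conversion), converts it to a whole percent, then averages it per minute:

from(bucket: "telegraf/so_short_term")
  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
  |> filter(fn: (r) => r["_measurement"] == "suridrop")
  |> filter(fn: (r) => r["_field"] == "drop")
  |> map(fn: (r) => ({r with "_value": r._value * 100.0}))   // fraction -> percent
  |> map(fn: (r) => ({ r with _value: int(v: r._value) }))   // truncate to a whole number
  |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)
  |> yield(name: "mean")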

@@ -0,0 +1,27 @@
[{
  "apiVersion": "influxdata.com/v2alpha1",
  "kind": "CheckThreshold",
  "metadata": {
    "name": "zeek-packet-loss"
  },
  "spec": {
    "description": "Triggers when the average percent of packet loss is above the defined threshold. To tune this alert, modify the value for the appropriate alert level.",
    "every": "1m",
    "name": "Zeek Packet Loss",
    "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"zeekdrop\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"drop\")\n |\u003e map(fn: (r) =\u003e ({r with \"_value\": r._value * 100.0}))\n |\u003e map(fn: (r) =\u003e ({ r with _value: int(v: r._value) }))\n |\u003e aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |\u003e yield(name: \"mean\")",
    "status": "active",
    "statusMessageTemplate": "Zeek Packet Loss on node ${r.host} has reached the ${ r._level } threshold. The current packet loss is ${ r.drop }%.",
    "thresholds": [
      {
        "level": "CRIT",
        "type": "greater",
        "value": 5
      },
      {
        "level": "WARN",
        "type": "greater",
        "value": 3
      }
    ]
  }
}]
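
The Zeek check is identical in structure to the Suricata one above; unescaped, only the measurement it reads differs:

from(bucket: "telegraf/so_short_term")
  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
  |> filter(fn: (r) => r["_measurement"] == "zeekdrop")
  |> filter(fn: (r) => r["_field"] == "drop")
  |> map(fn: (r) => ({r with "_value": r._value * 100.0}))   // fraction -> percent
  |> map(fn: (r) => ({ r with _value: int(v: r._value) }))   // truncate to a whole number
  |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)
  |> yield(name: "mean")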

@@ -11,8 +11,9 @@ if [[ ! "`pidof -x $(basename $0) -o %PPID`" ]]; then
   UNPARSED=$(redis-cli llen logstash:unparsed | awk '{print $1}')
   PARSED=$(redis-cli llen logstash:parsed | awk '{print $1}')
+  MEM_USED=$(redis-cli info memory | grep used_memory_peak_perc | cut -d ":" -f2 | sed "s/%//")
-  echo "redisqueue unparsed=$UNPARSED,parsed=$PARSED"
+  echo "redisqueue unparsed=$UNPARSED,parsed=$PARSED,mem_used=$MEM_USED"
 fi