Mirror of https://github.com/Security-Onion-Solutions/securityonion.git (synced 2025-12-06 17:22:49 +01:00)
Merge pull request #10396 from Security-Onion-Solutions/2.4/Influxdb_alerts
2.4/influxdb alerts
salt/influxdb/templates/alarm_high_redis_memory_usage.json (new file, 28 lines)
@@ -0,0 +1,28 @@
[{
    "apiVersion": "influxdata.com/v2alpha1",
    "kind": "CheckThreshold",
    "metadata": {
        "name": "high-redis-memory"
    },
    "spec": {
        "description": "Triggers when the average percent of used memory for Redis reaches a defined threshold. To tune this alert, modify the value for the appropriate alert level.",
        "every": "1m",
        "name": "High Redis Memory Usage",
        "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"redisqueue\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"mem_used\")\n |\u003e aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |\u003e yield(name: \"mean\")",
        "status": "active",
        "statusMessageTemplate": "The amount of available memory for Redis on the ${r.host} node has reached the ${r._level} threshold. The current percent of used memory is ${r.mem_used}.",
        "thresholds": [
            {
                "level": "WARN",
                "type": "greater",
                "value": 80
            },
            {
                "level": "CRIT",
                "type": "greater",
                "value": 90
            }
        ]
    }
}]
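For readability, the escaped "query" string in this check corresponds to the following Flux (shown unescaped as an illustration; it is not part of the diff):

    // Mean of the mem_used field reported by the redisqueue measurement,
    // averaged over 1m windows; the thresholds above fire at 80 (WARN) and 90 (CRIT).
    from(bucket: "telegraf/so_short_term")
        |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
        |> filter(fn: (r) => r["_measurement"] == "redisqueue")
        |> filter(fn: (r) => r["_field"] == "mem_used")
        |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)
        |> yield(name: "mean")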
salt/influxdb/templates/alarm_low_monitor_traffic.json (new file, 22 lines)
@@ -0,0 +1,22 @@
[{
    "apiVersion": "influxdata.com/v2alpha1",
    "kind": "CheckThreshold",
    "metadata": {
        "name": "monitor-interface-traffic"
    },
    "spec": {
        "description": "Triggers when the volume of network traffic (in MBs) received on the monitor interface, per sensor, falls below a defined threshold. To tune this alert, modify the value in MBs for the appropriate alert level.",
        "every": "1m",
        "name": "Low Traffic Volume on Monitor Interface",
        "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"net\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"bytes_recv\")\n |\u003e filter(fn: (r) =\u003e r[\"interface\"] == \"bond0\")\n |\u003e derivative(unit: 1s, nonNegative: true)\n |\u003e map(fn: (r) =\u003e ({r with \"_value\": r._value * 8.0 / 1000000.0}))\n |\u003e yield(name: \"nonnegative derivative\")",
        "status": "active",
        "statusMessageTemplate": "Interface ${r.interface} on node ${r.host} has reached the ${r._level} threshold. The current volume of traffic on interface ${r.interface} is ${r.bytes_recv}MB/s.",
        "thresholds": [
            {
                "level": "CRIT",
                "type": "lesser",
                "value": 5
            }
        ]
    }
}]
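Unescaped, the query takes the per-second rate of bytes_recv on the bond0 interface and scales it by 8.0 / 1000000.0, i.e. bytes per second converted to megabits per second, before the CRIT threshold of 5 is applied (rendered here for readability only):

    // Per-second receive rate on the bond0 monitor interface, scaled to megabits/s.
    from(bucket: "telegraf/so_short_term")
        |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
        |> filter(fn: (r) => r["_measurement"] == "net")
        |> filter(fn: (r) => r["_field"] == "bytes_recv")
        |> filter(fn: (r) => r["interface"] == "bond0")
        |> derivative(unit: 1s, nonNegative: true)
        |> map(fn: (r) => ({r with "_value": r._value * 8.0 / 1000000.0}))
        |> yield(name: "nonnegative derivative")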
salt/influxdb/templates/alarm_pcap_retention.json (new file, 27 lines)
@@ -0,0 +1,27 @@
[{
    "apiVersion": "influxdata.com/v2alpha1",
    "kind": "CheckThreshold",
    "metadata": {
        "name": "alarm-pcap-retention"
    },
    "spec": {
        "description": "Triggers when the PCAP retention (in days), falls below the defined threshold. To tune this alert, modify the value for the appropriate alert level.",
        "every": "1m0s",
        "name": "Low PCAP Retention",
        "query": "from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)})) |\u003e map(fn: (r) =\u003e ({r with _value: int(v: r._value)}))\n |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
        "status": "active",
        "statusMessageTemplate": "PCAP retention on node ${r.host} has reached the ${r._level} threshold. Node ${r.host} currently has approximately ${r.seconds} days of PCAP data.",
        "thresholds": [
            {
                "level": "CRIT",
                "type": "lesser",
                "value": 1
            },
            {
                "level": "WARN",
                "type": "lesser",
                "value": 3
            }
        ]
    }
}]
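Rendered as Flux for readability (not part of the diff), this query divides the pcapage seconds field by 24 * 3600 to express retention in days, truncates the result to an integer, and averages it; the check warns below 3 days and goes critical below 1 day:

    // PCAP age in seconds converted to whole days of retention per node.
    from(bucket: "telegraf/so_short_term")
        |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
        |> filter(fn: (r) => r["_measurement"] == "pcapage")
        |> filter(fn: (r) => r["_field"] == "seconds")
        |> map(fn: (r) => ({r with _value: r._value / (24.0 * 3600.0)}))
        |> map(fn: (r) => ({r with _value: int(v: r._value)}))
        |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)
        |> yield(name: "mean")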
salt/influxdb/templates/alarm_steno_packet_loss.json (new file, 27 lines)
@@ -0,0 +1,27 @@
[{
    "apiVersion": "influxdata.com/v2alpha1",
    "kind": "CheckThreshold",
    "metadata": {
        "name": "steno-packet-loss"
    },
    "spec": {
        "description": "Triggers when the average percent of packet loss is above the defined threshold. To tune this alert, modify the value for the appropriate alert level.",
        "every": "1m",
        "name": "Stenographer Packet Loss",
        "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"stenodrop\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"drop\")\n |\u003e aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |\u003e yield(name: \"mean\")",
        "status": "active",
        "statusMessageTemplate": "Stenographer Packet Loss on node ${r.host} has reached the ${ r._level } threshold. The current packet loss is ${ r.drop }%.",
        "thresholds": [
            {
                "level": "CRIT",
                "type": "greater",
                "value": 5
            },
            {
                "level": "WARN",
                "type": "greater",
                "value": 3
            }
        ]
    }
}]
salt/influxdb/templates/alarm_suricata_packet_loss.json (new file, 27 lines)
@@ -0,0 +1,27 @@
[{
    "apiVersion": "influxdata.com/v2alpha1",
    "kind": "CheckThreshold",
    "metadata": {
        "name": "suricata-packet-loss"
    },
    "spec": {
        "description": "Triggers when the average percent of packet loss is above the defined threshold. To tune this alert, modify the value for the appropriate alert level.",
        "every": "1m",
        "name": "Suricata Packet Loss",
        "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"suridrop\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"drop\")\n |\u003e map(fn: (r) =\u003e ({r with \"_value\": r._value * 100.0}))\n |\u003e map(fn: (r) =\u003e ({ r with _value: int(v: r._value) }))\n |\u003e aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |\u003e yield(name: \"mean\")",
        "status": "active",
        "statusMessageTemplate": "Suricata packet loss on node ${r.host} has reached the ${ r._level } threshold. The current packet loss is ${ r.drop }%.",
        "thresholds": [
            {
                "level": "CRIT",
                "type": "greater",
                "value": 5
            },
            {
                "level": "WARN",
                "type": "greater",
                "value": 3
            }
        ]
    }
}]
salt/influxdb/templates/alarm_zeek_packet_loss.json (new file, 27 lines)
@@ -0,0 +1,27 @@
[{
    "apiVersion": "influxdata.com/v2alpha1",
    "kind": "CheckThreshold",
    "metadata": {
        "name": "zeek-packet-loss"
    },
    "spec": {
        "description": "Triggers when the average percent of packet loss is above the defined threshold. To tune this alert, modify the value for the appropriate alert level.",
        "every": "1m",
        "name": "Zeek Packet Loss",
        "query": "from(bucket: \"telegraf/so_short_term\")\n |\u003e range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |\u003e filter(fn: (r) =\u003e r[\"_measurement\"] == \"zeekdrop\")\n |\u003e filter(fn: (r) =\u003e r[\"_field\"] == \"drop\")\n |\u003e map(fn: (r) =\u003e ({r with \"_value\": r._value * 100.0}))\n |\u003e map(fn: (r) =\u003e ({ r with _value: int(v: r._value) }))\n |\u003e aggregateWindow(every: 1m, fn: mean, createEmpty: false)\n |\u003e yield(name: \"mean\")",
        "status": "active",
        "statusMessageTemplate": "Zeek Packet Loss on node ${r.host} has reached the ${ r._level } threshold. The current packet loss is ${ r.drop }%.",
        "thresholds": [
            {
                "level": "CRIT",
                "type": "greater",
                "value": 5
            },
            {
                "level": "WARN",
                "type": "greater",
                "value": 3
            }
        ]
    }
}]
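The three packet-loss checks (Stenographer, Suricata, Zeek) share the same shape and thresholds. The Suricata and Zeek queries multiply the drop field by 100.0 and cast it to an integer before averaging, while the Stenographer check reads stenodrop's drop field without scaling. As an illustration (not part of the diff), the Zeek query unescaped is:

    // Zeek drop value scaled by 100 and truncated, averaged per 1m window.
    from(bucket: "telegraf/so_short_term")
        |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
        |> filter(fn: (r) => r["_measurement"] == "zeekdrop")
        |> filter(fn: (r) => r["_field"] == "drop")
        |> map(fn: (r) => ({r with "_value": r._value * 100.0}))
        |> map(fn: (r) => ({r with _value: int(v: r._value)}))
        |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)
        |> yield(name: "mean")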
@@ -11,8 +11,9 @@ if [[ ! "`pidof -x $(basename $0) -o %PPID`" ]]; then
 
 UNPARSED=$(redis-cli llen logstash:unparsed | awk '{print $1}')
 PARSED=$(redis-cli llen logstash:parsed | awk '{print $1}')
+MEM_USED=$(redis-cli info memory | grep used_memory_peak_perc | cut -d ":" -f2 | sed "s/%//")
 
-echo "redisqueue unparsed=$UNPARSED,parsed=$PARSED"
+echo "redisqueue unparsed=$UNPARSED,parsed=$PARSED,mem_used=$MEM_USED"
 
 fi
 
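With mem_used added to the redisqueue measurement by the script change above, a quick way to confirm the new field is arriving before relying on the high-redis-memory check is a query along these lines (a sketch only, assuming the script's output lands in the same telegraf/so_short_term bucket used by the checks above):

    // Latest reported Redis peak-memory percentage per host over the last 15 minutes.
    from(bucket: "telegraf/so_short_term")
        |> range(start: -15m)
        |> filter(fn: (r) => r["_measurement"] == "redisqueue")
        |> filter(fn: (r) => r["_field"] == "mem_used")
        |> last()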