mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2025-12-06 09:12:45 +01:00
add configurable realert threshold per agent
This commit is contained in:
@@ -11,5 +11,6 @@ manager:
|
||||
critical_agents: []
|
||||
custom_kquery:
|
||||
offline_threshold: 5
|
||||
realert_threshold: 5
|
||||
page_size: 250
|
||||
run_interval: 5
|
||||
|
||||
@@ -61,6 +61,11 @@ manager:
|
||||
global: True
|
||||
helpLink: elastic-fleet.html
|
||||
forcedType: int
|
||||
realert_threshold:
|
||||
description: The number of hours that must pass before another alert is generated for an offline agent that still exceeds the offline_threshold.
|
||||
global: True
|
||||
helpLink: elastic-fleet.html
|
||||
forcedType: int
|
||||
page_size:
|
||||
description: The number of agents that can be processed per API request to fleet.
|
||||
global: True
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
{%- set OFFLINE_THRESHOLD_HOURS = MANAGERMERGED.agent_monitoring.config.offline_threshold -%}
|
||||
{%- set PAGE_SIZE = MANAGERMERGED.agent_monitoring.config.page_size -%}
|
||||
{%- set CUSTOM_KQUERY = MANAGERMERGED.agent_monitoring.config.custom_kquery -%}
|
||||
{%- set REALERT_THRESHOLD = MANAGERMERGED.agent_monitoring.config.realert_threshold -%}
|
||||
#!/bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
@@ -17,6 +18,7 @@ CRITICAL_AGENTS_FILE="/dev/null"
|
||||
CRITICAL_AGENTS_FILE="/opt/so/conf/agents/critical-agents.txt"
|
||||
{%- endif %}
|
||||
OFFLINE_THRESHOLD_HOURS={{ OFFLINE_THRESHOLD_HOURS }}
|
||||
REALERT_THRESHOLD={{ REALERT_THRESHOLD }}
|
||||
PAGE_SIZE="{{ PAGE_SIZE }}"
|
||||
|
||||
log_message() {
|
||||
@@ -71,6 +73,52 @@ calculate_offline_hours() {
|
||||
echo $((diff / 3600))
|
||||
}
|
||||
|
||||
#######################################
# Decide whether an agent was already logged within the re-alert window.
#
# Scans the last 1000 lines of $LOG_FILE for JSON entries whose
# "agent.hostname" equals the given hostname (case-insensitive) and looks
# at the "@timestamp" of the last such entry.
#
# Globals:
#   LOG_FILE           (read) file containing one JSON log entry per line
#   REALERT_THRESHOLD  (read) re-alert window, in hours
# Arguments:
#   $1 - agent hostname to look for
# Returns:
#   0 if the newest matching entry is less than REALERT_THRESHOLD hours old
#     (caller should skip re-alerting),
#   1 otherwise (no log file, no matching entry, unparsable timestamp, or
#     the entry is older than the window).
#######################################
check_recent_log_entries() {
  local agent_hostname="$1"

  if [ ! -f "$LOG_FILE" ]; then
    return 1
  fi

  local current_time
  current_time=$(date +%s)

  # One jq pass over the tail instead of several processes per line.
  # -R + fromjson? lets lines that are not valid JSON be skipped silently
  # rather than aborting the whole scan. Entries with an empty/missing
  # hostname or timestamp are ignored. The final `tail -n 1` keeps the
  # last matching entry in file order.
  local most_recent_timestamp
  most_recent_timestamp=$(
    tail -n 1000 "$LOG_FILE" 2>/dev/null \
      | jq -rR --arg host "$agent_hostname" '
          fromjson?
          | select(type == "object")
          | (.["agent.hostname"] // empty | tostring) as $name
          | select($name != ""
                   and ($name | ascii_downcase) == ($host | ascii_downcase))
          | .["@timestamp"] // empty
          | tostring
          | select(. != "")
        ' 2>/dev/null \
      | tail -n 1
  ) || most_recent_timestamp=""

  # If there is an entry for this agent (within the last 1000 lines),
  # check how long ago it was written.
  if [ -n "$most_recent_timestamp" ]; then
    local logged_time
    # An unparsable timestamp is treated as "no usable entry".
    logged_time=$(date -d "$most_recent_timestamp" +%s 2>/dev/null) || logged_time=0

    if [ "${logged_time:-0}" -ne 0 ]; then
      local hours_diff=$(((current_time - logged_time) / 3600))

      # Skip if the last entry for this agent is newer than the re-alert threshold
      if ((hours_diff < REALERT_THRESHOLD)); then
        return 0
      fi
    fi
  fi

  # Agent has not been logged within the re-alert threshold
  return 1
}
|
||||
|
||||
main() {
|
||||
log_message "INFO" "Starting Fleet agent status check"
|
||||
|
||||
@@ -150,6 +198,12 @@ main() {
|
||||
continue
|
||||
fi
|
||||
|
||||
# Check if this agent was already logged within the realert_threshold
|
||||
if check_recent_log_entries "$agent_hostname"; then
|
||||
log_message "INFO" "Skipping $agent_hostname (status: $agent_status) - already logged within last ${REALERT_THRESHOLD}h"
|
||||
continue
|
||||
fi
|
||||
|
||||
log_entry=$(echo 'null' | jq -c \
|
||||
--arg ts "$current_timestamp" \
|
||||
--arg id "$agent_id" \
|
||||
|
||||
Reference in New Issue
Block a user