mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2025-12-06 17:22:49 +01:00
Merge pull request #15012 from Security-Onion-Solutions/reyesj2/ea-alerter
add configurable realert threshold per agent
This commit is contained in:
@@ -11,5 +11,6 @@ manager:
|
|||||||
critical_agents: []
|
critical_agents: []
|
||||||
custom_kquery:
|
custom_kquery:
|
||||||
offline_threshold: 5
|
offline_threshold: 5
|
||||||
|
realert_threshold: 5
|
||||||
page_size: 250
|
page_size: 250
|
||||||
run_interval: 5
|
run_interval: 5
|
||||||
|
|||||||
@@ -61,6 +61,11 @@ manager:
|
|||||||
global: True
|
global: True
|
||||||
helpLink: elastic-fleet.html
|
helpLink: elastic-fleet.html
|
||||||
forcedType: int
|
forcedType: int
|
||||||
|
realert_threshold:
|
||||||
|
description: The number of hours that must pass before another alert is generated for an agent that is still offline beyond the offline_threshold.
|
||||||
|
global: True
|
||||||
|
helpLink: elastic-fleet.html
|
||||||
|
forcedType: int
|
||||||
page_size:
|
page_size:
|
||||||
description: The amount of agents that can be processed per API request to fleet.
|
description: The amount of agents that can be processed per API request to fleet.
|
||||||
global: True
|
global: True
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
{%- set OFFLINE_THRESHOLD_HOURS = MANAGERMERGED.agent_monitoring.config.offline_threshold -%}
|
{%- set OFFLINE_THRESHOLD_HOURS = MANAGERMERGED.agent_monitoring.config.offline_threshold -%}
|
||||||
{%- set PAGE_SIZE = MANAGERMERGED.agent_monitoring.config.page_size -%}
|
{%- set PAGE_SIZE = MANAGERMERGED.agent_monitoring.config.page_size -%}
|
||||||
{%- set CUSTOM_KQUERY = MANAGERMERGED.agent_monitoring.config.custom_kquery -%}
|
{%- set CUSTOM_KQUERY = MANAGERMERGED.agent_monitoring.config.custom_kquery -%}
|
||||||
|
{%- set REALERT_THRESHOLD = MANAGERMERGED.agent_monitoring.config.realert_threshold -%}
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
@@ -17,6 +18,7 @@ CRITICAL_AGENTS_FILE="/dev/null"
|
|||||||
CRITICAL_AGENTS_FILE="/opt/so/conf/agents/critical-agents.txt"
|
CRITICAL_AGENTS_FILE="/opt/so/conf/agents/critical-agents.txt"
|
||||||
{%- endif %}
|
{%- endif %}
|
||||||
OFFLINE_THRESHOLD_HOURS={{ OFFLINE_THRESHOLD_HOURS }}
|
OFFLINE_THRESHOLD_HOURS={{ OFFLINE_THRESHOLD_HOURS }}
|
||||||
|
REALERT_THRESHOLD={{ REALERT_THRESHOLD }}
|
||||||
PAGE_SIZE="{{ PAGE_SIZE }}"
|
PAGE_SIZE="{{ PAGE_SIZE }}"
|
||||||
|
|
||||||
log_message() {
|
log_message() {
|
||||||
@@ -71,6 +73,52 @@ calculate_offline_hours() {
|
|||||||
echo $((diff / 3600))
|
echo $((diff / 3600))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Decide whether an agent was already logged within the re-alert window.
#
# Looks at the last 1000 lines of $LOG_FILE, takes the most recent JSON
# entry whose "agent.hostname" matches the given hostname
# (case-insensitively), and compares its "@timestamp" with the current time.
#
# Globals:
#   LOG_FILE          (read) path of the JSON-lines log to inspect
#   REALERT_THRESHOLD (read) re-alert window, in whole hours
# Arguments:
#   $1 - agent hostname to look for
# Returns:
#   0 if the newest matching entry is younger than REALERT_THRESHOLD hours
#     (the caller should skip this agent),
#   1 otherwise: no log file, no matching entry in the last 1000 lines,
#     an unparseable timestamp, or an entry older than the threshold.
#######################################
check_recent_log_entries() {
    local agent_hostname="$1"

    if [ ! -f "$LOG_FILE" ]; then
        return 1
    fi

    # An empty hostname can never match a logged entry.
    if [ -z "$agent_hostname" ]; then
        return 1
    fi

    local current_time
    current_time=$(date +%s)

    # One jq pass over the tail of the log instead of several processes per
    # line. -R feeds each line as a raw string and `fromjson?` silently drops
    # lines that are not valid JSON, so a single bad line cannot abort the scan.
    # Errors are deliberately suppressed: this check is best-effort and any
    # failure must fall through to "not recently logged" rather than trip
    # `set -e` / `pipefail` in the calling script.
    local most_recent_timestamp
    most_recent_timestamp=$(
        tail -n 1000 "$LOG_FILE" 2>/dev/null \
            | jq -rR --arg host "$agent_hostname" '
                fromjson?
                | objects
                | select(((.["agent.hostname"] // "") | tostring | ascii_downcase)
                         == ($host | ascii_downcase))
                | (.["@timestamp"] // "")
                | tostring
                | select(. != "")
              ' 2>/dev/null \
            | tail -n 1
    ) || most_recent_timestamp=""

    # No entry for this agent within the last 1000 lines.
    if [ -z "$most_recent_timestamp" ]; then
        return 1
    fi

    # An unparseable timestamp is treated the same as "no recent entry".
    local logged_time
    logged_time=$(date -d "$most_recent_timestamp" +%s 2>/dev/null) || return 1
    if [ "$logged_time" -eq 0 ]; then
        return 1
    fi

    local time_diff=$((current_time - logged_time))
    local hours_diff=$((time_diff / 3600))

    # Skip if the last entry for the agent is more recent than the realert threshold
    if ((hours_diff < REALERT_THRESHOLD)); then
        return 0
    fi

    # Agent has not been logged within realert threshold
    return 1
}
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
log_message "INFO" "Starting Fleet agent status check"
|
log_message "INFO" "Starting Fleet agent status check"
|
||||||
|
|
||||||
@@ -150,6 +198,12 @@ main() {
|
|||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Check if this agent was already logged within the realert_threshold
|
||||||
|
if check_recent_log_entries "$agent_hostname"; then
|
||||||
|
log_message "INFO" "Skipping $agent_hostname (status: $agent_status) - already logged within last ${REALERT_THRESHOLD}h"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
log_entry=$(echo 'null' | jq -c \
|
log_entry=$(echo 'null' | jq -c \
|
||||||
--arg ts "$current_timestamp" \
|
--arg ts "$current_timestamp" \
|
||||||
--arg id "$agent_id" \
|
--arg id "$agent_id" \
|
||||||
|
|||||||
Reference in New Issue
Block a user