Merge pull request #15015 from Security-Onion-Solutions/vlb2

Vlb2
This commit is contained in:
Josh Patterson
2025-09-10 14:58:41 -04:00
committed by GitHub
25 changed files with 882 additions and 63 deletions

View File

@@ -38,6 +38,7 @@ elasticfleet:
- elasticsearch
- endpoint
- fleet_server
- filestream
- http_endpoint
- httpjson
- log

View File

@@ -0,0 +1,48 @@
{
  "package": {
    "name": "filestream",
    "version": ""
  },
  "name": "agent-monitor",
  "namespace": "",
  "description": "",
  "policy_ids": [
    "so-grid-nodes_general"
  ],
  "output_id": null,
  "vars": {},
  "inputs": {
    "filestream-filestream": {
      "enabled": true,
      "streams": {
        "filestream.generic": {
          "enabled": true,
          "vars": {
            "paths": [
              "/opt/so/log/agents/agent-monitor.log"
            ],
            "data_stream.dataset": "agent-monitor",
            "pipeline": "elasticagent.monitor",
            "parsers": "",
            "exclude_files": [
              "\\.gz$"
            ],
            "include_files": [],
            "processors": "- decode_json_fields:\n    fields: [\"message\"]\n    target: \"\"\n- add_fields:\n    target: event\n    fields:\n      module: gridmetrics",
            "tags": [],
            "recursive_glob": true,
            "ignore_older": "72h",
            "clean_inactive": -1,
            "harvester_limit": 0,
            "fingerprint": true,
            "fingerprint_offset": 0,
            "fingerprint_length": 1024,
            "file_identity_native": false,
            "exclude_lines": [],
            "include_lines": []
          }
        }
      }
    }
  }
}

View File

@@ -284,6 +284,86 @@ elasticsearch:
hot:
actions: {}
min_age: 0ms
so-assistant-chat:
index_sorting: false
index_template:
composed_of:
- assistant-chat-mappings
- assistant-chat-settings
data_stream:
allow_custom_routing: false
hidden: false
ignore_missing_component_templates: []
index_patterns:
- so-assistant-chat*
priority: 501
template:
mappings:
date_detection: false
dynamic_templates:
- strings_as_keyword:
mapping:
ignore_above: 1024
type: keyword
match_mapping_type: string
settings:
index:
lifecycle:
name: so-assistant-chat-logs
mapping:
total_fields:
limit: 1500
number_of_replicas: 0
number_of_shards: 1
refresh_interval: 1s
sort:
field: '@timestamp'
order: desc
policy:
phases:
hot:
actions: {}
min_age: 0ms
so-assistant-session:
index_sorting: false
index_template:
composed_of:
- assistant-session-mappings
- assistant-session-settings
data_stream:
allow_custom_routing: false
hidden: false
ignore_missing_component_templates: []
index_patterns:
- so-assistant-session*
priority: 501
template:
mappings:
date_detection: false
dynamic_templates:
- strings_as_keyword:
mapping:
ignore_above: 1024
type: keyword
match_mapping_type: string
settings:
index:
lifecycle:
name: so-assistant-session-logs
mapping:
total_fields:
limit: 1500
number_of_replicas: 0
number_of_shards: 1
refresh_interval: 1s
sort:
field: '@timestamp'
order: desc
policy:
phases:
hot:
actions: {}
min_age: 0ms
so-endgame:
index_sorting: false
index_template:
@@ -1243,6 +1323,70 @@ elasticsearch:
set_priority:
priority: 50
min_age: 30d
so-logs-agent-monitor:
index_sorting: false
index_template:
composed_of:
- event-mappings
- so-elastic-agent-monitor
- so-fleet_integrations.ip_mappings-1
- so-fleet_globals-1
- so-fleet_agent_id_verification-1
data_stream:
allow_custom_routing: false
hidden: false
ignore_missing_component_templates:
- logs-agent-monitor@custom
index_patterns:
- logs-agent-monitor-*
priority: 501
template:
mappings:
_meta:
managed: true
managed_by: security_onion
package:
name: elastic_agent
settings:
index:
lifecycle:
name: so-logs-agent-monitor-logs
mapping:
total_fields:
limit: 5000
number_of_replicas: 0
sort:
field: '@timestamp'
order: desc
policy:
_meta:
managed: true
managed_by: security_onion
package:
name: elastic_agent
phases:
cold:
actions:
set_priority:
priority: 0
min_age: 60d
delete:
actions:
delete: {}
min_age: 365d
hot:
actions:
rollover:
max_age: 30d
max_primary_shard_size: 50gb
set_priority:
priority: 100
min_age: 0ms
warm:
actions:
set_priority:
priority: 50
min_age: 30d
so-logs-elastic_agent_x_apm_server:
index_sorting: false
index_template:

View File

@@ -0,0 +1,36 @@
{
  "processors": [
    { "set": { "field": "event.dataset", "value": "gridmetrics.agents", "ignore_failure": true } },
    { "set": { "field": "event.module", "value": "gridmetrics", "ignore_failure": true } },
    {
      "remove": {
        "field": ["host", "elastic_agent", "agent"],
        "ignore_missing": true,
        "ignore_failure": true
      }
    },
    { "json": { "field": "message", "add_to_root": true, "ignore_failure": true } }
  ]
}

View File

@@ -0,0 +1,43 @@
{
  "template": {
    "mappings": {
      "properties": {
        "agent": {
          "type": "object",
          "properties": {
            "hostname": { "type": "keyword", "ignore_above": 1024 },
            "id": { "type": "keyword", "ignore_above": 1024 },
            "last_checkin_status": { "type": "keyword", "ignore_above": 1024 },
            "last_checkin": { "type": "date" },
            "name": { "type": "keyword", "ignore_above": 1024 },
            "offline_duration_hours": { "type": "integer" },
            "policy_id": { "type": "keyword", "ignore_above": 1024 },
            "status": { "type": "keyword", "ignore_above": 1024 }
          }
        }
      }
    }
  }
}

View File

@@ -0,0 +1,104 @@
{
  "template": {
    "mappings": {
      "properties": {
        "@timestamp": { "type": "date" },
        "so_kind": { "type": "keyword", "ignore_above": 1024 },
        "so_operation": { "type": "keyword", "ignore_above": 1024 },
        "so_chat": {
          "properties": {
            "role": { "type": "keyword", "ignore_above": 1024 },
            "content": { "type": "object", "enabled": false },
            "sessionId": { "type": "keyword", "ignore_above": 1024 },
            "createTime": { "type": "date" },
            "deletedAt": { "type": "date" },
            "tags": { "type": "keyword", "ignore_above": 1024 },
            "tool_use_id": { "type": "keyword", "ignore_above": 1024 },
            "userId": { "type": "keyword", "ignore_above": 1024 },
            "message": {
              "properties": {
                "id": { "type": "keyword", "ignore_above": 1024 },
                "type": { "type": "keyword", "ignore_above": 1024 },
                "role": { "type": "keyword", "ignore_above": 1024 },
                "model": { "type": "keyword", "ignore_above": 1024 },
                "contentStr": { "type": "text" },
                "contentBlocks": { "type": "nested", "enabled": false },
                "stopReason": { "type": "keyword", "ignore_above": 1024 },
                "stopSequence": { "type": "keyword", "ignore_above": 1024 },
                "usage": {
                  "properties": {
                    "input_tokens": { "type": "long" },
                    "output_tokens": { "type": "long" },
                    "credits": { "type": "long" }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "_meta": { "ecs_version": "1.12.2" }
}

View File

@@ -0,0 +1,7 @@
{
  "_meta": { "description": "default settings for common Security Onion Assistant indices" },
  "version": 1,
  "template": {}
}

View File

@@ -0,0 +1,44 @@
{
  "template": {
    "mappings": {
      "properties": {
        "@timestamp": { "type": "date" },
        "so_kind": { "type": "keyword", "ignore_above": 1024 },
        "so_session": {
          "properties": {
            "title": { "type": "keyword", "ignore_above": 1024 },
            "sessionId": { "type": "keyword", "ignore_above": 1024 },
            "createTime": { "type": "date" },
            "deleteTime": { "type": "date" },
            "tags": { "type": "keyword", "ignore_above": 1024 },
            "userId": { "type": "keyword", "ignore_above": 1024 }
          }
        }
      }
    }
  },
  "_meta": { "ecs_version": "1.12.2" }
}

View File

@@ -0,0 +1,7 @@
{
  "_meta": { "description": "default settings for common Security Onion Assistant indices" },
  "version": 1,
  "template": {}
}

View File

@@ -1230,6 +1230,10 @@ firewall:
portgroups:
- elasticsearch_node
- elasticsearch_rest
managerhype:
portgroups:
- elasticsearch_node
- elasticsearch_rest
standalone:
portgroups:
- elasticsearch_node
@@ -1377,6 +1381,10 @@ firewall:
portgroups:
- elasticsearch_node
- elasticsearch_rest
managerhype:
portgroups:
- elasticsearch_node
- elasticsearch_rest
standalone:
portgroups:
- elasticsearch_node
@@ -1579,6 +1587,9 @@ firewall:
portgroups:
- redis
- elastic_agent_data
managerhype:
portgroups:
- elastic_agent_data
self:
portgroups:
- redis
@@ -1696,6 +1707,9 @@ firewall:
managersearch:
portgroups:
- openssh
managerhype:
portgroups:
- openssh
standalone:
portgroups:
- openssh
@@ -1758,6 +1772,8 @@ firewall:
portgroups: []
managersearch:
portgroups: []
managerhype:
portgroups: []
standalone:
portgroups: []
customhostgroup0:

View File

@@ -25,7 +25,7 @@
{% set KAFKA_EXTERNAL_ACCESS = salt['pillar.get']('kafka:config:external_access:enabled', default=False) %}
{% set kafka_node_type = salt['pillar.get']('kafka:nodes:'+ GLOBALS.hostname + ':role') %}
{% if role in ['manager', 'managersearch', 'standalone'] %}
{% if role.startswith('manager') or role == 'standalone' %}
{% do FIREWALL_DEFAULT.firewall.role[role].chain["DOCKER-USER"].hostgroups[role].portgroups.append('kafka_controller') %}
{% do FIREWALL_DEFAULT.firewall.role[role].chain["DOCKER-USER"].hostgroups.receiver.portgroups.append('kafka_controller') %}
{% endif %}
@@ -38,8 +38,8 @@
{% do FIREWALL_DEFAULT.firewall.role[role].chain["DOCKER-USER"].hostgroups.receiver.portgroups.append('kafka_controller') %}
{% endif %}
{% if role in ['manager', 'managersearch', 'standalone', 'receiver'] %}
{% for r in ['manager', 'managersearch', 'standalone', 'receiver', 'fleet', 'idh', 'sensor', 'searchnode','heavynode', 'elastic_agent_endpoint', 'desktop'] %}
{% if role.startswith('manager') or role in ['standalone', 'receiver'] %}
{% for r in ['manager', 'managersearch', 'managerhype', 'standalone', 'receiver', 'fleet', 'idh', 'sensor', 'searchnode','heavynode', 'elastic_agent_endpoint', 'desktop'] %}
{% if FIREWALL_DEFAULT.firewall.role[role].chain["DOCKER-USER"].hostgroups[r] is defined %}
{% do FIREWALL_DEFAULT.firewall.role[role].chain["DOCKER-USER"].hostgroups[r].portgroups.append('kafka_data') %}
{% endif %}
@@ -48,11 +48,11 @@
{% if KAFKA_EXTERNAL_ACCESS %}
{# Kafka external access only applies for Kafka nodes with the broker role. #}
{% if role in ['manager', 'managersearch', 'standalone', 'receiver'] and 'broker' in kafka_node_type %}
{# 'and' binds tighter than 'or': group the role check so the broker requirement applies to manager* roles as well. #}
{% if (role.startswith('manager') or role in ['standalone', 'receiver']) and 'broker' in kafka_node_type %}
{% do FIREWALL_DEFAULT.firewall.role[role].chain["DOCKER-USER"].hostgroups.external_kafka.portgroups.append('kafka_external_access') %}
{% endif %}
{% endif %}
{% endif %}
{% set FIREWALL_MERGED = salt['pillar.get']('firewall', FIREWALL_DEFAULT.firewall, merge=True) %}
{% set FIREWALL_MERGED = salt['pillar.get']('firewall', FIREWALL_DEFAULT.firewall, merge=True) %}

View File

@@ -268,3 +268,12 @@ logrotate:
- nocompress
- create
- sharedscripts
/opt/so/log/agents/agent-monitor*_x_log:
- daily
- rotate 14
- missingok
- compress
- create
- extension .log
- dateext
- dateyesterday

View File

@@ -175,3 +175,10 @@ logrotate:
multiline: True
global: True
forcedType: "[]string"
"/opt/so/log/agents/agent-monitor*_x_log":
description: List of logrotate options for this file.
title: /opt/so/log/agents/agent-monitor*.log
advanced: True
multiline: True
global: True
forcedType: "[]string"

View File

@@ -17,7 +17,7 @@
{% for node_type, node_details in redis_node_data.items() | sort %}
{% if GLOBALS.role in ['so-searchnode', 'so-standalone', 'so-managersearch', 'so-fleet'] %}
{% if node_type in ['manager', 'managersearch', 'standalone', 'receiver' ] %}
{% if node_type.startswith('manager') or node_type in ['standalone', 'receiver'] %}
{% for hostname in redis_node_data[node_type].keys() %}
{% do LOGSTASH_REDIS_NODES.append({hostname:node_details[hostname].ip}) %}
{% endfor %}
@@ -47,7 +47,7 @@
{% endif %}
{# Disable logstash on manager & receiver nodes unless it has an override configured #}
{% if not KAFKA_LOGSTASH %}
{% if GLOBALS.role in ['so-manager', 'so-receiver'] and GLOBALS.hostname not in KAFKA_LOGSTASH %}
{% if GLOBALS.role in ['so-manager', 'so-managerhype', 'so-receiver'] and GLOBALS.hostname not in KAFKA_LOGSTASH %}
{% do LOGSTASH_MERGED.update({'enabled': False}) %}
{% endif %}
{% endif %}

View File

@@ -5,3 +5,12 @@ manager:
minute: 0
additionalCA: ''
insecureSkipVerify: False
agent_monitoring:
enabled: False
config:
critical_agents: []
custom_kquery:
offline_threshold: 5
realert_threshold: 5
page_size: 250
run_interval: 5

View File

@@ -34,6 +34,26 @@ agents_log_dir:
- user
- group
agents_conf_dir:
file.directory:
- name: /opt/so/conf/agents
- user: root
- group: root
- recurse:
- user
- group
{% if MANAGERMERGED.agent_monitoring.config.critical_agents | length > 0 %}
critical_agents_patterns:
file.managed:
- name: /opt/so/conf/agents/critical-agents.txt
{# Serialise the list as JSON (a valid YAML flow sequence) instead of relying on Python's repr, so entries with quotes or backslashes render correctly. #}
- contents: {{ MANAGERMERGED.agent_monitoring.config.critical_agents | tojson }}
{% else %}
remove_critical_agents_config:
file.absent:
- name: /opt/so/conf/agents/critical-agents.txt
{% endif %}
yara_log_dir:
file.directory:
- name: /opt/so/log/yarasync
@@ -127,6 +147,21 @@ so_fleetagent_status:
- month: '*'
- dayweek: '*'
so_fleetagent_monitor:
{% if MANAGERMERGED.agent_monitoring.enabled %}
cron.present:
{% else %}
cron.absent:
{% endif %}
- name: /bin/flock -n /opt/so/log/agents/agent-monitor.lock /usr/sbin/so-elastic-agent-monitor
- identifier: so_fleetagent_monitor
- user: root
- minute: '*/{{ MANAGERMERGED.agent_monitoring.config.run_interval }}'
- hour: '*'
- daymonth: '*'
- month: '*'
- dayweek: '*'
socore_own_saltstack_default:
file.directory:
- name: /opt/so/saltstack/default

View File

@@ -37,3 +37,44 @@ manager:
forcedType: bool
global: True
helpLink: proxy.html
agent_monitoring:
enabled:
description: Enable monitoring elastic agents for health issues. Can be used to trigger an alert when a 'critical' agent hasn't checked in with fleet for longer than the configured offline threshold.
global: True
helpLink: elastic-fleet.html
forcedType: bool
config:
critical_agents:
description: List of 'critical' agents to log when they haven't checked in longer than the maximum allowed time. If there are no 'critical' agents specified all offline agents will be logged once they reach the offline threshold.
global: True
multiline: True
helpLink: elastic-fleet.html
forcedType: "[]string"
custom_kquery:
description: For more granular control over what agents to monitor for offline|degraded status add a kquery here. It is recommended to create & test within Elastic Fleet first to ensure your agents are targeted correctly using the query. eg 'status:offline AND tags:INFRA'
global: True
helpLink: elastic-fleet.html
forcedType: string
advanced: True
offline_threshold:
description: The maximum allowed time in hours a 'critical' agent has been offline before being logged.
global: True
helpLink: elastic-fleet.html
forcedType: int
realert_threshold:
description: The time in hours to pass before another alert for an offline agent exceeding the offline_threshold is generated.
global: True
helpLink: elastic-fleet.html
forcedType: int
page_size:
description: The amount of agents that can be processed per API request to fleet.
global: True
helpLink: elastic-fleet.html
forcedType: int
advanced: True
run_interval:
description: The time in minutes between checking fleet agent statuses.
global: True
advanced: True
helpLink: elastic-fleet.html
forcedType: int

View File

@@ -0,0 +1,254 @@
{%- from 'manager/map.jinja' import MANAGERMERGED -%}
{%- set OFFLINE_THRESHOLD_HOURS = MANAGERMERGED.agent_monitoring.config.offline_threshold -%}
{%- set PAGE_SIZE = MANAGERMERGED.agent_monitoring.config.page_size -%}
{%- set CUSTOM_KQUERY = MANAGERMERGED.agent_monitoring.config.custom_kquery -%}
{%- set REALERT_THRESHOLD = MANAGERMERGED.agent_monitoring.config.realert_threshold -%}
#!/bin/bash
# Jinja-rendered script: queries the Fleet agents API for offline/degraded agents and
# appends one JSON line per reportable agent to $LOG_FILE. Diagnostics go to stderr.
# Abort on command errors, unset variables and failed pipeline stages.
set -euo pipefail
# Log location, curl credentials/config file and the Fleet agents endpoint used below.
LOG_DIR="/opt/so/log/agents"
LOG_FILE="$LOG_DIR/agent-monitor.log"
CURL_CONFIG="/opt/so/conf/elasticsearch/curl.config"
FLEET_API="http://localhost:5601/api/fleet/agents"
{#- When using custom kquery ignore critical agents patterns. Since we want all the results of custom query logged #}
{%- if CUSTOM_KQUERY != None and CUSTOM_KQUERY | length > 0 %}
CRITICAL_AGENTS_FILE="/dev/null"
{%- else %}
CRITICAL_AGENTS_FILE="/opt/so/conf/agents/critical-agents.txt"
{%- endif %}
# Values rendered from manager.agent_monitoring.config: both thresholds are compared
# against whole hours; PAGE_SIZE is sent as the API's perPage parameter.
OFFLINE_THRESHOLD_HOURS={{ OFFLINE_THRESHOLD_HOURS }}
REALERT_THRESHOLD={{ REALERT_THRESHOLD }}
PAGE_SIZE="{{ PAGE_SIZE }}"
# Write a UTC-timestamped diagnostic line to stderr.
#   $1 - severity label (e.g. INFO, WARN, ERROR)
#   $2 - message text
log_message() {
    local severity="$1"
    local text="$2"
    local stamp
    stamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    printf '%s [%s] %s\n' "$stamp" "$severity" "$text" >&2
}
# Decide whether a hostname is covered by the critical-agent pattern file.
#   $1 - agent hostname
#   $2 - path to the pattern file (one pattern per line, '#' starts a comment)
# Returns 0 when the hostname matches a pattern, or when the file is missing,
# not a regular file, or empty (no filter configured); returns 1 otherwise.
# Matching is case-insensitive. Each '*' becomes '.*' and the result is used as
# an anchored regular expression, so other regex metacharacters keep their meaning.
matches_critical_pattern() {
    local hostname="$1"
    local pattern_file="$2"
    local hostname_lower pattern_lower bash_pattern

    # No usable pattern file means every agent counts as critical
    [[ -f "$pattern_file" && -s "$pattern_file" ]] || return 0

    hostname_lower=$(echo "$hostname" | tr '[:upper:]' '[:lower:]')

    while IFS= read -r pattern || [ -n "$pattern" ]; do
        # Ignore blank lines and comment lines
        if [[ -z "$pattern" || "$pattern" =~ ^[[:space:]]*# ]]; then
            continue
        fi

        # xargs trims surrounding whitespace before the pattern is lowercased
        pattern=$(echo "$pattern" | xargs)
        pattern_lower=$(echo "$pattern" | tr '[:upper:]' '[:lower:]')

        # Translate the '*' wildcard into its regex equivalent
        bash_pattern="${pattern_lower//\*/.*}"

        [[ "$hostname_lower" =~ ^${bash_pattern}$ ]] && return 0
    done < "$pattern_file"

    return 1
}
# Print the number of whole hours elapsed since an agent's last check-in.
#   $1 - last check-in timestamp in any format accepted by `date -d`
# Prints 0 when the timestamp is empty or cannot be parsed.
calculate_offline_hours() {
    local last_checkin="$1"

    # GNU date treats an empty string as "midnight today", which would produce a
    # bogus offline duration for agents that have no check-in recorded.
    if [ -z "$last_checkin" ]; then
        echo "0"
        return
    fi

    local current_time checkin_time
    current_time=$(date +%s)
    checkin_time=$(date -d "$last_checkin" +%s 2>/dev/null || echo "0")

    if [ "$checkin_time" -eq "0" ]; then
        echo "0"
        return
    fi

    local diff=$((current_time - checkin_time))
    echo $((diff / 3600))
}
# Report whether an agent was already written to $LOG_FILE recently.
#   $1 - agent hostname (compared case-insensitively)
# Looks at the last 1000 lines of $LOG_FILE and takes the newest entry whose
# "agent.hostname" matches. Returns 0 when that entry is younger than
# REALERT_THRESHOLD hours (caller should skip re-alerting), 1 otherwise.
check_recent_log_entries() {
    local agent_hostname="$1"

    if [ ! -f "$LOG_FILE" ]; then
        return 1
    fi

    local current_time agent_hostname_lower most_recent_timestamp
    current_time=$(date +%s)
    agent_hostname_lower=$(echo "$agent_hostname" | tr '[:upper:]' '[:lower:]')

    # Single jq pass over the tail instead of two jq processes per log line.
    # Lines that are not JSON objects, or that lack a hostname/timestamp, are ignored.
    # The final `tail -n 1` keeps the newest matching entry.
    most_recent_timestamp=$(
        tail -n 1000 "$LOG_FILE" 2>/dev/null \
            | jq -Rr --arg host "$agent_hostname_lower" '
                (fromjson? // empty)
                | select(type == "object")
                | ((.["agent.hostname"] // "") | tostring | ascii_downcase) as $logged_host
                | ((.["@timestamp"] // "") | tostring) as $logged_ts
                | select($logged_host != "" and $logged_ts != "" and $logged_host == $host)
                | $logged_ts
              ' 2>/dev/null \
            | tail -n 1
    ) || true

    # If there is an agent entry (within last 1000 lines), check the time difference
    if [ -n "$most_recent_timestamp" ]; then
        local logged_time
        logged_time=$(date -d "$most_recent_timestamp" +%s 2>/dev/null || echo "0")
        if [ "$logged_time" -ne "0" ]; then
            local time_diff=$((current_time - logged_time))
            local hours_diff=$((time_diff / 3600))
            # Skip if last agent timestamp was more recent than realert threshold
            if ((hours_diff < REALERT_THRESHOLD)); then
                return 0
            fi
        fi
    fi

    # Agent has not been logged within realert threshold
    return 1
}
# Entry point: page through the Fleet agents API and append one JSON line to
# $LOG_FILE for every offline/degraded agent that
#   - matches the critical-agent patterns (when configured),
#   - has been offline for at least OFFLINE_THRESHOLD_HOURS, and
#   - has not already been logged within REALERT_THRESHOLD hours.
# Exits 1 when a Fleet API request fails.
main() {
    log_message "INFO" "Starting Fleet agent status check"

    # Check if critical agents file is configured
    if [ -f "$CRITICAL_AGENTS_FILE" ] && [ -s "$CRITICAL_AGENTS_FILE" ]; then
        log_message "INFO" "Using critical agents filter from: $CRITICAL_AGENTS_FILE"
        log_message "INFO" "Patterns: $(grep -v '^#' "$CRITICAL_AGENTS_FILE" 2>/dev/null | xargs | tr ' ' ',')"
    else
        log_message "INFO" "No critical agents filter found, monitoring all agents"
    fi

    log_message "INFO" "Querying Fleet API"

    local page=1
    local total_agents=0
    local processed_agents=0
    local current_timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    # URL-encoded kuery expression; the page number is appended per request below
    local fleet_kuery
{%- if CUSTOM_KQUERY != None and CUSTOM_KQUERY | length > 0 %}
    log_message "INFO" "Using custom kquery: {{ CUSTOM_KQUERY }}"
    fleet_kuery="{{ CUSTOM_KQUERY | urlencode }}"
{%- else %}
    log_message "INFO" "Using default query (all offline or degraded agents)"
    fleet_kuery="status%3Aoffline%20OR%20status%3Adegraded"
{%- endif %}

    while true; do
        # Rebuild the URL on every iteration so the incremented page number is
        # actually requested; building it once would re-fetch page 1 forever.
        FLEET_QUERY="${FLEET_API}?kuery=${fleet_kuery}&perPage=${PAGE_SIZE}&page=${page}"

        log_message "INFO" "Fetching page $page (${PAGE_SIZE} agents per page)"

        if ! response_body=$(curl -K "$CURL_CONFIG" \
            -s --fail \
            "$FLEET_QUERY" \
            -H 'kbn-xsrf: true' 2>/dev/null); then
            log_message "ERROR" "Failed to query Fleet API (page $page)"
            exit 1
        fi

        # pagination info
        current_total=$(echo "$response_body" | jq -r '.total // 0')
        current_page=$(echo "$response_body" | jq -r '.page // 1')
        agents_in_page=$(echo "$response_body" | jq -r '.list | length')

        # Update total
        if [ "$page" -eq 1 ]; then
            total_agents="$current_total"
            log_message "INFO" "Found $total_agents total agents across all pages"
        fi

        log_message "INFO" "Processing page $current_page with $agents_in_page agents"

        # Process agents from current page
        mapfile -t agents < <(echo "$response_body" | jq -c '.list[]')

        for agent in "${agents[@]}"; do
            # Grab agent details
            agent_id=$(echo "$agent" | jq -r '.id // "unknown"')
            agent_hostname=$(echo "$agent" | jq -r '.local_metadata.host.hostname // "unknown"')
            agent_name=$(echo "$agent" | jq -r '.local_metadata.host.name // "unknown"')
            agent_status=$(echo "$agent" | jq -r '.status // "unknown"')
            last_checkin=$(echo "$agent" | jq -r '.last_checkin // ""')
            last_checkin_status=$(echo "$agent" | jq -r '.last_checkin_status // "unknown"')
            policy_id=$(echo "$agent" | jq -r '.policy_id // "unknown"')

            # Only log agents that are offline or degraded (skip inactive agents)
            # Fleetserver agents can show multiple versions as 'inactive'
            if [ "$agent_status" = "offline" ] || [ "$agent_status" = "degraded" ]; then
                # Check if agent matches critical agent patterns (if configured)
                if ! matches_critical_pattern "$agent_hostname" "$CRITICAL_AGENTS_FILE"; then
                    log_message "WARN" "${agent_hostname^^} is ${agent_status^^}, but does not match configured critical agents patterns. Not logging ${agent_status^^} agent"
                    continue # Skip this agent if it doesn't match any critical agent pattern
                fi

                offline_hours=$(calculate_offline_hours "$last_checkin")

                if [ "$offline_hours" -lt "$OFFLINE_THRESHOLD_HOURS" ]; then
                    log_message "INFO" "${agent_hostname^^} has been offline for ${offline_hours}h (threshold: ${OFFLINE_THRESHOLD_HOURS}h). Not logging ${agent_status^^} agent until it reaches threshold"
                    continue
                fi

                # Check if this agent was already logged within the realert_threshold
                if check_recent_log_entries "$agent_hostname"; then
                    log_message "INFO" "Skipping $agent_hostname (status: $agent_status) - already logged within last ${REALERT_THRESHOLD}h"
                    continue
                fi

                log_entry=$(echo 'null' | jq -c \
                    --arg ts "$current_timestamp" \
                    --arg id "$agent_id" \
                    --arg hostname "$agent_hostname" \
                    --arg name "$agent_name" \
                    --arg status "$agent_status" \
                    --arg last_checkin "$last_checkin" \
                    --arg last_checkin_status "$last_checkin_status" \
                    --arg policy_id "$policy_id" \
                    --arg offline_hours "$offline_hours" \
                    '{
                        "@timestamp": $ts,
                        "agent.id": $id,
                        "agent.hostname": $hostname,
                        "agent.name": $name,
                        "agent.status": $status,
                        "agent.last_checkin": $last_checkin,
                        "agent.last_checkin_status": $last_checkin_status,
                        "agent.policy_id": $policy_id,
                        "agent.offline_duration_hours": ($offline_hours | tonumber)
                    }')

                echo "$log_entry" >> "$LOG_FILE"

                log_message "INFO" "Logged offline agent: $agent_hostname (status: $agent_status, offline: ${offline_hours}h)"
            fi
        done

        processed_agents=$((processed_agents + agents_in_page))

        if [ "$agents_in_page" -eq 0 ] || [ "$processed_agents" -ge "$total_agents" ]; then
            log_message "INFO" "Completed processing all pages. Total processed: $processed_agents agents"
            break
        fi

        page=$((page + 1))

        # Limit pagination loops in case of any issues. If agent count is high enough increase page_size in SOC manager.agent_monitoring.config.page_size
        if [ "$page" -gt 100 ]; then
            log_message "ERROR" "Reached maximum page limit (100). Issue with script or extremely large fleet deployment. Consider increasing page_size in SOC -> manager.agent_monitoring.config.page_size"
            break
        fi
    done

    log_message "INFO" "Fleet agent status check completed. Processed $processed_agents out of $total_agents agents"
}
main "$@"

View File

@@ -15,6 +15,7 @@ require_manager
# Warn the operator about the destructive effects before asking for confirmation.
echo
echo "This script will remove the current Elastic Fleet install and all of its data and then rerun Elastic Fleet setup."
echo "Deployed Elastic Agents will no longer be enrolled and will need to be reinstalled."
echo "Only the Elastic Fleet instance on the Manager will be reinstalled - dedicated Fleet node config will be removed and will need to be reinstalled."
echo "This script should only be used as a last resort to reinstall Elastic Fleet."
echo
echo "If you would like to proceed, then type AGREE and press ENTER."

View File

@@ -196,19 +196,23 @@ http {
}
location / {
auth_request /auth/sessions/whoami;
auth_request_set $userid $upstream_http_x_kratos_authenticated_identity_id;
proxy_set_header x-user-id $userid;
proxy_pass http://{{ GLOBALS.manager }}:9822/;
proxy_read_timeout 300;
proxy_connect_timeout 300;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header Proxy "";
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "Upgrade";
proxy_set_header X-Forwarded-Proto $scheme;
auth_request /auth/sessions/whoami;
auth_request_set $userid $upstream_http_x_kratos_authenticated_identity_id;
proxy_set_header x-user-id $userid;
proxy_pass http://{{ GLOBALS.manager }}:9822/;
proxy_read_timeout 300;
proxy_connect_timeout 300;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header Proxy "";
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "Upgrade";
proxy_set_header X-Forwarded-Proto $scheme;
proxy_buffering off;
proxy_cache off;
proxy_request_buffering off;
}
location ~ ^/auth/.*?(login|oidc/callback) {

View File

@@ -26,9 +26,9 @@
'rocky-devel.repo',
'rocky-extras.repo',
'rocky.repo',
'oracle-linux-ol9',
'uek-ol9',
'virt-oll9'
'oracle-linux-ol9.repo',
'uek-ol9.repo',
'virt-ol9.repo'
]
%}
{% else %}

View File

@@ -18,6 +18,7 @@ sensoroniagentconf:
- group: 939
- mode: 600
- template: jinja
- show_changes: False
analyzersdir:
file.directory:

View File

@@ -1491,6 +1491,8 @@ soc:
- repo: file:///nsm/airgap-resources/playbooks/securityonion-resources-playbooks
branch: main
folder: securityonion-normalized
assistant:
apiUrl: https://onionai.securityonion.net
salt:
queueDir: /opt/sensoroni/queue
timeoutMs: 45000
@@ -2541,3 +2543,12 @@ soc:
- ' -priv'
condition: all of selection_*
level: 'high' # info | low | medium | high | critical
assistant:
enabled: false
investigationPrompt: Investigate Alert ID {socid}
contextLimitSmall: 200000
contextLimitLarge: 1000000
thresholdColorRatioLow: 0.5
thresholdColorRatioMed: 0.75
thresholdColorRatioMax: 1
lowBalanceColorAlert: 500000

View File

@@ -580,7 +580,42 @@ soc:
- field: folder
label: Folder
airgap: *pbRepos
assistant:
apiUrl:
description: The URL of the AI gateway.
advanced: True
global: True
client:
assistant:
enabled:
description: Set to true to enable the Onion AI assistant in SOC.
global: True
investigationPrompt:
description: Prompt given to Onion AI when beginning an investigation.
global: True
contextLimitSmall:
description: Smaller context limit for Onion AI.
global: True
advanced: True
contextLimitLarge:
description: Larger context limit for Onion AI.
global: True
advanced: True
thresholdColorRatioLow:
description: Lower visual context color change threshold.
global: True
advanced: True
thresholdColorRatioMed:
description: Middle visual context color change threshold.
global: True
advanced: True
thresholdColorRatioMax:
description: Max visual context color change threshold.
global: True
advanced: True
lowBalanceColorAlert:
description: Onion AI credit amount at which balance turns red.
global: True
advanced: True
apiTimeoutMs:
description: Duration (in milliseconds) to wait for a response from the SOC server API before giving up and showing an error on the SOC UI.
global: True

View File

@@ -29,46 +29,8 @@ title() {
}
# Record an unrecoverable setup failure and terminate with exit status 1.
#   $1 - human-readable failure reason (defaults to "Unknown failure")
# Logs the reason and a call-stack trace through `error` (defined elsewhere;
# presumably the setup logger - confirm), then writes the details to /root/failure.
fail_setup() {
local failure_reason="${1:-Unknown failure}"
# Identify the immediate caller: index 1 of FUNCNAME/BASH_SOURCE is the caller's
# frame, and BASH_LINENO[0] is the line in the caller where fail_setup was invoked.
local calling_function="${FUNCNAME[1]:-main}"
local calling_line="${BASH_LINENO[0]:-unknown}"
local calling_file="${BASH_SOURCE[1]:-unknown}"
# Build a "func(file:line) -> func(file:line)" trace, starting at the caller (i=1)
# and walking outwards through every remaining frame.
local call_stack=""
local i=1
while [[ $i -lt ${#FUNCNAME[@]} ]]; do
local func="${FUNCNAME[$i]}"
local file="${BASH_SOURCE[$i]##*/}" # Get basename only
local line="${BASH_LINENO[$((i-1))]}"
if [[ -n "$call_stack" ]]; then
call_stack="$call_stack -> "
fi
call_stack="$call_stack$func($file:$line)"
((i++))
done
# Log the caller, the reason and the full trace
error "FAILURE: Called from $calling_function() at line $calling_line"
error "REASON: $failure_reason"
error "STACK: $call_stack"
error "Setup encountered an unrecoverable failure: $failure_reason"
# Write the failure details as KEY=value lines; the redirect replaces any existing file.
# $setup_log is not set in this function - it must come from the surrounding script.
{
echo "SETUP_FAILURE_TIMESTAMP=$(date -u '+%Y-%m-%d %H:%M:%S UTC')"
echo "SETUP_FAILURE_REASON=$failure_reason"
echo "SETUP_CALLING_FUNCTION=$calling_function"
echo "SETUP_CALLING_LINE=$calling_line"
echo "SETUP_CALLING_FILE=${calling_file##*/}"
echo "SETUP_CALL_STACK=$call_stack"
echo "SETUP_LOG_LOCATION=$setup_log"
echo "SETUP_FAILURE_DETAILS=Check $setup_log for complete error details"
} > /root/failure
error "Setup encountered an unrecoverable failure, exiting"
# NOTE(review): the file already exists after the redirect above, so this touch only refreshes its mtime.
touch /root/failure
exit 1
}