mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2025-12-07 17:52:46 +01:00
custom kquery
This commit is contained in:
@@ -38,6 +38,7 @@ elasticfleet:
|
|||||||
- elasticsearch
|
- elasticsearch
|
||||||
- endpoint
|
- endpoint
|
||||||
- fleet_server
|
- fleet_server
|
||||||
|
- filestream
|
||||||
- http_endpoint
|
- http_endpoint
|
||||||
- httpjson
|
- httpjson
|
||||||
- log
|
- log
|
||||||
|
|||||||
36
salt/elasticsearch/files/ingest/elasticagent.monitor
Normal file
36
salt/elasticsearch/files/ingest/elasticagent.monitor
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
{
|
||||||
|
"processors": [
|
||||||
|
{
|
||||||
|
"set": {
|
||||||
|
"field": "event.dataset",
|
||||||
|
"value": "gridmetrics.agents",
|
||||||
|
"ignore_failure": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"set": {
|
||||||
|
"field": "event.module",
|
||||||
|
"value": "gridmetrics",
|
||||||
|
"ignore_failure": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"remove": {
|
||||||
|
"field": [
|
||||||
|
"host",
|
||||||
|
"elastic_agent",
|
||||||
|
"agent"
|
||||||
|
],
|
||||||
|
"ignore_missing": true,
|
||||||
|
"ignore_failure": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"json": {
|
||||||
|
"field": "message",
|
||||||
|
"add_to_root": true,
|
||||||
|
"ignore_failure": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -9,6 +9,7 @@ manager:
|
|||||||
enabled: False
|
enabled: False
|
||||||
config:
|
config:
|
||||||
critical_agents: []
|
critical_agents: []
|
||||||
|
custom_kquery:
|
||||||
offline_threshold: 5
|
offline_threshold: 5
|
||||||
page_size: 250
|
page_size: 250
|
||||||
run_interval: 5
|
run_interval: 5
|
||||||
|
|||||||
@@ -45,11 +45,17 @@ manager:
|
|||||||
forcedType: bool
|
forcedType: bool
|
||||||
config:
|
config:
|
||||||
critical_agents:
|
critical_agents:
|
||||||
description: List of 'critical' agents to log when they haven't checked in longer than the maximum allowed time. If there are no 'critical' agents specified all offline agents will be logged once they reach the offline threshold
|
description: List of 'critical' agents to log when they haven't checked in longer than the maximum allowed time. If there are no 'critical' agents specified all offline agents will be logged once they reach the offline threshold.
|
||||||
global: True
|
global: True
|
||||||
multiline: True
|
multiline: True
|
||||||
helpLink: elastic-fleet.html
|
helpLink: elastic-fleet.html
|
||||||
forcedType: "[]string"
|
forcedType: "[]string"
|
||||||
|
custom_kquery:
|
||||||
|
description: For more granular control over which agents to monitor for offline|degraded status, add a kquery here. It is recommended to create and test the query within Elastic Fleet first to ensure it targets your agents correctly, e.g. 'status:offline AND tags:INFRA'.
|
||||||
|
global: True
|
||||||
|
helpLink: elastic-fleet.html
|
||||||
|
forcedType: string
|
||||||
|
advanced: True
|
||||||
offline_threshold:
|
offline_threshold:
|
||||||
description: The maximum allowed time in hours a 'critical' agent has been offline before being logged.
|
description: The maximum allowed time in hours a 'critical' agent has been offline before being logged.
|
||||||
global: True
|
global: True
|
||||||
|
|||||||
@@ -1,17 +1,21 @@
|
|||||||
|
{%- from 'manager/map.jinja' import MANAGERMERGED -%}
|
||||||
|
{%- set OFFLINE_THRESHOLD_HOURS = MANAGERMERGED.agent_monitoring.config.offline_threshold -%}
|
||||||
|
{%- set PAGE_SIZE = MANAGERMERGED.agent_monitoring.config.page_size -%}
|
||||||
|
{%- set CUSTOM_KQUERY = MANAGERMERGED.agent_monitoring.config.custom_kquery -%}
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
{% from 'manager/map.jinja' import MANAGERMERGED %}
|
|
||||||
{%- set OFFLINE_THRESHOLD_HOURS = MANAGERMERGED.agent_monitoring.config.offline_threshold %}
|
|
||||||
{%- set PAGE_SIZE = MANAGERMERGED.agent_monitoring.config.page_size %}
|
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
LOG_DIR="/opt/so/log/agents"
|
LOG_DIR="/opt/so/log/agents"
|
||||||
LOG_FILE="$LOG_DIR/agent-monitor-$(date -u +"%Y%m%d").log"
|
LOG_FILE="$LOG_DIR/agent-monitor-$(date -u +"%Y%m%d").log"
|
||||||
CURL_CONFIG="/opt/so/conf/elasticsearch/curl.config"
|
CURL_CONFIG="/opt/so/conf/elasticsearch/curl.config"
|
||||||
FLEET_API="http://localhost:5601/api/fleet/agents"
|
FLEET_API="http://localhost:5601/api/fleet/agents"
|
||||||
|
{#- When using a custom kquery, ignore the critical agents patterns, since we want all results of the custom query logged. #}
|
||||||
|
{%- if CUSTOM_KQUERY != None and CUSTOM_KQUERY | length > 0 %}
|
||||||
|
CRITICAL_AGENTS_FILE="/dev/null"
|
||||||
|
{%- else %}
|
||||||
CRITICAL_AGENTS_FILE="/opt/so/conf/agents/critical-agents.txt"
|
CRITICAL_AGENTS_FILE="/opt/so/conf/agents/critical-agents.txt"
|
||||||
|
{%- endif %}
|
||||||
OFFLINE_THRESHOLD_HOURS={{ OFFLINE_THRESHOLD_HOURS }}
|
OFFLINE_THRESHOLD_HOURS={{ OFFLINE_THRESHOLD_HOURS }}
|
||||||
PAGE_SIZE="{{ PAGE_SIZE }}"
|
PAGE_SIZE="{{ PAGE_SIZE }}"
|
||||||
|
|
||||||
@@ -98,12 +102,20 @@ main() {
|
|||||||
local processed_agents=0
|
local processed_agents=0
|
||||||
local current_timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
local current_timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||||
|
|
||||||
|
{%- if CUSTOM_KQUERY != None and CUSTOM_KQUERY | length > 0 %}
|
||||||
|
log_message "INFO" "Using custom kquery: {{ CUSTOM_KQUERY }}"
|
||||||
|
FLEET_QUERY="${FLEET_API}?kuery={{ CUSTOM_KQUERY | urlencode }}&perPage=${PAGE_SIZE}&page=${page}"
|
||||||
|
{%- else %}
|
||||||
|
log_message "INFO" "Using default query (all offline or degraded agents)"
|
||||||
|
FLEET_QUERY="${FLEET_API}?kuery=status%3Aoffline%20OR%20status%3Adegraded&perPage=${PAGE_SIZE}&page=${page}"
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
while true; do
|
while true; do
|
||||||
log_message "INFO" "Fetching page $page (${PAGE_SIZE} agents per page)"
|
log_message "INFO" "Fetching page $page (${PAGE_SIZE} agents per page)"
|
||||||
|
|
||||||
if ! response_body=$(curl -K "$CURL_CONFIG" \
|
if ! response_body=$(curl -K "$CURL_CONFIG" \
|
||||||
-s --fail \
|
-s --fail \
|
||||||
"${FLEET_API}?perPage=${PAGE_SIZE}&page=${page}&showInactive=true" \
|
$FLEET_QUERY \
|
||||||
-H 'kbn-xsrf: true' 2>/dev/null); then
|
-H 'kbn-xsrf: true' 2>/dev/null); then
|
||||||
log_message "ERROR" "Failed to query Fleet API (page $page)"
|
log_message "ERROR" "Failed to query Fleet API (page $page)"
|
||||||
exit 1
|
exit 1
|
||||||
@@ -123,7 +135,9 @@ main() {
|
|||||||
log_message "INFO" "Processing page $current_page with $agents_in_page agents"
|
log_message "INFO" "Processing page $current_page with $agents_in_page agents"
|
||||||
|
|
||||||
# Process agents from current page
|
# Process agents from current page
|
||||||
echo "$response_body" | jq -c '.list[]' | while IFS= read -r agent; do
|
mapfile -t agents < <(echo "$response_body" | jq -c '.list[]')
|
||||||
|
|
||||||
|
for agent in "${agents[@]}"; do
|
||||||
# Grab agent details
|
# Grab agent details
|
||||||
agent_id=$(echo "$agent" | jq -r '.id // "unknown"')
|
agent_id=$(echo "$agent" | jq -r '.id // "unknown"')
|
||||||
agent_hostname=$(echo "$agent" | jq -r '.local_metadata.host.hostname // "unknown"')
|
agent_hostname=$(echo "$agent" | jq -r '.local_metadata.host.hostname // "unknown"')
|
||||||
@@ -138,12 +152,13 @@ main() {
|
|||||||
if [ "$agent_status" = "offline" ] || [ "$agent_status" = "degraded" ]; then
|
if [ "$agent_status" = "offline" ] || [ "$agent_status" = "degraded" ]; then
|
||||||
# Check if agent matches critical agent patterns (if configured)
|
# Check if agent matches critical agent patterns (if configured)
|
||||||
if ! matches_critical_pattern "$agent_hostname" "$CRITICAL_AGENTS_FILE"; then
|
if ! matches_critical_pattern "$agent_hostname" "$CRITICAL_AGENTS_FILE"; then
|
||||||
|
log_message "WARN" "${agent_hostname^^} is ${agent_status^^}, but does not match configured critical agents patterns. Not logging ${agent_status^^} agent"
|
||||||
continue # Skip this agent if it doesn't match any critical agent pattern
|
continue # Skip this agent if it doesn't match any critical agent pattern
|
||||||
fi
|
fi
|
||||||
|
|
||||||
offline_hours=$(calculate_offline_hours "$last_checkin")
|
offline_hours=$(calculate_offline_hours "$last_checkin")
|
||||||
|
|
||||||
log_entry=$(jq -c \
|
log_entry=$(echo 'null' | jq -c \
|
||||||
--arg ts "$current_timestamp" \
|
--arg ts "$current_timestamp" \
|
||||||
--arg id "$agent_id" \
|
--arg id "$agent_id" \
|
||||||
--arg hostname "$agent_hostname" \
|
--arg hostname "$agent_hostname" \
|
||||||
|
|||||||
Reference in New Issue
Block a user