mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2026-01-18 06:01:41 +01:00
add timing to scripts to allow for debugging delays
This commit is contained in:
@@ -4,32 +4,148 @@
|
||||
# https://securityonion.net/license; you may not use this file except in compliance with the
|
||||
# Elastic License 2.0.
|
||||
|
||||
# Logging configuration
|
||||
LOG_FILE="/opt/so/log/elasticsearch/so-elasticsearch-ilm-policy-load.log"
|
||||
LOG_DIR=$(dirname "$LOG_FILE")
|
||||
|
||||
# Ensure log directory exists
|
||||
if [[ ! -d "$LOG_DIR" ]]; then
|
||||
mkdir -p "$LOG_DIR"
|
||||
fi
|
||||
|
||||
# Timing variables
|
||||
SCRIPT_START_TIME=$(date +%s%3N)
|
||||
LAST_CHECKPOINT_TIME=$SCRIPT_START_TIME
|
||||
|
||||
# Get elapsed time in milliseconds since script start
|
||||
get_elapsed_ms() {
|
||||
local now=$(date +%s%3N)
|
||||
echo $((now - SCRIPT_START_TIME))
|
||||
}
|
||||
|
||||
# Get time since last checkpoint and update checkpoint
|
||||
get_checkpoint_ms() {
|
||||
local now=$(date +%s%3N)
|
||||
local elapsed=$((now - LAST_CHECKPOINT_TIME))
|
||||
LAST_CHECKPOINT_TIME=$now
|
||||
echo $elapsed
|
||||
}
|
||||
|
||||
# Logging function with timing
|
||||
log() {
|
||||
local level=$1
|
||||
shift
|
||||
local message="$*"
|
||||
local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
local elapsed_ms=$(get_elapsed_ms)
|
||||
local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc)
|
||||
echo "[$timestamp] [+${elapsed_sec}s] [$level] $message" | tee -a "$LOG_FILE"
|
||||
}
|
||||
|
||||
log_info() {
|
||||
log "INFO" "$@"
|
||||
}
|
||||
|
||||
log_warn() {
|
||||
log "WARN" "$@"
|
||||
}
|
||||
|
||||
log_error() {
|
||||
log "ERROR" "$@"
|
||||
}
|
||||
|
||||
# Log with checkpoint timing (shows time since last checkpoint)
|
||||
log_timing() {
|
||||
local checkpoint_ms=$(get_checkpoint_ms)
|
||||
local checkpoint_sec=$(echo "scale=3; $checkpoint_ms / 1000" | bc)
|
||||
local elapsed_ms=$(get_elapsed_ms)
|
||||
local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc)
|
||||
local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
echo "[$timestamp] [+${elapsed_sec}s] [TIMING] $* (took ${checkpoint_sec}s)" | tee -a "$LOG_FILE"
|
||||
}
|
||||
|
||||
log_info "========== Starting Elasticsearch ILM policy load =========="
|
||||
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
. /usr/sbin/so-common
|
||||
log_timing "Sourced so-common"
|
||||
|
||||
{%- from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS %}
|
||||
|
||||
policy_count=0
|
||||
total_policy_time_ms=0
|
||||
|
||||
{%- for index, settings in ES_INDEX_SETTINGS.items() %}
|
||||
{%- if settings.policy is defined %}
|
||||
{%- if index == 'so-logs-detections.alerts' %}
|
||||
echo
|
||||
echo "Setting up so-logs-detections.alerts-so policy..."
|
||||
log_info "Setting up so-logs-detections.alerts-so policy..."
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
policy_start=$(date +%s%3N)
|
||||
curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-so" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }'
|
||||
policy_end=$(date +%s%3N)
|
||||
policy_duration=$((policy_end - policy_start))
|
||||
total_policy_time_ms=$((total_policy_time_ms + policy_duration))
|
||||
policy_count=$((policy_count + 1))
|
||||
log_timing "Completed so-logs-detections.alerts-so policy (${policy_duration}ms)"
|
||||
echo
|
||||
{%- elif index == 'so-logs-soc' %}
|
||||
echo
|
||||
echo "Setting up so-soc-logs policy..."
|
||||
log_info "Setting up so-soc-logs policy..."
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
policy_start=$(date +%s%3N)
|
||||
curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/so-soc-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }'
|
||||
policy_end=$(date +%s%3N)
|
||||
policy_duration=$((policy_end - policy_start))
|
||||
total_policy_time_ms=$((total_policy_time_ms + policy_duration))
|
||||
policy_count=$((policy_count + 1))
|
||||
log_timing "Completed so-soc-logs policy (${policy_duration}ms)"
|
||||
echo
|
||||
echo
|
||||
echo "Setting up {{ index }}-logs policy..."
|
||||
log_info "Setting up {{ index }}-logs policy..."
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
policy_start=$(date +%s%3N)
|
||||
curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }'
|
||||
policy_end=$(date +%s%3N)
|
||||
policy_duration=$((policy_end - policy_start))
|
||||
total_policy_time_ms=$((total_policy_time_ms + policy_duration))
|
||||
policy_count=$((policy_count + 1))
|
||||
log_timing "Completed {{ index }}-logs policy (${policy_duration}ms)"
|
||||
echo
|
||||
{%- else %}
|
||||
echo
|
||||
echo "Setting up {{ index }}-logs policy..."
|
||||
log_info "Setting up {{ index }}-logs policy..."
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
policy_start=$(date +%s%3N)
|
||||
curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }'
|
||||
policy_end=$(date +%s%3N)
|
||||
policy_duration=$((policy_end - policy_start))
|
||||
total_policy_time_ms=$((total_policy_time_ms + policy_duration))
|
||||
policy_count=$((policy_count + 1))
|
||||
log_timing "Completed {{ index }}-logs policy (${policy_duration}ms)"
|
||||
echo
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
echo
|
||||
|
||||
# Final timing summary
|
||||
total_elapsed_ms=$(get_elapsed_ms)
|
||||
total_elapsed_sec=$(echo "scale=3; $total_elapsed_ms / 1000" | bc)
|
||||
avg_policy_ms=0
|
||||
if [[ $policy_count -gt 0 ]]; then
|
||||
avg_policy_ms=$((total_policy_time_ms / policy_count))
|
||||
fi
|
||||
|
||||
log_info "========== TIMING SUMMARY =========="
|
||||
log_info "Total policies processed: $policy_count"
|
||||
log_info "Total policy load time: ${total_policy_time_ms}ms"
|
||||
log_info "Average time per policy: ${avg_policy_ms}ms"
|
||||
log_info "Total script time: ${total_elapsed_sec}s"
|
||||
log_info "===================================="
|
||||
|
||||
log_info "========== Elasticsearch ILM policy load complete (${total_elapsed_sec}s) =========="
|
||||
|
||||
@@ -6,20 +6,84 @@
|
||||
{%- import_yaml 'elasticfleet/defaults.yaml' as ELASTICFLEETDEFAULTS %}
|
||||
{% from 'vars/globals.map.jinja' import GLOBALS %}
|
||||
|
||||
# Logging configuration
|
||||
LOG_FILE="/opt/so/log/elasticsearch/so-elasticsearch-templates-load.log"
|
||||
LOG_DIR=$(dirname "$LOG_FILE")
|
||||
|
||||
# Ensure log directory exists
|
||||
if [[ ! -d "$LOG_DIR" ]]; then
|
||||
mkdir -p "$LOG_DIR"
|
||||
fi
|
||||
|
||||
# Timing variables
|
||||
SCRIPT_START_TIME=$(date +%s%3N)
|
||||
LAST_CHECKPOINT_TIME=$SCRIPT_START_TIME
|
||||
|
||||
# Get elapsed time in milliseconds since script start
|
||||
get_elapsed_ms() {
|
||||
local now=$(date +%s%3N)
|
||||
echo $((now - SCRIPT_START_TIME))
|
||||
}
|
||||
|
||||
# Get time since last checkpoint and update checkpoint
|
||||
get_checkpoint_ms() {
|
||||
local now=$(date +%s%3N)
|
||||
local elapsed=$((now - LAST_CHECKPOINT_TIME))
|
||||
LAST_CHECKPOINT_TIME=$now
|
||||
echo $elapsed
|
||||
}
|
||||
|
||||
# Logging function with timing
|
||||
log() {
|
||||
local level=$1
|
||||
shift
|
||||
local message="$*"
|
||||
local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
local elapsed_ms=$(get_elapsed_ms)
|
||||
local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc)
|
||||
echo "[$timestamp] [+${elapsed_sec}s] [$level] $message" | tee -a "$LOG_FILE"
|
||||
}
|
||||
|
||||
log_info() {
|
||||
log "INFO" "$@"
|
||||
}
|
||||
|
||||
log_warn() {
|
||||
log "WARN" "$@"
|
||||
}
|
||||
|
||||
log_error() {
|
||||
log "ERROR" "$@"
|
||||
}
|
||||
|
||||
# Log with checkpoint timing (shows time since last checkpoint)
|
||||
log_timing() {
|
||||
local checkpoint_ms=$(get_checkpoint_ms)
|
||||
local checkpoint_sec=$(echo "scale=3; $checkpoint_ms / 1000" | bc)
|
||||
local elapsed_ms=$(get_elapsed_ms)
|
||||
local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc)
|
||||
local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
echo "[$timestamp] [+${elapsed_sec}s] [TIMING] $* (took ${checkpoint_sec}s)" | tee -a "$LOG_FILE"
|
||||
}
|
||||
|
||||
STATE_FILE_INITIAL=/opt/so/state/estemplates_initial_load_attempt.txt
|
||||
STATE_FILE_SUCCESS=/opt/so/state/estemplates.txt
|
||||
|
||||
log_info "========== Starting Elasticsearch templates load =========="
|
||||
|
||||
if [[ -f $STATE_FILE_INITIAL ]]; then
|
||||
# The initial template load has already run. As this is a subsequent load, all dependencies should
|
||||
# already be satisified. Therefore, immediately exit/abort this script upon any template load failure
|
||||
# since this is an unrecoverable failure.
|
||||
should_exit_on_failure=1
|
||||
log_info "State file $STATE_FILE_INITIAL exists - this is a subsequent template load"
|
||||
else
|
||||
# This is the initial template load, and there likely are some components not yet setup in Elasticsearch.
|
||||
# Therefore load as many templates as possible at this time and if an error occurs proceed to the next
|
||||
# template. But if at least one template fails to load do not mark the templates as having been loaded.
|
||||
# This will allow the next load to resume the load of the templates that failed to load initially.
|
||||
should_exit_on_failure=0
|
||||
log_info "This is the initial template load"
|
||||
echo "This is the initial template load"
|
||||
fi
|
||||
|
||||
@@ -27,30 +91,51 @@ fi
|
||||
pgrep soup > /dev/null && should_exit_on_failure=0
|
||||
|
||||
load_failures=0
|
||||
template_count=0
|
||||
total_template_time_ms=0
|
||||
|
||||
load_template() {
|
||||
uri=$1
|
||||
file=$2
|
||||
|
||||
local start_time=$(date +%s%3N)
|
||||
|
||||
echo "Loading template file $i"
|
||||
if ! retry 3 1 "so-elasticsearch-query $uri -d@$file -XPUT" "{\"acknowledged\":true}"; then
|
||||
local end_time=$(date +%s%3N)
|
||||
local duration=$((end_time - start_time))
|
||||
total_template_time_ms=$((total_template_time_ms + duration))
|
||||
template_count=$((template_count + 1))
|
||||
if [[ $should_exit_on_failure -eq 1 ]]; then
|
||||
log_error "FAILED template $file (${duration}ms)"
|
||||
fail "Could not load template file: $file"
|
||||
else
|
||||
load_failures=$((load_failures+1))
|
||||
log_warn "FAILED template $file (${duration}ms) - failure count: $load_failures"
|
||||
echo "Incremented load failure counter: $load_failures"
|
||||
fi
|
||||
else
|
||||
local end_time=$(date +%s%3N)
|
||||
local duration=$((end_time - start_time))
|
||||
total_template_time_ms=$((total_template_time_ms + duration))
|
||||
template_count=$((template_count + 1))
|
||||
log_info "OK template $file (${duration}ms)"
|
||||
fi
|
||||
}
|
||||
|
||||
if [ ! -f $STATE_FILE_SUCCESS ]; then
|
||||
echo "State file $STATE_FILE_SUCCESS not found. Running so-elasticsearch-templates-load."
|
||||
log_info "State file $STATE_FILE_SUCCESS not found. Running so-elasticsearch-templates-load."
|
||||
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
. /usr/sbin/so-common
|
||||
log_timing "Sourced so-common"
|
||||
|
||||
{% if GLOBALS.role != 'so-heavynode' %}
|
||||
if [ -f /usr/sbin/so-elastic-fleet-common ]; then
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
. /usr/sbin/so-elastic-fleet-common
|
||||
log_timing "Sourced so-elastic-fleet-common"
|
||||
fi
|
||||
{% endif %}
|
||||
|
||||
@@ -68,14 +153,20 @@ if [ ! -f $STATE_FILE_SUCCESS ]; then
|
||||
if [ -f "$file" ]; then
|
||||
# Wait for ElasticSearch to initialize
|
||||
echo -n "Waiting for ElasticSearch..."
|
||||
log_info "Waiting for ElasticSearch to initialize..."
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
retry 240 1 "so-elasticsearch-query / -k --output /dev/null --silent --head --fail" || fail "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'"
|
||||
log_timing "ElasticSearch connection established"
|
||||
{% if GLOBALS.role != 'so-heavynode' %}
|
||||
TEMPLATE="logs-endpoint.alerts@package"
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
INSTALLED=$(so-elasticsearch-query _component_template/$TEMPLATE | jq -r .component_templates[0].name)
|
||||
log_timing "Checked for installed package template $TEMPLATE"
|
||||
if [ "$INSTALLED" != "$TEMPLATE" ]; then
|
||||
echo
|
||||
echo "Packages not yet installed."
|
||||
echo
|
||||
log_warn "Packages not yet installed - exiting"
|
||||
exit 0
|
||||
fi
|
||||
{% endif %}
|
||||
@@ -84,40 +175,56 @@ if [ ! -f $STATE_FILE_SUCCESS ]; then
|
||||
|
||||
cd ${ELASTICSEARCH_TEMPLATES}/component/ecs
|
||||
|
||||
log_info "===== Loading ECS component templates from $(pwd) ====="
|
||||
echo "Loading ECS component templates..."
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
ecs_count=0
|
||||
for i in *; do
|
||||
TEMPLATE=$(echo $i | cut -d '.' -f1)
|
||||
load_template "_component_template/${TEMPLATE}-mappings" "$i"
|
||||
ecs_count=$((ecs_count + 1))
|
||||
done
|
||||
log_timing "Completed $ecs_count ECS component templates"
|
||||
echo
|
||||
|
||||
cd ${ELASTICSEARCH_TEMPLATES}/component/elastic-agent
|
||||
|
||||
log_info "===== Loading Elastic Agent component templates from $(pwd) ====="
|
||||
echo "Loading Elastic Agent component templates..."
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
{% if GLOBALS.role == 'so-heavynode' %}
|
||||
component_pattern="so-*"
|
||||
{% else %}
|
||||
component_pattern="*"
|
||||
{% endif %}
|
||||
agent_count=0
|
||||
for i in $component_pattern; do
|
||||
TEMPLATE=${i::-5}
|
||||
load_template "_component_template/$TEMPLATE" "$i"
|
||||
agent_count=$((agent_count + 1))
|
||||
done
|
||||
log_timing "Completed $agent_count Elastic Agent component templates"
|
||||
echo
|
||||
|
||||
# Load SO-specific component templates
|
||||
cd ${ELASTICSEARCH_TEMPLATES}/component/so
|
||||
|
||||
log_info "===== Loading Security Onion component templates from $(pwd) ====="
|
||||
echo "Loading Security Onion component templates..."
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
so_count=0
|
||||
for i in *; do
|
||||
TEMPLATE=$(echo $i | cut -d '.' -f1);
|
||||
load_template "_component_template/$TEMPLATE" "$i"
|
||||
so_count=$((so_count + 1))
|
||||
done
|
||||
log_timing "Completed $so_count Security Onion component templates"
|
||||
echo
|
||||
|
||||
# Load SO index templates
|
||||
cd ${ELASTICSEARCH_TEMPLATES}/index
|
||||
|
||||
log_info "===== Loading Security Onion index templates from $(pwd) ====="
|
||||
echo "Loading Security Onion index templates..."
|
||||
shopt -s extglob
|
||||
{% if GLOBALS.role == 'so-heavynode' %}
|
||||
@@ -131,35 +238,66 @@ if [ ! -f $STATE_FILE_SUCCESS ]; then
|
||||
# In this situation, the script will treat the skipped template as a temporary failure
|
||||
# and allow the templates to be loaded again on the next run or highstate, whichever
|
||||
# comes first.
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
COMPONENT_LIST=$(so-elasticsearch-component-templates-list)
|
||||
log_timing "Retrieved component templates list"
|
||||
LAST_CHECKPOINT_TIME=$(date +%s%3N)
|
||||
index_count=0
|
||||
skipped_count=0
|
||||
for i in $pattern; do
|
||||
TEMPLATE=${i::-14}
|
||||
COMPONENT_PATTERN=${TEMPLATE:3}
|
||||
MATCH=$(echo "$TEMPLATE" | grep -E "^so-logs-|^so-metrics" | grep -vE "detections|osquery")
|
||||
if [[ -n "$MATCH" && ! "$COMPONENT_LIST" =~ "$COMPONENT_PATTERN" && ! "$COMPONENT_PATTERN" =~ \.generic|logs-winlog\.winlog ]]; then
|
||||
load_failures=$((load_failures+1))
|
||||
skipped_count=$((skipped_count + 1))
|
||||
log_warn "SKIPPED - Component template does not exist for $COMPONENT_PATTERN"
|
||||
echo "Component template does not exist for $COMPONENT_PATTERN. The index template will not be loaded. Load failures: $load_failures"
|
||||
else
|
||||
load_template "_index_template/$TEMPLATE" "$i"
|
||||
index_count=$((index_count + 1))
|
||||
fi
|
||||
done
|
||||
log_timing "Completed $index_count index templates ($skipped_count skipped)"
|
||||
else
|
||||
{% if GLOBALS.role == 'so-heavynode' %}
|
||||
echo "Common template does not exist. Exiting..."
|
||||
log_warn "Common template does not exist. Exiting..."
|
||||
{% else %}
|
||||
echo "Elastic Fleet not configured. Exiting..."
|
||||
log_warn "Elastic Fleet not configured. Exiting..."
|
||||
{% endif %}
|
||||
exit 0
|
||||
fi
|
||||
|
||||
cd - >/dev/null
|
||||
|
||||
# Final timing summary
|
||||
total_elapsed_ms=$(get_elapsed_ms)
|
||||
total_elapsed_sec=$(echo "scale=3; $total_elapsed_ms / 1000" | bc)
|
||||
avg_template_ms=0
|
||||
if [[ $template_count -gt 0 ]]; then
|
||||
avg_template_ms=$((total_template_time_ms / template_count))
|
||||
fi
|
||||
|
||||
log_info "========== TIMING SUMMARY =========="
|
||||
log_info "Total templates processed: $template_count"
|
||||
log_info "Total template load time: ${total_template_time_ms}ms"
|
||||
log_info "Average time per template: ${avg_template_ms}ms"
|
||||
log_info "Total script time: ${total_elapsed_sec}s"
|
||||
log_info "===================================="
|
||||
|
||||
if [[ $load_failures -eq 0 ]]; then
|
||||
echo "All templates loaded successfully"
|
||||
log_info "All templates loaded successfully"
|
||||
touch $STATE_FILE_SUCCESS
|
||||
else
|
||||
echo "Encountered $load_failures templates that were unable to load, likely due to missing dependencies that will be available later; will retry on next highstate"
|
||||
log_warn "Encountered $load_failures templates that were unable to load"
|
||||
fi
|
||||
else
|
||||
echo "Templates already loaded"
|
||||
log_info "Templates already loaded (state file $STATE_FILE_SUCCESS exists)"
|
||||
fi
|
||||
|
||||
log_info "========== Elasticsearch templates load complete ($(echo "scale=3; $(get_elapsed_ms) / 1000" | bc)s) =========="
|
||||
|
||||
Reference in New Issue
Block a user