From 5c63111002862a581e429efd3035ac55b37c5673 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 16 Jan 2026 16:42:24 -0500 Subject: [PATCH 1/6] add timing to scripts to allow for debugging delays --- .../so-elasticsearch-ilm-policy-load | 116 +++++++++++++++ .../so-elasticsearch-templates-load | 138 ++++++++++++++++++ 2 files changed, 254 insertions(+) diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load index 04a7a8ab0..13123020c 100755 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load +++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load @@ -4,32 +4,148 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. +# Logging configuration +LOG_FILE="/opt/so/log/elasticsearch/so-elasticsearch-ilm-policy-load.log" +LOG_DIR=$(dirname "$LOG_FILE") + +# Ensure log directory exists +if [[ ! -d "$LOG_DIR" ]]; then + mkdir -p "$LOG_DIR" +fi + +# Timing variables +SCRIPT_START_TIME=$(date +%s%3N) +LAST_CHECKPOINT_TIME=$SCRIPT_START_TIME + +# Get elapsed time in milliseconds since script start +get_elapsed_ms() { + local now=$(date +%s%3N) + echo $((now - SCRIPT_START_TIME)) +} + +# Get time since last checkpoint and update checkpoint +get_checkpoint_ms() { + local now=$(date +%s%3N) + local elapsed=$((now - LAST_CHECKPOINT_TIME)) + LAST_CHECKPOINT_TIME=$now + echo $elapsed +} + +# Logging function with timing +log() { + local level=$1 + shift + local message="$*" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + local elapsed_ms=$(get_elapsed_ms) + local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc) + echo "[$timestamp] [+${elapsed_sec}s] [$level] $message" | tee -a "$LOG_FILE" +} + +log_info() { + log "INFO" "$@" +} + +log_warn() { + log "WARN" "$@" +} + +log_error() { + log "ERROR" "$@" +} + +# Log with checkpoint timing (shows time since last checkpoint) +log_timing() { + local checkpoint_ms=$(get_checkpoint_ms) + local checkpoint_sec=$(echo "scale=3; $checkpoint_ms / 1000" | bc) + local elapsed_ms=$(get_elapsed_ms) + local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc) + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + echo "[$timestamp] [+${elapsed_sec}s] [TIMING] $* (took ${checkpoint_sec}s)" | tee -a "$LOG_FILE" +} + +log_info "========== Starting Elasticsearch ILM policy load ==========" + +LAST_CHECKPOINT_TIME=$(date +%s%3N) . /usr/sbin/so-common +log_timing "Sourced so-common" {%- from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS %} +policy_count=0 +total_policy_time_ms=0 + {%- for index, settings in ES_INDEX_SETTINGS.items() %} {%- if settings.policy is defined %} {%- if index == 'so-logs-detections.alerts' %} echo echo "Setting up so-logs-detections.alerts-so policy..." + log_info "Setting up so-logs-detections.alerts-so policy..." + LAST_CHECKPOINT_TIME=$(date +%s%3N) + policy_start=$(date +%s%3N) curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-so" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }' + policy_end=$(date +%s%3N) + policy_duration=$((policy_end - policy_start)) + total_policy_time_ms=$((total_policy_time_ms + policy_duration)) + policy_count=$((policy_count + 1)) + log_timing "Completed so-logs-detections.alerts-so policy (${policy_duration}ms)" echo {%- elif index == 'so-logs-soc' %} echo echo "Setting up so-soc-logs policy..." + log_info "Setting up so-soc-logs policy..." + LAST_CHECKPOINT_TIME=$(date +%s%3N) + policy_start=$(date +%s%3N) curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/so-soc-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }' + policy_end=$(date +%s%3N) + policy_duration=$((policy_end - policy_start)) + total_policy_time_ms=$((total_policy_time_ms + policy_duration)) + policy_count=$((policy_count + 1)) + log_timing "Completed so-soc-logs policy (${policy_duration}ms)" echo echo echo "Setting up {{ index }}-logs policy..." + log_info "Setting up {{ index }}-logs policy..." + LAST_CHECKPOINT_TIME=$(date +%s%3N) + policy_start=$(date +%s%3N) curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }' + policy_end=$(date +%s%3N) + policy_duration=$((policy_end - policy_start)) + total_policy_time_ms=$((total_policy_time_ms + policy_duration)) + policy_count=$((policy_count + 1)) + log_timing "Completed {{ index }}-logs policy (${policy_duration}ms)" echo {%- else %} echo echo "Setting up {{ index }}-logs policy..." + log_info "Setting up {{ index }}-logs policy..." + LAST_CHECKPOINT_TIME=$(date +%s%3N) + policy_start=$(date +%s%3N) curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }' + policy_end=$(date +%s%3N) + policy_duration=$((policy_end - policy_start)) + total_policy_time_ms=$((total_policy_time_ms + policy_duration)) + policy_count=$((policy_count + 1)) + log_timing "Completed {{ index }}-logs policy (${policy_duration}ms)" echo {%- endif %} {%- endif %} {%- endfor %} echo + +# Final timing summary +total_elapsed_ms=$(get_elapsed_ms) +total_elapsed_sec=$(echo "scale=3; $total_elapsed_ms / 1000" | bc) +avg_policy_ms=0 +if [[ $policy_count -gt 0 ]]; then + avg_policy_ms=$((total_policy_time_ms / policy_count)) +fi + +log_info "========== TIMING SUMMARY ==========" +log_info "Total policies processed: $policy_count" +log_info "Total policy load time: ${total_policy_time_ms}ms" +log_info "Average time per policy: ${avg_policy_ms}ms" +log_info "Total script time: ${total_elapsed_sec}s" +log_info "====================================" + +log_info "========== Elasticsearch ILM policy load complete (${total_elapsed_sec}s) ==========" diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load index 4ac1b4d5f..e274dbc90 100755 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load +++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load @@ -6,20 +6,84 @@ {%- import_yaml 'elasticfleet/defaults.yaml' as ELASTICFLEETDEFAULTS %} {% from 'vars/globals.map.jinja' import GLOBALS %} +# Logging configuration +LOG_FILE="/opt/so/log/elasticsearch/so-elasticsearch-templates-load.log" +LOG_DIR=$(dirname "$LOG_FILE") + +# Ensure log directory exists +if [[ ! -d "$LOG_DIR" ]]; then + mkdir -p "$LOG_DIR" +fi + +# Timing variables +SCRIPT_START_TIME=$(date +%s%3N) +LAST_CHECKPOINT_TIME=$SCRIPT_START_TIME + +# Get elapsed time in milliseconds since script start +get_elapsed_ms() { + local now=$(date +%s%3N) + echo $((now - SCRIPT_START_TIME)) +} + +# Get time since last checkpoint and update checkpoint +get_checkpoint_ms() { + local now=$(date +%s%3N) + local elapsed=$((now - LAST_CHECKPOINT_TIME)) + LAST_CHECKPOINT_TIME=$now + echo $elapsed +} + +# Logging function with timing +log() { + local level=$1 + shift + local message="$*" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + local elapsed_ms=$(get_elapsed_ms) + local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc) + echo "[$timestamp] [+${elapsed_sec}s] [$level] $message" | tee -a "$LOG_FILE" +} + +log_info() { + log "INFO" "$@" +} + +log_warn() { + log "WARN" "$@" +} + +log_error() { + log "ERROR" "$@" +} + +# Log with checkpoint timing (shows time since last checkpoint) +log_timing() { + local checkpoint_ms=$(get_checkpoint_ms) + local checkpoint_sec=$(echo "scale=3; $checkpoint_ms / 1000" | bc) + local elapsed_ms=$(get_elapsed_ms) + local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc) + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + echo "[$timestamp] [+${elapsed_sec}s] [TIMING] $* (took ${checkpoint_sec}s)" | tee -a "$LOG_FILE" +} + STATE_FILE_INITIAL=/opt/so/state/estemplates_initial_load_attempt.txt STATE_FILE_SUCCESS=/opt/so/state/estemplates.txt +log_info "========== Starting Elasticsearch templates load ==========" + if [[ -f $STATE_FILE_INITIAL ]]; then # The initial template load has already run. As this is a subsequent load, all dependencies should # already be satisified. Therefore, immediately exit/abort this script upon any template load failure # since this is an unrecoverable failure. should_exit_on_failure=1 + log_info "State file $STATE_FILE_INITIAL exists - this is a subsequent template load" else # This is the initial template load, and there likely are some components not yet setup in Elasticsearch. # Therefore load as many templates as possible at this time and if an error occurs proceed to the next # template. But if at least one template fails to load do not mark the templates as having been loaded. # This will allow the next load to resume the load of the templates that failed to load initially. should_exit_on_failure=0 + log_info "This is the initial template load" echo "This is the initial template load" fi @@ -27,30 +91,51 @@ fi pgrep soup > /dev/null && should_exit_on_failure=0 load_failures=0 +template_count=0 +total_template_time_ms=0 load_template() { uri=$1 file=$2 + + local start_time=$(date +%s%3N) echo "Loading template file $i" if ! retry 3 1 "so-elasticsearch-query $uri -d@$file -XPUT" "{\"acknowledged\":true}"; then + local end_time=$(date +%s%3N) + local duration=$((end_time - start_time)) + total_template_time_ms=$((total_template_time_ms + duration)) + template_count=$((template_count + 1)) if [[ $should_exit_on_failure -eq 1 ]]; then + log_error "FAILED template $file (${duration}ms)" fail "Could not load template file: $file" else load_failures=$((load_failures+1)) + log_warn "FAILED template $file (${duration}ms) - failure count: $load_failures" echo "Incremented load failure counter: $load_failures" fi + else + local end_time=$(date +%s%3N) + local duration=$((end_time - start_time)) + total_template_time_ms=$((total_template_time_ms + duration)) + template_count=$((template_count + 1)) + log_info "OK template $file (${duration}ms)" fi } if [ ! -f $STATE_FILE_SUCCESS ]; then echo "State file $STATE_FILE_SUCCESS not found. Running so-elasticsearch-templates-load." + log_info "State file $STATE_FILE_SUCCESS not found. Running so-elasticsearch-templates-load." + LAST_CHECKPOINT_TIME=$(date +%s%3N) . /usr/sbin/so-common + log_timing "Sourced so-common" {% if GLOBALS.role != 'so-heavynode' %} if [ -f /usr/sbin/so-elastic-fleet-common ]; then + LAST_CHECKPOINT_TIME=$(date +%s%3N) . /usr/sbin/so-elastic-fleet-common + log_timing "Sourced so-elastic-fleet-common" fi {% endif %} @@ -68,14 +153,20 @@ if [ ! -f $STATE_FILE_SUCCESS ]; then if [ -f "$file" ]; then # Wait for ElasticSearch to initialize echo -n "Waiting for ElasticSearch..." + log_info "Waiting for ElasticSearch to initialize..." + LAST_CHECKPOINT_TIME=$(date +%s%3N) retry 240 1 "so-elasticsearch-query / -k --output /dev/null --silent --head --fail" || fail "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'" + log_timing "ElasticSearch connection established" {% if GLOBALS.role != 'so-heavynode' %} TEMPLATE="logs-endpoint.alerts@package" + LAST_CHECKPOINT_TIME=$(date +%s%3N) INSTALLED=$(so-elasticsearch-query _component_template/$TEMPLATE | jq -r .component_templates[0].name) + log_timing "Checked for installed package template $TEMPLATE" if [ "$INSTALLED" != "$TEMPLATE" ]; then echo echo "Packages not yet installed." echo + log_warn "Packages not yet installed - exiting" exit 0 fi {% endif %} @@ -84,40 +175,56 @@ if [ ! -f $STATE_FILE_SUCCESS ]; then cd ${ELASTICSEARCH_TEMPLATES}/component/ecs + log_info "===== Loading ECS component templates from $(pwd) =====" echo "Loading ECS component templates..." + LAST_CHECKPOINT_TIME=$(date +%s%3N) + ecs_count=0 for i in *; do TEMPLATE=$(echo $i | cut -d '.' -f1) load_template "_component_template/${TEMPLATE}-mappings" "$i" + ecs_count=$((ecs_count + 1)) done + log_timing "Completed $ecs_count ECS component templates" echo cd ${ELASTICSEARCH_TEMPLATES}/component/elastic-agent + log_info "===== Loading Elastic Agent component templates from $(pwd) =====" echo "Loading Elastic Agent component templates..." + LAST_CHECKPOINT_TIME=$(date +%s%3N) {% if GLOBALS.role == 'so-heavynode' %} component_pattern="so-*" {% else %} component_pattern="*" {% endif %} + agent_count=0 for i in $component_pattern; do TEMPLATE=${i::-5} load_template "_component_template/$TEMPLATE" "$i" + agent_count=$((agent_count + 1)) done + log_timing "Completed $agent_count Elastic Agent component templates" echo # Load SO-specific component templates cd ${ELASTICSEARCH_TEMPLATES}/component/so + log_info "===== Loading Security Onion component templates from $(pwd) =====" echo "Loading Security Onion component templates..." + LAST_CHECKPOINT_TIME=$(date +%s%3N) + so_count=0 for i in *; do TEMPLATE=$(echo $i | cut -d '.' -f1); load_template "_component_template/$TEMPLATE" "$i" + so_count=$((so_count + 1)) done + log_timing "Completed $so_count Security Onion component templates" echo # Load SO index templates cd ${ELASTICSEARCH_TEMPLATES}/index + log_info "===== Loading Security Onion index templates from $(pwd) =====" echo "Loading Security Onion index templates..." shopt -s extglob {% if GLOBALS.role == 'so-heavynode' %} @@ -131,35 +238,66 @@ if [ ! -f $STATE_FILE_SUCCESS ]; then # In this situation, the script will treat the skipped template as a temporary failure # and allow the templates to be loaded again on the next run or highstate, whichever # comes first. + LAST_CHECKPOINT_TIME=$(date +%s%3N) COMPONENT_LIST=$(so-elasticsearch-component-templates-list) + log_timing "Retrieved component templates list" + LAST_CHECKPOINT_TIME=$(date +%s%3N) + index_count=0 + skipped_count=0 for i in $pattern; do TEMPLATE=${i::-14} COMPONENT_PATTERN=${TEMPLATE:3} MATCH=$(echo "$TEMPLATE" | grep -E "^so-logs-|^so-metrics" | grep -vE "detections|osquery") if [[ -n "$MATCH" && ! "$COMPONENT_LIST" =~ "$COMPONENT_PATTERN" && ! "$COMPONENT_PATTERN" =~ \.generic|logs-winlog\.winlog ]]; then load_failures=$((load_failures+1)) + skipped_count=$((skipped_count + 1)) + log_warn "SKIPPED - Component template does not exist for $COMPONENT_PATTERN" echo "Component template does not exist for $COMPONENT_PATTERN. The index template will not be loaded. Load failures: $load_failures" else load_template "_index_template/$TEMPLATE" "$i" + index_count=$((index_count + 1)) fi done + log_timing "Completed $index_count index templates ($skipped_count skipped)" else {% if GLOBALS.role == 'so-heavynode' %} echo "Common template does not exist. Exiting..." + log_warn "Common template does not exist. Exiting..." {% else %} echo "Elastic Fleet not configured. Exiting..." + log_warn "Elastic Fleet not configured. Exiting..." {% endif %} exit 0 fi cd - >/dev/null + # Final timing summary + total_elapsed_ms=$(get_elapsed_ms) + total_elapsed_sec=$(echo "scale=3; $total_elapsed_ms / 1000" | bc) + avg_template_ms=0 + if [[ $template_count -gt 0 ]]; then + avg_template_ms=$((total_template_time_ms / template_count)) + fi + + log_info "========== TIMING SUMMARY ==========" + log_info "Total templates processed: $template_count" + log_info "Total template load time: ${total_template_time_ms}ms" + log_info "Average time per template: ${avg_template_ms}ms" + log_info "Total script time: ${total_elapsed_sec}s" + log_info "====================================" + if [[ $load_failures -eq 0 ]]; then echo "All templates loaded successfully" + log_info "All templates loaded successfully" touch $STATE_FILE_SUCCESS else echo "Encountered $load_failures templates that were unable to load, likely due to missing dependencies that will be available later; will retry on next highstate" + log_warn "Encountered $load_failures templates that were unable to load" fi else echo "Templates already loaded" + log_info "Templates already loaded (state file $STATE_FILE_SUCCESS exists)" fi + +log_info "========== Elasticsearch templates load complete ($(echo "scale=3; $(get_elapsed_ms) / 1000" | bc)s) ==========" From 82d5115b3fc7b9ed66710c3e11d4505d19b194c0 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 16 Jan 2026 16:43:10 -0500 Subject: [PATCH 2/6] rerun so-elasticsearch-templates-load during setup --- salt/elasticfleet/enabled.sls | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index db10a7182..fc9c38aaf 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -188,6 +188,19 @@ so-elastic-defend-manage-filters-file-watch: {% endif %} {% endif %} +{# this block exists to rerun so-elasticsearch-templates-load during setup #} +{# currently the script has a check if /usr/sbin/so-elastic-fleet-common exists before allowing templates to load #} +{# that prevents the templates from being loaded during the initial highstate #} +{% if GLOBALS.is_manager %} +so-elasticsearch-templates-elasticfleet-ready: + cmd.run: + - name: /usr/sbin/so-elasticsearch-templates-load + - cwd: /opt/so + - require: + - docker_container: so-elastic-fleet + - onlyif: pgrep so-setup +{% endif %} + delete_so-elastic-fleet_so-status.disabled: file.uncomment: - name: /opt/so/conf/so-status/so-status.conf From 074158b4950cf7801269e9870c0c149399145c4e Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 16 Jan 2026 17:42:00 -0500 Subject: [PATCH 3/6] discard so-elasticsearch-templates-load running again during setup --- salt/elasticfleet/enabled.sls | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index fc9c38aaf..db10a7182 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -188,19 +188,6 @@ so-elastic-defend-manage-filters-file-watch: {% endif %} {% endif %} -{# this block exists to rerun so-elasticsearch-templates-load during setup #} -{# currently the script has a check if /usr/sbin/so-elastic-fleet-common exists before allowing templates to load #} -{# that prevents the templates from being loaded during the initial highstate #} -{% if GLOBALS.is_manager %} -so-elasticsearch-templates-elasticfleet-ready: - cmd.run: - - name: /usr/sbin/so-elasticsearch-templates-load - - cwd: /opt/so - - require: - - docker_container: so-elastic-fleet - - onlyif: pgrep so-setup -{% endif %} - delete_so-elastic-fleet_so-status.disabled: file.uncomment: - name: /opt/so/conf/so-status/so-status.conf From 17532fe49d19a5ab84b81b60e81fd801fb64f305 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 16 Jan 2026 17:42:58 -0500 Subject: [PATCH 4/6] run a final highstate on managers prior to verify --- setup/so-setup | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup/so-setup b/setup/so-setup index d09e8fc35..43619fb68 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -831,6 +831,10 @@ if ! [[ -f $install_opt_file ]]; then fi checkin_at_boot set_initial_firewall_access + # run a final highstate before enabling scheduled highstates. + # this will ensure so-elasticsearch-ilm-policy-load and so-elasticsearch-templates-load have a chance to run after elasticfleet is setup + info "Running final highstate for setup" + logCmd "salt-call state.highstate -l info" logCmd "salt-call schedule.enable -linfo --local" verify_setup else From 9905d23976487972283bc47e5fb2ef94371e27d8 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 16 Jan 2026 18:27:24 -0500 Subject: [PATCH 5/6] inform which state is being applied --- salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index 446fc6c9a..5e0dc0c69 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -241,9 +241,11 @@ printf '%s\n'\ "" >> "$global_pillar_file" # Call Elastic-Fleet Salt State +printf "\nApplying elasticfleet state" salt-call state.apply elasticfleet queue=True # Generate installers & install Elastic Agent on the node so-elastic-agent-gen-installers +printf "\nApplying elasticfleet.install_agent_grid state" salt-call state.apply elasticfleet.install_agent_grid queue=True exit 0 From 6f4b96b61b353d3bde392fd18f1ffd5993b66ea9 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 16 Jan 2026 18:31:45 -0500 Subject: [PATCH 6/6] removing time logging changes --- .../so-elasticsearch-ilm-policy-load | 116 --------------- .../so-elasticsearch-templates-load | 138 ------------------ 2 files changed, 254 deletions(-) diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load index 13123020c..04a7a8ab0 100755 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load +++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load @@ -4,148 +4,32 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. -# Logging configuration -LOG_FILE="/opt/so/log/elasticsearch/so-elasticsearch-ilm-policy-load.log" -LOG_DIR=$(dirname "$LOG_FILE") - -# Ensure log directory exists -if [[ ! -d "$LOG_DIR" ]]; then - mkdir -p "$LOG_DIR" -fi - -# Timing variables -SCRIPT_START_TIME=$(date +%s%3N) -LAST_CHECKPOINT_TIME=$SCRIPT_START_TIME - -# Get elapsed time in milliseconds since script start -get_elapsed_ms() { - local now=$(date +%s%3N) - echo $((now - SCRIPT_START_TIME)) -} - -# Get time since last checkpoint and update checkpoint -get_checkpoint_ms() { - local now=$(date +%s%3N) - local elapsed=$((now - LAST_CHECKPOINT_TIME)) - LAST_CHECKPOINT_TIME=$now - echo $elapsed -} - -# Logging function with timing -log() { - local level=$1 - shift - local message="$*" - local timestamp=$(date '+%Y-%m-%d %H:%M:%S') - local elapsed_ms=$(get_elapsed_ms) - local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc) - echo "[$timestamp] [+${elapsed_sec}s] [$level] $message" | tee -a "$LOG_FILE" -} - -log_info() { - log "INFO" "$@" -} - -log_warn() { - log "WARN" "$@" -} - -log_error() { - log "ERROR" "$@" -} - -# Log with checkpoint timing (shows time since last checkpoint) -log_timing() { - local checkpoint_ms=$(get_checkpoint_ms) - local checkpoint_sec=$(echo "scale=3; $checkpoint_ms / 1000" | bc) - local elapsed_ms=$(get_elapsed_ms) - local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc) - local timestamp=$(date '+%Y-%m-%d %H:%M:%S') - echo "[$timestamp] [+${elapsed_sec}s] [TIMING] $* (took ${checkpoint_sec}s)" | tee -a "$LOG_FILE" -} - -log_info "========== Starting Elasticsearch ILM policy load ==========" - -LAST_CHECKPOINT_TIME=$(date +%s%3N) . /usr/sbin/so-common -log_timing "Sourced so-common" {%- from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS %} -policy_count=0 -total_policy_time_ms=0 - {%- for index, settings in ES_INDEX_SETTINGS.items() %} {%- if settings.policy is defined %} {%- if index == 'so-logs-detections.alerts' %} echo echo "Setting up so-logs-detections.alerts-so policy..." - log_info "Setting up so-logs-detections.alerts-so policy..." - LAST_CHECKPOINT_TIME=$(date +%s%3N) - policy_start=$(date +%s%3N) curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-so" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }' - policy_end=$(date +%s%3N) - policy_duration=$((policy_end - policy_start)) - total_policy_time_ms=$((total_policy_time_ms + policy_duration)) - policy_count=$((policy_count + 1)) - log_timing "Completed so-logs-detections.alerts-so policy (${policy_duration}ms)" echo {%- elif index == 'so-logs-soc' %} echo echo "Setting up so-soc-logs policy..." - log_info "Setting up so-soc-logs policy..." - LAST_CHECKPOINT_TIME=$(date +%s%3N) - policy_start=$(date +%s%3N) curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/so-soc-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }' - policy_end=$(date +%s%3N) - policy_duration=$((policy_end - policy_start)) - total_policy_time_ms=$((total_policy_time_ms + policy_duration)) - policy_count=$((policy_count + 1)) - log_timing "Completed so-soc-logs policy (${policy_duration}ms)" echo echo echo "Setting up {{ index }}-logs policy..." - log_info "Setting up {{ index }}-logs policy..." - LAST_CHECKPOINT_TIME=$(date +%s%3N) - policy_start=$(date +%s%3N) curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }' - policy_end=$(date +%s%3N) - policy_duration=$((policy_end - policy_start)) - total_policy_time_ms=$((total_policy_time_ms + policy_duration)) - policy_count=$((policy_count + 1)) - log_timing "Completed {{ index }}-logs policy (${policy_duration}ms)" echo {%- else %} echo echo "Setting up {{ index }}-logs policy..." - log_info "Setting up {{ index }}-logs policy..." - LAST_CHECKPOINT_TIME=$(date +%s%3N) - policy_start=$(date +%s%3N) curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }' - policy_end=$(date +%s%3N) - policy_duration=$((policy_end - policy_start)) - total_policy_time_ms=$((total_policy_time_ms + policy_duration)) - policy_count=$((policy_count + 1)) - log_timing "Completed {{ index }}-logs policy (${policy_duration}ms)" echo {%- endif %} {%- endif %} {%- endfor %} echo - -# Final timing summary -total_elapsed_ms=$(get_elapsed_ms) -total_elapsed_sec=$(echo "scale=3; $total_elapsed_ms / 1000" | bc) -avg_policy_ms=0 -if [[ $policy_count -gt 0 ]]; then - avg_policy_ms=$((total_policy_time_ms / policy_count)) -fi - -log_info "========== TIMING SUMMARY ==========" -log_info "Total policies processed: $policy_count" -log_info "Total policy load time: ${total_policy_time_ms}ms" -log_info "Average time per policy: ${avg_policy_ms}ms" -log_info "Total script time: ${total_elapsed_sec}s" -log_info "====================================" - -log_info "========== Elasticsearch ILM policy load complete (${total_elapsed_sec}s) ==========" diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load index e274dbc90..4ac1b4d5f 100755 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load +++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load @@ -6,84 +6,20 @@ {%- import_yaml 'elasticfleet/defaults.yaml' as ELASTICFLEETDEFAULTS %} {% from 'vars/globals.map.jinja' import GLOBALS %} -# Logging configuration -LOG_FILE="/opt/so/log/elasticsearch/so-elasticsearch-templates-load.log" -LOG_DIR=$(dirname "$LOG_FILE") - -# Ensure log directory exists -if [[ ! -d "$LOG_DIR" ]]; then - mkdir -p "$LOG_DIR" -fi - -# Timing variables -SCRIPT_START_TIME=$(date +%s%3N) -LAST_CHECKPOINT_TIME=$SCRIPT_START_TIME - -# Get elapsed time in milliseconds since script start -get_elapsed_ms() { - local now=$(date +%s%3N) - echo $((now - SCRIPT_START_TIME)) -} - -# Get time since last checkpoint and update checkpoint -get_checkpoint_ms() { - local now=$(date +%s%3N) - local elapsed=$((now - LAST_CHECKPOINT_TIME)) - LAST_CHECKPOINT_TIME=$now - echo $elapsed -} - -# Logging function with timing -log() { - local level=$1 - shift - local message="$*" - local timestamp=$(date '+%Y-%m-%d %H:%M:%S') - local elapsed_ms=$(get_elapsed_ms) - local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc) - echo "[$timestamp] [+${elapsed_sec}s] [$level] $message" | tee -a "$LOG_FILE" -} - -log_info() { - log "INFO" "$@" -} - -log_warn() { - log "WARN" "$@" -} - -log_error() { - log "ERROR" "$@" -} - -# Log with checkpoint timing (shows time since last checkpoint) -log_timing() { - local checkpoint_ms=$(get_checkpoint_ms) - local checkpoint_sec=$(echo "scale=3; $checkpoint_ms / 1000" | bc) - local elapsed_ms=$(get_elapsed_ms) - local elapsed_sec=$(echo "scale=3; $elapsed_ms / 1000" | bc) - local timestamp=$(date '+%Y-%m-%d %H:%M:%S') - echo "[$timestamp] [+${elapsed_sec}s] [TIMING] $* (took ${checkpoint_sec}s)" | tee -a "$LOG_FILE" -} - STATE_FILE_INITIAL=/opt/so/state/estemplates_initial_load_attempt.txt STATE_FILE_SUCCESS=/opt/so/state/estemplates.txt -log_info "========== Starting Elasticsearch templates load ==========" - if [[ -f $STATE_FILE_INITIAL ]]; then # The initial template load has already run. As this is a subsequent load, all dependencies should # already be satisified. Therefore, immediately exit/abort this script upon any template load failure # since this is an unrecoverable failure. should_exit_on_failure=1 - log_info "State file $STATE_FILE_INITIAL exists - this is a subsequent template load" else # This is the initial template load, and there likely are some components not yet setup in Elasticsearch. # Therefore load as many templates as possible at this time and if an error occurs proceed to the next # template. But if at least one template fails to load do not mark the templates as having been loaded. # This will allow the next load to resume the load of the templates that failed to load initially. should_exit_on_failure=0 - log_info "This is the initial template load" echo "This is the initial template load" fi @@ -91,51 +27,30 @@ fi pgrep soup > /dev/null && should_exit_on_failure=0 load_failures=0 -template_count=0 -total_template_time_ms=0 load_template() { uri=$1 file=$2 - - local start_time=$(date +%s%3N) echo "Loading template file $i" if ! retry 3 1 "so-elasticsearch-query $uri -d@$file -XPUT" "{\"acknowledged\":true}"; then - local end_time=$(date +%s%3N) - local duration=$((end_time - start_time)) - total_template_time_ms=$((total_template_time_ms + duration)) - template_count=$((template_count + 1)) if [[ $should_exit_on_failure -eq 1 ]]; then - log_error "FAILED template $file (${duration}ms)" fail "Could not load template file: $file" else load_failures=$((load_failures+1)) - log_warn "FAILED template $file (${duration}ms) - failure count: $load_failures" echo "Incremented load failure counter: $load_failures" fi - else - local end_time=$(date +%s%3N) - local duration=$((end_time - start_time)) - total_template_time_ms=$((total_template_time_ms + duration)) - template_count=$((template_count + 1)) - log_info "OK template $file (${duration}ms)" fi } if [ ! -f $STATE_FILE_SUCCESS ]; then echo "State file $STATE_FILE_SUCCESS not found. Running so-elasticsearch-templates-load." - log_info "State file $STATE_FILE_SUCCESS not found. Running so-elasticsearch-templates-load." - LAST_CHECKPOINT_TIME=$(date +%s%3N) . /usr/sbin/so-common - log_timing "Sourced so-common" {% if GLOBALS.role != 'so-heavynode' %} if [ -f /usr/sbin/so-elastic-fleet-common ]; then - LAST_CHECKPOINT_TIME=$(date +%s%3N) . /usr/sbin/so-elastic-fleet-common - log_timing "Sourced so-elastic-fleet-common" fi {% endif %} @@ -153,20 +68,14 @@ if [ ! -f $STATE_FILE_SUCCESS ]; then if [ -f "$file" ]; then # Wait for ElasticSearch to initialize echo -n "Waiting for ElasticSearch..." - log_info "Waiting for ElasticSearch to initialize..." - LAST_CHECKPOINT_TIME=$(date +%s%3N) retry 240 1 "so-elasticsearch-query / -k --output /dev/null --silent --head --fail" || fail "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'" - log_timing "ElasticSearch connection established" {% if GLOBALS.role != 'so-heavynode' %} TEMPLATE="logs-endpoint.alerts@package" - LAST_CHECKPOINT_TIME=$(date +%s%3N) INSTALLED=$(so-elasticsearch-query _component_template/$TEMPLATE | jq -r .component_templates[0].name) - log_timing "Checked for installed package template $TEMPLATE" if [ "$INSTALLED" != "$TEMPLATE" ]; then echo echo "Packages not yet installed." echo - log_warn "Packages not yet installed - exiting" exit 0 fi {% endif %} @@ -175,56 +84,40 @@ if [ ! -f $STATE_FILE_SUCCESS ]; then cd ${ELASTICSEARCH_TEMPLATES}/component/ecs - log_info "===== Loading ECS component templates from $(pwd) =====" echo "Loading ECS component templates..." - LAST_CHECKPOINT_TIME=$(date +%s%3N) - ecs_count=0 for i in *; do TEMPLATE=$(echo $i | cut -d '.' -f1) load_template "_component_template/${TEMPLATE}-mappings" "$i" - ecs_count=$((ecs_count + 1)) done - log_timing "Completed $ecs_count ECS component templates" echo cd ${ELASTICSEARCH_TEMPLATES}/component/elastic-agent - log_info "===== Loading Elastic Agent component templates from $(pwd) =====" echo "Loading Elastic Agent component templates..." - LAST_CHECKPOINT_TIME=$(date +%s%3N) {% if GLOBALS.role == 'so-heavynode' %} component_pattern="so-*" {% else %} component_pattern="*" {% endif %} - agent_count=0 for i in $component_pattern; do TEMPLATE=${i::-5} load_template "_component_template/$TEMPLATE" "$i" - agent_count=$((agent_count + 1)) done - log_timing "Completed $agent_count Elastic Agent component templates" echo # Load SO-specific component templates cd ${ELASTICSEARCH_TEMPLATES}/component/so - log_info "===== Loading Security Onion component templates from $(pwd) =====" echo "Loading Security Onion component templates..." - LAST_CHECKPOINT_TIME=$(date +%s%3N) - so_count=0 for i in *; do TEMPLATE=$(echo $i | cut -d '.' -f1); load_template "_component_template/$TEMPLATE" "$i" - so_count=$((so_count + 1)) done - log_timing "Completed $so_count Security Onion component templates" echo # Load SO index templates cd ${ELASTICSEARCH_TEMPLATES}/index - log_info "===== Loading Security Onion index templates from $(pwd) =====" echo "Loading Security Onion index templates..." shopt -s extglob {% if GLOBALS.role == 'so-heavynode' %} @@ -238,66 +131,35 @@ if [ ! -f $STATE_FILE_SUCCESS ]; then # In this situation, the script will treat the skipped template as a temporary failure # and allow the templates to be loaded again on the next run or highstate, whichever # comes first. - LAST_CHECKPOINT_TIME=$(date +%s%3N) COMPONENT_LIST=$(so-elasticsearch-component-templates-list) - log_timing "Retrieved component templates list" - LAST_CHECKPOINT_TIME=$(date +%s%3N) - index_count=0 - skipped_count=0 for i in $pattern; do TEMPLATE=${i::-14} COMPONENT_PATTERN=${TEMPLATE:3} MATCH=$(echo "$TEMPLATE" | grep -E "^so-logs-|^so-metrics" | grep -vE "detections|osquery") if [[ -n "$MATCH" && ! "$COMPONENT_LIST" =~ "$COMPONENT_PATTERN" && ! "$COMPONENT_PATTERN" =~ \.generic|logs-winlog\.winlog ]]; then load_failures=$((load_failures+1)) - skipped_count=$((skipped_count + 1)) - log_warn "SKIPPED - Component template does not exist for $COMPONENT_PATTERN" echo "Component template does not exist for $COMPONENT_PATTERN. The index template will not be loaded. Load failures: $load_failures" else load_template "_index_template/$TEMPLATE" "$i" - index_count=$((index_count + 1)) fi done - log_timing "Completed $index_count index templates ($skipped_count skipped)" else {% if GLOBALS.role == 'so-heavynode' %} echo "Common template does not exist. Exiting..." - log_warn "Common template does not exist. Exiting..." {% else %} echo "Elastic Fleet not configured. Exiting..." - log_warn "Elastic Fleet not configured. Exiting..." {% endif %} exit 0 fi cd - >/dev/null - # Final timing summary - total_elapsed_ms=$(get_elapsed_ms) - total_elapsed_sec=$(echo "scale=3; $total_elapsed_ms / 1000" | bc) - avg_template_ms=0 - if [[ $template_count -gt 0 ]]; then - avg_template_ms=$((total_template_time_ms / template_count)) - fi - - log_info "========== TIMING SUMMARY ==========" - log_info "Total templates processed: $template_count" - log_info "Total template load time: ${total_template_time_ms}ms" - log_info "Average time per template: ${avg_template_ms}ms" - log_info "Total script time: ${total_elapsed_sec}s" - log_info "====================================" - if [[ $load_failures -eq 0 ]]; then echo "All templates loaded successfully" - log_info "All templates loaded successfully" touch $STATE_FILE_SUCCESS else echo "Encountered $load_failures templates that were unable to load, likely due to missing dependencies that will be available later; will retry on next highstate" - log_warn "Encountered $load_failures templates that were unable to load" fi else echo "Templates already loaded" - log_info "Templates already loaded (state file $STATE_FILE_SUCCESS exists)" fi - -log_info "========== Elasticsearch templates load complete ($(echo "scale=3; $(get_elapsed_ms) / 1000" | bc)s) =========="