Parallelize so-elasticsearch-templates-load template PUTs

Load component and index templates as throttled background jobs (max 10
concurrent) instead of sequential curl PUTs, matching the bounded-concurrency
+ flock-serialized-output pattern used by the fleet/ILM load scripts. Keeps a
wait barrier between the component phase and the index phase so index
templates never load before their referenced component templates. Failures are
tracked via per-job marker files since counter increments can't escape
background subshells.
This commit is contained in:
Josh Patterson
2026-06-12 15:11:34 -04:00
parent ae6a705ce1
commit 43f72c1f9f
@@ -11,10 +11,8 @@ ADDON_STATEFILE_SUCCESS=/opt/so/state/addon_estemplates.txt
ELASTICSEARCH_TEMPLATES_DIR="/opt/so/conf/elasticsearch/templates" ELASTICSEARCH_TEMPLATES_DIR="/opt/so/conf/elasticsearch/templates"
SO_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/index" SO_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/index"
ADDON_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/addon-index" ADDON_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/addon-index"
SO_LOAD_FAILURES=0 FAILED_NAMES=()
ADDON_LOAD_FAILURES=0 FAILED_COUNT=0
SO_LOAD_FAILURES_NAMES=()
ADDON_LOAD_FAILURES_NAMES=()
IS_HEAVYNODE="false" IS_HEAVYNODE="false"
FORCE="false" FORCE="false"
VERBOSE="false" VERBOSE="false"
@@ -46,20 +44,86 @@ while [[ $# -gt 0 ]]; do
shift shift
done done
# Max number of concurrent template PUT jobs. Override via env if needed.
MAX_TEMPLATE_JOBS=${MAX_TEMPLATE_JOBS:-10}

# Block until fewer than MAX_TEMPLATE_JOBS background jobs are running.
# Globals:   MAX_TEMPLATE_JOBS (read)
# Returns:   always 0
#
# MAX_TEMPLATE_JOBS can come from the environment, so it is validated here. A
# value of 0, a negative number or a non-numeric string would make the loop
# condition true even with no jobs running, and `wait -n` returns immediately
# when there is nothing to wait for, so the loop would spin forever. Any such
# value is treated as 1 (fully serialized).
template_throttle() {
    local limit="${MAX_TEMPLATE_JOBS:-10}"

    if [[ ! "$limit" =~ ^[0-9]+$ ]] || (( 10#$limit < 1 )); then
        limit=1
    fi
    # Force base 10 so a value such as "08" is not parsed as invalid octal.
    limit=$(( 10#$limit ))

    while (( $(jobs -rp | wc -l) >= limit )); do
        # `wait -n` reports the exit status of whichever job finished. That
        # status says nothing about throttling, so it is deliberately dropped
        # rather than leaked as this function's return value.
        wait -n || true
    done
    return 0
}
# Per-job failure markers and an output lock for serializing parallel job output.
# Each failed load drops one file (named after the template) into FAIL_DIR; the
# output of each job is flushed as a single block under flock so concurrent jobs
# never interleave their (chatty) retry output.
#
# Abort if the scratch directory cannot be created: with an empty FAIL_DIR the
# lock file and every failure marker would be written relative to "/".
FAIL_DIR=$(mktemp -d) || {
    echo "Unable to create a temporary directory for template load state." >&2
    exit 1
}
OUTPUT_LOCK="${FAIL_DIR}/.output.lock"
: > "$OUTPUT_LOCK"
# Single quotes defer expansion of FAIL_DIR until the trap actually runs.
trap 'rm -rf -- "$FAIL_DIR"' EXIT
# Record a failure: $1 = the template name/path to report later.
# Globals:   FAIL_DIR (read)
#
# The marker's filename is the name with every "/" replaced by "__" so it is a
# single path component. That encoding cannot be reversed when the name itself
# contains "__", so the exact name is also written as the marker's content.
record_failure() {
    local marker="${1//\//__}"
    printf '%s\n' "$1" > "${FAIL_DIR}/fail.${marker}"
}

# Populate FAILED_NAMES and FAILED_COUNT from the current phase's markers.
# Must run in the current shell (not a command substitution) so the array sticks.
# Globals:   FAIL_DIR (read), FAILED_NAMES (written), FAILED_COUNT (written)
#
# The name is taken from the marker's content. An empty marker falls back to
# decoding the filename ("__" back to "/").
collect_failures() {
    FAILED_NAMES=()
    FAILED_COUNT=0
    local f name
    for f in "${FAIL_DIR}"/fail.*; do
        # Without nullglob an unmatched pattern is left as a literal string;
        # testing for existence avoids toggling the caller's nullglob setting.
        [[ -e "$f" ]] || continue
        name=""
        # read returns non-zero on an empty file; name then stays empty.
        IFS= read -r name < "$f" || true
        if [[ -z "$name" ]]; then
            name="${f##*/fail.}"
            name="${name//__//}"
        fi
        FAILED_NAMES+=("$name")
        FAILED_COUNT=$((FAILED_COUNT + 1))
    done
}
# Clear markers and names between phases so SO and addon counts stay independent.
# Globals:   FAIL_DIR (read), FAILED_NAMES (written), FAILED_COUNT (written)
# Returns:   0 on success, 1 if FAIL_DIR is unset or empty, otherwise rm's status
reset_failures() {
    FAILED_NAMES=()
    FAILED_COUNT=0

    # An empty FAIL_DIR would turn the glob below into /fail.* at the
    # filesystem root, so refuse to delete anything in that case.
    if [[ -z "${FAIL_DIR:-}" ]]; then
        echo "reset_failures: FAIL_DIR is not set; no markers removed." >&2
        return 1
    fi

    # rm -f ignores operands that do not exist, so an unmatched (literal)
    # pattern is harmless and the shell's nullglob setting is left untouched.
    rm -f -- "${FAIL_DIR}"/fail.*
}
# Write $1 followed by a newline to stdout while holding the shared output
# lock, so text printed by concurrent background jobs does not interleave.
# Globals:   OUTPUT_LOCK (read) - path of the lock file, opened for append
# Arguments: $1 - text to print
locked_echo() {
    local text="$1"
    {
        flock 9
        printf '%s\n' "$text"
    } 9>>"$OUTPUT_LOCK"
}
# Loads one template file via PUT. Intended to be dispatched as a background job.
#   $1 uri         - e.g. _component_template/foo or _index_template/foo
#   $2 file        - path to the template JSON
#   $3 report_name - name/path to record if this load fails
# Globals: VERBOSE (read), OUTPUT_LOCK (read)
# Returns: 0 if the PUT was acknowledged, 1 otherwise. The status is returned
#          explicitly: ending the function with `(( rc != 0 )) && record_failure`
#          would yield 1 on success and 0 on failure.
load_template() {
    local uri="$1"
    local file="$2"
    local report_name="$3"
    local out rc=0 block

    # Capture everything (including retry's diagnostic chatter) into one block so
    # concurrent jobs never interleave; the whole block is flushed under one flock.
    block="Loading template file $file"$'\n'
    if ! out=$(retry 3 3 "so-elasticsearch-query $uri -d@$file -XPUT" "{\"acknowledged\":true}" 2>&1); then
        block+="$out"$'\n'
        rc=1
    elif [[ "$VERBOSE" == "true" ]]; then
        block+="$out"$'\n'
    fi

    { flock 9; printf '%s' "$block"; } 9>>"$OUTPUT_LOCK"

    if (( rc != 0 )); then
        record_failure "$report_name"
    fi
    return "$rc"
}
check_required_component_template_exists() { check_required_component_template_exists() {
@@ -110,6 +174,9 @@ load_component_templates() {
return return
fi fi
# Dispatch loads as throttled background jobs. The barrier (wait) happens in
# the caller after all component groups have been dispatched, since index
# templates must not load until every component template is in place.
for component in "$pattern"/*.json; do for component in "$pattern"/*.json; do
tmpl_name=$(basename "${component%.json}") tmpl_name=$(basename "${component%.json}")
@@ -118,10 +185,8 @@ load_component_templates() {
tmpl_name="${tmpl_name%-mappings}-mappings" tmpl_name="${tmpl_name%-mappings}-mappings"
fi fi
if ! load_template "_component_template/${tmpl_name}" "$component"; then template_throttle
SO_LOAD_FAILURES=$((SO_LOAD_FAILURES + 1)) load_template "_component_template/${tmpl_name}" "$component" "$component" &
SO_LOAD_FAILURES_NAMES+=("$component")
fi
done done
} }
@@ -180,6 +245,9 @@ if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]] && index_templates_e
load_component_templates "Elastic Agent" "elastic-agent" load_component_templates "Elastic Agent" "elastic-agent"
load_component_templates "Security Onion" "so" load_component_templates "Security Onion" "so"
# Barrier: every component template PUT must complete before we snapshot the
# component template list and start loading index templates that depend on them.
wait
component_templates=$(so-elasticsearch-component-templates-list) component_templates=$(so-elasticsearch-component-templates-list)
echo -e "Loading Security Onion index templates...\n" echo -e "Loading Security Onion index templates...\n"
for so_idx_tmpl in "${SO_TEMPLATES_DIR}"/*.json; do for so_idx_tmpl in "${SO_TEMPLATES_DIR}"/*.json; do
@@ -189,7 +257,7 @@ if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]] && index_templates_e
# TODO: Better way to load only heavynode specific templates # TODO: Better way to load only heavynode specific templates
if ! check_heavynode_compatiable_index_template "$tmpl_name"; then if ! check_heavynode_compatiable_index_template "$tmpl_name"; then
if [[ "$VERBOSE" == "true" ]]; then if [[ "$VERBOSE" == "true" ]]; then
echo "Skipping over $so_idx_tmpl, template is not a heavynode specific index template." locked_echo "Skipping over $so_idx_tmpl, template is not a heavynode specific index template."
fi fi
continue continue
@@ -197,32 +265,34 @@ if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]] && index_templates_e
fi fi
if check_required_component_template_exists "$so_idx_tmpl"; then if check_required_component_template_exists "$so_idx_tmpl"; then
if ! load_template "_index_template/$tmpl_name" "$so_idx_tmpl"; then template_throttle
SO_LOAD_FAILURES=$((SO_LOAD_FAILURES + 1)) load_template "_index_template/$tmpl_name" "$so_idx_tmpl" "$so_idx_tmpl" &
SO_LOAD_FAILURES_NAMES+=("$so_idx_tmpl")
fi
else else
echo "Skipping over $so_idx_tmpl due to missing required component template(s)." locked_echo "Skipping over $so_idx_tmpl due to missing required component template(s)."
SO_LOAD_FAILURES=$((SO_LOAD_FAILURES + 1)) record_failure "$so_idx_tmpl"
SO_LOAD_FAILURES_NAMES+=("$so_idx_tmpl")
continue continue
fi fi
done done
if [[ $SO_LOAD_FAILURES -eq 0 ]]; then # Barrier: all SO index template PUTs must finish before tallying failures.
wait
collect_failures
if [[ $FAILED_COUNT -eq 0 ]]; then
echo "All Security Onion core templates loaded successfully." echo "All Security Onion core templates loaded successfully."
touch "$SO_STATEFILE_SUCCESS" touch "$SO_STATEFILE_SUCCESS"
else else
echo "Encountered $SO_LOAD_FAILURES failure(s) loading templates:" echo "Encountered $FAILED_COUNT failure(s) loading templates:"
for failed_template in "${SO_LOAD_FAILURES_NAMES[@]}"; do for failed_template in "${FAILED_NAMES[@]}"; do
echo " - $failed_template" echo " - $failed_template"
done done
if [[ "$SHOULD_EXIT_ON_FAILURE" == "true" ]]; then if [[ "$SHOULD_EXIT_ON_FAILURE" == "true" ]]; then
fail "Failed to load all Security Onion core templates successfully." fail "Failed to load all Security Onion core templates successfully."
fi fi
fi fi
reset_failures
elif ! index_templates_exist "$SO_TEMPLATES_DIR"; then elif ! index_templates_exist "$SO_TEMPLATES_DIR"; then
echo "No Security Onion core index templates found in ${SO_TEMPLATES_DIR}, skipping." echo "No Security Onion core index templates found in ${SO_TEMPLATES_DIR}, skipping."
elif [[ -f "$SO_STATEFILE_SUCCESS" ]]; then elif [[ -f "$SO_STATEFILE_SUCCESS" ]]; then
@@ -241,26 +311,27 @@ if should_load_addon_templates; then
tmpl_name=$(basename "${addon_idx_tmpl%-template.json}") tmpl_name=$(basename "${addon_idx_tmpl%-template.json}")
if check_required_component_template_exists "$addon_idx_tmpl"; then if check_required_component_template_exists "$addon_idx_tmpl"; then
if ! load_template "_index_template/${tmpl_name}" "$addon_idx_tmpl"; then template_throttle
ADDON_LOAD_FAILURES=$((ADDON_LOAD_FAILURES + 1)) load_template "_index_template/${tmpl_name}" "$addon_idx_tmpl" "$addon_idx_tmpl" &
ADDON_LOAD_FAILURES_NAMES+=("$addon_idx_tmpl")
fi
else else
echo "Skipping over $addon_idx_tmpl due to missing required component template(s)." locked_echo "Skipping over $addon_idx_tmpl due to missing required component template(s)."
ADDON_LOAD_FAILURES=$((ADDON_LOAD_FAILURES + 1)) record_failure "$addon_idx_tmpl"
ADDON_LOAD_FAILURES_NAMES+=("$addon_idx_tmpl")
continue continue
fi fi
done done
if [[ $ADDON_LOAD_FAILURES -eq 0 ]]; then # Barrier: all addon index template PUTs must finish before tallying failures.
wait
collect_failures
if [[ $FAILED_COUNT -eq 0 ]]; then
echo "All addon integration templates loaded successfully." echo "All addon integration templates loaded successfully."
touch "$ADDON_STATEFILE_SUCCESS" touch "$ADDON_STATEFILE_SUCCESS"
else else
echo "Encountered $ADDON_LOAD_FAILURES failure(s) loading addon integration templates:" echo "Encountered $FAILED_COUNT failure(s) loading addon integration templates:"
for failed_template in "${ADDON_LOAD_FAILURES_NAMES[@]}"; do for failed_template in "${FAILED_NAMES[@]}"; do
echo " - $failed_template" echo " - $failed_template"
done done
if [[ "$SHOULD_EXIT_ON_FAILURE" == "true" ]]; then if [[ "$SHOULD_EXIT_ON_FAILURE" == "true" ]]; then