From d2524a593f6cd9888a9f7d04f62cf4753421d2f4 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 14 May 2026 17:12:02 -0500 Subject: [PATCH 01/12] use -verify flag during grid agent install to ensure agent health --- salt/elasticfleet/install_agent_grid.sls | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/salt/elasticfleet/install_agent_grid.sls b/salt/elasticfleet/install_agent_grid.sls index 482af2e1e..5201eddf2 100644 --- a/salt/elasticfleet/install_agent_grid.sls +++ b/salt/elasticfleet/install_agent_grid.sls @@ -14,20 +14,23 @@ pull_agent_installer: file.managed: - - name: /opt/so/so-elastic-agent_linux_amd64 + - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 - source: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 - mode: 755 - makedirs: True run_installer: cmd.run: - - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force - - cwd: /opt/so + {# Run agent installer and wait for it to report healthy status #} + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force -verify + - cwd: /opt/so/log/agents - retry: attempts: 3 interval: 20 + - require: + - file: pull_agent_installer cleanup_agent_installer: file.absent: - - name: /opt/so/so-elastic-agent_linux_amd64 + - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 {% endif %} From 244a73b7a2b38a17e9331606fab6551689553098 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 15 May 2026 08:48:54 -0400 Subject: [PATCH 02/12] Make so-postgres-backup fail-safe against silent corruption The dump pipeline returned gzip's exit status, so a pg_dumpall that died mid-stream still produced a valid .gz holding a truncated dump, written straight to the final filename. The idempotency check then blocked retries for the day and the corrupt file counted toward retention, evicting a good backup each day until none remained. - set -o pipefail so a failed pg_dumpall fails the pipeline - dump to a .tmp file and atomically rename only after success, so the final filename appears only for a complete backup - gzip -t integrity check before publishing - trap-based cleanup of the temp file; sweep stale temps at startup - run retention only after a successful backup, with a glob restricted to finished backups - log timestamped OK/ERROR outcomes to /opt/so/log/postgres/backup.log --- salt/postgres/tools/sbin/so-postgres-backup | 48 ++++++++++++++++++--- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/salt/postgres/tools/sbin/so-postgres-backup b/salt/postgres/tools/sbin/so-postgres-backup index 9db522336..08a73e3a4 100644 --- a/salt/postgres/tools/sbin/so-postgres-backup +++ b/salt/postgres/tools/sbin/so-postgres-backup @@ -7,15 +7,29 @@ . /usr/sbin/so-common +# Without pipefail, a pipeline's exit status is gzip's. A failed pg_dumpall would +# otherwise be masked by a successful gzip, silently producing a valid .gz that +# holds a truncated dump. +set -o pipefail + # Backups contain role password hashes and full chat data; keep them 0600. umask 0077 TODAY=$(date '+%Y_%m_%d') BACKUPDIR=/nsm/backup BACKUPFILE="$BACKUPDIR/so-postgres-backup-$TODAY.sql.gz" +TMPFILE="$BACKUPFILE.tmp" MAXBACKUPS=7 +LOGFILE=/opt/so/log/postgres/backup.log -mkdir -p $BACKUPDIR +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') $*" >> "$LOGFILE" +} + +mkdir -p "$BACKUPDIR" + +# Remove any temp files left behind by a previously crashed run +rm -f "$BACKUPDIR"/so-postgres-backup-*.sql.gz.tmp # Skip if already backed up today if [ -f "$BACKUPFILE" ]; then @@ -27,13 +41,33 @@ if ! docker ps --format '{{.Names}}' | grep -q '^so-postgres$'; then exit 0 fi -# Dump all databases and roles, compress -docker exec so-postgres pg_dumpall -U postgres | gzip > "$BACKUPFILE" +# Always clean up the temp file on exit; the success path clears this trap +# after the atomic rename so the finished backup is not deleted. +trap 'rm -f "$TMPFILE"' EXIT -# Retention cleanup -NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l) +# Dump all databases and roles, compress. Write to a temp file so the final +# filename only ever appears for a complete, verified backup. +if ! docker exec so-postgres pg_dumpall -U postgres | gzip > "$TMPFILE"; then + log "ERROR: pg_dumpall/gzip failed; backup aborted" + exit 1 +fi + +# Verify the compressed stream is intact before publishing it +if ! gzip -t "$TMPFILE"; then + log "ERROR: backup failed gzip integrity check; backup aborted" + exit 1 +fi + +# Atomically publish the verified backup +mv "$TMPFILE" "$BACKUPFILE" +trap - EXIT +log "OK: wrote $BACKUPFILE" + +# Retention cleanup (only reached after a successful backup). The glob is +# restricted to finished backups so an in-progress .tmp can never be counted. +NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l) while [ "$NUMBACKUPS" -gt "$MAXBACKUPS" ]; do - OLDEST=$(find $BACKUPDIR -type f -name "so-postgres-backup*" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}') + OLDEST=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}') rm -f "$OLDEST" - NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l) + NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l) done From ce566ba174ba321e3955b9447aeab5975aeaab2b Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Fri, 15 May 2026 11:36:46 -0400 Subject: [PATCH 03/12] exclude fps --- salt/common/tools/sbin/so-log-check | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index a3d9c51d0..65b1041fe 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -165,6 +165,8 @@ if [[ $EXCLUDE_FALSE_POSITIVE_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|upgrading component template" # false positive (elasticsearch index or template names contain 'error') EXCLUDED_ERRORS="$EXCLUDED_ERRORS|upgrading composable template" # false positive (elasticsearch composable template names contain 'error') EXCLUDED_ERRORS="$EXCLUDED_ERRORS|Error while parsing document for index \[.ds-logs-kratos-so-.*object mapping for \[file\]" # false positive (mapping error occuring BEFORE kratos index has rolled over in 2.4.210) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|No such container" # false positive (telegraf trying to run stats on an old container) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|passwords do not match" # false positive (automated hydra test) fi if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then From e89c820b65bc4a00ffd45b6aafc61015aee60901 Mon Sep 17 00:00:00 2001 From: Jorge Reyes <94730068+reyesj2@users.noreply.github.com> Date: Sat, 16 May 2026 09:59:14 -0500 Subject: [PATCH 04/12] Revert "use -verify flag during grid agent install to ensure agent health" --- salt/elasticfleet/install_agent_grid.sls | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/salt/elasticfleet/install_agent_grid.sls b/salt/elasticfleet/install_agent_grid.sls index 5201eddf2..482af2e1e 100644 --- a/salt/elasticfleet/install_agent_grid.sls +++ b/salt/elasticfleet/install_agent_grid.sls @@ -14,23 +14,20 @@ pull_agent_installer: file.managed: - - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 + - name: /opt/so/so-elastic-agent_linux_amd64 - source: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 - mode: 755 - makedirs: True run_installer: cmd.run: - {# Run agent installer and wait for it to report healthy status #} - - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force -verify - - cwd: /opt/so/log/agents + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force + - cwd: /opt/so - retry: attempts: 3 interval: 20 - - require: - - file: pull_agent_installer cleanup_agent_installer: file.absent: - - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 + - name: /opt/so/so-elastic-agent_linux_amd64 {% endif %} From d0aa33a255f26cedae7a1c93336a11302ba0446d Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 19 May 2026 10:50:17 -0500 Subject: [PATCH 05/12] sync elastic agent packages to fleet nodes --- salt/elasticfleet/enabled.sls | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index cb189f9a9..166cb9719 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -26,7 +26,9 @@ include: wait_for_elasticsearch_elasticfleet: cmd.run: - name: so-elasticsearch-wait +{% endif %} +{% if GLOBALS.role == "so-fleet" %} # Sync Elastic Agent artifacts to Fleet Node elasticagent_syncartifacts: file.recurse: From 6c8997b28a7b1998a553dc3c87c9168a0a8aab06 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 19 May 2026 22:27:31 -0500 Subject: [PATCH 06/12] verify all heavynodes and all searchnodes are at compatible ES version before attempting an elasticsearch upgrade --- salt/manager/tools/sbin/soup | 139 +++++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 5 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index bd3048019..e6a14607e 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -971,6 +971,9 @@ verify_es_version_compatibility() { local is_active_intermediate_upgrade=1 # supported upgrade paths for SO-ES versions declare -A es_upgrade_map=( + ["8.18.4"]="8.18.6 8.18.8 9.0.8" + ["8.18.6"]="8.18.8 9.0.8" + ["8.18.8"]="9.0.8" ["9.0.8"]="9.3.3" ) @@ -994,6 +997,116 @@ verify_es_version_compatibility() { exit 160 fi + compatible_es_versions="$target_es_version" + for current_version in "${!es_upgrade_map[@]}"; do + # shellcheck disable=SC2076 + if [[ " ${es_upgrade_map[$current_version]} " =~ " $target_es_version " ]]; then + compatible_es_versions+=" $current_version" + fi + done + + # Check if the given ES version can directly upgrade to the target ES version. Used to assist with catching lagging nodes during the upgrade process + es_version_can_upgrade_to_target() { + local current_version="$1" + # shellcheck disable=SC2076 + if [[ -n "$current_version" && " $compatible_es_versions " =~ " $current_version " ]]; then + return 0 + fi + + return 1 + } + + # Gather Elasticsearch cluster version info and verify that each node in the cluster is running a version compatible with the target ES version. + verify_searchnodes_es_target_compatibility() { + local retries=20 + local retry_count=0 + local delay=180 + SEARCHNODE_ES_VERSIONS="" + + while [[ $retry_count -lt $retries ]]; do + SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) + local exit_status=$? + + if [[ $exit_status -ne 0 ]]; then + echo "Failed to retrieve Elasticsearch versions from searchnodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + + local all_searchnodes_compatible=true + while IFS=$'\t' read -r node current_version; do + [[ -z "$node" ]] && continue + if ! es_version_can_upgrade_to_target "$current_version"; then + echo "Searchnode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version." + all_searchnodes_compatible=false + fi + done < <(echo "$SEARCHNODE_ES_VERSIONS" | jq -r '.nodes | to_entries[] | [.value.name, .value.version] | @tsv') + + if [[ "$all_searchnodes_compatible" == true ]]; then + echo "All Searchnodes are upgradable to Elasticsearch $target_es_version." + return 0 + fi + + echo "One or more Searchnodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + done + + return 1 + } + + # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. + verify_heavynodes_es_target_compatibility() { + if ! salt-key -l accepted | grep -q 'heavynode$'; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + + echo -e "\nOne or more heavynodes detected. Verifying each is running an Elasticsearch version that is compatible with $target_es_version." + + local retries=20 + local retry_count=0 + local delay=180 + HEAVYNODE_ES_VERSIONS="" + + while [[ $retry_count -lt $retries ]]; do + HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -r ".version.number"' shell=/bin/bash --out=json 2> /dev/null) + local exit_status=$? + + if [[ $exit_status -ne 0 ]]; then + echo "Failed to retrieve Elasticsearch version from one or more heavynodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + + local all_heavynodes_compatible=true + while IFS=$'\t' read -r node current_version; do + [[ -z "$node" ]] && continue + if ! es_version_can_upgrade_to_target "$current_version"; then + echo "Heavynode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version." + all_heavynodes_compatible=false + fi + done < <(echo "$HEAVYNODE_ES_VERSIONS" | jq -r 'to_entries[] | [.key, .value] | @tsv') + + if [[ "$all_heavynodes_compatible" == true ]]; then + echo -e "\nAll heavynodes can upgrade to Elasticsearch $target_es_version." + return 0 + fi + + echo "One or more heavynodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + done + + return 1 + } + + if [[ ! -f "$es_verification_script" ]]; then + create_intermediate_upgrade_verification_script "$es_verification_script" + fi + for statefile in "${es_required_version_statefile_base}"-*; do [[ -f $statefile ]] || continue @@ -1012,10 +1125,6 @@ verify_es_version_compatibility() { continue fi - if [[ ! -f "$es_verification_script" ]]; then - create_intermediate_upgrade_verification_script "$es_verification_script" - fi - echo -e "\n##############################################################################################################################\n" echo "A previously required intermediate Elasticsearch upgrade was detected. Verifying that all Searchnodes/Heavynodes have successfully upgraded Elasticsearch to $es_required_version_statefile_value before proceeding with soup to avoid potential data loss! This command can take up to an hour to complete." if ! timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile"; then @@ -1037,6 +1146,26 @@ verify_es_version_compatibility() { # shellcheck disable=SC2076 # Do not want a regex here eg usage " 8.18.8 9.0.8 " =~ " 9.0.8 " if [[ " ${es_upgrade_map[$es_version]} " =~ " $target_es_version " || "$es_version" == "$target_es_version" ]]; then + if ! verify_searchnodes_es_target_compatibility || ! verify_heavynodes_es_target_compatibility; then + echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" + + echo "One or more Searchnode(s)/Heavynode(s) cannot upgrade directly to Elasticsearch $target_es_version. This can happen with soups that include Elasticsearch upgrades being run in quick succession. Typically, this will resolve itself as the grid synchronizes. Please allow time for all Searchnodes/Heavynodes to have upgraded Elasticsearch to a compatible version with $target_es_version before running soup again to avoid potential data loss!" + + if [[ -n "$HEAVYNODE_ES_VERSIONS" ]]; then + echo "Current heavynode Elasticsearch versions:" + echo "$HEAVYNODE_ES_VERSIONS" | jq '.' + fi + + if [[ -n "$SEARCHNODE_ES_VERSIONS" ]]; then + echo "Current searchnode Elasticsearch versions:" + echo "$SEARCHNODE_ES_VERSIONS" | jq '.nodes | to_entries | map({(.value.name): .value.version}) | sort | add' + fi + + echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" + + exit 161 + fi + # supported upgrade return 0 else @@ -1394,7 +1523,7 @@ main() { echo "Verifying we have the latest soup script." verify_latest_update_script - echo "Verifying Elasticsearch version compatibility before upgrading." + echo "Verifying Elasticsearch version compatibility across the grid before upgrading." verify_es_version_compatibility echo "Let's see if we need to update Security Onion." From d7a1b67095f630b256b0fc716e24b70ab1d39c13 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 20 May 2026 09:16:57 -0500 Subject: [PATCH 07/12] use pipefail on heavynode versino command to pass through error --- salt/manager/tools/sbin/soup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index e6a14607e..d21599ad8 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1071,7 +1071,7 @@ verify_es_version_compatibility() { HEAVYNODE_ES_VERSIONS="" while [[ $retry_count -lt $retries ]]; do - HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -r ".version.number"' shell=/bin/bash --out=json 2> /dev/null) + HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$? if [[ $exit_status -ne 0 ]]; then From 7d13007aa9f364eebece1ecae9c561bda6e3c2ed Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 20 May 2026 10:03:37 -0500 Subject: [PATCH 08/12] block soup if all ES nodes are not online and reporting their ES version for compatibility check --- salt/manager/tools/sbin/soup | 53 ++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index d21599ad8..8a68e5242 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1021,9 +1021,24 @@ verify_es_version_compatibility() { local retries=20 local retry_count=0 local delay=180 + local expected_es_nodes + local searchnode_minions SEARCHNODE_ES_VERSIONS="" while [[ $retry_count -lt $retries ]]; do + if ! searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then + echo "Failed to retrieve grid searchnodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + # Always add node running soup to expected es nodes + expected_es_nodes="${MINIONID%_*}" + while IFS= read -r searchnode_minion; do + [[ -z "$searchnode_minion" ]] && continue + expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" + done <<< "$searchnode_minions" + SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) local exit_status=$? @@ -1043,6 +1058,14 @@ verify_es_version_compatibility() { fi done < <(echo "$SEARCHNODE_ES_VERSIONS" | jq -r '.nodes | to_entries[] | [.value.name, .value.version] | @tsv') + while IFS= read -r expected_es_node; do + [[ -z "$expected_es_node" ]] && continue + if ! echo "$SEARCHNODE_ES_VERSIONS" | jq -e --arg node "$expected_es_node" '.nodes | to_entries | any(.value.name == $node)' > /dev/null; then + echo "Searchnode $expected_es_node did not report an Elasticsearch version. It may be offline or still upgrading." + all_searchnodes_compatible=false + fi + done <<< "$expected_es_nodes" + if [[ "$all_searchnodes_compatible" == true ]]; then echo "All Searchnodes are upgradable to Elasticsearch $target_es_version." return 0 @@ -1056,21 +1079,27 @@ verify_es_version_compatibility() { return 1 } - # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. + # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. verify_heavynodes_es_target_compatibility() { - if ! salt-key -l accepted | grep -q 'heavynode$'; then - echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." - return 0 - fi - - echo -e "\nOne or more heavynodes detected. Verifying each is running an Elasticsearch version that is compatible with $target_es_version." - + local heavynode_minions local retries=20 local retry_count=0 local delay=180 HEAVYNODE_ES_VERSIONS="" while [[ $retry_count -lt $retries ]]; do + if ! heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then + echo "Failed to retrieve grid heavynodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + + if [[ -z "$heavynode_minions" ]]; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$? @@ -1090,6 +1119,14 @@ verify_es_version_compatibility() { fi done < <(echo "$HEAVYNODE_ES_VERSIONS" | jq -r 'to_entries[] | [.key, .value] | @tsv') + while IFS= read -r heavynode_minion; do + [[ -z "$heavynode_minion" ]] && continue + if ! echo "$HEAVYNODE_ES_VERSIONS" | jq -e --arg minion "$heavynode_minion" 'has($minion)' > /dev/null; then + echo "Heavynode $heavynode_minion did not report an Elasticsearch version. It may be offline or still upgrading." + all_heavynodes_compatible=false + fi + done <<< "$heavynode_minions" + if [[ "$all_heavynodes_compatible" == true ]]; then echo -e "\nAll heavynodes can upgrade to Elasticsearch $target_es_version." return 0 From b485be460204202628a5b2e7d349c42c803add5b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 20 May 2026 14:12:58 -0500 Subject: [PATCH 09/12] separate salt-key command from main es version compatiblity loop --- salt/manager/tools/sbin/soup | 72 ++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 8a68e5242..3bec13716 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1021,24 +1021,33 @@ verify_es_version_compatibility() { local retries=20 local retry_count=0 local delay=180 - local expected_es_nodes - local searchnode_minions + local expected_es_nodes searchnode_minions attempt + local searchnode_discovery_success=false SEARCHNODE_ES_VERSIONS="" - while [[ $retry_count -lt $retries ]]; do - if ! searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then - echo "Failed to retrieve grid searchnodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." - ((retry_count++)) - sleep $delay - continue + for attempt in {1..3}; do + if searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then + searchnode_discovery_success=true + break fi - # Always add node running soup to expected es nodes - expected_es_nodes="${MINIONID%_*}" - while IFS= read -r searchnode_minion; do - [[ -z "$searchnode_minion" ]] && continue - expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" - done <<< "$searchnode_minions" + echo "Failed to retrieve grid searchnodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3." + sleep 30 + done + + if [[ "$searchnode_discovery_success" != "true" ]]; then + echo "Failed to retrieve grid searchnodes via salt-key." + return 1 + fi + + # Always add node running soup to expected es nodes + expected_es_nodes="${MINIONID%_*}" + while IFS= read -r searchnode_minion; do + [[ -z "$searchnode_minion" ]] && continue + expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" + done <<< "$searchnode_minions" + + while [[ $retry_count -lt $retries ]]; do SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) local exit_status=$? @@ -1081,25 +1090,34 @@ verify_es_version_compatibility() { # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. verify_heavynodes_es_target_compatibility() { - local heavynode_minions + local heavynode_minions attempt local retries=20 local retry_count=0 local delay=180 + local heavynode_discovery_success=false HEAVYNODE_ES_VERSIONS="" + for attempt in {1..3}; do + if heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then + heavynode_discovery_success=true + break + fi + + echo "Failed to retrieve grid heavynodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3." + sleep 30 + done + + if [[ "$heavynode_discovery_success" != "true" ]]; then + echo "Failed to retrieve grid heavynodes via salt-key." + return 1 + fi + + if [[ -z "$heavynode_minions" ]]; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + while [[ $retry_count -lt $retries ]]; do - if ! heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then - echo "Failed to retrieve grid heavynodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." - ((retry_count++)) - sleep $delay - continue - fi - - if [[ -z "$heavynode_minions" ]]; then - echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." - return 0 - fi - HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$? From 141a61f5b53d44e647350ac2c4b48be1708fd807 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 21 May 2026 13:47:03 -0400 Subject: [PATCH 10/12] 3.1.0 --- DOWNLOAD_AND_VERIFY_ISO.md | 22 +++++++++++----------- sigs/securityonion-3.1.0-20260521.iso.sig | Bin 0 -> 566 bytes 2 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 sigs/securityonion-3.1.0-20260521.iso.sig diff --git a/DOWNLOAD_AND_VERIFY_ISO.md b/DOWNLOAD_AND_VERIFY_ISO.md index 47937c1b9..a0ea874fa 100644 --- a/DOWNLOAD_AND_VERIFY_ISO.md +++ b/DOWNLOAD_AND_VERIFY_ISO.md @@ -1,17 +1,17 @@ -### 3.0.0-20260331 ISO image released on 2026/03/31 +### 3.1.0-20260521 ISO image released on 2026/05/21 ### Download and Verify -3.0.0-20260331 ISO image: -https://download.securityonion.net/file/securityonion/securityonion-3.0.0-20260331.iso +3.1.0-20260521 ISO image: +https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso -MD5: ECD318A1662A6FDE0EF213F5A9BD4B07 -SHA1: E55BE314440CCF3392DC0B06BC5E270B43176D9C -SHA256: 7FC47405E335CBE5C2B6C51FE7AC60248F35CBE504907B8B5A33822B23F8F4D5 +MD5: A853BC118639ABCE1795D6E313BFFBDE +SHA1: FCA615AD6E31710B33AE5870FEF447861FDB3B8F +SHA256: CE2A5947274D9ED2C5068A1FD46B64C4FEF70445EA9B61A98DD3621781329F2C Signature for ISO image: -https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.0.0-20260331.iso.sig +https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig Signing key: https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/main/KEYS @@ -25,22 +25,22 @@ wget https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/ Download the signature file for the ISO: ``` -wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.0.0-20260331.iso.sig +wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig ``` Download the ISO image: ``` -wget https://download.securityonion.net/file/securityonion/securityonion-3.0.0-20260331.iso +wget https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso ``` Verify the downloaded ISO image using the signature file: ``` -gpg --verify securityonion-3.0.0-20260331.iso.sig securityonion-3.0.0-20260331.iso +gpg --verify securityonion-3.1.0-20260521.iso.sig securityonion-3.1.0-20260521.iso ``` The output should show "Good signature" and the Primary key fingerprint should match what's shown below: ``` -gpg: Signature made Mon 30 Mar 2026 06:22:14 PM EDT using RSA key ID FE507013 +gpg: Signature made Thu 21 May 2026 11:10:01 AM EDT using RSA key ID FE507013 gpg: Good signature from "Security Onion Solutions, LLC " gpg: WARNING: This key is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. diff --git a/sigs/securityonion-3.1.0-20260521.iso.sig b/sigs/securityonion-3.1.0-20260521.iso.sig new file mode 100644 index 0000000000000000000000000000000000000000..af7564315fe4d271f8ffbcd5f20b1ee05bb215cf GIT binary patch literal 566 zcmV-60?GY}0y6{v0SEvc79j-41gSkXz6^6dp_W8^5Ma0dP;e6k0%{K+NdO875PT3| zxBgIY6XV$r{v$aAV9tWR2Cp82NY=5;oVcqFn(Xc4XM_F@euGbQ9!7nT67q|Yq&1Dz z;iL7U0Ax7EIw}GT(Bu_zK4Ys^hEOTJd&0o_3 zO5?R2v(LbEYEoJfB$6YydM(aL@phySuee&F)y$&07&AzwgvkmQV2JQZXm~MBEH3+G zKSPv5@iFE2n&Svb5pdPU%C&j6uqfp$?~q;+r++a=qvp=Fw>WPGwbbg1<|B&(3e-p8 zHPkpN1A8@ZjLz)8m z0VR{c;S9p&qu4L@;gzq_m74NtBE5~0*;%l!jX;8s?*95U&c&Hkk<|icjLg=~Miv|` zKJRsQYV~kgfumHMA<&=dJ$I^he>#jJnzd+{7r{Dna2YorX!yA=#k>Nel`MH%Harcf z1;AR{*}>X*g{2mN$Mb+;3r-|A+9*%9tY zB7`=&%xE^sj)0}?{(8WJMCBYz*i>k`qj%|THzqCeswa9p(zb&>&{OGp>6i)kn+a literal 0 HcmV?d00001 From 89a28d2cfeed5729fd31f4e113d6448537ffcd38 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 21 May 2026 15:45:58 -0400 Subject: [PATCH 11/12] Bump version from 3.1.0 to 3.2.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index fd2a01863..944880fa1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0 +3.2.0 From aa7897874034e9e4383c06780a93d9bc2312bf40 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 21 May 2026 15:57:57 -0400 Subject: [PATCH 12/12] Add 3.2.0 option to discussion template --- .github/DISCUSSION_TEMPLATE/3-0.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/DISCUSSION_TEMPLATE/3-0.yml b/.github/DISCUSSION_TEMPLATE/3-0.yml index 3fb9e5b30..8f74145c4 100644 --- a/.github/DISCUSSION_TEMPLATE/3-0.yml +++ b/.github/DISCUSSION_TEMPLATE/3-0.yml @@ -11,6 +11,7 @@ body: - - 3.0.0 - 3.1.0 + - 3.2.0 - Other (please provide detail below) validations: required: true