From d2524a593f6cd9888a9f7d04f62cf4753421d2f4 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 14 May 2026 17:12:02 -0500 Subject: [PATCH 01/25] use -verify flag during grid agent install to ensure agent health --- salt/elasticfleet/install_agent_grid.sls | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/salt/elasticfleet/install_agent_grid.sls b/salt/elasticfleet/install_agent_grid.sls index 482af2e1e..5201eddf2 100644 --- a/salt/elasticfleet/install_agent_grid.sls +++ b/salt/elasticfleet/install_agent_grid.sls @@ -14,20 +14,23 @@ pull_agent_installer: file.managed: - - name: /opt/so/so-elastic-agent_linux_amd64 + - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 - source: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 - mode: 755 - makedirs: True run_installer: cmd.run: - - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force - - cwd: /opt/so + {# Run agent installer and wait for it to report healthy status #} + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force -verify + - cwd: /opt/so/log/agents - retry: attempts: 3 interval: 20 + - require: + - file: pull_agent_installer cleanup_agent_installer: file.absent: - - name: /opt/so/so-elastic-agent_linux_amd64 + - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 {% endif %} From 244a73b7a2b38a17e9331606fab6551689553098 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 15 May 2026 08:48:54 -0400 Subject: [PATCH 02/25] Make so-postgres-backup fail-safe against silent corruption The dump pipeline returned gzip's exit status, so a pg_dumpall that died mid-stream still produced a valid .gz holding a truncated dump, written straight to the final filename. The idempotency check then blocked retries for the day and the corrupt file counted toward retention, evicting a good backup each day until none remained. 
- set -o pipefail so a failed pg_dumpall fails the pipeline - dump to a .tmp file and atomically rename only after success, so the final filename appears only for a complete backup - gzip -t integrity check before publishing - trap-based cleanup of the temp file; sweep stale temps at startup - run retention only after a successful backup, with a glob restricted to finished backups - log timestamped OK/ERROR outcomes to /opt/so/log/postgres/backup.log --- salt/postgres/tools/sbin/so-postgres-backup | 48 ++++++++++++++++++--- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/salt/postgres/tools/sbin/so-postgres-backup b/salt/postgres/tools/sbin/so-postgres-backup index 9db522336..08a73e3a4 100644 --- a/salt/postgres/tools/sbin/so-postgres-backup +++ b/salt/postgres/tools/sbin/so-postgres-backup @@ -7,15 +7,29 @@ . /usr/sbin/so-common +# Without pipefail, a pipeline's exit status is gzip's. A failed pg_dumpall would +# otherwise be masked by a successful gzip, silently producing a valid .gz that +# holds a truncated dump. +set -o pipefail + # Backups contain role password hashes and full chat data; keep them 0600. umask 0077 TODAY=$(date '+%Y_%m_%d') BACKUPDIR=/nsm/backup BACKUPFILE="$BACKUPDIR/so-postgres-backup-$TODAY.sql.gz" +TMPFILE="$BACKUPFILE.tmp" MAXBACKUPS=7 +LOGFILE=/opt/so/log/postgres/backup.log -mkdir -p $BACKUPDIR +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') $*" >> "$LOGFILE" +} + +mkdir -p "$BACKUPDIR" + +# Remove any temp files left behind by a previously crashed run +rm -f "$BACKUPDIR"/so-postgres-backup-*.sql.gz.tmp # Skip if already backed up today if [ -f "$BACKUPFILE" ]; then @@ -27,13 +41,33 @@ if ! docker ps --format '{{.Names}}' | grep -q '^so-postgres$'; then exit 0 fi -# Dump all databases and roles, compress -docker exec so-postgres pg_dumpall -U postgres | gzip > "$BACKUPFILE" +# Always clean up the temp file on exit; the success path clears this trap +# after the atomic rename so the finished backup is not deleted. 
+trap 'rm -f "$TMPFILE"' EXIT -# Retention cleanup -NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l) +# Dump all databases and roles, compress. Write to a temp file so the final +# filename only ever appears for a complete, verified backup. +if ! docker exec so-postgres pg_dumpall -U postgres | gzip > "$TMPFILE"; then + log "ERROR: pg_dumpall/gzip failed; backup aborted" + exit 1 +fi + +# Verify the compressed stream is intact before publishing it +if ! gzip -t "$TMPFILE"; then + log "ERROR: backup failed gzip integrity check; backup aborted" + exit 1 +fi + +# Atomically publish the verified backup +mv "$TMPFILE" "$BACKUPFILE" +trap - EXIT +log "OK: wrote $BACKUPFILE" + +# Retention cleanup (only reached after a successful backup). The glob is +# restricted to finished backups so an in-progress .tmp can never be counted. +NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l) while [ "$NUMBACKUPS" -gt "$MAXBACKUPS" ]; do - OLDEST=$(find $BACKUPDIR -type f -name "so-postgres-backup*" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}') + OLDEST=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}') rm -f "$OLDEST" - NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l) + NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l) done From ce566ba174ba321e3955b9447aeab5975aeaab2b Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Fri, 15 May 2026 11:36:46 -0400 Subject: [PATCH 03/25] exclude fps --- salt/common/tools/sbin/so-log-check | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index a3d9c51d0..65b1041fe 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -165,6 +165,8 @@ if [[ $EXCLUDE_FALSE_POSITIVE_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|upgrading 
component template" # false positive (elasticsearch index or template names contain 'error') EXCLUDED_ERRORS="$EXCLUDED_ERRORS|upgrading composable template" # false positive (elasticsearch composable template names contain 'error') EXCLUDED_ERRORS="$EXCLUDED_ERRORS|Error while parsing document for index \[.ds-logs-kratos-so-.*object mapping for \[file\]" # false positive (mapping error occuring BEFORE kratos index has rolled over in 2.4.210) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|No such container" # false positive (telegraf trying to run stats on an old container) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|passwords do not match" # false positive (automated hydra test) fi if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then From e89c820b65bc4a00ffd45b6aafc61015aee60901 Mon Sep 17 00:00:00 2001 From: Jorge Reyes <94730068+reyesj2@users.noreply.github.com> Date: Sat, 16 May 2026 09:59:14 -0500 Subject: [PATCH 04/25] Revert "use -verify flag during grid agent install to ensure agent health" --- salt/elasticfleet/install_agent_grid.sls | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/salt/elasticfleet/install_agent_grid.sls b/salt/elasticfleet/install_agent_grid.sls index 5201eddf2..482af2e1e 100644 --- a/salt/elasticfleet/install_agent_grid.sls +++ b/salt/elasticfleet/install_agent_grid.sls @@ -14,23 +14,20 @@ pull_agent_installer: file.managed: - - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 + - name: /opt/so/so-elastic-agent_linux_amd64 - source: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 - mode: 755 - makedirs: True run_installer: cmd.run: - {# Run agent installer and wait for it to report healthy status #} - - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force -verify - - cwd: /opt/so/log/agents + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force + - cwd: /opt/so - retry: attempts: 3 interval: 20 - - require: - - file: pull_agent_installer cleanup_agent_installer: 
file.absent: - - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 + - name: /opt/so/so-elastic-agent_linux_amd64 {% endif %} From d0aa33a255f26cedae7a1c93336a11302ba0446d Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 19 May 2026 10:50:17 -0500 Subject: [PATCH 05/25] sync elastic agent packages to fleet nodes --- salt/elasticfleet/enabled.sls | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index cb189f9a9..166cb9719 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -26,7 +26,9 @@ include: wait_for_elasticsearch_elasticfleet: cmd.run: - name: so-elasticsearch-wait +{% endif %} +{% if GLOBALS.role == "so-fleet" %} # Sync Elastic Agent artifacts to Fleet Node elasticagent_syncartifacts: file.recurse: From 6c8997b28a7b1998a553dc3c87c9168a0a8aab06 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 19 May 2026 22:27:31 -0500 Subject: [PATCH 06/25] verify all heavynodes and all searchnodes are at compatible ES version before attempting an elasticsearch upgrade --- salt/manager/tools/sbin/soup | 139 +++++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 5 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index bd3048019..e6a14607e 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -971,6 +971,9 @@ verify_es_version_compatibility() { local is_active_intermediate_upgrade=1 # supported upgrade paths for SO-ES versions declare -A es_upgrade_map=( + ["8.18.4"]="8.18.6 8.18.8 9.0.8" + ["8.18.6"]="8.18.8 9.0.8" + ["8.18.8"]="9.0.8" ["9.0.8"]="9.3.3" ) @@ -994,6 +997,116 @@ verify_es_version_compatibility() { exit 160 fi + compatible_es_versions="$target_es_version" + for current_version in "${!es_upgrade_map[@]}"; do + # shellcheck disable=SC2076 + if [[ " ${es_upgrade_map[$current_version]} " =~ " 
$target_es_version " ]]; then + compatible_es_versions+=" $current_version" + fi + done + + # Check if the given ES version can directly upgrade to the target ES version. Used to assist with catching lagging nodes during the upgrade process + es_version_can_upgrade_to_target() { + local current_version="$1" + # shellcheck disable=SC2076 + if [[ -n "$current_version" && " $compatible_es_versions " =~ " $current_version " ]]; then + return 0 + fi + + return 1 + } + + # Gather Elasticsearch cluster version info and verify that each node in the cluster is running a version compatible with the target ES version. + verify_searchnodes_es_target_compatibility() { + local retries=20 + local retry_count=0 + local delay=180 + SEARCHNODE_ES_VERSIONS="" + + while [[ $retry_count -lt $retries ]]; do + SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) + local exit_status=$? + + if [[ $exit_status -ne 0 ]]; then + echo "Failed to retrieve Elasticsearch versions from searchnodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + + local all_searchnodes_compatible=true + while IFS=$'\t' read -r node current_version; do + [[ -z "$node" ]] && continue + if ! es_version_can_upgrade_to_target "$current_version"; then + echo "Searchnode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version." + all_searchnodes_compatible=false + fi + done < <(echo "$SEARCHNODE_ES_VERSIONS" | jq -r '.nodes | to_entries[] | [.value.name, .value.version] | @tsv') + + if [[ "$all_searchnodes_compatible" == true ]]; then + echo "All Searchnodes are upgradable to Elasticsearch $target_es_version." + return 0 + fi + + echo "One or more Searchnodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries." 
+ ((retry_count++)) + sleep $delay + done + + return 1 + } + + # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. + verify_heavynodes_es_target_compatibility() { + if ! salt-key -l accepted | grep -q 'heavynode$'; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + + echo -e "\nOne or more heavynodes detected. Verifying each is running an Elasticsearch version that is compatible with $target_es_version." + + local retries=20 + local retry_count=0 + local delay=180 + HEAVYNODE_ES_VERSIONS="" + + while [[ $retry_count -lt $retries ]]; do + HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -r ".version.number"' shell=/bin/bash --out=json 2> /dev/null) + local exit_status=$? + + if [[ $exit_status -ne 0 ]]; then + echo "Failed to retrieve Elasticsearch version from one or more heavynodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + + local all_heavynodes_compatible=true + while IFS=$'\t' read -r node current_version; do + [[ -z "$node" ]] && continue + if ! es_version_can_upgrade_to_target "$current_version"; then + echo "Heavynode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version." + all_heavynodes_compatible=false + fi + done < <(echo "$HEAVYNODE_ES_VERSIONS" | jq -r 'to_entries[] | [.key, .value] | @tsv') + + if [[ "$all_heavynodes_compatible" == true ]]; then + echo -e "\nAll heavynodes can upgrade to Elasticsearch $target_es_version." + return 0 + fi + + echo "One or more heavynodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + done + + return 1 + } + + if [[ ! 
-f "$es_verification_script" ]]; then + create_intermediate_upgrade_verification_script "$es_verification_script" + fi + for statefile in "${es_required_version_statefile_base}"-*; do [[ -f $statefile ]] || continue @@ -1012,10 +1125,6 @@ verify_es_version_compatibility() { continue fi - if [[ ! -f "$es_verification_script" ]]; then - create_intermediate_upgrade_verification_script "$es_verification_script" - fi - echo -e "\n##############################################################################################################################\n" echo "A previously required intermediate Elasticsearch upgrade was detected. Verifying that all Searchnodes/Heavynodes have successfully upgraded Elasticsearch to $es_required_version_statefile_value before proceeding with soup to avoid potential data loss! This command can take up to an hour to complete." if ! timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile"; then @@ -1037,6 +1146,26 @@ verify_es_version_compatibility() { # shellcheck disable=SC2076 # Do not want a regex here eg usage " 8.18.8 9.0.8 " =~ " 9.0.8 " if [[ " ${es_upgrade_map[$es_version]} " =~ " $target_es_version " || "$es_version" == "$target_es_version" ]]; then + if ! verify_searchnodes_es_target_compatibility || ! verify_heavynodes_es_target_compatibility; then + echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" + + echo "One or more Searchnode(s)/Heavynode(s) cannot upgrade directly to Elasticsearch $target_es_version. This can happen with soups that include Elasticsearch upgrades being run in quick succession. Typically, this will resolve itself as the grid synchronizes. Please allow time for all Searchnodes/Heavynodes to have upgraded Elasticsearch to a compatible version with $target_es_version before running soup again to avoid potential data loss!" 
+ + if [[ -n "$HEAVYNODE_ES_VERSIONS" ]]; then + echo "Current heavynode Elasticsearch versions:" + echo "$HEAVYNODE_ES_VERSIONS" | jq '.' + fi + + if [[ -n "$SEARCHNODE_ES_VERSIONS" ]]; then + echo "Current searchnode Elasticsearch versions:" + echo "$SEARCHNODE_ES_VERSIONS" | jq '.nodes | to_entries | map({(.value.name): .value.version}) | sort | add' + fi + + echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" + + exit 161 + fi + # supported upgrade return 0 else @@ -1394,7 +1523,7 @@ main() { echo "Verifying we have the latest soup script." verify_latest_update_script - echo "Verifying Elasticsearch version compatibility before upgrading." + echo "Verifying Elasticsearch version compatibility across the grid before upgrading." verify_es_version_compatibility echo "Let's see if we need to update Security Onion." From d7a1b67095f630b256b0fc716e24b70ab1d39c13 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 20 May 2026 09:16:57 -0500 Subject: [PATCH 07/25] use pipefail on heavynode version command to pass through error --- salt/manager/tools/sbin/soup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index e6a14607e..d21599ad8 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1071,7 +1071,7 @@ verify_es_version_compatibility() { HEAVYNODE_ES_VERSIONS="" while [[ $retry_count -lt $retries ]]; do - HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -r ".version.number"' shell=/bin/bash --out=json 2> /dev/null) + HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$?
if [[ $exit_status -ne 0 ]]; then From 7d13007aa9f364eebece1ecae9c561bda6e3c2ed Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 20 May 2026 10:03:37 -0500 Subject: [PATCH 08/25] block soup if all ES nodes are not online and reporting their ES version for compatibility check --- salt/manager/tools/sbin/soup | 53 ++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index d21599ad8..8a68e5242 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1021,9 +1021,24 @@ verify_es_version_compatibility() { local retries=20 local retry_count=0 local delay=180 + local expected_es_nodes + local searchnode_minions SEARCHNODE_ES_VERSIONS="" while [[ $retry_count -lt $retries ]]; do + if ! searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then + echo "Failed to retrieve grid searchnodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + # Always add node running soup to expected es nodes + expected_es_nodes="${MINIONID%_*}" + while IFS= read -r searchnode_minion; do + [[ -z "$searchnode_minion" ]] && continue + expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" + done <<< "$searchnode_minions" + SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) local exit_status=$? @@ -1043,6 +1058,14 @@ verify_es_version_compatibility() { fi done < <(echo "$SEARCHNODE_ES_VERSIONS" | jq -r '.nodes | to_entries[] | [.value.name, .value.version] | @tsv') + while IFS= read -r expected_es_node; do + [[ -z "$expected_es_node" ]] && continue + if ! 
echo "$SEARCHNODE_ES_VERSIONS" | jq -e --arg node "$expected_es_node" '.nodes | to_entries | any(.value.name == $node)' > /dev/null; then + echo "Searchnode $expected_es_node did not report an Elasticsearch version. It may be offline or still upgrading." + all_searchnodes_compatible=false + fi + done <<< "$expected_es_nodes" + if [[ "$all_searchnodes_compatible" == true ]]; then echo "All Searchnodes are upgradable to Elasticsearch $target_es_version." return 0 @@ -1056,21 +1079,27 @@ verify_es_version_compatibility() { return 1 } - # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. + # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. verify_heavynodes_es_target_compatibility() { - if ! salt-key -l accepted | grep -q 'heavynode$'; then - echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." - return 0 - fi - - echo -e "\nOne or more heavynodes detected. Verifying each is running an Elasticsearch version that is compatible with $target_es_version." - + local heavynode_minions local retries=20 local retry_count=0 local delay=180 HEAVYNODE_ES_VERSIONS="" while [[ $retry_count -lt $retries ]]; do + if ! heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then + echo "Failed to retrieve grid heavynodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + + if [[ -z "$heavynode_minions" ]]; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$? 
@@ -1090,6 +1119,14 @@ verify_es_version_compatibility() { fi done < <(echo "$HEAVYNODE_ES_VERSIONS" | jq -r 'to_entries[] | [.key, .value] | @tsv') + while IFS= read -r heavynode_minion; do + [[ -z "$heavynode_minion" ]] && continue + if ! echo "$HEAVYNODE_ES_VERSIONS" | jq -e --arg minion "$heavynode_minion" 'has($minion)' > /dev/null; then + echo "Heavynode $heavynode_minion did not report an Elasticsearch version. It may be offline or still upgrading." + all_heavynodes_compatible=false + fi + done <<< "$heavynode_minions" + if [[ "$all_heavynodes_compatible" == true ]]; then echo -e "\nAll heavynodes can upgrade to Elasticsearch $target_es_version." return 0 From b485be460204202628a5b2e7d349c42c803add5b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 20 May 2026 14:12:58 -0500 Subject: [PATCH 09/25] separate salt-key command from main es version compatibility loop --- salt/manager/tools/sbin/soup | 72 ++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 8a68e5242..3bec13716 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1021,24 +1021,33 @@ verify_es_version_compatibility() { local retries=20 local retry_count=0 local delay=180 - local expected_es_nodes - local searchnode_minions + local expected_es_nodes searchnode_minions attempt + local searchnode_discovery_success=false SEARCHNODE_ES_VERSIONS="" - while [[ $retry_count -lt $retries ]]; do - if ! searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then - echo "Failed to retrieve grid searchnodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries."
- ((retry_count++)) - sleep $delay - continue + for attempt in {1..3}; do + if searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then + searchnode_discovery_success=true + break fi - # Always add node running soup to expected es nodes - expected_es_nodes="${MINIONID%_*}" - while IFS= read -r searchnode_minion; do - [[ -z "$searchnode_minion" ]] && continue - expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" - done <<< "$searchnode_minions" + echo "Failed to retrieve grid searchnodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3." + sleep 30 + done + + if [[ "$searchnode_discovery_success" != "true" ]]; then + echo "Failed to retrieve grid searchnodes via salt-key." + return 1 + fi + + # Always add node running soup to expected es nodes + expected_es_nodes="${MINIONID%_*}" + while IFS= read -r searchnode_minion; do + [[ -z "$searchnode_minion" ]] && continue + expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" + done <<< "$searchnode_minions" + + while [[ $retry_count -lt $retries ]]; do SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) local exit_status=$? @@ -1081,25 +1090,34 @@ verify_es_version_compatibility() { # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. verify_heavynodes_es_target_compatibility() { - local heavynode_minions + local heavynode_minions attempt local retries=20 local retry_count=0 local delay=180 + local heavynode_discovery_success=false HEAVYNODE_ES_VERSIONS="" + for attempt in {1..3}; do + if heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then + heavynode_discovery_success=true + break + fi + + echo "Failed to retrieve grid heavynodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3." 
+ sleep 30 + done + + if [[ "$heavynode_discovery_success" != "true" ]]; then + echo "Failed to retrieve grid heavynodes via salt-key." + return 1 + fi + + if [[ -z "$heavynode_minions" ]]; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + while [[ $retry_count -lt $retries ]]; do - if ! heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then - echo "Failed to retrieve grid heavynodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." - ((retry_count++)) - sleep $delay - continue - fi - - if [[ -z "$heavynode_minions" ]]; then - echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." - return 0 - fi - HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$? 
From 141a61f5b53d44e647350ac2c4b48be1708fd807 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 21 May 2026 13:47:03 -0400 Subject: [PATCH 10/25] 3.1.0 --- DOWNLOAD_AND_VERIFY_ISO.md | 22 +++++++++++----------- sigs/securityonion-3.1.0-20260521.iso.sig | Bin 0 -> 566 bytes 2 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 sigs/securityonion-3.1.0-20260521.iso.sig diff --git a/DOWNLOAD_AND_VERIFY_ISO.md b/DOWNLOAD_AND_VERIFY_ISO.md index 47937c1b9..a0ea874fa 100644 --- a/DOWNLOAD_AND_VERIFY_ISO.md +++ b/DOWNLOAD_AND_VERIFY_ISO.md @@ -1,17 +1,17 @@ -### 3.0.0-20260331 ISO image released on 2026/03/31 +### 3.1.0-20260521 ISO image released on 2026/05/21 ### Download and Verify -3.0.0-20260331 ISO image: -https://download.securityonion.net/file/securityonion/securityonion-3.0.0-20260331.iso +3.1.0-20260521 ISO image: +https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso -MD5: ECD318A1662A6FDE0EF213F5A9BD4B07 -SHA1: E55BE314440CCF3392DC0B06BC5E270B43176D9C -SHA256: 7FC47405E335CBE5C2B6C51FE7AC60248F35CBE504907B8B5A33822B23F8F4D5 +MD5: A853BC118639ABCE1795D6E313BFFBDE +SHA1: FCA615AD6E31710B33AE5870FEF447861FDB3B8F +SHA256: CE2A5947274D9ED2C5068A1FD46B64C4FEF70445EA9B61A98DD3621781329F2C Signature for ISO image: -https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.0.0-20260331.iso.sig +https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig Signing key: https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/main/KEYS @@ -25,22 +25,22 @@ wget https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/ Download the signature file for the ISO: ``` -wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.0.0-20260331.iso.sig +wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig ``` Download 
the ISO image: ``` -wget https://download.securityonion.net/file/securityonion/securityonion-3.0.0-20260331.iso +wget https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso ``` Verify the downloaded ISO image using the signature file: ``` -gpg --verify securityonion-3.0.0-20260331.iso.sig securityonion-3.0.0-20260331.iso +gpg --verify securityonion-3.1.0-20260521.iso.sig securityonion-3.1.0-20260521.iso ``` The output should show "Good signature" and the Primary key fingerprint should match what's shown below: ``` -gpg: Signature made Mon 30 Mar 2026 06:22:14 PM EDT using RSA key ID FE507013 +gpg: Signature made Thu 21 May 2026 11:10:01 AM EDT using RSA key ID FE507013 gpg: Good signature from "Security Onion Solutions, LLC " gpg: WARNING: This key is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. diff --git a/sigs/securityonion-3.1.0-20260521.iso.sig b/sigs/securityonion-3.1.0-20260521.iso.sig new file mode 100644 index 0000000000000000000000000000000000000000..af7564315fe4d271f8ffbcd5f20b1ee05bb215cf GIT binary patch literal 566 zcmV-60?GY}0y6{v0SEvc79j-41gSkXz6^6dp_W8^5Ma0dP;e6k0%{K+NdO875PT3| zxBgIY6XV$r{v$aAV9tWR2Cp82NY=5;oVcqFn(Xc4XM_F@euGbQ9!7nT67q|Yq&1Dz z;iL7U0Ax7EIw}GT(Bu_zK4Ys^hEOTJd&0o_3 zO5?R2v(LbEYEoJfB$6YydM(aL@phySuee&F)y$&07&AzwgvkmQV2JQZXm~MBEH3+G zKSPv5@iFE2n&Svb5pdPU%C&j6uqfp$?~q;+r++a=qvp=Fw>WPGwbbg1<|B&(3e-p8 zHPkpN1A8@ZjLz)8m z0VR{c;S9p&qu4L@;gzq_m74NtBE5~0*;%l!jX;8s?*95U&c&Hkk<|icjLg=~Miv|` zKJRsQYV~kgfumHMA<&=dJ$I^he>#jJnzd+{7r{Dna2YorX!yA=#k>Nel`MH%Harcf z1;AR{*}>X*g{2mN$Mb+;3r-|A+9*%9tY zB7`=&%xE^sj)0}?{(8WJMCBYz*i>k`qj%|THzqCeswa9p(zb&>&{OGp>6i)kn+a literal 0 HcmV?d00001 From 89a28d2cfeed5729fd31f4e113d6448537ffcd38 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 21 May 2026 15:45:58 -0400 Subject: [PATCH 11/25] Bump version from 3.1.0 to 3.2.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION 
index fd2a01863..944880fa1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0 +3.2.0 From aa7897874034e9e4383c06780a93d9bc2312bf40 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 21 May 2026 15:57:57 -0400 Subject: [PATCH 12/25] Add 3.2.0 option to discussion template --- .github/DISCUSSION_TEMPLATE/3-0.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/DISCUSSION_TEMPLATE/3-0.yml b/.github/DISCUSSION_TEMPLATE/3-0.yml index 3fb9e5b30..8f74145c4 100644 --- a/.github/DISCUSSION_TEMPLATE/3-0.yml +++ b/.github/DISCUSSION_TEMPLATE/3-0.yml @@ -11,6 +11,7 @@ body: - - 3.0.0 - 3.1.0 + - 3.2.0 - Other (please provide detail below) validations: required: true From d72219c5869e4aea39ccb285eca09ccf55c599ee Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 22 May 2026 09:59:17 -0500 Subject: [PATCH 13/25] use multiple or combined input --- salt/manager/tools/sbin/soup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 3bec13716..46785be3c 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1139,7 +1139,7 @@ verify_es_version_compatibility() { while IFS= read -r heavynode_minion; do [[ -z "$heavynode_minion" ]] && continue - if ! echo "$HEAVYNODE_ES_VERSIONS" | jq -e --arg minion "$heavynode_minion" 'has($minion)' > /dev/null; then + if ! echo "$HEAVYNODE_ES_VERSIONS" | jq -se --arg minion "$heavynode_minion" 'add | has($minion)' > /dev/null; then echo "Heavynode $heavynode_minion did not report an Elasticsearch version. It may be offline or still upgrading." 
all_heavynodes_compatible=false fi From c0272ddb81a196648d93f9cbc7c1ee7a687e2562 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 26 May 2026 09:24:10 -0400 Subject: [PATCH 14/25] Add version number to HOTFIX file --- HOTFIX | 1 + 1 file changed, 1 insertion(+) diff --git a/HOTFIX b/HOTFIX index e69de29bb..cb8d8ec41 100644 --- a/HOTFIX +++ b/HOTFIX @@ -0,0 +1 @@ +20260526 From 473f93f0ee20e2c307455b9c5e639a044d448927 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 09:33:15 -0500 Subject: [PATCH 15/25] check for stale logstash pipeline name in pillars --- salt/manager/tools/sbin/soup | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 46785be3c..58cbbac43 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1506,7 +1506,23 @@ EOF # Keeping this block in case we need to do a hotfix that requires salt update apply_hotfix() { - echo "No actions required. ($INSTALLEDVERSION/$HOTFIXVERSION)" + if [[ "$INSTALLEDVERSION" == "3.1.0" ]] ; then + echo "Checking for conflicting logstash defined_pipelines pillar value." + local LOGSTASH_FILE=/opt/so/saltstack/local/pillar/logstash/soc_logstash.sls + local MINIONDIR=/opt/so/saltstack/local/pillar/minions + + for pillar_file in "$LOGSTASH_FILE" "$MINIONDIR"/*.sls; do + [[ -f "$pillar_file" ]] || continue + if grep -q 'so/0013_input_lumberjack_fleet.conf$' "$pillar_file"; then + echo "Found conflicting defined_pipeline pillar value in $pillar_file. Updating to use the new logstash pipeline name." + sed -i 's#so/0013_input_lumberjack_fleet\.conf$#so/0013_input_lumberjack_fleet.conf.jinja#g' "$pillar_file" + chown socore:socore "$pillar_file" + fi + + done + else + echo "No actions required. 
($INSTALLEDVERSION/$HOTFIXVERSION)" + fi } failed_soup_restore_items() { From 0834998cca219394c49ea8116284285bef20b228 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 09:52:29 -0500 Subject: [PATCH 16/25] usuable for next soup --- salt/manager/tools/sbin/soup | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 58cbbac43..6f4b936dd 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -533,6 +533,23 @@ elasticfleet_set_agent_logging_level_warn() { done <<< "$policies_to_update" } +update_logstash_pipeline_name() { + local original_pipeline_name="$1" + local new_pipeline_name="$2" + + echo "Checking for conflicting logstash defined_pipelines pillar value." + local LOGSTASH_FILE=/opt/so/saltstack/local/pillar/logstash/soc_logstash.sls + local MINIONDIR=/opt/so/saltstack/local/pillar/minions + for pillar_file in "$LOGSTASH_FILE" "$MINIONDIR"/*.sls; do + [[ -f "$pillar_file" ]] || continue + if grep -q "$original_pipeline_name$" "$pillar_file"; then + echo "Found conflicting defined_pipeline pillar value in $pillar_file. Updating to use the new logstash pipeline name." + sed -i "s#$original_pipeline_name\$#$new_pipeline_name#g" "$pillar_file" + chown socore:socore "$pillar_file" + fi + done +} + check_transform_health_and_reauthorize() { . /usr/sbin/so-elastic-fleet-common @@ -1507,19 +1524,7 @@ EOF # Keeping this block in case we need to do a hotfix that requires salt update apply_hotfix() { if [[ "$INSTALLEDVERSION" == "3.1.0" ]] ; then - echo "Checking for conflicting logstash defined_pipelines pillar value." 
- local LOGSTASH_FILE=/opt/so/saltstack/local/pillar/logstash/soc_logstash.sls - local MINIONDIR=/opt/so/saltstack/local/pillar/minions - - for pillar_file in "$LOGSTASH_FILE" "$MINIONDIR"/*.sls; do - [[ -f "$pillar_file" ]] || continue - if grep -q 'so/0013_input_lumberjack_fleet.conf$' "$pillar_file"; then - echo "Found conflicting defined_pipeline pillar value in $pillar_file. Updating to use the new logstash pipeline name." - sed -i 's#so/0013_input_lumberjack_fleet\.conf$#so/0013_input_lumberjack_fleet.conf.jinja#g' "$pillar_file" - chown socore:socore "$pillar_file" - fi - - done + update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" else echo "No actions required. ($INSTALLEDVERSION/$HOTFIXVERSION)" fi From 0b4a4de609e8f55c63d79e91f68b6bfc4e1d1a60 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 12:21:22 -0500 Subject: [PATCH 17/25] always run logstash pipeline rename --- salt/manager/tools/sbin/soup | 1 + 1 file changed, 1 insertion(+) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 6f4b936dd..473ef79c5 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -701,6 +701,7 @@ up_to_3.1.0() { # Clear existing component template state file. 
rm -f /opt/so/state/esfleet_component_templates.json rename_strelka_scan_lnk + update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" INSTALLEDVERSION=3.1.0 } From bf609a112eed5001ec73745d6ea00fdda8de4267 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 12:21:44 -0500 Subject: [PATCH 18/25] LF --- HOTFIX | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOTFIX b/HOTFIX index cb8d8ec41..8d594f290 100644 --- a/HOTFIX +++ b/HOTFIX @@ -1 +1 @@ -20260526 +20260526 From 79987f3659ab554b975e31461ed20a21d009383d Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 27 May 2026 13:55:30 -0400 Subject: [PATCH 19/25] bootstrap so-soc db in postgres during soup --- salt/manager/tools/sbin/soup | 35 ++++++++++++++++++++++- salt/postgres/telegraf_users.sls | 18 ++---------- salt/postgres/tools/sbin/so-postgres-wait | 32 +++++++++++++++++++++ 3 files changed, 68 insertions(+), 17 deletions(-) create mode 100644 salt/postgres/tools/sbin/so-postgres-wait diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 3bec13716..05f58b9a5 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -370,8 +370,9 @@ preupgrade_changes() { # This function is to add any new pillar items if needed. echo "Checking to see if changes are needed." 
- [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 + [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 [[ "$INSTALLEDVERSION" == "3.0.0" ]] && up_to_3.1.0 + [[ "$INSTALLEDVERSION" == "3.1.0" ]] && up_to_3.2.0 true } @@ -381,6 +382,7 @@ postupgrade_changes() { [[ "$POSTVERSION" =~ ^2\.4\.21[0-9]+$ ]] && post_to_3.0.0 [[ "$POSTVERSION" == "3.0.0" ]] && post_to_3.1.0 + [[ "$POSTVERSION" == "3.1.0" ]] && post_to_3.2.0 true } @@ -720,6 +722,37 @@ post_to_3.1.0() { ### 3.1.0 End ### +### 3.2.0 Scripts ### + +bootstrap_so_soc_database() { + # init-db.sh is mounted into so-postgres at /docker-entrypoint-initdb.d/init-db.sh + # and runs automatically only on a fresh data directory. Hosts upgrading from + # 3.1.0 already have /nsm/postgres populated, so the so_soc bootstrap block + # added in 3.2 never fires. Re-run the script explicitly; it's idempotent. + echo "Bootstrapping so_soc database via init-db.sh." + if ! /usr/sbin/so-postgres-wait; then + FINAL_MESSAGE_QUEUE+=("WARNING: so-postgres was not ready during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh") + return 0 + fi + if ! docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh; then + FINAL_MESSAGE_QUEUE+=("WARNING: init-db.sh failed inside so-postgres during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh") + return 0 + fi + echo "so_soc bootstrap complete." +} + +up_to_3.2.0() { + INSTALLEDVERSION=3.2.0 +} + +post_to_3.2.0() { + bootstrap_so_soc_database + + POSTVERSION=3.2.0 +} + +### 3.2.0 End ### + repo_sync() { echo "Sync the local repo." 
diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 28d9d6247..5e3566a95 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -18,26 +18,12 @@ include: {% set TG_OUT = TELEGRAFMERGED.output | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} -# docker_container.running returns as soon as the container starts, but on -# first-init docker-entrypoint.sh starts a temporary postgres with -# `listen_addresses=''` to run /docker-entrypoint-initdb.d scripts, then -# shuts it down before exec'ing the real CMD. A default pg_isready check -# (Unix socket) passes during that ephemeral phase and races the shutdown -# with "the database system is shutting down". Checking TCP readiness on -# 127.0.0.1 only succeeds after the final postgres binds the port. postgres_wait_ready: cmd.run: - - name: | - for i in $(seq 1 60); do - if docker exec so-postgres pg_isready -h 127.0.0.1 -U postgres -q 2>/dev/null; then - exit 0 - fi - sleep 2 - done - echo "so-postgres did not accept TCP connections within 120s" >&2 - exit 1 + - name: /usr/sbin/so-postgres-wait - require: - docker_container: so-postgres + - file: postgres_sbin # Ensure the shared Telegraf database exists. init-db.sh only runs on a # fresh data dir, so hosts upgraded onto an existing /nsm/postgres volume diff --git a/salt/postgres/tools/sbin/so-postgres-wait b/salt/postgres/tools/sbin/so-postgres-wait new file mode 100644 index 000000000..7c4c8ce92 --- /dev/null +++ b/salt/postgres/tools/sbin/so-postgres-wait @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Wait for the so-postgres container to accept TCP connections. 
+# +# docker_container.running returns as soon as the container starts, but on +# first-init docker-entrypoint.sh starts a temporary postgres with +# `listen_addresses=''` to run /docker-entrypoint-initdb.d scripts, then +# shuts it down before exec'ing the real CMD. A default pg_isready check +# (Unix socket) passes during that ephemeral phase and races the shutdown +# with "the database system is shutting down". Checking TCP readiness on +# 127.0.0.1 only succeeds after the final postgres binds the port. +# +# Usage: so-postgres-wait [iterations] [sleep_seconds] +# Default: 60 iterations, 2s sleep (~120s total). + +ITERATIONS=${1:-60} +SLEEP_SECONDS=${2:-2} + +for i in $(seq 1 "$ITERATIONS"); do + if docker exec so-postgres pg_isready -h 127.0.0.1 -U postgres -q 2>/dev/null; then + exit 0 + fi + sleep "$SLEEP_SECONDS" +done + +echo "so-postgres did not accept TCP connections within $((ITERATIONS * SLEEP_SECONDS))s" >&2 +exit 1 From 613eca52fcd800570e444d271202429531c731cd Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 13:24:10 -0500 Subject: [PATCH 20/25] update hotfix date --- HOTFIX | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOTFIX b/HOTFIX index 8d594f290..70406bf9d 100644 --- a/HOTFIX +++ b/HOTFIX @@ -1 +1 @@ -20260526 +20260528 From b2a82fec29b25a5718c7a903f38402af905e2a4a Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 13:24:23 -0500 Subject: [PATCH 21/25] fix_logstash_0013_lumberjack_pipeline_name Before removing from apply_hotfix function first verify that older installs < 3.1.0 are still upgradable when referencing 'so/0013_input_lumberjack_fleet.conf' via pillar. 
Failure to do so will prevent logstash from starting --- salt/manager/tools/sbin/soup | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 473ef79c5..ba76d2a3e 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -693,6 +693,10 @@ rename_strelka_scan_lnk() { rm -f "$TMP_VALUE_FILE" } +fix_logstash_0013_lumberjack_pipeline_name() { + update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" +} + up_to_3.1.0() { ensure_postgres_local_pillar ensure_postgres_secret @@ -701,7 +705,7 @@ up_to_3.1.0() { # Clear existing component template state file. rm -f /opt/so/state/esfleet_component_templates.json rename_strelka_scan_lnk - update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" + fix_logstash_0013_lumberjack_pipeline_name INSTALLEDVERSION=3.1.0 } @@ -1525,7 +1529,9 @@ EOF # Keeping this block in case we need to do a hotfix that requires salt update apply_hotfix() { if [[ "$INSTALLEDVERSION" == "3.1.0" ]] ; then - update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" + # Do not remove this fix_logstash_0013_lumberjack_pipeline_name in future hotfixes without first validating older + # installs referencing "so/0013_input_lumberjack_fleet.conf" via pillar are upgradable + fix_logstash_0013_lumberjack_pipeline_name else echo "No actions required. 
($INSTALLEDVERSION/$HOTFIXVERSION)" fi From 93ffce98d71d332e7743056b0d36f3aacbbab24d Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 27 May 2026 15:07:25 -0400 Subject: [PATCH 22/25] add onionconfig and postgres modules to soc config --- salt/soc/defaults.yaml | 6 ++++++ salt/soc/merged.map.jinja | 7 +++++++ salt/soc/soc_soc.yaml | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index cc80758fc..62b451bec 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -1519,6 +1519,12 @@ soc: serviceAccountJSON: "" serviceAccountLocation: "" healthTimeoutSeconds: 5 + onionconfig: + saltstackDir: /opt/so/saltstack + bypassEnabled: false + postgres: + host: + password: salt: queueDir: /opt/sensoroni/queue timeoutMs: 45000 diff --git a/salt/soc/merged.map.jinja b/salt/soc/merged.map.jinja index 349937983..b34efb11d 100644 --- a/salt/soc/merged.map.jinja +++ b/salt/soc/merged.map.jinja @@ -16,6 +16,13 @@ {% do SOCMERGED.config.server.update({'additionalCA': MANAGERMERGED.additionalCA}) %} {% do SOCMERGED.config.server.update({'insecureSkipVerify': MANAGERMERGED.insecureSkipVerify}) %} +{% if not SOCMERGED.config.server.modules.postgres.host %} +{% do SOCMERGED.config.server.modules.postgres.update({'host': GLOBALS.manager}) %} +{% endif %} +{% if not SOCMERGED.config.server.modules.postgres.password %} +{% do SOCMERGED.config.server.modules.postgres.update({'password': salt['pillar.get']('secrets:postgres_pass', '')}) %} +{% endif %} + {# if SOCMERGED.config.server.modules.cases == httpcase details come from the soc pillar #} {% if SOCMERGED.config.server.modules.cases != 'soc' %} {% do SOCMERGED.config.server.modules.elastic.update({'casesEnabled': false}) %} diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index 647bdd778..3cb244eed 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -453,6 +453,26 @@ soc: description: Duration (in milliseconds) that must 
elapse after a grid node fails to check-in before the node will be marked offline (fault). global: True advanced: True + onionconfig: + saltstackDir: + description: Root directory containing the SaltStack tree that SOC reads and writes configuration from. Should not be changed under normal circumstances. + global: True + advanced: True + bypassEnabled: + description: When enabled, errors encountered while reading the SaltStack pillar tree (missing files, unreadable directories, etc.) are logged but do not prevent SOC from starting or serving settings. Intended for advanced troubleshooting and recovery scenarios when the pillar tree is partially unreadable. + global: True + advanced: True + forcedType: bool + postgres: + host: + description: Hostname or IP address of the PostgreSQL server used by SOC. Defaults to the manager hostname. + global: True + advanced: True + password: + description: Password used by SOC to authenticate to the PostgreSQL server. Defaults to the postgres superuser password seeded in the secrets pillar. + global: True + sensitive: True + advanced: True salt: longRelayTimeoutMs: description: Duration (in milliseconds) to wait for a response from the Salt API when executing tasks known for being long running before giving up and showing an error on the SOC UI. From bb8ae91d91936d84dbf1e61617e8bba59c66a9f8 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 27 May 2026 16:39:52 -0400 Subject: [PATCH 23/25] fix so-soc postgres bootstrap --- salt/manager/tools/sbin/soup | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 05f58b9a5..c31891f1d 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -730,12 +730,17 @@ bootstrap_so_soc_database() { # 3.1.0 already have /nsm/postgres populated, so the so_soc bootstrap block # added in 3.2 never fires. Re-run the script explicitly; it's idempotent. 
echo "Bootstrapping so_soc database via init-db.sh." + # The postgres image has no USER directive, so `docker exec` defaults to + # root, and the container env intentionally omits POSTGRES_USER (the upstream + # entrypoint defaults it transiently during first-init only). Recreate both + # so psql inside init-db.sh resolves the connect user correctly. + local exec_cmd="docker exec -u postgres -e POSTGRES_USER=postgres so-postgres bash /docker-entrypoint-initdb.d/init-db.sh" if ! /usr/sbin/so-postgres-wait; then - FINAL_MESSAGE_QUEUE+=("WARNING: so-postgres was not ready during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh") + FINAL_MESSAGE_QUEUE+=("WARNING: so-postgres was not ready during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: $exec_cmd") return 0 fi - if ! docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh; then - FINAL_MESSAGE_QUEUE+=("WARNING: init-db.sh failed inside so-postgres during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh") + if ! $exec_cmd; then + FINAL_MESSAGE_QUEUE+=("WARNING: init-db.sh failed inside so-postgres during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: $exec_cmd") return 0 fi echo "so_soc bootstrap complete." 
From 5abd6de4b55b2c3c8965620bdbcc371a944da3cb Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 28 May 2026 09:34:17 -0400 Subject: [PATCH 24/25] 3.1.0 hotfix --- DOWNLOAD_AND_VERIFY_ISO.md | 22 +++++++++++----------- sigs/securityonion-3.1.0-20260528.iso.sig | Bin 0 -> 566 bytes 2 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 sigs/securityonion-3.1.0-20260528.iso.sig diff --git a/DOWNLOAD_AND_VERIFY_ISO.md b/DOWNLOAD_AND_VERIFY_ISO.md index a0ea874fa..bae49c4ac 100644 --- a/DOWNLOAD_AND_VERIFY_ISO.md +++ b/DOWNLOAD_AND_VERIFY_ISO.md @@ -1,17 +1,17 @@ -### 3.1.0-20260521 ISO image released on 2026/05/21 +### 3.1.0-20260528 ISO image released on 2026/05/28 ### Download and Verify -3.1.0-20260521 ISO image: -https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso +3.1.0-20260528 ISO image: +https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260528.iso -MD5: A853BC118639ABCE1795D6E313BFFBDE -SHA1: FCA615AD6E31710B33AE5870FEF447861FDB3B8F -SHA256: CE2A5947274D9ED2C5068A1FD46B64C4FEF70445EA9B61A98DD3621781329F2C +MD5: 9D6FF58DEEE24089D722C73169765B3E +SHA1: 2B8B816B6CEC3B7F96B3C5E040EBF502DD2C412F +SHA256: 62FAB57E247C843D6A04F0796D8162C732B65D82FC3E4A59D087135B9FD32912 Signature for ISO image: -https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig +https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260528.iso.sig Signing key: https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/main/KEYS @@ -25,22 +25,22 @@ wget https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/ Download the signature file for the ISO: ``` -wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig +wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260528.iso.sig ``` 
Download the ISO image: ``` -wget https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso +wget https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260528.iso ``` Verify the downloaded ISO image using the signature file: ``` -gpg --verify securityonion-3.1.0-20260521.iso.sig securityonion-3.1.0-20260521.iso +gpg --verify securityonion-3.1.0-20260528.iso.sig securityonion-3.1.0-20260528.iso ``` The output should show "Good signature" and the Primary key fingerprint should match what's shown below: ``` -gpg: Signature made Thu 21 May 2026 11:10:01 AM EDT using RSA key ID FE507013 +gpg: Signature made Wed 27 May 2026 03:03:59 PM EDT using RSA key ID FE507013 gpg: Good signature from "Security Onion Solutions, LLC " gpg: WARNING: This key is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. diff --git a/sigs/securityonion-3.1.0-20260528.iso.sig b/sigs/securityonion-3.1.0-20260528.iso.sig new file mode 100644 index 0000000000000000000000000000000000000000..e4bead44df3b87b3f8f037867841feada61581a2 GIT binary patch literal 566 zcmV-60?GY}0y6{v0SEvc79j-41gSkXz6^6dp_W8^5Ma0dP;e6k0%{jP9{>so5PT3| zxBgIY6Ei^%|6UnlNTps}bX?WU@Vh_C?-M|n}<~50wVGXXGoD8W3KDy z=ZYPL6fKH1GNVpuJ^)}-r--D30lb_cgD%wZnXQ45n7fB0fT`F(k_%Xy#}A(Ws^Obh|St0AeMB6z&!7o&T7 zp=tatlM-=K)1EHl3Cb$hB&|O52e!UHZNL=;?pHa#16K$n|AWd z)vl}W%vr`V#&5!~(`lP#V(2C>vGbH>2=4ggFFj*5pMZ|+vES?u+77OpxzU$otkQX@ zLtD9EAjr>rTD3OA7lYK3XqlWNjt#DJMaCg{V$BvxKCYi|fd5UPD=|I-2Sv&aPF-v8 zHAWRBPEKp**n0iwqd8Dv@x0>_Yw1N&%nj4r+waMCN&|`*2dOB!(-wW;NvXPPB(Dov zm Date: Thu, 28 May 2026 10:24:47 -0400 Subject: [PATCH 25/25] Remove outdated HOTFIX version number --- HOTFIX | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOTFIX b/HOTFIX index 70406bf9d..8b1378917 100644 --- a/HOTFIX +++ b/HOTFIX @@ -1 +1 @@ -20260528 +