From fabecb82885d80efe96edf239f8b463f0103b8ef Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 14 May 2026 13:57:40 -0400 Subject: [PATCH 01/24] remove highstate from startup_states. highstate on system start --- salt/manager/sync_es_users.sls | 8 +++-- salt/salt/minion/boot_highstate.sls | 32 +++++++++++++++++++ salt/salt/minion/init.sls | 31 +++++++++++++++--- .../service/so-boot-highstate.service.jinja | 14 ++++++++ salt/setup/virt/setSalt.sls | 5 --- setup/so-functions | 15 +++++---- setup/so-setup | 2 +- 7 files changed, 88 insertions(+), 19 deletions(-) create mode 100644 salt/salt/minion/boot_highstate.sls create mode 100644 salt/salt/service/so-boot-highstate.service.jinja diff --git a/salt/manager/sync_es_users.sls b/salt/manager/sync_es_users.sls index 29b090e18..f452ff5fe 100644 --- a/salt/manager/sync_es_users.sls +++ b/salt/manager/sync_es_users.sls @@ -31,11 +31,13 @@ sync_es_users: - http: wait_for_kratos - file: so-user.lock # require so-user.lock file to be missing -# we dont want this added too early in setup, so we add the onlyif to verify 'startup_states: highstate' -# is in the minion config. That line is added before the final highstate during setup +# we dont want this added too early in setup, so the onlyif gates on the +# /opt/so/conf/setup-complete marker. The marker is written by +# mark_setup_complete in setup/so-functions just before the final setup +# highstate (and by an upgrade-path state for systems set up under the old gate). so-user_sync: cron.present: - user: root - name: 'PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin /usr/sbin/so-user sync &>> /opt/so/log/soc/sync.log' - identifier: so-user_sync - - onlyif: "grep -x 'startup_states: highstate' /etc/salt/minion" + - onlyif: "test -e /opt/so/conf/setup-complete" diff --git a/salt/salt/minion/boot_highstate.sls b/salt/salt/minion/boot_highstate.sls new file mode 100644 index 000000000..13bfbda65 --- /dev/null +++ b/salt/salt/minion/boot_highstate.sls @@ -0,0 +1,32 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Manages /etc/systemd/system/so-boot-highstate.service, a Type=oneshot +# RemainAfterExit=yes unit that runs `salt-call state.highstate` exactly once +# per system boot. Replaces the legacy `startup_states: highstate` minion +# config, which fired on every salt-minion service restart (causing a redundant +# highstate whenever a highstate itself restarted salt-minion). + +include: + - systemd.reload + +so_boot_highstate_unit_file: + file.managed: + - name: /etc/systemd/system/so-boot-highstate.service + - source: salt://salt/service/so-boot-highstate.service.jinja + - template: jinja + - onchanges_in: + - module: systemd_reload + +# Only enable once setup is complete. Until then the gate file is missing and +# the unit's own ConditionPathExists would no-op it anyway -- this just keeps +# `systemctl is-enabled` honest for the sync_es_users gate. +so_boot_highstate_service: + service.enabled: + - name: so-boot-highstate.service + - onlyif: test -e /opt/so/conf/setup-complete + - require: + - file: so_boot_highstate_unit_file + - module: systemd_reload diff --git a/salt/salt/minion/init.sls b/salt/salt/minion/init.sls index eb7018aed..0d0eed22c 100644 --- a/salt/salt/minion/init.sls +++ b/salt/salt/minion/init.sls @@ -17,6 +17,7 @@ include: - repo.client - salt.mine_functions - salt.minion.service_file + - salt.minion.boot_highstate {% if GLOBALS.is_manager %} - ca.signing_policy {% endif %} @@ -80,11 +81,33 @@ set_log_levels: - "log_level: info" - "log_level_logfile: info" -enable_startup_states: - file.uncomment: +# startup_states: highstate caused a full highstate to run on every +# salt-minion service start, including the restart triggered when a highstate +# itself modified the minion config (beacons, mine, unit file). Replaced by +# so-boot-highstate.service (managed in salt.minion.boot_highstate), which +# runs once per system boot only. Strip the line from /etc/salt/minion on +# upgrade; both the commented and uncommented forms historically existed. +remove_startup_states: + file.line: - name: /etc/salt/minion - - regex: '^startup_states: highstate$' - - unless: pgrep so-setup + - match: 'startup_states: highstate' + - mode: delete + +# Upgrade-path bridge: systems that already passed setup under the old gate +# (`grep -x 'startup_states: highstate' /etc/salt/minion`) get a setup-complete +# marker so so-boot-highstate.service can be enabled and the so-user_sync cron +# in sync_es_users.sls keeps installing. Setup-in-progress systems instead get +# the marker from `mark_setup_complete` in setup/so-functions at the right +# moment. `replace: false` means we never overwrite a marker once written. +mark_setup_complete_for_upgrades: + file.managed: + - name: /opt/so/conf/setup-complete + - replace: false + - makedirs: True + - onlyif: "grep -qx 'startup_states: highstate' /etc/salt/minion" + - require_in: + - file: remove_startup_states + - service: so_boot_highstate_service {% endif %} diff --git a/salt/salt/service/so-boot-highstate.service.jinja b/salt/salt/service/so-boot-highstate.service.jinja new file mode 100644 index 000000000..f3ec950c3 --- /dev/null +++ b/salt/salt/service/so-boot-highstate.service.jinja @@ -0,0 +1,14 @@ +[Unit] +Description=Security Onion boot-time highstate (runs once per boot) +After=salt-minion.service network-online.target +Wants=network-online.target +Requires=salt-minion.service +ConditionPathExists=/opt/so/conf/setup-complete + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/bin/salt-call state.highstate -l info queue=True + +[Install] +WantedBy=multi-user.target diff --git a/salt/setup/virt/setSalt.sls b/salt/setup/virt/setSalt.sls index 69c8795de..59ab9e1e3 100644 --- a/salt/setup/virt/setSalt.sls +++ b/salt/setup/virt/setSalt.sls @@ -8,11 +8,6 @@ set_role_grain: - name: role - value: so-{{ grains.id.split("_") | last }} -set_highstate: - file.append: - - name: /etc/salt/minion - - text: 'startup_states: highstate' - enable_salt_minion: service.enabled: - name: salt-minion diff --git a/setup/so-functions b/setup/so-functions index c94b8eee7..da8e31d73 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -539,16 +539,19 @@ configure_minion() { " x509_v2: true"\ "log_level: info"\ "log_level_logfile: info"\ - "log_file: /opt/so/log/salt/minion"\ - "#startup_states: highstate" >> "$minion_config" + "log_file: /opt/so/log/salt/minion" >> "$minion_config" } -checkin_at_boot() { - local minion_config=/etc/salt/minion +mark_setup_complete() { + # Writes the setup-complete marker. Salt's so-boot-highstate.service + # (boot-time oneshot) and the so-user_sync cron gate in + # salt/manager/sync_es_users.sls both key off this file. + local marker=/opt/so/conf/setup-complete - info "Enabling checkin at boot" - sed -i 's/#startup_states: highstate/startup_states: highstate/' "$minion_config" + info "Marking setup as complete" + mkdir -p "$(dirname "$marker")" + touch "$marker" } check_requirements() { diff --git a/setup/so-setup b/setup/so-setup index 6c77e781c..c11d287eb 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -792,7 +792,7 @@ if ! [[ -f $install_opt_file ]]; then error "Failed to run so-elastic-fleet-setup" fail_setup fi - checkin_at_boot + mark_setup_complete set_initial_firewall_access initialize_elasticsearch_indices "so-case so-casehistory so-assistant-session so-assistant-chat" # run a final highstate before enabling scheduled highstates. From 86edc5aaba451d571d2cb68f703745ca0d0d5843 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 28 May 2026 22:57:59 -0400 Subject: [PATCH 02/24] version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 944880fa1..03e153fda 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0 +3.0.0-kilo From 68a82a425b155ea0dca567278990bc90635504c5 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Sat, 30 May 2026 08:12:50 -0400 Subject: [PATCH 03/24] fix version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 03e153fda..944880fa1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.0-kilo +3.2.0 From 79da9f9f2c011dbfb868cc02071b703666b3afb1 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:26:52 -0500 Subject: [PATCH 04/24] check if there is a version or hotfix to upgrade to before verifiying elasticsearch compatibility --- salt/manager/tools/sbin/soup | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index ba76d2a3e..eb7818cf1 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1606,11 +1606,12 @@ main() { echo "Verifying we have the latest soup script." verify_latest_update_script + echo "Let's see if we need to update Security Onion." + upgrade_check + echo "Verifying Elasticsearch version compatibility across the grid before upgrading." verify_es_version_compatibility - echo "Let's see if we need to update Security Onion." - upgrade_check upgrade_space echo "Checking for Salt Master and Minion updates." From 3c533cccbce84fab75b6c96ef69933816c3bcd77 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:28:59 -0500 Subject: [PATCH 05/24] and after free space check --- salt/manager/tools/sbin/soup | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index eb7818cf1..a62a39b40 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1608,12 +1608,11 @@ main() { echo "Let's see if we need to update Security Onion." upgrade_check + upgrade_space echo "Verifying Elasticsearch version compatibility across the grid before upgrading." verify_es_version_compatibility - upgrade_space - echo "Checking for Salt Master and Minion updates." upgrade_check_salt set -e From f2996fb888c5db9bd9740ae0cc5a922330b8ea6b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:52:35 -0500 Subject: [PATCH 06/24] use so-config-backup script in soup --- salt/backup/tools/sbin/so-config-backup.jinja | 6 ++++-- salt/manager/tools/sbin/soup | 14 ++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/salt/backup/tools/sbin/so-config-backup.jinja b/salt/backup/tools/sbin/so-config-backup.jinja index 7f65bbba3..8d214e665 100755 --- a/salt/backup/tools/sbin/so-config-backup.jinja +++ b/salt/backup/tools/sbin/so-config-backup.jinja @@ -25,9 +25,11 @@ if [ ! -f $BACKUPFILE ]; then # Create empty backup file tar -cf $BACKUPFILE -T /dev/null - # Loop through all paths defined in global.sls, and append them to backup file + # Loop through all paths defined in global.sls, and append them to backup file if they exist {%- for LOCATION in BACKUPLOCATIONS %} - tar -rf $BACKUPFILE "${EXCLUSIONS[@]}" {{ LOCATION }} + if [[ -d {{ LOCATION }} || -f {{ LOCATION }} ]]; then + tar -rf $BACKUPFILE "${EXCLUSIONS[@]}" {{ LOCATION }} + fi {%- endfor %} fi diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 135c51276..7874bf7b2 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -188,13 +188,6 @@ airgap_update_dockers() { fi } -backup_old_states_pillars() { - - tar czf /nsm/backup/$(echo $INSTALLEDVERSION)_$(date +%Y%m%d-%H%M%S)_soup_default_states_pillars.tar.gz /opt/so/saltstack/default/ - tar czf /nsm/backup/$(echo $INSTALLEDVERSION)_$(date +%Y%m%d-%H%M%S)_soup_local_states_pillars.tar.gz /opt/so/saltstack/local/ - -} - update_registry() { docker stop so-dockerregistry docker rm so-dockerregistry @@ -1670,7 +1663,8 @@ main() { echo "Applying $HOTFIXVERSION hotfix" # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars if [[ ! "$MINION_ROLE" == "import" ]]; then - backup_old_states_pillars + echo "Running so-config-backup script." + /sbin/so-config-backup fi copy_new_files create_local_directories "/opt/so/saltstack/default" @@ -1726,8 +1720,8 @@ main() { # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars if [[ ! "$MINION_ROLE" == "import" ]]; then echo "" - echo "Creating snapshots of default and local Salt states and pillars and saving to /nsm/backup/" - backup_old_states_pillars + echo "Running so-config-backup script." + /sbin/so-config-backup fi echo "" From f9c2579261b0785024b0ba09a33329e1a96642e2 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 12:04:33 -0500 Subject: [PATCH 07/24] remove logstash pipeline rename from hotfix moving to up_to_3.2.0 --- salt/manager/tools/sbin/soup | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 135c51276..ede301648 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -769,6 +769,8 @@ bootstrap_so_soc_database() { } up_to_3.2.0() { + fix_logstash_0013_lumberjack_pipeline_name + INSTALLEDVERSION=3.2.0 } @@ -1566,13 +1568,7 @@ EOF # Keeping this block in case we need to do a hotfix that requires salt update apply_hotfix() { - if [[ "$INSTALLEDVERSION" == "3.1.0" ]] ; then - # Do not remove this fix_logstash_0013_lumberjack_pipeline_name in future hotfixes without first validating older - # installs referencing "so/0013_input_lumberjack_fleet.conf" via pillar are upgradable - fix_logstash_0013_lumberjack_pipeline_name - else - echo "No actions required. ($INSTALLEDVERSION/$HOTFIXVERSION)" - fi + echo "No actions required. ($INSTALLEDVERSION/$HOTFIXVERSION)" } failed_soup_restore_items() { From 559465b40783ca3baa7265b1ac4dcd71e0ea32c9 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 12:05:09 -0500 Subject: [PATCH 08/24] run elastic agent gen installers script in post_to_3.2.0 --- salt/manager/tools/sbin/soup | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index ede301648..1416f2ba3 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -777,6 +777,10 @@ up_to_3.2.0() { post_to_3.2.0() { bootstrap_so_soc_database + # Including agent regen script here since it was missed in post_to_3.1.0 + echo "Regenerating Elastic Agent Installers" + /sbin/so-elastic-agent-gen-installers + POSTVERSION=3.2.0 } From 7ca23132554557daf79bbf1ec47ad4a949e3c870 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 3 Jun 2026 09:05:23 -0400 Subject: [PATCH 09/24] move to securityonion db --- salt/manager/tools/sbin/soup | 24 ------------------------ salt/postgres/files/init-db.sh | 7 +------ salt/soc/defaults.yaml | 5 +++-- salt/soc/merged.map.jinja | 3 ++- salt/soc/soc_soc.yaml | 14 +++++++++++++- 5 files changed, 19 insertions(+), 34 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 135c51276..cd5f47e35 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -746,35 +746,11 @@ post_to_3.1.0() { ### 3.2.0 Scripts ### -bootstrap_so_soc_database() { - # init-db.sh is mounted into so-postgres at /docker-entrypoint-initdb.d/init-db.sh - # and runs automatically only on a fresh data directory. Hosts upgrading from - # 3.1.0 already have /nsm/postgres populated, so the so_soc bootstrap block - # added in 3.2 never fires. Re-run the script explicitly; it's idempotent. - echo "Bootstrapping so_soc database via init-db.sh." - # The postgres image has no USER directive, so `docker exec` defaults to - # root, and the container env intentionally omits POSTGRES_USER (the upstream - # entrypoint defaults it transiently during first-init only). Recreate both - # so psql inside init-db.sh resolves the connect user correctly. - local exec_cmd="docker exec -u postgres -e POSTGRES_USER=postgres so-postgres bash /docker-entrypoint-initdb.d/init-db.sh" - if ! /usr/sbin/so-postgres-wait; then - FINAL_MESSAGE_QUEUE+=("WARNING: so-postgres was not ready during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: $exec_cmd") - return 0 - fi - if ! $exec_cmd; then - FINAL_MESSAGE_QUEUE+=("WARNING: init-db.sh failed inside so-postgres during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: $exec_cmd") - return 0 - fi - echo "so_soc bootstrap complete." -} - up_to_3.2.0() { INSTALLEDVERSION=3.2.0 } post_to_3.2.0() { - bootstrap_so_soc_database - POSTVERSION=3.2.0 } diff --git a/salt/postgres/files/init-db.sh b/salt/postgres/files/init-db.sh index 03e6d08dd..2187585da 100644 --- a/salt/postgres/files/init-db.sh +++ b/salt/postgres/files/init-db.sh @@ -31,9 +31,4 @@ EOSQL # only ensures the shared database exists on first initialization. if ! psql -U "$POSTGRES_USER" -tAc "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then psql -v ON_ERROR_STOP=1 -U "$POSTGRES_USER" -c "CREATE DATABASE so_telegraf" -fi - -# Bootstrap the SOC database. -if ! psql -U "$POSTGRES_USER" -tAc "SELECT 1 FROM pg_database WHERE datname='so_soc'" | grep -q 1; then - psql -v ON_ERROR_STOP=1 -U "$POSTGRES_USER" -c "CREATE DATABASE so_soc" -fi +fi \ No newline at end of file diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index 62b451bec..05cad494e 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -1523,8 +1523,9 @@ soc: saltstackDir: /opt/so/saltstack bypassEnabled: false postgres: - host: - password: + database: securityonion + host: "" + password: "" salt: queueDir: /opt/sensoroni/queue timeoutMs: 45000 diff --git a/salt/soc/merged.map.jinja b/salt/soc/merged.map.jinja index b34efb11d..cfc0fafbd 100644 --- a/salt/soc/merged.map.jinja +++ b/salt/soc/merged.map.jinja @@ -20,7 +20,8 @@ {% do SOCMERGED.config.server.modules.postgres.update({'host': GLOBALS.manager}) %} {% endif %} {% if not SOCMERGED.config.server.modules.postgres.password %} -{% do SOCMERGED.config.server.modules.postgres.update({'password': salt['pillar.get']('secrets:postgres_pass', '')}) %} +{% do SOCMERGED.config.server.modules.postgres.update({'password': salt['pillar.get']('postgres:auth:users:so_postgres_user:pass', '')}) %} +{% do SOCMERGED.config.server.modules.postgres.update({'user': salt['pillar.get']('postgres:auth:users:so_postgres_user:user', 'so_postgres')}) %} {% endif %} {# if SOCMERGED.config.server.modules.cases == httpcase details come from the soc pillar #} diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index 3cb244eed..ad34c3bbf 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -468,8 +468,20 @@ soc: description: Hostname or IP address of the PostgreSQL server used by SOC. Defaults to the manager hostname. global: True advanced: True + port: + description: Port of the PostgreSQL server used by SOC. + global: True + advanced: True + user: + description: Username used by SOC to authenticate to the PostgreSQL server. + global: True + advanced: True + database: + description: Database used by SOC to authenticate to the PostgreSQL server. + global: True + advanced: True password: - description: Password used by SOC to authenticate to the PostgreSQL server. Defaults to the postgres superuser password seeded in the secrets pillar. + description: Password used by SOC to authenticate to the PostgreSQL server. global: True sensitive: True advanced: True From 61e72c89e422754978c7abf1b8fbdf68fcd5ef76 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 3 Jun 2026 09:49:53 -0400 Subject: [PATCH 10/24] postgres updates --- salt/postgres/files/init-db.sh | 1 + salt/soc/defaults.yaml | 5 ++++- salt/soc/soc_soc.yaml | 8 ++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/salt/postgres/files/init-db.sh b/salt/postgres/files/init-db.sh index 2187585da..d12bc4c9b 100644 --- a/salt/postgres/files/init-db.sh +++ b/salt/postgres/files/init-db.sh @@ -17,6 +17,7 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E END IF; END \$\$; + GRANT ALL ON SCHEMA public TO "$SO_POSTGRES_USER"; GRANT ALL PRIVILEGES ON DATABASE "$POSTGRES_DB" TO "$SO_POSTGRES_USER"; -- Lock the SOC database down at the connect layer; PUBLIC gets CONNECT -- by default, which would let per-minion telegraf roles open sessions diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index 05cad494e..c9399eab4 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -1523,8 +1523,11 @@ soc: saltstackDir: /opt/so/saltstack bypassEnabled: false postgres: - database: securityonion host: "" + port: 5432 + sslMode: "allow" + database: securityonion + user: "" password: "" salt: queueDir: /opt/sensoroni/queue diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index ad34c3bbf..b2ac6d175 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -472,14 +472,18 @@ soc: description: Port of the PostgreSQL server used by SOC. global: True advanced: True - user: - description: Username used by SOC to authenticate to the PostgreSQL server. + sslMode: + description: "Use encrypted connections to the PostgreSQL server. Must be one of the following values: disable, allow, prefer, require, verify-ca, verify-full. Defaults to allow." global: True advanced: True database: description: Database used by SOC to authenticate to the PostgreSQL server. global: True advanced: True + user: + description: Username used by SOC to authenticate to the PostgreSQL server. + global: True + advanced: True password: description: Password used by SOC to authenticate to the PostgreSQL server. global: True From a767c79641a62b38c995d2e8ece2d62baf1d0b38 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 3 Jun 2026 10:39:37 -0400 Subject: [PATCH 11/24] restore soup db init --- salt/manager/tools/sbin/soup | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 82fb19434..d50187c9c 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -739,6 +739,28 @@ post_to_3.1.0() { ### 3.2.0 Scripts ### +bootstrap_so_soc_database() { + # init-db.sh is mounted into so-postgres at /docker-entrypoint-initdb.d/init-db.sh + # and runs automatically only on a fresh data directory. Hosts upgrading from + # 3.1.0 already have /nsm/postgres populated, so the so_soc bootstrap block + # added in 3.2 never fires. Re-run the script explicitly; it's idempotent. + echo "Bootstrapping so_soc database via init-db.sh." + # The postgres image has no USER directive, so `docker exec` defaults to + # root, and the container env intentionally omits POSTGRES_USER (the upstream + # entrypoint defaults it transiently during first-init only). Recreate both + # so psql inside init-db.sh resolves the connect user correctly. + local exec_cmd="docker exec -u postgres -e POSTGRES_USER=postgres so-postgres bash /docker-entrypoint-initdb.d/init-db.sh" + if ! /usr/sbin/so-postgres-wait; then + FINAL_MESSAGE_QUEUE+=("WARNING: so-postgres was not ready during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: $exec_cmd") + return 0 + fi + if ! $exec_cmd; then + FINAL_MESSAGE_QUEUE+=("WARNING: init-db.sh failed inside so-postgres during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: $exec_cmd") + return 0 + fi + echo "so_soc bootstrap complete." +} + up_to_3.2.0() { fix_logstash_0013_lumberjack_pipeline_name @@ -746,6 +768,8 @@ up_to_3.2.0() { } post_to_3.2.0() { + bootstrap_so_soc_database + # Including agent regen script here since it was missed in post_to_3.1.0 echo "Regenerating Elastic Agent Installers" /sbin/so-elastic-agent-gen-installers From 1d3d98f759132e59679bfb25980ce36c625c8007 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 3 Jun 2026 12:24:41 -0400 Subject: [PATCH 12/24] kilo --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 944880fa1..03e153fda 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0 +3.0.0-kilo From 2d653b6f1bd7111f1e4f02f9463742f3bce7a77a Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 3 Jun 2026 15:46:58 -0400 Subject: [PATCH 13/24] does not need to be jinja template --- salt/salt/minion/boot_highstate.sls | 3 +-- ...-boot-highstate.service.jinja => so-boot-highstate.service} | 0 2 files changed, 1 insertion(+), 2 deletions(-) rename salt/salt/service/{so-boot-highstate.service.jinja => so-boot-highstate.service} (100%) diff --git a/salt/salt/minion/boot_highstate.sls b/salt/salt/minion/boot_highstate.sls index 13bfbda65..e489210f6 100644 --- a/salt/salt/minion/boot_highstate.sls +++ b/salt/salt/minion/boot_highstate.sls @@ -15,8 +15,7 @@ include: so_boot_highstate_unit_file: file.managed: - name: /etc/systemd/system/so-boot-highstate.service - - source: salt://salt/service/so-boot-highstate.service.jinja - - template: jinja + - source: salt://salt/service/so-boot-highstate.service - onchanges_in: - module: systemd_reload diff --git a/salt/salt/service/so-boot-highstate.service.jinja b/salt/salt/service/so-boot-highstate.service similarity index 100% rename from salt/salt/service/so-boot-highstate.service.jinja rename to salt/salt/service/so-boot-highstate.service From ca85c5d90045b333d73aa94349d82d4fadea2c89 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 3 Jun 2026 17:26:08 -0400 Subject: [PATCH 14/24] fix version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 03e153fda..944880fa1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.0-kilo +3.2.0 From 13f8be40b59e04d2de171b7ea93185dceddd5a32 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 4 Jun 2026 08:46:35 -0400 Subject: [PATCH 15/24] so-boot-highstate: wait for docker before running highstate Add docker.service to After= and Wants= so the boot-time highstate starts after docker is up. Uses Wants (soft) so highstate still runs if docker fails to start. --- salt/salt/service/so-boot-highstate.service | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/salt/service/so-boot-highstate.service b/salt/salt/service/so-boot-highstate.service index f3ec950c3..a770122d6 100644 --- a/salt/salt/service/so-boot-highstate.service +++ b/salt/salt/service/so-boot-highstate.service @@ -1,7 +1,7 @@ [Unit] Description=Security Onion boot-time highstate (runs once per boot) -After=salt-minion.service network-online.target -Wants=network-online.target +After=salt-minion.service network-online.target docker.service +Wants=network-online.target docker.service Requires=salt-minion.service ConditionPathExists=/opt/so/conf/setup-complete From cb3631da818d1be7cb3a8c1e495751c2af0a8575 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 4 Jun 2026 15:07:27 -0400 Subject: [PATCH 16/24] Move setup-complete marker from /opt/so/conf to /opt/so/state The setup-complete marker is a runtime-state file, not config, so move it to /opt/so/state/setup-complete. Updates both writers (mark_setup_complete in setup/so-functions and the upgrade-path state in minion/init.sls) and the three readers (so-boot-highstate.service ConditionPathExists, boot_highstate.sls enable gate, and the so-user_sync cron gate). --- salt/manager/sync_es_users.sls | 4 ++-- salt/salt/minion/boot_highstate.sls | 2 +- salt/salt/minion/init.sls | 4 ++-- salt/salt/service/so-boot-highstate.service | 2 +- setup/so-functions | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/salt/manager/sync_es_users.sls b/salt/manager/sync_es_users.sls index f452ff5fe..8fc9c6bb4 100644 --- a/salt/manager/sync_es_users.sls +++ b/salt/manager/sync_es_users.sls @@ -32,7 +32,7 @@ sync_es_users: - file: so-user.lock # require so-user.lock file to be missing # we dont want this added too early in setup, so the onlyif gates on the -# /opt/so/conf/setup-complete marker. The marker is written by +# /opt/so/state/setup-complete marker. The marker is written by # mark_setup_complete in setup/so-functions just before the final setup # highstate (and by an upgrade-path state for systems set up under the old gate). so-user_sync: @@ -40,4 +40,4 @@ so-user_sync: - user: root - name: 'PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin /usr/sbin/so-user sync &>> /opt/so/log/soc/sync.log' - identifier: so-user_sync - - onlyif: "test -e /opt/so/conf/setup-complete" + - onlyif: "test -e /opt/so/state/setup-complete" diff --git a/salt/salt/minion/boot_highstate.sls b/salt/salt/minion/boot_highstate.sls index e489210f6..eb2596dad 100644 --- a/salt/salt/minion/boot_highstate.sls +++ b/salt/salt/minion/boot_highstate.sls @@ -25,7 +25,7 @@ so_boot_highstate_unit_file: so_boot_highstate_service: service.enabled: - name: so-boot-highstate.service - - onlyif: test -e /opt/so/conf/setup-complete + - onlyif: test -e /opt/so/state/setup-complete - require: - file: so_boot_highstate_unit_file - module: systemd_reload diff --git a/salt/salt/minion/init.sls b/salt/salt/minion/init.sls index 0d0eed22c..59dd0289c 100644 --- a/salt/salt/minion/init.sls +++ b/salt/salt/minion/init.sls @@ -94,14 +94,14 @@ remove_startup_states: - mode: delete # Upgrade-path bridge: systems that already passed setup under the old gate -# (`grep -x 'startup_states: highstate' /etc/salt/minion`) get a setup-complete +# (`grep -x 'startup_states: highstate' /etc/salt/minion`) get a /opt/so/state/setup-complete # marker so so-boot-highstate.service can be enabled and the so-user_sync cron # in sync_es_users.sls keeps installing. Setup-in-progress systems instead get # the marker from `mark_setup_complete` in setup/so-functions at the right # moment. `replace: false` means we never overwrite a marker once written. mark_setup_complete_for_upgrades: file.managed: - - name: /opt/so/conf/setup-complete + - name: /opt/so/state/setup-complete - replace: false - makedirs: True - onlyif: "grep -qx 'startup_states: highstate' /etc/salt/minion" diff --git a/salt/salt/service/so-boot-highstate.service b/salt/salt/service/so-boot-highstate.service index a770122d6..cc8c6a1c6 100644 --- a/salt/salt/service/so-boot-highstate.service +++ b/salt/salt/service/so-boot-highstate.service @@ -3,7 +3,7 @@ Description=Security Onion boot-time highstate (runs once per boot) After=salt-minion.service network-online.target docker.service Wants=network-online.target docker.service Requires=salt-minion.service -ConditionPathExists=/opt/so/conf/setup-complete +ConditionPathExists=/opt/so/state/setup-complete [Service] Type=oneshot diff --git a/setup/so-functions b/setup/so-functions index da8e31d73..5ce9a8fdc 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -547,7 +547,7 @@ mark_setup_complete() { # Writes the setup-complete marker. Salt's so-boot-highstate.service # (boot-time oneshot) and the so-user_sync cron gate in # salt/manager/sync_es_users.sls both key off this file. - local marker=/opt/so/conf/setup-complete + local marker=/opt/so/state/setup-complete info "Marking setup as complete" mkdir -p "$(dirname "$marker")" From ac907ba45fe6f146b7ee753b61c8d872557e929c Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:42:08 -0500 Subject: [PATCH 17/24] fix elasticsearch template generation issue --- salt/elasticsearch/cluster.sls | 19 ++++++++++++++++++- salt/elasticsearch/template.map.jinja | 27 +++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/salt/elasticsearch/cluster.sls b/salt/elasticsearch/cluster.sls index e25aed36a..d20ee45ca 100644 --- a/salt/elasticsearch/cluster.sls +++ b/salt/elasticsearch/cluster.sls @@ -9,9 +9,12 @@ {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCHMERGED %} {% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS, SO_MANAGED_INDICES %} {% if GLOBALS.role != 'so-heavynode' %} -{% from 'elasticsearch/template.map.jinja' import ALL_ADDON_SETTINGS %} +{% from 'elasticsearch/template.map.jinja' import ALL_ADDON_SETTINGS, ADDON_INDICES %} {% endif %} +include: + - elasticsearch.enabled + escomponenttemplates: file.recurse: - name: /opt/so/conf/elasticsearch/templates/component @@ -35,6 +38,20 @@ so_index_template_dir: {%- endfor %} {%- endif %} +{% if GLOBALS.role != "so-heavynode" %} +# Clean up legacy and non-SO managed templates from the elasticsearch/templates/addon-index/ directory +addon_index_template_dir: + file.directory: + - name: /opt/so/conf/elasticsearch/templates/addon-index + - clean: True + {%- if ADDON_INDICES %} + - require: + {%- for index in ADDON_INDICES %} + - file: addon_index_template_{{index}} + {%- endfor %} + {%- endif %} +{% endif %} + # Auto-generate index templates for SO managed indices (directly defined in elasticsearch/defaults.yaml) # These index templates are for the core SO datasets and are always required {% for index, settings in ES_INDEX_SETTINGS.items() %} diff --git a/salt/elasticsearch/template.map.jinja b/salt/elasticsearch/template.map.jinja index e66057775..ed1b49abe 100644 --- a/salt/elasticsearch/template.map.jinja +++ b/salt/elasticsearch/template.map.jinja @@ -61,15 +61,25 @@ {% if ALL_ADDON_SETTINGS_ORIG.keys() | length > 0 %} {% for index in ALL_ADDON_SETTINGS_ORIG.keys() %} {% do ALL_ADDON_SETTINGS_GLOBAL_OVERRIDES.update({index: salt['defaults.merge'](ALL_ADDON_SETTINGS_ORIG[index], PILLAR_GLOBAL_OVERRIDES, in_place=False)}) %} +{# Explicitly excluding addon indices from ES_INDEX_SETTINGS_ORIG + When manager.soc_managed_annotations runs, new entries are added to the salt/elasticsearch/defaults.yaml file to support 'revert to default' functionality. + Subsequent map renders will then incorrectly include 'integration X' in 'ES_INDEX_SETTINGS_ORIG' due to being in the defaults.yaml file. #} +{% if index in ES_INDEX_SETTINGS_ORIG.keys() %} +{% do ES_INDEX_SETTINGS_ORIG.pop(index) %} +{% endif %} {% endfor %} {% endif %} {% set ES_INDEX_SETTINGS = {} %} -{% macro create_final_index_template(DEFINED_SETTINGS, GLOBAL_OVERRIDES, FINAL_INDEX_SETTINGS) %} +{% macro create_final_index_template(DEFINED_SETTINGS, GLOBAL_OVERRIDES, FINAL_INDEX_SETTINGS, EXCLUDE_INDICES=[]) %} {% do GLOBAL_OVERRIDES.update(salt['defaults.merge'](GLOBAL_OVERRIDES, ES_INDEX_PILLAR, in_place=False)) %} {% for index, settings in GLOBAL_OVERRIDES.items() %} +{% if index in EXCLUDE_INDICES %} +{% continue %} +{% endif %} + {# prevent this action from being performed on custom defined indices. #} {# the custom defined index is not present in either of the dictionaries and fails to reder. #} {% if index in DEFINED_SETTINGS and index in GLOBAL_OVERRIDES %} @@ -150,10 +160,19 @@ {% endfor %} {% endmacro %} -{{ create_final_index_template(ES_INDEX_SETTINGS_ORIG, ES_INDEX_SETTINGS_GLOBAL_OVERRIDES, ES_INDEX_SETTINGS) }} -{{ create_final_index_template(ALL_ADDON_SETTINGS_ORIG, ALL_ADDON_SETTINGS_GLOBAL_OVERRIDES, ALL_ADDON_SETTINGS) }} +{# Exclude addon integrations from final ES_INDEX_SETTINGS #} +{{ create_final_index_template(ES_INDEX_SETTINGS_ORIG, ES_INDEX_SETTINGS_GLOBAL_OVERRIDES, ES_INDEX_SETTINGS, ALL_ADDON_SETTINGS_ORIG.keys() | list ) }} + +{# Exclude SO managed indices, otherwise ALL_ADDON_SETTINGS will include pillar values + of core integrations without merging defaults, resulting in an overlapping, but bad index template being generated. #} +{{ create_final_index_template(ALL_ADDON_SETTINGS_ORIG, ALL_ADDON_SETTINGS_GLOBAL_OVERRIDES, ALL_ADDON_SETTINGS, ES_INDEX_SETTINGS_ORIG.keys() | list ) }} {% set SO_MANAGED_INDICES = [] %} {% for index, settings in ES_INDEX_SETTINGS.items() %} {% do SO_MANAGED_INDICES.append(index) %} -{% endfor %} \ No newline at end of file +{% endfor %} + +{% set ADDON_INDICES = [] %} +{% for index, settings in ALL_ADDON_SETTINGS.items() %} +{% do ADDON_INDICES.append(index) %} +{% endfor %} From 9580976ba2a1317d1b99ef4091a964248170d8bb Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 8 Jun 2026 11:05:13 -0400 Subject: [PATCH 18/24] Add manager boot-time grid mine.update oneshot before highstate so-boot-mine-update.service is a manager-only Type=oneshot unit that runs once per boot after salt-master/salt-minion start and before so-boot-highstate.service. It pushes mine.update to all reachable minions so mine-backed pillars (node IPs, ES/Redis/Logstash discovery) are fresh before the boot highstate renders them. The helper waits for the responsive minion set to settle (plateau) rather than for every accepted key to report up, so an intentionally powered-off minion doesn't block the update; MAX_WAIT remains as a backstop. --- salt/manager/tools/sbin/so-boot-mine-update | 42 +++++++++++++++++++ salt/salt/master.sls | 1 + salt/salt/master/boot_mine_update.sls | 29 +++++++++++++ salt/salt/service/so-boot-mine-update.service | 15 +++++++ 4 files changed, 87 insertions(+) create mode 100755 salt/manager/tools/sbin/so-boot-mine-update create mode 100644 salt/salt/master/boot_mine_update.sls create mode 100644 salt/salt/service/so-boot-mine-update.service diff --git a/salt/manager/tools/sbin/so-boot-mine-update b/salt/manager/tools/sbin/so-boot-mine-update new file mode 100755 index 000000000..f497d891f --- /dev/null +++ b/salt/manager/tools/sbin/so-boot-mine-update @@ -0,0 +1,42 @@ +#!/bin/bash +# +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Runs once per boot on managers (via so-boot-mine-update.service), before +# so-boot-highstate.service. Waits for the responsive minion set to settle, then +# pushes mine.update to all minions so mine-backed pillars (node IPs, ES/Redis/ +# Logstash discovery) are fresh before the boot highstate renders them. + +MAX_WAIT=${MINE_UPDATE_MAX_WAIT:-180} # hard backstop only +INTERVAL=10 +STABLE_CHECKS=3 # up-count must hold steady this many polls +elapsed=0 +prev=-1 +stable=0 +up=0 + +# Wait for the *reachable* minion set to settle rather than for every accepted +# key to report up: an operator may accept a minion's key and then intentionally +# power off that host, so requiring up >= accepted would never be satisfied and +# we'd always burn the full MAX_WAIT. Once the responsive count stops growing we +# stop waiting and run mine.update against whoever is up. +while [ "$elapsed" -lt "$MAX_WAIT" ]; do + up=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null \ + | python3 -c 'import sys,json; print(len(json.load(sys.stdin)))' 2>/dev/null) + up=${up:-0} + if [ "$up" -gt 0 ] && [ "$up" -eq "$prev" ]; then + stable=$((stable + 1)) + [ "$stable" -ge "$STABLE_CHECKS" ] && break + else + stable=0 + fi + prev=$up + sleep "$INTERVAL" + elapsed=$((elapsed + INTERVAL)) +done + +echo "so-boot-mine-update: ${up} minions up (settled after ${elapsed}s); running mine.update" +/usr/bin/salt '*' mine.update --out=txt diff --git a/salt/salt/master.sls b/salt/salt/master.sls index 895150cd7..c62bd20f3 100644 --- a/salt/salt/master.sls +++ b/salt/salt/master.sls @@ -14,6 +14,7 @@ include: - salt.minion + - salt.master.boot_mine_update {% if 'vrt' in salt['pillar.get']('features', []) %} - salt.cloud - salt.cloud.reactor_config_hypervisor diff --git a/salt/salt/master/boot_mine_update.sls b/salt/salt/master/boot_mine_update.sls new file mode 100644 index 000000000..9f96c0ddf --- /dev/null +++ b/salt/salt/master/boot_mine_update.sls @@ -0,0 +1,29 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Manages /etc/systemd/system/so-boot-mine-update.service, a manager-only +# Type=oneshot unit that pushes `salt '*' mine.update` once per boot, ordered +# before so-boot-highstate.service so mine-backed pillars (node IPs, ES/Redis/ +# Logstash discovery) are fresh before the boot highstate renders them. + +include: + - systemd.reload + +so_boot_mine_update_unit_file: + file.managed: + - name: /etc/systemd/system/so-boot-mine-update.service + - source: salt://salt/service/so-boot-mine-update.service + - onchanges_in: + - module: systemd_reload + +# Only enable once setup is complete. Until then the gate file is missing and +# the unit's own ConditionPathExists would no-op it anyway. +so_boot_mine_update_service: + service.enabled: + - name: so-boot-mine-update.service + - onlyif: test -e /opt/so/state/setup-complete + - require: + - file: so_boot_mine_update_unit_file + - module: systemd_reload diff --git a/salt/salt/service/so-boot-mine-update.service b/salt/salt/service/so-boot-mine-update.service new file mode 100644 index 000000000..c5c6cdf7b --- /dev/null +++ b/salt/salt/service/so-boot-mine-update.service @@ -0,0 +1,15 @@ +[Unit] +Description=Security Onion boot-time grid mine.update (managers, runs once per boot before highstate) +After=salt-master.service salt-minion.service network-online.target +Wants=network-online.target +Requires=salt-master.service salt-minion.service +Before=so-boot-highstate.service +ConditionPathExists=/opt/so/state/setup-complete + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/sbin/so-boot-mine-update + +[Install] +WantedBy=multi-user.target From 6ad345730b5aa6f7959e00fa077844f3aa947316 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 8 Jun 2026 15:02:57 -0500 Subject: [PATCH 19/24] respect elasticfleet enable_auto_configuration setting for so-elastic-fleet-urls-update --- salt/elasticfleet/manager.sls | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/salt/elasticfleet/manager.sls b/salt/elasticfleet/manager.sls index 1728f2010..6cb672bef 100644 --- a/salt/elasticfleet/manager.sls +++ b/salt/elasticfleet/manager.sls @@ -11,14 +11,15 @@ include: - elasticfleet.config # If enabled, automatically update Fleet Logstash Outputs -{% if ELASTICFLEETMERGED.config.server.enable_auto_configuration and grains.role not in ['so-import', 'so-eval'] %} +{% if ELASTICFLEETMERGED.config.server.enable_auto_configuration %} +{% if grains.role not in ['so-import', 'so-eval']%} so-elastic-fleet-auto-configure-logstash-outputs: cmd.run: - name: /usr/sbin/so-elastic-fleet-outputs-update - retry: attempts: 4 interval: 30 -{% endif %} +{% endif %} # If enabled, automatically update Fleet Server URLs & ES Connection so-elastic-fleet-auto-configure-server-urls: @@ -27,6 +28,7 @@ so-elastic-fleet-auto-configure-server-urls: - retry: attempts: 4 interval: 30 +{% endif %} # Automatically update Fleet Server Elasticsearch URLs & Agent Artifact URLs so-elastic-fleet-auto-configure-elasticsearch-urls: From e536ffa36387c63a97e820cf3606e44ae94da228 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 9 Jun 2026 09:35:24 -0400 Subject: [PATCH 20/24] so-boot-mine-update: render node_data after mine.update before highstate After the boot-time mine.update, have the manager actually render the node_data pillar and log whether it came back populated. node_data: False makes salt/top.sls apply the bootstrap recovery branch instead of the manager's real config, so surfacing this in the journal makes the condition visible before so-boot-highstate runs. Best-effort and non-blocking: always exits 0 so highstate proceeds regardless. --- salt/manager/tools/sbin/so-boot-mine-update | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/salt/manager/tools/sbin/so-boot-mine-update b/salt/manager/tools/sbin/so-boot-mine-update index f497d891f..292b24ecc 100755 --- a/salt/manager/tools/sbin/so-boot-mine-update +++ b/salt/manager/tools/sbin/so-boot-mine-update @@ -40,3 +40,20 @@ done echo "so-boot-mine-update: ${up} minions up (settled after ${elapsed}s); running mine.update" /usr/bin/salt '*' mine.update --out=txt + +# Best-effort: confirm the manager can render node_data (non-False) now that the +# mine is updated. node_data: False makes salt/top.sls fall back to the bootstrap +# recovery branch instead of the manager's real config, so we surface that in the +# journal here. We never block highstate -- if still empty, the recovery branch +# and later highstates self-heal. +/usr/bin/salt-call saltutil.refresh_pillar >/dev/null 2>&1 +sleep 2 +status=$(/usr/bin/salt-call --out=json pillar.get node_data 2>/dev/null \ + | python3 -c 'import sys,json; d=json.load(sys.stdin).get("local"); print("rendered" if d else "empty")' 2>/dev/null) +status=${status:-empty} +if [ "$status" = "rendered" ]; then + echo "so-boot-mine-update: node_data renders; highstate will apply manager config" +else + echo "so-boot-mine-update: WARNING node_data still empty after mine.update; highstate may hit the bootstrap recovery branch" +fi +exit 0 From 8c306eb37dc04209e896347a3140092bcf2cb340 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 9 Jun 2026 09:49:19 -0400 Subject: [PATCH 21/24] so-boot-mine-update: log the rendered node_data content Dump the actual rendered node_data pillar (pretty-printed JSON) to the journal instead of just a rendered/empty verdict, so the boot-time render attempt is fully inspectable. Empty renders print false/null and still emit the WARNING. --- salt/manager/tools/sbin/so-boot-mine-update | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/salt/manager/tools/sbin/so-boot-mine-update b/salt/manager/tools/sbin/so-boot-mine-update index 292b24ecc..38dd63191 100755 --- a/salt/manager/tools/sbin/so-boot-mine-update +++ b/salt/manager/tools/sbin/so-boot-mine-update @@ -48,10 +48,11 @@ echo "so-boot-mine-update: ${up} minions up (settled after ${elapsed}s); running # and later highstates self-heal. /usr/bin/salt-call saltutil.refresh_pillar >/dev/null 2>&1 sleep 2 -status=$(/usr/bin/salt-call --out=json pillar.get node_data 2>/dev/null \ - | python3 -c 'import sys,json; d=json.load(sys.stdin).get("local"); print("rendered" if d else "empty")' 2>/dev/null) -status=${status:-empty} -if [ "$status" = "rendered" ]; then +rendered=$(/usr/bin/salt-call --out=json pillar.get node_data 2>/dev/null \ + | python3 -c 'import sys,json; d=json.load(sys.stdin).get("local"); print(json.dumps(d, indent=2, sort_keys=True))' 2>/dev/null) +echo "so-boot-mine-update: node_data rendered as:" +echo "${rendered:-null}" +if [ -n "$rendered" ] && [ "$rendered" != "null" ] && [ "$rendered" != "false" ]; then echo "so-boot-mine-update: node_data renders; highstate will apply manager config" else echo "so-boot-mine-update: WARNING node_data still empty after mine.update; highstate may hit the bootstrap recovery branch" From 27c77023255caf5e2eede75f0ad03cd1350d9eeb Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 9 Jun 2026 10:10:32 -0400 Subject: [PATCH 22/24] so-boot-mine-update: wait for a complete mine before highstate Mine-backed pillars (node_data, elasticsearch:nodes, redis:nodes, logstash:nodes, hypervisor:nodes) include a node only if it returned an IP from the mine, and the configs they build are rebuilt fresh every highstate. After a manager reboot with a flushed mine, the first boot highstate could run before an up node re-reported network.ip_addrs, dropping it from e.g. so-elasticsearch ExtraHosts and forcing a container recreate. After the initial broad mine.update, poll until every currently-up minion actually has network.ip_addrs in the mine, re-pushing mine.update to stragglers, before releasing the boot highstate. Shares the existing MINE_UPDATE_MAX_WAIT backstop so a slow/down node never blocks boot, and still logs the rendered node_data for inspection. --- salt/manager/tools/sbin/so-boot-mine-update | 49 +++++++++++++++------ 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/salt/manager/tools/sbin/so-boot-mine-update b/salt/manager/tools/sbin/so-boot-mine-update index 38dd63191..85da4866a 100755 --- a/salt/manager/tools/sbin/so-boot-mine-update +++ b/salt/manager/tools/sbin/so-boot-mine-update @@ -6,9 +6,11 @@ # Elastic License 2.0. # Runs once per boot on managers (via so-boot-mine-update.service), before -# so-boot-highstate.service. Waits for the responsive minion set to settle, then -# pushes mine.update to all minions so mine-backed pillars (node IPs, ES/Redis/ -# Logstash discovery) are fresh before the boot highstate renders them. +# so-boot-highstate.service. Waits for the responsive minion set to settle, pushes +# mine.update, then waits until every up minion has actually reported to the mine +# so mine-backed pillars (node IPs, ES/Redis/Logstash discovery) are complete +# before the boot highstate renders them -- otherwise a not-yet-reported node gets +# dropped from those pillars and torn out of the configs they build. MAX_WAIT=${MINE_UPDATE_MAX_WAIT:-180} # hard backstop only INTERVAL=10 @@ -41,20 +43,39 @@ done echo "so-boot-mine-update: ${up} minions up (settled after ${elapsed}s); running mine.update" /usr/bin/salt '*' mine.update --out=txt -# Best-effort: confirm the manager can render node_data (non-False) now that the -# mine is updated. node_data: False makes salt/top.sls fall back to the bootstrap -# recovery branch instead of the manager's real config, so we surface that in the -# journal here. We never block highstate -- if still empty, the recovery branch -# and later highstates self-heal. +# A node that is up but has not yet re-reported network.ip_addrs to the mine is +# silently dropped from mine-backed pillars (elasticsearch:nodes, node_data, ...) +# when highstate recompiles them -- which e.g. removes it from so-elasticsearch +# ExtraHosts and forces a container recreate. After the broad mine.update above, +# wait until every up minion actually has network.ip_addrs in the mine, re-pushing +# mine.update to stragglers, before releasing the boot highstate. Bounded by the +# same MAX_WAIT backstop so a slow/down node never blocks boot indefinitely. +missing="" +while [ "$elapsed" -lt "$MAX_WAIT" ]; do + up_json=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null) + mine_json=$(/usr/bin/salt-run mine.get '*' network.ip_addrs tgt_type=glob --out=json 2>/dev/null) + missing=$(printf '%s' "$up_json" | python3 -c ' +import sys, json +up = set(json.load(sys.stdin) or []) +mine = {k for k, v in (json.loads(sys.argv[1]) or {}).items() if v} +print("\n".join(sorted(up - mine))) +' "$mine_json" 2>/dev/null) + if [ -z "$missing" ]; then + echo "so-boot-mine-update: mine complete for all up minions after ${elapsed}s" + break + fi + echo "so-boot-mine-update: mine missing up minion(s): $(echo $missing); re-running mine.update" + for m in $missing; do /usr/bin/salt "$m" mine.update --out=txt; done + sleep "$INTERVAL" + elapsed=$((elapsed + INTERVAL)) +done +[ -n "$missing" ] && echo "so-boot-mine-update: WARNING ${MAX_WAIT}s backstop hit; up minion(s) still absent from mine: $(echo $missing); highstate may drop them from configs" + +# Log what node_data renders so the boot-time pillar state is inspectable. /usr/bin/salt-call saltutil.refresh_pillar >/dev/null 2>&1 sleep 2 rendered=$(/usr/bin/salt-call --out=json pillar.get node_data 2>/dev/null \ - | python3 -c 'import sys,json; d=json.load(sys.stdin).get("local"); print(json.dumps(d, indent=2, sort_keys=True))' 2>/dev/null) + | python3 -c 'import sys,json; print(json.dumps(json.load(sys.stdin).get("local"), indent=2, sort_keys=True))' 2>/dev/null) echo "so-boot-mine-update: node_data rendered as:" echo "${rendered:-null}" -if [ -n "$rendered" ] && [ "$rendered" != "null" ] && [ "$rendered" != "false" ]; then - echo "so-boot-mine-update: node_data renders; highstate will apply manager config" -else - echo "so-boot-mine-update: WARNING node_data still empty after mine.update; highstate may hit the bootstrap recovery branch" -fi exit 0 From 9f5a9616a5e5e942fd10fb0ac5682e45684dbbfd Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 9 Jun 2026 12:51:58 -0500 Subject: [PATCH 23/24] use pipe exit status for update_docker_containers --- setup/so-functions | 2 ++ setup/so-setup | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/setup/so-functions b/setup/so-functions index 5ce9a8fdc..2d5181dc1 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -980,6 +980,8 @@ docker_seed_registry() { docker_seed_update_percent=25 update_docker_containers 'netinstall' '' 'docker_seed_update' '/dev/stdout' 2>&1 | tee -a "$setup_log" + # Use pipe exit status of 'update_docker_containers' for return code + return ${PIPESTATUS[0]} fi } diff --git a/setup/so-setup b/setup/so-setup index c11d287eb..72cd555d6 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -767,7 +767,10 @@ if ! [[ -f $install_opt_file ]]; then title "Applying the registry state" logCmd "salt-call state.apply -l info registry" title "Seeding the docker registry" - docker_seed_registry + if ! docker_seed_registry; then + error "Failed to seed the docker registry" + fail_setup + fi title "Applying the manager state" logCmd "salt-call state.apply -l info manager" logCmd "salt-call state.apply influxdb -l info" From f088a27159afea926531729e2a45b399172d160b Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 9 Jun 2026 13:52:19 -0400 Subject: [PATCH 24/24] so-boot-mine-update: warm master pillar cache before highstate A complete mine is not enough: elasticsearch:nodes, redis:nodes, logstash:nodes (tgt_type=pillar) and hypervisor:nodes (tgt_type=compound) resolve their target against the master's per-minion data cache (grains+pillar in data.p), which is populated only when a minion's pillar is recompiled -- separately from the mine. After a reboot a node can be in the mine (so node_data/glob sees it) yet absent from that cache, so it fails the elasticsearch:enabled:true pillar match and is dropped from elasticsearch:nodes -> so-elasticsearch ExtraHosts -> container recreate. After the mine-completeness wait, run salt '*' saltutil.refresh_pillar wait=True to synchronously cache every up node's pillar (the same lever deploy_newnode.sls uses), then verify with salt-run cache.pillar and retry stragglers, bounded by MINE_UPDATE_MAX_WAIT. Also log elasticsearch:nodes alongside node_data for inspection. --- salt/manager/tools/sbin/so-boot-mine-update | 54 +++++++++++++++++---- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/salt/manager/tools/sbin/so-boot-mine-update b/salt/manager/tools/sbin/so-boot-mine-update index 85da4866a..79cd67844 100755 --- a/salt/manager/tools/sbin/so-boot-mine-update +++ b/salt/manager/tools/sbin/so-boot-mine-update @@ -7,10 +7,12 @@ # Runs once per boot on managers (via so-boot-mine-update.service), before # so-boot-highstate.service. Waits for the responsive minion set to settle, pushes -# mine.update, then waits until every up minion has actually reported to the mine -# so mine-backed pillars (node IPs, ES/Redis/Logstash discovery) are complete -# before the boot highstate renders them -- otherwise a not-yet-reported node gets -# dropped from those pillars and torn out of the configs they build. +# mine.update, waits until every up minion has actually reported to the mine, then +# warms the master's per-minion pillar cache so the mine-backed node pillars (node +# IPs, ES/Redis/Logstash/hypervisor discovery -- some glob- and some pillar/grain- +# targeted) are complete before the boot highstate renders them. Otherwise a node +# that is up but not yet fully reported gets dropped from those pillars and torn +# out of the configs they build (e.g. so-elasticsearch ExtraHosts -> container recreate). MAX_WAIT=${MINE_UPDATE_MAX_WAIT:-180} # hard backstop only INTERVAL=10 @@ -71,11 +73,45 @@ print("\n".join(sorted(up - mine))) done [ -n "$missing" ] && echo "so-boot-mine-update: WARNING ${MAX_WAIT}s backstop hit; up minion(s) still absent from mine: $(echo $missing); highstate may drop them from configs" -# Log what node_data renders so the boot-time pillar state is inspectable. +# The pillar/compound-targeted node pillars (elasticsearch:nodes, redis:nodes, +# logstash:nodes, hypervisor:nodes) resolve their target against the master's +# per-minion data cache (grains+pillar in .../minions//data.p), populated only +# when a minion's pillar is (re)compiled -- separately from the mine. A freshly +# booted node can be in the mine (glob/node_data sees it) yet absent from that +# cache, so it is dropped from those pillars and from the configs they build (e.g. +# so-elasticsearch ExtraHosts). Force a synchronous pillar refresh so the master +# caches every up node's pillar; refresh_pillar wait=True returns only once the +# pillar is recompiled (and thus cached for matching). Retry stragglers <= MAX_WAIT. +echo "so-boot-mine-update: warming master pillar cache for pillar/grain-targeted node pillars" +/usr/bin/salt '*' saltutil.refresh_pillar wait=True --out=txt +missing="" +while [ "$elapsed" -lt "$MAX_WAIT" ]; do + up_json=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null) + cached_json=$(/usr/bin/salt-run cache.pillar tgt='*' --out=json 2>/dev/null) + missing=$(printf '%s' "$up_json" | python3 -c ' +import sys, json +up = set(json.load(sys.stdin) or []) +cached = {k for k, v in (json.loads(sys.argv[1]) or {}).items() if v} +print("\n".join(sorted(up - cached))) +' "$cached_json" 2>/dev/null) + if [ -z "$missing" ]; then + echo "so-boot-mine-update: pillar cache warm for all up minions after ${elapsed}s" + break + fi + echo "so-boot-mine-update: pillar not yet cached for: $(echo $missing); refreshing" + for m in $missing; do /usr/bin/salt "$m" saltutil.refresh_pillar wait=True --out=txt; done + sleep "$INTERVAL" + elapsed=$((elapsed + INTERVAL)) +done +[ -n "$missing" ] && echo "so-boot-mine-update: WARNING ${MAX_WAIT}s backstop hit; pillar not cached for: $(echo $missing); pillar-targeted pillars may drop them" + +# Log what the mine-backed pillars render so the boot-time state is inspectable. /usr/bin/salt-call saltutil.refresh_pillar >/dev/null 2>&1 sleep 2 -rendered=$(/usr/bin/salt-call --out=json pillar.get node_data 2>/dev/null \ - | python3 -c 'import sys,json; print(json.dumps(json.load(sys.stdin).get("local"), indent=2, sort_keys=True))' 2>/dev/null) -echo "so-boot-mine-update: node_data rendered as:" -echo "${rendered:-null}" +for key in node_data elasticsearch:nodes; do + rendered=$(/usr/bin/salt-call --out=json pillar.get "$key" 2>/dev/null \ + | python3 -c 'import sys,json; print(json.dumps(json.load(sys.stdin).get("local"), indent=2, sort_keys=True))' 2>/dev/null) + echo "so-boot-mine-update: ${key} rendered as:" + echo "${rendered:-null}" +done exit 0