From 61ca60a94cd5050067f8fd82548f9376c1efa7dd Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 13 May 2026 17:28:07 -0400 Subject: [PATCH 01/43] prep for soc db config --- salt/common/tools/sbin/so-log-check | 1 + salt/kratos/soc_kratos.yaml | 2 +- salt/postgres/config.sls | 6 +++--- salt/postgres/enabled.sls | 4 ++-- salt/postgres/files/{init-users.sh => init-db.sh} | 5 +++++ salt/postgres/telegraf_users.sls | 2 +- salt/soc/soc_soc.yaml | 1 + 7 files changed, 14 insertions(+), 7 deletions(-) rename salt/postgres/files/{init-users.sh => init-db.sh} (89%) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index a3d9c51d0..94fdd7229 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -165,6 +165,7 @@ if [[ $EXCLUDE_FALSE_POSITIVE_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|upgrading component template" # false positive (elasticsearch index or template names contain 'error') EXCLUDED_ERRORS="$EXCLUDED_ERRORS|upgrading composable template" # false positive (elasticsearch composable template names contain 'error') EXCLUDED_ERRORS="$EXCLUDED_ERRORS|Error while parsing document for index \[.ds-logs-kratos-so-.*object mapping for \[file\]" # false positive (mapping error occuring BEFORE kratos index has rolled over in 2.4.210) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|No such container" # false positive (telegraf trying to run stats on an old container) fi if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then diff --git a/salt/kratos/soc_kratos.yaml b/salt/kratos/soc_kratos.yaml index 4cfe2c1c3..267c4bc50 100644 --- a/salt/kratos/soc_kratos.yaml +++ b/salt/kratos/soc_kratos.yaml @@ -103,7 +103,7 @@ kratos: config: session: lifespan: - description: Defines the length of a login session. + description: Defines the length of a login session before it will timeout, and require a new login. 
global: True helpLink: kratos whoami: diff --git a/salt/postgres/config.sls b/salt/postgres/config.sls index 11ca52649..e458e8455 100644 --- a/salt/postgres/config.sls +++ b/salt/postgres/config.sls @@ -46,10 +46,10 @@ postgresinitdir: - require: - file: postgresconfdir -postgresinitusers: +postgresinitdb: file.managed: - - name: /opt/so/conf/postgres/init/init-users.sh - - source: salt://postgres/files/init-users.sh + - name: /opt/so/conf/postgres/init/init-db.sh + - source: salt://postgres/files/init-db.sh - user: 939 - group: 939 - mode: 755 diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls index b3abb621e..79ef6f997 100644 --- a/salt/postgres/enabled.sls +++ b/salt/postgres/enabled.sls @@ -31,7 +31,7 @@ so-postgres: - POSTGRES_DB=securityonion # Passwords are delivered via mounted 0600 secret files, not plaintext env vars. # The upstream postgres image resolves POSTGRES_PASSWORD_FILE; entrypoint.sh and - # init-users.sh resolve SO_POSTGRES_PASS_FILE the same way. + # init-db.sh resolve SO_POSTGRES_PASS_FILE the same way. 
- POSTGRES_PASSWORD_FILE=/run/secrets/postgres_password - SO_POSTGRES_USER={{ SO_POSTGRES_USER }} - SO_POSTGRES_PASS_FILE=/run/secrets/so_postgres_pass @@ -46,7 +46,7 @@ so-postgres: - /opt/so/conf/postgres/postgresql.conf:/conf/postgresql.conf:ro - /opt/so/conf/postgres/pg_hba.conf:/conf/pg_hba.conf:ro - /opt/so/conf/postgres/secrets:/run/secrets:ro - - /opt/so/conf/postgres/init/init-users.sh:/docker-entrypoint-initdb.d/init-users.sh:ro + - /opt/so/conf/postgres/init/init-db.sh:/docker-entrypoint-initdb.d/init-db.sh:ro - /etc/pki/postgres.crt:/conf/postgres.crt:ro - /etc/pki/postgres.key:/conf/postgres.key:ro - /etc/pki/tls/certs/intca.crt:/conf/ca.crt:ro diff --git a/salt/postgres/files/init-users.sh b/salt/postgres/files/init-db.sh similarity index 89% rename from salt/postgres/files/init-users.sh rename to salt/postgres/files/init-db.sh index e28b11f0f..03e6d08dd 100644 --- a/salt/postgres/files/init-users.sh +++ b/salt/postgres/files/init-db.sh @@ -32,3 +32,8 @@ EOSQL if ! psql -U "$POSTGRES_USER" -tAc "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then psql -v ON_ERROR_STOP=1 -U "$POSTGRES_USER" -c "CREATE DATABASE so_telegraf" fi + +# Bootstrap the SOC database. +if ! psql -U "$POSTGRES_USER" -tAc "SELECT 1 FROM pg_database WHERE datname='so_soc'" | grep -q 1; then + psql -v ON_ERROR_STOP=1 -U "$POSTGRES_USER" -c "CREATE DATABASE so_soc" +fi diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 62490ea52..1ac7c80ed 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -39,7 +39,7 @@ postgres_wait_ready: - require: - docker_container: so-postgres -# Ensure the shared Telegraf database exists. init-users.sh only runs on a +# Ensure the shared Telegraf database exists. init-db.sh only runs on a # fresh data dir, so hosts upgraded onto an existing /nsm/postgres volume # would otherwise never get so_telegraf. 
postgres_create_telegraf_db: diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index 6a2f79629..647bdd778 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -818,6 +818,7 @@ soc: description: List of available external tools visible in the SOC UI. Each tool is defined in JSON object notation, and must include the "name" key and "link" key, where the link is the tool's URL. global: True advanced: True + multiline: True forcedType: "[]{}" exportNodeId: description: The node ID on which export jobs will be executed. From 907f699721c1376e85ec2788fb2c994d61056117 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 14 May 2026 11:03:08 -0400 Subject: [PATCH 02/43] state rename --- salt/postgres/enabled.sls | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls index 79ef6f997..20d256ae8 100644 --- a/salt/postgres/enabled.sls +++ b/salt/postgres/enabled.sls @@ -70,7 +70,7 @@ so-postgres: - watch: - file: postgresconf - file: postgreshba - - file: postgresinitusers + - file: postgresinitdb - file: postgres_super_secret - file: postgres_app_secret - x509: postgres_crt @@ -78,7 +78,7 @@ so-postgres: - require: - file: postgresconf - file: postgreshba - - file: postgresinitusers + - file: postgresinitdb - file: postgres_super_secret - file: postgres_app_secret - x509: postgres_crt From 6f273d7d9795a47b8cd31c507eac2bbec49c3752 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 14 May 2026 15:53:00 -0400 Subject: [PATCH 03/43] Rename init-users.sh to init-db.sh and update all references --- salt/postgres/config.sls | 6 +++--- salt/postgres/enabled.sls | 8 ++++---- salt/postgres/files/{init-users.sh => init-db.sh} | 0 salt/postgres/telegraf_users.sls | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) rename salt/postgres/files/{init-users.sh => init-db.sh} (100%) diff --git a/salt/postgres/config.sls b/salt/postgres/config.sls index 11ca52649..e458e8455 100644 --- 
a/salt/postgres/config.sls +++ b/salt/postgres/config.sls @@ -46,10 +46,10 @@ postgresinitdir: - require: - file: postgresconfdir -postgresinitusers: +postgresinitdb: file.managed: - - name: /opt/so/conf/postgres/init/init-users.sh - - source: salt://postgres/files/init-users.sh + - name: /opt/so/conf/postgres/init/init-db.sh + - source: salt://postgres/files/init-db.sh - user: 939 - group: 939 - mode: 755 diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls index b3abb621e..20d256ae8 100644 --- a/salt/postgres/enabled.sls +++ b/salt/postgres/enabled.sls @@ -31,7 +31,7 @@ so-postgres: - POSTGRES_DB=securityonion # Passwords are delivered via mounted 0600 secret files, not plaintext env vars. # The upstream postgres image resolves POSTGRES_PASSWORD_FILE; entrypoint.sh and - # init-users.sh resolve SO_POSTGRES_PASS_FILE the same way. + # init-db.sh resolve SO_POSTGRES_PASS_FILE the same way. - POSTGRES_PASSWORD_FILE=/run/secrets/postgres_password - SO_POSTGRES_USER={{ SO_POSTGRES_USER }} - SO_POSTGRES_PASS_FILE=/run/secrets/so_postgres_pass @@ -46,7 +46,7 @@ so-postgres: - /opt/so/conf/postgres/postgresql.conf:/conf/postgresql.conf:ro - /opt/so/conf/postgres/pg_hba.conf:/conf/pg_hba.conf:ro - /opt/so/conf/postgres/secrets:/run/secrets:ro - - /opt/so/conf/postgres/init/init-users.sh:/docker-entrypoint-initdb.d/init-users.sh:ro + - /opt/so/conf/postgres/init/init-db.sh:/docker-entrypoint-initdb.d/init-db.sh:ro - /etc/pki/postgres.crt:/conf/postgres.crt:ro - /etc/pki/postgres.key:/conf/postgres.key:ro - /etc/pki/tls/certs/intca.crt:/conf/ca.crt:ro @@ -70,7 +70,7 @@ so-postgres: - watch: - file: postgresconf - file: postgreshba - - file: postgresinitusers + - file: postgresinitdb - file: postgres_super_secret - file: postgres_app_secret - x509: postgres_crt @@ -78,7 +78,7 @@ so-postgres: - require: - file: postgresconf - file: postgreshba - - file: postgresinitusers + - file: postgresinitdb - file: postgres_super_secret - file: postgres_app_secret - x509: 
postgres_crt diff --git a/salt/postgres/files/init-users.sh b/salt/postgres/files/init-db.sh similarity index 100% rename from salt/postgres/files/init-users.sh rename to salt/postgres/files/init-db.sh diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 62490ea52..1ac7c80ed 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -39,7 +39,7 @@ postgres_wait_ready: - require: - docker_container: so-postgres -# Ensure the shared Telegraf database exists. init-users.sh only runs on a +# Ensure the shared Telegraf database exists. init-db.sh only runs on a # fresh data dir, so hosts upgraded onto an existing /nsm/postgres volume # would otherwise never get so_telegraf. postgres_create_telegraf_db: From b7a13899f776c48cff48d7c9af9add92ee3dd298 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 14 May 2026 15:56:04 -0400 Subject: [PATCH 04/43] Suppress output logging for postgres telegraf role provisioning --- salt/postgres/telegraf_users.sls | 1 + 1 file changed, 1 insertion(+) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 1ac7c80ed..6d6a30d84 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -118,6 +118,7 @@ postgres_telegraf_role_{{ u }}: GRANT CONNECT ON DATABASE so_telegraf TO "{{ u }}"; GRANT so_telegraf TO "{{ u }}"; EOSQL + - hide_output: True - require: - cmd: postgres_telegraf_group_role From 450eacca417fcc7503b06e491f5b8cbbb1bdcd92 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 14 May 2026 16:15:54 -0400 Subject: [PATCH 05/43] Move telegraf role provisioning to external script with env vars --- salt/postgres/config.sls | 8 ++++++++ salt/postgres/files/telegraf_role.sh | 23 +++++++++++++++++++++++ salt/postgres/telegraf_users.sls | 21 ++++++--------------- 3 files changed, 37 insertions(+), 15 deletions(-) create mode 100644 salt/postgres/files/telegraf_role.sh diff --git a/salt/postgres/config.sls 
b/salt/postgres/config.sls index e458e8455..f5bf856eb 100644 --- a/salt/postgres/config.sls +++ b/salt/postgres/config.sls @@ -94,6 +94,14 @@ postgres_app_secret: - require: - file: postgressecretsdir +postgrestelegrafrole: + file.managed: + - name: /usr/local/bin/telegraf_role.sh + - source: salt://postgres/files/telegraf_role.sh + - user: root + - group: root + - mode: 755 + postgres_sbin: file.recurse: - name: /usr/sbin diff --git a/salt/postgres/files/telegraf_role.sh b/salt/postgres/files/telegraf_role.sh new file mode 100644 index 000000000..352efa018 --- /dev/null +++ b/salt/postgres/files/telegraf_role.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +# Provision or update a Telegraf postgres role. +# Expects ROLE_USER and ROLE_PASS environment variables. + +docker exec -i so-postgres psql \ + -v ON_ERROR_STOP=1 \ + -v role_user="$ROLE_USER" \ + -v role_pass="$ROLE_PASS" \ + -U postgres -d so_telegraf <<'EOSQL' +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = :role_user) THEN + EXECUTE format('CREATE ROLE %I WITH LOGIN PASSWORD %L', :role_user, :role_pass); + ELSE + EXECUTE format('ALTER ROLE %I WITH LOGIN PASSWORD %L', :role_user, :role_pass); + END IF; +END +$$; +GRANT CONNECT ON DATABASE so_telegraf TO :"role_user"; +GRANT so_telegraf TO :"role_user"; +EOSQL diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 6d6a30d84..bafa781cc 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -100,26 +100,17 @@ postgres_telegraf_group_role: {% for mid, entry in creds.items() %} {% if entry.get('user') and entry.get('pass') %} {% set u = entry.user %} -{% set p = entry.pass | replace("'", "''") %} +{% set p = entry.pass %} postgres_telegraf_role_{{ u }}: cmd.run: - - name: | - docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL' - DO $$ - BEGIN - IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{{ u }}') THEN - EXECUTE 
format('CREATE ROLE %I WITH LOGIN PASSWORD %L', '{{ u }}', '{{ p }}'); - ELSE - EXECUTE format('ALTER ROLE %I WITH PASSWORD %L', '{{ u }}', '{{ p }}'); - END IF; - END - $$; - GRANT CONNECT ON DATABASE so_telegraf TO "{{ u }}"; - GRANT so_telegraf TO "{{ u }}"; - EOSQL + - name: /usr/local/bin/telegraf_role.sh + - env: + - ROLE_USER: {{ u }} + - ROLE_PASS: {{ p }} - hide_output: True - require: + - file: postgrestelegrafrole - cmd: postgres_telegraf_group_role {% endif %} From 03fa01a705db80ff9ef146400d5c62115ce0e08b Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 14 May 2026 16:18:01 -0400 Subject: [PATCH 06/43] Move telegraf_role.sh to postgres tools/sbin --- salt/postgres/config.sls | 8 -------- salt/postgres/telegraf_users.sls | 4 ++-- salt/postgres/{files => tools/sbin}/telegraf_role.sh | 0 3 files changed, 2 insertions(+), 10 deletions(-) rename salt/postgres/{files => tools/sbin}/telegraf_role.sh (100%) diff --git a/salt/postgres/config.sls b/salt/postgres/config.sls index f5bf856eb..e458e8455 100644 --- a/salt/postgres/config.sls +++ b/salt/postgres/config.sls @@ -94,14 +94,6 @@ postgres_app_secret: - require: - file: postgressecretsdir -postgrestelegrafrole: - file.managed: - - name: /usr/local/bin/telegraf_role.sh - - source: salt://postgres/files/telegraf_role.sh - - user: root - - group: root - - mode: 755 - postgres_sbin: file.recurse: - name: /usr/sbin diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index bafa781cc..369e30dbc 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -104,13 +104,13 @@ postgres_telegraf_group_role: postgres_telegraf_role_{{ u }}: cmd.run: - - name: /usr/local/bin/telegraf_role.sh + - name: /usr/sbin/telegraf_role.sh - env: - ROLE_USER: {{ u }} - ROLE_PASS: {{ p }} - hide_output: True - require: - - file: postgrestelegrafrole + - file: postgres_sbin - cmd: postgres_telegraf_group_role {% endif %} diff --git a/salt/postgres/files/telegraf_role.sh 
b/salt/postgres/tools/sbin/telegraf_role.sh similarity index 100% rename from salt/postgres/files/telegraf_role.sh rename to salt/postgres/tools/sbin/telegraf_role.sh From b9f2d56932d870a9b8bd9b0daacd4f96a9191587 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 14 May 2026 16:37:08 -0400 Subject: [PATCH 07/43] Consolidate telegraf postgres SQL into multi-mode script Replace inline psql heredocs in telegraf_users.sls with subcommand dispatcher telegraf_postgres.sh: create_db, group_role, user, retention. --- salt/postgres/telegraf_users.sls | 63 ++-------- salt/postgres/tools/sbin/telegraf_postgres.sh | 108 ++++++++++++++++++ salt/postgres/tools/sbin/telegraf_role.sh | 23 ---- 3 files changed, 117 insertions(+), 77 deletions(-) create mode 100644 salt/postgres/tools/sbin/telegraf_postgres.sh delete mode 100644 salt/postgres/tools/sbin/telegraf_role.sh diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 369e30dbc..4b95ac45b 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -44,12 +44,10 @@ postgres_wait_ready: # would otherwise never get so_telegraf. postgres_create_telegraf_db: cmd.run: - - name: | - if ! docker exec so-postgres psql -U postgres -tAc "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then - docker exec so-postgres psql -v ON_ERROR_STOP=1 -U postgres -c "CREATE DATABASE so_telegraf" - fi + - name: /usr/sbin/telegraf_postgres.sh create_db - require: - cmd: postgres_wait_ready + - file: postgres_sbin # Provision the shared group role and schema once. Every per-minion role is a # member of so_telegraf, and each Telegraf connection does SET ROLE so_telegraf @@ -57,44 +55,10 @@ postgres_create_telegraf_db: # on first write are owned by the group role and every member can INSERT/SELECT. 
postgres_telegraf_group_role: cmd.run: - - name: | - docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL' - DO $$ - BEGIN - IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'so_telegraf') THEN - CREATE ROLE so_telegraf NOLOGIN; - END IF; - END - $$; - GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf; - CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf; - GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf; - CREATE SCHEMA IF NOT EXISTS partman; - CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman; - CREATE EXTENSION IF NOT EXISTS pg_cron; - -- Telegraf (running as so_telegraf) calls partman.create_parent() - -- on first write of each metric, which needs USAGE on the partman - -- schema, EXECUTE on its functions/procedures, and write access to - -- partman.part_config so it can register new partitioned parents. - GRANT USAGE, CREATE ON SCHEMA partman TO so_telegraf; - GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA partman TO so_telegraf; - GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA partman TO so_telegraf; - GRANT EXECUTE ON ALL PROCEDURES IN SCHEMA partman TO so_telegraf; - -- partman creates per-parent template tables (partman.template_*) at - -- runtime; default privileges extend DML/sequence access to them. - ALTER DEFAULT PRIVILEGES IN SCHEMA partman - GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO so_telegraf; - ALTER DEFAULT PRIVILEGES IN SCHEMA partman - GRANT USAGE, SELECT, UPDATE ON SEQUENCES TO so_telegraf; - -- Hourly partman maintenance. cron.schedule is idempotent by jobname. 
- SELECT cron.schedule( - 'telegraf-partman-maintenance', - '17 * * * *', - 'CALL partman.run_maintenance_proc()' - ); - EOSQL + - name: /usr/sbin/telegraf_postgres.sh group_role - require: - cmd: postgres_create_telegraf_db + - file: postgres_sbin {% set creds = salt['pillar.get']('telegraf:postgres_creds', {}) %} {% for mid, entry in creds.items() %} @@ -104,7 +68,7 @@ postgres_telegraf_group_role: postgres_telegraf_role_{{ u }}: cmd.run: - - name: /usr/sbin/telegraf_role.sh + - name: /usr/sbin/telegraf_postgres.sh user - env: - ROLE_USER: {{ u }} - ROLE_PASS: {{ p }} @@ -122,21 +86,12 @@ postgres_telegraf_role_{{ u }}: {% set retention = salt['pillar.get']('postgres:telegraf:retention_days', 14) | int %} postgres_telegraf_retention_reconcile: cmd.run: - - name: | - docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL' - DO $$ - BEGIN - IF EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') THEN - UPDATE partman.part_config - SET retention = '{{ retention }} days', - retention_keep_table = false - WHERE parent_table LIKE 'telegraf.%'; - END IF; - END - $$; - EOSQL + - name: /usr/sbin/telegraf_postgres.sh retention + - env: + - RETENTION_DAYS: {{ retention }} - require: - cmd: postgres_telegraf_group_role + - file: postgres_sbin {% endif %} diff --git a/salt/postgres/tools/sbin/telegraf_postgres.sh b/salt/postgres/tools/sbin/telegraf_postgres.sh new file mode 100644 index 000000000..7f2061543 --- /dev/null +++ b/salt/postgres/tools/sbin/telegraf_postgres.sh @@ -0,0 +1,108 @@ +#!/bin/bash +set -e + +# Provision Telegraf state inside the so-postgres container. +# Usage: telegraf_postgres.sh +# create_db Ensure the so_telegraf database exists. +# group_role Provision the so_telegraf group role, telegraf/partman schemas, +# pg_partman, pg_cron, and the hourly partman maintenance job. +# user Create or update a per-minion login role granted to so_telegraf. +# Env: ROLE_USER, ROLE_PASS. 
+# retention Reconcile partman retention on telegraf parents. +# Env: RETENTION_DAYS. + +cmd="${1:?subcommand required}" + +case "$cmd" in + create_db) + if ! docker exec so-postgres psql -U postgres -tAc \ + "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then + docker exec so-postgres psql -v ON_ERROR_STOP=1 -U postgres \ + -c "CREATE DATABASE so_telegraf" + fi + ;; + + group_role) + docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL' +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'so_telegraf') THEN + CREATE ROLE so_telegraf NOLOGIN; + END IF; +END +$$; +GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf; +CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf; +GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf; +CREATE SCHEMA IF NOT EXISTS partman; +CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman; +CREATE EXTENSION IF NOT EXISTS pg_cron; +-- Telegraf (running as so_telegraf) calls partman.create_parent() +-- on first write of each metric, which needs USAGE on the partman +-- schema, EXECUTE on its functions/procedures, and write access to +-- partman.part_config so it can register new partitioned parents. +GRANT USAGE, CREATE ON SCHEMA partman TO so_telegraf; +GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA partman TO so_telegraf; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA partman TO so_telegraf; +GRANT EXECUTE ON ALL PROCEDURES IN SCHEMA partman TO so_telegraf; +-- partman creates per-parent template tables (partman.template_*) at +-- runtime; default privileges extend DML/sequence access to them. +ALTER DEFAULT PRIVILEGES IN SCHEMA partman + GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO so_telegraf; +ALTER DEFAULT PRIVILEGES IN SCHEMA partman + GRANT USAGE, SELECT, UPDATE ON SEQUENCES TO so_telegraf; +-- Hourly partman maintenance. cron.schedule is idempotent by jobname. 
+SELECT cron.schedule( + 'telegraf-partman-maintenance', + '17 * * * *', + 'CALL partman.run_maintenance_proc()' +); +EOSQL + ;; + + user) + : "${ROLE_USER:?ROLE_USER is required}" + : "${ROLE_PASS:?ROLE_PASS is required}" + docker exec -i so-postgres psql \ + -v ON_ERROR_STOP=1 \ + -v role_user="$ROLE_USER" \ + -v role_pass="$ROLE_PASS" \ + -U postgres -d so_telegraf <<'EOSQL' +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = :role_user) THEN + EXECUTE format('CREATE ROLE %I WITH LOGIN PASSWORD %L', :role_user, :role_pass); + ELSE + EXECUTE format('ALTER ROLE %I WITH LOGIN PASSWORD %L', :role_user, :role_pass); + END IF; +END +$$; +GRANT CONNECT ON DATABASE so_telegraf TO :"role_user"; +GRANT so_telegraf TO :"role_user"; +EOSQL + ;; + + retention) + : "${RETENTION_DAYS:?RETENTION_DAYS is required}" + docker exec -i so-postgres psql \ + -v ON_ERROR_STOP=1 \ + -v retention_days="$RETENTION_DAYS" \ + -U postgres -d so_telegraf <<'EOSQL' +DO $$ +BEGIN + IF EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') THEN + UPDATE partman.part_config + SET retention = :'retention_days' || ' days', + retention_keep_table = false + WHERE parent_table LIKE 'telegraf.%'; + END IF; +END +$$; +EOSQL + ;; + + *) + echo "Unknown subcommand: $cmd" >&2 + exit 1 + ;; +esac diff --git a/salt/postgres/tools/sbin/telegraf_role.sh b/salt/postgres/tools/sbin/telegraf_role.sh deleted file mode 100644 index 352efa018..000000000 --- a/salt/postgres/tools/sbin/telegraf_role.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -set -e - -# Provision or update a Telegraf postgres role. -# Expects ROLE_USER and ROLE_PASS environment variables. 
- -docker exec -i so-postgres psql \ - -v ON_ERROR_STOP=1 \ - -v role_user="$ROLE_USER" \ - -v role_pass="$ROLE_PASS" \ - -U postgres -d so_telegraf <<'EOSQL' -DO $$ -BEGIN - IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = :role_user) THEN - EXECUTE format('CREATE ROLE %I WITH LOGIN PASSWORD %L', :role_user, :role_pass); - ELSE - EXECUTE format('ALTER ROLE %I WITH LOGIN PASSWORD %L', :role_user, :role_pass); - END IF; -END -$$; -GRANT CONNECT ON DATABASE so_telegraf TO :"role_user"; -GRANT so_telegraf TO :"role_user"; -EOSQL From 8e38bff0c31ddbf7603dead157a741fafc708664 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 14 May 2026 16:55:53 -0400 Subject: [PATCH 08/43] Rename telegraf_postgres.sh to so-telegraf-postgres --- salt/postgres/telegraf_users.sls | 8 ++++---- .../sbin/{telegraf_postgres.sh => so-telegraf-postgres} | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) rename salt/postgres/tools/sbin/{telegraf_postgres.sh => so-telegraf-postgres} (98%) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 4b95ac45b..b4226f1fd 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -44,7 +44,7 @@ postgres_wait_ready: # would otherwise never get so_telegraf. postgres_create_telegraf_db: cmd.run: - - name: /usr/sbin/telegraf_postgres.sh create_db + - name: /usr/sbin/so-telegraf-postgres create_db - require: - cmd: postgres_wait_ready - file: postgres_sbin @@ -55,7 +55,7 @@ postgres_create_telegraf_db: # on first write are owned by the group role and every member can INSERT/SELECT. 
postgres_telegraf_group_role: cmd.run: - - name: /usr/sbin/telegraf_postgres.sh group_role + - name: /usr/sbin/so-telegraf-postgres group_role - require: - cmd: postgres_create_telegraf_db - file: postgres_sbin @@ -68,7 +68,7 @@ postgres_telegraf_group_role: postgres_telegraf_role_{{ u }}: cmd.run: - - name: /usr/sbin/telegraf_postgres.sh user + - name: /usr/sbin/so-telegraf-postgres user - env: - ROLE_USER: {{ u }} - ROLE_PASS: {{ p }} @@ -86,7 +86,7 @@ postgres_telegraf_role_{{ u }}: {% set retention = salt['pillar.get']('postgres:telegraf:retention_days', 14) | int %} postgres_telegraf_retention_reconcile: cmd.run: - - name: /usr/sbin/telegraf_postgres.sh retention + - name: /usr/sbin/so-telegraf-postgres retention - env: - RETENTION_DAYS: {{ retention }} - require: diff --git a/salt/postgres/tools/sbin/telegraf_postgres.sh b/salt/postgres/tools/sbin/so-telegraf-postgres similarity index 98% rename from salt/postgres/tools/sbin/telegraf_postgres.sh rename to salt/postgres/tools/sbin/so-telegraf-postgres index 7f2061543..e15d4c450 100644 --- a/salt/postgres/tools/sbin/telegraf_postgres.sh +++ b/salt/postgres/tools/sbin/so-telegraf-postgres @@ -2,7 +2,7 @@ set -e # Provision Telegraf state inside the so-postgres container. -# Usage: telegraf_postgres.sh +# Usage: so-telegraf-postgres # create_db Ensure the so_telegraf database exists. # group_role Provision the so_telegraf group role, telegraf/partman schemas, # pg_partman, pg_cron, and the hourly partman maintenance job. 
From 249b126312641375450f0614e9b928948df2aa79 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 14 May 2026 17:08:51 -0400 Subject: [PATCH 09/43] Quote telegraf role env vars to survive YAML-special chars in passwords --- salt/postgres/telegraf_users.sls | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index b4226f1fd..28d9d6247 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -70,8 +70,8 @@ postgres_telegraf_role_{{ u }}: cmd.run: - name: /usr/sbin/so-telegraf-postgres user - env: - - ROLE_USER: {{ u }} - - ROLE_PASS: {{ p }} + - ROLE_USER: {{ u | tojson }} + - ROLE_PASS: {{ p | tojson }} - hide_output: True - require: - file: postgres_sbin From 024fece607315553a71ab94c463d46809bda4e5b Mon Sep 17 00:00:00 2001 From: Josh Brower Date: Thu, 14 May 2026 17:08:57 -0400 Subject: [PATCH 10/43] Tweak for nginx upgrade --- salt/nginx/etc/nginx.conf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/nginx/etc/nginx.conf b/salt/nginx/etc/nginx.conf index 0c98b7b28..8150265f5 100644 --- a/salt/nginx/etc/nginx.conf +++ b/salt/nginx/etc/nginx.conf @@ -225,6 +225,7 @@ http { limit_req zone=auth_throttle burst={{ NGINXMERGED.config.throttle_login_burst }} nodelay; limit_req_status 429; proxy_pass http://{{ GLOBALS.manager }}:4433; + proxy_set_header Connection "Close"; proxy_read_timeout 90; proxy_connect_timeout 90; proxy_set_header Host $host; @@ -237,6 +238,7 @@ http { location ~ ^/auth/.*?(whoami|logout|settings|errors|webauthn.js) { rewrite /auth/(.*) /$1 break; proxy_pass http://{{ GLOBALS.manager }}:4433; + proxy_set_header Connection "Close"; proxy_read_timeout 90; proxy_connect_timeout 90; proxy_set_header Host $host; From 64731c73ba954df47c4fad887d265f12ee1ca20b Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 14 May 2026 17:17:49 -0400 Subject: [PATCH 11/43] Fix psql :var substitution in telegraf role and retention SQL psql 
does not substitute :var references inside dollar-quoted strings, so the DO blocks in the user and retention subcommands were receiving literal colons and failing (silently for user, via hide_output: True). Rewrite the conditional CREATE/ALTER ROLE with SELECT format(...) \gexec and guard the retention UPDATE with \gset + \if. --- salt/postgres/tools/sbin/so-telegraf-postgres | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/salt/postgres/tools/sbin/so-telegraf-postgres b/salt/postgres/tools/sbin/so-telegraf-postgres index e15d4c450..ef7c3f9e6 100644 --- a/salt/postgres/tools/sbin/so-telegraf-postgres +++ b/salt/postgres/tools/sbin/so-telegraf-postgres @@ -63,20 +63,22 @@ EOSQL user) : "${ROLE_USER:?ROLE_USER is required}" : "${ROLE_PASS:?ROLE_PASS is required}" + # psql does not substitute :vars inside dollar-quoted strings, so the + # conditional CREATE/ALTER is built outside any DO block and dispatched + # with \gexec. format() handles identifier/literal quoting.
docker exec -i so-postgres psql \ -v ON_ERROR_STOP=1 \ -v role_user="$ROLE_USER" \ -v role_pass="$ROLE_PASS" \ -U postgres -d so_telegraf <<'EOSQL' -DO $$ -BEGIN - IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = :role_user) THEN - EXECUTE format('CREATE ROLE %I WITH LOGIN PASSWORD %L', :role_user, :role_pass); - ELSE - EXECUTE format('ALTER ROLE %I WITH LOGIN PASSWORD %L', :role_user, :role_pass); - END IF; -END -$$; +SELECT format( + CASE WHEN EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = :'role_user') + THEN 'ALTER ROLE %I WITH LOGIN PASSWORD %L' + ELSE 'CREATE ROLE %I WITH LOGIN PASSWORD %L' + END, + :'role_user', + :'role_pass' +) \gexec GRANT CONNECT ON DATABASE so_telegraf TO :"role_user"; GRANT so_telegraf TO :"role_user"; EOSQL @@ -84,20 +86,20 @@ EOSQL retention) : "${RETENTION_DAYS:?RETENTION_DAYS is required}" + # \gset + \if guards against a missing pg_partman without using a DO + # block (psql :var substitution doesn't reach into dollar-quoted code). 
docker exec -i so-postgres psql \ -v ON_ERROR_STOP=1 \ -v retention_days="$RETENTION_DAYS" \ -U postgres -d so_telegraf <<'EOSQL' -DO $$ -BEGIN - IF EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') THEN - UPDATE partman.part_config - SET retention = :'retention_days' || ' days', - retention_keep_table = false - WHERE parent_table LIKE 'telegraf.%'; - END IF; -END -$$; +SELECT CASE WHEN EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') + THEN 'true' ELSE 'false' END AS has_partman \gset +\if :has_partman +UPDATE partman.part_config +SET retention = :'retention_days' || ' days', + retention_keep_table = false +WHERE parent_table LIKE 'telegraf.%'; +\endif EOSQL ;; From d2524a593f6cd9888a9f7d04f62cf4753421d2f4 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 14 May 2026 17:12:02 -0500 Subject: [PATCH 12/43] use -verify flag during grid agent install to ensure agent health --- salt/elasticfleet/install_agent_grid.sls | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/salt/elasticfleet/install_agent_grid.sls b/salt/elasticfleet/install_agent_grid.sls index 482af2e1e..5201eddf2 100644 --- a/salt/elasticfleet/install_agent_grid.sls +++ b/salt/elasticfleet/install_agent_grid.sls @@ -14,20 +14,23 @@ pull_agent_installer: file.managed: - - name: /opt/so/so-elastic-agent_linux_amd64 + - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 - source: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 - mode: 755 - makedirs: True run_installer: cmd.run: - - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force - - cwd: /opt/so + {# Run agent installer and wait for it to report healthy status #} + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force -verify + - cwd: /opt/so/log/agents - retry: attempts: 3 interval: 20 + - require: + - file: pull_agent_installer cleanup_agent_installer: 
file.absent: - - name: /opt/so/so-elastic-agent_linux_amd64 + - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 {% endif %} From 244a73b7a2b38a17e9331606fab6551689553098 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 15 May 2026 08:48:54 -0400 Subject: [PATCH 13/43] Make so-postgres-backup fail-safe against silent corruption The dump pipeline returned gzip's exit status, so a pg_dumpall that died mid-stream still produced a valid .gz holding a truncated dump, written straight to the final filename. The idempotency check then blocked retries for the day and the corrupt file counted toward retention, evicting a good backup each day until none remained. - set -o pipefail so a failed pg_dumpall fails the pipeline - dump to a .tmp file and atomically rename only after success, so the final filename appears only for a complete backup - gzip -t integrity check before publishing - trap-based cleanup of the temp file; sweep stale temps at startup - run retention only after a successful backup, with a glob restricted to finished backups - log timestamped OK/ERROR outcomes to /opt/so/log/postgres/backup.log --- salt/postgres/tools/sbin/so-postgres-backup | 48 ++++++++++++++++++--- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/salt/postgres/tools/sbin/so-postgres-backup b/salt/postgres/tools/sbin/so-postgres-backup index 9db522336..08a73e3a4 100644 --- a/salt/postgres/tools/sbin/so-postgres-backup +++ b/salt/postgres/tools/sbin/so-postgres-backup @@ -7,15 +7,29 @@ . /usr/sbin/so-common +# Without pipefail, a pipeline's exit status is gzip's. A failed pg_dumpall would +# otherwise be masked by a successful gzip, silently producing a valid .gz that +# holds a truncated dump. +set -o pipefail + # Backups contain role password hashes and full chat data; keep them 0600. 
umask 0077 TODAY=$(date '+%Y_%m_%d') BACKUPDIR=/nsm/backup BACKUPFILE="$BACKUPDIR/so-postgres-backup-$TODAY.sql.gz" +TMPFILE="$BACKUPFILE.tmp" MAXBACKUPS=7 +LOGFILE=/opt/so/log/postgres/backup.log -mkdir -p $BACKUPDIR +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') $*" >> "$LOGFILE" +} + +mkdir -p "$BACKUPDIR" + +# Remove any temp files left behind by a previously crashed run +rm -f "$BACKUPDIR"/so-postgres-backup-*.sql.gz.tmp # Skip if already backed up today if [ -f "$BACKUPFILE" ]; then @@ -27,13 +41,33 @@ if ! docker ps --format '{{.Names}}' | grep -q '^so-postgres$'; then exit 0 fi -# Dump all databases and roles, compress -docker exec so-postgres pg_dumpall -U postgres | gzip > "$BACKUPFILE" +# Always clean up the temp file on exit; the success path clears this trap +# after the atomic rename so the finished backup is not deleted. +trap 'rm -f "$TMPFILE"' EXIT -# Retention cleanup -NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l) +# Dump all databases and roles, compress. Write to a temp file so the final +# filename only ever appears for a complete, verified backup. +if ! docker exec so-postgres pg_dumpall -U postgres | gzip > "$TMPFILE"; then + log "ERROR: pg_dumpall/gzip failed; backup aborted" + exit 1 +fi + +# Verify the compressed stream is intact before publishing it +if ! gzip -t "$TMPFILE"; then + log "ERROR: backup failed gzip integrity check; backup aborted" + exit 1 +fi + +# Atomically publish the verified backup +mv "$TMPFILE" "$BACKUPFILE" +trap - EXIT +log "OK: wrote $BACKUPFILE" + +# Retention cleanup (only reached after a successful backup). The glob is +# restricted to finished backups so an in-progress .tmp can never be counted. 
+NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l) while [ "$NUMBACKUPS" -gt "$MAXBACKUPS" ]; do - OLDEST=$(find $BACKUPDIR -type f -name "so-postgres-backup*" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}') + OLDEST=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}') rm -f "$OLDEST" - NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l) + NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l) done From ce566ba174ba321e3955b9447aeab5975aeaab2b Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Fri, 15 May 2026 11:36:46 -0400 Subject: [PATCH 14/43] exclude fps --- salt/common/tools/sbin/so-log-check | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index a3d9c51d0..65b1041fe 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -165,6 +165,8 @@ if [[ $EXCLUDE_FALSE_POSITIVE_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|upgrading component template" # false positive (elasticsearch index or template names contain 'error') EXCLUDED_ERRORS="$EXCLUDED_ERRORS|upgrading composable template" # false positive (elasticsearch composable template names contain 'error') EXCLUDED_ERRORS="$EXCLUDED_ERRORS|Error while parsing document for index \[.ds-logs-kratos-so-.*object mapping for \[file\]" # false positive (mapping error occuring BEFORE kratos index has rolled over in 2.4.210) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|No such container" # false positive (telegraf trying to run stats on an old container) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|passwords do not match" # false positive (automated hydra test) fi if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then From e89c820b65bc4a00ffd45b6aafc61015aee60901 Mon Sep 17 00:00:00 2001 From: Jorge Reyes <94730068+reyesj2@users.noreply.github.com> Date: 
Sat, 16 May 2026 09:59:14 -0500 Subject: [PATCH 15/43] Revert "use -verify flag during grid agent install to ensure agent health" --- salt/elasticfleet/install_agent_grid.sls | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/salt/elasticfleet/install_agent_grid.sls b/salt/elasticfleet/install_agent_grid.sls index 5201eddf2..482af2e1e 100644 --- a/salt/elasticfleet/install_agent_grid.sls +++ b/salt/elasticfleet/install_agent_grid.sls @@ -14,23 +14,20 @@ pull_agent_installer: file.managed: - - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 + - name: /opt/so/so-elastic-agent_linux_amd64 - source: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 - mode: 755 - makedirs: True run_installer: cmd.run: - {# Run agent installer and wait for it to report healthy status #} - - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force -verify - - cwd: /opt/so/log/agents + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} -force + - cwd: /opt/so - retry: attempts: 3 interval: 20 - - require: - - file: pull_agent_installer cleanup_agent_installer: file.absent: - - name: /opt/so/log/agents/so-elastic-agent_linux_amd64 + - name: /opt/so/so-elastic-agent_linux_amd64 {% endif %} From d0aa33a255f26cedae7a1c93336a11302ba0446d Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 19 May 2026 10:50:17 -0500 Subject: [PATCH 16/43] sync elastic agent packages to fleet nodes --- salt/elasticfleet/enabled.sls | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index cb189f9a9..166cb9719 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -26,7 +26,9 @@ include: wait_for_elasticsearch_elasticfleet: cmd.run: - name: so-elasticsearch-wait +{% endif %} +{% if GLOBALS.role == "so-fleet" %} # Sync Elastic Agent artifacts to Fleet Node elasticagent_syncartifacts: file.recurse: 
From 6c8997b28a7b1998a553dc3c87c9168a0a8aab06 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 19 May 2026 22:27:31 -0500 Subject: [PATCH 17/43] verify all heavynodes and all searchnodes are at compatible ES version before attempting an elasticsearch upgrade --- salt/manager/tools/sbin/soup | 139 +++++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 5 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index bd3048019..e6a14607e 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -971,6 +971,9 @@ verify_es_version_compatibility() { local is_active_intermediate_upgrade=1 # supported upgrade paths for SO-ES versions declare -A es_upgrade_map=( + ["8.18.4"]="8.18.6 8.18.8 9.0.8" + ["8.18.6"]="8.18.8 9.0.8" + ["8.18.8"]="9.0.8" ["9.0.8"]="9.3.3" ) @@ -994,6 +997,116 @@ verify_es_version_compatibility() { exit 160 fi + compatible_es_versions="$target_es_version" + for current_version in "${!es_upgrade_map[@]}"; do + # shellcheck disable=SC2076 + if [[ " ${es_upgrade_map[$current_version]} " =~ " $target_es_version " ]]; then + compatible_es_versions+=" $current_version" + fi + done + + # Check if the given ES version can directly upgrade to the target ES version. Used to assist with catching lagging nodes during the upgrade process + es_version_can_upgrade_to_target() { + local current_version="$1" + # shellcheck disable=SC2076 + if [[ -n "$current_version" && " $compatible_es_versions " =~ " $current_version " ]]; then + return 0 + fi + + return 1 + } + + # Gather Elasticsearch cluster version info and verify that each node in the cluster is running a version compatible with the target ES version. 
+ verify_searchnodes_es_target_compatibility() { + local retries=20 + local retry_count=0 + local delay=180 + SEARCHNODE_ES_VERSIONS="" + + while [[ $retry_count -lt $retries ]]; do + SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) + local exit_status=$? + + if [[ $exit_status -ne 0 ]]; then + echo "Failed to retrieve Elasticsearch versions from searchnodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + + local all_searchnodes_compatible=true + while IFS=$'\t' read -r node current_version; do + [[ -z "$node" ]] && continue + if ! es_version_can_upgrade_to_target "$current_version"; then + echo "Searchnode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version." + all_searchnodes_compatible=false + fi + done < <(echo "$SEARCHNODE_ES_VERSIONS" | jq -r '.nodes | to_entries[] | [.value.name, .value.version] | @tsv') + + if [[ "$all_searchnodes_compatible" == true ]]; then + echo "All Searchnodes are upgradable to Elasticsearch $target_es_version." + return 0 + fi + + echo "One or more Searchnodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + done + + return 1 + } + + # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. + verify_heavynodes_es_target_compatibility() { + if ! salt-key -l accepted | grep -q 'heavynode$'; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + + echo -e "\nOne or more heavynodes detected. Verifying each is running an Elasticsearch version that is compatible with $target_es_version." 
+ + local retries=20 + local retry_count=0 + local delay=180 + HEAVYNODE_ES_VERSIONS="" + + while [[ $retry_count -lt $retries ]]; do + HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -r ".version.number"' shell=/bin/bash --out=json 2> /dev/null) + local exit_status=$? + + if [[ $exit_status -ne 0 ]]; then + echo "Failed to retrieve Elasticsearch version from one or more heavynodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + + local all_heavynodes_compatible=true + while IFS=$'\t' read -r node current_version; do + [[ -z "$node" ]] && continue + if ! es_version_can_upgrade_to_target "$current_version"; then + echo "Heavynode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version." + all_heavynodes_compatible=false + fi + done < <(echo "$HEAVYNODE_ES_VERSIONS" | jq -r 'to_entries[] | [.key, .value] | @tsv') + + if [[ "$all_heavynodes_compatible" == true ]]; then + echo -e "\nAll heavynodes can upgrade to Elasticsearch $target_es_version." + return 0 + fi + + echo "One or more heavynodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + done + + return 1 + } + + if [[ ! -f "$es_verification_script" ]]; then + create_intermediate_upgrade_verification_script "$es_verification_script" + fi + for statefile in "${es_required_version_statefile_base}"-*; do [[ -f $statefile ]] || continue @@ -1012,10 +1125,6 @@ verify_es_version_compatibility() { continue fi - if [[ ! 
-f "$es_verification_script" ]]; then - create_intermediate_upgrade_verification_script "$es_verification_script" - fi - echo -e "\n##############################################################################################################################\n" echo "A previously required intermediate Elasticsearch upgrade was detected. Verifying that all Searchnodes/Heavynodes have successfully upgraded Elasticsearch to $es_required_version_statefile_value before proceeding with soup to avoid potential data loss! This command can take up to an hour to complete." if ! timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile"; then @@ -1037,6 +1146,26 @@ verify_es_version_compatibility() { # shellcheck disable=SC2076 # Do not want a regex here eg usage " 8.18.8 9.0.8 " =~ " 9.0.8 " if [[ " ${es_upgrade_map[$es_version]} " =~ " $target_es_version " || "$es_version" == "$target_es_version" ]]; then + if ! verify_searchnodes_es_target_compatibility || ! verify_heavynodes_es_target_compatibility; then + echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" + + echo "One or more Searchnode(s)/Heavynode(s) cannot upgrade directly to Elasticsearch $target_es_version. This can happen with soups that include Elasticsearch upgrades being run in quick succession. Typically, this will resolve itself as the grid synchronizes. Please allow time for all Searchnodes/Heavynodes to have upgraded Elasticsearch to a compatible version with $target_es_version before running soup again to avoid potential data loss!" + + if [[ -n "$HEAVYNODE_ES_VERSIONS" ]]; then + echo "Current heavynode Elasticsearch versions:" + echo "$HEAVYNODE_ES_VERSIONS" | jq '.' 
+ fi + + if [[ -n "$SEARCHNODE_ES_VERSIONS" ]]; then + echo "Current searchnode Elasticsearch versions:" + echo "$SEARCHNODE_ES_VERSIONS" | jq '.nodes | to_entries | map({(.value.name): .value.version}) | sort | add' + fi + + echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" + + exit 161 + fi + # supported upgrade return 0 else @@ -1394,7 +1523,7 @@ main() { echo "Verifying we have the latest soup script." verify_latest_update_script - echo "Verifying Elasticsearch version compatibility before upgrading." + echo "Verifying Elasticsearch version compatibility across the grid before upgrading." verify_es_version_compatibility echo "Let's see if we need to update Security Onion." From d7a1b67095f630b256b0fc716e24b70ab1d39c13 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 20 May 2026 09:16:57 -0500 Subject: [PATCH 18/43] use pipefail on heavynode version command to pass through error --- salt/manager/tools/sbin/soup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index e6a14607e..d21599ad8 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1071,7 +1071,7 @@ verify_es_version_compatibility() { HEAVYNODE_ES_VERSIONS="" while [[ $retry_count -lt $retries ]]; do - HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -r ".version.number"' shell=/bin/bash --out=json 2> /dev/null) + HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$?
if [[ $exit_status -ne 0 ]]; then From 7d13007aa9f364eebece1ecae9c561bda6e3c2ed Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 20 May 2026 10:03:37 -0500 Subject: [PATCH 19/43] block soup if all ES nodes are not online and reporting their ES version for compatibility check --- salt/manager/tools/sbin/soup | 53 ++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index d21599ad8..8a68e5242 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1021,9 +1021,24 @@ verify_es_version_compatibility() { local retries=20 local retry_count=0 local delay=180 + local expected_es_nodes + local searchnode_minions SEARCHNODE_ES_VERSIONS="" while [[ $retry_count -lt $retries ]]; do + if ! searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then + echo "Failed to retrieve grid searchnodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + # Always add node running soup to expected es nodes + expected_es_nodes="${MINIONID%_*}" + while IFS= read -r searchnode_minion; do + [[ -z "$searchnode_minion" ]] && continue + expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" + done <<< "$searchnode_minions" + SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) local exit_status=$? @@ -1043,6 +1058,14 @@ verify_es_version_compatibility() { fi done < <(echo "$SEARCHNODE_ES_VERSIONS" | jq -r '.nodes | to_entries[] | [.value.name, .value.version] | @tsv') + while IFS= read -r expected_es_node; do + [[ -z "$expected_es_node" ]] && continue + if ! 
echo "$SEARCHNODE_ES_VERSIONS" | jq -e --arg node "$expected_es_node" '.nodes | to_entries | any(.value.name == $node)' > /dev/null; then + echo "Searchnode $expected_es_node did not report an Elasticsearch version. It may be offline or still upgrading." + all_searchnodes_compatible=false + fi + done <<< "$expected_es_nodes" + if [[ "$all_searchnodes_compatible" == true ]]; then echo "All Searchnodes are upgradable to Elasticsearch $target_es_version." return 0 @@ -1056,21 +1079,27 @@ verify_es_version_compatibility() { return 1 } - # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. + # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. verify_heavynodes_es_target_compatibility() { - if ! salt-key -l accepted | grep -q 'heavynode$'; then - echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." - return 0 - fi - - echo -e "\nOne or more heavynodes detected. Verifying each is running an Elasticsearch version that is compatible with $target_es_version." - + local heavynode_minions local retries=20 local retry_count=0 local delay=180 HEAVYNODE_ES_VERSIONS="" while [[ $retry_count -lt $retries ]]; do + if ! heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then + echo "Failed to retrieve grid heavynodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." + ((retry_count++)) + sleep $delay + continue + fi + + if [[ -z "$heavynode_minions" ]]; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$? 
@@ -1090,6 +1119,14 @@ verify_es_version_compatibility() { fi done < <(echo "$HEAVYNODE_ES_VERSIONS" | jq -r 'to_entries[] | [.key, .value] | @tsv') + while IFS= read -r heavynode_minion; do + [[ -z "$heavynode_minion" ]] && continue + if ! echo "$HEAVYNODE_ES_VERSIONS" | jq -e --arg minion "$heavynode_minion" 'has($minion)' > /dev/null; then + echo "Heavynode $heavynode_minion did not report an Elasticsearch version. It may be offline or still upgrading." + all_heavynodes_compatible=false + fi + done <<< "$heavynode_minions" + if [[ "$all_heavynodes_compatible" == true ]]; then echo -e "\nAll heavynodes can upgrade to Elasticsearch $target_es_version." return 0 From b485be460204202628a5b2e7d349c42c803add5b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 20 May 2026 14:12:58 -0500 Subject: [PATCH 20/43] separate salt-key command from main es version compatibility loop --- salt/manager/tools/sbin/soup | 72 ++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 8a68e5242..3bec13716 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1021,24 +1021,33 @@ verify_es_version_compatibility() { local retries=20 local retry_count=0 local delay=180 - local expected_es_nodes - local searchnode_minions + local expected_es_nodes searchnode_minions attempt + local searchnode_discovery_success=false SEARCHNODE_ES_VERSIONS="" - while [[ $retry_count -lt $retries ]]; do - if ! searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then - echo "Failed to retrieve grid searchnodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries."
- ((retry_count++)) - sleep $delay - continue + for attempt in {1..3}; do + if searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then + searchnode_discovery_success=true + break fi - # Always add node running soup to expected es nodes - expected_es_nodes="${MINIONID%_*}" - while IFS= read -r searchnode_minion; do - [[ -z "$searchnode_minion" ]] && continue - expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" - done <<< "$searchnode_minions" + echo "Failed to retrieve grid searchnodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3." + sleep 30 + done + + if [[ "$searchnode_discovery_success" != "true" ]]; then + echo "Failed to retrieve grid searchnodes via salt-key." + return 1 + fi + + # Always add node running soup to expected es nodes + expected_es_nodes="${MINIONID%_*}" + while IFS= read -r searchnode_minion; do + [[ -z "$searchnode_minion" ]] && continue + expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" + done <<< "$searchnode_minions" + + while [[ $retry_count -lt $retries ]]; do SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) local exit_status=$? @@ -1081,25 +1090,34 @@ verify_es_version_compatibility() { # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. verify_heavynodes_es_target_compatibility() { - local heavynode_minions + local heavynode_minions attempt local retries=20 local retry_count=0 local delay=180 + local heavynode_discovery_success=false HEAVYNODE_ES_VERSIONS="" + for attempt in {1..3}; do + if heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then + heavynode_discovery_success=true + break + fi + + echo "Failed to retrieve grid heavynodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3." 
+ sleep 30 + done + + if [[ "$heavynode_discovery_success" != "true" ]]; then + echo "Failed to retrieve grid heavynodes via salt-key." + return 1 + fi + + if [[ -z "$heavynode_minions" ]]; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + while [[ $retry_count -lt $retries ]]; do - if ! heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then - echo "Failed to retrieve grid heavynodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." - ((retry_count++)) - sleep $delay - continue - fi - - if [[ -z "$heavynode_minions" ]]; then - echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." - return 0 - fi - HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$? 
From 141a61f5b53d44e647350ac2c4b48be1708fd807 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 21 May 2026 13:47:03 -0400 Subject: [PATCH 21/43] 3.1.0 --- DOWNLOAD_AND_VERIFY_ISO.md | 22 +++++++++++----------- sigs/securityonion-3.1.0-20260521.iso.sig | Bin 0 -> 566 bytes 2 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 sigs/securityonion-3.1.0-20260521.iso.sig diff --git a/DOWNLOAD_AND_VERIFY_ISO.md b/DOWNLOAD_AND_VERIFY_ISO.md index 47937c1b9..a0ea874fa 100644 --- a/DOWNLOAD_AND_VERIFY_ISO.md +++ b/DOWNLOAD_AND_VERIFY_ISO.md @@ -1,17 +1,17 @@ -### 3.0.0-20260331 ISO image released on 2026/03/31 +### 3.1.0-20260521 ISO image released on 2026/05/21 ### Download and Verify -3.0.0-20260331 ISO image: -https://download.securityonion.net/file/securityonion/securityonion-3.0.0-20260331.iso +3.1.0-20260521 ISO image: +https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso -MD5: ECD318A1662A6FDE0EF213F5A9BD4B07 -SHA1: E55BE314440CCF3392DC0B06BC5E270B43176D9C -SHA256: 7FC47405E335CBE5C2B6C51FE7AC60248F35CBE504907B8B5A33822B23F8F4D5 +MD5: A853BC118639ABCE1795D6E313BFFBDE +SHA1: FCA615AD6E31710B33AE5870FEF447861FDB3B8F +SHA256: CE2A5947274D9ED2C5068A1FD46B64C4FEF70445EA9B61A98DD3621781329F2C Signature for ISO image: -https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.0.0-20260331.iso.sig +https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig Signing key: https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/main/KEYS @@ -25,22 +25,22 @@ wget https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/ Download the signature file for the ISO: ``` -wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.0.0-20260331.iso.sig +wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig ``` Download 
the ISO image: ``` -wget https://download.securityonion.net/file/securityonion/securityonion-3.0.0-20260331.iso +wget https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso ``` Verify the downloaded ISO image using the signature file: ``` -gpg --verify securityonion-3.0.0-20260331.iso.sig securityonion-3.0.0-20260331.iso +gpg --verify securityonion-3.1.0-20260521.iso.sig securityonion-3.1.0-20260521.iso ``` The output should show "Good signature" and the Primary key fingerprint should match what's shown below: ``` -gpg: Signature made Mon 30 Mar 2026 06:22:14 PM EDT using RSA key ID FE507013 +gpg: Signature made Thu 21 May 2026 11:10:01 AM EDT using RSA key ID FE507013 gpg: Good signature from "Security Onion Solutions, LLC " gpg: WARNING: This key is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. diff --git a/sigs/securityonion-3.1.0-20260521.iso.sig b/sigs/securityonion-3.1.0-20260521.iso.sig new file mode 100644 index 0000000000000000000000000000000000000000..af7564315fe4d271f8ffbcd5f20b1ee05bb215cf GIT binary patch literal 566 zcmV-60?GY}0y6{v0SEvc79j-41gSkXz6^6dp_W8^5Ma0dP;e6k0%{K+NdO875PT3| zxBgIY6XV$r{v$aAV9tWR2Cp82NY=5;oVcqFn(Xc4XM_F@euGbQ9!7nT67q|Yq&1Dz z;iL7U0Ax7EIw}GT(Bu_zK4Ys^hEOTJd&0o_3 zO5?R2v(LbEYEoJfB$6YydM(aL@phySuee&F)y$&07&AzwgvkmQV2JQZXm~MBEH3+G zKSPv5@iFE2n&Svb5pdPU%C&j6uqfp$?~q;+r++a=qvp=Fw>WPGwbbg1<|B&(3e-p8 zHPkpN1A8@ZjLz)8m z0VR{c;S9p&qu4L@;gzq_m74NtBE5~0*;%l!jX;8s?*95U&c&Hkk<|icjLg=~Miv|` zKJRsQYV~kgfumHMA<&=dJ$I^he>#jJnzd+{7r{Dna2YorX!yA=#k>Nel`MH%Harcf z1;AR{*}>X*g{2mN$Mb+;3r-|A+9*%9tY zB7`=&%xE^sj)0}?{(8WJMCBYz*i>k`qj%|THzqCeswa9p(zb&>&{OGp>6i)kn+a literal 0 HcmV?d00001 From 89a28d2cfeed5729fd31f4e113d6448537ffcd38 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 21 May 2026 15:45:58 -0400 Subject: [PATCH 22/43] Bump version from 3.1.0 to 3.2.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION 
index fd2a01863..944880fa1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0 +3.2.0 From aa7897874034e9e4383c06780a93d9bc2312bf40 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 21 May 2026 15:57:57 -0400 Subject: [PATCH 23/43] Add 3.2.0 option to discussion template --- .github/DISCUSSION_TEMPLATE/3-0.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/DISCUSSION_TEMPLATE/3-0.yml b/.github/DISCUSSION_TEMPLATE/3-0.yml index 3fb9e5b30..8f74145c4 100644 --- a/.github/DISCUSSION_TEMPLATE/3-0.yml +++ b/.github/DISCUSSION_TEMPLATE/3-0.yml @@ -11,6 +11,7 @@ body: - - 3.0.0 - 3.1.0 + - 3.2.0 - Other (please provide detail below) validations: required: true From d72219c5869e4aea39ccb285eca09ccf55c599ee Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 22 May 2026 09:59:17 -0500 Subject: [PATCH 24/43] use multiple or combined input --- salt/manager/tools/sbin/soup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 3bec13716..46785be3c 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1139,7 +1139,7 @@ verify_es_version_compatibility() { while IFS= read -r heavynode_minion; do [[ -z "$heavynode_minion" ]] && continue - if ! echo "$HEAVYNODE_ES_VERSIONS" | jq -e --arg minion "$heavynode_minion" 'has($minion)' > /dev/null; then + if ! echo "$HEAVYNODE_ES_VERSIONS" | jq -se --arg minion "$heavynode_minion" 'add | has($minion)' > /dev/null; then echo "Heavynode $heavynode_minion did not report an Elasticsearch version. It may be offline or still upgrading." 
all_heavynodes_compatible=false fi From c0272ddb81a196648d93f9cbc7c1ee7a687e2562 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 26 May 2026 09:24:10 -0400 Subject: [PATCH 25/43] Add version number to HOTFIX file --- HOTFIX | 1 + 1 file changed, 1 insertion(+) diff --git a/HOTFIX b/HOTFIX index e69de29bb..cb8d8ec41 100644 --- a/HOTFIX +++ b/HOTFIX @@ -0,0 +1 @@ +20260526 From 473f93f0ee20e2c307455b9c5e639a044d448927 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 09:33:15 -0500 Subject: [PATCH 26/43] check for stale logstash pipeline name in pillars --- salt/manager/tools/sbin/soup | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 46785be3c..58cbbac43 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1506,7 +1506,23 @@ EOF # Keeping this block in case we need to do a hotfix that requires salt update apply_hotfix() { - echo "No actions required. ($INSTALLEDVERSION/$HOTFIXVERSION)" + if [[ "$INSTALLEDVERSION" == "3.1.0" ]] ; then + echo "Checking for conflicting logstash defined_pipelines pillar value." + local LOGSTASH_FILE=/opt/so/saltstack/local/pillar/logstash/soc_logstash.sls + local MINIONDIR=/opt/so/saltstack/local/pillar/minions + + for pillar_file in "$LOGSTASH_FILE" "$MINIONDIR"/*.sls; do + [[ -f "$pillar_file" ]] || continue + if grep -q 'so/0013_input_lumberjack_fleet.conf$' "$pillar_file"; then + echo "Found conflicting defined_pipeline pillar value in $pillar_file. Updating to use the new logstash pipeline name." + sed -i 's#so/0013_input_lumberjack_fleet\.conf$#so/0013_input_lumberjack_fleet.conf.jinja#g' "$pillar_file" + chown socore:socore "$pillar_file" + fi + + done + else + echo "No actions required. 
($INSTALLEDVERSION/$HOTFIXVERSION)" + fi } failed_soup_restore_items() { From 0834998cca219394c49ea8116284285bef20b228 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 09:52:29 -0500 Subject: [PATCH 27/43] usuable for next soup --- salt/manager/tools/sbin/soup | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 58cbbac43..6f4b936dd 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -533,6 +533,23 @@ elasticfleet_set_agent_logging_level_warn() { done <<< "$policies_to_update" } +update_logstash_pipeline_name() { + local original_pipeline_name="$1" + local new_pipeline_name="$2" + + echo "Checking for conflicting logstash defined_pipelines pillar value." + local LOGSTASH_FILE=/opt/so/saltstack/local/pillar/logstash/soc_logstash.sls + local MINIONDIR=/opt/so/saltstack/local/pillar/minions + for pillar_file in "$LOGSTASH_FILE" "$MINIONDIR"/*.sls; do + [[ -f "$pillar_file" ]] || continue + if grep -q "$original_pipeline_name$" "$pillar_file"; then + echo "Found conflicting defined_pipeline pillar value in $pillar_file. Updating to use the new logstash pipeline name." + sed -i "s#$original_pipeline_name\$#$new_pipeline_name#g" "$pillar_file" + chown socore:socore "$pillar_file" + fi + done +} + check_transform_health_and_reauthorize() { . /usr/sbin/so-elastic-fleet-common @@ -1507,19 +1524,7 @@ EOF # Keeping this block in case we need to do a hotfix that requires salt update apply_hotfix() { if [[ "$INSTALLEDVERSION" == "3.1.0" ]] ; then - echo "Checking for conflicting logstash defined_pipelines pillar value." 
- local LOGSTASH_FILE=/opt/so/saltstack/local/pillar/logstash/soc_logstash.sls - local MINIONDIR=/opt/so/saltstack/local/pillar/minions - - for pillar_file in "$LOGSTASH_FILE" "$MINIONDIR"/*.sls; do - [[ -f "$pillar_file" ]] || continue - if grep -q 'so/0013_input_lumberjack_fleet.conf$' "$pillar_file"; then - echo "Found conflicting defined_pipeline pillar value in $pillar_file. Updating to use the new logstash pipeline name." - sed -i 's#so/0013_input_lumberjack_fleet\.conf$#so/0013_input_lumberjack_fleet.conf.jinja#g' "$pillar_file" - chown socore:socore "$pillar_file" - fi - - done + update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" else echo "No actions required. ($INSTALLEDVERSION/$HOTFIXVERSION)" fi From 0b4a4de609e8f55c63d79e91f68b6bfc4e1d1a60 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 12:21:22 -0500 Subject: [PATCH 28/43] always run logstash pipeline rename --- salt/manager/tools/sbin/soup | 1 + 1 file changed, 1 insertion(+) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 6f4b936dd..473ef79c5 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -701,6 +701,7 @@ up_to_3.1.0() { # Clear existing component template state file. 
rm -f /opt/so/state/esfleet_component_templates.json rename_strelka_scan_lnk + update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" INSTALLEDVERSION=3.1.0 } From bf609a112eed5001ec73745d6ea00fdda8de4267 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 12:21:44 -0500 Subject: [PATCH 29/43] LF --- HOTFIX | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOTFIX b/HOTFIX index cb8d8ec41..8d594f290 100644 --- a/HOTFIX +++ b/HOTFIX @@ -1 +1 @@ -20260526 +20260526 From 79987f3659ab554b975e31461ed20a21d009383d Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 27 May 2026 13:55:30 -0400 Subject: [PATCH 30/43] bootstrap so-soc db in postgres during soup --- salt/manager/tools/sbin/soup | 35 ++++++++++++++++++++++- salt/postgres/telegraf_users.sls | 18 ++---------- salt/postgres/tools/sbin/so-postgres-wait | 32 +++++++++++++++++++++ 3 files changed, 68 insertions(+), 17 deletions(-) create mode 100644 salt/postgres/tools/sbin/so-postgres-wait diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 3bec13716..05f58b9a5 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -370,8 +370,9 @@ preupgrade_changes() { # This function is to add any new pillar items if needed. echo "Checking to see if changes are needed." 
- [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 + [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 [[ "$INSTALLEDVERSION" == "3.0.0" ]] && up_to_3.1.0 + [[ "$INSTALLEDVERSION" == "3.1.0" ]] && up_to_3.2.0 true } @@ -381,6 +382,7 @@ postupgrade_changes() { [[ "$POSTVERSION" =~ ^2\.4\.21[0-9]+$ ]] && post_to_3.0.0 [[ "$POSTVERSION" == "3.0.0" ]] && post_to_3.1.0 + [[ "$POSTVERSION" == "3.1.0" ]] && post_to_3.2.0 true } @@ -720,6 +722,37 @@ post_to_3.1.0() { ### 3.1.0 End ### +### 3.2.0 Scripts ### + +bootstrap_so_soc_database() { + # init-db.sh is mounted into so-postgres at /docker-entrypoint-initdb.d/init-db.sh + # and runs automatically only on a fresh data directory. Hosts upgrading from + # 3.1.0 already have /nsm/postgres populated, so the so_soc bootstrap block + # added in 3.2 never fires. Re-run the script explicitly; it's idempotent. + echo "Bootstrapping so_soc database via init-db.sh." + if ! /usr/sbin/so-postgres-wait; then + FINAL_MESSAGE_QUEUE+=("WARNING: so-postgres was not ready during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh") + return 0 + fi + if ! docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh; then + FINAL_MESSAGE_QUEUE+=("WARNING: init-db.sh failed inside so-postgres during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh") + return 0 + fi + echo "so_soc bootstrap complete." +} + +up_to_3.2.0() { + INSTALLEDVERSION=3.2.0 +} + +post_to_3.2.0() { + bootstrap_so_soc_database + + POSTVERSION=3.2.0 +} + +### 3.2.0 End ### + repo_sync() { echo "Sync the local repo." 
diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 28d9d6247..5e3566a95 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -18,26 +18,12 @@ include: {% set TG_OUT = TELEGRAFMERGED.output | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} -# docker_container.running returns as soon as the container starts, but on -# first-init docker-entrypoint.sh starts a temporary postgres with -# `listen_addresses=''` to run /docker-entrypoint-initdb.d scripts, then -# shuts it down before exec'ing the real CMD. A default pg_isready check -# (Unix socket) passes during that ephemeral phase and races the shutdown -# with "the database system is shutting down". Checking TCP readiness on -# 127.0.0.1 only succeeds after the final postgres binds the port. postgres_wait_ready: cmd.run: - - name: | - for i in $(seq 1 60); do - if docker exec so-postgres pg_isready -h 127.0.0.1 -U postgres -q 2>/dev/null; then - exit 0 - fi - sleep 2 - done - echo "so-postgres did not accept TCP connections within 120s" >&2 - exit 1 + - name: /usr/sbin/so-postgres-wait - require: - docker_container: so-postgres + - file: postgres_sbin # Ensure the shared Telegraf database exists. init-db.sh only runs on a # fresh data dir, so hosts upgraded onto an existing /nsm/postgres volume diff --git a/salt/postgres/tools/sbin/so-postgres-wait b/salt/postgres/tools/sbin/so-postgres-wait new file mode 100644 index 000000000..7c4c8ce92 --- /dev/null +++ b/salt/postgres/tools/sbin/so-postgres-wait @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Wait for the so-postgres container to accept TCP connections. 
+# +# docker_container.running returns as soon as the container starts, but on +# first-init docker-entrypoint.sh starts a temporary postgres with +# `listen_addresses=''` to run /docker-entrypoint-initdb.d scripts, then +# shuts it down before exec'ing the real CMD. A default pg_isready check +# (Unix socket) passes during that ephemeral phase and races the shutdown +# with "the database system is shutting down". Checking TCP readiness on +# 127.0.0.1 only succeeds after the final postgres binds the port. +# +# Usage: so-postgres-wait [iterations] [sleep_seconds] +# Default: 60 iterations, 2s sleep (~120s total). + +ITERATIONS=${1:-60} +SLEEP_SECONDS=${2:-2} + +for i in $(seq 1 "$ITERATIONS"); do + if docker exec so-postgres pg_isready -h 127.0.0.1 -U postgres -q 2>/dev/null; then + exit 0 + fi + sleep "$SLEEP_SECONDS" +done + +echo "so-postgres did not accept TCP connections within $((ITERATIONS * SLEEP_SECONDS))s" >&2 +exit 1 From 613eca52fcd800570e444d271202429531c731cd Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 13:24:10 -0500 Subject: [PATCH 31/43] update hotfix date --- HOTFIX | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOTFIX b/HOTFIX index 8d594f290..70406bf9d 100644 --- a/HOTFIX +++ b/HOTFIX @@ -1 +1 @@ -20260526 +20260528 From b2a82fec29b25a5718c7a903f38402af905e2a4a Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 27 May 2026 13:24:23 -0500 Subject: [PATCH 32/43] fix_logstash_0013_lumberjack_pipeline_name Before removing from apply_hotfix function first verify that older installs < 3.1.0 are still upgradable when referencing 'so/0013_input_lumberjack_fleet.conf' via pillar. 
Failure to do so will prevent logstash from starting --- salt/manager/tools/sbin/soup | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 473ef79c5..ba76d2a3e 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -693,6 +693,10 @@ rename_strelka_scan_lnk() { rm -f "$TMP_VALUE_FILE" } +fix_logstash_0013_lumberjack_pipeline_name() { + update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" +} + up_to_3.1.0() { ensure_postgres_local_pillar ensure_postgres_secret @@ -701,7 +705,7 @@ up_to_3.1.0() { # Clear existing component template state file. rm -f /opt/so/state/esfleet_component_templates.json rename_strelka_scan_lnk - update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" + fix_logstash_0013_lumberjack_pipeline_name INSTALLEDVERSION=3.1.0 } @@ -1525,7 +1529,9 @@ EOF # Keeping this block in case we need to do a hotfix that requires salt update apply_hotfix() { if [[ "$INSTALLEDVERSION" == "3.1.0" ]] ; then - update_logstash_pipeline_name "so/0013_input_lumberjack_fleet.conf" "so/0013_input_lumberjack_fleet.conf.jinja" + # Do not remove this fix_logstash_0013_lumberjack_pipeline_name in future hotfixes without first validating older + # installs referencing "so/0013_input_lumberjack_fleet.conf" via pillar are upgradable + fix_logstash_0013_lumberjack_pipeline_name else echo "No actions required. 
($INSTALLEDVERSION/$HOTFIXVERSION)" fi From 93ffce98d71d332e7743056b0d36f3aacbbab24d Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 27 May 2026 15:07:25 -0400 Subject: [PATCH 33/43] add onionconfig and postgres modules to soc config --- salt/soc/defaults.yaml | 6 ++++++ salt/soc/merged.map.jinja | 7 +++++++ salt/soc/soc_soc.yaml | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index cc80758fc..62b451bec 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -1519,6 +1519,12 @@ soc: serviceAccountJSON: "" serviceAccountLocation: "" healthTimeoutSeconds: 5 + onionconfig: + saltstackDir: /opt/so/saltstack + bypassEnabled: false + postgres: + host: + password: salt: queueDir: /opt/sensoroni/queue timeoutMs: 45000 diff --git a/salt/soc/merged.map.jinja b/salt/soc/merged.map.jinja index 349937983..b34efb11d 100644 --- a/salt/soc/merged.map.jinja +++ b/salt/soc/merged.map.jinja @@ -16,6 +16,13 @@ {% do SOCMERGED.config.server.update({'additionalCA': MANAGERMERGED.additionalCA}) %} {% do SOCMERGED.config.server.update({'insecureSkipVerify': MANAGERMERGED.insecureSkipVerify}) %} +{% if not SOCMERGED.config.server.modules.postgres.host %} +{% do SOCMERGED.config.server.modules.postgres.update({'host': GLOBALS.manager}) %} +{% endif %} +{% if not SOCMERGED.config.server.modules.postgres.password %} +{% do SOCMERGED.config.server.modules.postgres.update({'password': salt['pillar.get']('secrets:postgres_pass', '')}) %} +{% endif %} + {# if SOCMERGED.config.server.modules.cases == httpcase details come from the soc pillar #} {% if SOCMERGED.config.server.modules.cases != 'soc' %} {% do SOCMERGED.config.server.modules.elastic.update({'casesEnabled': false}) %} diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index 647bdd778..3cb244eed 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -453,6 +453,26 @@ soc: description: Duration (in milliseconds) that must 
elapse after a grid node fails to check-in before the node will be marked offline (fault). global: True advanced: True + onionconfig: + saltstackDir: + description: Root directory containing the SaltStack tree that SOC reads and writes configuration from. Should not be changed under normal circumstances. + global: True + advanced: True + bypassEnabled: + description: When enabled, errors encountered while reading the SaltStack pillar tree (missing files, unreadable directories, etc.) are logged but do not prevent SOC from starting or serving settings. Intended for advanced troubleshooting and recovery scenarios when the pillar tree is partially unreadable. + global: True + advanced: True + forcedType: bool + postgres: + host: + description: Hostname or IP address of the PostgreSQL server used by SOC. Defaults to the manager hostname. + global: True + advanced: True + password: + description: Password used by SOC to authenticate to the PostgreSQL server. Defaults to the postgres superuser password seeded in the secrets pillar. + global: True + sensitive: True + advanced: True salt: longRelayTimeoutMs: description: Duration (in milliseconds) to wait for a response from the Salt API when executing tasks known for being long running before giving up and showing an error on the SOC UI. From bb8ae91d91936d84dbf1e61617e8bba59c66a9f8 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 27 May 2026 16:39:52 -0400 Subject: [PATCH 34/43] fix so-soc postgres bootstrap --- salt/manager/tools/sbin/soup | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 05f58b9a5..c31891f1d 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -730,12 +730,17 @@ bootstrap_so_soc_database() { # 3.1.0 already have /nsm/postgres populated, so the so_soc bootstrap block # added in 3.2 never fires. Re-run the script explicitly; it's idempotent. 
echo "Bootstrapping so_soc database via init-db.sh." + # The postgres image has no USER directive, so `docker exec` defaults to + # root, and the container env intentionally omits POSTGRES_USER (the upstream + # entrypoint defaults it transiently during first-init only). Recreate both + # so psql inside init-db.sh resolves the connect user correctly. + local exec_cmd="docker exec -u postgres -e POSTGRES_USER=postgres so-postgres bash /docker-entrypoint-initdb.d/init-db.sh" if ! /usr/sbin/so-postgres-wait; then - FINAL_MESSAGE_QUEUE+=("WARNING: so-postgres was not ready during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh") + FINAL_MESSAGE_QUEUE+=("WARNING: so-postgres was not ready during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: $exec_cmd") return 0 fi - if ! docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh; then - FINAL_MESSAGE_QUEUE+=("WARNING: init-db.sh failed inside so-postgres during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: docker exec so-postgres bash /docker-entrypoint-initdb.d/init-db.sh") + if ! $exec_cmd; then + FINAL_MESSAGE_QUEUE+=("WARNING: init-db.sh failed inside so-postgres during the 3.2.0 upgrade; the so_soc database may not have been bootstrapped. Re-run manually: $exec_cmd") return 0 fi echo "so_soc bootstrap complete." 
From 5abd6de4b55b2c3c8965620bdbcc371a944da3cb Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 28 May 2026 09:34:17 -0400 Subject: [PATCH 35/43] 3.1.0 hotfix --- DOWNLOAD_AND_VERIFY_ISO.md | 22 +++++++++++----------- sigs/securityonion-3.1.0-20260528.iso.sig | Bin 0 -> 566 bytes 2 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 sigs/securityonion-3.1.0-20260528.iso.sig diff --git a/DOWNLOAD_AND_VERIFY_ISO.md b/DOWNLOAD_AND_VERIFY_ISO.md index a0ea874fa..bae49c4ac 100644 --- a/DOWNLOAD_AND_VERIFY_ISO.md +++ b/DOWNLOAD_AND_VERIFY_ISO.md @@ -1,17 +1,17 @@ -### 3.1.0-20260521 ISO image released on 2026/05/21 +### 3.1.0-20260528 ISO image released on 2026/05/28 ### Download and Verify -3.1.0-20260521 ISO image: -https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso +3.1.0-20260528 ISO image: +https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260528.iso -MD5: A853BC118639ABCE1795D6E313BFFBDE -SHA1: FCA615AD6E31710B33AE5870FEF447861FDB3B8F -SHA256: CE2A5947274D9ED2C5068A1FD46B64C4FEF70445EA9B61A98DD3621781329F2C +MD5: 9D6FF58DEEE24089D722C73169765B3E +SHA1: 2B8B816B6CEC3B7F96B3C5E040EBF502DD2C412F +SHA256: 62FAB57E247C843D6A04F0796D8162C732B65D82FC3E4A59D087135B9FD32912 Signature for ISO image: -https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig +https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260528.iso.sig Signing key: https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/main/KEYS @@ -25,22 +25,22 @@ wget https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/3/ Download the signature file for the ISO: ``` -wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260521.iso.sig +wget https://github.com/Security-Onion-Solutions/securityonion/raw/3/main/sigs/securityonion-3.1.0-20260528.iso.sig ``` 
Download the ISO image: ``` -wget https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260521.iso +wget https://download.securityonion.net/file/securityonion/securityonion-3.1.0-20260528.iso ``` Verify the downloaded ISO image using the signature file: ``` -gpg --verify securityonion-3.1.0-20260521.iso.sig securityonion-3.1.0-20260521.iso +gpg --verify securityonion-3.1.0-20260528.iso.sig securityonion-3.1.0-20260528.iso ``` The output should show "Good signature" and the Primary key fingerprint should match what's shown below: ``` -gpg: Signature made Thu 21 May 2026 11:10:01 AM EDT using RSA key ID FE507013 +gpg: Signature made Wed 27 May 2026 03:03:59 PM EDT using RSA key ID FE507013 gpg: Good signature from "Security Onion Solutions, LLC " gpg: WARNING: This key is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. diff --git a/sigs/securityonion-3.1.0-20260528.iso.sig b/sigs/securityonion-3.1.0-20260528.iso.sig new file mode 100644 index 0000000000000000000000000000000000000000..e4bead44df3b87b3f8f037867841feada61581a2 GIT binary patch literal 566 zcmV-60?GY}0y6{v0SEvc79j-41gSkXz6^6dp_W8^5Ma0dP;e6k0%{jP9{>so5PT3| zxBgIY6Ei^%|6UnlNTps}bX?WU@Vh_C?-M|n}<~50wVGXXGoD8W3KDy z=ZYPL6fKH1GNVpuJ^)}-r--D30lb_cgD%wZnXQ45n7fB0fT`F(k_%Xy#}A(Ws^Obh|St0AeMB6z&!7o&T7 zp=tatlM-=K)1EHl3Cb$hB&|O52e!UHZNL=;?pHa#16K$n|AWd z)vl}W%vr`V#&5!~(`lP#V(2C>vGbH>2=4ggFFj*5pMZ|+vES?u+77OpxzU$otkQX@ zLtD9EAjr>rTD3OA7lYK3XqlWNjt#DJMaCg{V$BvxKCYi|fd5UPD=|I-2Sv&aPF-v8 zHAWRBPEKp**n0iwqd8Dv@x0>_Yw1N&%nj4r+waMCN&|`*2dOB!(-wW;NvXPPB(Dov zm Date: Thu, 28 May 2026 10:24:47 -0400 Subject: [PATCH 36/43] Remove outdated HOTFIX version number --- HOTFIX | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOTFIX b/HOTFIX index 70406bf9d..8b1378917 100644 --- a/HOTFIX +++ b/HOTFIX @@ -1 +1 @@ -20260528 + From 86edc5aaba451d571d2cb68f703745ca0d0d5843 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 28 May 2026 
22:57:59 -0400 Subject: [PATCH 37/43] version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 944880fa1..03e153fda 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0 +3.0.0-kilo From 68a82a425b155ea0dca567278990bc90635504c5 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Sat, 30 May 2026 08:12:50 -0400 Subject: [PATCH 38/43] fix version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 03e153fda..944880fa1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.0-kilo +3.2.0 From 79da9f9f2c011dbfb868cc02071b703666b3afb1 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:26:52 -0500 Subject: [PATCH 39/43] check if there is a version or hotfix to upgrade to before verifiying elasticsearch compatibility --- salt/manager/tools/sbin/soup | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index ba76d2a3e..eb7818cf1 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1606,11 +1606,12 @@ main() { echo "Verifying we have the latest soup script." verify_latest_update_script + echo "Let's see if we need to update Security Onion." + upgrade_check + echo "Verifying Elasticsearch version compatibility across the grid before upgrading." verify_es_version_compatibility - echo "Let's see if we need to update Security Onion." - upgrade_check upgrade_space echo "Checking for Salt Master and Minion updates." 
From 3c533cccbce84fab75b6c96ef69933816c3bcd77 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:28:59 -0500 Subject: [PATCH 40/43] and after free space check --- salt/manager/tools/sbin/soup | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index eb7818cf1..a62a39b40 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1608,12 +1608,11 @@ main() { echo "Let's see if we need to update Security Onion." upgrade_check + upgrade_space echo "Verifying Elasticsearch version compatibility across the grid before upgrading." verify_es_version_compatibility - upgrade_space - echo "Checking for Salt Master and Minion updates." upgrade_check_salt set -e From f2996fb888c5db9bd9740ae0cc5a922330b8ea6b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:52:35 -0500 Subject: [PATCH 41/43] use so-config-backup script in soup --- salt/backup/tools/sbin/so-config-backup.jinja | 6 ++++-- salt/manager/tools/sbin/soup | 14 ++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/salt/backup/tools/sbin/so-config-backup.jinja b/salt/backup/tools/sbin/so-config-backup.jinja index 7f65bbba3..8d214e665 100755 --- a/salt/backup/tools/sbin/so-config-backup.jinja +++ b/salt/backup/tools/sbin/so-config-backup.jinja @@ -25,9 +25,11 @@ if [ ! 
-f $BACKUPFILE ]; then # Create empty backup file tar -cf $BACKUPFILE -T /dev/null - # Loop through all paths defined in global.sls, and append them to backup file + # Loop through all paths defined in global.sls, and append them to backup file if they exist {%- for LOCATION in BACKUPLOCATIONS %} - tar -rf $BACKUPFILE "${EXCLUSIONS[@]}" {{ LOCATION }} + if [[ -d {{ LOCATION }} || -f {{ LOCATION }} ]]; then + tar -rf $BACKUPFILE "${EXCLUSIONS[@]}" {{ LOCATION }} + fi {%- endfor %} fi diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 135c51276..7874bf7b2 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -188,13 +188,6 @@ airgap_update_dockers() { fi } -backup_old_states_pillars() { - - tar czf /nsm/backup/$(echo $INSTALLEDVERSION)_$(date +%Y%m%d-%H%M%S)_soup_default_states_pillars.tar.gz /opt/so/saltstack/default/ - tar czf /nsm/backup/$(echo $INSTALLEDVERSION)_$(date +%Y%m%d-%H%M%S)_soup_local_states_pillars.tar.gz /opt/so/saltstack/local/ - -} - update_registry() { docker stop so-dockerregistry docker rm so-dockerregistry @@ -1670,7 +1663,8 @@ main() { echo "Applying $HOTFIXVERSION hotfix" # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars if [[ ! "$MINION_ROLE" == "import" ]]; then - backup_old_states_pillars + echo "Running so-config-backup script." + /sbin/so-config-backup fi copy_new_files create_local_directories "/opt/so/saltstack/default" @@ -1726,8 +1720,8 @@ main() { # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars if [[ ! "$MINION_ROLE" == "import" ]]; then echo "" - echo "Creating snapshots of default and local Salt states and pillars and saving to /nsm/backup/" - backup_old_states_pillars + echo "Running so-config-backup script." 
+ /sbin/so-config-backup fi echo "" From f9c2579261b0785024b0ba09a33329e1a96642e2 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 12:04:33 -0500 Subject: [PATCH 42/43] remove logstash pipeline rename from hotfix moving to up_to_3.2.0 --- salt/manager/tools/sbin/soup | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 135c51276..ede301648 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -769,6 +769,8 @@ bootstrap_so_soc_database() { } up_to_3.2.0() { + fix_logstash_0013_lumberjack_pipeline_name + INSTALLEDVERSION=3.2.0 } @@ -1566,13 +1568,7 @@ EOF # Keeping this block in case we need to do a hotfix that requires salt update apply_hotfix() { - if [[ "$INSTALLEDVERSION" == "3.1.0" ]] ; then - # Do not remove this fix_logstash_0013_lumberjack_pipeline_name in future hotfixes without first validating older - # installs referencing "so/0013_input_lumberjack_fleet.conf" via pillar are upgradable - fix_logstash_0013_lumberjack_pipeline_name - else - echo "No actions required. ($INSTALLEDVERSION/$HOTFIXVERSION)" - fi + echo "No actions required. 
($INSTALLEDVERSION/$HOTFIXVERSION)" } failed_soup_restore_items() { From 559465b40783ca3baa7265b1ac4dcd71e0ea32c9 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 1 Jun 2026 12:05:09 -0500 Subject: [PATCH 43/43] run elastic agent gen installers script in post_to_3.2.0 --- salt/manager/tools/sbin/soup | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index ede301648..1416f2ba3 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -777,6 +777,10 @@ up_to_3.2.0() { post_to_3.2.0() { bootstrap_so_soc_database + # Including agent regen script here since it was missed in post_to_3.1.0 + echo "Regenerating Elastic Agent Installers" + /sbin/so-elastic-agent-gen-installers + POSTVERSION=3.2.0 }