Merge pull request #15749 from Security-Onion-Solutions/feature/postgres

Add so-postgres Salt states and infrastructure
This commit is contained in:
Mike Reeves
2026-04-28 10:15:47 -04:00
committed by GitHub
50 changed files with 1417 additions and 10 deletions
+46 -1
View File
@@ -273,7 +273,7 @@ function deleteMinionFiles () {
log "ERROR" "Failed to delete $PILLARFILE"
return 1
fi
rm -f $ADVPILLARFILE
if [ $? -ne 0 ]; then
log "ERROR" "Failed to delete $ADVPILLARFILE"
@@ -281,6 +281,39 @@ function deleteMinionFiles () {
fi
}
# Remove this minion's postgres Telegraf credential from the shared creds
# pillar and drop the matching role in Postgres.
#
# Globals:   MINION_ID (read)
# Calls:     log, so-telegraf-cred, docker
# Returns:   always 0, so a dead or unreachable so-postgres doesn't block
#            minion deletion — in that case we log a warning and leave the
#            role behind for manual cleanup.
function remove_postgres_telegraf_from_minion() {
  local MINION_SAFE
  # Same normalisation so-telegraf-cred uses when it names the role:
  # '.' and '-' become '_', and everything is lowercased.
  MINION_SAFE=$(printf '%s' "$MINION_ID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]')
  local PG_USER="so_telegraf_${MINION_SAFE}"

  log "INFO" "Removing postgres telegraf cred for $MINION_ID"
  # Best effort: a missing pillar entry must not stop the rest of the cleanup.
  so-telegraf-cred remove "$MINION_ID" >/dev/null 2>&1 || true

  # The role name is spliced into SQL string literals below. Refuse anything
  # that is not a plain identifier so a stray quote cannot change the statement.
  case "$MINION_SAFE" in
    '' | *[!a-z0-9_]*)
      log "WARN" "Minion id $MINION_ID does not map to a safe postgres role name; skipping DB role cleanup"
      return 0
      ;;
  esac

  if ! docker ps --format '{{.Names}}' 2>/dev/null | grep -q '^so-postgres$'; then
    log "WARN" "so-postgres container is not running; skipping DB role cleanup for $PG_USER"
    return 0
  fi

  # Unquoted heredoc: $PG_USER is expanded by the shell, \$\$ reaches psql as $$.
  if ! docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf >/dev/null 2>&1 <<EOSQL
DO \$\$
BEGIN
  IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '$PG_USER') THEN
    EXECUTE format('REASSIGN OWNED BY %I TO so_telegraf', '$PG_USER');
    EXECUTE format('DROP OWNED BY %I', '$PG_USER');
    EXECUTE format('DROP ROLE %I', '$PG_USER');
  END IF;
END
\$\$;
EOSQL
  then
    log "WARN" "Failed to drop postgres role $PG_USER; pillar entry was removed — drop manually if the role persists"
  fi

  # Explicit so the status of the last log call never leaks to the caller.
  return 0
}
# Create the minion file
function ensure_socore_ownership() {
log "INFO" "Setting socore ownership on minion files"
@@ -542,6 +575,17 @@ function add_telegraf_to_minion() {
log "ERROR" "Failed to add telegraf configuration to $PILLARFILE"
return 1
fi
# Provision the per-minion postgres Telegraf credential in the shared
# telegraf/creds.sls pillar. so-telegraf-cred is the only writer; it
# generates a password on first add and is a no-op on re-add so the cred
# is stable across repeated so-minion runs. postgres.telegraf_users on the
# manager creates/updates the DB role from the same pillar.
so-telegraf-cred add "$MINION_ID"
if [ $? -ne 0 ]; then
log "ERROR" "Failed to provision postgres telegraf cred for $MINION_ID"
return 1
fi
}
function add_influxdb_to_minion() {
@@ -1069,6 +1113,7 @@ case "$OPERATION" in
"delete")
log "INFO" "Removing minion $MINION_ID"
remove_postgres_telegraf_from_minion
deleteMinionFiles || {
log "ERROR" "Failed to delete minion files for $MINION_ID"
exit 1
+54
View File
@@ -0,0 +1,54 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
# Single writer for the Telegraf Postgres credentials pillar. Thin wrapper
# around so-yaml.py that generates a password on first add and no-ops on
# re-add so the cred is stable across repeated so-minion runs.
#
# Note: so-yaml.py splits keys on '.' with no escape. SO minion ids are
# dot-free by construction (setup/so-functions:1884 takes the short_name
# before the first '.'), so using the raw minion id as the key is safe.
# Pillar file this script reads and writes; entries live under
# telegraf.postgres_creds.<minion_id>.
CREDS=/opt/so/saltstack/local/pillar/telegraf/creds.sls
# Print the command synopsis on stderr and terminate with status 2.
usage() {
  printf 'Usage: %s <add|remove> <minion_id>\n' "$0" >&2
  exit 2
}
# Make sure $CREDS exists. When the file is absent it is created with an
# empty telegraf.postgres_creds mapping, handed to socore (best effort) and
# set to mode 640. An existing file is left untouched.
# Globals: CREDS (read)
# Returns: 0 on success, 1 if the directory or file could not be prepared.
seed_creds_file() {
  local creds_dir
  creds_dir=$(dirname "$CREDS")
  mkdir -p "$creds_dir" || return 1

  # Nothing to seed when the pillar file is already there.
  [[ -f "$CREDS" ]] && return 0

  # Subshell keeps the tightened umask from leaking into the caller.
  (
    umask 027 && printf 'telegraf:\n postgres_creds: {}\n' > "$CREDS"
  ) || return 1
  # Ownership change is allowed to fail silently.
  chown socore:socore "$CREDS" 2>/dev/null || true
  chmod 640 "$CREDS" || return 1
}
# Entry point: so-telegraf-cred <add|remove> <minion_id>
#   add    - create telegraf.postgres_creds.<minion_id>.{user,pass} in $CREDS
#            unless a .user entry is already present (then exit 0 untouched).
#   remove - delete telegraf.postgres_creds.<minion_id>; best effort, and a
#            missing creds file is not an error.
# Exit status: 0 on success/no-op, 1 when the pillar could not be written,
# 2 on bad usage.
OP=$1
MID=$2
if [[ -z "$OP" || -z "$MID" ]]; then
  usage
fi

case "$OP" in
  add)
    # Role-name form of the minion id: '.' and '-' become '_', lowercased.
    SAFE=$(printf '%s' "$MID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]')
    seed_creds_file || exit 1

    # Existing entry: keep the stored password stable across repeated runs.
    if so-yaml.py get -r "$CREDS" "telegraf.postgres_creds.${MID}.user" >/dev/null 2>&1; then
      exit 0
    fi

    PASS=$(tr -dc 'A-Za-z0-9~!@#^&*()_=+[]|;:,.<>?-' < /dev/urandom | head -c 72)
    if [[ ${#PASS} -ne 72 ]]; then
      echo "Failed to generate a password for $MID" >&2
      exit 1
    fi

    # Both keys must land. A lone .user would make every later add a no-op
    # and leave the minion without a password, so undo a partial write.
    if ! so-yaml.py replace "$CREDS" "telegraf.postgres_creds.${MID}.user" "so_telegraf_${SAFE}" >/dev/null \
      || ! so-yaml.py replace "$CREDS" "telegraf.postgres_creds.${MID}.pass" "$PASS" >/dev/null; then
      so-yaml.py remove "$CREDS" "telegraf.postgres_creds.${MID}" >/dev/null 2>&1 || true
      echo "Failed to write telegraf postgres cred for $MID to $CREDS" >&2
      exit 1
    fi
    ;;
  remove)
    [[ -f "$CREDS" ]] || exit 0
    # Deliberately best effort: an absent entry is not a failure.
    so-yaml.py remove "$CREDS" "telegraf.postgres_creds.${MID}" >/dev/null 2>&1 || true
    ;;
  *)
    usage
    ;;
esac
+12 -4
View File
@@ -39,9 +39,16 @@ def showUsage(args):
def loadYaml(filename):
file = open(filename, "r")
content = file.read()
return yaml.safe_load(content)
try:
with open(filename, "r") as file:
content = file.read()
return yaml.safe_load(content)
except FileNotFoundError:
print(f"File not found: {filename}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(f"Error reading file {filename}: {e}", file=sys.stderr)
sys.exit(1)
def writeYaml(filename, content):
@@ -285,7 +292,8 @@ def add(args):
def removeKey(content, key):
pieces = key.split(".", 1)
if len(pieces) > 1:
removeKey(content[pieces[0]], pieces[1])
if pieces[0] in content:
removeKey(content[pieces[0]], pieces[1])
else:
content.pop(key, None)
+18
View File
@@ -973,3 +973,21 @@ class TestReplaceListObject(unittest.TestCase):
expected = "key1:\n- id: '1'\n status: updated\n- id: '2'\n status: inactive\n"
self.assertEqual(actual, expected)
class TestLoadYaml(unittest.TestCase):
    """Failure-path checks for soyaml.loadYaml: message on stderr, exit code 1."""

    def test_load_yaml_missing_file(self):
        # sys.exit is replaced by a mock so the call returns and can be inspected.
        with patch('sys.exit', new=MagicMock()) as exit_mock, \
                patch('sys.stderr', new=StringIO()) as captured_err:
            soyaml.loadYaml("/tmp/so-yaml_test-does-not-exist.yaml")
            exit_mock.assert_called_with(1)
            self.assertIn("File not found:", captured_err.getvalue())

    def test_load_yaml_read_error(self):
        # open() is forced to raise so the generic error branch is exercised.
        with patch('sys.exit', new=MagicMock()) as exit_mock, \
                patch('sys.stderr', new=StringIO()) as captured_err, \
                patch('builtins.open', side_effect=PermissionError("denied")):
            soyaml.loadYaml("/tmp/so-yaml_test-unreadable.yaml")
            exit_mock.assert_called_with(1)
            self.assertIn("Error reading file", captured_err.getvalue())
+51
View File
@@ -485,7 +485,44 @@ elasticsearch_backup_index_templates() {
tar -czf /nsm/backup/3.0.0_elasticsearch_index_templates.tar.gz -C /opt/so/conf/elasticsearch/templates/index/ .
}
ensure_postgres_local_pillar() {
  # Postgres became a service after 3.0.0, and the new pillar/top.sls refers
  # to postgres.soc_postgres / postgres.adv_postgres unconditionally. A
  # manager upgraded from 3.0.0 has no local pillar/postgres directory
  # (make_some_dirs only runs at install time), so the empty stubs are
  # created here, before salt-master restarts against the new top.sls.
  local pillar_dir=/opt/so/saltstack/local/pillar/postgres
  local stub

  echo "Ensuring postgres local pillar stubs exist."
  mkdir -p "$pillar_dir"
  # Only create stubs that are missing; existing files are not touched.
  for stub in soc_postgres.sls adv_postgres.sls; do
    if [[ ! -f "$pillar_dir/$stub" ]]; then
      touch "$pillar_dir/$stub"
    fi
  done
  chown -R socore:socore "$pillar_dir"
}
ensure_postgres_secret() {
  # Fresh installs seed secrets:postgres_pass through generate_passwords +
  # secrets_pillar. Upgrades skip that path because secrets.sls already
  # exists from 3.0.0 (import_pass/influx_pass, but no postgres_pass), which
  # would leave the postgres container's POSTGRES_PASSWORD_FILE and SOC's
  # PG_ADMIN_PASS empty after highstate. Backfill the value when it is absent.
  local secrets_file=/opt/so/saltstack/local/pillar/secrets.sls

  # Without a secrets file there is nothing to backfill into.
  [[ -f "$secrets_file" ]] || {
    echo "WARNING: $secrets_file missing; skipping postgres_pass backfill."
    return 0
  }

  if ! so-yaml.py get -r "$secrets_file" secrets.postgres_pass >/dev/null 2>&1; then
    echo "Seeding secrets.postgres_pass in $secrets_file."
    so-yaml.py add "$secrets_file" secrets.postgres_pass "$(get_random_value)"
    chown socore:socore "$secrets_file"
  else
    # A value is already stored; never rotate it here.
    echo "secrets.postgres_pass already set; leaving as-is."
    return 0
  fi
}
up_to_3.1.0() {
ensure_postgres_local_pillar
ensure_postgres_secret
determine_elastic_agent_upgrade
elasticsearch_backup_index_templates
# Clear existing component template state file.
@@ -502,6 +539,20 @@ post_to_3.1.0() {
salt-call state.apply salt.cloud.config concurrent=True
fi
# Backfill the Telegraf creds pillar for every accepted minion. so-telegraf-cred
# add is idempotent — it no-ops when an entry already exists — so this is safe
# to run on every soup. The subsequent state.apply creates/updates the matching
# Postgres roles from the reconciled pillar.
echo "Reconciling Telegraf Postgres creds for accepted minions."
for mid in $(salt-key --out=json --list=accepted 2>/dev/null | jq -r '.minions[]?' 2>/dev/null); do
[[ -n "$mid" ]] || continue
/usr/sbin/so-telegraf-cred add "$mid" || echo " warning: so-telegraf-cred add $mid failed" >&2
done
# Run through the master (not --local) so state compilation uses the
# master's configured file_roots; the manager's /etc/salt/minion has no
# file_roots of its own and --local would fail with "No matching sls found".
salt-call state.apply postgres.telegraf_users queue=True || true
POSTVERSION=3.1.0
}