diff --git a/pillar/telegraf/creds.sls b/pillar/telegraf/creds.sls new file mode 100644 index 000000000..8521bfbd9 --- /dev/null +++ b/pillar/telegraf/creds.sls @@ -0,0 +1,12 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Per-minion Telegraf Postgres credentials. so-telegraf-cred on the manager is +# the single writer; it mutates /opt/so/saltstack/local/pillar/telegraf/creds.sls +# under flock. Pillar_roots order (local before default) means the populated +# copy shadows this default on any real grid; this file exists so the pillar +# key is always defined on fresh installs and when no minions have creds yet. +telegraf: + postgres_creds: {} diff --git a/pillar/top.sls b/pillar/top.sls index 808182c2b..712629dbf 100644 --- a/pillar/top.sls +++ b/pillar/top.sls @@ -17,6 +17,7 @@ base: - sensoroni.adv_sensoroni - telegraf.soc_telegraf - telegraf.adv_telegraf + - telegraf.creds - versionlock.soc_versionlock - versionlock.adv_versionlock - soc.license diff --git a/salt/manager/tools/sbin/so-minion b/salt/manager/tools/sbin/so-minion index 4095637c8..86bab25e6 100755 --- a/salt/manager/tools/sbin/so-minion +++ b/salt/manager/tools/sbin/so-minion @@ -281,22 +281,18 @@ function deleteMinionFiles () { fi } -# Remove this minion's postgres Telegraf credential from both the aggregate -# pillar and the postgres database. Paired with add_telegraf_to_minion: -# add/delete cycle both here and in the DB. Always returns 0 so a dead or -# unreachable so-postgres doesn't block minion deletion — in that case we +# Remove this minion's postgres Telegraf credential from the shared creds +# pillar and drop the matching role in Postgres. 
Always returns 0 so a dead +# or unreachable so-postgres doesn't block minion deletion — in that case we # log a warning and leave the role behind for manual cleanup. function remove_postgres_telegraf_from_minion() { local MINION_SAFE MINION_SAFE=$(echo "$MINION_ID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') local PG_USER="so_telegraf_${MINION_SAFE}" - local AGGREGATE=/opt/so/saltstack/local/pillar/postgres/auth.sls log "INFO" "Removing postgres telegraf cred for $MINION_ID" - if [[ -f "$AGGREGATE" ]]; then - so-yaml.py remove "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}" >/dev/null 2>&1 || true - fi + so-telegraf-cred remove "$MINION_ID" >/dev/null 2>&1 || true if docker ps --format '{{.Names}}' 2>/dev/null | grep -q '^so-postgres$'; then if ! docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf >/dev/null 2>&1 <.{user,pass} into the aggregate - # pillar so postgres.telegraf_users CREATE ROLE finds it. - # - # An existing password is reused if the aggregate already has one (re-add), - # so rerunning so-minion for the same minion keeps the cred stable. 
- local MINION_SAFE - MINION_SAFE=$(echo "$MINION_ID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') - local PG_USER="so_telegraf_${MINION_SAFE}" - local AGGREGATE=/opt/so/saltstack/local/pillar/postgres/auth.sls - local PG_PASS="" - if [[ -f "$AGGREGATE" ]]; then - PG_PASS=$(so-yaml.py get -r "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}.pass" 2>/dev/null || true) - fi - if [[ -z "$PG_PASS" ]]; then - PG_PASS=$(tr -dc 'A-Za-z0-9~!@#^&*()_=+[]|;:,.<>?-' < /dev/urandom | head -c 72) - fi - - so-yaml.py replace "$PILLARFILE" postgres.telegraf.user "$PG_USER" >/dev/null - so-yaml.py replace "$PILLARFILE" postgres.telegraf.pass "$PG_PASS" >/dev/null - if [[ -f "$AGGREGATE" ]]; then - so-yaml.py replace "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}.user" "$PG_USER" >/dev/null - so-yaml.py replace "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}.pass" "$PG_PASS" >/dev/null + # Provision the per-minion postgres Telegraf credential in the shared + # telegraf/creds.sls pillar. so-telegraf-cred is the only writer; it + # generates a password on first add and is a no-op on re-add so the cred + # is stable across repeated so-minion runs. postgres.telegraf_users on the + # manager creates/updates the DB role from the same pillar. + so-telegraf-cred add "$MINION_ID" + if [ $? -ne 0 ]; then + log "ERROR" "Failed to provision postgres telegraf cred for $MINION_ID" + return 1 fi } diff --git a/salt/manager/tools/sbin/so-telegraf-cred b/salt/manager/tools/sbin/so-telegraf-cred new file mode 100644 index 000000000..b2b1ba030 --- /dev/null +++ b/salt/manager/tools/sbin/so-telegraf-cred @@ -0,0 +1,54 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+
+# Single writer for the Telegraf Postgres credentials pillar. Thin wrapper
+# around so-yaml.py: "add" generates a password unless the entry already has
+# both user and pass (so the cred is stable across so-minion runs); "remove" drops it.
+# NOTE(review): no lock is taken here; confirm callers never run concurrently.
+# Note: so-yaml.py splits keys on '.' with no escape. SO minion ids are
+# dot-free by construction (setup/so-functions:1884 takes the short_name
+# before the first '.'), so using the raw minion id as the key is safe.
+
+CREDS=/opt/so/saltstack/local/pillar/telegraf/creds.sls
+
+usage() {  # print the synopsis to stderr and exit 2
+  echo "Usage: $0 {add|remove} MINION_ID" >&2
+  exit 2
+}
+
+seed_creds_file() {  # create an empty creds pillar (mode 640) if none exists yet
+  mkdir -p "$(dirname "$CREDS")"
+  if [[ ! -f "$CREDS" ]]; then
+    (umask 027 && printf 'telegraf:\n postgres_creds: {}\n' > "$CREDS")
+    chown socore:socore "$CREDS" 2>/dev/null || true  # best effort; a failed chown is ignored
+    chmod 640 "$CREDS"
+  fi
+}
+
+OP=$1
+MID=$2
+[[ -z "$OP" || -z "$MID" ]] && usage  # both arguments are required
+
+case "$OP" in
+  add)
+    SAFE=$(echo "$MID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]')  # '.' and '-' become '_', lowercased
+    seed_creds_file
+    if so-yaml.py get -r "$CREDS" "telegraf.postgres_creds.${MID}.user" >/dev/null 2>&1 && so-yaml.py get -r "$CREDS" "telegraf.postgres_creds.${MID}.pass" >/dev/null 2>&1; then
+      exit 0  # complete entry already present: keep the existing cred
+    fi
+    PASS=$(tr -dc 'A-Za-z0-9~!@#^&*()_=+[]|;:,.<>?-' < /dev/urandom | head -c 72)  # missing or partial entry: (re)generate
+    so-yaml.py replace "$CREDS" "telegraf.postgres_creds.${MID}.user" "so_telegraf_${SAFE}" >/dev/null || exit 1  # surface write failures to the caller
+    so-yaml.py replace "$CREDS" "telegraf.postgres_creds.${MID}.pass" "$PASS" >/dev/null || exit 1
+    ;;
+  remove)
+    [[ -f "$CREDS" ]] || exit 0  # nothing to remove when the pillar file does not exist
+    so-yaml.py remove "$CREDS" "telegraf.postgres_creds.${MID}" >/dev/null 2>&1 || true  # best effort: removal errors are ignored
+    ;;
+  *)
+    usage
+    ;;
+esac
diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 1580e83dd..2c727c0f7 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -490,32 +490,16 @@ up_to_3.1.0() { post_to_3.1.0() { /usr/sbin/so-kibana-space-defaults - # One-time backfill for minions that existed before the postgres Telegraf - # feature shipped. 
postgres.auth's up_minions fallback loop generates any - # missing aggregate pillar entries; postgres.telegraf_users CREATEs the - # matching DB roles; then the bash loop below copies each minion's cred - # into its own pillar file. Going forward, so-minion owns add/delete for - # every new minion, so this backfill is only needed on the upgrade boundary. - echo "Provisioning Telegraf Postgres users for existing minions." - salt-call --local state.apply postgres.auth,postgres.telegraf_users queue=True || true - - AGGREGATE_PILLAR=/opt/so/saltstack/local/pillar/postgres/auth.sls - MINIONS_DIR=/opt/so/saltstack/local/pillar/minions - if [[ -f "$AGGREGATE_PILLAR" && -d "$MINIONS_DIR" ]]; then - for pillar_file in "$MINIONS_DIR"/*.sls; do - [[ -f "$pillar_file" ]] || continue - mid=$(basename "$pillar_file" .sls) - [[ "$mid" == adv_* ]] && continue - safe=$(echo "$mid" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') - existing_user=$(so-yaml.py get -r "$pillar_file" postgres.telegraf.user 2>/dev/null || true) - [[ "$existing_user" == "so_telegraf_${safe}" ]] && continue - user=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.user" 2>/dev/null || true) - pass=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.pass" 2>/dev/null || true) - [[ -z "$user" || -z "$pass" ]] && continue - so-yaml.py replace "$pillar_file" postgres.telegraf.user "$user" >/dev/null - so-yaml.py replace "$pillar_file" postgres.telegraf.pass "$pass" >/dev/null - done - fi + # Backfill the Telegraf creds pillar for every accepted minion. so-telegraf-cred + # add is idempotent — it no-ops when an entry already exists — so this is safe + # to run on every soup. The subsequent state.apply creates/updates the matching + # Postgres roles from the reconciled pillar. + echo "Reconciling Telegraf Postgres creds for accepted minions." + for mid in $(salt-key --out=json --list=accepted 2>/dev/null | jq -r '.minions[]?' 
2>/dev/null); do + [[ -n "$mid" ]] || continue + /usr/sbin/so-telegraf-cred add "$mid" || echo " warning: so-telegraf-cred add $mid failed" >&2 + done + salt-call --local state.apply postgres.telegraf_users queue=True || true POSTVERSION=3.1.0 } diff --git a/salt/orch/deploy_newnode.sls b/salt/orch/deploy_newnode.sls index c05a812a3..ee241ef33 100644 --- a/salt/orch/deploy_newnode.sls +++ b/salt/orch/deploy_newnode.sls @@ -25,8 +25,33 @@ manager_run_es_soc: - salt: {{NEWNODE}}_update_mine {% endif %} +# so-minion has already added the new minion's entry to telegraf/creds.sls +# via so-telegraf-cred before this orch fires. Reconcile the Postgres role +# on the manager so the new minion can authenticate on its first highstate, +# then refresh the minion's pillar so its telegraf.conf renders with the +# freshly-written cred. +manager_create_postgres_telegraf_role: + salt.state: + - tgt: {{ MANAGER }} + - sls: + - postgres.telegraf_users + - queue: True + - require: + - salt: {{NEWNODE}}_update_mine + +{{NEWNODE}}_refresh_pillar: + salt.function: + - name: saltutil.refresh_pillar + - tgt: {{ NEWNODE }} + - kwarg: + wait: True + - require: + - salt: manager_create_postgres_telegraf_role + {{NEWNODE}}_run_highstate: salt.state: - tgt: {{ NEWNODE }} - highstate: True - queue: True + - require: + - salt: {{NEWNODE}}_refresh_pillar diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls index 3da1bcde0..4f486ff02 100644 --- a/salt/postgres/auth.sls +++ b/salt/postgres/auth.sls @@ -13,24 +13,8 @@ {% set CHARS = DIGITS~LOWERCASE~UPPERCASE~SYMBOLS %} {% set so_postgres_user_pass = salt['pillar.get']('postgres:auth:users:so_postgres_user:pass', salt['random.get_str'](72, chars=CHARS)) %} - {# Per-minion Telegraf Postgres credentials. Merge currently-up minions with any #} - {# previously-known entries in pillar so existing passwords persist across runs. 
#} - {% set existing = salt['pillar.get']('postgres:auth:users', {}) %} - {% set up_minions = salt['saltutil.runner']('manage.up') or [] %} - {% set telegraf_users = {} %} - {% for key, entry in existing.items() %} - {%- if key.startswith('telegraf_') and entry.get('user') and entry.get('pass') %} - {%- do telegraf_users.update({key: entry}) %} - {%- endif %} - {% endfor %} - {% for mid in up_minions %} - {%- set safe = mid | replace('.','_') | replace('-','_') | lower %} - {%- set key = 'telegraf_' ~ safe %} - {%- if key not in telegraf_users %} - {%- do telegraf_users.update({key: {'user': 'so_telegraf_' ~ safe, 'pass': salt['random.get_str'](72, chars=CHARS)}}) %} - {%- endif %} - {% endfor %} - +# Admin cred only. Per-minion Telegraf creds live in telegraf/creds.sls, +# managed by /usr/sbin/so-telegraf-cred (called from so-minion). postgres_auth_pillar: file.managed: - name: /opt/so/saltstack/local/pillar/postgres/auth.sls @@ -43,11 +27,6 @@ postgres_auth_pillar: so_postgres_user: user: so_postgres pass: "{{ so_postgres_user_pass }}" - {% for key, entry in telegraf_users.items() %} - {{ key }}: - user: {{ entry.user }} - pass: "{{ entry.pass }}" - {% endfor %} - show_changes: False {% else %} diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index dbbc0f03e..62490ea52 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -10,7 +10,7 @@ {# postgres_wait_ready below requires `docker_container: so-postgres`, which is declared in postgres.enabled. Include it here so state.apply postgres.telegraf_users - on its own (from the reactor orch or from soup) still has that ID in scope. Salt + on its own (e.g. from orch.deploy_newnode) still has that ID in scope. Salt de-duplicates the circular include. 
#} include: - postgres.enabled @@ -96,9 +96,9 @@ postgres_telegraf_group_role: - require: - cmd: postgres_create_telegraf_db -{% set users = salt['pillar.get']('postgres:auth:users', {}) %} -{% for key, entry in users.items() %} -{% if key.startswith('telegraf_') and entry.get('user') and entry.get('pass') %} +{% set creds = salt['pillar.get']('telegraf:postgres_creds', {}) %} +{% for mid, entry in creds.items() %} +{% if entry.get('user') and entry.get('pass') %} {% set u = entry.user %} {% set p = entry.pass | replace("'", "''") %} diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 53b96e4ab..02d969ff3 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -10,12 +10,12 @@ {%- set LOGSTASH_ENABLED = LOGSTASH_MERGED.enabled %} {%- set TG_OUT = TELEGRAFMERGED.output | upper %} {%- set PG_HOST = GLOBALS.manager_ip %} -{#- Per-minion telegraf creds are written into the minion's own pillar file - (/opt/so/saltstack/local/pillar/minions/.sls) by postgres.auth on the - manager. Each minion only sees its own password — the aggregate map in - postgres:auth:users is manager-scoped. #} -{%- set PG_USER = salt['pillar.get']('postgres:telegraf:user', '') %} -{%- set PG_PASS = salt['pillar.get']('postgres:telegraf:pass', '') %} +{#- Per-minion telegraf creds live in the grid-wide telegraf/creds.sls pillar, + written by /usr/sbin/so-telegraf-cred on the manager. Each minion looks up + its own entry by grains.id. #} +{%- set PG_ENTRY = salt['pillar.get']('telegraf:postgres_creds:' ~ grains.id, {}) %} +{%- set PG_USER = PG_ENTRY.get('user', '') %} +{%- set PG_PASS = PG_ENTRY.get('pass', '') %} # Global tags can be specified here in key="value" format. [global_tags] role = "{{ GLOBALS.role.split('-') | last }}"