From 05f6503d61b9faae6d9f8587e1c11725f3407ca4 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 10:05:08 -0400 Subject: [PATCH] Gate postgres telegraf fan-out on reactor-provided minion id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit postgres.auth was running an `unless` shell check per up-minion on every manager highstate, even when nothing had changed — N fork+python starts of so-yaml.py add up on large grids. The work is only needed when a specific minion's key is accepted. - salt/postgres/auth.sls: fan out only when postgres_fanout_minion pillar is set (targets that single minion). Manager highstates with no pillar take a zero-N code path. - salt/reactor/telegraf_user_sync.sls: re-pass the accepted minion id as postgres_fanout_minion to the orch. - salt/orch/telegraf_postgres_sync.sls: forward the pillar to the salt.state invocation so the state render sees it. - salt/manager/tools/sbin/soup: for the one-time 3.1.0 backfill, drop the per-minion state.apply and do an in-shell loop over the minion pillar files using so-yaml.py directly. Skips minions that already have postgres.telegraf.user set. --- salt/manager/tools/sbin/soup | 28 +++++++++++++++++++++++----- salt/orch/telegraf_postgres_sync.sls | 6 ++++++ salt/postgres/auth.sls | 26 +++++++++++--------------- salt/reactor/telegraf_user_sync.sls | 2 ++ 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 0adffef86..c19fe487e 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -490,14 +490,32 @@ up_to_3.1.0() { post_to_3.1.0() { /usr/sbin/so-kibana-space-defaults - # Provision per-minion Telegraf Postgres users for every minion known to the - # manager. postgres.auth iterates manage.up to generate any missing passwords; - # postgres.telegraf_users reconciles the roles and schemas inside the so-postgres - # container. Then push a telegraf state to every minion so their telegraf.conf - # picks up the new credentials on the first apply after soup. + # One-time backfill for minions that existed before the postgres Telegraf + # feature shipped. Generate the aggregate pillar on the manager and create + # the per-minion DB roles, then fan each minion's cred into its own pillar + # file. Going forward the reactor handles each new salt-key accept with a + # targeted fan-out, so a manager highstate no longer needs to iterate. echo "Provisioning Telegraf Postgres users for existing minions." salt-call --local state.apply postgres.auth,postgres.telegraf_users queue=True || true + AGGREGATE_PILLAR=/opt/so/saltstack/local/pillar/postgres/auth.sls + MINIONS_DIR=/opt/so/saltstack/local/pillar/minions + if [[ -f "$AGGREGATE_PILLAR" && -d "$MINIONS_DIR" ]]; then + for pillar_file in "$MINIONS_DIR"/*.sls; do + [[ -f "$pillar_file" ]] || continue + mid=$(basename "$pillar_file" .sls) + [[ "$mid" == adv_* ]] && continue + safe=$(echo "$mid" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') + existing_user=$(so-yaml.py get -r "$pillar_file" postgres.telegraf.user 2>/dev/null || true) + [[ "$existing_user" == "so_telegraf_${safe}" ]] && continue + user=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.user" 2>/dev/null || true) + pass=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.pass" 2>/dev/null || true) + [[ -z "$user" || -z "$pass" ]] && continue + so-yaml.py replace "$pillar_file" postgres.telegraf.user "$user" >/dev/null + so-yaml.py replace "$pillar_file" postgres.telegraf.pass "$pass" >/dev/null + done + fi + POSTVERSION=3.1.0 } diff --git a/salt/orch/telegraf_postgres_sync.sls b/salt/orch/telegraf_postgres_sync.sls index 94be77137..5b11d1619 100644 --- a/salt/orch/telegraf_postgres_sync.sls +++ b/salt/orch/telegraf_postgres_sync.sls @@ -12,6 +12,8 @@ # Target the manager via role grains — same pattern as orch/delete_hypervisor.sls. # The reactor doesn't know the manager's minion id, and grains.master on the # runner is a hostname, not a targetable id. +{% set FANOUT_MINION = salt['pillar.get']('postgres_fanout_minion', '') %} + manager_sync_telegraf_pg_users: salt.state: - tgt: 'G@role:so-manager or G@role:so-managerhype or G@role:so-managersearch or G@role:so-standalone or G@role:so-eval' @@ -20,3 +22,7 @@ manager_sync_telegraf_pg_users: - postgres.auth - postgres.telegraf_users - queue: True + {% if FANOUT_MINION %} + - pillar: + postgres_fanout_minion: {{ FANOUT_MINION }} + {% endif %} diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls index 44c89c581..e0397beba 100644 --- a/salt/postgres/auth.sls +++ b/salt/postgres/auth.sls @@ -50,13 +50,14 @@ postgres_auth_pillar: {% endfor %} - show_changes: False - {# Fan each minion's telegraf cred out to its own pillar file. The minions/ - .sls file is only served to that specific minion via pillar/top.sls - (`- minions.{{ grains.id }}`), so sensors, heavynodes, etc. see their own - credential without the admin password or anyone else's. Run per up-minion - so we have the original minion id (not just the safe-normalized version). #} - {% for mid in up_minions %} - {%- set safe = mid | replace('.','_') | replace('-','_') | lower %} + {# Fan a specific minion's telegraf cred out to its own pillar file. Only + runs when postgres_fanout_minion pillar is provided — otherwise this state + is a no-op. That keeps manager highstates from doing N so-yaml.py forks + when nothing changed. The reactor passes postgres_fanout_minion through + the orch on salt-key accept; soup handles bulk backfill separately. #} + {% set fanout_mid = salt['pillar.get']('postgres_fanout_minion') %} + {% if fanout_mid %} + {%- set safe = fanout_mid | replace('.','_') | replace('-','_') | lower %} {%- set key = 'telegraf_' ~ safe %} {%- set entry = telegraf_users.get(key) %} {%- if entry %} @@ -65,7 +66,7 @@ postgres_telegraf_minion_pillar_{{ safe }}: cmd.run: - name: | set -e - PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ mid }}.sls + PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ fanout_mid }}.sls if [ ! -f "$PILLAR_FILE" ]; then echo '{}' > "$PILLAR_FILE" chown socore:socore "$PILLAR_FILE" 2>/dev/null || true @@ -73,18 +74,13 @@ postgres_telegraf_minion_pillar_{{ safe }}: fi /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.user '{{ entry.user }}' /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.pass '{{ entry.pass }}' - {#- Skip if this minion's pillar file already carries a matching user. - Passwords are generated once per minion (see the `if key not in telegraf_users` - guard above) and never rotate, so once a cred is fanned out the file - doesn't need to be rewritten on subsequent auth runs. If we ever add - rotation, we'd need to delete postgres.telegraf to force a re-fan. #} - unless: | - [ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ] + [ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ fanout_mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ] - require: - file: postgres_auth_pillar {%- endif %} - {% endfor %} + {% endif %} {% else %} {{sls}}_state_not_allowed: diff --git a/salt/reactor/telegraf_user_sync.sls b/salt/reactor/telegraf_user_sync.sls index 4830dbc53..075dbf62e 100644 --- a/salt/reactor/telegraf_user_sync.sls +++ b/salt/reactor/telegraf_user_sync.sls @@ -10,6 +10,8 @@ runner.state.orchestrate: - args: - mods: orch.telegraf_postgres_sync + - pillar: + postgres_fanout_minion: {{ data['id'] }} {% do salt.log.info('telegraf_user_sync reactor: syncing telegraf PG user for minion %s' % data['id']) %}