Gate postgres telegraf fan-out on reactor-provided minion id

postgres.auth was running an `unless` shell check per up-minion on every
manager highstate, even when nothing had changed. Each check forks a shell
and starts a Python interpreter for so-yaml.py, so N of them add up on
large grids. The work is only needed when a specific minion's key is
accepted.

- salt/postgres/auth.sls: fan out only when postgres_fanout_minion
  pillar is set (targets that single minion). Manager highstates with
  no pillar take a zero-N code path.
- salt/reactor/telegraf_user_sync.sls: re-pass the accepted minion id
  as postgres_fanout_minion to the orch.
- salt/orch/telegraf_postgres_sync.sls: forward the pillar to the
  salt.state invocation so the state render sees it.
- salt/manager/tools/sbin/soup: for the one-time 3.1.0 backfill, drop
  the per-minion state.apply and do an in-shell loop over the minion
  pillar files using so-yaml.py directly. Skips minions that already
  have postgres.telegraf.user set.
This commit is contained in:
Mike Reeves
2026-04-21 10:05:08 -04:00
parent a149ea7e8f
commit 05f6503d61
4 changed files with 42 additions and 20 deletions
+23 -5
View File
@@ -490,14 +490,32 @@ up_to_3.1.0() {
# Post-upgrade steps for 3.1.0: reset Kibana space defaults, then run a
# one-time backfill of per-minion Telegraf Postgres credentials.
# Sets POSTVERSION=3.1.0 when done. The backfill is best-effort: a failure for
# one minion is reported and the loop continues.
post_to_3.1.0() {
    /usr/sbin/so-kibana-space-defaults

    # One-time backfill for minions that existed before the postgres Telegraf
    # feature shipped. The local state.apply below runs postgres.auth and
    # postgres.telegraf_users on the manager (generating the aggregate pillar
    # and creating the per-minion DB roles); the loop then fans each minion's
    # cred into its own pillar file. Going forward the reactor handles each
    # new salt-key accept with a targeted fan-out, so a manager highstate no
    # longer needs to iterate.
    echo "Provisioning Telegraf Postgres users for existing minions."
    salt-call --local state.apply postgres.auth,postgres.telegraf_users queue=True || true

    AGGREGATE_PILLAR=/opt/so/saltstack/local/pillar/postgres/auth.sls
    MINIONS_DIR=/opt/so/saltstack/local/pillar/minions

    # Loop scratch variables stay local so generic names such as `user` and
    # `pass` cannot clobber same-named variables elsewhere in the script.
    local pillar_file mid safe existing_user user pass

    if [[ -f "$AGGREGATE_PILLAR" && -d "$MINIONS_DIR" ]]; then
        for pillar_file in "$MINIONS_DIR"/*.sls; do
            # An unmatched glob stays literal, so skip anything that is not a file.
            [[ -f "$pillar_file" ]] || continue

            mid=$(basename "$pillar_file" .sls)
            # adv_*.sls files are skipped -- presumably companion pillar files
            # rather than minion ids; confirm against the pillar layout.
            [[ "$mid" == adv_* ]] && continue

            # Normalize the id the same way the aggregate pillar keys are
            # built: '.' and '-' become '_', then lowercase.
            safe=$(printf '%s' "$mid" | tr '.-' '__' | tr '[:upper:]' '[:lower:]')

            # Already backfilled: the file carries the expected user.
            existing_user=$(so-yaml.py get -r "$pillar_file" postgres.telegraf.user 2>/dev/null || true)
            [[ "$existing_user" == "so_telegraf_${safe}" ]] && continue

            # No credential in the aggregate pillar for this minion: nothing to copy.
            user=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.user" 2>/dev/null || true)
            pass=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.pass" 2>/dev/null || true)
            [[ -z "$user" || -z "$pass" ]] && continue

            # Write the password first and the user last. The user key is the
            # "already done" marker checked above, so it must never exist
            # without its password: a failed write then leaves the minion
            # eligible for a retry instead of being skipped forever.
            if ! so-yaml.py replace "$pillar_file" postgres.telegraf.pass "$pass" >/dev/null ||
               ! so-yaml.py replace "$pillar_file" postgres.telegraf.user "$user" >/dev/null; then
                echo "WARNING: could not write Telegraf Postgres credentials for $mid to $pillar_file"
            fi
        done
    fi

    POSTVERSION=3.1.0
}
+6
View File
@@ -12,6 +12,8 @@
# Target the manager via role grains — same pattern as orch/delete_hypervisor.sls.
# The reactor doesn't know the manager's minion id, and grains.master on the
# runner is a hostname, not a targetable id.
{% set FANOUT_MINION = salt['pillar.get']('postgres_fanout_minion', '') %}
manager_sync_telegraf_pg_users:
salt.state:
- tgt: 'G@role:so-manager or G@role:so-managerhype or G@role:so-managersearch or G@role:so-standalone or G@role:so-eval'
@@ -20,3 +22,7 @@ manager_sync_telegraf_pg_users:
- postgres.auth
- postgres.telegraf_users
- queue: True
{% if FANOUT_MINION %}
{# Forward the accepted minion id so the postgres.auth render can see it.
   The value is quoted so the YAML parser always reads the rendered id as a
   string, even when it looks like a number, boolean or null. #}
- pillar:
postgres_fanout_minion: "{{ FANOUT_MINION }}"
{% endif %}
+11 -15
View File
@@ -50,13 +50,14 @@ postgres_auth_pillar:
{% endfor %}
- show_changes: False
{# Fan each minion's telegraf cred out to its own pillar file. The minions/
<id>.sls file is only served to that specific minion via pillar/top.sls
(`- minions.{{ grains.id }}`), so sensors, heavynodes, etc. see their own
credential without the admin password or anyone else's. Run per up-minion
so we have the original minion id (not just the safe-normalized version). #}
{% for mid in up_minions %}
{%- set safe = mid | replace('.','_') | replace('-','_') | lower %}
{# Fan a specific minion's telegraf cred out to its own pillar file. Only
runs when postgres_fanout_minion pillar is provided — otherwise this state
is a no-op. That keeps manager highstates from doing N so-yaml.py forks
when nothing changed. The reactor passes postgres_fanout_minion through
the orch on salt-key accept; soup handles bulk backfill separately. #}
{% set fanout_mid = salt['pillar.get']('postgres_fanout_minion') %}
{% if fanout_mid %}
{%- set safe = fanout_mid | replace('.','_') | replace('-','_') | lower %}
{%- set key = 'telegraf_' ~ safe %}
{%- set entry = telegraf_users.get(key) %}
{%- if entry %}
@@ -65,7 +66,7 @@ postgres_telegraf_minion_pillar_{{ safe }}:
cmd.run:
- name: |
set -e
PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ mid }}.sls
PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ fanout_mid }}.sls
if [ ! -f "$PILLAR_FILE" ]; then
echo '{}' > "$PILLAR_FILE"
chown socore:socore "$PILLAR_FILE" 2>/dev/null || true
@@ -73,18 +74,13 @@ postgres_telegraf_minion_pillar_{{ safe }}:
fi
/usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.user '{{ entry.user }}'
/usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.pass '{{ entry.pass }}'
{#- Skip if this minion's pillar file already carries a matching user.
Passwords are generated once per minion (see the `if key not in telegraf_users`
guard above) and never rotate, so once a cred is fanned out the file
doesn't need to be rewritten on subsequent auth runs. If we ever add
rotation, we'd need to delete postgres.telegraf to force a re-fan. #}
- unless: |
[ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ]
[ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ fanout_mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ]
- require:
- file: postgres_auth_pillar
{%- endif %}
{% endfor %}
{% endif %}
{% else %}
{{sls}}_state_not_allowed:
+2
View File
@@ -10,6 +10,8 @@
runner.state.orchestrate:
- args:
- mods: orch.telegraf_postgres_sync
- pillar:
postgres_fanout_minion: {{ data['id'] }}
{% do salt.log.info('telegraf_user_sync reactor: syncing telegraf PG user for minion %s' % data['id']) %}