maxSubSessionTokens and maxDelegationDepth config settings

Merge pull request #15971 from Security-Onion-Solutions/delta
userid vs names
2026-06-16 07:08:41 +02:00 · 2026-06-15 11:40:01 -04:00 · 2026-06-15 15:28:03 +02:00 · 2026-06-12 11:18:59 -04:00 · 2026-06-11 14:36:03 -05:00 · 2026-06-11 08:22:14 -04:00
15 changed files with 256 additions and 11 deletions
@@ -101,6 +101,17 @@ so-elastic-fleet:
      - file: trusttheca
      - x509: etc_elasticfleet_key
      - x509: etc_elasticfleet_crt
+
+# Poll the so-elastic-fleet status endpoint on localhost:8220 until it answers
+# HTTP 200. Only evaluated after the so-elastic-fleet docker_container state.
+wait_for_so-elastic-fleet:
+  http.wait_for_successful_query:
+    - name: "https://localhost:8220/api/status"
+    - ssl: True
+    # Certificate verification is turned off for this localhost probe.
+    - verify_ssl: False
+    - status: 200
+    # Give up after wait_for (300), re-querying every request_interval (15);
+    # Salt documents both values as seconds.
+    - wait_for: 300
+    - request_interval: 15
+    - require:
+      - docker_container: so-elastic-fleet
 {%   endif %}

 delete_so-elastic-fleet_so-status.disabled:
@@ -9,6 +9,7 @@

 include:
  - elasticfleet.config
+  - kibana.enabled

 # If enabled, automatically update Fleet Logstash Outputs
 {% if ELASTICFLEETMERGED.config.server.enable_auto_configuration %}
@@ -19,6 +20,8 @@ so-elastic-fleet-auto-configure-logstash-outputs:
    - retry:
        attempts: 4
        interval: 30
+    - require:
+      - http: wait_for_so-kibana
 {%   endif %}

 # If enabled, automatically update Fleet Server URLs & ES Connection
@@ -28,6 +31,8 @@ so-elastic-fleet-auto-configure-server-urls:
    - retry:
        attempts: 4
        interval: 30
+    - require:
+      - http: wait_for_so-kibana
 {% endif %}

 # Automatically update Fleet Server Elasticsearch URLs & Agent Artifact URLs
@@ -37,6 +42,8 @@ so-elastic-fleet-auto-configure-elasticsearch-urls:
    - retry:
        attempts: 4
        interval: 30
+    - require:
+      - http: wait_for_so-kibana

 so-elastic-fleet-auto-configure-artifact-urls:
  cmd.run:
@@ -44,6 +51,8 @@ so-elastic-fleet-auto-configure-artifact-urls:
    - retry:
        attempts: 4
        interval: 30
+    - require:
+      - http: wait_for_so-kibana

 so-elastic-fleet-package-statefile:
  file.managed:
@@ -55,7 +64,9 @@ so-elastic-fleet-package-upgrade:
    - name: /usr/sbin/so-elastic-fleet-package-upgrade
    - retry:
        attempts: 3
-        interval: 10
+        interval: 30
+    - require:
+      - http: wait_for_so-kibana
    - onchanges:
      - file: /opt/so/state/elastic_fleet_packages.txt

@@ -65,6 +76,8 @@ so-elastic-fleet-integrations:
    - retry:
        attempts: 3
        interval: 10
+    - require:
+      - http: wait_for_so-kibana

 so-elastic-agent-grid-upgrade:
  cmd.run:
@@ -72,6 +85,8 @@ so-elastic-agent-grid-upgrade:
    - retry:
        attempts: 12
        interval: 5
+    - require:
+      - http: wait_for_so-kibana

 so-elastic-fleet-integration-upgrade:
  cmd.run:
@@ -79,16 +94,22 @@ so-elastic-fleet-integration-upgrade:
    - retry:
        attempts: 3
        interval: 10
+    - require:
+      - http: wait_for_so-kibana

 {# Optional integrations script doesn't need the retries like so-elastic-fleet-integration-upgrade which loads the default integrations #}
 so-elastic-fleet-addon-integrations:
  cmd.run:
    - name: /usr/sbin/so-elastic-fleet-optional-integrations-load
+    - require:
+      - http: wait_for_so-kibana

 {% if ELASTICFLEETMERGED.config.defend_filters.enable_auto_configuration %}
 so-elastic-defend-manage-filters-file-watch:
  cmd.run:
    - name: python3 /sbin/so-elastic-defend-manage-filters.py -c /opt/so/conf/elasticsearch/curl.config -d /opt/so/conf/elastic-fleet/defend-exclusions/disabled-filters.yaml -i /nsm/securityonion-resources/event_filters/ -i /opt/so/conf/elastic-fleet/defend-exclusions/rulesets/custom-filters/ &>> /opt/so/log/elasticfleet/elastic-defend-manage-filters.log
+    - require:
+      - http: wait_for_so-kibana
    - onchanges:
      - file: elasticdefendcustom
      - file: elasticdefenddisabled
@@ -108,9 +108,12 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then
  done

  # Only create the state file if all policies were created/updated successfully
-  if [[ "$RETURN_CODE" != "1" ]]; then
+  if [[ $RETURN_CODE -eq 0 ]]; then
    touch /opt/so/state/eaintegrations.txt
+  else
+    exit 1
  fi
 else
-  exit $RETURN_CODE
+  echo "Fleet integration policies already loaded."
+  exit 0
 fi
@@ -8,18 +8,33 @@

 . /usr/sbin/so-elastic-fleet-common

+PKG_LOAD_FAILURES=0
+PKG_LOAD_FAILURES_NAMES=()
+
 {%- for PACKAGE in SUPPORTED_PACKAGES %}
 echo "Upgrading {{ PACKAGE }} package..."
 if VERSION=$(elastic_fleet_package_latest_version_check "{{ PACKAGE }}"); then
    if ! elastic_fleet_package_install "{{ PACKAGE }}" "$VERSION"; then
-        # exit 1 on failure to upgrade a default package, allow salt to handle retries
-        echo -e "\nERROR: Failed to upgrade $PACKAGE to version: $VERSION"
-        exit 1
+        PKG_LOAD_FAILURES=$((PKG_LOAD_FAILURES + 1))
+        PKG_LOAD_FAILURES_NAMES+=("{{ PACKAGE }}")
    fi
 else
-    echo -e "\nERROR: Failed to get version information for integration $PACKAGE"
+    PKG_LOAD_FAILURES=$((PKG_LOAD_FAILURES + 1))
+    PKG_LOAD_FAILURES_NAMES+=("{{ PACKAGE }}")
 fi
 echo
 {%- endfor %}
+
+if [ $PKG_LOAD_FAILURES -gt 0 ]; then
+    echo "ERROR: Failed to upgrade $PKG_LOAD_FAILURES package(s):"
+    for PKG in "${PKG_LOAD_FAILURES_NAMES[@]}"; do
+        echo " - $PKG"
+    done
+    # exit 1 on failure to upgrade a default package, allow salt to handle retries
+    exit 1
+else
+    echo "Successfully upgraded all packages."
+fi
+
 echo
 /usr/sbin/so-elasticsearch-templates-load
@@ -32,7 +32,7 @@ so-kafka:
    - networks:
      - sobridge:
        - ipv4_address: {{ DOCKERMERGED.containers['so-kafka'].ip }}
-    - user: kafka
+    - user: "960"
    - environment:
        KAFKA_HEAP_OPTS: -Xmx2G -Xms1G
        KAFKA_OPTS: "-javaagent:/opt/jolokia/agents/jolokia-agent-jvm-javaagent.jar=port=8778,host={{ DOCKERMERGED.containers['so-kafka'].ip }},policyLocation=file:/opt/jolokia/jolokia.xml {%- if KAFKA_EXTERNAL_ACCESS %} -Djava.security.auth.login.config=/opt/kafka/config/kafka_server_jaas.conf {% endif -%}"
@@ -6,6 +6,7 @@
 {% from 'allowed_states.map.jinja' import allowed_states %}
 {% if sls.split('.')[0] in allowed_states %}
 {%   from 'docker/docker.map.jinja' import DOCKERMERGED %}
+{%   from 'elasticsearch/config.map.jinja' import ELASTICSEARCHMERGED %}
 {%   from 'vars/globals.map.jinja' import GLOBALS %}

 include:
@@ -17,7 +18,7 @@ so-kibana:
  docker_container.running:
    - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-kibana:{{ GLOBALS.so_version }}
    - hostname: kibana
-    - user: kibana
+    - user: "932:0"
    - networks:
      - sobridge:
        - ipv4_address: {{ DOCKERMERGED.containers['so-kibana'].ip }}
@@ -60,6 +61,19 @@ so-kibana:
    - watch:
      - file: kibanaconfig

+# Poll the Kibana status API with the so_elastic credentials until it answers
+# HTTP 200. Other states (e.g. the elasticfleet auto-configure commands) gate on
+# this ID via `require: - http: wait_for_so-kibana`.
+# NOTE(review): the URL scheme is http:// while `ssl: True` / `verify_ssl: False`
+# are also set (wait_for_so-elastic-fleet uses https://) -- confirm which is
+# intended for Kibana.
+wait_for_so-kibana:
+  http.wait_for_successful_query:
+    - name: "http://localhost:5601/api/status"
+    - username: 'so_elastic'
+    - password: '{{ ELASTICSEARCHMERGED.auth.users.so_elastic_user.pass }}'
+    - ssl: True
+    - verify_ssl: False
+    - status: 200
+    # Give up after wait_for (300), re-querying every request_interval (15);
+    # Salt documents both values as seconds.
+    - wait_for: 300
+    - request_interval: 15
+    - require:
+      - docker_container: so-kibana
+
 delete_so-kibana_so-status.disabled:
  file.uncomment:
    - name: /opt/so/conf/so-status/so-status.conf
@@ -33,7 +33,7 @@ so-logstash:
    - networks:
      - sobridge:
        - ipv4_address: {{ DOCKERMERGED.containers['so-logstash'].ip }}
-    - user: logstash
+    - user: "931:0"
    - extra_hosts:
    {% for node in LOGSTASH_NODES %}
    {%   for hostname, ip in node.items() %}
@@ -0,0 +1,117 @@
+#!/bin/bash
+#
+# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
+# https://securityonion.net/license; you may not use this file except in compliance with the
+# Elastic License 2.0.
+
+# Runs once per boot on managers (via so-boot-mine-update.service), before
+# so-boot-highstate.service. Waits for the responsive minion set to settle, pushes
+# mine.update, waits until every up minion has actually reported to the mine, then
+# warms the master's per-minion pillar cache so the mine-backed node pillars (node
+# IPs, ES/Redis/Logstash/hypervisor discovery -- some glob- and some pillar/grain-
+# targeted) are complete before the boot highstate renders them. Otherwise a node
+# that is up but not yet fully reported gets dropped from those pillars and torn
+# out of the configs they build (e.g. so-elasticsearch ExtraHosts -> container recreate).
+
+# Poll settings and counters. elapsed accumulates the seconds slept so far and is
+# shared with the later wait loops in this script.
+MAX_WAIT=${MINE_UPDATE_MAX_WAIT:-180}   # backstop only; override via MINE_UPDATE_MAX_WAIT
+INTERVAL=10                              # seconds slept between polls
+STABLE_CHECKS=3                          # consecutive unchanged polls needed to call it settled
+elapsed=0
+last_up=-1
+steady_polls=0
+up=0
+
+# Settle on the set of minions that actually respond instead of waiting for every
+# accepted key: a host whose key was accepted may be powered off on purpose, in
+# which case up >= accepted would never hold and the full MAX_WAIT would always
+# be spent. As soon as the responsive count has held steady, stop waiting and run
+# mine.update against the minions that are up.
+while [ "$elapsed" -lt "$MAX_WAIT" ]; do
+  # Number of responsive minions; left empty when salt-run or the JSON parse fails.
+  up=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null \
+    | python3 -c 'import sys,json; print(len(json.load(sys.stdin)))' 2>/dev/null)
+  : "${up:=0}"
+  if [ "$up" -gt 0 ] && [ "$up" -eq "$last_up" ]; then
+    steady_polls=$((steady_polls + 1))
+    if [ "$steady_polls" -ge "$STABLE_CHECKS" ]; then
+      break
+    fi
+  else
+    # Count changed (or nothing is up yet): start counting steady polls again.
+    steady_polls=0
+  fi
+  last_up=$up
+  sleep "$INTERVAL"
+  elapsed=$((elapsed + INTERVAL))
+done
+
+echo "so-boot-mine-update: ${up} minions up (settled after ${elapsed}s); running mine.update"
+/usr/bin/salt '*' mine.update --out=txt
+
+# A node that is up but has not yet re-reported network.ip_addrs to the mine is
+# silently dropped from mine-backed pillars (elasticsearch:nodes, node_data, ...)
+# when highstate recompiles them -- which e.g. removes it from so-elasticsearch
+# ExtraHosts and forces a container recreate. After the broad mine.update above,
+# wait until every up minion actually has network.ip_addrs in the mine, re-pushing
+# mine.update to stragglers, before releasing the boot highstate. Bounded by the
+# same MAX_WAIT backstop so a slow/down node never blocks boot indefinitely.
+# Re-check the mine until every up minion has a non-empty network.ip_addrs entry,
+# re-pushing mine.update to the stragglers. If the salt-run output cannot be
+# evaluated, the state is treated as unknown and retried; it is never reported
+# as complete.
+missing=""
+while [ "$elapsed" -lt "$MAX_WAIT" ]; do
+  up_json=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null)
+  mine_json=$(/usr/bin/salt-run mine.get '*' network.ip_addrs tgt_type=glob --out=json 2>/dev/null)
+  # The helper exits non-zero when manage.up output is empty/not JSON or mine.get
+  # output is not JSON. Empty mine.get output is read as an empty mine, so every
+  # up minion is then listed as missing and gets mine.update re-pushed.
+  if ! missing=$(printf '%s' "$up_json" | python3 -c '
+import sys, json
+up = set(json.load(sys.stdin) or [])
+mine = {k for k, v in (json.loads(sys.argv[1].strip() or "{}") or {}).items() if v}
+print("\n".join(sorted(up - mine)))
+' "$mine_json" 2>/dev/null); then
+    echo "so-boot-mine-update: could not evaluate manage.up/mine.get output; retrying"
+    # Non-empty placeholder so the backstop WARNING below still fires.
+    missing="unknown"
+    sleep "$INTERVAL"
+    elapsed=$((elapsed + INTERVAL))
+    continue
+  fi
+  if [ -z "$missing" ]; then
+    echo "so-boot-mine-update: mine complete for all up minions after ${elapsed}s"
+    break
+  fi
+  echo "so-boot-mine-update: mine missing up minion(s): $(echo $missing); re-running mine.update"
+  for m in $missing; do /usr/bin/salt "$m" mine.update --out=txt; done
+  sleep "$INTERVAL"
+  elapsed=$((elapsed + INTERVAL))
+done
+[ -n "$missing" ] && echo "so-boot-mine-update: WARNING ${MAX_WAIT}s backstop hit; up minion(s) still absent from mine: $(echo $missing); highstate may drop them from configs"
+
+# The pillar/compound-targeted node pillars (elasticsearch:nodes, redis:nodes,
+# logstash:nodes, hypervisor:nodes) resolve their target against the master's
+# per-minion data cache (grains+pillar in .../minions/<id>/data.p), populated only
+# when a minion's pillar is (re)compiled -- separately from the mine. A freshly
+# booted node can be in the mine (glob/node_data sees it) yet absent from that
+# cache, so it is dropped from those pillars and from the configs they build (e.g.
+# so-elasticsearch ExtraHosts). Force a synchronous pillar refresh so the master
+# caches every up node's pillar; refresh_pillar wait=True returns only once the
+# pillar is recompiled (and thus cached for matching). Retry stragglers <= MAX_WAIT.
+echo "so-boot-mine-update: warming master pillar cache for pillar/grain-targeted node pillars"
+/usr/bin/salt '*' saltutil.refresh_pillar wait=True --out=txt
+# Re-check the master pillar cache until every up minion has a non-empty entry,
+# re-running refresh_pillar for the stragglers. If the salt-run output cannot be
+# evaluated, the state is treated as unknown and retried; it is never reported
+# as warm.
+missing=""
+while [ "$elapsed" -lt "$MAX_WAIT" ]; do
+  up_json=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null)
+  cached_json=$(/usr/bin/salt-run cache.pillar tgt='*' --out=json 2>/dev/null)
+  # The helper exits non-zero when manage.up output is empty/not JSON or
+  # cache.pillar output is not JSON. Empty cache.pillar output is read as an
+  # empty cache, so every up minion is then listed and gets its pillar refreshed.
+  if ! missing=$(printf '%s' "$up_json" | python3 -c '
+import sys, json
+up = set(json.load(sys.stdin) or [])
+cached = {k for k, v in (json.loads(sys.argv[1].strip() or "{}") or {}).items() if v}
+print("\n".join(sorted(up - cached)))
+' "$cached_json" 2>/dev/null); then
+    echo "so-boot-mine-update: could not evaluate manage.up/cache.pillar output; retrying"
+    # Non-empty placeholder so the backstop WARNING below still fires.
+    missing="unknown"
+    sleep "$INTERVAL"
+    elapsed=$((elapsed + INTERVAL))
+    continue
+  fi
+  if [ -z "$missing" ]; then
+    echo "so-boot-mine-update: pillar cache warm for all up minions after ${elapsed}s"
+    break
+  fi
+  echo "so-boot-mine-update: pillar not yet cached for: $(echo $missing); refreshing"
+  for m in $missing; do /usr/bin/salt "$m" saltutil.refresh_pillar wait=True --out=txt; done
+  sleep "$INTERVAL"
+  elapsed=$((elapsed + INTERVAL))
+done
+[ -n "$missing" ] && echo "so-boot-mine-update: WARNING ${MAX_WAIT}s backstop hit; pillar not cached for: $(echo $missing); pillar-targeted pillars may drop them"
+
+# Print the mine-backed pillars as this host renders them, so the boot-time state
+# can be inspected afterwards. Always exits 0, whatever the lookups returned.
+/usr/bin/salt-call saltutil.refresh_pillar >/dev/null 2>&1
+sleep 2
+for pillar_key in node_data elasticsearch:nodes; do
+  # salt-call wraps its result under "local"; pretty-print just that value.
+  pillar_dump=$(/usr/bin/salt-call --out=json pillar.get "$pillar_key" 2>/dev/null \
+    | python3 -c 'import sys,json; print(json.dumps(json.load(sys.stdin).get("local"), indent=2, sort_keys=True))' 2>/dev/null)
+  echo "so-boot-mine-update: ${pillar_key} rendered as:"
+  # Falls back to the literal "null" when nothing could be rendered.
+  echo "${pillar_dump:-null}"
+done
+exit 0
@@ -14,6 +14,7 @@

 include:
  - salt.minion
+  - salt.master.boot_mine_update
 {%   if 'vrt' in salt['pillar.get']('features', []) %}
  - salt.cloud
  - salt.cloud.reactor_config_hypervisor
@@ -0,0 +1,29 @@
+# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
+# https://securityonion.net/license; you may not use this file except in compliance with the
+# Elastic License 2.0.
+
+# Manages /etc/systemd/system/so-boot-mine-update.service, a manager-only
+# Type=oneshot unit that pushes `salt '*' mine.update` once per boot, ordered
+# before so-boot-highstate.service so mine-backed pillars (node IPs, ES/Redis/
+# Logstash discovery) are fresh before the boot highstate renders them.
+
+include:
+  - systemd.reload
+
+# Install the unit file from the salt fileserver. When its content changes,
+# onchanges_in triggers the systemd_reload module state (presumably defined in
+# the included systemd.reload -- confirm).
+so_boot_mine_update_unit_file:
+  file.managed:
+    - name: /etc/systemd/system/so-boot-mine-update.service
+    - source: salt://salt/service/so-boot-mine-update.service
+    - onchanges_in:
+      - module: systemd_reload
+
+# Only enable once setup is complete. Until then the gate file is missing and
+# the unit's own ConditionPathExists would no-op it anyway.
+so_boot_mine_update_service:
+  service.enabled:
+    - name: so-boot-mine-update.service
+    # Skip this state while the setup-complete marker file does not exist.
+    - onlyif: test -e /opt/so/state/setup-complete
+    # Enable only after the unit file state and the systemd_reload state.
+    - require:
+      - file: so_boot_mine_update_unit_file
+      - module: systemd_reload
@@ -0,0 +1,15 @@
+# Ordering: start after the Salt daemons and network-online.target, and before
+# so-boot-highstate.service. The unit is skipped while the setup-complete marker
+# file is absent (ConditionPathExists).
+[Unit]
+Description=Security Onion boot-time grid mine.update (managers, runs once per boot before highstate)
+After=salt-master.service salt-minion.service network-online.target
+Wants=network-online.target
+Requires=salt-master.service salt-minion.service
+Before=so-boot-highstate.service
+ConditionPathExists=/opt/so/state/setup-complete
+
+# oneshot: the unit counts as started only once ExecStart has exited.
+# RemainAfterExit keeps it reported as active after that.
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/usr/sbin/so-boot-mine-update
+
+[Install]
+WantedBy=multi-user.target
@@ -1508,6 +1508,8 @@ soc:
        assistant:
          systemPromptAddendum: ""
          systemPromptAddendumMaxLength: 50000
+          maxSubSessionTokens: 0
+          maxDelegationDepth: 0
          adapters:
            - name: SOAI
              protocol: securityonion_ai_cloud
@@ -714,6 +714,16 @@ soc:
            description: Maximum length of the system prompt addendum. Longer prompts will be truncated.
            global: True
            advanced: True
+          maxSubSessionTokens:
+            description: Maximum number of output tokens a delegated sub-session may generate across all of its turns. When the budget is reached, the sub-agent is halted and its result is returned to the parent agent. Set to 0 to disable the limit.
+            global: True
+            advanced: True
+            forcedType: int
+          maxDelegationDepth:
+            description: Maximum delegation nesting depth for sub-agents. For example, a value of 2 lets the main agent delegate to a sub-agent that may itself delegate one level deeper. Any deeper delegation is refused and the requesting agent continues without it. Set to 0 to disable the limit.
+            global: True
+            advanced: True
+            forcedType: int
          adapters:
            description: Configuration for AI adapters used by the Onion AI assistant. Please see documentation for help on which fields are required for which protocols.
            global: True
@@ -980,6 +980,8 @@ docker_seed_registry() {
 		docker_seed_update_percent=25

 		update_docker_containers 'netinstall' '' 'docker_seed_update' '/dev/stdout' 2>&1 | tee -a "$setup_log"
+        # Use pipe exit status of 'update_docker_containers' for return code
+		return ${PIPESTATUS[0]}
 	fi
 }

@@ -223,6 +223,8 @@ if [ -n "$test_profile" ]; then
 	WEBPASSWD1=0n10nus3r
 	WEBPASSWD2=0n10nus3r
 	NODE_DESCRIPTION="${HOSTNAME} - ${install_type} - ${MSRVIP_OFFSET}"
+	# opt out of telemetry for automated testing
+	telemetry=1

 	update_sudoers_for_testing
 fi
@@ -767,7 +769,10 @@ if ! [[ -f $install_opt_file ]]; then
 		title "Applying the registry state"
 		logCmd "salt-call state.apply -l info registry"
 		title "Seeding the docker registry"
-		docker_seed_registry
+		if ! docker_seed_registry; then
+			error "Failed to seed the docker registry"
+			fail_setup
+		fi
 		title "Applying the manager state"
 		logCmd "salt-call state.apply -l info manager"
 		logCmd "salt-call state.apply influxdb -l info"
Author	SHA1	Message	Date
Matthew Wright	69759767f5	maxSubSessionTokens and maxDelegationDepth config settings	2026-06-15 11:40:01 -04:00
Josh Brower	ea73216f4e	Merge pull request #15971 from Security-Onion-Solutions/delta userid vs names	2026-06-15 15:28:03 +02:00
Josh Brower	9031c1fd22	userid vs names	2026-06-12 11:18:59 -04:00
Jorge Reyes	f03f0155f4	Merge pull request #15966 from Security-Onion-Solutions/reyesj2-patch-8 update so-elastic-fleet-package-upgrade script	2026-06-11 14:36:03 -05:00
Jason Ertel	0cc94980af	Merge pull request #15967 from Security-Onion-Solutions/jertel/wip Jertel/wip	2026-06-11 08:22:14 -04:00
Jason Ertel	b8bf684077	ver	2026-06-11 08:18:38 -04:00
Jason Ertel	f083db67e4	disable telemetry for automated tests	2026-06-11 08:17:39 -04:00
reyesj2	4741cc92bd	fleet manager start kibana if it isn't already running and wait for healthly status	2026-06-10 17:52:08 -05:00
reyesj2	46655860e9	http	2026-06-10 17:27:23 -05:00
reyesj2	289ddda5e8	kibana health check for fleet scripts	2026-06-10 17:06:22 -05:00
reyesj2	f905afbc6f	logging	2026-06-10 15:01:22 -05:00
reyesj2	bd5e77afc5	increase delay in so-elastic-fleet-package-upgrade attempts	2026-06-10 14:59:29 -05:00
reyesj2	944e773759	save exit until all packages have been attempted	2026-06-10 14:58:49 -05:00
Josh Patterson	3ba96da3b7	Merge pull request #15965 from Security-Onion-Solutions/nostartupstates remove startup states from salt config	2026-06-09 16:26:47 -04:00
Jorge Reyes	f0712bd780	Merge pull request #15964 from Security-Onion-Solutions/reyesj2-patch-8 use pipe exit status for update_docker_containers	2026-06-09 13:49:24 -05:00
Josh Patterson	448668a72e	Merge remote-tracking branch 'origin/3/dev' into nostartupstates	2026-06-09 14:02:00 -04:00
Josh Patterson	f088a27159	so-boot-mine-update: warm master pillar cache before highstate A complete mine is not enough: elasticsearch:nodes, redis:nodes, logstash:nodes (tgt_type=pillar) and hypervisor:nodes (tgt_type=compound) resolve their target against the master's per-minion data cache (grains+pillar in data.p), which is populated only when a minion's pillar is recompiled -- separately from the mine. After a reboot a node can be in the mine (so node_data/glob sees it) yet absent from that cache, so it fails the elasticsearch:enabled:true pillar match and is dropped from elasticsearch:nodes -> so-elasticsearch ExtraHosts -> container recreate. After the mine-completeness wait, run salt '*' saltutil.refresh_pillar wait=True to synchronously cache every up node's pillar (the same lever deploy_newnode.sls uses), then verify with salt-run cache.pillar and retry stragglers, bounded by MINE_UPDATE_MAX_WAIT. Also log elasticsearch:nodes alongside node_data for inspection.	2026-06-09 13:52:19 -04:00
reyesj2	9f5a9616a5	use pipe exit status for update_docker_containers	2026-06-09 12:51:58 -05:00
Josh Patterson	27c7702325	so-boot-mine-update: wait for a complete mine before highstate Mine-backed pillars (node_data, elasticsearch:nodes, redis:nodes, logstash:nodes, hypervisor:nodes) include a node only if it returned an IP from the mine, and the configs they build are rebuilt fresh every highstate. After a manager reboot with a flushed mine, the first boot highstate could run before an up node re-reported network.ip_addrs, dropping it from e.g. so-elasticsearch ExtraHosts and forcing a container recreate. After the initial broad mine.update, poll until every currently-up minion actually has network.ip_addrs in the mine, re-pushing mine.update to stragglers, before releasing the boot highstate. Shares the existing MINE_UPDATE_MAX_WAIT backstop so a slow/down node never blocks boot, and still logs the rendered node_data for inspection.	2026-06-09 10:10:32 -04:00
Josh Patterson	8c306eb37d	so-boot-mine-update: log the rendered node_data content Dump the actual rendered node_data pillar (pretty-printed JSON) to the journal instead of just a rendered/empty verdict, so the boot-time render attempt is fully inspectable. Empty renders print false/null and still emit the WARNING.	2026-06-09 09:49:19 -04:00
Josh Patterson	e536ffa363	so-boot-mine-update: render node_data after mine.update before highstate After the boot-time mine.update, have the manager actually render the node_data pillar and log whether it came back populated. node_data: False makes salt/top.sls apply the bootstrap recovery branch instead of the manager's real config, so surfacing this in the journal makes the condition visible before so-boot-highstate runs. Best-effort and non-blocking: always exits 0 so highstate proceeds regardless.	2026-06-09 09:35:24 -04:00
Jason Ertel	eb82f9ea9d	kilo version	2026-06-08 16:53:35 -04:00
Josh Patterson	9580976ba2	Add manager boot-time grid mine.update oneshot before highstate so-boot-mine-update.service is a manager-only Type=oneshot unit that runs once per boot after salt-master/salt-minion start and before so-boot-highstate.service. It pushes mine.update to all reachable minions so mine-backed pillars (node IPs, ES/Redis/Logstash discovery) are fresh before the boot highstate renders them. The helper waits for the responsive minion set to settle (plateau) rather than for every accepted key to report up, so an intentionally powered-off minion doesn't block the update; MAX_WAIT remains as a backstop.	2026-06-08 11:05:13 -04:00