Merge remote-tracking branch 'origin/3/dev' into soupmod

This commit is contained in:
Josh Patterson
2026-06-10 16:19:17 -04:00
20 changed files with 345 additions and 59 deletions
+5 -3
View File
@@ -31,11 +31,13 @@ sync_es_users:
- http: wait_for_kratos
- file: so-user.lock # require so-user.lock file to be missing
# we dont want this added too early in setup, so we add the onlyif to verify 'startup_states: highstate'
# is in the minion config. That line is added before the final highstate during setup
# we dont want this added too early in setup, so the onlyif gates on the
# /opt/so/state/setup-complete marker. The marker is written by
# mark_setup_complete in setup/so-functions just before the final setup
# highstate (and by an upgrade-path state for systems set up under the old gate).
so-user_sync:
cron.present:
- user: root
- name: 'PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin /usr/sbin/so-user sync &>> /opt/so/log/soc/sync.log'
- identifier: so-user_sync
- onlyif: "grep -x 'startup_states: highstate' /etc/salt/minion"
- onlyif: "test -e /opt/so/state/setup-complete"
+117
View File
@@ -0,0 +1,117 @@
#!/bin/bash
#
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
# Runs once per boot on managers (via so-boot-mine-update.service), before
# so-boot-highstate.service. Waits for the responsive minion set to settle, pushes
# mine.update, waits until every up minion has actually reported to the mine, then
# warms the master's per-minion pillar cache so the mine-backed node pillars (node
# IPs, ES/Redis/Logstash/hypervisor discovery -- some glob- and some pillar/grain-
# targeted) are complete before the boot highstate renders them. Otherwise a node
# that is up but not yet fully reported gets dropped from those pillars and torn
# out of the configs they build (e.g. so-elasticsearch ExtraHosts -> container recreate).
MAX_WAIT=${MINE_UPDATE_MAX_WAIT:-180} # hard backstop only
INTERVAL=10
STABLE_CHECKS=3 # up-count must hold steady this many polls
elapsed=0
prev=-1
stable=0
up=0
# Wait for the *reachable* minion set to settle rather than for every accepted
# key to report up: an operator may accept a minion's key and then intentionally
# power off that host, so requiring up >= accepted would never be satisfied and
# we'd always burn the full MAX_WAIT. Once the responsive count stops growing we
# stop waiting and run mine.update against whoever is up.
while [ "$elapsed" -lt "$MAX_WAIT" ]; do
up=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null \
| python3 -c 'import sys,json; print(len(json.load(sys.stdin)))' 2>/dev/null)
up=${up:-0}
if [ "$up" -gt 0 ] && [ "$up" -eq "$prev" ]; then
stable=$((stable + 1))
[ "$stable" -ge "$STABLE_CHECKS" ] && break
else
stable=0
fi
prev=$up
sleep "$INTERVAL"
elapsed=$((elapsed + INTERVAL))
done
echo "so-boot-mine-update: ${up} minions up (settled after ${elapsed}s); running mine.update"
/usr/bin/salt '*' mine.update --out=txt
# A node that is up but has not yet re-reported network.ip_addrs to the mine is
# silently dropped from mine-backed pillars (elasticsearch:nodes, node_data, ...)
# when highstate recompiles them -- which e.g. removes it from so-elasticsearch
# ExtraHosts and forces a container recreate. After the broad mine.update above,
# wait until every up minion actually has network.ip_addrs in the mine, re-pushing
# mine.update to stragglers, before releasing the boot highstate. Bounded by the
# same MAX_WAIT backstop so a slow/down node never blocks boot indefinitely.
missing=""
while [ "$elapsed" -lt "$MAX_WAIT" ]; do
up_json=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null)
mine_json=$(/usr/bin/salt-run mine.get '*' network.ip_addrs tgt_type=glob --out=json 2>/dev/null)
missing=$(printf '%s' "$up_json" | python3 -c '
import sys, json
up = set(json.load(sys.stdin) or [])
mine = {k for k, v in (json.loads(sys.argv[1]) or {}).items() if v}
print("\n".join(sorted(up - mine)))
' "$mine_json" 2>/dev/null)
if [ -z "$missing" ]; then
echo "so-boot-mine-update: mine complete for all up minions after ${elapsed}s"
break
fi
echo "so-boot-mine-update: mine missing up minion(s): $(echo $missing); re-running mine.update"
for m in $missing; do /usr/bin/salt "$m" mine.update --out=txt; done
sleep "$INTERVAL"
elapsed=$((elapsed + INTERVAL))
done
[ -n "$missing" ] && echo "so-boot-mine-update: WARNING ${MAX_WAIT}s backstop hit; up minion(s) still absent from mine: $(echo $missing); highstate may drop them from configs"
# The pillar/compound-targeted node pillars (elasticsearch:nodes, redis:nodes,
# logstash:nodes, hypervisor:nodes) resolve their target against the master's
# per-minion data cache (grains+pillar in .../minions/<id>/data.p), populated only
# when a minion's pillar is (re)compiled -- separately from the mine. A freshly
# booted node can be in the mine (glob/node_data sees it) yet absent from that
# cache, so it is dropped from those pillars and from the configs they build (e.g.
# so-elasticsearch ExtraHosts). Force a synchronous pillar refresh so the master
# caches every up node's pillar; refresh_pillar wait=True returns only once the
# pillar is recompiled (and thus cached for matching). Retry stragglers <= MAX_WAIT.
echo "so-boot-mine-update: warming master pillar cache for pillar/grain-targeted node pillars"
/usr/bin/salt '*' saltutil.refresh_pillar wait=True --out=txt
missing=""
while [ "$elapsed" -lt "$MAX_WAIT" ]; do
up_json=$(/usr/bin/salt-run manage.up --out=json 2>/dev/null)
cached_json=$(/usr/bin/salt-run cache.pillar tgt='*' --out=json 2>/dev/null)
missing=$(printf '%s' "$up_json" | python3 -c '
import sys, json
up = set(json.load(sys.stdin) or [])
cached = {k for k, v in (json.loads(sys.argv[1]) or {}).items() if v}
print("\n".join(sorted(up - cached)))
' "$cached_json" 2>/dev/null)
if [ -z "$missing" ]; then
echo "so-boot-mine-update: pillar cache warm for all up minions after ${elapsed}s"
break
fi
echo "so-boot-mine-update: pillar not yet cached for: $(echo $missing); refreshing"
for m in $missing; do /usr/bin/salt "$m" saltutil.refresh_pillar wait=True --out=txt; done
sleep "$INTERVAL"
elapsed=$((elapsed + INTERVAL))
done
[ -n "$missing" ] && echo "so-boot-mine-update: WARNING ${MAX_WAIT}s backstop hit; pillar not cached for: $(echo $missing); pillar-targeted pillars may drop them"
# Log what the mine-backed pillars render so the boot-time state is inspectable.
/usr/bin/salt-call saltutil.refresh_pillar >/dev/null 2>&1
sleep 2
for key in node_data elasticsearch:nodes; do
rendered=$(/usr/bin/salt-call --out=json pillar.get "$key" 2>/dev/null \
| python3 -c 'import sys,json; print(json.dumps(json.load(sys.stdin).get("local"), indent=2, sort_keys=True))' 2>/dev/null)
echo "so-boot-mine-update: ${key} rendered as:"
echo "${rendered:-null}"
done
exit 0
+14 -20
View File
@@ -188,13 +188,6 @@ airgap_update_dockers() {
fi
}
backup_old_states_pillars() {
tar czf /nsm/backup/$(echo $INSTALLEDVERSION)_$(date +%Y%m%d-%H%M%S)_soup_default_states_pillars.tar.gz /opt/so/saltstack/default/
tar czf /nsm/backup/$(echo $INSTALLEDVERSION)_$(date +%Y%m%d-%H%M%S)_soup_local_states_pillars.tar.gz /opt/so/saltstack/local/
}
update_registry() {
docker stop so-dockerregistry
docker rm so-dockerregistry
@@ -770,12 +763,18 @@ bootstrap_so_soc_database() {
}
up_to_3.2.0() {
fix_logstash_0013_lumberjack_pipeline_name
INSTALLEDVERSION=3.2.0
}
post_to_3.2.0() {
bootstrap_so_soc_database
# Including agent regen script here since it was missed in post_to_3.1.0
echo "Regenerating Elastic Agent Installers"
/sbin/so-elastic-agent-gen-installers
POSTVERSION=3.2.0
}
@@ -1567,13 +1566,7 @@ EOF
# Keeping this block in case we need to do a hotfix that requires salt update
apply_hotfix() {
if [[ "$INSTALLEDVERSION" == "3.1.0" ]] ; then
# Do not remove this fix_logstash_0013_lumberjack_pipeline_name in future hotfixes without first validating older
# installs referencing "so/0013_input_lumberjack_fleet.conf" via pillar are upgradable
fix_logstash_0013_lumberjack_pipeline_name
else
echo "No actions required. ($INSTALLEDVERSION/$HOTFIXVERSION)"
fi
echo "No actions required. ($INSTALLEDVERSION/$HOTFIXVERSION)"
}
failed_soup_restore_items() {
@@ -1645,13 +1638,13 @@ main() {
echo "Verifying we have the latest soup script."
verify_latest_update_script
echo "Verifying Elasticsearch version compatibility across the grid before upgrading."
verify_es_version_compatibility
echo "Let's see if we need to update Security Onion."
upgrade_check
upgrade_space
echo "Verifying Elasticsearch version compatibility across the grid before upgrading."
verify_es_version_compatibility
echo "Checking for Salt Master and Minion updates."
upgrade_check_salt
set -e
@@ -1671,7 +1664,8 @@ main() {
echo "Applying $HOTFIXVERSION hotfix"
# since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars
if [[ ! "$MINION_ROLE" == "import" ]]; then
backup_old_states_pillars
echo "Running so-config-backup script."
/sbin/so-config-backup
fi
copy_new_files
create_local_directories "/opt/so/saltstack/default"
@@ -1727,8 +1721,8 @@ main() {
# since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars
if [[ ! "$MINION_ROLE" == "import" ]]; then
echo ""
echo "Creating snapshots of default and local Salt states and pillars and saving to /nsm/backup/"
backup_old_states_pillars
echo "Running so-config-backup script."
/sbin/so-config-backup
fi
echo ""