mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2026-05-06 19:38:51 +02:00
Harden reinstall_init and add post-saltify readiness gate
- setup/so-functions: dump pre-reinstall salt state (systemctl / journalctl / ls /etc/salt / .rpmnew diff) to the setup log so a failed reinstall leaves a usable post-mortem; swap the manual rm -rf of /etc/salt/* for `dnf -y remove salt` so package configs get cleaned up properly.
- setup/so-setup: replace the `sleep 2 / state.show_top / sleep 2` dance after saltify with a readiness gate that waits for /etc/salt/pki/master/master.pub, runs check_salt_master_status, and then wait_for_minion_key_pending before salt-key -ya. Fixes reinstalls on 3.x timing out on "Unable to sign_in to master".
- salt/common/tools/sbin/so-common: add wait_for_minion_key_pending helper, which polls `salt-key -l pre` until the minion appears.
This commit is contained in:
@@ -162,6 +162,29 @@ check_salt_master_status() {
|
||||
return 0
|
||||
}
|
||||
|
||||
# Wait until a minion's key shows up in the salt master's unaccepted-keys list.
# Used after saltify on a reinstall to replace the old `sleep 2 / state.show_top /
# sleep 2` dance — the new minion's key takes longer to appear than 2s on
# salt 3006.x and the subsequent salt-key -ya needs something to accept.
#
# Arguments:
#   $1 - minion id to look for (required)
#   $2 - maximum number of polls (default 30)
#   $3 - seconds to sleep between polls (default 2)
# Outputs: a one-line status message on stdout.
# Returns: 0 as soon as the key is pending, 1 once all attempts are used up
#          (or immediately when no minion id was given).
wait_for_minion_key_pending() {
  local minion="$1"
  local attempts="${2:-30}"
  local delay="${3:-2}"
  local count=0

  # An empty id can never match a pending key; fail fast instead of polling
  # for the whole timeout.
  if [[ -z "$minion" ]]; then
    echo "wait_for_minion_key_pending: no minion id given"
    return 1
  fi

  # The minion id is handed to python as argv[1] rather than being spliced into
  # the program text, so ids containing quotes or backslashes are compared
  # literally instead of breaking (or altering) the python snippet.
  # stderr of both stages is discarded on purpose: any salt-key or JSON parse
  # failure is treated as "not pending yet" and simply triggers another poll.
  while ! salt-key -l pre --out=json 2>/dev/null \
    | python3 -c 'import json, sys; d = json.load(sys.stdin); sys.exit(0 if sys.argv[1] in d.get("minions_pre", []) else 1)' "$minion" 2>/dev/null; do
    ((count += 1))
    if [[ $count -ge $attempts ]]; then
      echo "Gave up waiting for $minion to appear in salt-master's pending keys"
      return 1
    fi
    sleep "$delay"
  done

  echo "Minion $minion is pending acceptance after $((count * delay))s"
  return 0
}
|
||||
|
||||
# this is only intended to be used to check the status of the minion from a salt master
|
||||
check_salt_minion_status() {
|
||||
local minion="$1"
|
||||
|
||||
+14
-1
@@ -1550,6 +1550,19 @@ reinstall_init() {
|
||||
local service_retry_count=20
|
||||
|
||||
{
|
||||
# Snapshot pre-reinstall salt state before any destructive step so a
|
||||
# failed reinstall leaves a usable post-mortem in the setup log.
|
||||
echo "=== pre-reinstall salt diagnostic $(date -Iseconds) ==="
|
||||
systemctl status salt-master --no-pager 2>&1 | head -40 || true
|
||||
systemctl status salt-minion --no-pager 2>&1 | head -40 || true
|
||||
journalctl -u salt-master --no-pager --since "-10 minutes" 2>&1 | tail -80 || true
|
||||
journalctl -u salt-minion --no-pager --since "-10 minutes" 2>&1 | tail -80 || true
|
||||
ls -laR /etc/salt 2>&1 | head -60 || true
|
||||
ls -la /var/cache/salt 2>&1 | head -40 || true
|
||||
[[ -f /etc/salt/master.rpmnew ]] && diff -u /etc/salt/master /etc/salt/master.rpmnew 2>&1 | head -80 || true
|
||||
[[ -f /etc/salt/minion.rpmnew ]] && diff -u /etc/salt/minion /etc/salt/minion.rpmnew 2>&1 | head -40 || true
|
||||
echo "=== end diagnostic ==="
|
||||
|
||||
# remove all of root's cronjobs
|
||||
crontab -r -u root
|
||||
|
||||
@@ -1580,7 +1593,7 @@ reinstall_init() {
|
||||
kill -9 $pid
|
||||
fail_setup
|
||||
fi
|
||||
|
||||
|
||||
sleep 5
|
||||
((count++))
|
||||
done
|
||||
|
||||
+11
-3
@@ -724,10 +724,18 @@ if ! [[ -f $install_opt_file ]]; then
|
||||
# Install salt
|
||||
saltify
|
||||
check_sos_appliance
|
||||
# Wait for salt-master to be actually running and have its PKI
|
||||
# ready after a fresh saltify. Without this, salt-key operations
|
||||
# silently race the daemon and the key accept no-ops, which is
|
||||
# what was causing reinstalls on 3.x to hang on state.show_top.
|
||||
retry 30 2 "test -f /etc/salt/pki/master/master.pub" \
|
||||
|| fail "salt-master did not initialize PKI after saltify"
|
||||
check_salt_master_status \
|
||||
|| fail "salt-master not accepting calls after saltify"
|
||||
|
||||
logCmd "salt-key -yd $MINION_ID"
|
||||
sleep 2 # Debug RSA Key format errors
|
||||
logCmd "salt-call state.show_top"
|
||||
sleep 2 # Debug RSA Key format errors
|
||||
wait_for_minion_key_pending "$MINION_ID" 30 2 \
|
||||
|| fail "salt-minion never presented its key to salt-master"
|
||||
logCmd "salt-key -ya $MINION_ID"
|
||||
logCmd "salt-call saltutil.sync_all"
|
||||
# we need to sync the runner and generate the soqemussh user keys so that first highstate after license created
|
||||
|
||||
Reference in New Issue
Block a user