Compare commits

..

5 Commits

Author SHA1 Message Date
Josh Patterson 199c2746f1 stop salt-minion and salt-master regardless of install type. display reinstall on console and save to logfile 2026-04-24 15:24:11 -04:00
Josh Patterson 8eca465ef6 uninstall elastic-agent before stopping dockers on reinstall 2026-04-24 14:35:11 -04:00
Josh Patterson 02381fbbe9 stop salt-cloud, belt-and-suspenders against a broken/incomplete salt RPM 2026-04-24 11:33:21 -04:00
Josh Patterson 0722b681b1 redo service stop on reinstall 2026-04-24 11:04:46 -04:00
Josh Patterson 564815e836 redo how services are stopped during reinstall 2026-04-24 10:46:29 -04:00
3 changed files with 45 additions and 34 deletions
+1 -1
View File
@@ -3,8 +3,8 @@ kratos:
description: Enables or disables the Kratos authentication system. WARNING - Disabling this process will cause the grid to malfunction. Re-enabling this setting will require manual effort via SSH.
forcedType: bool
advanced: True
readonly: True
helpLink: kratos
oidc:
enabled:
description: Set to True to enable OIDC / Single Sign-On (SSO) to SOC. Requires a valid Security Onion license key.
-1
View File
@@ -3,7 +3,6 @@ soc:
description: Enables or disables SOC. WARNING - Disabling this setting is unsupported and will cause the grid to malfunction. Re-enabling this setting is a manual effort via SSH.
forcedType: bool
advanced: True
readonly: True
telemetryEnabled:
title: SOC Telemetry
description: When this setting is enabled and the grid is not in airgap mode, SOC will provide feature usage data to the Security Onion development team via Google Analytics. This data helps Security Onion developers determine which product features are being used and can also provide insight into improving the user interface. When changing this setting, wait for the grid to fully synchronize and then perform a hard browser refresh on SOC, to force the browser cache to update and reflect the new setting.
+44 -32
View File
@@ -202,10 +202,10 @@ check_service_status() {
systemctl status $service_name > /dev/null 2>&1
local status=$?
if [ $status -gt 0 ]; then
info " $service_name is not running"
info "$service_name is not running"
return 1;
else
info " $service_name is running"
info "$service_name is running"
return 0;
fi
@@ -1541,13 +1541,8 @@ clear_previous_setup_results() {
reinstall_init() {
info "Putting system in state to run setup again"
if [[ $install_type =~ ^(MANAGER|EVAL|MANAGERSEARCH|MANAGERHYPE|STANDALONE|FLEET|IMPORT)$ ]]; then
local salt_services=( "salt-master" "salt-minion" )
else
local salt_services=( "salt-minion" )
fi
local service_retry_count=20
# Always include both services. check_service_status skips units that aren't present.
local salt_services=( "salt-master" "salt-minion" )
{
# remove all of root's cronjobs
@@ -1563,31 +1558,51 @@ reinstall_init() {
salt-call state.apply ca.remove -linfo --local --file-root=../salt
# Kill any salt processes (safely)
# Stop salt services and force-kill any lingering salt processes (including orphans
# from an earlier reinstall attempt where the unit file is gone but processes survive)
# so dnf remove salt can run cleanly
for service in "${salt_services[@]}"; do
# Stop the service in the background so we can exit after a certain amount of time
if check_service_status "$service"; then
systemctl stop "$service" &
info "Stopping $service via systemctl"
systemctl stop "$service"
fi
local pid=$!
local count=0
while check_service_status "$service"; do
if [[ $count -gt $service_retry_count ]]; then
echo "Could not stop $service after 1 minute, exiting setup."
# Stop the systemctl process trying to kill the service, show user a message, then exit setup
kill -9 $pid
fail_setup
fi
sleep 5
((count++))
done
done
# Unconditionally force-kill any remaining salt binaries — these may be orphaned
# from a prior aborted reinstall (no unit file, so systemctl can't see them).
for salt_bin in salt-master salt-minion salt-call salt-cloud; do
if pgrep -f "/usr/bin/${salt_bin}" > /dev/null 2>&1; then
info "Force-killing lingering $salt_bin processes"
pkill -9 -ef "/usr/bin/${salt_bin}" 2>/dev/null
fi
done
# Catch stray `salt` CLI children from saltutil.kill_all_jobs / state.apply invocations
pkill -9 -ef "/usr/bin/python3 /bin/salt" 2>/dev/null
# Give the kernel a moment to reap the killed processes before dnf removes the binaries
local kill_wait=0
while pgrep -f "/usr/bin/salt-" > /dev/null 2>&1; do
if [[ $kill_wait -gt 10 ]]; then
info "Salt processes still present after SIGKILL + 10s wait; proceeding anyway"
pgrep -af "/usr/bin/salt-" | while read -r line; do info " lingering: $line"; done
break
fi
sleep 1
((kill_wait++))
done
# Clear the 'failed' state SIGKILL left on the units before removing the package
systemctl reset-failed salt-master.service salt-minion.service 2>/dev/null || true
# Remove all salt configs
rm -rf /etc/salt/engines/* /etc/salt/grains /etc/salt/master /etc/salt/master.d/* /etc/salt/minion /etc/salt/minion.d/* /etc/salt/pki/* /etc/salt/proxy /etc/salt/proxy.d/* /var/cache/salt/
dnf -y remove salt
rm -rf /etc/salt/ /var/cache/salt/
# Drop systemd's in-memory references to the now-removed units
systemctl daemon-reload
# Uninstall local Elastic Agent, if installed
elastic-agent uninstall -f
if command -v docker &> /dev/null; then
# Stop and remove all so-* containers so files can be changed with more safety
@@ -1611,10 +1626,7 @@ reinstall_init() {
backup_dir /nsm/hydra "$date_string"
backup_dir /nsm/influxdb "$date_string"
# Uninstall local Elastic Agent, if installed
elastic-agent uninstall -f
} >> "$setup_log" 2>&1
} 2>&1 | tee -a "$setup_log"
info "System reinstall init has been completed."
}