Compare commits

...

5 Commits

Author SHA1 Message Date
Josh Patterson 199c2746f1 stop salt-minion and salt-master regardless of install type. display reinstall on console and save to logfile 2026-04-24 15:24:11 -04:00
Josh Patterson 8eca465ef6 uninstall elastic-agent before stopping dockers on reinstall 2026-04-24 14:35:11 -04:00
Josh Patterson 02381fbbe9 stop salt-cloud , belt-and-suspenders against a broken/incomplete salt RPM 2026-04-24 11:33:21 -04:00
Josh Patterson 0722b681b1 redo service stop on reinstall 2026-04-24 11:04:46 -04:00
Josh Patterson 564815e836 redo how services are stopped during reinstall 2026-04-24 10:46:29 -04:00
+44 -32
View File
@@ -202,10 +202,10 @@ check_service_status() {
systemctl status $service_name > /dev/null 2>&1 systemctl status $service_name > /dev/null 2>&1
local status=$? local status=$?
if [ $status -gt 0 ]; then if [ $status -gt 0 ]; then
info " $service_name is not running" info "$service_name is not running"
return 1; return 1;
else else
info " $service_name is running" info "$service_name is running"
return 0; return 0;
fi fi
@@ -1541,13 +1541,8 @@ clear_previous_setup_results() {
reinstall_init() { reinstall_init() {
info "Putting system in state to run setup again" info "Putting system in state to run setup again"
if [[ $install_type =~ ^(MANAGER|EVAL|MANAGERSEARCH|MANAGERHYPE|STANDALONE|FLEET|IMPORT)$ ]]; then # Always include both services. check_service_status skips units that aren't present.
local salt_services=( "salt-master" "salt-minion" ) local salt_services=( "salt-master" "salt-minion" )
else
local salt_services=( "salt-minion" )
fi
local service_retry_count=20
{ {
# remove all of root's cronjobs # remove all of root's cronjobs
@@ -1563,31 +1558,51 @@ reinstall_init() {
salt-call state.apply ca.remove -linfo --local --file-root=../salt salt-call state.apply ca.remove -linfo --local --file-root=../salt
# Kill any salt processes (safely) # Stop salt services and force-kill any lingering salt processes (including orphans
# from an earlier reinstall attempt where the unit file is gone but processes survive)
# so dnf remove salt can run cleanly
for service in "${salt_services[@]}"; do for service in "${salt_services[@]}"; do
# Stop the service in the background so we can exit after a certain amount of time
if check_service_status "$service"; then if check_service_status "$service"; then
systemctl stop "$service" & info "Stopping $service via systemctl"
systemctl stop "$service"
fi fi
local pid=$!
local count=0
while check_service_status "$service"; do
if [[ $count -gt $service_retry_count ]]; then
echo "Could not stop $service after 1 minute, exiting setup."
# Stop the systemctl process trying to kill the service, show user a message, then exit setup
kill -9 $pid
fail_setup
fi
sleep 5
((count++))
done
done done
# Unconditionally force-kill any remaining salt binaries — these may be orphaned
# from a prior aborted reinstall (no unit file, so systemctl can't see them).
for salt_bin in salt-master salt-minion salt-call salt-cloud; do
if pgrep -f "/usr/bin/${salt_bin}" > /dev/null 2>&1; then
info "Force-killing lingering $salt_bin processes"
pkill -9 -ef "/usr/bin/${salt_bin}" 2>/dev/null
fi
done
# Catch stray `salt` CLI children from saltutil.kill_all_jobs / state.apply invocations
pkill -9 -ef "/usr/bin/python3 /bin/salt" 2>/dev/null
# Give the kernel a moment to reap the killed processes before dnf removes the binaries
local kill_wait=0
while pgrep -f "/usr/bin/salt-" > /dev/null 2>&1; do
if [[ $kill_wait -gt 10 ]]; then
info "Salt processes still present after SIGKILL + 10s wait; proceeding anyway"
pgrep -af "/usr/bin/salt-" | while read -r line; do info " lingering: $line"; done
break
fi
sleep 1
((kill_wait++))
done
# Clear the 'failed' state SIGKILL left on the units before removing the package
systemctl reset-failed salt-master.service salt-minion.service 2>/dev/null || true
# Remove all salt configs # Remove all salt configs
rm -rf /etc/salt/engines/* /etc/salt/grains /etc/salt/master /etc/salt/master.d/* /etc/salt/minion /etc/salt/minion.d/* /etc/salt/pki/* /etc/salt/proxy /etc/salt/proxy.d/* /var/cache/salt/ dnf -y remove salt
rm -rf /etc/salt/ /var/cache/salt/
# Drop systemd's in-memory references to the now-removed units
systemctl daemon-reload
# Uninstall local Elastic Agent, if installed
elastic-agent uninstall -f
if command -v docker &> /dev/null; then if command -v docker &> /dev/null; then
# Stop and remove all so-* containers so files can be changed with more safety # Stop and remove all so-* containers so files can be changed with more safety
@@ -1611,10 +1626,7 @@ reinstall_init() {
backup_dir /nsm/hydra "$date_string" backup_dir /nsm/hydra "$date_string"
backup_dir /nsm/influxdb "$date_string" backup_dir /nsm/influxdb "$date_string"
# Uninstall local Elastic Agent, if installed } 2>&1 | tee -a "$setup_log"
elastic-agent uninstall -f
} >> "$setup_log" 2>&1
info "System reinstall init has been completed." info "System reinstall init has been completed."
} }