Merge pull request #2201 from Security-Onion-Solutions/bugfix/reinstall

Bugfix/reinstall
This commit is contained in:
William Wernert
2020-12-04 09:38:46 -05:00
committed by GitHub
2 changed files with 159 additions and 114 deletions

View File

@@ -251,19 +251,19 @@ check_pass_match() {
fi
}
# Report whether a systemd unit is currently running.
# Arguments: $1 - name of the systemd unit to query
# Globals:   setup_log (read) - file that receives the progress messages
# Outputs:   prints 0 on stdout when the unit is running, 1 when it is not
# Returns:   0 (shell true) when the unit is running, 1 (shell false) when
#            it is not. The result is reported both ways because callers use
#            either the captured output or the exit status.
check_service_status() {
  local service_name=$1
  echo "Checking service $service_name status" >> "$setup_log" 2>&1
  # The unit name is quoted so it always reaches systemctl as exactly one
  # argument, even when it is empty or contains whitespace.
  if systemctl status "$service_name" > /dev/null 2>&1; then
    echo " $service_name is running" >> "$setup_log" 2>&1
    echo 0
    return 0
  fi
  # Any non-zero exit from systemctl status is treated as "not running".
  echo " $service_name is not running" >> "$setup_log" 2>&1
  echo 1
  return 1
}
@@ -273,28 +273,27 @@ check_salt_master_status() {
salt-call saltutil.kill_all_jobs > /dev/null 2>&1
salt-call state.show_top > /dev/null 2>&1
local status=$?
#true if there is an issue talking to salt master
if [ $status -gt 0 ]; then
echo 1;
echo " Could not talk to salt master" >> "$setup_log" 2>&1
return 1;
else
echo "Can talk to salt master" >> "$setup_log" 2>&1
echo 0;
echo " Can talk to salt master" >> "$setup_log" 2>&1
return 0;
fi
}
# Check whether the salt minion answers a test.ping job.
# Globals:   MINION_ID (read) - minion targeted by the ping
#            setup_log (read) - file that receives the ping output and messages
# Outputs:   prints 0 on stdout when the minion responded, 1 when it did not
# Returns:   0 when the minion responded, 1 when it did not
check_salt_minion_status() {
  echo "Checking if the salt minion will respond to jobs" >> "$setup_log" 2>&1
  # Ping exactly once and keep its output in the log, so the logged reply and
  # the reported status always come from the same attempt.
  if salt "$MINION_ID" test.ping >> "$setup_log" 2>&1; then
    echo " Received job response from salt minion" >> "$setup_log" 2>&1
    echo 0
    return 0
  fi
  echo " Minion did not respond" >> "$setup_log" 2>&1
  echo 1
  return 1
}
check_soremote_pass() {
@@ -767,12 +766,12 @@ detect_os() {
disable_auto_start() {
if crontab -l -u $INSTALLUSERNAME 2>&1 | grep so-setup > /dev/null 2>&1; then
if crontab -l -u $INSTALLUSERNAME 2>&1 | grep -q so-setup; then
# Remove the automated setup script from crontab, if it exists
logCmd "crontab -u $INSTALLUSERNAME -r"
fi
if grep so-setup /home/$INSTALLUSERNAME/.bash_profile > /dev/null 2>&1; then
if grep -q so-setup /home/$INSTALLUSERNAME/.bash_profile; then
# Truncate last line of the bash profile
info "Removing auto-run of setup from bash profile"
sed -i '$ d' /home/$INSTALLUSERNAME/.bash_profile >> "$setup_log" 2>&1
@@ -1384,13 +1383,47 @@ reserve_group_ids() {
# Put an already-installed system back into a state where setup can run again.
# In the lines visible here it stops the salt services, deletes salt
# configuration under /etc/salt and removes the launcher-final package.
# Globals: install_type (read), setup_log (read; receives the output of the
#          braced group below)
# Exits the script with status 1 if a salt service cannot be stopped in time.
reinstall_init() {
info "Putting system in state to run setup again"
# These install types get salt-master stopped as well as salt-minion;
# every other install type only has salt-minion stopped.
if [[ $install_type =~ ^(MANAGER|EVAL|HELIXSENSOR|MANAGERSEARCH|STANDALONE|FLEET|IMPORT)$ ]]; then
local salt_services=( "salt-master" "salt-minion" )
else
local salt_services=( "salt-minion" )
fi
# Upper bound on the number of polls (one every 5 seconds) allowed per service.
local service_retry_count=20
{
# Kill any salt processes
pkill -9 -ef /usr/bin/salt
if command -v salt-call &> /dev/null; then
# Disable schedule so highstate doesn't start running during the install
salt-call -l info schedule.disable
# Kill any currently running salt jobs, also to prevent issues with highstate.
salt-call -l info saltutil.kill_all_jobs
fi
# Kill any salt processes (safely)
for service in "${salt_services[@]}"; do
# Stop the service in the background so we can exit after a certain amount of time
systemctl stop "$service" &
local pid=$!
local count=0
# check_service_status succeeds while the service is still running, so this
# loop keeps polling until the service has stopped.
while check_service_status "$service"; do
# NOTE(review): with a 5 second sleep and a limit of 20 polls this branch is
# reached after roughly 100 seconds, not the "1 minute" stated in the message
# below - confirm which one is intended.
if [[ $count -gt $service_retry_count ]]; then
echo "Could not stop $service after 1 minute, exiting setup."
# Stop the systemctl process trying to kill the service, show user a message, then exit setup
kill -9 $pid
# Sends SIGUSR1 to this shell's parent process before exiting.
# NOTE(review): presumably the parent traps SIGUSR1 to show the failure
# message - confirm against the caller.
kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1
fi
sleep 5
((count++))
done
done
# Remove all salt configs
rm -rf /etc/salt/global /etc/salt/minion /etc/salt/master /etc/salt/pki/*
rm -rf /etc/salt/grains /etc/salt/minion /etc/salt/pki/*
if command -v docker &> /dev/null; then
# Stop and remove all so-* containers so files can be changed with more safety
# NOTE(review): the next line is a diff hunk marker; part of this function is
# not visible at this point.
@@ -1411,7 +1444,7 @@ reinstall_init() {
# Remove the old launcher package in case the config changes
remove_package launcher-final
} >> $setup_log 2>&1
} >> "$setup_log" 2>&1
}
backup_dir() {
@@ -1608,61 +1641,59 @@ salt_checkin() {
"salt-master" \
"salt-minion"
)
local LOOP_COUNT=0
local count=0
for service in "${SALT_SERVICES[@]}"; do
echo "Stopping service $service" >> "$setup_log" 2>&1
systemctl stop "$service" >> "$setup_log" 2>&1
LOOP_COUNT=0
while ! (( $(check_service_status $service) )); do
echo "$service still running" >> "$setup_log" 2>&1
if [ $LOOP_COUNT -gt 60 ]; then
echo "$service could not be stopped in 60 seconds, exiting" >> "$setup_log" 2>&1
exit 1
{
echo "Restarting service $service"
systemctl restart "$service" &
local pid=$!
} >> "$setup_log" 2>&1
count=0
while ! (check_service_status "$service"); do
# On final loop, kill the pid trying to restart service and try to manually kill then start it
if [ $count -eq 12 ]; then
{
kill -9 "$pid"
systemctl kill "$service"
systemctl start "$service" &
local pid=$!
} >> "$setup_log" 2>&1
fi
sleep 1;
((LOOP_COUNT+=1))
if [ $count -gt 12 ]; then
echo "$service could not be restarted in 120 seconds, exiting" >> "$setup_log" 2>&1
kill -9 "$pid"
kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1
fi
sleep 10;
((count++))
done
done
sleep 5;
for service in "${SALT_SERVICES[@]}"; do
echo "Starting service $service" >> "$setup_log" 2>&1
systemctl start "$service" >> "$setup_log" 2>&1
LOOP_COUNT=0
while (( $(check_service_status $service) )); do
echo "$service still not running" >> "$setup_log" 2>&1
if [ $LOOP_COUNT -gt 60 ]; then
echo "$service could not be started in 60 seconds, exiting" >> "$setup_log" 2>&1
exit 1
fi
sleep 1;
((LOOP_COUNT+=1))
done
done
sleep 5;
LOOP_COUNT=0
while (( $(check_salt_master_status) )); do
count=0
while ! (check_salt_master_status); do
echo "salt minion cannot talk to salt master" >> "$setup_log" 2>&1
if [ $LOOP_COUNT -gt 30 ]; then
if [ $count -gt 30 ]; then
echo "salt minion could not talk to salt master after 30 attempts, exiting" >> "$setup_log" 2>&1
exit 1
kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1
fi
sleep 1;
((LOOP_COUNT+=1))
((count++))
done
LOOP_COUNT=0
while (( $(check_salt_minion_status) )); do
count=0
while ! (check_salt_minion_status); do
echo "salt master did not get a job response from salt minion" >> "$setup_log" 2>&1
if [ $LOOP_COUNT -gt 30 ]; then
if [ $count -gt 30 ]; then
echo "salt master did not get a job response from salt minion after 30 attempts, exiting" >> "$setup_log" 2>&1
exit 1
kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1
fi
systemctl kill salt-minion
systemctl start salt-minion
sleep 1;
((LOOP_COUNT+=1))
((count++))
done
echo " Confirming existence of the CA certificate"