From c2e7e425092ae6b262040dd3fc9c8c2352e04c1f Mon Sep 17 00:00:00 2001 From: William Wernert Date: Tue, 1 Dec 2020 15:36:05 -0500 Subject: [PATCH 01/20] [fix] Don't SIGKILL salt services + disable highstate schedule --- setup/so-functions | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 4ba639fa5..b3986c826 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1384,8 +1384,13 @@ reinstall_init() { info "Putting system in state to run setup again" { - # Kill any salt processes - pkill -9 -ef /usr/bin/salt + # Disable all scheduled jobs + if command -v salt-call &> /dev/null; then + salt-call schedule.disable + fi + + # Kill any salt processes (safely) + systemctl stop salt-* # Remove all salt configs rm -rf /etc/salt/global /etc/salt/minion /etc/salt/master /etc/salt/pki/* From 38028a543a92f6e88bba7221c6149b702ea45fb5 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Tue, 1 Dec 2020 21:18:24 -0500 Subject: [PATCH 02/20] [feat] Add timeout for salt services to stop during reinstall init --- setup/so-functions | 25 ++++++++++++++++++++++++- setup/so-whiptail | 13 +++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/setup/so-functions b/setup/so-functions index b3986c826..8c06888f6 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1382,6 +1382,12 @@ reserve_group_ids() { reinstall_init() { info "Putting system in state to run setup again" + + local salt_services=( + "salt-master" + "salt-minion" + ) + local service_retry_count=20 { # Disable all scheduled jobs @@ -1390,7 +1396,24 @@ reinstall_init() { fi # Kill any salt processes (safely) - systemctl stop salt-* + for service in "${salt_services[@]}"; do + # Stop the service in the background so we can exit after a certain amount of time + systemctl stop "$service" & + local pid=$! + + local count=0 + while ! (check_service_status "$service"); do + if [ count > $service_retry_count ]; then + echo "Could not stop $service after 1 minute, exiting setup." + + # Stop the systemctl process trying to kill the service, show user a message, then exit setup + kill -9 $pid + whiptail_service_stop_failed "$service" + fi + sleep 5 + ((count++)) + done + done # Remove all salt configs rm -rf /etc/salt/global /etc/salt/minion /etc/salt/master /etc/salt/pki/* diff --git a/setup/so-whiptail b/setup/so-whiptail index 11d968910..a41e61f94 100755 --- a/setup/so-whiptail +++ b/setup/so-whiptail @@ -1175,6 +1175,19 @@ whiptail_sensor_config() { } +whiptail_service_stop_failed() { + local service=$1 + + read -r -d '' message <<- EOM + The ${service} service could not be stopped. Please stop it manually and then re-run setup. + + Press ENTER to exit the installer. + EOM + + whiptail --title "Security Onion Setup" --msgbox "$message" 10 75 + exit 1 +} + whiptail_set_hostname() { [ -n "$TESTING" ] && return From 2d6feea5c5ff696b50bb6f03862ddcd40c96bb26 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Tue, 1 Dec 2020 21:21:32 -0500 Subject: [PATCH 03/20] [fix] Syntax fixes --- setup/so-functions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/so-functions b/setup/so-functions index 8c06888f6..863b09a18 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1403,7 +1403,7 @@ reinstall_init() { local count=0 while ! (check_service_status "$service"); do - if [ count > $service_retry_count ]; then + if [[ $count > $service_retry_count ]]; then echo "Could not stop $service after 1 minute, exiting setup." # Stop the systemctl process trying to kill the service, show user a message, then exit setup From 4b5b936abb6f311303fcb956d0eb9e87662a3921 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Tue, 1 Dec 2020 21:40:41 -0500 Subject: [PATCH 04/20] [fix] echo -> return --- setup/so-functions | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 863b09a18..d4acd8a75 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -251,19 +251,19 @@ check_pass_match() { fi } +# False if stopped, true if running check_service_status() { local service_name=$1 echo "Checking service $service_name status" >> "$setup_log" 2>&1 systemctl status $service_name > /dev/null 2>&1 local status=$? - #true if there is an issue with the service false if it is running properly if [ $status -gt 0 ]; then echo "$service_name is not running" >> "$setup_log" 2>&1 - echo 1; + return 1; else echo "$service_name is running" >> "$setup_log" 2>&1 - echo 0; + return 0; fi } @@ -1402,7 +1402,7 @@ reinstall_init() { local pid=$! local count=0 - while ! (check_service_status "$service"); do + while check_service_status "$service"; do if [[ $count > $service_retry_count ]]; then echo "Could not stop $service after 1 minute, exiting setup." @@ -1639,7 +1639,7 @@ salt_checkin() { echo "Stopping service $service" >> "$setup_log" 2>&1 systemctl stop "$service" >> "$setup_log" 2>&1 LOOP_COUNT=0 - while ! (( $(check_service_status $service) )); do + while check_service_status "$service"; do echo "$service still running" >> "$setup_log" 2>&1 if [ $LOOP_COUNT -gt 60 ]; then echo "$service could not be stopped in 60 seconds, exiting" >> "$setup_log" 2>&1 @@ -1656,7 +1656,7 @@ salt_checkin() { echo "Starting service $service" >> "$setup_log" 2>&1 systemctl start "$service" >> "$setup_log" 2>&1 LOOP_COUNT=0 - while (( $(check_service_status $service) )); do + while ! (check_service_status "$service"); do echo "$service still not running" >> "$setup_log" 2>&1 if [ $LOOP_COUNT -gt 60 ]; then echo "$service could not be started in 60 seconds, exiting" >> "$setup_log" 2>&1 From 2d4fe5829913481b3f0f39cae74f0e0f7d17b0bd Mon Sep 17 00:00:00 2001 From: William Wernert Date: Tue, 1 Dec 2020 21:43:38 -0500 Subject: [PATCH 05/20] [fix] Also kill currently running jobs --- setup/so-functions | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index d4acd8a75..a54153077 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1390,9 +1390,12 @@ reinstall_init() { local service_retry_count=20 { - # Disable all scheduled jobs if command -v salt-call &> /dev/null; then - salt-call schedule.disable + # Disable scheduled jobs so highstate doesn't start running during the install + salt-call -l info schedule.disable + + # Kill any currently running salt jobs, also to prevent issues with highstate. + salt-call -l info saltutil.kill_all_jobs fi # Kill any salt processes (safely) From 467f9923b07f3ee6e3a2008f673e10f0a3b40726 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Wed, 2 Dec 2020 13:19:34 -0500 Subject: [PATCH 06/20] [refactor] Add trap to handle script exits, change what files are deleted in /etc/salt/ --- setup/so-functions | 78 ++++++++++++++++++---------------------------- setup/so-setup | 14 ++++++++- setup/so-whiptail | 13 -------- 3 files changed, 44 insertions(+), 61 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index a54153077..9cf01d74c 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -273,12 +273,11 @@ check_salt_master_status() { salt-call saltutil.kill_all_jobs > /dev/null 2>&1 salt-call state.show_top > /dev/null 2>&1 local status=$? - #true if there is an issue talking to salt master if [ $status -gt 0 ]; then - echo 1; + return 1; else echo "Can talk to salt master" >> "$setup_log" 2>&1 - echo 0; + return 0; fi } @@ -287,12 +286,11 @@ check_salt_minion_status() { echo "Checking if the salt minion will respond to jobs" >> "$setup_log" 2>&1 salt "$MINION_ID" test.ping >> "$setup_log" 2>&1 local status=$? - #true if there is an issue getting a job response from the minion if [ $status -gt 0 ]; then - echo 1; + return 1; else echo "Received job response from salt minion" >> "$setup_log" 2>&1 - echo 0; + return 0; fi } @@ -1391,7 +1389,7 @@ reinstall_init() { { if command -v salt-call &> /dev/null; then - # Disable scheduled jobs so highstate doesn't start running during the install + # Disable schedule so highstate doesn't start running during the install salt-call -l info schedule.disable # Kill any currently running salt jobs, also to prevent issues with highstate. @@ -1406,12 +1404,12 @@ reinstall_init() { local count=0 while check_service_status "$service"; do - if [[ $count > $service_retry_count ]]; then + if [[ $count -gt $service_retry_count ]]; then echo "Could not stop $service after 1 minute, exiting setup." # Stop the systemctl process trying to kill the service, show user a message, then exit setup kill -9 $pid - whiptail_service_stop_failed "$service" + kill -SIGSOKILL "$(ps --pid $$ -oppid=)"; exit 1 fi sleep 5 ((count++)) @@ -1419,7 +1417,7 @@ reinstall_init() { done # Remove all salt configs - rm -rf /etc/salt/global /etc/salt/minion /etc/salt/master /etc/salt/pki/* + rm -rf /etc/salt/grains /etc/salt/minion /etc/salt/pki/* if command -v docker &> /dev/null; then # Stop and remove all so-* containers so files can be changed with more safety @@ -1440,7 +1438,7 @@ reinstall_init() { # Remove the old launcher package in case the config changes remove_package launcher-final - } >> $setup_log 2>&1 + } >> "$setup_log" 2>&1 } backup_dir() { @@ -1637,61 +1635,47 @@ salt_checkin() { "salt-master" \ "salt-minion" ) - local LOOP_COUNT=0 - for service in "${SALT_SERVICES[@]}"; do - echo "Stopping service $service" >> "$setup_log" 2>&1 - systemctl stop "$service" >> "$setup_log" 2>&1 - LOOP_COUNT=0 - while check_service_status "$service"; do - echo "$service still running" >> "$setup_log" 2>&1 - if [ $LOOP_COUNT -gt 60 ]; then - echo "$service could not be stopped in 60 seconds, exiting" >> "$setup_log" 2>&1 - exit 1 - fi - sleep 1; - ((LOOP_COUNT+=1)) - done - done - - sleep 5; + local count=0 for service in "${SALT_SERVICES[@]}"; do - echo "Starting service $service" >> "$setup_log" 2>&1 - systemctl start "$service" >> "$setup_log" 2>&1 - LOOP_COUNT=0 + { + echo "Restarting service $service" + systemctl restart "$service" & + local pid=$! + } >> "$setup_log" 2>&1 + + count=0 while ! (check_service_status "$service"); do echo "$service still not running" >> "$setup_log" 2>&1 - if [ $LOOP_COUNT -gt 60 ]; then - echo "$service could not be started in 60 seconds, exiting" >> "$setup_log" 2>&1 - exit 1 + if [ $count -gt 120 ]; then + echo "$service could not be restarted in 120 seconds, exiting" >> "$setup_log" 2>&1 + kill -SIGSOKILL "$(ps --pid $$ -oppid=)"; exit 1 fi sleep 1; - ((LOOP_COUNT+=1)) + ((count++)) done done - sleep 5; - - LOOP_COUNT=0 - while (( $(check_salt_master_status) )); do + count=0 + while ! (check_salt_master_status); do echo "salt minion cannot talk to salt master" >> "$setup_log" 2>&1 - if [ $LOOP_COUNT -gt 30 ]; then + if [ $count -gt 30 ]; then echo "salt minion could not talk to salt master after 30 attempts, exiting" >> "$setup_log" 2>&1 - exit 1 + kill -SIGSOKILL "$(ps --pid $$ -oppid=)"; exit 1 fi sleep 1; - ((LOOP_COUNT+=1)) + ((count++)) done - LOOP_COUNT=0 - while (( $(check_salt_minion_status) )); do + count=0 + while ! (check_salt_minion_status); do echo "salt master did not get a job response from salt minion" >> "$setup_log" 2>&1 - if [ $LOOP_COUNT -gt 30 ]; then + if [ $count -gt 30 ]; then echo "salt master did not get a job response from salt minion after 30 attempts, exiting" >> "$setup_log" 2>&1 - exit 1 + kill -SIGSOKILL "$(ps --pid $$ -oppid=)"; exit 1 fi sleep 1; - ((LOOP_COUNT+=1)) + ((count++)) done echo " Confirming existence of the CA certificate" diff --git a/setup/so-setup b/setup/so-setup index 77c579cfc..2a6b4e925 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -483,6 +483,18 @@ if [[ $is_minion || $is_import ]]; then [ "$automated" == no ] && copy_ssh_key >> $setup_log 2>&1 fi + +# Exit parent script if +trap 'catch $? $LINENO' SIGSOKILL + +catch() { + if [ "$1" != 0 ]; then + info "Fatal error occurred at $2 in so-setup, failing setup." + whiptail_setup_failed + exit + fi +} + # Begin install { # Set initial percentage to 0 @@ -583,7 +595,7 @@ fi if [[ $is_minion ]]; then set_progress_str 22 'Checking if the Salt Minion needs to be updated' - salt-call state.apply salt.minion -l info >> $setup_log 2>&1 + salt-call state.apply -l info salt.minion >> $setup_log 2>&1 fi set_progress_str 23 'Generating CA and checking in' diff --git a/setup/so-whiptail b/setup/so-whiptail index a41e61f94..11d968910 100755 --- a/setup/so-whiptail +++ b/setup/so-whiptail @@ -1175,19 +1175,6 @@ whiptail_sensor_config() { } -whiptail_service_stop_failed() { - local service=$1 - - read -r -d '' message <<- EOM - The ${service} service could not be stopped. Please stop it manually and then re-run setup. - - Press ENTER to exit the installer. - EOM - - whiptail --title "Security Onion Setup" --msgbox "$message" 10 75 - exit 1 -} - whiptail_set_hostname() { [ -n "$TESTING" ] && return From 8fe43d6d5642116e03f77308e4dedee4e41bdc61 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Wed, 2 Dec 2020 13:35:57 -0500 Subject: [PATCH 07/20] [fix] Print WARNING instead of ERROR if minion is not responding initially --- setup/so-functions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/so-functions b/setup/so-functions index 9cf01d74c..98fd50bf3 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -284,7 +284,7 @@ check_salt_master_status() { check_salt_minion_status() { echo "Checking if the salt minion will respond to jobs" >> "$setup_log" 2>&1 - salt "$MINION_ID" test.ping >> "$setup_log" 2>&1 + salt "$MINION_ID" test.ping | sed 's/ERROR/WARNING/' >> "$setup_log" 2>&1 local status=$? if [ $status -gt 0 ]; then return 1; From cc5d54764a05493eb250bddfbe27e8acba942386 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Wed, 2 Dec 2020 13:54:02 -0500 Subject: [PATCH 08/20] [fix] sed masks command return code, remove --- setup/so-functions | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 98fd50bf3..4772f3707 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -274,9 +274,10 @@ check_salt_master_status() { salt-call state.show_top > /dev/null 2>&1 local status=$? if [ $status -gt 0 ]; then + echo " Could not talk to salt master" >> "$setup_log" 2>&1 return 1; else - echo "Can talk to salt master" >> "$setup_log" 2>&1 + echo " Can talk to salt master" >> "$setup_log" 2>&1 return 0; fi @@ -284,15 +285,15 @@ check_salt_master_status() { check_salt_minion_status() { echo "Checking if the salt minion will respond to jobs" >> "$setup_log" 2>&1 - salt "$MINION_ID" test.ping | sed 's/ERROR/WARNING/' >> "$setup_log" 2>&1 + salt "$MINION_ID" test.ping > /dev/null 2>&1 local status=$? if [ $status -gt 0 ]; then + echo " Minion did not respond" >> "$setup_log" 2>&1 return 1; else - echo "Received job response from salt minion" >> "$setup_log" 2>&1 + echo " Received job response from salt minion" >> "$setup_log" 2>&1 return 0; fi - } check_soremote_pass() { From fc7fe235905a6346d921095e460d43797e27e7c7 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Wed, 2 Dec 2020 14:06:50 -0500 Subject: [PATCH 09/20] [fix] Correct signal naming --- setup/so-functions | 8 ++++---- setup/so-setup | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 4772f3707..6aa30f89c 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1410,7 +1410,7 @@ reinstall_init() { # Stop the systemctl process trying to kill the service, show user a message, then exit setup kill -9 $pid - kill -SIGSOKILL "$(ps --pid $$ -oppid=)"; exit 1 + kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 fi sleep 5 ((count++)) @@ -1650,7 +1650,7 @@ salt_checkin() { echo "$service still not running" >> "$setup_log" 2>&1 if [ $count -gt 120 ]; then echo "$service could not be restarted in 120 seconds, exiting" >> "$setup_log" 2>&1 - kill -SIGSOKILL "$(ps --pid $$ -oppid=)"; exit 1 + kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 fi sleep 1; ((count++)) @@ -1662,7 +1662,7 @@ salt_checkin() { echo "salt minion cannot talk to salt master" >> "$setup_log" 2>&1 if [ $count -gt 30 ]; then echo "salt minion could not talk to salt master after 30 attempts, exiting" >> "$setup_log" 2>&1 - kill -SIGSOKILL "$(ps --pid $$ -oppid=)"; exit 1 + kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 fi sleep 1; ((count++)) @@ -1673,7 +1673,7 @@ salt_checkin() { echo "salt master did not get a job response from salt minion" >> "$setup_log" 2>&1 if [ $count -gt 30 ]; then echo "salt master did not get a job response from salt minion after 30 attempts, exiting" >> "$setup_log" 2>&1 - kill -SIGSOKILL "$(ps --pid $$ -oppid=)"; exit 1 + kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 fi sleep 1; ((count++)) diff --git a/setup/so-setup b/setup/so-setup index 2a6b4e925..79ba916a9 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -485,7 +485,7 @@ fi # Exit parent script if -trap 'catch $? $LINENO' SIGSOKILL +trap 'catch $? $LINENO' SIGUSR1 catch() { if [ "$1" != 0 ]; then From af8295a65130894f1b8984c3097864548ffb7c87 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Wed, 2 Dec 2020 17:07:49 -0500 Subject: [PATCH 10/20] [reafactor] systemctl stop -> kill --- setup/so-functions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/so-functions b/setup/so-functions index 6aa30f89c..4103f0988 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1400,7 +1400,7 @@ reinstall_init() { # Kill any salt processes (safely) for service in "${salt_services[@]}"; do # Stop the service in the background so we can exit after a certain amount of time - systemctl stop "$service" & + systemctl kill "$service" & local pid=$! local count=0 From 76fff28dfa5a85f217435e7cec3018e1915b9876 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Thu, 3 Dec 2020 10:18:44 -0500 Subject: [PATCH 11/20] [fix] Correct logic for service check + bash trap --- setup/so-functions | 2 +- setup/so-setup | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 4103f0988..76e579765 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1404,7 +1404,7 @@ reinstall_init() { local pid=$! local count=0 - while check_service_status "$service"; do + while ! (check_service_status "$service"); do if [[ $count -gt $service_retry_count ]]; then echo "Could not stop $service after 1 minute, exiting setup." diff --git a/setup/so-setup b/setup/so-setup index 79ba916a9..924bdf307 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -485,14 +485,12 @@ fi # Exit parent script if -trap 'catch $? $LINENO' SIGUSR1 +trap 'catch $LINENO' SIGUSR1 catch() { - if [ "$1" != 0 ]; then - info "Fatal error occurred at $2 in so-setup, failing setup." - whiptail_setup_failed - exit - fi + info "Fatal error occurred at $2 in so-setup, failing setup." + whiptail_setup_failed + exit } # Begin install From 2c208ec943a8fc45f912731076009c99bf19503a Mon Sep 17 00:00:00 2001 From: William Wernert Date: Thu, 3 Dec 2020 10:31:45 -0500 Subject: [PATCH 12/20] [fix] kill -> stop, add indent to service check, revert incorrect logic --- setup/so-functions | 8 ++++---- setup/so-setup | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 76e579765..767ca6288 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -259,10 +259,10 @@ check_service_status() { systemctl status $service_name > /dev/null 2>&1 local status=$? if [ $status -gt 0 ]; then - echo "$service_name is not running" >> "$setup_log" 2>&1 + echo " $service_name is not running" >> "$setup_log" 2>&1 return 1; else - echo "$service_name is running" >> "$setup_log" 2>&1 + echo " $service_name is running" >> "$setup_log" 2>&1 return 0; fi @@ -1400,11 +1400,11 @@ reinstall_init() { # Kill any salt processes (safely) for service in "${salt_services[@]}"; do # Stop the service in the background so we can exit after a certain amount of time - systemctl kill "$service" & + systemctl stop "$service" & local pid=$! local count=0 - while ! (check_service_status "$service"); do + while check_service_status "$service"; do if [[ $count -gt $service_retry_count ]]; then echo "Could not stop $service after 1 minute, exiting setup." diff --git a/setup/so-setup b/setup/so-setup index 924bdf307..3bec2bb87 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -499,11 +499,11 @@ catch() { export percentage=0 set_path - if [[ $is_manager && $is_airgap ]]; then - info "Creating airgap repo" - create_repo >> $setup_log 2>&1 + if [[ $is_manager && $is_airgap ]]; then + info "Creating airgap repo" + create_repo >> $setup_log 2>&1 airgap_rules >> $setup_log 2>&1 - fi + fi if [[ $is_minion ]]; then set_progress_str 1 'Configuring firewall' From 80ce8b5e41c6573bde3de15ef636ecc8f26c1d81 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Thu, 3 Dec 2020 13:59:25 -0500 Subject: [PATCH 13/20] [refactor] Run all changes inside whiptail progress, use grep -q --- setup/so-functions | 4 +- setup/so-setup | 117 +++++++++++++++++++++++---------------------- 2 files changed, 62 insertions(+), 59 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 767ca6288..8c23441ed 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -766,12 +766,12 @@ detect_os() { disable_auto_start() { - if crontab -l -u $INSTALLUSERNAME 2>&1 | grep so-setup > /dev/null 2>&1; then + if crontab -l -u $INSTALLUSERNAME 2>&1 | grep -q so-setup; then # Remove the automated setup script from crontab, if it exists logCmd "crontab -u $INSTALLUSERNAME -r" fi - if grep so-setup /home/$INSTALLUSERNAME/.bash_profile > /dev/null 2>&1; then + if grep -q so-setup /home/$INSTALLUSERNAME/.bash_profile; then # Truncate last line of the bash profile info "Removing auto-run of setup from bash profile" sed -i '$ d' /home/$INSTALLUSERNAME/.bash_profile >> "$setup_log" 2>&1 diff --git a/setup/so-setup b/setup/so-setup index 3bec2bb87..73363959c 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -428,63 +428,7 @@ whiptail_make_changes # From here on changes will be made. echo "1" > /root/accept_changes -if [[ $is_reinstall ]]; then - reinstall_init -fi - -if [[ -n "$TURBO" ]]; then - use_turbo_proxy -fi - -if [[ "$setup_type" == 'iso' ]]; then - # Init networking so rest of install works - set_hostname - set_management_interface -fi - -disable_ipv6 -disable_auto_start - -if [[ "$setup_type" != 'iso' ]]; then - set_hostname -fi - -if [[ $is_minion ]]; then - add_mngr_ip_to_hosts -fi - -{ - mark_version; - clear_manager; -} >> $setup_log 2>&1 - - -if [[ $is_manager || $is_import ]]; then - { - generate_passwords; - secrets_pillar; - add_socore_user_manager; - } >> $setup_log 2>&1 -fi - -if [[ $is_manager && ! $is_eval ]]; then - add_soremote_user_manager >> $setup_log 2>&1 -fi - -{ - set_main_ip; - set_redirect; -} >> $setup_log 2>&1 - -host_pillar >> $setup_log 2>&1 - -if [[ $is_minion || $is_import ]]; then - set_updates >> $setup_log 2>&1 - [ "$automated" == no ] && copy_ssh_key >> $setup_log 2>&1 -fi - - -# Exit parent script if +# Set up handler for setup to exit early (use `kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1` in child scripts) trap 'catch $LINENO' SIGUSR1 catch() { @@ -497,8 +441,67 @@ catch() { { # Set initial percentage to 0 export percentage=0 + + # Show initial progress message + set_progress_str 0 'Running initial configuration steps' + set_path + if [[ $is_reinstall ]]; then + reinstall_init + fi + + if [[ -n "$TURBO" ]]; then + use_turbo_proxy + fi + + if [[ "$setup_type" == 'iso' ]]; then + # Init networking so rest of install works + set_hostname >> $setup_log 2>&1 + set_management_interface + fi + + disable_ipv6 + disable_auto_start + + if [[ "$setup_type" != 'iso' ]]; then + set_hostname >> $setup_log 2>&1 + fi + + if [[ $is_minion ]]; then + add_mngr_ip_to_hosts + fi + + { + mark_version; + clear_manager; + } >> $setup_log 2>&1 + + + if [[ $is_manager || $is_import ]]; then + { + generate_passwords; + secrets_pillar; + add_socore_user_manager; + } >> $setup_log 2>&1 + fi + + if [[ $is_manager && ! $is_eval ]]; then + add_soremote_user_manager >> $setup_log 2>&1 + fi + + { + set_main_ip; + set_redirect; + } >> $setup_log 2>&1 + + host_pillar >> $setup_log 2>&1 + + if [[ $is_minion || $is_import ]]; then + set_updates >> $setup_log 2>&1 + [ "$automated" == no ] && copy_ssh_key >> $setup_log 2>&1 + fi + if [[ $is_manager && $is_airgap ]]; then info "Creating airgap repo" create_repo >> $setup_log 2>&1 From 3049718660d92d39492f2cac6433be6b8961d5fc Mon Sep 17 00:00:00 2001 From: William Wernert Date: Thu, 3 Dec 2020 14:42:13 -0500 Subject: [PATCH 14/20] [fix] Kill + start salt-minion if it isn't responding --- setup/so-functions | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 8c23441ed..b42e03bb7 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1382,10 +1382,12 @@ reserve_group_ids() { reinstall_init() { info "Putting system in state to run setup again" - local salt_services=( - "salt-master" - "salt-minion" - ) + if [[ $install_type =~ ^(MANAGER|EVAL|HELIXSENSOR|MANAGERSEARCH|STANDALONE|FLEET|IMPORT)$ ]]; then + local salt_services=( "salt-master" "salt-minion" ) + else + local salt_services=( "salt-minion" ) + fi + local service_retry_count=20 { @@ -1412,6 +1414,7 @@ reinstall_init() { kill -9 $pid kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 fi + sleep 5 ((count++)) done @@ -1671,6 +1674,8 @@ salt_checkin() { count=0 while ! (check_salt_minion_status); do echo "salt master did not get a job response from salt minion" >> "$setup_log" 2>&1 + systemctl kill salt-minion + systemctl start salt-minion if [ $count -gt 30 ]; then echo "salt master did not get a job response from salt minion after 30 attempts, exiting" >> "$setup_log" 2>&1 kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 From b5bfad07dc3c53d6ebe301b29ef33f29437cc1ba Mon Sep 17 00:00:00 2001 From: William Wernert Date: Thu, 3 Dec 2020 14:55:23 -0500 Subject: [PATCH 15/20] [fix] kill/start after if statement --- setup/so-functions | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index b42e03bb7..30399170f 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1674,12 +1674,12 @@ salt_checkin() { count=0 while ! (check_salt_minion_status); do echo "salt master did not get a job response from salt minion" >> "$setup_log" 2>&1 - systemctl kill salt-minion - systemctl start salt-minion if [ $count -gt 30 ]; then echo "salt master did not get a job response from salt minion after 30 attempts, exiting" >> "$setup_log" 2>&1 kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 fi + systemctl kill salt-minion + systemctl start salt-minion sleep 1; ((count++)) done From ac85cbc3f19516901249dcd7902323896cd377fa Mon Sep 17 00:00:00 2001 From: William Wernert Date: Thu, 3 Dec 2020 15:10:41 -0500 Subject: [PATCH 16/20] [fix] Move set_redirect out of sub-shell --- setup/so-setup | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/setup/so-setup b/setup/so-setup index 73363959c..8dcce0e9b 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -437,6 +437,12 @@ catch() { exit } +# This block sets REDIRECTIT which is used by a function outside the below subshell +{ + set_main_ip; + set_redirect; +} >> $setup_log 2>&1 + # Begin install { # Set initial percentage to 0 @@ -490,11 +496,6 @@ catch() { add_soremote_user_manager >> $setup_log 2>&1 fi - { - set_main_ip; - set_redirect; - } >> $setup_log 2>&1 - host_pillar >> $setup_log 2>&1 if [[ $is_minion || $is_import ]]; then From ebade0a5a6a6083ddba30080b336122f0d0ddb64 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Thu, 3 Dec 2020 15:20:33 -0500 Subject: [PATCH 17/20] [fix] Also kill+start while trying to restart service initially --- setup/so-functions | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup/so-functions b/setup/so-functions index 30399170f..e17fa23ce 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1650,7 +1650,8 @@ salt_checkin() { count=0 while ! (check_service_status "$service"); do - echo "$service still not running" >> "$setup_log" 2>&1 + systemctl kill "$service" + systemctl start "$service" if [ $count -gt 120 ]; then echo "$service could not be restarted in 120 seconds, exiting" >> "$setup_log" 2>&1 kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 From 660c768f8f9a4c5ee33ad6f1f1c9fe7f9853580c Mon Sep 17 00:00:00 2001 From: William Wernert Date: Thu, 3 Dec 2020 15:26:59 -0500 Subject: [PATCH 18/20] Only kill+start on final loop and increase time between status checks --- setup/so-functions | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index e17fa23ce..d6c309431 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1650,13 +1650,16 @@ salt_checkin() { count=0 while ! (check_service_status "$service"); do - systemctl kill "$service" - systemctl start "$service" - if [ $count -gt 120 ]; then + if [ $count -eq 12 ]; then + systemctl kill "$service" + systemctl start "$service" + fi + + if [ $count -gt 12 ]; then echo "$service could not be restarted in 120 seconds, exiting" >> "$setup_log" 2>&1 kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 fi - sleep 1; + sleep 10; ((count++)) done done From 3273a6366235bc7a7f26f88e4c55f1b6a1aaf7eb Mon Sep 17 00:00:00 2001 From: William Wernert Date: Thu, 3 Dec 2020 15:35:50 -0500 Subject: [PATCH 19/20] [fix] kill old restart pid and assign new pid for start --- setup/so-functions | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index d6c309431..67cbb7c24 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1650,13 +1650,19 @@ salt_checkin() { count=0 while ! (check_service_status "$service"); do + # On final loop, kill the pid trying to restart service and try to manually kill then start it if [ $count -eq 12 ]; then - systemctl kill "$service" - systemctl start "$service" + { + kill -9 "$pid" + systemctl kill "$service" + systemctl start "$service" & + local pid=$! + } >> "$setup_log" 2>&1 fi if [ $count -gt 12 ]; then echo "$service could not be restarted in 120 seconds, exiting" >> "$setup_log" 2>&1 + kill -9 "$pid" kill -SIGUSR1 "$(ps --pid $$ -oppid=)"; exit 1 fi sleep 10; From fca50660a26864a65381312acc561b34f3c8ade1 Mon Sep 17 00:00:00 2001 From: William Wernert Date: Fri, 4 Dec 2020 09:33:28 -0500 Subject: [PATCH 20/20] [fix] Trap argument off by one --- setup/so-setup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/so-setup b/setup/so-setup index 8dcce0e9b..1ea238a38 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -432,7 +432,7 @@ echo "1" > /root/accept_changes trap 'catch $LINENO' SIGUSR1 catch() { - info "Fatal error occurred at $2 in so-setup, failing setup." + info "Fatal error occurred at $1 in so-setup, failing setup." whiptail_setup_failed exit }