Merge pull request #9603 from Security-Onion-Solutions/kilo

Handle setup failures
Jason Ertel
2023-01-19 15:49:41 -05:00
committed by GitHub
4 changed files with 121 additions and 19 deletions

View File

@@ -225,31 +225,17 @@ init_monitor() {
 }
 
 is_manager_node() {
   # Check to see if this is a manager node
-  role=$(lookup_role)
-  is_single_node_grid && return 0
-  [ $role == 'manager' ] && return 0
-  [ $role == 'managersearch' ] && return 0
-  [ $role == 'helix' ] && return 0
-  return 1
+  grep "role: so-" /etc/salt/grains | grep -E "manager|eval|managersearch|standalone|import" &> /dev/null
 }
 
 is_sensor_node() {
   # Check to see if this is a sensor (forward) node
-  role=$(lookup_role)
-  is_single_node_grid && return 0
-  [ $role == 'sensor' ] && return 0
-  [ $role == 'heavynode' ] && return 0
-  [ $role == 'helix' ] && return 0
-  return 1
+  grep "role: so-" /etc/salt/grains | grep -E "sensor|heavynode|helix" &> /dev/null
 }
 
 is_single_node_grid() {
-  role=$(lookup_role)
-  [ $role == 'eval' ] && return 0
-  [ $role == 'standalone' ] && return 0
-  [ $role == 'import' ] && return 0
-  return 1
+  grep "role: so-" /etc/salt/grains | grep -E "eval|standalone|import" &> /dev/null
 }
 
 lookup_bond_interfaces() {
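
The rewritten checks read the minion's role directly from the Salt grains file rather than going through lookup_role. For reference, a minimal sketch of the grains entry these greps match (the exact file contents vary by install, so treat this excerpt as an assumption):

# Hypothetical /etc/salt/grains excerpt
role: so-managersearch

The first grep narrows the file to the "role: so-" line and the second matches the role name against the allowed set; note that is_manager_node now also matches the single-node roles (eval, standalone, import), folding the old is_single_node_grid short-circuit into its pattern.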

View File

@@ -1,5 +1,8 @@
 # this script is used to delete the default Grafana dashboard folders that existed prior to Grafana dashboard and Salt management changes in 2.3.70
 
+# Exit if an error occurs. The next highstate will retry.
+set -e
+
 folders=$(curl -X GET http://admin:{{salt['pillar.get']('secrets:grafana_admin')}}@localhost:3000/api/folders | jq -r '.[] | @base64')
 delfolder=("Manager" "Manager Search" "Sensor Nodes" "Search Nodes" "Standalone" "Eval Mode")
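
This hunk ends before the script's deletion logic, so the loop itself is not shown. As a minimal sketch only, assuming Grafana's documented DELETE /api/folders/:uid endpoint and the variables above, a loop consuming the base64-encoded folder list could look like:

for folder in $folders; do
  # Decode one folder object and extract its title and uid
  title=$(echo "$folder" | base64 --decode | jq -r '.title')
  uid=$(echo "$folder" | base64 --decode | jq -r '.uid')
  for del in "${delfolder[@]}"; do
    if [ "$title" == "$del" ]; then
      # Hypothetical deletion call; the real script's body is not part of this hunk
      curl -X DELETE "http://admin:{{salt['pillar.get']('secrets:grafana_admin')}}@localhost:3000/api/folders/$uid"
    fi
  done
done

With set -e in place, a failed curl aborts the script, and per the new comment the next highstate retries it.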

View File

@@ -585,7 +585,7 @@ if ! [[ -f $install_opt_file ]]; then
   fi
   checkin_at_boot
   set_initial_firewall_access
-  whiptail_setup_complete
+  ./so-verify $setup_type
 else
   touch /root/accept_changes
   mkdir -p /opt/so
@@ -608,7 +608,7 @@ if ! [[ -f $install_opt_file ]]; then
   configure_minion "$minion_type"
   drop_install_options
   checkin_at_boot
-  whiptail_setup_complete
+  ./so-verify $setup_type
 fi
 
 # Need to make sure the latest install is located on the web server of the manager to check the versions and download the code if required

setup/so-verify · 113 lines · Executable file
View File

@@ -0,0 +1,113 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
cd "$(dirname "$0")" || exit 255
source ../salt/common/tools/sbin/so-common
source ./so-functions
source ./so-whiptail
source ./so-variables
setup_type=$1
setup_in_progress() {
  ps -ef | grep so-setup | grep -v grep &> /dev/null
}
using_iso() {
  if [ "$setup_type" == "iso" ]; then
    return 0
  fi
  return 1
}
whipit() {
  if [[ $1 -eq 0 ]]; then
    whiptail_setup_complete
  else
    whiptail_setup_failed
  fi
}
# Check entire setup log for errors or unexpected salt states
log_has_errors() {
  # Ignore salt master cached public key and minion failed to auth because this is a test
  # to see if the salt key had already been accepted.
  # Ignore failed to connect to ::1 since we have most curls wrapped in a retry.
  # Ignore perl-Error- since that is the name of a Perl package SO installs.
  # Ignore Failed: 0 since that is the salt state output, and we detect state failures
  # via Result: False already.
  grep -E "FAILED|Failed|failed|ERROR|Error|Result: False" "$setup_log" | \
    grep -vE "The Salt Master has cached the public key for this node" | \
    grep -vE "Minion failed to authenticate with the master" | \
    grep -vE "Failed to connect to ::1" | \
    grep -vE "perl-Error-" | \
    grep -vE "Failed:\s*?[0-9]+" | \
    grep -vE "Status .* was not found" | \
    grep -vE "Uncaught exception, closing connection" | \
    grep -vE "Exception in callback None" | \
    grep -vE "deprecation: ERROR" | \
    grep -vE "code: 100" | \
    grep -vE "Running scope as unit" &> "$error_log"
  if [[ $? -eq 0 ]]; then
    return 0
  fi
  return 1
}
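# Illustrative examples of how the filters behave (assumed log lines, not from a real run):
#   "local: Result: False"                          -> survives all filters, so setup is flagged
#   "Summary for local ... Failed:    0"            -> dropped by the Failed:\s*?[0-9]+ filter
#   "The Salt Master has cached the public key..."  -> dropped as an expected key-exchange message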
# For ISO installs, we know nothing else can be running on this server, so there should be
# nothing in any mail spool dir.
cron_error_in_mail_spool() {
  # find exits 0 whenever the directory is readable, so test for non-empty output instead
  if [[ -n $(find /var/spool/mail/ -type f -size +0 2>/dev/null) ]]; then
    return 0
  fi
  return 1
}
# so-status must return a 0 exit code, indicating all containers are up and healthy. Will retry
# for a limited time (120 retries x 10s = up to 20 minutes) before giving up.
status_failed() {
  max_retries=120
  wait_secs=10
  retry_attempts=0
  while ! so-status -q; do
    if [[ $retry_attempts -eq $max_retries ]]; then
      return 0
    fi
    retry_attempts=$((retry_attempts+1))
    echo "INFO: so-status returned non-zero exit code; will retry in $wait_secs seconds ($retry_attempts/$max_retries)"
    sleep $wait_secs
  done
  return 1
}
main() {
  exit_code=0

  if log_has_errors; then
    echo "WARNING: Errors detected during setup"
    exit_code=1
  elif using_iso && cron_error_in_mail_spool; then
    echo "WARNING: Unexpected cron job output in mail spool"
    exit_code=1
  elif is_manager_node && status_failed; then
    echo "WARNING: Containers are not in a healthy state"
    exit_code=1
  else
    echo "Successfully completed setup!"
  fi

  setup_in_progress && whipit $exit_code

  exit $exit_code
}

main
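
With so-setup now calling ./so-verify $setup_type in place of whiptail_setup_complete, the completion dialog reflects the verified outcome instead of unconditionally reporting success. A minimal sketch of running the script by hand after an install, assuming a checkout at /root/securityonion (the path is an assumption; the "iso" argument enables the mail-spool check):

cd /root/securityonion/setup   # hypothetical checkout location
./so-verify iso
echo $?   # 0 = setup verified; 1 = problems found in $setup_log, the mail spool, or so-status

On failure, the warning printed by main identifies which check tripped, and the filtered log matches are left in $error_log for review.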