From 05a6d702b07b6bf1b6ae3a9f3a74a3f04586318d Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 19 Jan 2023 10:03:03 -0500 Subject: [PATCH 1/5] Add logic to determine if setup succeeded and provide relevant output --- salt/common/tools/sbin/so-common | 20 +----- setup/so-setup | 4 +- setup/so-verify | 108 +++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 19 deletions(-) create mode 100644 setup/so-verify diff --git a/salt/common/tools/sbin/so-common b/salt/common/tools/sbin/so-common index 43e6f4446..c4a3fdbaf 100755 --- a/salt/common/tools/sbin/so-common +++ b/salt/common/tools/sbin/so-common @@ -225,31 +225,17 @@ init_monitor() { } is_manager_node() { - # Check to see if this is a manager node - role=$(lookup_role) - is_single_node_grid && return 0 - [ $role == 'manager' ] && return 0 - [ $role == 'managersearch' ] && return 0 - [ $role == 'helix' ] && return 0 - return 1 + grep "role: so-" /etc/salt/grains | grep -E "manager|eval|managersearch|standalone|import" &> /dev/null } is_sensor_node() { # Check to see if this is a sensor (forward) node - role=$(lookup_role) is_single_node_grid && return 0 - [ $role == 'sensor' ] && return 0 - [ $role == 'heavynode' ] && return 0 - [ $role == 'helix' ] && return 0 - return 1 + grep "role: so-" /etc/salt/grains | grep -E "sensor|heavynode|helix" &> /dev/null } is_single_node_grid() { - role=$(lookup_role) - [ $role == 'eval' ] && return 0 - [ $role == 'standalone' ] && return 0 - [ $role == 'import' ] && return 0 - return 1 + grep "role: so-" /etc/salt/grains | grep -E "eval|standalone|import" &> /dev/null } lookup_bond_interfaces() { diff --git a/setup/so-setup b/setup/so-setup index 99e218a61..80c6372c9 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -585,7 +585,7 @@ if ! [[ -f $install_opt_file ]]; then fi checkin_at_boot set_initial_firewall_access - whiptail_setup_complete + verify_setup $setup_type else touch /root/accept_changes mkdir -p /opt/so @@ -608,7 +608,7 @@ if ! 
[[ -f $install_opt_file ]]; then configure_minion "$minion_type" drop_install_options checkin_at_boot - whiptail_setup_complete + verify_setup $setup_type fi # Need to make sure the latest install is located on the web server of the manager to check the versions and donwload the code if required diff --git a/setup/so-verify b/setup/so-verify new file mode 100644 index 000000000..22947df7e --- /dev/null +++ b/setup/so-verify @@ -0,0 +1,108 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +cd "$(dirname "$0")" || exit 255 + +source ../salt/common/tools/sbin/so-common +source ./so-functions +source ./so-whiptail +source ./so-variables + +setup_type=$1 + +setup_in_progress() { + ps -ef | grep so-setup | grep -v grep &> /dev/null +} + +using_iso() { # succeeds only when the setup type passed as $1 is "iso" + if [ "$setup_type" == "iso" ]; then + return 0 + fi + return 1 +} + +whipit() { + if [[ $exit_code -eq 0 ]]; then + whiptail_setup_complete + else + whiptail_setup_failed + fi +} + +# Check entire setup log for errors or unexpected salt states +log_has_errors() { + grep -E "FAILED|Failed|failed|ERROR|Error|Result: False" "$setup_log" | \ + grep -vE "[ERROR ] The Salt Master has cached the public key for this node" | \ + grep -vE "Minion failed to authenticate with master" | \ + grep -vE "perl-Error-" | \ + grep -vE "Failed:\s*?[0-9]+" | \ + grep -vE "Status .* was not found" | \ + grep -vE "Uncaught exception, closing connection" | \ + grep -vE "Exception in callback None" | \ + grep -vE "deprecation: ERROR" | \ + grep -vE "code: 100" | \ + grep -vE "Running scope as unit" &> "$error_log" + + if [[ $? 
-eq 0 ]]; then + return 0 + fi + return 1 +} + +# For ISO installs, we know nothing else can be running on this server, so there should be +# nothing in any mail spool dir. find exits 0 even when nothing matches, so test its output. +cron_error_in_mail_spool() { + if find /var/spool/mail/ -type f -size +0 2> /dev/null | grep -q .; then + return 0 + fi + return 1 +} + +# so-status must return a 0 exit code, indicating all containers are up and healthy. Will retry for a limited +# time before giving up. +status_failed() { + max_retries=120 + wait_secs=10 + retry_attempts=0 + while ! so-status -q; do + if [[ $retry_attempts -eq $max_retries ]]; then + return 0 + fi + retry_attempts=$((retry_attempts+1)) + echo "INFO: so-status returned non-zero exit code; will retry in $wait_secs seconds ($retry_attempts/$max_retries)" + sleep $wait_secs + done + return 1 +} + +main() { + exit_code=0 + if log_has_errors; then + echo "WARNING: Errors detected during setup" + exit_code=1 + fi + + if using_iso && cron_error_in_mail_spool; then + echo "WARNING: Unexpected cron job output in mail spool" + exit_code=1 + fi + + if is_manager_node && status_failed; then + echo "WARNING: Containers are not in a healthy state" + exit_code=1 + fi + + if [[ exit_code -eq 0 ]]; then + echo "Successfully completed setup!" + fi + + setup_in_progress && whipit $exit_code + + exit $exit_code +} + +main From c5260e4787cd83153dbf5dc0be774e5f1e4706e1 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 19 Jan 2023 11:25:59 -0500 Subject: [PATCH 2/5] verify setup --- setup/so-setup | 4 ++-- setup/so-verify | 12 +++--------- 2 files changed, 5 insertions(+), 11 deletions(-) mode change 100644 => 100755 setup/so-verify diff --git a/setup/so-setup b/setup/so-setup index 80c6372c9..ad4f57d55 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -585,7 +585,7 @@ if ! [[ -f $install_opt_file ]]; then fi checkin_at_boot set_initial_firewall_access - verify_setup $setup_type + so-verify $setup_type else touch /root/accept_changes mkdir -p /opt/so @@ -608,7 +608,7 @@ if ! 
[[ -f $install_opt_file ]]; then configure_minion "$minion_type" drop_install_options checkin_at_boot - verify_setup $setup_type + so-verify $setup_type fi # Need to make sure the latest install is located on the web server of the manager to check the versions and donwload the code if required diff --git a/setup/so-verify b/setup/so-verify old mode 100644 new mode 100755 index 22947df7e..26581e673 --- a/setup/so-verify +++ b/setup/so-verify @@ -84,19 +84,13 @@ main() { if log_has_errors; then echo "WARNING: Errors detected during setup" exit_code=1 - fi - - if using_iso && cron_error_in_mail_spool; then + elif using_iso && cron_error_in_mail_spool; then echo "WARNING: Unexpected cron job output in mail spool" exit_code=1 - fi - - if is_manager_node && status_failed; then + elif is_manager_node && status_failed; then echo "WARNING: Containers are not in a healthy state" exit_code=1 - fi - - if [[ exit_code -eq 0 ]]; then + else echo "Successfully completed setup!" fi From 6b7a8e1fcd3db82d78a5625d24096cad5c36d17a Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 19 Jan 2023 12:53:24 -0500 Subject: [PATCH 3/5] fix verify path --- setup/so-setup | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup/so-setup b/setup/so-setup index ad4f57d55..fb19a9120 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -585,7 +585,7 @@ if ! [[ -f $install_opt_file ]]; then fi checkin_at_boot set_initial_firewall_access - so-verify $setup_type + ./so-verify $setup_type else touch /root/accept_changes mkdir -p /opt/so @@ -608,7 +608,7 @@ if ! 
[[ -f $install_opt_file ]]; then configure_minion "$minion_type" drop_install_options checkin_at_boot - so-verify $setup_type + ./so-verify $setup_type fi # Need to make sure the latest install is located on the web server of the manager to check the versions and donwload the code if required From 59177288efb054cb1f4c2daba95bb160284a9fd3 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 19 Jan 2023 13:56:14 -0500 Subject: [PATCH 4/5] correct grep patterns --- setup/so-verify | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup/so-verify b/setup/so-verify index 26581e673..c4af1d424 100755 --- a/setup/so-verify +++ b/setup/so-verify @@ -36,8 +36,8 @@ whipit() { # Check entire setup log for errors or unexpected salt states log_has_errors() { grep -E "FAILED|Failed|failed|ERROR|Error|Result: False" "$setup_log" | \ - grep -vE "[ERROR ] The Salt Master has cached the public key for this node" | \ - grep -vE "Minion failed to authenticate with master" | \ + grep -vE "The Salt Master has cached the public key for this node" | \ + grep -vE "Minion failed to authenticate with the master" | \ grep -vE "perl-Error-" | \ grep -vE "Failed:\s*?[0-9]+" | \ grep -vE "Status .* was not found" | \ From 79fb5dc52589bb469949c75b8265820600fd33f9 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 19 Jan 2023 14:19:55 -0500 Subject: [PATCH 5/5] prevent false success occurring when deleting the grafana dashboard --- .../tools/sbin/so-grafana-dashboard-folder-delete | 3 +++ setup/so-verify | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/salt/common/tools/sbin/so-grafana-dashboard-folder-delete b/salt/common/tools/sbin/so-grafana-dashboard-folder-delete index f3338de84..0783fccea 100755 --- a/salt/common/tools/sbin/so-grafana-dashboard-folder-delete +++ b/salt/common/tools/sbin/so-grafana-dashboard-folder-delete @@ -1,5 +1,8 @@ # this script is used to delete the default Grafana dashboard folders that existed prior to Grafana dashboard and Salt 
management changes in 2.3.70 +# Exit if an error occurs. The next highstate will retry. +set -e + folders=$(curl -X GET http://admin:{{salt['pillar.get']('secrets:grafana_admin')}}@localhost:3000/api/folders | jq -r '.[] | @base64') delfolder=("Manager" "Manager Search" "Sensor Nodes" "Search Nodes" "Standalone" "Eval Mode") diff --git a/setup/so-verify b/setup/so-verify index c4af1d424..234ef2dec 100755 --- a/setup/so-verify +++ b/setup/so-verify @@ -35,9 +35,20 @@ whipit() { # Check entire setup log for errors or unexpected salt states log_has_errors() { + # Ignore salt master cached public key and minion failed to auth because this is a test + # to see if the salt key had already been accepted. + + # Ignore failed to connect to ::1 since we have most curls wrapped in a retry. + + # Ignore perl-Error- since that is the name of a Perl package SO installs. + + # Ignore Failed: 0 since that is the salt state output, and we detect state failures + # via Result: False already. + grep -E "FAILED|Failed|failed|ERROR|Error|Result: False" "$setup_log" | \ grep -vE "The Salt Master has cached the public key for this node" | \ grep -vE "Minion failed to authenticate with the master" | \ + grep -vE "Failed to connect to ::1" | \ grep -vE "perl-Error-" | \ grep -vE "Failed:\s*?[0-9]+" | \ grep -vE "Status .* was not found" | \