diff --git a/salt/common/tools/sbin/so-common b/salt/common/tools/sbin/so-common
index f754b34ef..c19d51a42 100755
--- a/salt/common/tools/sbin/so-common
+++ b/salt/common/tools/sbin/so-common
@@ -133,11 +133,28 @@ check_elastic_license() {
 }
 
 check_salt_master_status() {
-  local timeout=$1
-  echo "Checking if we can talk to the salt master"
-  salt-call state.show_top concurrent=true
-
-  return
+  # Wait for the salt master to answer 'salt-call state.show_top'.
+  #   $1 - maximum number of attempts before giving up (default: 10)
+  # Returns 0 as soon as a call succeeds, 1 once the attempt limit is reached.
+  local count=0
+  local attempts="${1:-10}"
+  local current_time
+  current_time="$(date '+%b %d %H:%M:%S')"
+  echo "Checking if we can access the salt master and that it is ready at: ${current_time}"
+  while ! salt-call state.show_top -l error concurrent=true 1> /dev/null; do
+    current_time="$(date '+%b %d %H:%M:%S')"
+    echo "Can't access salt master or it is not ready at: ${current_time}"
+    ((count+=1))
+    # -ge rather than -eq so a limit of 0 (or less) still terminates the loop
+    if [[ $count -ge $attempts ]]; then
+      # 10 attempts takes about 5.5 minutes
+      echo "Gave up trying to access salt-master"
+      return 1
+    fi
+  done
+  current_time="$(date '+%b %d %H:%M:%S')"
+  echo "Successfully accessed and salt master ready at: ${current_time}"
+  return 0
 }
 
 check_salt_minion_status() {
diff --git a/salt/common/tools/sbin/so-image-common b/salt/common/tools/sbin/so-image-common
index 11d2d6366..7e510e3ad 100755
--- a/salt/common/tools/sbin/so-image-common
+++ b/salt/common/tools/sbin/so-image-common
@@ -137,7 +137,7 @@ update_docker_containers() {
   for i in "${TRUSTED_CONTAINERS[@]}"
   do
     if [ -z "$PROGRESS_CALLBACK" ]; then
-        echo "Downloading $i" >> "$LOG_FILE" 2>&1
+      echo "Downloading $i" >> "$LOG_FILE" 2>&1
     else
       $PROGRESS_CALLBACK $i
     fi
diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup
index 998d27539..84501bad5 100755
--- a/salt/manager/tools/sbin/soup
+++ b/salt/manager/tools/sbin/soup
@@ -736,14 +736,8 @@ main() {
   echo ""
 
   set_os
-  if ! check_salt_master_status; then
-    echo "Could not talk to salt master"
-    echo "Please run 'systemctl status salt-master' to ensure the salt-master service is running and check the log at /opt/so/log/salt/master."
-    echo "SOUP will now attempt to start the salt-master service and exit."
-    exit 1
-  fi
 
-  echo "This node can communicate with the salt-master."
+  check_salt_master_status 1 || fail "Could not talk to salt master: Please run 'systemctl status salt-master' to ensure the salt-master service is running and check the log at /opt/so/log/salt/master."
 
   echo "Checking to see if this is a manager."
   echo ""
@@ -829,7 +823,7 @@ main() {
   else
     update_registry
     set +e
-    update_docker_containers "soup" "" "" "$SOUP_LOG"
+    update_docker_containers 'soup' '' '' '/dev/stdout' 2>&1 | tee -a "$SOUP_LOG"
     set -e
   fi
 
@@ -881,7 +875,7 @@ main() {
     # Testing that salt-master is up by checking that is it connected to itself
     set +e
     echo "Waiting on the Salt Master service to be ready."
-    salt-call state.show_top -l error queue=True || fail "salt-master could not be reached. Check $SOUP_LOG for details."
+    check_salt_master_status || fail "Can't access salt master or it is not ready. Check $SOUP_LOG for details."
     set -e
 
     # update the salt-minion configs here and start the minion
@@ -917,7 +911,7 @@ main() {
 
     set +e
     echo "Waiting on the Salt Master service to be ready."
-    salt-call state.show_top -l error queue=True || fail "salt-master could not be reached. Check $SOUP_LOG for details."
+    check_salt_master_status || fail "Can't access salt master or it is not ready. Check $SOUP_LOG for details."
     set -e
 
     echo "Running a highstate to complete the Security Onion upgrade on this manager. This could take several minutes."
diff --git a/salt/salt/engines/master/checkmine.py b/salt/salt/engines/master/checkmine.py
new file mode 100644
index 000000000..09e624ba3
--- /dev/null
+++ b/salt/salt/engines/master/checkmine.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+
+import logging
+from time import sleep
+import os
+import salt.client
+
+log = logging.getLogger(__name__)
+local = salt.client.LocalClient()
+
+def start(interval=60):
+    """Salt engine entry point: keep the mine data of alive minions usable.
+
+    Every ``interval`` seconds each minion returned by the ``manage.alived``
+    runner is checked. Its mine is flushed and updated when the ``mine.p``
+    cache file cannot be read or is exactly 1 byte long, when the mine holds
+    no ``network.ip_addrs`` entry for it, or when the first mined address
+    differs from the address reported by ``manage.alived``.
+
+    :param interval: seconds to sleep between passes
+    """
+    def mine_flush(minion):
+        log.warning('checkmine engine: flushing mine cache for %s' % minion)
+        local.cmd(minion, 'mine.flush')
+
+    def mine_update(minion):
+        log.warning('checkmine engine: updating mine cache for %s' % minion)
+        local.cmd(minion, 'mine.update')
+
+    def mine_refresh(minion):
+        # flush first so stale or corrupt data is dropped before repopulating
+        mine_flush(minion)
+        mine_update(minion)
+
+    log.info("checkmine engine: started")
+    cachedir = __opts__['cachedir']
+    while True:
+        log.debug('checkmine engine: checking which minions are alive')
+        manage_alived = __salt__['saltutil.runner']('manage.alived', show_ip=True)
+        log.debug('checkmine engine: alive minions: %s' % ' , '.join(manage_alived))
+
+        for minion in manage_alived:
+            mine_path = os.path.join(cachedir, 'minions', minion, 'mine.p')
+            try:
+                mine_size = os.path.getsize(mine_path)
+            except OSError:
+                # An alive minion may not have a mine file yet; unguarded, the
+                # exception would propagate out of start() and end this loop.
+                log.error('checkmine engine: unable to read %s' % mine_path)
+                mine_refresh(minion)
+                continue
+            log.debug('checkmine engine: minion: %s mine_size: %i' % (minion, mine_size))
+            # For some reason the mine file can be corrupt and only be 1 byte in size
+            if mine_size == 1:
+                log.error('checkmine engine: found %s to be 1 byte' % mine_path)
+                mine_refresh(minion)
+                continue
+
+            # Update the mine if the ip in the mine doesn't match the ip returned from manage.alived
+            network_ip_addrs = __salt__['saltutil.runner']('mine.get', tgt=minion, fun='network.ip_addrs')
+            try:
+                mine_ip = network_ip_addrs[minion][0]
+            except (KeyError, IndexError, TypeError):
+                # mine.get returned nothing usable for this minion
+                log.error('checkmine engine: found minion %s has no network.ip_addrs in the mine' % minion)
+                mine_refresh(minion)
+                continue
+            log.debug('checkmine engine: minion: %s mine_ip: %s' % (minion, mine_ip))
+            manage_alived_ip = manage_alived[minion]
+            log.debug('checkmine engine: minion: %s managed_alived_ip: %s' % (minion, manage_alived_ip))
+            if mine_ip != manage_alived_ip:
+                log.error('checkmine engine: found minion %s has manage_alived_ip %s but a mine_ip of %s' % (minion, manage_alived_ip, mine_ip))
+                mine_refresh(minion)
+
+        sleep(interval)
diff --git a/salt/salt/files/engines.conf b/salt/salt/files/engines.conf
new file mode 100644
index 000000000..7c43e99e1
--- /dev/null
+++ b/salt/salt/files/engines.conf
@@ -0,0 +1,6 @@
+engines_dirs:
+  - /etc/salt/engines
+
+engines:
+  - checkmine:
+      interval: 60
diff --git a/salt/salt/master.sls b/salt/salt/master.sls
index b10a4df0f..0a65f3e01 100644
--- a/salt/salt/master.sls
+++ b/salt/salt/master.sls
@@ -12,22 +12,33 @@ hold_salt_master_package:
     - name: salt-master
 {% endif %}
 
+# prior to 2.4.30 this engine ran on the manager with salt-minion
+# this has changed to running with the salt-master in 2.4.30
+remove_engines_config:
+  file.absent:
+    - name: /etc/salt/minion.d/engines.conf
+    - watch_in:
+      - service: salt_minion_service
+
+checkmine_engine:
+  file.managed:
+    - name: /etc/salt/engines/checkmine.py
+    - source: salt://salt/engines/master/checkmine.py
+    - makedirs: True
+
+engines_config:
+  file.managed:
+    - name: /etc/salt/master.d/engines.conf
+    - source: salt://salt/files/engines.conf
+
 salt_master_service:
   service.running:
     - name: salt-master
     - enable: True
-
-checkmine_engine:
-  file.absent:
-    - name: /etc/salt/engines/checkmine.py
-    - watch_in:
-      - service: salt_minion_service
-
-engines_config:
-  file.absent:
-    - name: /etc/salt/minion.d/engines.conf
-    - watch_in:
-      - service: salt_minion_service
+    - watch:
+      - file: checkmine_engine
+      - file: engines_config
+    - order: last
 
 {% else %}
 
diff --git a/salt/salt/minion.sls b/salt/salt/minion.sls
index 865bd367f..e0c422e7f 100644
--- a/salt/salt/minion.sls
+++ b/salt/salt/minion.sls
@@ -67,6 +67,9 @@ set_log_levels:
       - "log_level: info"
       - "log_level_logfile: info"
 
+# prior to 2.4.30 this managed file would restart the salt-minion service when updated
+# since this file is currently only adding a sleep timer on service start
+# it is not required to restart the service
 salt_minion_service_unit_file:
   file.managed:
     - name: {{ SYSTEMD_UNIT_FILE }}
@@ -89,6 +92,5 @@ salt_minion_service:
       - file: mine_functions
 {% if INSTALLEDSALTVERSION|string == SALTVERSION|string %}
       - file: set_log_levels
-      - file: salt_minion_service_unit_file
 {% endif %}
     - order: last
diff --git a/setup/so-functions b/setup/so-functions
index 42a4b4ac6..68fd01550 100755
--- a/setup/so-functions
+++ b/setup/so-functions
@@ -2111,11 +2111,6 @@ saltify() {
 
 }
 
-# Run a salt command to generate the minion key
-salt_firstcheckin() {
-  salt-call state.show_top >> /dev/null 2>&1 # send output to /dev/null because we don't actually care about the ouput
-}
-
 salt_install_module_deps() {
   logCmd "salt-pip install docker --no-index --only-binary=:all: --find-links files/salt_module_deps/docker/"
   logCmd "salt-pip install pymysql --no-index --only-binary=:all: --find-links files/salt_module_deps/pymysql/"
diff --git a/setup/so-setup b/setup/so-setup
index 60296d2f3..4db24aa1a 100755
--- a/setup/so-setup
+++ b/setup/so-setup
@@ -714,6 +714,12 @@ if ! [[ -f $install_opt_file ]]; then
 
     logCmd "salt-call state.apply common.packages"
     logCmd "salt-call state.apply common"
+    logCmd "salt-call state.apply salt.master"
+
+    # wait here until we get a response from the salt-master since it may have just restarted
+    # exit setup after 5-6 minutes of trying
+    check_salt_master_status || fail "Can't access salt master or it is not ready"
+
     logCmd "salt-call state.apply docker"
     firewall_generate_templates
     set_initial_firewall_policy
@@ -768,8 +774,6 @@ if ! [[ -f $install_opt_file ]]; then
   checkin_at_boot
   set_initial_firewall_access
   logCmd "salt-call schedule.enable -linfo --local"
-  systemctl restart salt-master
-  systemctl restart salt-minion
   verify_setup
 else
   touch /root/accept_changes