diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 8a68e5242..3bec13716 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1021,24 +1021,33 @@ verify_es_version_compatibility() { local retries=20 local retry_count=0 local delay=180 - local expected_es_nodes - local searchnode_minions + local expected_es_nodes searchnode_minions attempt + local searchnode_discovery_success=false SEARCHNODE_ES_VERSIONS="" - while [[ $retry_count -lt $retries ]]; do - if ! searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then - echo "Failed to retrieve grid searchnodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." - ((retry_count++)) - sleep $delay - continue + for attempt in {1..3}; do + if searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then + searchnode_discovery_success=true + break fi - # Always add node running soup to expected es nodes - expected_es_nodes="${MINIONID%_*}" - while IFS= read -r searchnode_minion; do - [[ -z "$searchnode_minion" ]] && continue - expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" - done <<< "$searchnode_minions" + echo "Failed to retrieve grid searchnodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3." + sleep 30 + done + + if [[ "$searchnode_discovery_success" != "true" ]]; then + echo "Failed to retrieve grid searchnodes via salt-key." + return 1 + fi + + # Always add node running soup to expected es nodes + expected_es_nodes="${MINIONID%_*}" + while IFS= read -r searchnode_minion; do + [[ -z "$searchnode_minion" ]] && continue + expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}" + done <<< "$searchnode_minions" + + while [[ $retry_count -lt $retries ]]; do SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1) local exit_status=$? @@ -1081,25 +1090,34 @@ verify_es_version_compatibility() { # Gather heavynode version info and verify that each node is running a version compatible with the target ES version. verify_heavynodes_es_target_compatibility() { - local heavynode_minions + local heavynode_minions attempt local retries=20 local retry_count=0 local delay=180 + local heavynode_discovery_success=false HEAVYNODE_ES_VERSIONS="" + for attempt in {1..3}; do + if heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then + heavynode_discovery_success=true + break + fi + + echo "Failed to retrieve grid heavynodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3." + sleep 30 + done + + if [[ "$heavynode_discovery_success" != "true" ]]; then + echo "Failed to retrieve grid heavynodes via salt-key." + return 1 + fi + + if [[ -z "$heavynode_minions" ]]; then + echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." + return 0 + fi + while [[ $retry_count -lt $retries ]]; do - if ! heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then - echo "Failed to retrieve grid heavynodes via salt-key... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries." - ((retry_count++)) - sleep $delay - continue - fi - - if [[ -z "$heavynode_minions" ]]; then - echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check." - return 0 - fi - HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null) local exit_status=$?