verify all heavynodes and all searchnodes are at compatible ES version before attempting an elasticsearch upgrade

This commit is contained in:
reyesj2
2026-05-19 22:27:31 -05:00
parent 74b50f6009
commit 6c8997b28a
+134 -5
View File
@@ -971,6 +971,9 @@ verify_es_version_compatibility() {
local is_active_intermediate_upgrade=1 local is_active_intermediate_upgrade=1
# supported upgrade paths for SO-ES versions # supported upgrade paths for SO-ES versions
declare -A es_upgrade_map=( declare -A es_upgrade_map=(
["8.18.4"]="8.18.6 8.18.8 9.0.8"
["8.18.6"]="8.18.8 9.0.8"
["8.18.8"]="9.0.8"
["9.0.8"]="9.3.3" ["9.0.8"]="9.3.3"
) )
@@ -994,6 +997,116 @@ verify_es_version_compatibility() {
exit 160 exit 160
fi fi
compatible_es_versions="$target_es_version"
for current_version in "${!es_upgrade_map[@]}"; do
# shellcheck disable=SC2076
if [[ " ${es_upgrade_map[$current_version]} " =~ " $target_es_version " ]]; then
compatible_es_versions+=" $current_version"
fi
done
# Check if the given ES version can directly upgrade to the target ES version. Used to assist with catching lagging nodes during the upgrade process
es_version_can_upgrade_to_target() {
local current_version="$1"
# shellcheck disable=SC2076
if [[ -n "$current_version" && " $compatible_es_versions " =~ " $current_version " ]]; then
return 0
fi
return 1
}
# Gather Elasticsearch cluster version info and verify that each node in the cluster is running a version compatible with the target ES version.
verify_searchnodes_es_target_compatibility() {
local retries=20
local retry_count=0
local delay=180
SEARCHNODE_ES_VERSIONS=""
while [[ $retry_count -lt $retries ]]; do
SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1)
local exit_status=$?
if [[ $exit_status -ne 0 ]]; then
echo "Failed to retrieve Elasticsearch versions from searchnodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries."
((retry_count++))
sleep $delay
continue
fi
local all_searchnodes_compatible=true
while IFS=$'\t' read -r node current_version; do
[[ -z "$node" ]] && continue
if ! es_version_can_upgrade_to_target "$current_version"; then
echo "Searchnode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version."
all_searchnodes_compatible=false
fi
done < <(echo "$SEARCHNODE_ES_VERSIONS" | jq -r '.nodes | to_entries[] | [.value.name, .value.version] | @tsv')
if [[ "$all_searchnodes_compatible" == true ]]; then
echo "All Searchnodes are upgradable to Elasticsearch $target_es_version."
return 0
fi
echo "One or more Searchnodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries."
((retry_count++))
sleep $delay
done
return 1
}
# Gather heavynode version info and verify that each node is running a version compatible with the target ES version.
verify_heavynodes_es_target_compatibility() {
if ! salt-key -l accepted | grep -q 'heavynode$'; then
echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check."
return 0
fi
echo -e "\nOne or more heavynodes detected. Verifying each is running an Elasticsearch version that is compatible with $target_es_version."
local retries=20
local retry_count=0
local delay=180
HEAVYNODE_ES_VERSIONS=""
while [[ $retry_count -lt $retries ]]; do
HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -r ".version.number"' shell=/bin/bash --out=json 2> /dev/null)
local exit_status=$?
if [[ $exit_status -ne 0 ]]; then
echo "Failed to retrieve Elasticsearch version from one or more heavynodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries."
((retry_count++))
sleep $delay
continue
fi
local all_heavynodes_compatible=true
while IFS=$'\t' read -r node current_version; do
[[ -z "$node" ]] && continue
if ! es_version_can_upgrade_to_target "$current_version"; then
echo "Heavynode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version."
all_heavynodes_compatible=false
fi
done < <(echo "$HEAVYNODE_ES_VERSIONS" | jq -r 'to_entries[] | [.key, .value] | @tsv')
if [[ "$all_heavynodes_compatible" == true ]]; then
echo -e "\nAll heavynodes can upgrade to Elasticsearch $target_es_version."
return 0
fi
echo "One or more heavynodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries."
((retry_count++))
sleep $delay
done
return 1
}
if [[ ! -f "$es_verification_script" ]]; then
create_intermediate_upgrade_verification_script "$es_verification_script"
fi
for statefile in "${es_required_version_statefile_base}"-*; do for statefile in "${es_required_version_statefile_base}"-*; do
[[ -f $statefile ]] || continue [[ -f $statefile ]] || continue
@@ -1012,10 +1125,6 @@ verify_es_version_compatibility() {
continue continue
fi fi
if [[ ! -f "$es_verification_script" ]]; then
create_intermediate_upgrade_verification_script "$es_verification_script"
fi
echo -e "\n##############################################################################################################################\n" echo -e "\n##############################################################################################################################\n"
echo "A previously required intermediate Elasticsearch upgrade was detected. Verifying that all Searchnodes/Heavynodes have successfully upgraded Elasticsearch to $es_required_version_statefile_value before proceeding with soup to avoid potential data loss! This command can take up to an hour to complete." echo "A previously required intermediate Elasticsearch upgrade was detected. Verifying that all Searchnodes/Heavynodes have successfully upgraded Elasticsearch to $es_required_version_statefile_value before proceeding with soup to avoid potential data loss! This command can take up to an hour to complete."
if ! timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile"; then if ! timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile"; then
@@ -1037,6 +1146,26 @@ verify_es_version_compatibility() {
# shellcheck disable=SC2076 # Do not want a regex here eg usage " 8.18.8 9.0.8 " =~ " 9.0.8 " # shellcheck disable=SC2076 # Do not want a regex here eg usage " 8.18.8 9.0.8 " =~ " 9.0.8 "
if [[ " ${es_upgrade_map[$es_version]} " =~ " $target_es_version " || "$es_version" == "$target_es_version" ]]; then if [[ " ${es_upgrade_map[$es_version]} " =~ " $target_es_version " || "$es_version" == "$target_es_version" ]]; then
if ! verify_searchnodes_es_target_compatibility || ! verify_heavynodes_es_target_compatibility; then
echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
echo "One or more Searchnode(s)/Heavynode(s) cannot upgrade directly to Elasticsearch $target_es_version. This can happen with soups that include Elasticsearch upgrades being run in quick succession. Typically, this will resolve itself as the grid synchronizes. Please allow time for all Searchnodes/Heavynodes to have upgraded Elasticsearch to a compatible version with $target_es_version before running soup again to avoid potential data loss!"
if [[ -n "$HEAVYNODE_ES_VERSIONS" ]]; then
echo "Current heavynode Elasticsearch versions:"
echo "$HEAVYNODE_ES_VERSIONS" | jq '.'
fi
if [[ -n "$SEARCHNODE_ES_VERSIONS" ]]; then
echo "Current searchnode Elasticsearch versions:"
echo "$SEARCHNODE_ES_VERSIONS" | jq '.nodes | to_entries | map({(.value.name): .value.version}) | sort | add'
fi
echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
exit 161
fi
# supported upgrade # supported upgrade
return 0 return 0
else else
@@ -1394,7 +1523,7 @@ main() {
echo "Verifying we have the latest soup script." echo "Verifying we have the latest soup script."
verify_latest_update_script verify_latest_update_script
echo "Verifying Elasticsearch version compatibility before upgrading." echo "Verifying Elasticsearch version compatibility across the grid before upgrading."
verify_es_version_compatibility verify_es_version_compatibility
echo "Let's see if we need to update Security Onion." echo "Let's see if we need to update Security Onion."