mirror of
https://github.com/Security-Onion-Solutions/securityonion.git
synced 2026-06-22 10:18:09 +02:00
Merge remote-tracking branch 'origin/3/dev' into jertel/wip
This commit is contained in:
@@ -166,6 +166,7 @@ if [[ $EXCLUDE_FALSE_POSITIVE_ERRORS == 'Y' ]]; then
|
||||
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|upgrading composable template" # false positive (elasticsearch composable template names contain 'error')
|
||||
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|Error while parsing document for index \[.ds-logs-kratos-so-.*object mapping for \[file\]" # false positive (mapping error occuring BEFORE kratos index has rolled over in 2.4.210)
|
||||
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|No such container" # false positive (telegraf trying to run stats on an old container)
|
||||
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|passwords do not match" # false positive (automated hydra test)
|
||||
fi
|
||||
|
||||
if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then
|
||||
|
||||
@@ -26,7 +26,9 @@ include:
|
||||
wait_for_elasticsearch_elasticfleet:
|
||||
cmd.run:
|
||||
- name: so-elasticsearch-wait
|
||||
{% endif %}
|
||||
|
||||
{% if GLOBALS.role == "so-fleet" %}
|
||||
# Sync Elastic Agent artifacts to Fleet Node
|
||||
elasticagent_syncartifacts:
|
||||
file.recurse:
|
||||
|
||||
@@ -971,6 +971,9 @@ verify_es_version_compatibility() {
|
||||
local is_active_intermediate_upgrade=1
|
||||
# supported upgrade paths for SO-ES versions
|
||||
declare -A es_upgrade_map=(
|
||||
["8.18.4"]="8.18.6 8.18.8 9.0.8"
|
||||
["8.18.6"]="8.18.8 9.0.8"
|
||||
["8.18.8"]="9.0.8"
|
||||
["9.0.8"]="9.3.3"
|
||||
)
|
||||
|
||||
@@ -994,6 +997,171 @@ verify_es_version_compatibility() {
|
||||
exit 160
|
||||
fi
|
||||
|
||||
compatible_es_versions="$target_es_version"
|
||||
for current_version in "${!es_upgrade_map[@]}"; do
|
||||
# shellcheck disable=SC2076
|
||||
if [[ " ${es_upgrade_map[$current_version]} " =~ " $target_es_version " ]]; then
|
||||
compatible_es_versions+=" $current_version"
|
||||
fi
|
||||
done
|
||||
|
||||
# Check if the given ES version can directly upgrade to the target ES version. Used to assist with catching lagging nodes during the upgrade process
|
||||
es_version_can_upgrade_to_target() {
|
||||
local current_version="$1"
|
||||
# shellcheck disable=SC2076
|
||||
if [[ -n "$current_version" && " $compatible_es_versions " =~ " $current_version " ]]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
return 1
|
||||
}
|
||||
|
||||
# Gather Elasticsearch cluster version info and verify that each node in the cluster is running a version compatible with the target ES version.
|
||||
verify_searchnodes_es_target_compatibility() {
|
||||
local retries=20
|
||||
local retry_count=0
|
||||
local delay=180
|
||||
local expected_es_nodes searchnode_minions attempt
|
||||
local searchnode_discovery_success=false
|
||||
SEARCHNODE_ES_VERSIONS=""
|
||||
|
||||
for attempt in {1..3}; do
|
||||
if searchnode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("searchnode"))'); then
|
||||
searchnode_discovery_success=true
|
||||
break
|
||||
fi
|
||||
|
||||
echo "Failed to retrieve grid searchnodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3."
|
||||
sleep 30
|
||||
done
|
||||
|
||||
if [[ "$searchnode_discovery_success" != "true" ]]; then
|
||||
echo "Failed to retrieve grid searchnodes via salt-key."
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Always add node running soup to expected es nodes
|
||||
expected_es_nodes="${MINIONID%_*}"
|
||||
while IFS= read -r searchnode_minion; do
|
||||
[[ -z "$searchnode_minion" ]] && continue
|
||||
expected_es_nodes+=$'\n'"${searchnode_minion%_searchnode}"
|
||||
done <<< "$searchnode_minions"
|
||||
|
||||
while [[ $retry_count -lt $retries ]]; do
|
||||
SEARCHNODE_ES_VERSIONS=$(so-elasticsearch-query _nodes/_all/version --retry 5 --retry-delay 10 --fail 2>&1)
|
||||
local exit_status=$?
|
||||
|
||||
if [[ $exit_status -ne 0 ]]; then
|
||||
echo "Failed to retrieve Elasticsearch versions from searchnodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries."
|
||||
((retry_count++))
|
||||
sleep $delay
|
||||
continue
|
||||
fi
|
||||
|
||||
local all_searchnodes_compatible=true
|
||||
while IFS=$'\t' read -r node current_version; do
|
||||
[[ -z "$node" ]] && continue
|
||||
if ! es_version_can_upgrade_to_target "$current_version"; then
|
||||
echo "Searchnode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version."
|
||||
all_searchnodes_compatible=false
|
||||
fi
|
||||
done < <(echo "$SEARCHNODE_ES_VERSIONS" | jq -r '.nodes | to_entries[] | [.value.name, .value.version] | @tsv')
|
||||
|
||||
while IFS= read -r expected_es_node; do
|
||||
[[ -z "$expected_es_node" ]] && continue
|
||||
if ! echo "$SEARCHNODE_ES_VERSIONS" | jq -e --arg node "$expected_es_node" '.nodes | to_entries | any(.value.name == $node)' > /dev/null; then
|
||||
echo "Searchnode $expected_es_node did not report an Elasticsearch version. It may be offline or still upgrading."
|
||||
all_searchnodes_compatible=false
|
||||
fi
|
||||
done <<< "$expected_es_nodes"
|
||||
|
||||
if [[ "$all_searchnodes_compatible" == true ]]; then
|
||||
echo "All Searchnodes are upgradable to Elasticsearch $target_es_version."
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo "One or more Searchnodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries."
|
||||
((retry_count++))
|
||||
sleep $delay
|
||||
done
|
||||
|
||||
return 1
|
||||
}
|
||||
|
||||
# Gather heavynode version info and verify that each node is running a version compatible with the target ES version.
|
||||
verify_heavynodes_es_target_compatibility() {
|
||||
local heavynode_minions attempt
|
||||
local retries=20
|
||||
local retry_count=0
|
||||
local delay=180
|
||||
local heavynode_discovery_success=false
|
||||
HEAVYNODE_ES_VERSIONS=""
|
||||
|
||||
for attempt in {1..3}; do
|
||||
if heavynode_minions=$(set -o pipefail; salt-key --out=json --list=accepted 2> /dev/null | jq -r '.minions[]? | select(endswith("heavynode"))'); then
|
||||
heavynode_discovery_success=true
|
||||
break
|
||||
fi
|
||||
|
||||
echo "Failed to retrieve grid heavynodes via salt-key... Retrying in 30 seconds. Attempt $attempt of 3."
|
||||
sleep 30
|
||||
done
|
||||
|
||||
if [[ "$heavynode_discovery_success" != "true" ]]; then
|
||||
echo "Failed to retrieve grid heavynodes via salt-key."
|
||||
return 1
|
||||
fi
|
||||
|
||||
if [[ -z "$heavynode_minions" ]]; then
|
||||
echo "No heavynodes detected. Skipping heavynode Elasticsearch version compatibility check."
|
||||
return 0
|
||||
fi
|
||||
|
||||
while [[ $retry_count -lt $retries ]]; do
|
||||
HEAVYNODE_ES_VERSIONS=$(salt -C 'G@role:so-heavynode' cmd.run 'set -o pipefail; so-elasticsearch-query / --retry 5 --retry-delay 10 | jq -er ".version.number"' shell=/bin/bash --out=json 2> /dev/null)
|
||||
local exit_status=$?
|
||||
|
||||
if [[ $exit_status -ne 0 ]]; then
|
||||
echo "Failed to retrieve Elasticsearch version from one or more heavynodes... Retrying in $delay seconds. Attempt $((retry_count + 1)) of $retries."
|
||||
((retry_count++))
|
||||
sleep $delay
|
||||
continue
|
||||
fi
|
||||
|
||||
local all_heavynodes_compatible=true
|
||||
while IFS=$'\t' read -r node current_version; do
|
||||
[[ -z "$node" ]] && continue
|
||||
if ! es_version_can_upgrade_to_target "$current_version"; then
|
||||
echo "Heavynode $node is running Elasticsearch $current_version, which is not directly upgradable to Elasticsearch $target_es_version."
|
||||
all_heavynodes_compatible=false
|
||||
fi
|
||||
done < <(echo "$HEAVYNODE_ES_VERSIONS" | jq -r 'to_entries[] | [.key, .value] | @tsv')
|
||||
|
||||
while IFS= read -r heavynode_minion; do
|
||||
[[ -z "$heavynode_minion" ]] && continue
|
||||
if ! echo "$HEAVYNODE_ES_VERSIONS" | jq -e --arg minion "$heavynode_minion" 'has($minion)' > /dev/null; then
|
||||
echo "Heavynode $heavynode_minion did not report an Elasticsearch version. It may be offline or still upgrading."
|
||||
all_heavynodes_compatible=false
|
||||
fi
|
||||
done <<< "$heavynode_minions"
|
||||
|
||||
if [[ "$all_heavynodes_compatible" == true ]]; then
|
||||
echo -e "\nAll heavynodes can upgrade to Elasticsearch $target_es_version."
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo "One or more heavynodes cannot upgrade directly to Elasticsearch $target_es_version. Rechecking in $delay seconds. Attempt $((retry_count + 1)) of $retries."
|
||||
((retry_count++))
|
||||
sleep $delay
|
||||
done
|
||||
|
||||
return 1
|
||||
}
|
||||
|
||||
if [[ ! -f "$es_verification_script" ]]; then
|
||||
create_intermediate_upgrade_verification_script "$es_verification_script"
|
||||
fi
|
||||
|
||||
for statefile in "${es_required_version_statefile_base}"-*; do
|
||||
[[ -f $statefile ]] || continue
|
||||
|
||||
@@ -1012,10 +1180,6 @@ verify_es_version_compatibility() {
|
||||
continue
|
||||
fi
|
||||
|
||||
if [[ ! -f "$es_verification_script" ]]; then
|
||||
create_intermediate_upgrade_verification_script "$es_verification_script"
|
||||
fi
|
||||
|
||||
echo -e "\n##############################################################################################################################\n"
|
||||
echo "A previously required intermediate Elasticsearch upgrade was detected. Verifying that all Searchnodes/Heavynodes have successfully upgraded Elasticsearch to $es_required_version_statefile_value before proceeding with soup to avoid potential data loss! This command can take up to an hour to complete."
|
||||
if ! timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile"; then
|
||||
@@ -1037,6 +1201,26 @@ verify_es_version_compatibility() {
|
||||
|
||||
# shellcheck disable=SC2076 # Do not want a regex here eg usage " 8.18.8 9.0.8 " =~ " 9.0.8 "
|
||||
if [[ " ${es_upgrade_map[$es_version]} " =~ " $target_es_version " || "$es_version" == "$target_es_version" ]]; then
|
||||
if ! verify_searchnodes_es_target_compatibility || ! verify_heavynodes_es_target_compatibility; then
|
||||
echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
|
||||
|
||||
echo "One or more Searchnode(s)/Heavynode(s) cannot upgrade directly to Elasticsearch $target_es_version. This can happen with soups that include Elasticsearch upgrades being run in quick succession. Typically, this will resolve itself as the grid synchronizes. Please allow time for all Searchnodes/Heavynodes to have upgraded Elasticsearch to a compatible version with $target_es_version before running soup again to avoid potential data loss!"
|
||||
|
||||
if [[ -n "$HEAVYNODE_ES_VERSIONS" ]]; then
|
||||
echo "Current heavynode Elasticsearch versions:"
|
||||
echo "$HEAVYNODE_ES_VERSIONS" | jq '.'
|
||||
fi
|
||||
|
||||
if [[ -n "$SEARCHNODE_ES_VERSIONS" ]]; then
|
||||
echo "Current searchnode Elasticsearch versions:"
|
||||
echo "$SEARCHNODE_ES_VERSIONS" | jq '.nodes | to_entries | map({(.value.name): .value.version}) | sort | add'
|
||||
fi
|
||||
|
||||
echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
|
||||
|
||||
exit 161
|
||||
fi
|
||||
|
||||
# supported upgrade
|
||||
return 0
|
||||
else
|
||||
@@ -1394,7 +1578,7 @@ main() {
|
||||
echo "Verifying we have the latest soup script."
|
||||
verify_latest_update_script
|
||||
|
||||
echo "Verifying Elasticsearch version compatibility before upgrading."
|
||||
echo "Verifying Elasticsearch version compatibility across the grid before upgrading."
|
||||
verify_es_version_compatibility
|
||||
|
||||
echo "Let's see if we need to update Security Onion."
|
||||
|
||||
@@ -7,15 +7,29 @@
|
||||
|
||||
. /usr/sbin/so-common
|
||||
|
||||
# Without pipefail, a pipeline's exit status is gzip's. A failed pg_dumpall would
|
||||
# otherwise be masked by a successful gzip, silently producing a valid .gz that
|
||||
# holds a truncated dump.
|
||||
set -o pipefail
|
||||
|
||||
# Backups contain role password hashes and full chat data; keep them 0600.
|
||||
umask 0077
|
||||
|
||||
TODAY=$(date '+%Y_%m_%d')
|
||||
BACKUPDIR=/nsm/backup
|
||||
BACKUPFILE="$BACKUPDIR/so-postgres-backup-$TODAY.sql.gz"
|
||||
TMPFILE="$BACKUPFILE.tmp"
|
||||
MAXBACKUPS=7
|
||||
LOGFILE=/opt/so/log/postgres/backup.log
|
||||
|
||||
mkdir -p $BACKUPDIR
|
||||
log() {
|
||||
echo "$(date '+%Y-%m-%d %H:%M:%S') $*" >> "$LOGFILE"
|
||||
}
|
||||
|
||||
mkdir -p "$BACKUPDIR"
|
||||
|
||||
# Remove any temp files left behind by a previously crashed run
|
||||
rm -f "$BACKUPDIR"/so-postgres-backup-*.sql.gz.tmp
|
||||
|
||||
# Skip if already backed up today
|
||||
if [ -f "$BACKUPFILE" ]; then
|
||||
@@ -27,13 +41,33 @@ if ! docker ps --format '{{.Names}}' | grep -q '^so-postgres$'; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Dump all databases and roles, compress
|
||||
docker exec so-postgres pg_dumpall -U postgres | gzip > "$BACKUPFILE"
|
||||
# Always clean up the temp file on exit; the success path clears this trap
|
||||
# after the atomic rename so the finished backup is not deleted.
|
||||
trap 'rm -f "$TMPFILE"' EXIT
|
||||
|
||||
# Retention cleanup
|
||||
NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l)
|
||||
# Dump all databases and roles, compress. Write to a temp file so the final
|
||||
# filename only ever appears for a complete, verified backup.
|
||||
if ! docker exec so-postgres pg_dumpall -U postgres | gzip > "$TMPFILE"; then
|
||||
log "ERROR: pg_dumpall/gzip failed; backup aborted"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Verify the compressed stream is intact before publishing it
|
||||
if ! gzip -t "$TMPFILE"; then
|
||||
log "ERROR: backup failed gzip integrity check; backup aborted"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Atomically publish the verified backup
|
||||
mv "$TMPFILE" "$BACKUPFILE"
|
||||
trap - EXIT
|
||||
log "OK: wrote $BACKUPFILE"
|
||||
|
||||
# Retention cleanup (only reached after a successful backup). The glob is
|
||||
# restricted to finished backups so an in-progress .tmp can never be counted.
|
||||
NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l)
|
||||
while [ "$NUMBACKUPS" -gt "$MAXBACKUPS" ]; do
|
||||
OLDEST=$(find $BACKUPDIR -type f -name "so-postgres-backup*" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}')
|
||||
OLDEST=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}')
|
||||
rm -f "$OLDEST"
|
||||
NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l)
|
||||
NUMBACKUPS=$(find "$BACKUPDIR" -type f -name "so-postgres-backup-*.sql.gz" | wc -l)
|
||||
done
|
||||
|
||||
Reference in New Issue
Block a user