#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.

# Estimates the remaining days until the Elasticsearch cluster reaches its low
# disk watermark, using InfluxDB growth history and ILM deletion schedules.

INFLUX_URL="https://localhost:8086/api/v2"
JSON_OUTPUT=false
VERBOSE=false
TEMP_FILES=()

. /usr/sbin/so-common

# ANSI color codes used by log_title and recommendation output.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
BOLD='\033[1;37m'
NC='\033[0m'
REDBOLD='\033[1;31m'
YELLOWBOLD='\033[1;33m'

declare -a recommendation_lines
declare -a recommendation_records

# Remove every temp file registered in TEMP_FILES (fires on EXIT/INT/TERM).
cleanup_temp_files() {
  local file
  for file in "${TEMP_FILES[@]}"; do
    [ -f "$file" ] && rm -f "$file" 2>/dev/null
  done
}

trap cleanup_temp_files EXIT INT TERM

# Create a temp file, register it for cleanup, and print its path on stdout.
create_temp_file() {
  local tmpfile
  tmpfile=$(mktemp)
  TEMP_FILES+=("$tmpfile")
  echo "$tmpfile"
}

# log_title LEVEL MESSAGE - print MESSAGE colorized by LEVEL (LOG/OK/WARN/ERROR).
log_title() {
  if [ "$1" == "LOG" ]; then
    echo -e "\n${BOLD}================ $2 ================${NC}\n"
  elif [ "$1" == "OK" ]; then
    echo -e "${GREEN} $2 ${NC}"
  elif [ "$1" == "WARN" ]; then
    echo -e "${YELLOW} $2 ${NC}"
  elif [ "$1" == "ERROR" ]; then
    echo -e "${RED} $2 ${NC}"
  fi
}

usage() {
  cat << EOF
Usage: $(basename "$0") [OPTIONS]

Estimate remaining days until Elasticsearch cluster reaches low watermark threshold.

OPTIONS:
  --json          Output results in JSON format
  -v, --verbose   Show additional output
  -h, --help      Show this help message
EOF
  exit 0
}

while [[ $# -gt 0 ]]; do
  case $1 in
    --json)
      JSON_OUTPUT=true
      shift
      ;;
    -v|--verbose)
      VERBOSE=true
      shift
      ;;
    -h|--help)
      usage
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage
      ;;
  esac
done

# request ENDPOINT [CURL_ARGS...] - call the InfluxDB v2 API using the
# pre-provisioned curl config for credentials. Extra args pass through to curl.
request() {
  local endpoint=$1
  shift
  curl -skK /opt/so/conf/influxdb/curl.config "$INFLUX_URL/$endpoint" "$@"
}

# Print the InfluxDB org id for the "Security Onion" org.
lookup_org_id() {
  request "orgs?org=Security+Onion" | jq -r '.orgs[] | select(.name == "Security Onion").id'
}

# run_flux_query QUERY - POST a Flux query; output is annotated CSV.
run_flux_query() {
  local query=$1
  request "query?org=$ORG_ID" \
    -H 'Accept:application/csv' \
    -H 'Content-type:application/vnd.flux' \
    -d "$query" -XPOST 2>/dev/null
}

# Extract the first "_result" value (last column) from annotated Flux CSV.
read_csv_value() {
  local input="$1"
  printf '%s\n' "$input" | awk -F',' '
    $0 ~ /^#/ { next }
    NF < 1 { next }
    {
      gsub(/\r|\t/, "")
      for (i = 1; i <= NF; i++) {
        sub(/^[[:space:]]+/, "", $i)
        sub(/[[:space:]]+$/, "", $i)
      }
      if (($2 == "_result" || $2 == "result") && $3 != "table" && $NF != "") {
        print $NF
        exit
      }
    }
  '
}

# Coerce a possibly-empty/null/float value to a whole number (defaults to 0).
normalize_number() {
  local value="${1:-0}"
  awk -v val="$value" 'BEGIN {
    if (val == "" || val == "null") { printf "0"; exit }
    if (val == val + 0) { printf "%.0f", val + 0; exit }
    printf "0"
  }'
}

# Convert a byte count to gigabytes with two decimal places.
bytes_to_gb() {
  local bytes="${1:-0}"
  awk -v b="$bytes" 'BEGIN {
    if (b == "" || b == "null") { printf "0.00"; exit }
    printf "%.2f", b / 1024 / 1024 / 1024
  }'
}

# Expand a _cat/nodes role string into comma-separated data role names.
expand_node_roles() {
  local role_string="$1"
  local -a roles=()
  # Only show data-related roles: d=data, h=data_hot, w=data_warm, c=data_cold, s=data_content f=data_frozen
  [[ "$role_string" =~ h ]] && roles+=("data_hot")
  [[ "$role_string" =~ w ]] && roles+=("data_warm")
  [[ "$role_string" =~ c ]] && roles+=("data_cold")
  [[ "$role_string" =~ s ]] && roles+=("data_content")
  [[ "$role_string" =~ f ]] && roles+=("data_frozen")
  [[ "$role_string" =~ d ]] && roles+=("data")
  local IFS=','
  echo "${roles[*]}"
}

# Run so-elasticsearch-indices-growth (via non-interactive sudo when not root);
# returns 1 when the helper is not installed.
run_indices_growth() {
  if ! command -v so-elasticsearch-indices-growth >/dev/null 2>&1; then
    return 1
  fi
  if [ "$EUID" -ne 0 ] && command -v sudo >/dev/null 2>&1; then
    sudo -n so-elasticsearch-indices-growth 2>/dev/null || so-elasticsearch-indices-growth 2>/dev/null
  else
    so-elasticsearch-indices-growth 2>/dev/null
  fi
}

# fetch_total_bytes START [STOP] - sum of the latest elasticsearch_index_size
# samples in the given Flux time range, normalized to a whole number of bytes.
fetch_total_bytes() {
  local start="$1"
  local stop="$2"
  local range_line
  if [ -n "$stop" ]; then
    range_line=" |> range(start: ${start}, stop: ${stop})"
  else
    range_line=" |> range(start: ${start})"
  fi
  local query
  query=$(cat <<EOF
from(bucket: "telegraf/so_long_term")
${range_line}
 |> filter(fn: (r) => r._measurement == "elasticsearch_index_size")
 |> last()
 |> group()
 |> sum()
 |> keep(columns: ["_value"])
EOF
)
  local result value
  result=$(run_flux_query "$query")
  value=$(read_csv_value "$result")
  normalize_number "$value"
}

# fail MESSAGE - report an error (JSON object or stderr text) and exit 1.
fail() {
  if [ "$JSON_OUTPUT" = true ]; then
    jq -n --arg error "$1" '{error: $error}'
  else
    echo "ERROR: $1" >&2
  fi
  exit 1
}

echo -e "\nDISCLAIMER: Script output is based on current data patterns, but are approximations solely intended to assist with getting a general ILM policy configured."

ORG_ID=$(lookup_org_id)
[ -n "$ORG_ID" ] || fail "Unable to resolve InfluxDB org id"

cluster_storage_size=0
indexed_storage_source="elasticsearch"
cluster_storage_size_output=$(so-elasticsearch-query '_cluster/stats?filter_path=indices.store.size_in_bytes' --fail 2>/dev/null || true)
if [ -n "$cluster_storage_size_output" ]; then
  cluster_storage_size=$(echo "$cluster_storage_size_output" | jq -r '.indices.store.size_in_bytes // 0' 2>/dev/null)
  # Guard against non-numeric jq output (errors, "null", etc.)
  if ! printf '%s' "$cluster_storage_size" | grep -Eq '^[0-9]+$'; then
    cluster_storage_size=0
  fi
fi

# historical data from influxdb for growth calculation
one_day_total=$(fetch_total_bytes "-25h" "-23h")
seven_day_total=$(fetch_total_bytes "-7d8h" "-7d")
thirty_day_total=$(fetch_total_bytes "-30d8h" "-30d")

# available historical windows (prefer 30d/7d when available, to avoid using recent 24h traffic spike as true daily ingest rate)
history_days=0
historical_total=0
if [ "$thirty_day_total" -gt 0 ]; then
  history_days=30
  history_label="30-day"
  historical_total=$thirty_day_total
elif [ "$seven_day_total" -gt 0 ]; then
  history_days=7
  history_label="7-day"
  historical_total=$seven_day_total
elif [ "$one_day_total" -gt 0 ]; then
  history_days=1
  history_label="24-hour"
  historical_total=$one_day_total
fi
[ "$history_days" -gt 0 ] || fail "Historical InfluxDB data unavailable for growth calculation. If this is a newer grid try re-running this script in a few days. Otherwise review /opt/so/log/telegraf/telegraf.log for errors with collecting required ES metrics."
# Daily growth rate
growth_bytes=$(( cluster_storage_size - historical_total ))
daily_growth_bytes=$(awk -v diff="$growth_bytes" -v days="$history_days" 'BEGIN { if (days <= 0) { print 0; exit } printf "%.0f", diff / days }')

# Daily shard creation rate using same time window (30d / 7d / 24h)
daily_shard_creation=0
now_ms=$(date +%s)000
history_ago_ms=$(awk -v now="$now_ms" -v days="$history_days" 'BEGIN { printf "%.0f", now - (days * 86400 * 1000) }')
shard_creation_output=$(so-elasticsearch-query "_cat/indices/.ds-*?format=json&h=index,pri,rep,creation.date" --fail 2>/dev/null || true)
if [ -n "$shard_creation_output" ]; then
  # Total shards created in window = pri + (pri * rep) for each new index.
  recent_shards=$(echo "$shard_creation_output" | jq --argjson cutoff "$history_ago_ms" '
    [ .[]
      | select(.["creation.date"] != null and (.["creation.date"] | tonumber) >= $cutoff)
      | (.pri | tonumber) + ((.pri | tonumber) * (.rep | tonumber))
    ] | add // 0
  ' 2>/dev/null)
  if [ -n "$recent_shards" ] && [[ "$recent_shards" =~ ^[0-9]+$ ]]; then
    daily_shard_creation=$(awk -v total="$recent_shards" -v days="$history_days" 'BEGIN { if (days <= 0) { print 0; exit } printf "%.1f", total / days }')
  fi
fi

# Find expected ILM deletions
ilm_delete_7d=0
ilm_delete_30d=0
ilm_indices_7d=0
ilm_indices_30d=0
ilm_delete_immediate=0
ilm_indices_immediate=0
ilm_delete_scheduled_7d=0
ilm_indices_scheduled_7d=0
ilm_delete_scheduled_30d=0
ilm_indices_scheduled_30d=0
ilm_shards_7d=0
ilm_shards_30d=0
ilm_shards_immediate=0
ilm_shards_scheduled_7d=0
ilm_shards_scheduled_30d=0

# For verbose output
declare -a scheduled_indices_names
declare -a scheduled_indices_sizes
declare -a scheduled_indices_days
declare -a immediate_indices_names
declare -a immediate_indices_sizes

# Get ilm policy delete ages per policy
# example output 'so-logs-1password.audit_events-logs|365'
# NOTE: capture() needs named groups (?<num>..)(?<unit>..) so $m.num / $m.unit
# resolve; the previous unnamed form was invalid regex syntax.
tmpfile_policies=$(create_temp_file)
so-elasticsearch-query '_ilm/policy' --fail 2>/dev/null | jq -r '
  def age_to_days:
    if type == "number" then .
    elif type == "string" then
      (ascii_downcase) as $s
      | (try ($s | capture("^(?<num>-?[0-9.]+)(?<unit>[smhd]?)$")) catch {num:"0", unit:""}) as $m
      | (($m.num | tonumber? // 0)) as $val
      | (if $m.unit == "d" or $m.unit == "" then $val
         elif $m.unit == "h" then $val / 24
         elif $m.unit == "m" then $val / 1440
         elif $m.unit == "s" then $val / 86400
         else $val end)
    else 0 end;
  to_entries[]
  | select(.value.policy.phases.delete.min_age?)
  | "\(.key)|\((.value.policy.phases.delete.min_age | age_to_days))"
' > "$tmpfile_policies" 2>/dev/null || true

declare -A policy_ages
if [ -s "$tmpfile_policies" ]; then
  # create associative array of policy -> delete_age
  while IFS='|' read -r policy age; do
    policy_ages["$policy"]=$age
  done < "$tmpfile_policies"

  # Get ILM managed indices with their age and policy, figure days until deletion
  tmpfile_indices=$(create_temp_file)
  so-elasticsearch-query '_all/_ilm/explain' --fail 2>/dev/null | jq -r '
    def age_to_days:
      if type == "number" then .
      elif type == "string" then
        (ascii_downcase) as $s
        | (try ($s | capture("^(?<num>-?[0-9.]+)(?<unit>[smhd]?)$")) catch {num:"0", unit:""}) as $m
        | (($m.num | tonumber? // 0)) as $val
        | (if $m.unit == "d" or $m.unit == "" then $val
           elif $m.unit == "h" then $val / 24
           elif $m.unit == "m" then $val / 1440
           elif $m.unit == "s" then $val / 86400
           else $val end)
      else 0 end;
    .indices | to_entries[]
    | select(.value.managed == true and .value.policy)
    | "\(.key)|\(.value.policy)|\(((.value.age? // "0") | age_to_days))|\(.value.phase? // "")"
  ' > "$tmpfile_indices" 2>/dev/null || true

  # Process each index and calculate totals
  tmpfile_all=$(create_temp_file)
  while IFS='|' read -r index policy age phase; do
    if [ -n "${policy_ages[$policy]:-}" ]; then
      delete_age=${policy_ages[$policy]}
      delete_age=${delete_age:-0}
      age=${age:-0}
      # Ceiling of (delete_age - current_age), floored at 0.
      days_until_ceiling=$(awk -v del="$delete_age" -v aged="$age" 'BEGIN { diff = del - aged; if (diff <= 0) { print 0; exit } base = int(diff); if (diff > base) { base = base + 1 } print base; }')
      if [ -z "$days_until_ceiling" ]; then
        days_until_ceiling=0
      fi
      if [ "$days_until_ceiling" -lt 0 ]; then
        days_until_ceiling=0
      fi
      bucket="scheduled"
      if [ "$phase" = "delete" ]; then
        # Already in the delete phase: counts as deleting right now.
        days_until_ceiling=0
        bucket="immediate"
      fi
      if [ "$days_until_ceiling" -le 30 ] 2>/dev/null; then
        echo "$index|$days_until_ceiling|$bucket" >> "$tmpfile_all"
      fi
    fi
  done < "$tmpfile_indices"

  # Get size and shard counts for indices
  if [ -s "$tmpfile_all" ]; then
    candidate_indices=$(cut -d'|' -f1 "$tmpfile_all" | tr '\n' ',' | sed 's/,$//')
    if [ -n "$candidate_indices" ]; then
      tmpfile_sizes=$(create_temp_file)
      so-elasticsearch-query "_cat/indices/${candidate_indices}?format=json&h=index,pri.store.size,pri,rep&bytes=b" --fail 2>/dev/null | \
        jq -r '.[] | "\(.index)|\(.["pri.store.size"])|\(.pri)|\(.rep)"' > "$tmpfile_sizes" 2>/dev/null || true

      # Build size and shard lookup
      declare -A index_sizes
      declare -A index_shards
      while IFS='|' read -r idx size pri rep; do
        index_sizes["$idx"]=$size
        # Total shards = pri + (pri * rep)
        total_shards=$(awk -v p="$pri" -v r="$rep" 'BEGIN { printf "%.0f", p + (p * r) }')
        index_shards["$idx"]=$total_shards
      done < "$tmpfile_sizes"

      # Calculate totals for ilm deletes
      while IFS='|' read -r index days_until bucket; do
        size=${index_sizes[$index]:-0}
        shards=${index_shards[$index]:-0}
        if [ "$bucket" = "immediate" ]; then
          ilm_delete_immediate=$((ilm_delete_immediate + size))
          ilm_indices_immediate=$((ilm_indices_immediate + 1))
          ilm_shards_immediate=$((ilm_shards_immediate + shards))
          if [ "$VERBOSE" = true ]; then
            immediate_indices_names+=("$index")
            immediate_indices_sizes+=("$size")
          fi
        else
          if [ "$days_until" -le 7 ] 2>/dev/null; then
            ilm_delete_scheduled_7d=$((ilm_delete_scheduled_7d + size))
            ilm_indices_scheduled_7d=$((ilm_indices_scheduled_7d + 1))
            ilm_shards_scheduled_7d=$((ilm_shards_scheduled_7d + shards))
            if [ "$VERBOSE" = true ]; then
              scheduled_indices_names+=("$index")
              scheduled_indices_sizes+=("$size")
              scheduled_indices_days+=("$days_until")
            fi
          fi
          ilm_delete_scheduled_30d=$((ilm_delete_scheduled_30d + size))
          ilm_indices_scheduled_30d=$((ilm_indices_scheduled_30d + 1))
          ilm_shards_scheduled_30d=$((ilm_shards_scheduled_30d + shards))
        fi
        if [ "$days_until" -le 7 ] 2>/dev/null; then
          ilm_delete_7d=$((ilm_delete_7d + size))
          ilm_indices_7d=$((ilm_indices_7d + 1))
          ilm_shards_7d=$((ilm_shards_7d + shards))
        fi
        ilm_delete_30d=$((ilm_delete_30d + size))
        ilm_indices_30d=$((ilm_indices_30d + 1))
        ilm_shards_30d=$((ilm_shards_30d + shards))
      done < "$tmpfile_all"
    fi
  fi
fi

# Get the average daily ILM deletion rate (smooth out over 30d / 7d for consistency)
daily_ilm_delete_bytes=0
if [ "$ilm_delete_scheduled_30d" -gt 0 ] && [ "$ilm_indices_scheduled_30d" -gt 0 ]; then
  daily_ilm_delete_bytes=$(awk -v total="$ilm_delete_scheduled_30d" 'BEGIN { printf "%.0f", total / 30 }')
elif [ "$ilm_delete_scheduled_7d" -gt 0 ] && [ "$ilm_indices_scheduled_7d" -gt 0 ]; then
  daily_ilm_delete_bytes=$(awk -v total="$ilm_delete_scheduled_7d" 'BEGIN { printf "%.0f", total / 7 }')
fi

# Net storage growth (growth - deletions)
net_growth_bytes=$(awk -v growth="$daily_growth_bytes" -v deletions="$daily_ilm_delete_bytes" 'BEGIN { printf "%.0f", growth - deletions }')

ilm_delete_7d_gb=$(bytes_to_gb "$ilm_delete_7d")
ilm_delete_30d_gb=$(bytes_to_gb "$ilm_delete_30d")
ilm_delete_immediate_gb=$(bytes_to_gb "$ilm_delete_immediate")
ilm_delete_scheduled_7d_gb=$(bytes_to_gb "$ilm_delete_scheduled_7d")
ilm_delete_scheduled_30d_gb=$(bytes_to_gb "$ilm_delete_scheduled_30d")
daily_ilm_delete_gb=$(bytes_to_gb "$daily_ilm_delete_bytes")

# Percentage of currently-indexed data scheduled for ILM deletion within 7 days.
ilm_impact_pct="0.0"
if [ "$cluster_storage_size" -gt 0 ] && [ "$ilm_delete_7d" -gt 0 ]; then
  ilm_impact_pct=$(awk -v ilm="$ilm_delete_7d" -v total="$cluster_storage_size" 'BEGIN { if (total <= 0) { printf "0.0"; exit } printf "%.1f", (ilm / total) * 100 }')
fi

# 7-day-window average deletion rate, used to sanity-check the smoothed rate.
ilm_window_daily_bytes=0
ilm_window_daily_gb="0.00"
if [ "$ilm_delete_7d" -gt 0 ]; then
  ilm_window_daily_bytes=$(awk -v total="$ilm_delete_7d" 'BEGIN { printf "%.0f", total / 7 }')
  ilm_window_daily_gb=$(awk -v total="$ilm_delete_7d" 'BEGIN { printf "%.2f", total / 7 / 1024 / 1024 / 1024 }')
fi

# Warn when the 7-day deletion rate diverges >30% from the smoothed daily rate.
ilm_rate_variance_pct=""
ilm_rate_variance_warning=false
if [ "$daily_ilm_delete_bytes" -gt 0 ] && [ "$ilm_window_daily_bytes" -gt 0 ]; then
  ilm_rate_variance_pct=$(awk -v window="$ilm_window_daily_bytes" -v daily="$daily_ilm_delete_bytes" 'BEGIN { if (daily == 0) { print ""; exit } diff = window - daily; if (diff < 0) diff = -diff; pct = diff / daily * 100; if (pct < 0) pct = -pct; printf "%.0f", pct }')
  if [ -n "$ilm_rate_variance_pct" ]; then
    ilm_rate_flag=$(awk -v v="$ilm_rate_variance_pct" 'BEGIN { if (v + 0 > 30) print 1; else print 0 }')
    if [ "$ilm_rate_flag" -eq 1 ] 2>/dev/null; then
      ilm_rate_variance_warning=true
    fi
  fi
fi
ilm_rate_variance_warning_json="false"
if [ "$ilm_rate_variance_warning" = true ]; then
  ilm_rate_variance_warning_json="true"
fi

# Elasticsearch cluster disk watermark settings (fallback to 85/90/95 defaults)
watermark_output=$(so-elasticsearch-query '_cluster/settings?include_defaults=true&filter_path=*.cluster.routing.allocation.disk.*' --fail 2>/dev/null) || fail "Failed to query Elasticsearch cluster settings"
# Precedence mirrors ES itself: transient > persistent > defaults.
low=$(echo "$watermark_output" | jq -r '.transient.cluster.routing.allocation.disk.watermark.low // .persistent.cluster.routing.allocation.disk.watermark.low // .defaults.cluster.routing.allocation.disk.watermark.low // empty')
high=$(echo "$watermark_output" | jq -r '.transient.cluster.routing.allocation.disk.watermark.high // .persistent.cluster.routing.allocation.disk.watermark.high // .defaults.cluster.routing.allocation.disk.watermark.high // empty')
flood=$(echo "$watermark_output" | jq -r '.transient.cluster.routing.allocation.disk.watermark.flood_stage // .persistent.cluster.routing.allocation.disk.watermark.flood_stage // .defaults.cluster.routing.allocation.disk.watermark.flood_stage // empty')
low=${low:-"85%"}
high=${high:-"90%"}
flood=${flood:-"95%"}
# Convert "NN%" strings into fractions (e.g. 0.850000).
# NOTE(review): assumes percentage-style watermarks; absolute byte values
# (e.g. "10gb") would not parse here - confirm grid always uses percentages.
low_percent=${low%\%}
low_fraction=$(awk -v p="$low_percent" 'BEGIN { if (p == "" || p + 0 <= 0) { printf "%.6f", 0.85; exit } printf "%.6f", p / 100 }')
high_percent=${high%\%}
high_fraction=$(awk -v p="$high_percent" 'BEGIN { if (p == "" || p + 0 <= 0) { printf "%.6f", 0.90; exit } printf "%.6f", p / 100 }')

# Cluster shard total
cluster_shards_output=$(so-elasticsearch-query '_cluster/stats?filter_path=indices.shards.total' --fail 2>/dev/null) || fail "Failed to query cluster shard stats"
total_shards=$(echo "$cluster_shards_output" | jq -r '.indices.shards.total // 0' 2>/dev/null)

# Get max shards per node setting (with default 1000)
max_shards_per_node_output=$(so-elasticsearch-query '_cluster/settings?include_defaults=true&filter_path=*.cluster.max_shards_per_node' --fail 2>/dev/null) || fail "Failed to query cluster shard settings"
max_shards_per_node=$(echo "$max_shards_per_node_output" | jq -r '.transient.cluster.max_shards_per_node // .persistent.cluster.max_shards_per_node // .defaults.cluster.max_shards_per_node // "1000"' 2>/dev/null)
max_shards_per_node=${max_shards_per_node:-1000}

# Get same disk usage metric ES uses for watermark (not only ES used storage, but OS level storage usage)
nodes_output=$(so-elasticsearch-query '_cat/nodes?format=json&h=name,ip,node.role,disk.total,disk.used,disk.avail&bytes=b' --fail 2>/dev/null) || fail "Failed to query Elasticsearch node disk usage"

# Parse nodes with data roles and calculate cluster totals
# Only include nodes with data roles: d=data, h=data_hot, w=data_warm, c=data_cold, s=data_content, f=data_frozen
cluster_stats=$(echo "$nodes_output" | jq --argjson low "$low_fraction" '
  [ .[]
    | select(.["node.role"] | test("[dhwcsf]"))
    | .total = (.["disk.total"] | tostring | gsub("[^0-9.]"; "") | tonumber)
    | .used = (.["disk.used"] | tostring | gsub("[^0-9.]"; "") | tonumber)
    | .avail = (.["disk.avail"] | tostring | gsub("[^0-9.]"; "") | tonumber)
    | select(.total? and .used?)
    | .low_threshold = (.total * $low)
    | .remaining = (.low_threshold - .used)
  ] | {
    total: ([.[].total] | add // 0),
    used: ([.[].used] | add // 0),
    low_threshold: ([.[].low_threshold] | add // 0),
    remaining: ([.[].remaining] | add // 0)
  }
')
cluster_total=$(echo "$cluster_stats" | jq -r '.total')
cluster_used=$(echo "$cluster_stats" | jq -r '.used')
cluster_low_threshold=$(echo "$cluster_stats" | jq -r '.low_threshold')
cluster_remaining=$(echo "$cluster_stats" | jq -r '.remaining')
cluster_high_threshold=$(awk -v total="$cluster_total" -v frac="$high_fraction" 'BEGIN { if (total == "" || frac == "" || total + 0 <= 0 || frac + 0 <= 0) { printf "0"; exit } printf "%.0f", total * frac }')
# Bytes currently over each watermark (0 when below).
cluster_over_low_bytes=$(awk -v used="$cluster_used" -v threshold="$cluster_low_threshold" 'BEGIN { if (used == "" || threshold == "") { printf "0"; exit } diff = used - threshold; if (diff < 0) diff = 0; printf "%.0f", diff }')
cluster_over_high_bytes=$(awk -v used="$cluster_used" -v threshold="$cluster_high_threshold" 'BEGIN { if (used == "" || threshold == "") { printf "0"; exit } diff = used - threshold; if (diff < 0) diff = 0; printf "%.0f", diff }')

# Count data nodes and calculate shard capacity
# Only count nodes with data roles: d=data, h=data_hot, w=data_warm, c=data_cold, s=data_content f=data_frozen
data_node_count=$(echo "$nodes_output" | jq '[.[] | select(.["node.role"] | test("[dhwcsf]"))] | length')
max_shard_capacity=$((data_node_count * max_shards_per_node))
declare -a data_node_names
declare -a data_node_roles
if [ "$data_node_count" -gt 0 ]; then
  while IFS='|' read -r node_name node_role; do
    data_node_names+=("$node_name")
    data_node_roles+=("$node_role")
  done < <(echo "$nodes_output" | jq -r '.[] | select(.["node.role"] | test("[dhwcsf]")) | "\(.name)|\(.["node.role"])"')
fi
shard_usage_percent="0.0"
if [ "$max_shard_capacity" -gt 0 ]; then
  shard_usage_percent=$(awk -v current="$total_shards" -v max="$max_shard_capacity" 'BEGIN { if (max <= 0) { printf "0.0"; exit } printf "%.1f", (current / max) * 100 }')
fi

# Recommendation state, populated further below when a breach is imminent.
recommendations_triggered=false
recommendations_ready=false
recommendations_message=""
recommendations_json='[]'
recommendations_triggered_json=false
recommendation_lines=()
recommendation_records=()
should_trigger_recommendations=false
recommendations_reason=""
days_to_low_numeric=""
days_to_low_gross_numeric=""
[ "$cluster_total" -gt 0 ] || fail "No Elasticsearch data nodes retrieved from _cat/nodes"

# Calculate current retention period (age of oldest .ds-logs-* index)
oldest_index_days=""
oldest_index_name=""
oldest_index_output=$(so-elasticsearch-query '_cat/indices/.ds-logs-*?format=json&h=index,creation.date&s=creation.date:asc' --fail 2>/dev/null | jq -r '.[0] // empty' 2>/dev/null || true)
if [ -n "$oldest_index_output" ]; then
  oldest_index_name=$(echo "$oldest_index_output" | jq -r '.index // empty' 2>/dev/null)
  oldest_creation_ms=$(echo "$oldest_index_output" | jq -r '.["creation.date"] // empty' 2>/dev/null)
  if [ -n "$oldest_creation_ms" ] && [[ "$oldest_creation_ms" =~ ^[0-9]+$ ]]; then
    oldest_creation_sec=$((oldest_creation_ms / 1000))
    if [ "$oldest_creation_sec" -gt 0 ]; then
      now_sec=$(date +%s)
      if [ "$now_sec" -ge "$oldest_creation_sec" ]; then
        age_sec=$((now_sec - oldest_creation_sec))
        oldest_index_days=$(awk -v age="$age_sec" 'BEGIN { printf "%.1f", age / 86400 }')
      fi
    fi
  fi
fi

# Calculate days until low watermark using net growth
days_to_low=""
days_to_low_gross=""
target_date=""
# Calculate with gross growth
# (bc is preferred for the float comparison; awk is the fallback when bc is absent)
if [ "$daily_growth_bytes" -gt 0 ] && [ "$(echo "$cluster_remaining > 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r > 0) print 1; else print 0 }')" -eq 1 ]; then
  days_to_low_gross=$(awk -v rem="$cluster_remaining" -v perday="$daily_growth_bytes" 'BEGIN { printf "%.2f", rem / perday }')
fi
# Calculate with net growth (minus ILM deletions)
if [ "$net_growth_bytes" -gt 0 ] && [ "$(echo "$cluster_remaining > 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r > 0) print 1; else print 0 }')" -eq 1 ]; then
  days_to_low=$(awk -v rem="$cluster_remaining" -v perday="$net_growth_bytes" 'BEGIN { printf "%.2f", rem / perday }')
  ceil_days=$(awk -v d="$days_to_low" 'BEGIN { base = int(d); if (d > base) { base = base + 1 } if (base < 0) { base = 0 } printf "%d", base }')
  target_date=$(date -d "+${ceil_days} days" +%F 2>/dev/null)
elif [ "$(echo "$cluster_remaining > 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r > 0) print 1; else print 0 }')" -eq 1 ]; then
  # Net growth is zero or negative, cluster is in equilibrium or shrinking
  days_to_low="stable"
fi
if [ -n "$days_to_low" ] && [ "$days_to_low" != "stable" ] && [[ "$days_to_low" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
  days_to_low_numeric="$days_to_low"
fi
if [ -n "$days_to_low_gross" ] && [[ "$days_to_low_gross" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
  days_to_low_gross_numeric="$days_to_low_gross"
fi

# Calculate estimated retention (oldest index age + days until low watermark)
estimated_retention_days=""
if [ -n "$oldest_index_days" ] && [ -n "$days_to_low_numeric" ]; then
  estimated_retention_days=$(awk -v oldest="$oldest_index_days" -v remaining="$days_to_low_numeric" 'BEGIN { printf "%.1f", oldest + remaining }')
fi

# Decide whether retention recommendations should be generated:
# already at/over low watermark, or a projected breach within 7 days.
cluster_at_or_below_low=$(echo "$cluster_remaining <= 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r <= 0) print 1; else print 0 }')
if [ "$cluster_at_or_below_low" -eq 1 ]; then
  should_trigger_recommendations=true
  if [ "$cluster_over_high_bytes" -gt 0 ] 2>/dev/null; then
    recommendations_reason="Cluster is beyond the high watermark threshold. Reduce retention on the fastest-growing indices immediately."
  else
    recommendations_reason="Cluster is at or beyond the low watermark threshold. Reduce retention on the fastest-growing indices immediately."
  fi
elif [ -n "$days_to_low_numeric" ]; then
  within_seven=$(awk -v d="$days_to_low_numeric" 'BEGIN { if (d <= 7) print 1; else print 0 }')
  if [ "$within_seven" -eq 1 ]; then
    should_trigger_recommendations=true
    recommendations_reason="Projected low watermark breach in ~${days_to_low_numeric} days (${target_date:-N/A}). Reduce retention on the fastest-growing indices."
  fi
elif [ -n "$days_to_low_gross_numeric" ]; then
  within_seven_gross=$(awk -v d="$days_to_low_gross_numeric" 'BEGIN { if (d <= 7) print 1; else print 0 }')
  if [ "$within_seven_gross" -eq 1 ]; then
    should_trigger_recommendations=true
    recommendations_reason="Gross growth trend indicates a low watermark breach in ~${days_to_low_gross_numeric} days (${target_date:-N/A}). Reduce retention on the fastest-growing indices before ILM deletions."
  fi
fi

cluster_over_high_flag=0
if [ "$cluster_over_high_bytes" -gt 0 ] 2>/dev/null; then
  cluster_over_high_flag=1
fi
cluster_over_low_flag=0
if [ "$cluster_over_low_bytes" -gt 0 ] 2>/dev/null; then
  cluster_over_low_flag=1
fi
cluster_high_threshold_gb=$(bytes_to_gb "$cluster_high_threshold")
cluster_over_low_gb=$(bytes_to_gb "$cluster_over_low_bytes")
cluster_over_high_gb=$(bytes_to_gb "$cluster_over_high_bytes")

# Build per-index retention recommendations from the fastest-growing indices.
if [ "$should_trigger_recommendations" = true ]; then
  recommendations_triggered=true
  recommendations_triggered_json=true
  if [ -n "$recommendations_reason" ]; then
    recommendations_message="$recommendations_reason"
  else
    recommendations_message="Cluster is nearing the low watermark threshold. Reduce retention on the fastest-growing indices."
  fi
  growth_output=$(run_indices_growth || true)
  if [ -n "${growth_output//[[:space:]]/}" ]; then
    # Skip the 2 header lines and take the top 3 fastest-growing indices.
    mapfile -t recommendation_source_lines < <(printf '%s\n' "$growth_output" | tail -n +3 | awk 'NF' | head -n 3)
    for line in "${recommendation_source_lines[@]}"; do
      index=$(echo "$line" | awk '{print $1}')
      [ -n "$index" ] || continue
      # Third-from-last column of the growth report is 24h growth in GB.
      growth_24h_gb=$(echo "$line" | awk '{print $(NF-2)}')
      creation_date_display=""
      retention_days=""
      policy=""
      delete_min_age=""
      index_info=$(so-elasticsearch-query "_cat/indices/${index}?format=json&h=index,creation.date,creation.date.string" --fail 2>/dev/null) || true
      if [ -n "$index_info" ]; then
        creation_epoch=$(echo "$index_info" | jq -r '.[0]."creation.date" // empty' 2>/dev/null)
        creation_readable=$(echo "$index_info" | jq -r '.[0]."creation.date.string" // empty' 2>/dev/null)
        if [ -n "$creation_epoch" ] && [[ "$creation_epoch" =~ ^[0-9]+$ ]]; then
          creation_seconds=$((creation_epoch / 1000))
          if [ "$creation_seconds" -gt 0 ]; then
            creation_date_display=$(date -u -d "@$creation_seconds" +%FT%TZ 2>/dev/null)
            now_seconds=$(date +%s)
            if [ "$now_seconds" -ge "$creation_seconds" ]; then
              retention_days=$(awk -v now="$now_seconds" -v created="$creation_seconds" 'BEGIN { diff = now - created; if (diff < 0) diff = 0; printf "%.1f", diff / 86400 }')
            fi
          fi
        fi
        if [ -z "$creation_date_display" ] && [ -n "$creation_readable" ] && [ "$creation_readable" != "null" ]; then
          creation_date_display="$creation_readable"
        fi
      fi
      ilm_output=$(so-elasticsearch-query "${index}/_ilm/explain" --fail 2>/dev/null) || true
      if [ -n "$ilm_output" ]; then
        policy=$(echo "$ilm_output" | jq -r '.indices | to_entries | .[0].value.policy // empty' 2>/dev/null)
      fi
      if [ -n "$policy" ] && [ -n "${policy_ages[$policy]:-}" ]; then
        delete_min_age=${policy_ages[$policy]}
      fi
      retention_days_display=${retention_days:-unknown}
      # Floor observed retention to a whole day, minimum 1.
      retention_days_floor=""
      if [ -n "$retention_days" ]; then
        retention_days_floor=$(awk -v v="$retention_days" 'BEGIN { if (v == "" || v == "null") { print ""; exit } val = v + 0; if (val < 1) val = 1; printf "%d", int(val) }')
        if [ -n "$retention_days_floor" ] && [ "$retention_days_floor" -lt 1 ]; then
          retention_days_floor=1
        fi
      fi
      delete_min_age_numeric=""
      if [ -n "$delete_min_age" ]; then
        delete_min_age_numeric=$(awk -v v="$delete_min_age" 'BEGIN { if (v == "" || v == "null") { print ""; exit } val = v + 0; if (val < 1) val = 1; printf "%d", int(val) }')
      fi
      # Suggested delete.min_age = min(observed retention, current policy age).
      recommended_delete_min_age=""
      if [ -n "$retention_days_floor" ]; then
        recommended_delete_min_age="$retention_days_floor"
      fi
      if [ -n "$delete_min_age_numeric" ]; then
        if [ -n "$recommended_delete_min_age" ]; then
          recommended_delete_min_age=$(awk -v rec="$recommended_delete_min_age" -v cur="$delete_min_age_numeric" 'BEGIN { rec += 0; cur += 0; if (cur < rec) printf "%d", cur; else printf "%d", rec }')
        else
          recommended_delete_min_age="$delete_min_age_numeric"
        fi
      fi
      if [ -z "$recommended_delete_min_age" ] && [ -n "$retention_days_floor" ]; then
        recommended_delete_min_age="$retention_days_floor"
      fi
      action_phrase=""
      if [ -n "$recommended_delete_min_age" ]; then
        if [ -n "$delete_min_age_numeric" ] && [ "$recommended_delete_min_age" -lt "$delete_min_age_numeric" ]; then
          action_phrase="Lower delete.min_age to ~${recommended_delete_min_age}d"
        else
          action_phrase="Cap delete.min_age at ~${recommended_delete_min_age}d"
        fi
        if [ -n "$retention_days_floor" ]; then
          action_phrase="${action_phrase} (observed retention ~${retention_days_floor}d)"
        fi
        action_phrase="${action_phrase}; consider whether a tighter cap (e.g., 30d) fits requirements."
      else
        action_phrase="Review ILM delete.min_age for this index; consider more aggressive retention if throughput stays high."
      fi
      policy_clause=""
      if [ -n "$policy" ]; then
        policy_clause=", policy ${policy}"
      fi
      if [ -n "$delete_min_age" ]; then
        policy_clause="${policy_clause} (current delete.min_age ${delete_min_age}d)"
      fi
      recommendation_lines+=(" - ${BOLD}${index}${NC}: ~${growth_24h_gb} GB growth in last 24h, retention ~${retention_days_display} days (created ${creation_date_display:-unknown})${policy_clause}. ${action_phrase}")
      record=$(jq -nc \
        --arg index "$index" \
        --arg growth "$growth_24h_gb" \
        --arg retention "${retention_days:-}" \
        --arg created "${creation_date_display:-}" \
        --arg policy "$policy" \
        --arg delete_age "${delete_min_age:-}" \
        --arg suggested "${recommended_delete_min_age:-}" \
        --arg action "$action_phrase" \
        '{
          index: $index,
          growth_gb_last_24h: (if ($growth | length) > 0 then ($growth | tonumber) else null end),
          retention_days: (if ($retention | length) > 0 then ($retention | tonumber) else null end),
          creation_date: (if ($created | length) > 0 then $created else null end),
          ilm_policy: (if ($policy | length) > 0 then $policy else null end),
          delete_min_age_days: (if ($delete_age | length) > 0 then ($delete_age | tonumber) else null end),
          suggested_delete_min_age_days: (if ($suggested | length) > 0 then ($suggested | tonumber) else null end),
          recommendation: (if ($action | length) > 0 then $action else null end)
        }')
      recommendation_records+=("$record")
    done
  fi
  if [ ${#recommendation_records[@]} -gt 0 ]; then
    recommendations_ready=true
    recommendations_json=$(printf '%s\n' "${recommendation_records[@]}" | jq -s '.')
  else
    if [ -n "$recommendations_reason" ]; then
      recommendations_message="$recommendations_reason Unable to retrieve detailed growth data from so-elasticsearch-indices-growth."
    else
      recommendations_message="Unable to retrieve growth data from so-elasticsearch-indices-growth while near the low watermark threshold."
      # NOTE: the three if/else blocks opened above remain open here; they are
      # closed by the "fi fi fi" that immediately follows this section.
# Close the remaining conditionals opened earlier in the script.
fi
fi
fi

#######################################
# Final report.
# --json  : emit one machine-readable JSON document via jq.
# default : print a colorized, human-readable summary.
# Every metric variable referenced below is computed earlier in the script.
#######################################
if [ "$JSON_OUTPUT" = true ]; then
  # String-typed metrics go in via --arg (converted with tonumber inside the
  # program when non-empty); values already validated as JSON (integers,
  # booleans, arrays) go in via --argjson.
  jq -n \
    --arg indexed_storage_source "$indexed_storage_source" \
    --arg current_gb "$(bytes_to_gb "$cluster_storage_size")" \
    --arg oldest_index_days "$oldest_index_days" \
    --arg estimated_retention_days "$estimated_retention_days" \
    --arg daily_growth_gb "$(bytes_to_gb "$daily_growth_bytes")" \
    --arg daily_ilm_delete_gb "$daily_ilm_delete_gb" \
    --arg net_growth_gb "$(bytes_to_gb "$net_growth_bytes")" \
    --arg ilm_delete_7d_gb "$ilm_delete_7d_gb" \
    --arg ilm_delete_immediate_gb "$ilm_delete_immediate_gb" \
    --arg ilm_delete_scheduled_7d_gb "$ilm_delete_scheduled_7d_gb" \
    --arg ilm_delete_scheduled_30d_gb "$ilm_delete_scheduled_30d_gb" \
    --arg ilm_delete_30d_gb "$ilm_delete_30d_gb" \
    --arg ilm_window_daily_gb "$ilm_window_daily_gb" \
    --arg ilm_impact_pct "$ilm_impact_pct" \
    --arg ilm_rate_variance_pct "$ilm_rate_variance_pct" \
    --arg growth_window "$history_label" \
    --arg cluster_total_gb "$(bytes_to_gb "$cluster_total")" \
    --arg cluster_used_gb "$(bytes_to_gb "$cluster_used")" \
    --arg cluster_remaining_gb "$(bytes_to_gb "$cluster_remaining")" \
    --arg cluster_low_threshold_gb "$(bytes_to_gb "$cluster_low_threshold")" \
    --arg cluster_high_threshold_gb "$cluster_high_threshold_gb" \
    --arg cluster_over_low_gb "$cluster_over_low_gb" \
    --arg cluster_over_high_gb "$cluster_over_high_gb" \
    --arg shard_usage_percent "$shard_usage_percent" \
    --arg low_watermark "$low" \
    --arg high_watermark "$high" \
    --arg flood_watermark "$flood" \
    --arg days_to_low "${days_to_low:-null}" \
    --arg days_to_low_gross "${days_to_low_gross:-null}" \
    --arg estimated_date "${target_date:-null}" \
    --arg recommendation_message "$recommendations_message" \
    --argjson total_shards "$total_shards" \
    --argjson max_shard_capacity "$max_shard_capacity" \
    --argjson data_node_count "$data_node_count" \
    --argjson max_shards_per_node "$max_shards_per_node" \
    --argjson ilm_indices_7d "$ilm_indices_7d" \
    --argjson ilm_indices_immediate "$ilm_indices_immediate" \
    --argjson ilm_indices_scheduled_7d "$ilm_indices_scheduled_7d" \
    --argjson ilm_indices_scheduled_30d "$ilm_indices_scheduled_30d" \
    --argjson ilm_indices_30d "$ilm_indices_30d" \
    --argjson ilm_shards_7d "$ilm_shards_7d" \
    --argjson ilm_shards_30d "$ilm_shards_30d" \
    --argjson ilm_shards_immediate "$ilm_shards_immediate" \
    --argjson ilm_shards_scheduled_7d "$ilm_shards_scheduled_7d" \
    --argjson ilm_shards_scheduled_30d "$ilm_shards_scheduled_30d" \
    --arg daily_shard_creation "$daily_shard_creation" \
    --argjson recommendations "$recommendations_json" \
    --argjson recommendations_triggered "$recommendations_triggered_json" \
    '
    # Convention: an empty --arg string means "metric unavailable" and is
    # mapped to null; otherwise the value is converted with tonumber.
    # NOTE(review): $ilm_rate_variance_pct and $growth_window are bound but
    # not referenced in this program — kept for backward compatibility.
    {
      indexed_storage_gb: ($current_gb | tonumber),
      indexed_storage_source: $indexed_storage_source,
      oldest_index_days: (if ($oldest_index_days | length) > 0 then ($oldest_index_days | tonumber) else null end),
      estimated_retention_days: (if ($estimated_retention_days | length) > 0 then ($estimated_retention_days | tonumber) else null end),
      growth: {
        daily_growth_gb: ($daily_growth_gb | tonumber),
        daily_ilm_delete_gb: (if ($daily_ilm_delete_gb | length) > 0 then ($daily_ilm_delete_gb | tonumber) else null end),
        net_growth_gb: (if ($net_growth_gb | length) > 0 then ($net_growth_gb | tonumber) else null end),
        # FIX: removed trailing comma after this entry — jq rejects trailing
        # commas in object construction, which broke the whole --json path.
        daily_shard_creation: (if ($daily_shard_creation | length) > 0 then ($daily_shard_creation | tonumber) else null end)
      },
      ilm: {
        deleting_now: {
          indices: $ilm_indices_immediate,
          storage_gb: (if ($ilm_delete_immediate_gb | length) > 0 then ($ilm_delete_immediate_gb | tonumber) else null end),
          shards: $ilm_shards_immediate
        },
        scheduled_7d: {
          indices: $ilm_indices_scheduled_7d,
          storage_gb: (if ($ilm_delete_scheduled_7d_gb | length) > 0 then ($ilm_delete_scheduled_7d_gb | tonumber) else null end),
          shards: $ilm_shards_scheduled_7d
        },
        scheduled_30d: {
          indices: $ilm_indices_scheduled_30d,
          storage_gb: (if ($ilm_delete_scheduled_30d_gb | length) > 0 then ($ilm_delete_scheduled_30d_gb | tonumber) else null end),
          shards: $ilm_shards_scheduled_30d
        },
        indices_to_delete_7d: $ilm_indices_7d,
        storage_to_delete_7d_gb: (if ($ilm_delete_7d_gb | length) > 0 then ($ilm_delete_7d_gb | tonumber) else null end),
        shards_to_delete_7d: $ilm_shards_7d,
        total_30d_indices: $ilm_indices_30d,
        total_30d_storage_gb: (if ($ilm_delete_30d_gb | length) > 0 then ($ilm_delete_30d_gb | tonumber) else null end),
        total_30d_shards: $ilm_shards_30d,
        percent_of_current_data: (if ($ilm_impact_pct | length) > 0 then ($ilm_impact_pct | tonumber) else null end),
        # FIX: removed trailing comma after this entry (jq syntax error).
        windowed_daily_avg_gb: (if ($ilm_window_daily_gb | length) > 0 then ($ilm_window_daily_gb | tonumber) else null end)
      },
      cluster: {
        total_gb: ($cluster_total_gb | tonumber),
        used_gb: ($cluster_used_gb | tonumber),
        remaining_before_low_watermark_gb: (if ($cluster_remaining_gb | length) > 0 then ($cluster_remaining_gb | tonumber) else null end),
        low_watermark_threshold_gb: (if ($cluster_low_threshold_gb | length) > 0 then ($cluster_low_threshold_gb | tonumber) else null end),
        high_watermark_threshold_gb: (if ($cluster_high_threshold_gb | length) > 0 then ($cluster_high_threshold_gb | tonumber) else null end),
        over_low_watermark_gb: (if ($cluster_over_low_gb | length) > 0 then ($cluster_over_low_gb | tonumber) else null end),
        over_high_watermark_gb: (if ($cluster_over_high_gb | length) > 0 then ($cluster_over_high_gb | tonumber) else null end),
        low_watermark_setting: $low_watermark,
        high_watermark_setting: $high_watermark,
        flood_watermark_setting: $flood_watermark,
        shards: {
          current: $total_shards,
          max_capacity: $max_shard_capacity,
          usage_percent: (if ($shard_usage_percent | length) > 0 then ($shard_usage_percent | tonumber) else null end),
          data_nodes: $data_node_count,
          max_shards_per_node: $max_shards_per_node
        }
      },
      projection: {
        # FIX: emit JSON null when unavailable (was the literal string
        # "null"), consistent with days_to_low_watermark_gross and
        # estimated_breach_date. The "stable" sentinel string is preserved.
        days_to_low_watermark_net: (if $days_to_low == "null" then null elif $days_to_low == "stable" then $days_to_low else ($days_to_low | tonumber) end),
        days_to_low_watermark_gross: (if $days_to_low_gross == "null" then null else ($days_to_low_gross | tonumber) end),
        estimated_breach_date: (if $estimated_date == "null" then null else $estimated_date end)
      },
      recommendations: {
        triggered: $recommendations_triggered,
        message: (if ($recommendation_message | length) > 0 then $recommendation_message else null end),
        indices: $recommendations
      }
    }'
else
  # ----- Human-readable report -----

  log_title "LOG" "Storage Overview"
  indexed_gb_display=$(bytes_to_gb "$cluster_storage_size")
  echo -e "${BOLD}Indexed data size:${NC} ${indexed_gb_display} GB (Elasticsearch)"
  echo -e "${BOLD}Cluster capacity:${NC} $(bytes_to_gb "$cluster_total") GB total"
  echo -e "${BOLD}Cluster used:${NC} $(bytes_to_gb "$cluster_used") GB"
  echo -e "${BOLD}Low watermark:${NC} $low ($(bytes_to_gb "$cluster_low_threshold") GB threshold)"

  # Remaining space: red if over the high watermark, yellow if over the low
  # watermark, plain otherwise.
  if [ "$cluster_over_low_flag" -eq 1 ]; then
    if [ "$cluster_over_high_flag" -eq 1 ]; then
      echo -e "${BOLD}Remaining space:${NC} ${REDBOLD}${cluster_over_high_gb} GB${NC} OVER the high watermark"
    else
      echo -e "${BOLD}Remaining space:${NC} ${YELLOWBOLD}${cluster_over_low_gb} GB${NC} OVER the low watermark"
    fi
  else
    echo -e "${BOLD}Remaining space:${NC} $(bytes_to_gb "$cluster_remaining") GB before low watermark"
  fi

  # Display shard capacity information (warn at >= 80% of cluster capacity).
  shard_warning_flag=$(awk -v pct="$shard_usage_percent" 'BEGIN { if (pct + 0 >= 80) print 1; else print 0 }')
  if [ "$shard_warning_flag" -eq 1 ]; then
    echo -e "${BOLD}Cluster shards:${NC} ${YELLOW}${total_shards} / ${max_shard_capacity} (${shard_usage_percent}%)${NC}"
  else
    echo -e "${BOLD}Cluster shards:${NC} ${total_shards} / ${max_shard_capacity} (${shard_usage_percent}%)"
  fi

  # Display data nodes with roles (only data-related roles)
  if [ "$data_node_count" -gt 0 ]; then
    echo -e "${BOLD}Cluster data nodes:${NC} ${data_node_count}"
    for i in "${!data_node_names[@]}"; do
      node_name="${data_node_names[$i]}"
      node_role="${data_node_roles[$i]}"
      expanded_roles=$(expand_node_roles "$node_role")
      echo -e " ${node_name}: ${expanded_roles}"
    done
  fi

  log_title "LOG" "ES Growth"
  echo -e "${BOLD}Daily growth rate:${NC} $(bytes_to_gb "$daily_growth_bytes") GB/day"
  if [ "$daily_ilm_delete_bytes" -gt 0 ]; then
    echo -e "${BOLD}ILM deletion rate:${NC} ${daily_ilm_delete_gb} GB/day (scheduled)"
    echo -e "${BOLD}Net growth rate:${NC} $(bytes_to_gb "$net_growth_bytes") GB/day"
  else
    echo -e "${BOLD}ILM deletion rate:${NC} 0.00 GB/day (scheduled)"
    echo -e "${BOLD}Net growth rate:${NC} $(bytes_to_gb "$net_growth_bytes") GB/day"
  fi

  # Display daily shards (only when a positive rate was computed).
  if [ -n "$daily_shard_creation" ] && [ "$(awk -v d="$daily_shard_creation" 'BEGIN { if (d > 0) print 1; else print 0 }')" -eq 1 ]; then
    daily_shard_creation_rounded=$(awk -v d="$daily_shard_creation" 'BEGIN { printf "%.0f", d }')
    echo -e "${BOLD}Daily shard creation:${NC} ~${daily_shard_creation_rounded} shards/day"
  fi

  if [ "$ilm_indices_immediate" -gt 0 ]; then
    echo -e "${BOLD}Deleting now:${NC} $ilm_indices_immediate indices (~${ilm_delete_immediate_gb} GB, $ilm_shards_immediate shards)"
  fi
  if [ "$ilm_indices_30d" -gt 0 ]; then
    # NOTE(review): this tests $ilm_delete_scheduled_30d (no _gb suffix); the
    # JSON path only uses $ilm_delete_scheduled_30d_gb. Confirm the bare
    # variable is set upstream, otherwise this [ -gt ] comparison errors.
    if [ "$ilm_delete_scheduled_30d" -gt 0 ] && [ "$ilm_indices_scheduled_30d" -gt 0 ]; then
      echo -e "${BOLD}Storage to be freed (30d):${NC} $ilm_indices_30d indices (~${ilm_delete_30d_gb} GB, $ilm_shards_30d shards)"
    elif [ "$ilm_indices_7d" -gt 0 ]; then
      echo -e "${BOLD}Storage to be freed (7d):${NC} $ilm_indices_7d indices (~${ilm_delete_7d_gb} GB, $ilm_shards_7d shards)"
    fi
  fi

  log_title "LOG" "Retention Projection"
  if [ -n "$oldest_index_days" ]; then
    oldest_days_rounded=$(awk -v d="$oldest_index_days" 'BEGIN { printf "%.0f", d }')
    if [ -n "$oldest_index_name" ]; then
      echo -e "${BOLD}Oldest index:${NC} ~${oldest_days_rounded} days (${oldest_index_name})"
    else
      echo -e "${BOLD}Oldest index:${NC} ~${oldest_days_rounded} days (.ds-logs-* only)"
    fi
    if [ -n "$estimated_retention_days" ]; then
      estimated_days_rounded=$(awk -v d="$estimated_retention_days" 'BEGIN { printf "%.0f", d }')
      echo -e "${BOLD}Estimated retention:${NC} ~${estimated_days_rounded} days (until configured low watermark setting)"
    fi
    echo
  fi

  # $days_to_low carries a sentinel: "stable" (net growth ~0 or negative),
  # empty (no projection possible), or a numeric day count.
  if [ "$days_to_low" = "stable" ]; then
    if [ "$net_growth_bytes" -lt 0 ]; then
      shrink_rate_gb=$(bytes_to_gb "${net_growth_bytes#-}")
      log_title "OK" "Cluster is shrinking - ILM deletions exceed growth"
      echo
      echo -e "${BOLD}Storage trend:${NC} Decreasing at ~${shrink_rate_gb} GB/day"
      echo -e "${BOLD}Note:${NC} Current ILM policies are reclaiming more space than incoming data consumes."
      # If already over the low watermark, estimate how long until we recover.
      if [ "$cluster_over_low_bytes" -gt 0 ] 2>/dev/null; then
        recovery_days=$(awk -v excess="$cluster_over_low_bytes" -v rate="${net_growth_bytes#-}" 'BEGIN { if (rate <= 0) { print ""; exit } printf "%.1f", excess / rate }')
        if [ -n "$recovery_days" ]; then
          echo -e "${BOLD}Recovery time:${NC} Estimated ${recovery_days} days to fall below the low watermark if trend continues"
        fi
      fi
    else
      log_title "OK" "Cluster is in equilibrium - ILM deletions balance growth"
      echo
      echo -e "${BOLD}Storage trend:${NC} Stable (net growth ~0 GB/day)"
      echo -e "${BOLD}Note:${NC} Current ILM policies are keeping storage steady."
    fi
  elif [ -z "$days_to_low" ]; then
    # No projection available — explain which precondition failed.
    if [ "$net_growth_bytes" -lt 0 ] && [ "$daily_ilm_delete_bytes" -gt 0 ]; then
      shrink_rate_gb=$(bytes_to_gb "${net_growth_bytes#-}")
      log_title "OK" "Cluster is shrinking - ILM deletions exceed growth"
      echo
      echo -e "${BOLD}Storage trend:${NC} Decreasing at ~${shrink_rate_gb} GB/day"
      echo -e "${BOLD}Note:${NC} Storage is expected to continue decreasing due to ILM policies."
    elif [ "$daily_growth_bytes" -le 0 ]; then
      log_title "WARN" "Unable to project: Growth rate is zero or negative"
    elif [ "$(echo "$cluster_remaining <= 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r <= 0) print 1; else print 0 }')" -eq 1 ]; then
      # bc handles fractional comparison; awk is the fallback when bc is absent.
      log_title "ERROR" "Cluster already at low watermark threshold! Review recommendations below and consider updating ILM."
    else
      log_title "WARN" "Unable to calculate projection"
    fi
  else
    # Numeric projection: severity by how soon the breach is expected.
    if (( $(echo "$days_to_low < 7" | bc -l 2>/dev/null || awk -v d="$days_to_low" 'BEGIN { if (d < 7) print 1; else print 0 }') )); then
      log_title "ERROR" "Low watermark breach estimated in ~$days_to_low days (${target_date:-N/A})"
    elif (( $(echo "$days_to_low < 14" | bc -l 2>/dev/null || awk -v d="$days_to_low" 'BEGIN { if (d < 14) print 1; else print 0 }') )); then
      log_title "WARN" "Low watermark breach estimated in ~$days_to_low days (${target_date:-N/A})"
    else
      log_title "OK" "Low watermark breach estimated in ~$days_to_low days (${target_date:-N/A})"
    fi
    echo
  fi

  if [ "$recommendations_triggered" = true ]; then
    log_title "LOG" "Recommendations"
    if [ "$recommendations_ready" = true ]; then
      echo -e "${BOLD}Action:${NC} Reduce retention on the fastest-growing indices to reduce overall storage usage."
      for rec_line in "${recommendation_lines[@]}"; do
        echo -e "$rec_line"
      done
    else
      if [ -n "$recommendations_message" ]; then
        echo -e "${BOLD}Note:${NC} $recommendations_message"
      fi
    fi
    echo
  fi

  if [ "$VERBOSE" = true ]; then
    log_title "LOG" "Scheduled Deletions (Detailed)"

    # Indices already in the ILM delete phase.
    if [ ${#immediate_indices_names[@]} -gt 0 ]; then
      echo -e "${BOLD}Deleting Now (in delete phase):${NC}"
      echo
      total_immediate_mb=0
      for i in "${!immediate_indices_names[@]}"; do
        index_name="${immediate_indices_names[$i]}"
        size_bytes="${immediate_indices_sizes[$i]}"
        size_mb=$(awk -v b="$size_bytes" 'BEGIN { printf "%.2f", b / 1024 / 1024 }')
        total_immediate_mb=$(awk -v total="$total_immediate_mb" -v size="$size_mb" 'BEGIN { printf "%.2f", total + size }')
        printf " %-60s %10s MB\n" "$index_name" "$size_mb"
      done
      echo -e "${BOLD}Total:${NC} ${total_immediate_mb} MB (${#immediate_indices_names[@]} indices)"
      echo
    fi

    # Indices scheduled for deletion within the next 7 days.
    if [ ${#scheduled_indices_names[@]} -gt 0 ]; then
      echo -e "${BOLD}Scheduled for Deletion (≤7 days):${NC}"
      echo
      total_scheduled_mb=0
      # Sort by days_until deletion
      sorted_indices=()
      for i in "${!scheduled_indices_names[@]}"; do
        sorted_indices+=("${scheduled_indices_days[$i]}|${scheduled_indices_names[$i]}|${scheduled_indices_sizes[$i]}")
      done
      OLD_IFS="$IFS"
      IFS=$'\n'
      sorted_indices=($(sort -t'|' -k1 -n <<<"${sorted_indices[*]}"))
      IFS="$OLD_IFS"
      for entry in "${sorted_indices[@]}"; do
        IFS='|' read -r days_until index_name size_bytes <<< "$entry"
        size_mb=$(awk -v b="$size_bytes" 'BEGIN { printf "%.2f", b / 1024 / 1024 }')
        total_scheduled_mb=$(awk -v total="$total_scheduled_mb" -v size="$size_mb" 'BEGIN { printf "%.2f", total + size }')
        days_display=$(awk -v d="$days_until" 'BEGIN { printf "%.1f", d }')
        printf " %-55s %10s MB (in ~%s days)\n" "$index_name" "$size_mb" "$days_display"
      done
      echo -e "${BOLD}Total:${NC} ${total_scheduled_mb} MB (${#scheduled_indices_names[@]} indices)"
      echo
    fi

    if [ ${#immediate_indices_names[@]} -eq 0 ] && [ ${#scheduled_indices_names[@]} -eq 0 ]; then
      echo -e "No indices scheduled for deletion within the next 7 days."
      echo
    fi
  fi
  echo
fi
exit 0