From a308a39bbe6d9ab55ffb4de48f682a9aaeace469 Mon Sep 17 00:00:00 2001 From: Wes Date: Wed, 24 May 2023 16:48:45 +0000 Subject: [PATCH 1/8] Use disk space taken up by indices if the script is not running on a manager --- .../tools/sbin_jinja/so-elasticsearch-cluster-space-used | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used index b8ac4f6e6..222cb2f5d 100755 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used +++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used @@ -12,9 +12,11 @@ TOTAL_AVAILABLE_SPACE=0 # Iterate through the output of _cat/allocation for each node in the cluster to determine the total available space {% if GLOBALS.role == 'so-manager' %} +# Get total disk space - disk.total for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | grep -v {{ GLOBALS.manager }} | awk '{print $3}'); do {% else %} -for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | awk '{print $3}'); do +# Get disk space taken up by indices - disk.indices +for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | awk '{print $2}'); do {% endif %} size=$(echo $i | grep -oE '[0-9].*' | awk '{print int($1+0.5)}') unit=$(echo $i | grep -oE '[A-Za-z]+') From 53f258b08f8279d7adf11bfa8b2193de435d8a2d Mon Sep 17 00:00:00 2001 From: Wes Date: Wed, 24 May 2023 17:02:11 +0000 Subject: [PATCH 2/8] Add eligible_indices to index deletion requirements --- .../sbin_jinja/so-curator-cluster-delete-delete | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete b/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete index d6049ffb8..81d2720c1 100755 --- a/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete +++ b/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete @@ -11,14 +11,24 @@ LOG="/opt/so/log/curator/so-curator-cluster-delete.log" LOG_SIZE_LIMIT=$(/usr/sbin/so-elasticsearch-cluster-space-total {{ RETENTION.retention_pct}}) +TODAY=$(date +'%Y.%m.%d) + +eligible_indices() { + [[ $(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'open$' | awk '{print $1}' | grep -vE "playbook|so-case|$TODAY" | grep -E "(logstash-|so-|.ds-logs-)" | wc -l) -ge 1 ]] +} overlimit() { [[ $(/usr/sbin/so-elasticsearch-cluster-space-used) -gt "${LOG_SIZE_LIMIT}" ]] } -# Check to see if Elasticsearch indices using more disk space than LOG_SIZE_LIMIT +########################### +# Check for 2 conditions: # +########################### +# 1. Check if Elasticsearch indices are using more disk space than LOG_SIZE_LIMIT +# 2. Check if Elasticsearch indices are eligible for deletion -- they cannot be Playbook, SOC, today's, or other important indices # Closed indices will be deleted first. If we are able to bring disk space under LOG_SIZE_LIMIT, we will break out of the loop. -while overlimit; do + +while overlimit && eligible_indices; do # If we can't query Elasticsearch, then immediately return false. /usr/sbin/so-elasticsearch-query _cat/indices?h=index,status > /dev/null 2>&1 [ $? -eq 1 ] && echo "$(date) - Could not query Elasticsearch." >> ${LOG} && exit From 5de59a879a34c607ebbaa9fa03d8b998b0d2dc55 Mon Sep 17 00:00:00 2001 From: Wes Date: Fri, 26 May 2023 13:15:27 +0000 Subject: [PATCH 3/8] Break out of index deletion when unable to bring space below the disk space threshold --- .../so-curator-cluster-delete-delete | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete b/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete index 81d2720c1..388c32b0d 100755 --- a/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete +++ b/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete @@ -10,54 +10,58 @@ {%- set RETENTION = salt['pillar.get']('elasticsearch:retention', ELASTICDEFAULTS.elasticsearch.retention, merge=true) -%} LOG="/opt/so/log/curator/so-curator-cluster-delete.log" -LOG_SIZE_LIMIT=$(/usr/sbin/so-elasticsearch-cluster-space-total {{ RETENTION.retention_pct}}) -TODAY=$(date +'%Y.%m.%d) - -eligible_indices() { - [[ $(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'open$' | awk '{print $1}' | grep -vE "playbook|so-case|$TODAY" | grep -E "(logstash-|so-|.ds-logs-)" | wc -l) -ge 1 ]] -} +ALERT_LOG="/opt/so/log/curator/alert.log" +LOG_SIZE_LIMIT_GB=$(/usr/sbin/so-elasticsearch-cluster-space-total {{ RETENTION.retention_pct}}) +LOG_SIZE_LIMIT=$(( "$LOG_SIZE_LIMIT_GB" * 1024 * 1024 * 1024 )) +ITERATION=0 +MAX_ITERATIONS=10 overlimit() { - [[ $(/usr/sbin/so-elasticsearch-cluster-space-used) -gt "${LOG_SIZE_LIMIT}" ]] + [[ $(/usr/sbin/so-elasticsearch-cluster-space-used) -gt ${LOG_SIZE_LIMIT} ]] } ########################### # Check for 2 conditions: # ########################### # 1. Check if Elasticsearch indices are using more disk space than LOG_SIZE_LIMIT -# 2. Check if Elasticsearch indices are eligible for deletion -- they cannot be Playbook, SOC, today's, or other important indices -# Closed indices will be deleted first. If we are able to bring disk space under LOG_SIZE_LIMIT, we will break out of the loop. +# 2. Check if the maximum number of iterations - MAX_ITERATIONS - has been exceeded. If so, exit. +# Closed indices will be deleted first. If we are able to bring disk space under LOG_SIZE_LIMIT, or the number of iterations has exceeded the maximum allowed number of iterations, we will break out of the loop. + +while overlimit && [[ $ITERATION -lt $MAX_ITERATIONS ]]; do -while overlimit && eligible_indices; do # If we can't query Elasticsearch, then immediately return false. /usr/sbin/so-elasticsearch-query _cat/indices?h=index,status > /dev/null 2>&1 [ $? -eq 1 ] && echo "$(date) - Could not query Elasticsearch." >> ${LOG} && exit + # We iterate through the closed and open indices - CLOSED_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'close$' | awk '{print $1}' | grep -v "so-case" | grep -E "(logstash-|so-|.ds-logs-)" | sort -t- -k3) - OPEN_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'open$' | awk '{print $1}' | grep -v "so-case" | grep -E "(logstash-|so-|.ds-logs-)" | sort -t- -k3) - for INDEX in ${CLOSED_INDICES} ${OPEN_INDICES}; do - # Now that we've sorted the indices from oldest to newest, we need to check each index to see if it is assigned as the current write index for a data stream + CLOSED_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'close$' | awk '{print $1}' | grep -vE "playbook|so-case" | grep -E "(logstash-|so-|.ds-logs-)" | sort -t- -k3) + OPEN_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'open$' | awk '{print $1}' | grep -vE "playbook|so-case" | grep -E "(logstash-|so-|.ds-logs-)" | sort -t- -k3) + + for INDEX in ${CLOSED_INDICES} ${OPEN_INDICES}; do + # Now that we've sorted the indices from oldest to newest, we need to check each index to see if it is assigned as the current write index for a data stream # To do so, we need to identify to which data stream this index is associated # We extract the data stream name using the pattern below DATASTREAM_PATTERN="logs-[a-zA-Z_.]+-[a-zA-Z_.]+" DATASTREAM=$(echo "${INDEX}" | grep -oE "$DATASTREAM_PATTERN") # We look up the data stream, and determine the write index. If there is only one backing index, we delete the entire data stream - BACKING_INDICES=$(/usr/sbin/so-elasticsearch-query _data_stream/${DATASTREAM} | jq -r '.data_streams[0].indices | length') - if [ "$BACKING_INDICES" -gt 1 ]; then + BACKING_INDICES=$(/usr/sbin/so-elasticsearch-query _data_stream/${DATASTREAM} | jq -r '.data_streams[0].indices | length') + if [ "$BACKING_INDICES" -gt 1 ]; then CURRENT_WRITE_INDEX=$(/usr/sbin/so-elasticsearch-query _data_stream/$DATASTREAM | jq -r .data_streams[0].indices[-1].index_name) - # We make sure we are not trying to delete a write index + # We make sure we are not trying to delete a write index if [ "${INDEX}" != "${CURRENT_WRITE_INDEX}" ]; then # This should not be a write index, so we should be allowed to delete it - printf "\n$(date) - Used disk space exceeds LOG_SIZE_LIMIT (${LOG_SIZE_LIMIT} GB) - Deleting ${INDEX} index...\n" >> ${LOG} + printf "\n$(date) - Used disk space exceeds LOG_SIZE_LIMIT (${LOG_SIZE_LIMIT_GB} GB) - Deleting ${INDEX} index...\n" >> ${LOG} /usr/sbin/so-elasticsearch-query ${INDEX} -XDELETE >> ${LOG} 2>&1 fi - else - # We delete the entire data stream, since there is only one backing index - printf "\n$(date) - Used disk space exceeds LOG_SIZE_LIMIT (${LOG_SIZE_LIMIT} GB) - Deleting ${DATASTREAM} data stream...\n" >> ${LOG} - /usr/sbin/so-elasticsearch-query _data_stream/${DATASTREAM} -XDELETE >> ${LOG} 2>&1 - fi - if ! overlimit; then + fi + if ! overlimit ; then exit fi + ((ITERATION++)) + if [[ $ITERATION -ge $MAX_ITERATIONS ]]; then + alert_id=$(uuidgen) + printf "\n$(date) -> Maximum iteration limit reached ($MAX_ITERATIONS). Unable to bring disk below threshold. Writing alert ($alert_id) to ${ALERT_LOG}\n" >> ${LOG} + printf "\n$(date),$alert_id,Maximum iteration limit reached ($MAX_ITERATIONS). Unable to bring disk below threshold.\n" >> ${ALERT_LOG} + fi done done From ce114a26010e022e54a8bd949c1299c67fb3dc7c Mon Sep 17 00:00:00 2001 From: Wes Date: Fri, 26 May 2023 13:19:45 +0000 Subject: [PATCH 4/8] Fix total space logic and rename TOTAL_AVAILABLE_SPACE to TOTAL_USED_SPACE --- .../so-elasticsearch-cluster-space-used | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used index 222cb2f5d..971708eba 100755 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used +++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used @@ -1,16 +1,16 @@ #!/bin/bash # # Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one -# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. . /usr/sbin/so-common {% from 'vars/globals.map.jinja' import GLOBALS %} -TOTAL_AVAILABLE_SPACE=0 +TOTAL_USED_SPACE=0 -# Iterate through the output of _cat/allocation for each node in the cluster to determine the total available space +# Iterate through the output of _cat/allocation for each node in the cluster to determine the total used space {% if GLOBALS.role == 'so-manager' %} # Get total disk space - disk.total for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | grep -v {{ GLOBALS.manager }} | awk '{print $3}'); do @@ -21,10 +21,18 @@ for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | awk '{print $2}'); size=$(echo $i | grep -oE '[0-9].*' | awk '{print int($1+0.5)}') unit=$(echo $i | grep -oE '[A-Za-z]+') if [ $unit = "tb" ]; then + size=$(( size * 1024 * 1024 * 1024 * 1024 )) + elif [ $unit = "gb" ]; then + size=$(( size * 1024 * 1024 * 1024 )) + elif [ $unit = "mb" ]; then + size=$(( size * 1024 * 1024 )) + elif [ $unit = "kb" ]; then size=$(( size * 1024 )) + elif [ $unit = "b" ]; then + size=size fi - TOTAL_AVAILABLE_SPACE=$(( TOTAL_AVAILABLE_SPACE + size )) + TOTAL_USED_SPACE=$(( TOTAL_USED_SPACE + size )) done -# Calculate the percentage of available space based on our previously defined value -echo "$TOTAL_AVAILABLE_SPACE" +# Calculate the percentage of used space based on our previously defined value +echo "$TOTAL_USED_SPACE" From 5af1bfe1427e83e33b119cc2f3466c4557b5f470 Mon Sep 17 00:00:00 2001 From: Wes Date: Sat, 27 May 2023 21:15:45 +0000 Subject: [PATCH 5/8] Move alert generation outside of the inner loop --- salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete b/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete index 388c32b0d..0beed1a59 100755 --- a/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete +++ b/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete @@ -58,10 +58,10 @@ while overlimit && [[ $ITERATION -lt $MAX_ITERATIONS ]]; do exit fi ((ITERATION++)) + done if [[ $ITERATION -ge $MAX_ITERATIONS ]]; then alert_id=$(uuidgen) printf "\n$(date) -> Maximum iteration limit reached ($MAX_ITERATIONS). Unable to bring disk below threshold. Writing alert ($alert_id) to ${ALERT_LOG}\n" >> ${LOG} printf "\n$(date),$alert_id,Maximum iteration limit reached ($MAX_ITERATIONS). Unable to bring disk below threshold.\n" >> ${ALERT_LOG} fi - done done From b441fe662f8d1590dfd168e34e1e33c6781f8000 Mon Sep 17 00:00:00 2001 From: Wes Date: Tue, 30 May 2023 17:28:59 +0000 Subject: [PATCH 6/8] Change 1024 to 1000 for gigabytes --- .../tools/sbin_jinja/so-elasticsearch-cluster-space-used | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used index 971708eba..7c0f2f3c6 100755 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used +++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used @@ -21,13 +21,13 @@ for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | awk '{print $2}'); size=$(echo $i | grep -oE '[0-9].*' | awk '{print int($1+0.5)}') unit=$(echo $i | grep -oE '[A-Za-z]+') if [ $unit = "tb" ]; then - size=$(( size * 1024 * 1024 * 1024 * 1024 )) + size=$(( size * 1000 * 1000 * 1000 * 1000 )) elif [ $unit = "gb" ]; then - size=$(( size * 1024 * 1024 * 1024 )) + size=$(( size * 1000 * 1000 * 1000 )) elif [ $unit = "mb" ]; then - size=$(( size * 1024 * 1024 )) + size=$(( size * 1000 * 1000 )) elif [ $unit = "kb" ]; then - size=$(( size * 1024 )) + size=$(( size * 1000 )) elif [ $unit = "b" ]; then size=size fi From 096dadf9bdc281c466a0be523031ac4387b29425 Mon Sep 17 00:00:00 2001 From: Wes Date: Tue, 30 May 2023 17:29:42 +0000 Subject: [PATCH 7/8] Change 1024 to 1000 for gigabytes --- salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete b/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete index 0beed1a59..e0c5144bc 100755 --- a/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete +++ b/salt/curator/tools/sbin_jinja/so-curator-cluster-delete-delete @@ -12,7 +12,7 @@ LOG="/opt/so/log/curator/so-curator-cluster-delete.log" ALERT_LOG="/opt/so/log/curator/alert.log" LOG_SIZE_LIMIT_GB=$(/usr/sbin/so-elasticsearch-cluster-space-total {{ RETENTION.retention_pct}}) -LOG_SIZE_LIMIT=$(( "$LOG_SIZE_LIMIT_GB" * 1024 * 1024 * 1024 )) +LOG_SIZE_LIMIT=$(( "$LOG_SIZE_LIMIT_GB" * 1000 * 1000 * 1000 )) ITERATION=0 MAX_ITERATIONS=10 From 4469a93a75ba06ac5bbebe7547ac75b6d0c4cbf9 Mon Sep 17 00:00:00 2001 From: Wes Date: Tue, 30 May 2023 18:24:30 +0000 Subject: [PATCH 8/8] Fix typo --- .../tools/sbin_jinja/so-elasticsearch-cluster-space-used | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used index 7c0f2f3c6..5d8a60e22 100755 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used +++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-cluster-space-used @@ -21,7 +21,7 @@ for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | awk '{print $2}'); size=$(echo $i | grep -oE '[0-9].*' | awk '{print int($1+0.5)}') unit=$(echo $i | grep -oE '[A-Za-z]+') if [ $unit = "tb" ]; then - size=$(( size * 1000 * 1000 * 1000 * 1000 )) + size=$(( size * 1000 * 1000 * 1000 * 1000 )) elif [ $unit = "gb" ]; then size=$(( size * 1000 * 1000 * 1000 )) elif [ $unit = "mb" ]; then