#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.

{% from 'vars/globals.map.jinja' import GLOBALS %}
{% import_yaml 'elasticsearch/defaults.yaml' as ELASTICDEFAULTS %}
{%- set ELASTICSEARCH_HOST = GLOBALS.node_ip -%}
{%- set RETENTION = salt['pillar.get']('elasticsearch:retention', ELASTICDEFAULTS.elasticsearch.retention, merge=true) -%}

LOG="/opt/so/log/elasticsearch/so-elasticsearch-indices-delete.log"
ALERT_LOG="/opt/so/log/elasticsearch/indices-delete-alert.log"
LOG_SIZE_LIMIT_GB=$(/usr/sbin/so-elasticsearch-cluster-space-total {{ RETENTION.retention_pct }})
LOG_SIZE_LIMIT=$(( LOG_SIZE_LIMIT_GB * 1000 * 1000 * 1000 ))
ITERATION=0
MAX_ITERATIONS=10

overlimit() {
  [[ $(/usr/sbin/so-elasticsearch-cluster-space-used) -gt ${LOG_SIZE_LIMIT} ]]
}
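
# Per the usage above, so-elasticsearch-cluster-space-total prints the retention
# threshold as a whole number of GB, and so-elasticsearch-cluster-space-used
# prints current usage in bytes. For example, with a 500 GB threshold:
#
#   LOG_SIZE_LIMIT=$(( 500 * 1000 * 1000 * 1000 ))   # 500000000000 bytes
#
# overlimit then succeeds once used bytes exceed 500000000000.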

###########################
# Check for 2 conditions: #
###########################
# 1. Elasticsearch indices are using more disk space than LOG_SIZE_LIMIT.
# 2. The maximum number of iterations (MAX_ITERATIONS) has not yet been reached.
# Closed indices are deleted first. Once disk usage drops back under LOG_SIZE_LIMIT, or the number of iterations reaches MAX_ITERATIONS, we break out of the loop.


while overlimit && [[ $ITERATION -lt $MAX_ITERATIONS ]]; do

  # If we can't query Elasticsearch, log the failure and exit immediately.
  if ! /usr/sbin/so-elasticsearch-query _cat/indices?h=index,status > /dev/null 2>&1; then
    echo "$(date) - Could not query Elasticsearch." >> ${LOG}
    exit 1
  fi

  # Gather the closed and open indices, sorted oldest to newest
  CLOSED_SO_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'close$' | awk '{print $1}' | grep -E "^(logstash-|so-)" | grep -vE "so-case|so-detection" | sort -t- -k3)
  CLOSED_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'close$' | awk '{print $1}' | grep -E "^\.ds-logs-" | grep -v "suricata" | sort -t- -k4)
  OPEN_SO_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'open$' | awk '{print $1}' | grep -E "^(logstash-|so-)" | grep -vE "so-case|so-detection" | sort -t- -k3)
  OPEN_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep 'open$' | awk '{print $1}' | grep -E "^\.ds-logs-" | grep -v "suricata" | sort -t- -k4)
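
  # For reference, _cat/indices?h=index,status prints one "index status" pair
  # per line, e.g. (illustrative names only):
  #
  #   so-ids-2024.05.01                                   open
  #   .ds-logs-system.syslog-default-2024.05.01-000001    close
  #
  # With "-" as the field separator, -k3 (logstash-*/so-*) and -k4 (.ds-logs-*)
  # start the sort key past the index name prefix, so each list comes back
  # ordered oldest to newest.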

  for INDEX in ${CLOSED_SO_INDICES} ${OPEN_SO_INDICES} ${CLOSED_INDICES} ${OPEN_INDICES}; do
    # Check whether this is a legacy (logstash-* or so-*) index. If so, delete it before moving on to data stream backing indices.
    if [[ "$INDEX" =~ ^(logstash-|so-) ]]; then
      printf "\n$(date) - Used disk space exceeds LOG_SIZE_LIMIT (${LOG_SIZE_LIMIT_GB} GB) - Deleting ${INDEX} index...\n" >> ${LOG}
      /usr/sbin/so-elasticsearch-query ${INDEX} -XDELETE >> ${LOG} 2>&1
    else
      # Since the indices are sorted oldest to newest, we check each index to see whether it is the current write index for a data stream.
      # To do so, we first identify the data stream with which this index is associated,
      # extracting the data stream name with the pattern below.
      DATASTREAM_PATTERN="logs-[a-zA-Z_.]+-[a-zA-Z_.]+"
      DATASTREAM=$(echo "${INDEX}" | grep -oE "$DATASTREAM_PATTERN")
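      # Backing indices follow Elasticsearch's .ds-<data-stream>-<yyyy.MM.dd>-<generation>
      # naming convention, so an index named (illustratively)
      # .ds-logs-system.syslog-default-2024.05.01-000001 yields
      # DATASTREAM=logs-system.syslog-default here.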
      # We look up the data stream, and determine the write index. If there is only one backing index, we delete the entire data stream
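      # A _data_stream/<name> response looks roughly like this (trimmed):
      #
      #   {"data_streams":[{"name":"logs-system.syslog-default",
      #     "indices":[{"index_name":".ds-logs-system.syslog-default-2024.05.01-000001"},
      #                {"index_name":".ds-logs-system.syslog-default-2024.05.08-000002"}]}]}
      #
      # Elasticsearch keeps the current write index as the last entry in .indices,
      # which is what the jq expressions below rely on.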
      BACKING_INDICES=$(/usr/sbin/so-elasticsearch-query _data_stream/${DATASTREAM} | jq -r '.data_streams[0].indices | length')
      if [ "$BACKING_INDICES" -gt 1 ]; then
        CURRENT_WRITE_INDEX=$(/usr/sbin/so-elasticsearch-query _data_stream/${DATASTREAM} | jq -r '.data_streams[0].indices[-1].index_name')
        # We make sure we are not trying to delete a write index
        if [ "${INDEX}" != "${CURRENT_WRITE_INDEX}" ]; then
          # This should not be a write index, so we should be allowed to delete it
          printf "\n$(date) - Used disk space exceeds LOG_SIZE_LIMIT (${LOG_SIZE_LIMIT_GB} GB) - Deleting ${INDEX} index...\n" >> ${LOG}
          /usr/sbin/so-elasticsearch-query ${INDEX} -XDELETE >> ${LOG} 2>&1
        fi
      else
        printf "\n$(date) - Used disk space exceeds LOG_SIZE_LIMIT (${LOG_SIZE_LIMIT_GB} GB) - There is only one backing index (${INDEX}). Deleting ${DATASTREAM} data stream...\n" >> ${LOG}
        /usr/sbin/so-elasticsearch-query _data_stream/${DATASTREAM} -XDELETE >> ${LOG} 2>&1
      fi
    fi
    # Stop as soon as disk usage is back under the limit.
    if ! overlimit; then
      exit 0
    fi
    ((ITERATION++))
  done
  if [[ $ITERATION -ge $MAX_ITERATIONS ]]; then
    alert_id=$(uuidgen)
    printf "\n$(date) -> Maximum iteration limit reached ($MAX_ITERATIONS). Unable to bring disk below threshold. Writing alert ($alert_id) to ${ALERT_LOG}\n" >> ${LOG}
    printf "\n$(date),$alert_id,Maximum iteration limit reached ($MAX_ITERATIONS). Unable to bring disk below threshold.\n" >> ${ALERT_LOG}
  fi
done
