Merge pull request #10024 from Security-Onion-Solutions/esspace

Manage disk-based index deletion via so-curator-cluster-delete
This commit is contained in:
weslambert
2023-03-28 12:25:19 -04:00
committed by GitHub
12 changed files with 205 additions and 161 deletions

@@ -0,0 +1,57 @@
#!/bin/bash
#
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
. /usr/sbin/so-common
{% from 'vars/globals.map.jinja' import GLOBALS %}
TOTAL_AVAILABLE_SPACE=0
# Wait for Elasticsearch to initialize
COUNT=0
ELASTICSEARCH_CONNECTED="no"
while [[ "$COUNT" -le 240 ]]; do
/usr/sbin/so-elasticsearch-query / -k --output /dev/null --silent --head --fail
if [ $? -eq 0 ]; then
ELASTICSEARCH_CONNECTED="yes"
break
else
((COUNT+=1))
sleep 1
fi
done
if [ "$ELASTICSEARCH_CONNECTED" == "no" ]; then
echo
echo -e "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'"
echo
exit 1
fi
# Use the percentage passed as the first argument; otherwise default to 80 percent
if [[ "$1" != "" ]]; then
PERCENTAGE=$1
else
PERCENTAGE=80
fi
# Iterate through the output of _cat/allocation for each node in the cluster to determine the total disk space
{% if GLOBALS.role == 'so-manager' %}
for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | grep -v {{ GLOBALS.manager }} | awk '{print $5}'); do
{% else %}
for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | awk '{print $5}'); do
{% endif %}
size=$(echo $i | grep -oE '[0-9].*' | awk '{print int($1+0.5)}')
unit=$(echo $i | grep -oE '[A-Za-z]+')
if [ "$unit" = "tb" ]; then
size=$(( size * 1024 ))
fi
TOTAL_AVAILABLE_SPACE=$(( TOTAL_AVAILABLE_SPACE + size ))
done
# Calculate the percentage of available space based on our previously defined value
PERCENTAGE_AVAILABLE_SPACE=$(( TOTAL_AVAILABLE_SPACE*PERCENTAGE/100 ))
echo "$PERCENTAGE_AVAILABLE_SPACE"

@@ -0,0 +1,28 @@
#!/bin/bash
#
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
. /usr/sbin/so-common
{% from 'vars/globals.map.jinja' import GLOBALS %}
TOTAL_USED_SPACE=0
# Iterate through the output of _cat/allocation for each node in the cluster to determine the total used space
{% if GLOBALS.role == 'so-manager' %}
for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | grep -v {{ GLOBALS.manager }} | awk '{print $3}'); do
{% else %}
for i in $(/usr/sbin/so-elasticsearch-query _cat/allocation | awk '{print $3}'); do
{% endif %}
size=$(echo $i | grep -oE '[0-9].*' | awk '{print int($1+0.5)}')
unit=$(echo $i | grep -oE '[A-Za-z]+')
if [ "$unit" = "tb" ]; then
size=$(( size * 1024 ))
fi
TOTAL_USED_SPACE=$(( TOTAL_USED_SPACE + size ))
done
# Output the total used space in gigabytes
echo "$TOTAL_USED_SPACE"

@@ -3,6 +3,11 @@
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% import_yaml 'elasticsearch/defaults.yaml' as ELASTICDEFAULTS %}
{% set ELASTICMERGED = salt['pillar.get']('elasticsearch:retention', ELASTICDEFAULTS.elasticsearch.retention, merge=true) %}
{{ ELASTICMERGED.retention_pct }}
{%- set log_size_limit = salt['pillar.get']('elasticsearch:log_size_limit') %}
actions:
  1:

@@ -1,36 +0,0 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
#. /usr/sbin/so-elastic-common
#. /etc/nsm/securityonion.conf
# If logrotate script doesn't already exist, create it
#FILE="/etc/logrotate.d/so-curator-closed-delete"
#if ! [ -f ${FILE} ]; then
# cat << EOF > ${FILE}
#/var/log/nsm/so-curator-closed-delete.log {
# daily
# rotate 7
# copytruncate
# compress
# missingok
# notifempty
#}
#EOF
#fi
# Avoid starting multiple instances
APP=closeddelete
lf=/tmp/$APP-pidLockFile
# create empty lock file if none exists
cat /dev/null >> $lf
read lastPID < $lf
# if lastPID is not null and a process with that pid exists , exit
[ ! -z "$lastPID" -a -d /proc/$lastPID ] && exit
echo $$ > $lf
/usr/sbin/so-curator-closed-delete-delete

@@ -1,61 +0,0 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'vars/globals.map.jinja' import GLOBALS %}
{%- if grains['role'] in ['so-searchnode', 'so-heavynode'] %}
{%- set ELASTICSEARCH_HOST = GLOBALS.node_ip -%}
{%- set ELASTICSEARCH_PORT = salt['pillar.get']('elasticsearch:es_port') -%}
{%- elif grains['role'] in ['so-eval', 'so-managersearch', 'so-standalone', 'so-manager'] %}
{%- set ELASTICSEARCH_HOST = GLOBALS.manager_ip -%}
{%- set ELASTICSEARCH_PORT = salt['pillar.get']('manager:es_port') -%}
{%- endif -%}
{%- set LOG_SIZE_LIMIT = salt['pillar.get']('elasticsearch:log_size_limit') -%}
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
LOG="/opt/so/log/curator/so-curator-closed-delete.log"
overlimit() {
[[ $(du -hs --block-size=1GB /nsm/elasticsearch/nodes | awk '{print $1}' ) -gt "{{LOG_SIZE_LIMIT}}" ]]
}
closedindices() {
# If we can't query Elasticsearch, then immediately return false.
curl -K /opt/so/conf/elasticsearch/curl.config -s -k https://{{ELASTICSEARCH_HOST}}:{{ELASTICSEARCH_PORT}}/_cat/indices?h=index\&expand_wildcards=closed >/dev/null 2>&1
[ $? -eq 1 ] && return false
# First, get the list of closed indices using _cat/indices?h=index\&expand_wildcards=closed.
# Next, filter out any so-case indices.
# Finally, use grep's -q option to return true if there are any remaining logstash- or so- indices.
curl -K /opt/so/conf/elasticsearch/curl.config -s -k https://{{ELASTICSEARCH_HOST}}:{{ELASTICSEARCH_PORT}}/_cat/indices?h=index\&expand_wildcards=closed | grep -v "so-case" | grep -q -E "(logstash-|so-)"
}
# Check for 2 conditions:
# 1. Are Elasticsearch indices using more disk space than LOG_SIZE_LIMIT?
# 2. Are there any closed indices that we can delete?
# If both conditions are true, keep on looping until one of the conditions is false.
while overlimit && closedindices; do
# We need to determine OLDEST_INDEX:
# First, get the list of closed indices using _cat/indices?h=index\&expand_wildcards=closed.
# Next, filter out any so-case indices and only select the remaining logstash- or so- indices.
# Then, sort by date by telling sort to use hyphen as delimiter and sort on the third field.
# Finally, select the first entry in that sorted list.
OLDEST_INDEX=$(curl -K /opt/so/conf/elasticsearch/curl.config -s -k https://{{ELASTICSEARCH_HOST}}:{{ELASTICSEARCH_PORT}}/_cat/indices?h=index\&expand_wildcards=closed | grep -v "so-case" | grep -E "(logstash-|so-)" | sort -t- -k3 | head -1)
# Now that we've determined OLDEST_INDEX, ask Elasticsearch to delete it.
curl -K /opt/so/conf/elasticsearch/curl.config -XDELETE -k https://{{ELASTICSEARCH_HOST}}:{{ELASTICSEARCH_PORT}}/${OLDEST_INDEX}
# Finally, write a log entry that says we deleted it.
echo "$(date) - Used disk space exceeds LOG_SIZE_LIMIT ({{LOG_SIZE_LIMIT}} GB) - Index ${OLDEST_INDEX} deleted ..." >> ${LOG}
done

salt/curator/files/bin/so-curator-cluster-close Normal file → Executable file (0 changed lines)

salt/curator/files/bin/so-curator-cluster-delete Normal file → Executable file (19 changed lines)

@@ -4,7 +4,8 @@
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
APP=delete
# Avoid starting multiple instances
APP=clusterdelete
lf=/tmp/$APP-pidLockFile
# create empty lock file if none exists
cat /dev/null >> $lf
@@ -13,18 +14,4 @@ read lastPID < $lf
[ ! -z "$lastPID" -a -d /proc/$lastPID ] && exit
echo $$ > $lf
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-zeek-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-beats-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-firewall-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-ids-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-import-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-kratos-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-osquery-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-ossec-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-strelka-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/so-syslog-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/logs-import-so-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/logs-strelka-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/logs-suricata-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/logs-syslog-delete.yml > /dev/null 2>&1;
docker exec so-curator curator --config /etc/curator/config/curator.yml /etc/curator/action/logs-zeek-delete.yml > /dev/null 2>&1;
/usr/sbin/so-curator-cluster-delete-delete

@@ -0,0 +1,85 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'vars/globals.map.jinja' import GLOBALS %}
{% import_yaml 'elasticsearch/defaults.yaml' as ELASTICDEFAULTS %}
{%- set ELASTICSEARCH_HOST = GLOBALS.node_ip -%}
{%- set RETENTION = salt['pillar.get']('elasticsearch:retention', ELASTICDEFAULTS.elasticsearch.retention, merge=true) -%}
LOG="/opt/so/log/curator/so-curator-cluster-delete.log"
LOG_SIZE_LIMIT=$(/usr/sbin/so-elasticsearch-cluster-space-total {{ RETENTION.retention_pct}})
overlimit() {
[[ $(/usr/sbin/so-elasticsearch-cluster-space-used) -gt "${LOG_SIZE_LIMIT}" ]]
}
closedindices() {
# If we can't query Elasticsearch, then immediately return false.
/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep close > /dev/null 2>&1
[ $? -eq 1 ] && return 1
# First, get the list of closed indices using _cat/indices?h=index,status | grep close | awk '{print $1}'.
# Next, filter out any so-case indices.
# Finally, use grep's -q option to return true if there are any remaining logstash-, so-, or .ds-logs- indices.
/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep close | awk '{print $1}' | grep -v "so-case" | grep -q -E "(logstash-|so-|.ds-logs-)"
}
# Check for 2 conditions:
# 1. Are Elasticsearch indices using more disk space than LOG_SIZE_LIMIT?
# 2. Are there any closed indices that we can delete?
# If both conditions are true, keep on looping until one of the conditions is false.
while overlimit && closedindices; do
CLOSED_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep close | awk '{print $1}' | grep -v "so-case" | grep -E "(logstash-|so-|.ds-logs-)" | sort -t- -k3)
# We iterate through the closed indices
for CLOSED_INDEX in ${CLOSED_INDICES}; do
# Now that we've sorted the indices from oldest to newest, we need to check each index to see if it is assigned as the current write index for a data stream
# To do so, we need to identify to which data stream this index is associated
# We extract the data stream name using the pattern below
DATASTREAM_PATTERN="logs-[a-zA-Z_.]+-[a-zA-Z_.]+"
DATASTREAM=$(echo "${CLOSED_INDEX}" | grep -oE "$DATASTREAM_PATTERN")
# We look up the data stream, and determine the write index
CURRENT_WRITE_INDEX=$(/usr/sbin/so-elasticsearch-query _data_stream/$DATASTREAM | jq -r .data_streams[0].indices[-1].index_name)
# We make sure we are not trying to delete a write index
if [ "${CLOSED_INDEX}" != "${CURRENT_WRITE_INDEX}" ]; then
# This should not be a write index, so we should be allowed to delete it
/usr/sbin/so-elasticsearch-query ${CLOSED_INDEX} -XDELETE
# Finally, write a log entry that says we deleted it.
echo "$(date) - Used disk space exceeds LOG_SIZE_LIMIT (${LOG_SIZE_LIMIT} GB) - Index ${CLOSED_INDEX} deleted ..." >> ${LOG}
fi
if ! overlimit; then
exit
fi
done
done
while overlimit; do
# We need to determine the oldest open index.
# First, get the list of open indices using _cat/indices?h=index,status | grep open | awk '{print $1}'.
# Next, filter out any so-case indices and only select the remaining logstash-, so-, or .ds-logs- indices.
# Then, sort by date by telling sort to use hyphen as delimiter and sort on the third field.
OPEN_INDICES=$(/usr/sbin/so-elasticsearch-query _cat/indices?h=index,status | grep open | awk '{print $1}' | grep -v "so-case" | grep -E "(logstash-|so-|.ds-logs-)" | sort -t- -k3)
# We iterate through the open indices
for OPEN_INDEX in ${OPEN_INDICES}; do
# Now that we've sorted the indices from oldest to newest, we need to check each index to see if it is assigned as the current write index for a data stream
# To do so, we need to identify to which data stream this index is associated
# We extract the data stream name using the pattern below
DATASTREAM_PATTERN="logs-[a-zA-Z_.]+-[a-zA-Z_.]+"
DATASTREAM=$(echo "${OPEN_INDEX}" | grep -oE "$DATASTREAM_PATTERN")
# We look up the data stream, and determine the write index
CURRENT_WRITE_INDEX=$(/usr/sbin/so-elasticsearch-query _data_stream/$DATASTREAM | jq -r .data_streams[0].indices[-1].index_name)
# We make sure we are not trying to delete a write index
if [ "${OPEN_INDEX}" != "${CURRENT_WRITE_INDEX}" ]; then
# This should not be a write index, so we should be allowed to delete it
/usr/sbin/so-elasticsearch-query ${OPEN_INDEX} -XDELETE
# Finally, write a log entry that says we deleted it.
echo "$(date) - Used disk space exceeds LOG_SIZE_LIMIT (${LOG_SIZE_LIMIT} GB) - Index ${OPEN_INDEX} deleted ..." >> ${LOG}
fi
if ! overlimit; then
exit
fi
done
done
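The write-index guard above relies on the _data_stream API listing a stream's backing indices in generation order, with the current write index last. A minimal sketch of that check in isolation (the data stream name is hypothetical):

# List the backing indices of a data stream; the last one printed is the current write index.
/usr/sbin/so-elasticsearch-query _data_stream/logs-zeek-so | jq -r '.data_streams[0].indices[].index_name'
# .ds-logs-zeek-so-2023.03.01-000001
# .ds-logs-zeek-so-2023.03.28-000004   <- matches .indices[-1].index_name, so it is never deleted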

@@ -27,6 +27,12 @@ curator:
- createhome: False
# Create the log directory
curlogdir:
file.directory:
- name: /opt/so/log/curator
- user: 934
- group: 939
curactiondir:
file.directory:
- name: /opt/so/conf/curator/action
@@ -34,12 +40,6 @@ curactiondir:
- group: 939
- makedirs: True
curlogdir:
file.directory:
- name: /opt/so/log/curator
- user: 934
- group: 939
actionconfs:
file.recurse:
- name: /opt/so/conf/curator/action
@@ -50,7 +50,6 @@ actionconfs:
- defaults:
CURATORMERGED: {{ CURATORMERGED }}
curconf:
file.managed:
- name: /opt/so/conf/curator/curator.yml
@@ -61,40 +60,6 @@ curconf:
- template: jinja
- show_changes: False
curcloseddel:
file.managed:
- name: /usr/sbin/so-curator-closed-delete
- source: salt://curator/files/bin/so-curator-closed-delete
- user: 934
- group: 939
- mode: 755
curcloseddeldel:
file.managed:
- name: /usr/sbin/so-curator-closed-delete-delete
- source: salt://curator/files/bin/so-curator-closed-delete-delete
- user: 934
- group: 939
- mode: 755
- template: jinja
curclose:
file.managed:
- name: /usr/sbin/so-curator-close
- source: salt://curator/files/bin/so-curator-close
- user: 934
- group: 939
- mode: 755
- template: jinja
curdel:
file.managed:
- name: /usr/sbin/so-curator-delete
- source: salt://curator/files/bin/so-curator-delete
- user: 934
- group: 939
- mode: 755
curclusterclose:
file.managed:
- name: /usr/sbin/so-curator-cluster-close
@@ -104,13 +69,21 @@ curclusterclose:
- mode: 755
- template: jinja
curclusterdelete:
curclusterdelete:
file.managed:
- name: /usr/sbin/so-curator-cluster-delete
- source: salt://curator/files/bin/so-curator-cluster-delete
- user: 934
- group: 939
- mode: 755
curclusterdeletedelete:
file.managed:
- name: /usr/sbin/so-curator-cluster-delete-delete
- source: salt://curator/files/bin/so-curator-cluster-delete-delete
- user: 934
- group: 939
- mode: 755
- template: jinja
so-curator:
@@ -163,12 +136,12 @@ so-curatorclusterclose:
- month: '*'
- dayweek: '*'
so-curatorclusterdelete:
so-curatorclusterdeletecron:
cron.present:
- name: /usr/sbin/so-curator-cluster-delete > /opt/so/log/curator/cron-delete.log 2>&1
- name: /usr/sbin/so-curator-cluster-delete > /opt/so/log/curator/cron-cluster-delete.log 2>&1
- user: root
- minute: '2'
- hour: '*/1'
- minute: '*/5'
- hour: '*'
- daymonth: '*'
- month: '*'
- dayweek: '*'
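For reference, the resulting root crontab entry should look roughly like this (Salt also adds its own identifier comment):

*/5 * * * * /usr/sbin/so-curator-cluster-delete > /opt/so/log/curator/cron-cluster-delete.log 2>&1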

@@ -1,4 +1,6 @@
elasticsearch:
  retention:
    retention_pct: 50
  config:
    node: {}
    cluster:
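The 50 percent default can be overridden through the elasticsearch:retention:retention_pct pillar key; a quick way to see whether a node carries an override (standard Salt command; if nothing is returned, the default from defaults.yaml applies):

sudo salt-call pillar.get elasticsearch:retention:retention_pct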

@@ -407,7 +407,6 @@ so-elasticsearch-roles-load:
- docker_container: so-elasticsearch
- file: es_sync_scripts
{% else %}
{{sls}}_state_not_allowed:

@@ -1,7 +1,12 @@
elasticsearch:
  esheap:
    description: Specify the memory heap size in (m)egabytes for Elasticsearch.
    helpLink: elasticsearch.html
    helpLink: elasticsearch.html
  retention:
    retention_pct:
      description: Percentage of the total cluster disk space that Elasticsearch may use before older indices are deleted, for multi-node clusters
      helpLink: elasticsearch.html
      global: True
  config:
    cluster:
      name: