From 3cb3281cd5ae0ecc27b6ddfe23a8e1d3c4a72ef4 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 29 Apr 2025 12:38:41 -0500 Subject: [PATCH 1/6] add metrics for es index sizes --- salt/telegraf/config.sls | 18 ++++++++++++ salt/telegraf/enabled.sls | 3 ++ salt/telegraf/etc/telegraf.conf | 24 ++++++++++++++++ salt/telegraf/scripts/esindexsize.sh | 41 ++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+) create mode 100644 salt/telegraf/scripts/esindexsize.sh diff --git a/salt/telegraf/config.sls b/salt/telegraf/config.sls index a35be55f5..dea76cd82 100644 --- a/salt/telegraf/config.sls +++ b/salt/telegraf/config.sls @@ -45,6 +45,24 @@ tgraf_sync_script_{{script}}: GLOBALS: {{ GLOBALS }} {% endfor %} +{% if GLOBALS.role in ['so-standalone', 'so-manager', 'so-managersearch', 'so-heavynode', 'so-eval', 'so-import'] %} +tgraf_sync_script_esindexsize.sh: + file.managed: + - name: /opt/so/conf/telegraf/scripts/esindexsize.sh + - user: root + - group: 939 + - mode: 770 + - source: salt://telegraf/scripts/esindexsize.sh +{# Copy conf/elasticsearch/curl.config for telegraf to use with esindexsize.sh #} +tgraf_sync_escurl_conf: + file.managed: + - name: /opt/so/conf/telegraf/etc/escurl.config + - user: 939 + - group: 939 + - mode: 400 + - source: /opt/so/conf/elasticsearch/curl.config +{% endif %} + telegraf_sbin: file.recurse: - name: /usr/sbin diff --git a/salt/telegraf/enabled.sls b/salt/telegraf/enabled.sls index 8c71ecac3..ffb45ceb9 100644 --- a/salt/telegraf/enabled.sls +++ b/salt/telegraf/enabled.sls @@ -56,6 +56,9 @@ so-telegraf: - /opt/so/log/sostatus:/var/log/sostatus:ro - /opt/so/log/salt:/var/log/salt:ro - /opt/so/log/agents:/var/log/agents:ro + {% if GLOBALS.role in ['so-standalone', 'so-manager', 'so-managersearch', 'so-heavynode', 'so-eval', 'so-import'] %} + - /opt/so/conf/telegraf/etc/escurl.config:/etc/telegraf/elasticsearch.config:ro + {% endif %} {% if DOCKER.containers['so-telegraf'].custom_bind_mounts 
%} {% for BIND in DOCKER.containers['so-telegraf'].custom_bind_mounts %} - {{ BIND }} diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 4c2318c02..a4173a014 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -199,6 +199,20 @@ username = "{{ ES_USER }}" password = "{{ ES_PASS }}" insecure_skip_verify = true + # Every hour collect current size of all indices +[[ inputs.elasticsearch ]] + servers = ["https://{{ NODEIP }}:9200"] + username = "{{ ES_USER }}" + password = "{{ ES_PASS }}" + insecure_skip_verify = true + + indices_level = "indices" + indices_include = ["_all"] + # Drop everything except specific field + fieldinclude = ["store_size_in_bytes"] + + interval = "1m" + {%- elif grains['role'] in ['so-searchnode'] %} [[inputs.elasticsearch]] servers = ["https://{{ NODEIP }}:9200"] @@ -323,3 +337,13 @@ # # Read metrics about network interface usage [[inputs.net]] + +# Scripts run every 30s||TELEGRAFMERGED.config.interval - ES index script doesn't need to run as frequently +{%- if grains.role in ['so-standalone', 'so-manager', 'so-managersearch', 'so-heavynode', 'so-eval', 'so-import'] %} +[[ inputs.exec ]] + commands = [ + "/scripts/esindexsize.sh" + ] + data_format = "influx" + interval = "1h" +{%- endif %} diff --git a/salt/telegraf/scripts/esindexsize.sh b/salt/telegraf/scripts/esindexsize.sh new file mode 100644 index 000000000..2ba46149e --- /dev/null +++ b/salt/telegraf/scripts/esindexsize.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +DATASTREAM_INFO=$(curl -K /etc/telegraf/elasticsearch.config -s -k -L "https://localhost:9200/_data_stream?format=json") +INDICES=$(curl -K /etc/telegraf/elasticsearch.config -s -k -L "https://localhost:9200/_cat/indices?h=index,store.size&bytes=b&s=index:asc&format=json") +INDICES_WITH_SIZE=() + +while IFS= read -r DS; do + datastream_indices=() + datastream=$(echo "$DS" | jq -r '.name') + # influx doesn't like key starting with '.' + if [[ $datastream != .* ]]; then + while IFS= read -r DS_IDX; do + datastream_indices+=("$DS_IDX") + done < <(echo "$DS" | jq -r '.indices[].index_name') + datastream_size=0 + + for idx in ${datastream_indices[@]}; do + current_index=$(echo "$INDICES" | jq -r --arg index "$idx" '.[] | select(.index == $index)["store.size"]') + datastream_size=$(($datastream_size + $current_index)) + done + INDICES_WITH_SIZE+=("${datastream}=${datastream_size}i") + # echo "$datastream size is $(echo "$datastream_size" | numfmt --to iec)" + fi +done < <(echo "$DATASTREAM_INFO" | jq -c '.data_streams[]') + +measurement="elasticsearch_index_size " +total=${#INDICES_WITH_SIZE[@]} +for idxws in "${!INDICES_WITH_SIZE[@]}"; do + if [[ $idxws -lt $(($total - 1)) ]]; then + measurement+="${INDICES_WITH_SIZE[$idxws]}," + else + measurement+="${INDICES_WITH_SIZE[$idxws]}" + fi +done + +echo "$measurement" \ No newline at end of file From 85f5f75c84494cfdc5cd2cb9c5f775ef5dea6044 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 29 Apr 2025 12:42:05 -0500 Subject: [PATCH 2/6] use salt location for es curl.config --- salt/telegraf/config.sls | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/telegraf/config.sls b/salt/telegraf/config.sls index dea76cd82..05aa6a422 100644 --- a/salt/telegraf/config.sls +++ b/salt/telegraf/config.sls @@ -60,7 +60,7 @@ tgraf_sync_escurl_conf: - user: 939 - group: 939 - mode: 400 - - source: /opt/so/conf/elasticsearch/curl.config + - source: 
salt://elasticsearch/curl.config {% endif %} telegraf_sbin: From 1ddc653a52a9562000dc7116d0516a9f6bd7e4de Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 29 Apr 2025 13:40:39 -0500 Subject: [PATCH 3/6] fix input error in agentstatus script --- salt/telegraf/scripts/agentstatus.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/telegraf/scripts/agentstatus.sh b/salt/telegraf/scripts/agentstatus.sh index a390552fc..718f0e5ce 100644 --- a/salt/telegraf/scripts/agentstatus.sh +++ b/salt/telegraf/scripts/agentstatus.sh @@ -24,7 +24,7 @@ if [[ ! "`pidof -x $(basename $0) -o %PPID`" ]]; then EVENTS=$(cat $LOGFILE | grep -wF events | awk '{print $2}' | tr -d ',') TOTAL=$(cat $LOGFILE | grep -wF total | awk '{print $2}' | tr -d ',') ALL=$(cat $LOGFILE | grep -wF all | awk '{print $2}' | tr -d ',') - ACTIVE=$(cat $LOGFILE | grep -wF active | awk '{print $2}') + ACTIVE=$(cat $LOGFILE | grep -wF active | awk '{print $2}' | tr -d ',') echo "agentstatus online=$ONLINE,error=$ERROR,inactive=$INACTIVE,offline=$OFFLINE,updating=$UPDATING,unenrolled=$UNENROLLED,other=$OTHER,events=$EVENTS,total=$TOTAL,all=$ALL,active=$ACTIVE" fi From b918a5e25652da2d0a4f7a5274f1ab16acdae72b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 29 Apr 2025 16:05:55 -0500 Subject: [PATCH 4/6] old attempt --- salt/telegraf/etc/telegraf.conf | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index a4173a014..d663eb4e3 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -199,20 +199,6 @@ username = "{{ ES_USER }}" password = "{{ ES_PASS }}" insecure_skip_verify = true - # Every hour collect current size of all indices -[[ inputs.elasticsearch ]] - servers = ["https://{{ NODEIP }}:9200"] - username = "{{ ES_USER }}" - password = "{{ ES_PASS }}" - insecure_skip_verify = true - - 
indices_level = "indices" - indices_include = ["_all"] - # Drop everything except specific field - fieldinclude = ["store_size_in_bytes"] - - interval = "1m" - {%- elif grains['role'] in ['so-searchnode'] %} [[inputs.elasticsearch]] servers = ["https://{{ NODEIP }}:9200"] From 044d2301587b318c378b4999c837341fada777be Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 30 Apr 2025 13:05:36 -0500 Subject: [PATCH 5/6] get 200 from es before collecting metrics --- salt/telegraf/scripts/esindexsize.sh | 69 +++++++++++++++++------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/salt/telegraf/scripts/esindexsize.sh b/salt/telegraf/scripts/esindexsize.sh index 2ba46149e..dbb50f83e 100644 --- a/salt/telegraf/scripts/esindexsize.sh +++ b/salt/telegraf/scripts/esindexsize.sh @@ -5,37 +5,46 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. -DATASTREAM_INFO=$(curl -K /etc/telegraf/elasticsearch.config -s -k -L "https://localhost:9200/_data_stream?format=json") -INDICES=$(curl -K /etc/telegraf/elasticsearch.config -s -k -L "https://localhost:9200/_cat/indices?h=index,store.size&bytes=b&s=index:asc&format=json") -INDICES_WITH_SIZE=() +# Print one influx line, "elasticsearch_index_size stream1=BYTESi,stream2=BYTESi,...", holding the summed store +# size of every data stream whose name does not start with '.'; nothing is collected unless Elasticsearch answers 200. +if curl -K /etc/telegraf/elasticsearch.config -s -k -L "https://localhost:9200/" -w "%{http_code}" -o /dev/null | grep -q '200'; then -while IFS= read -r DS; do - datastream_indices=() - datastream=$(echo "$DS" | jq -r '.name') - # influx doesn't like key starting with '.' 
- if [[ $datastream != .* ]]; then - while IFS= read -r DS_IDX; do - datastream_indices+=("$DS_IDX") - done < <(echo "$DS" | jq -r '.indices[].index_name') - datastream_size=0 + DATASTREAM_INFO=$(curl -K /etc/telegraf/elasticsearch.config -s -k -L "https://localhost:9200/_data_stream?format=json") + INDICES=$(curl -K /etc/telegraf/elasticsearch.config -s -k -L "https://localhost:9200/_cat/indices?h=index,store.size&bytes=b&s=index:asc&format=json") + INDICES_WITH_SIZE=() - for idx in ${datastream_indices[@]}; do - current_index=$(echo "$INDICES" | jq -r --arg index "$idx" '.[] | select(.index == $index)["store.size"]') - datastream_size=$(($datastream_size + $current_index)) - done - INDICES_WITH_SIZE+=("${datastream}=${datastream_size}i") - # echo "$datastream size is $(echo "$datastream_size" | numfmt --to iec)" - fi -done < <(echo "$DATASTREAM_INFO" | jq -c '.data_streams[]') + while IFS= read -r DS; do + datastream_indices=() + datastream=$(echo "$DS" | jq -r '.name') + # influx doesn't like key starting with '.' 
+ if [[ $datastream != .* ]]; then + while IFS= read -r DS_IDX; do + datastream_indices+=("$DS_IDX") + done < <(echo "$DS" | jq -r '.indices[].index_name') + datastream_size=0 -measurement="elasticsearch_index_size " -total=${#INDICES_WITH_SIZE[@]} -for idxws in "${!INDICES_WITH_SIZE[@]}"; do - if [[ $idxws -lt $(($total - 1)) ]]; then - measurement+="${INDICES_WITH_SIZE[$idxws]}," - else - measurement+="${INDICES_WITH_SIZE[$idxws]}" - fi -done + # a backing index missing from _cat/indices or reporting a null store.size counts as 0 + for idx in "${datastream_indices[@]}"; do + current_index=$(echo "$INDICES" | jq -r --arg index "$idx" 'first(.[] | select(.index == $index)["store.size"]) // 0') + datastream_size=$((datastream_size + ${current_index:-0})) + done + INDICES_WITH_SIZE+=("${datastream}=${datastream_size}i") + fi + done < <(echo "$DATASTREAM_INFO" | jq -c '.data_streams[]') -echo "$measurement" \ No newline at end of file + measurement="elasticsearch_index_size " + total=${#INDICES_WITH_SIZE[@]} + for idxws in "${!INDICES_WITH_SIZE[@]}"; do + if [[ $idxws -lt $(($total - 1)) ]]; then + measurement+="${INDICES_WITH_SIZE[$idxws]}," + else + measurement+="${INDICES_WITH_SIZE[$idxws]}" + fi + done + + # an influx line needs at least one field, so print nothing when no data stream was measured + if [[ $total -gt 0 ]]; then + echo "$measurement" + fi + +fi From fd029508645e88625f5be7e71f2f299210a7a6ff Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 2 May 2025 13:36:28 -0500 Subject: [PATCH 6/6] use globals.is_manager --- salt/telegraf/enabled.sls | 2 +- salt/telegraf/etc/telegraf.conf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/salt/telegraf/enabled.sls b/salt/telegraf/enabled.sls index ffb45ceb9..451c78dda 100644 --- a/salt/telegraf/enabled.sls +++ b/salt/telegraf/enabled.sls @@ -56,7 +56,7 @@ so-telegraf: - /opt/so/log/sostatus:/var/log/sostatus:ro - /opt/so/log/salt:/var/log/salt:ro - /opt/so/log/agents:/var/log/agents:ro - {% if GLOBALS.role in ['so-standalone', 'so-manager', 'so-managersearch', 'so-heavynode', 
'so-eval', 'so-import'] %} + {% if GLOBALS.is_manager or GLOBALS.role == 'so-heavynode' %} - /opt/so/conf/telegraf/etc/escurl.config:/etc/telegraf/elasticsearch.config:ro {% endif %} {% if DOCKER.containers['so-telegraf'].custom_bind_mounts %} diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index d663eb4e3..f5d331209 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -192,7 +192,7 @@ # # # Read stats from one or more Elasticsearch servers or clusters -{%- if grains['role'] in ['so-manager', 'so-eval', 'so-managersearch', 'so-standalone', 'so-heavynode', 'so-import'] %} +{%- if GLOBALS.is_manager or GLOBALS.role == 'so-heavynode' %} [[inputs.elasticsearch]] servers = ["https://{{ NODEIP }}:9200"] cluster_stats = true @@ -325,7 +325,7 @@ [[inputs.net]] # Scripts run every 30s||TELEGRAFMERGED.config.interval - ES index script doesn't need to run as frequently -{%- if grains.role in ['so-standalone', 'so-manager', 'so-managersearch', 'so-heavynode', 'so-eval', 'so-import'] %} +{%- if GLOBALS.is_manager or GLOBALS.role == 'so-heavynode' %} [[ inputs.exec ]] commands = [ "/scripts/esindexsize.sh"