From b0a515f2c33f715e3ca233ad2ac941b8d8fcd1fa Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 16 Jul 2025 12:09:01 -0400 Subject: [PATCH 001/124] update base cloud image location --- salt/_runners/setup_hypervisor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/_runners/setup_hypervisor.py b/salt/_runners/setup_hypervisor.py index 6ddd571c9..9d7116d59 100644 --- a/salt/_runners/setup_hypervisor.py +++ b/salt/_runners/setup_hypervisor.py @@ -165,7 +165,7 @@ def _validate_image_checksum(path, expected_sha256): return True # Constants -IMAGE_URL = "https://yum.oracle.com/templates/OracleLinux/OL9/u5/x86_64/OL9U5_x86_64-kvm-b253.qcow2" +IMAGE_URL = "https://download.securityonion.net/file/securityonion/OL9U5_x86_64-kvm-b253.qcow2" IMAGE_SHA256 = "3b00bbbefc8e78dd28d9f538834fb9e2a03d5ccdc2cadf2ffd0036c0a8f02021" IMAGE_PATH = "/nsm/libvirt/boot/OL9U5_x86_64-kvm-b253.qcow2" MANAGER_HOSTNAME = socket.gethostname() From 58ffe576d7230d237e6afb5a056e69d306e39679 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 16 Jul 2025 12:09:39 -0400 Subject: [PATCH 002/124] add pci mappings for sos hw --- salt/hypervisor/defaults.yaml | 122 +++++++++++++++++++++++++--------- 1 file changed, 92 insertions(+), 30 deletions(-) diff --git a/salt/hypervisor/defaults.yaml b/salt/hypervisor/defaults.yaml index 06509828c..8cf754193 100644 --- a/salt/hypervisor/defaults.yaml +++ b/salt/hypervisor/defaults.yaml @@ -17,42 +17,104 @@ hypervisor: 6: pci_0000_02_00_1 7: pci_0000_41_00_0 8: pci_0000_41_00_1 - model1: + SOSSNNV: hardware: cpu: 128 - memory: 128 + memory: 256 disk: - 1: pci_0000_c7_00_0 - 2: pci_0000_c8_00_0 + 1: pci_0000_42_00_0 + 2: pci_0000_43_00_0 + 3: pci_0000_44_00_0 + 4: pci_0000_45_00_0 copper: - 1: pci_0000_c4_00_0 - 2: pci_0000_c4_00_1 - 3: pci_0000_c4_00_2 - 4: pci_0000_c4_00_3 + sfp: + 1: pci_0000_02_00_0 + 2: pci_0000_02_00_1 + 3: pci_0000_41_00_0 + 4: pci_0000_41_00_1 + SOSSNNV-DE02: + cpu: 128 + memory: 384 + disk: + 1: pci_0000_41_00_0 + 2: pci_0000_42_00_0 + 3: pci_0000_81_00_0 + 4: pci_0000_82_00_0 + 5: pci_0000_83_00_0 + 6: pci_0000_84_00_0 + copper: + 1: pci_0000_85_00_0 + 2: pci_0000_85_00_1 + 3: pci_0000_85_00_2 + 4: pci_0000_85_00_3 + sfp: + 5: pci_0000_c4_00_0 + 6: pci_0000_c4_00_1 + 7: pci_0000_c5_00_0 + 8: pci_0000_c5_00_1 + 9: pci_0000_c5_00_2 + 10: pci_0000_c5_00_3 + SOSSN7200: + cpu: 128 + memory: 256 + copper: + 1: pci_0000_03_00_0 + 2: pci_0000_03_00_1 + 3: pci_0000_03_00_2 + 4: pci_0000_03_00_3 sfp: 5: pci_0000_02_00_0 6: pci_0000_02_00_1 - 7: pci_0000_41_00_0 - 8: pci_0000_41_00_1 - model2: - cpu: 256 - memory: 256 - disk: - 1: pci_0000_c7_00_0 - 2: pci_0000_c8_00_0 - 3: pci_0000_c9_00_0 - 4: pci_0000_c10_00_0 + 7: pci_0000_81_00_0 + 8: pci_0000_81_00_1 + 9: pci_0000_81_00_2 + 10: pci_0000_81_00_3 + SOSSN7200-DE02: + cpu: 128 + memory: 384 copper: - 1: pci_0000_c4_00_0 - 2: pci_0000_c4_00_1 - 3: pci_0000_c4_00_2 - 4: pci_0000_c4_00_3 - 5: pci_0000_c5_00_0 - 6: pci_0000_c5_00_1 - 7: pci_0000_c5_00_2 - 8: pci_0000_c5_00_3 + 1: pci_0000_82_00_0 + 2: pci_0000_82_00_1 + 3: pci_0000_82_00_2 + 4: pci_0000_82_00_3 sfp: - 9: pci_0000_02_00_0 - 10: pci_0000_02_00_1 - 11: pci_0000_41_00_0 - 12: pci_0000_41_00_1 \ No newline at end of file + 5: pci_0000_c4_00_0 + 6: pci_0000_c4_00_1 + 7: pci_0000_c5_00_0 + 8: pci_0000_c5_00_1 + 9: pci_0000_c6_00_0 + 10: pci_0000_c6_00_1 + 11: pci_0000_c6_00_2 + 12: pci_0000_c6_00_3 + SOS4000: + cpu: 128 + memory: 256 + copper: + 1: pci_0000_03_00_0 + 2: pci_0000_03_00_1 + 3: pci_0000_03_00_2 + 4: pci_0000_03_00_3 + 
sfp: + 5: pci_0000_02_00_0 + 6: pci_0000_02_00_1 + 7: pci_0000_81_00_0 + 8: pci_0000_81_00_1 + 9: pci_0000_81_00_2 + 10: pci_0000_81_00_3 + SOS5000-DE02: + cpu: 128 + memory: 384 + copper: + 1: pci_0000_82_00_0 + 2: pci_0000_82_00_1 + 3: pci_0000_82_00_2 + 4: pci_0000_82_00_3 + sfp: + 5: pci_0000_c4_00_0 + 6: pci_0000_c4_00_1 + 7: pci_0000_c5_00_0 + 8: pci_0000_c5_00_1 + 9: pci_0000_c6_00_0 + 10: pci_0000_c6_00_1 + 11: pci_0000_c6_00_2 + 12: pci_0000_c6_00_3 From f588a80ec71d0b38f74858360714668fd7c29a6b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 16 Sep 2025 10:37:26 -0500 Subject: [PATCH 003/124] fix jq error when indices don't exist (seen on fresh installs when fleet hasn't ever been installed) --- .../tools/sbin_jinja/so-elastic-fleet-setup | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index deb16dadf..1ceec9c95 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -23,18 +23,17 @@ if [[ "$RETURN_CODE" != "0" ]]; then exit 1 fi -ALIASES=".fleet-servers .fleet-policies-leader .fleet-policies .fleet-agents .fleet-artifacts .fleet-enrollment-api-keys .kibana_ingest" -for ALIAS in ${ALIASES} -do +ALIASES=(.fleet-servers .fleet-policies-leader .fleet-policies .fleet-agents .fleet-artifacts .fleet-enrollment-api-keys .kibana_ingest) +for ALIAS in "${ALIASES[@]}"; do # Get all concrete indices from alias - INDXS=$(curl -K /opt/so/conf/kibana/curl.config -s -k -L -H "Content-Type: application/json" "https://localhost:9200/_resolve/index/${ALIAS}" | jq -r '.aliases[].indices[]') - - # Delete all resolved indices - for INDX in ${INDXS} - do + if INDXS_RAW=$(curl -sK /opt/so/conf/kibana/curl.config -s -k -L -H "Content-Type: application/json" "https://localhost:9200/_resolve/index/${ALIAS}" --fail 2>/dev/null); then + INDXS=$(echo "$INDXS_RAW" | jq -r '.aliases[].indices[]') + # Delete all resolved indices + for INDX in ${INDXS}; do status "Deleting $INDX" curl -K /opt/so/conf/kibana/curl.config -s -k -L -H "Content-Type: application/json" "https://localhost:9200/${INDX}" -XDELETE - done + done + fi done # Restarting Kibana... 
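
For context, a minimal sketch of the failure mode this patch guards against (hypothetical alias; the curl.config auth options are omitted here). Without `--fail`, curl exits 0 even when Elasticsearch returns a 404 error body, so jq aborts with "Cannot iterate over null" because `.aliases` is absent from the error response; with `--fail`, the non-zero exit skips the branch entirely:

```bash
# Hypothetical alias name; on a fresh install the backing indices may not exist yet
ALIAS=".fleet-servers"
if INDXS_RAW=$(curl -s -k --fail "https://localhost:9200/_resolve/index/${ALIAS}" 2>/dev/null); then
    # Only reached on HTTP 2xx, where .aliases is present (possibly empty)
    echo "$INDXS_RAW" | jq -r '.aliases[].indices[]'
fi
```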
From fdb5ad810a717c04f926176bb5a64cc3ccf5d591 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 16 Sep 2025 20:10:48 -0500 Subject: [PATCH 004/124] add err check and retries around func elastic_fleet_policy_create --- .../tools/sbin/so-elastic-fleet-common | 18 ++++++++++-------- .../tools/sbin_jinja/so-elastic-fleet-setup | 15 ++++++++++++--- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 9780c8b12..8f42a9f9a 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -190,17 +190,19 @@ elastic_fleet_policy_create() { NAME=$1 DESC=$2 FLEETSERVER=$3 - TIMEOUT=$4 + TIMEOUT=$4 JSON_STRING=$( jq -n \ - --arg NAME "$NAME" \ - --arg DESC "$DESC" \ - --arg TIMEOUT $TIMEOUT \ - --arg FLEETSERVER "$FLEETSERVER" \ - '{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER}' - ) + --arg NAME "$NAME" \ + --arg DESC "$DESC" \ + --arg TIMEOUT $TIMEOUT \ + --arg FLEETSERVER "$FLEETSERVER" \ + '{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER}' + ) # Create Fleet Policy - curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/agent_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! curl -sK /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/agent_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then + return 1 + fi } diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index 1ceec9c95..4e84987f6 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -116,13 +116,22 @@ printf "\n\n" /usr/sbin/so-elasticsearch-templates-load # Initial Endpoints Policy -elastic_fleet_policy_create "endpoints-initial" "Initial Endpoint Policy" "false" "1209600" +if ! elastic_fleet_policy_create "endpoints-initial" "Initial Endpoint Policy" "false" "1209600"; then + echo -e "Failed to create endpoints-initial policy..." + exit 1 +fi # Grid Nodes - General Policy -elastic_fleet_policy_create "so-grid-nodes_general" "SO Grid Nodes - General Purpose" "false" "1209600" +if ! elastic_fleet_policy_create "so-grid-nodes_general" "SO Grid Nodes - General Purpose" "false" "1209600"; then + echo -e "Failed to create so-grid-nodes_general policy..." + exit 1 +fi # Grid Nodes - Heavy Node Policy -elastic_fleet_policy_create "so-grid-nodes_heavy" "SO Grid Nodes - Heavy Node" "false" "1209600" +if ! elastic_fleet_policy_create "so-grid-nodes_heavy" "SO Grid Nodes - Heavy Node" "false" "1209600"; then + echo -e "Failed to create so-grid-nodes_heavy policy..." 
+ exit 1 +fi # Load Integrations for default policies so-elastic-fleet-integration-policy-load From bdeb92ab05817b86425aaf2257292dc81704daa9 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 16 Sep 2025 20:30:45 -0500 Subject: [PATCH 005/124] add err check and retries for elastic_fleet_integration_create --- .../tools/sbin/so-elastic-fleet-common | 4 +++- ...ic-fleet-integration-policy-elastic-defend | 5 ++++- .../so-elastic-fleet-integration-policy-load | 20 +++++++++++++++---- .../tools/sbin_jinja/so-elastic-fleet-setup | 5 ++++- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 8f42a9f9a..66f1dcf7a 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -39,7 +39,9 @@ elastic_fleet_integration_create() { JSON_STRING=$1 - curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then + return 1 + fi } diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend index 636942490..312c84be6 100755 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend @@ -18,6 +18,9 @@ do elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID" else printf "\n\nIntegration does not exist - Creating integration\n" - elastic_fleet_integration_create "@$INTEGRATION" + if ! elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + exit 1 + fi fi done diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load index 26414a94b..b26b79695 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load @@ -28,7 +28,10 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" else printf "\n\nIntegration does not exist - Creating integration\n" - elastic_fleet_integration_create "@$INTEGRATION" + if ! elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + exit 1 + fi fi done @@ -42,7 +45,10 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" else printf "\n\nIntegration does not exist - Creating integration\n" - elastic_fleet_integration_create "@$INTEGRATION" + if ! elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + exit 1 + fi fi done if [[ "$RETURN_CODE" != "1" ]]; then @@ -60,7 +66,10 @@ if [ ! 
-f /opt/so/state/eaintegrations.txt ]; then else printf "\n\nIntegration does not exist - Creating integration\n" if [ "$NAME" != "elasticsearch-logs" ]; then - elastic_fleet_integration_create "@$INTEGRATION" + if ! elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + exit 1 + fi fi fi done @@ -81,7 +90,10 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then else printf "\n\nIntegration does not exist - Creating integration\n" if [ "$NAME" != "elasticsearch-logs" ]; then - elastic_fleet_integration_create "@$INTEGRATION" + if ! elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + exit 1 + fi fi fi fi diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index 4e84987f6..0510b6bfe 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -73,7 +73,10 @@ UPDATED_INTEGRATION_POLICY=$(jq --arg policy_id "FleetServer_{{ GLOBALS.hostname .name = $name' /opt/so/conf/elastic-fleet/integrations/fleet-server/fleet-server.json) # Add the Fleet Server Integration to the new Fleet Policy -elastic_fleet_integration_create "$UPDATED_INTEGRATION_POLICY" +if ! elastic_fleet_integration_create "$UPDATED_INTEGRATION_POLICY"; then + echo -e "\nFailed to create Fleet server integration for Manager.." + exit 1 +fi # Now we can create the Logstash Output and set it to to be the default Output printf "\n\nCreate Logstash Output Config if node is not an Import or Eval install\n" From 948d72c282abe86c29999cb711ec6979383bc424 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 16 Sep 2025 21:07:02 -0500 Subject: [PATCH 006/124] add error check and retry to elastic_fleet_integration_update --- .../tools/sbin/so-elastic-fleet-common | 6 +++-- ...et-integration-policy-elastic-fleet-server | 6 ++++- .../so-elastic-fleet-integration-policy-load | 22 ++++++++++++++----- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 66f1dcf7a..42e563562 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -39,7 +39,7 @@ elastic_fleet_integration_create() { JSON_STRING=$1 - if ! curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then + if ! curl -sK /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then return 1 fi } @@ -67,7 +67,9 @@ elastic_fleet_integration_update() { JSON_STRING=$2 - curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/package_policies/$UPDATE_ID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! 
curl -sK /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/package_policies/$UPDATE_ID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then + return 1 + fi } elastic_fleet_integration_policy_upgrade() { diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server index 8f7c8b8b4..caa684829 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server @@ -25,5 +25,9 @@ for POLICYNAME in $POLICY; do .name = $name' /opt/so/conf/elastic-fleet/integrations/fleet-server/fleet-server.json) # Now update the integration policy using the modified JSON - elastic_fleet_integration_update "$INTEGRATION_ID" "$UPDATED_INTEGRATION_POLICY" + if ! elastic_fleet_integration_update "$INTEGRATION_ID" "$UPDATED_INTEGRATION_POLICY"; then + # exit 1 on failure to update fleet integration policies, let salt handle retries + echo "Failed to update $POLICYNAME.." + exit 1 + fi done \ No newline at end of file diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load index b26b79695..8427b47bc 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load @@ -13,7 +13,7 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then /usr/sbin/so-elastic-fleet-package-upgrade # Second, update Fleet Server policies - /sbin/so-elastic-fleet-integration-policy-elastic-fleet-server + /usr/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server # Third, configure Elastic Defend Integration seperately /usr/sbin/so-elastic-fleet-integration-policy-elastic-defend @@ -25,7 +25,10 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_check "endpoints-initial" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Updating integration\n" - elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" + if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then + echo -e "\nFailed to update integration for ${INTEGRATION##*/}" + exit 1 + fi else printf "\n\nIntegration does not exist - Creating integration\n" if ! elastic_fleet_integration_create "@$INTEGRATION"; then @@ -42,7 +45,10 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_check "so-grid-nodes_general" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Updating integration\n" - elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" + if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then + echo -e "\nFailed to update integration for ${INTEGRATION##*/}" + exit 1 + fi else printf "\n\nIntegration does not exist - Creating integration\n" if ! elastic_fleet_integration_create "@$INTEGRATION"; then @@ -62,7 +68,10 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_check "so-grid-nodes_heavy" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Updating integration\n" - elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" + if ! 
elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then + echo -e "\nFailed to update integration for ${INTEGRATION##*/}" + exit 1 + fi else printf "\n\nIntegration does not exist - Creating integration\n" if [ "$NAME" != "elasticsearch-logs" ]; then @@ -86,7 +95,10 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_check "$FLEET_POLICY" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Updating integration\n" - elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" + if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then + echo -e "\nFailed to update integration for ${INTEGRATION##*/}" + exit 1 + fi else printf "\n\nIntegration does not exist - Creating integration\n" if [ "$NAME" != "elasticsearch-logs" ]; then From 94e8cd84e6f2984ea703fea432b546ebc17cbf00 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 16 Sep 2025 21:07:33 -0500 Subject: [PATCH 007/124] because of more aggressive exits use salt to rerun script as needed --- salt/elasticfleet/enabled.sls | 3 +++ 1 file changed, 3 insertions(+) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index 0ca54ccb8..797291dfe 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -141,6 +141,9 @@ so-elastic-fleet-package-upgrade: so-elastic-fleet-integrations: cmd.run: - name: /usr/sbin/so-elastic-fleet-integration-policy-load + - retry: + attempts: 3 + interval: 10 so-elastic-agent-grid-upgrade: cmd.run: From bcd2e95fbe3a77a55b98a8487aba43f190b70f5e Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 16 Sep 2025 21:22:03 -0500 Subject: [PATCH 008/124] add error checking and retries to elastic_fleet_integration_policy_upgrade --- salt/elasticfleet/tools/sbin/so-elastic-fleet-common | 4 +++- .../sbin/so-elastic-fleet-integration-policy-elastic-defend | 5 ++++- .../tools/sbin_jinja/so-elastic-fleet-integration-upgrade | 3 +-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 42e563562..f76c6d64e 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -81,7 +81,9 @@ elastic_fleet_integration_policy_upgrade() { '{"packagePolicyIds":[$INTEGRATIONID]}' ) - curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies/upgrade" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! 
curl -sK /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies/upgrade" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then + return 1 + fi } diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend index 312c84be6..9769f2f79 100755 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend @@ -15,7 +15,10 @@ do elastic_fleet_integration_check "endpoints-initial" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Upgrading integration policy\n" - elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID" + if ! elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID"; then + echo -e "\nFailed to upgrade integration policy for ${INTEGRATION##*/}" + exit 1 + fi else printf "\n\nIntegration does not exist - Creating integration\n" if ! elastic_fleet_integration_create "@$INTEGRATION"; then diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade index 68a644798..f25059f39 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade @@ -62,8 +62,7 @@ for AGENT_POLICY in $agent_policies; do # If no errors with dry run, proceed with actual upgrade if [[ "$DRYRUN_ERRORS" == "false" ]]; then echo "No errors detected. Proceeding with upgrade..." - elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID" - if [ $? -ne 0 ]; then + if ! elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID"; then echo "Error: Upgrade failed for $PACKAGE_NAME with integration ID '$INTEGRATION_ID'." exit 1 fi From 063a2b33488a81304e7c06fe9ca04e89ebee0350 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 16 Sep 2025 21:56:53 -0500 Subject: [PATCH 009/124] update elastic_fleet_package_version_check & elastic_fleet_package_install to add error checking + retries. 
Update related scripts --- salt/elasticfleet/enabled.sls | 3 +++ .../tools/sbin/so-elastic-fleet-common | 14 +++++++++++--- .../tools/sbin_jinja/so-elastic-fleet-package-load | 12 ++++++++++-- .../sbin_jinja/so-elastic-fleet-package-upgrade | 11 +++++++++-- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index 797291dfe..58409fb48 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -135,6 +135,9 @@ so-elastic-fleet-package-statefile: so-elastic-fleet-package-upgrade: cmd.run: - name: /usr/sbin/so-elastic-fleet-package-upgrade + - retry: + attempts: 3 + interval: 10 - onchanges: - file: /opt/so/state/elastic_fleet_packages.txt diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index f76c6d64e..b4ac496df 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -89,24 +89,32 @@ elastic_fleet_integration_policy_upgrade() { elastic_fleet_package_version_check() { PACKAGE=$1 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/epm/packages/$PACKAGE" | jq -r '.item.version' + + if output=$(curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/epm/packages/$PACKAGE" --retry 3 --fail 2>/dev/null); then + echo $output | jq -r '.item.version' + else + return 1 + fi } elastic_fleet_package_latest_version_check() { PACKAGE=$1 - if output=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/epm/packages/$PACKAGE" --fail); then + if output=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/epm/packages/$PACKAGE" --retry 3 --fail 2>/dev/null); then if version=$(jq -e -r '.item.latestVersion' <<< $output); then echo "$version" fi else echo "Error: Failed to get latest version for $PACKAGE" + return 1 fi } elastic_fleet_package_install() { PKG=$1 VERSION=$2 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X POST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d '{"force":true}' "localhost:5601/api/fleet/epm/packages/$PKG/$VERSION" + if ! curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X POST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d '{"force":true}' "localhost:5601/api/fleet/epm/packages/$PKG/$VERSION" --retry 3 --fail 2>/dev/null; then + return 1 + fi } elastic_fleet_bulk_package_install() { diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-load b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-load index 819d7ecff..52fa96cd5 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-load +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-load @@ -10,8 +10,16 @@ {%- for PACKAGE in SUPPORTED_PACKAGES %} echo "Setting up {{ PACKAGE }} package..." -VERSION=$(elastic_fleet_package_version_check "{{ PACKAGE }}") -elastic_fleet_package_install "{{ PACKAGE }}" "$VERSION" +if VERSION=$(elastic_fleet_package_version_check "{{ PACKAGE }}"); then + if ! 
elastic_fleet_package_install "{{ PACKAGE }}" "$VERSION"; then
+    # packages loaded by this script should never fail to install and are REQUIRED before an installation of SO can be considered successful
+    echo -e "\nERROR: Failed to install default integration package -- {{ PACKAGE }} $VERSION"
+    exit 1
+  fi
+else
+  echo -e "\nERROR: Failed to get version information for integration {{ PACKAGE }}"
+  exit 1
+fi
 echo
 {%- endfor %}
 echo
diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-upgrade
index a092e3ecb..18211a7c6 100644
--- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-upgrade
+++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-upgrade
@@ -10,8 +10,15 @@
 {%- for PACKAGE in SUPPORTED_PACKAGES %}
 echo "Upgrading {{ PACKAGE }} package..."
-VERSION=$(elastic_fleet_package_latest_version_check "{{ PACKAGE }}")
-elastic_fleet_package_install "{{ PACKAGE }}" "$VERSION"
+if VERSION=$(elastic_fleet_package_latest_version_check "{{ PACKAGE }}"); then
+  if ! elastic_fleet_package_install "{{ PACKAGE }}" "$VERSION"; then
+    # exit 1 on failure to upgrade a default package, allow salt to handle retries
+    echo -e "\nERROR: Failed to upgrade {{ PACKAGE }} to version: $VERSION"
+    exit 1
+  fi
+else
+  echo -e "\nERROR: Failed to get version information for integration {{ PACKAGE }}"
+fi
 echo
 {%- endfor %}
 echo

From 456cad1adad30b1b4c8d6a4b84ea56519c4f6532 Mon Sep 17 00:00:00 2001
From: Doug Burks
Date: Wed, 17 Sep 2025 12:36:55 -0400
Subject: [PATCH 010/124] Update DOWNLOAD_AND_VERIFY_ISO.md for 2.4.180

---
 DOWNLOAD_AND_VERIFY_ISO.md | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/DOWNLOAD_AND_VERIFY_ISO.md b/DOWNLOAD_AND_VERIFY_ISO.md
index 6b966957c..ec4e4657c 100644
--- a/DOWNLOAD_AND_VERIFY_ISO.md
+++ b/DOWNLOAD_AND_VERIFY_ISO.md
@@ -1,17 +1,17 @@
-### 2.4.170-20250812 ISO image released on 2025/08/12
+### 2.4.180-20250916 ISO image released on 2025/09/17

 ### Download and Verify

-2.4.170-20250812 ISO image:
-https://download.securityonion.net/file/securityonion/securityonion-2.4.170-20250812.iso
+2.4.180-20250916 ISO image:
+https://download.securityonion.net/file/securityonion/securityonion-2.4.180-20250916.iso

-MD5: 50ECAAD05736298452DECEAE074FA773
-SHA1: 1B1EB520DE61ECC4BF34E512DAFE307317D7666A
-SHA256: 87D176A48A58BAD1C2D57196F999BED23DE9B526226E3754F0C166C866CCDC1A
+MD5: DE93880E38DE4BE45D05A41E1745CB1F
+SHA1: AEA6948911E50A4A38E8729E0E965C565402E3FC
+SHA256: C9BD8CA071E43B048ABF9ED145B87935CB1D4BB839B2244A06FAD1BBA8EAC84A

 Signature for ISO image:
-https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.170-20250812.iso.sig
+https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.180-20250916.iso.sig

 Signing key:
 https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/2.4/main/KEYS

@@ -25,22 +25,22 @@ wget https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/2.
Download the signature file for the ISO: ``` -wget https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.170-20250812.iso.sig +wget https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.180-20250916.iso.sig ``` Download the ISO image: ``` -wget https://download.securityonion.net/file/securityonion/securityonion-2.4.170-20250812.iso +wget https://download.securityonion.net/file/securityonion/securityonion-2.4.180-20250916.iso ``` Verify the downloaded ISO image using the signature file: ``` -gpg --verify securityonion-2.4.170-20250812.iso.sig securityonion-2.4.170-20250812.iso +gpg --verify securityonion-2.4.180-20250916.iso.sig securityonion-2.4.180-20250916.iso ``` The output should show "Good signature" and the Primary key fingerprint should match what's shown below: ``` -gpg: Signature made Fri 08 Aug 2025 06:24:56 PM EDT using RSA key ID FE507013 +gpg: Signature made Tue 16 Sep 2025 06:30:19 PM EDT using RSA key ID FE507013 gpg: Good signature from "Security Onion Solutions, LLC " gpg: WARNING: This key is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. From 4dae1afe0b62478aa94dc2f317dba75c04e4723d Mon Sep 17 00:00:00 2001 From: Doug Burks Date: Wed, 17 Sep 2025 12:37:29 -0400 Subject: [PATCH 011/124] Add files via upload --- sigs/securityonion-2.4.180-20250916.iso.sig | Bin 0 -> 566 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 sigs/securityonion-2.4.180-20250916.iso.sig diff --git a/sigs/securityonion-2.4.180-20250916.iso.sig b/sigs/securityonion-2.4.180-20250916.iso.sig new file mode 100644 index 0000000000000000000000000000000000000000..4ba61d38982ccdf8bc64f713a2e21f39d4dae22f GIT binary patch literal 566 zcmV-60?GY}0y6{v0SEvc79j-41gSkXz6^6dp_W8^5Ma0dP;e6k0%*zP`v3|F5PT3| zxBgIY6PCvh|8+XP{V*SXVDYnAqvZHUaERfvC!p~$h&T8)tPm!{i)RJtnGZzuo*zYN zF9H8*%WJM4&(^p%m@U-EJ8&0e$Zgy-mraT_a13o$EZC~qDkEnL7R$^RiRaDwm}SEL z94sHWrcPU;`{afb(vF}C(~S2?RzL?BV-iDh7tR850$>~A%bO~MDD_>( z-j!fB7~P!_ph1Be3AGE^m_)Pb33x1UTl{_a52w;UP&4$?2E~(!7<^iR&9qA+mFrh9 z32Abta?00E>>KuFh$Gwv`*N^=PMa`$O`Op9{IeDM+ni9Z1*B06f1Xz6xCq`DJz z-$fKn1}Pc&FQPbL{#7XU?o%w^4=OlHtS(HvJ!X-poC(Jnu8_h-ijwETQAvr~B`yGN zONZH>p)~?I5p0{xa^G)Qy)Yf|_d~)D@u+v-3722nG3h%|VPI=6b^aM*7V EGS8X}ga7~l literal 0 HcmV?d00001 From 5806999f63c21e9bb1eec7a7f9e98e6f4783eb84 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:39:06 -0500 Subject: [PATCH 012/124] add error check & retries to elastic_fleet_bulk_package_install --- salt/elasticfleet/tools/sbin/so-elastic-fleet-common | 4 +++- .../sbin_jinja/so-elastic-fleet-optional-integrations-load | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index b4ac496df..873cb6e0d 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -119,7 +119,9 @@ elastic_fleet_package_install() { elastic_fleet_bulk_package_install() { BULK_PKG_LIST=$1 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X POST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d@$1 "localhost:5601/api/fleet/epm/packages/_bulk" + if ! 
curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X POST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d@$1 "localhost:5601/api/fleet/epm/packages/_bulk" --retry 3 --fail 2>/dev/null; then
+        return 1
+    fi
 }

 elastic_fleet_package_is_installed() {
diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load
index 886bbf75c..22ab543ac 100644
--- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load
+++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load
@@ -160,7 +160,11 @@ if [[ -f $STATE_FILE_SUCCESS ]]; then

     for file in "${pkg_filename}_"*.json; do
         [ -e "$file" ] || continue
-        elastic_fleet_bulk_package_install $file >> $BULK_INSTALL_OUTPUT
+        if ! elastic_fleet_bulk_package_install $file >> $BULK_INSTALL_OUTPUT; then
+            # integrations loaded by this script are non-essential and shouldn't cause an exit; skip them for now, the next highstate run can retry
+            echo "Failed to complete a chunk of bulk package installs -- $file "
+            continue
+        fi
     done
     # cleanup any temp files for chunked package install
     rm -f ${pkg_filename}_*.json $BULK_INSTALL_PACKAGE_LIST

From 9e24d21282e86f2fa8bfcc5f04f9998199fa841e Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Wed, 17 Sep 2025 11:41:27 -0500
Subject: [PATCH 013/124] remove unused functions from so-elastic-fleet-common

---
 .../tools/sbin/so-elastic-fleet-common        | 22 -------------------
 1 file changed, 22 deletions(-)

diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
index 873cb6e0d..ee174e159 100644
--- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
+++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
@@ -124,11 +124,6 @@ elastic_fleet_bulk_package_install() {
     fi
 }

-elastic_fleet_package_is_installed() {
-    PACKAGE=$1
-    curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET -H 'kbn-xsrf: true' "localhost:5601/api/fleet/epm/packages/$PACKAGE" | jq -r '.item.status'
-}
-
 elastic_fleet_installed_packages() {
     curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET -H 'kbn-xsrf: true' -H 'Content-Type: application/json' "localhost:5601/api/fleet/epm/packages/installed?perPage=500"
 }
@@ -141,14 +136,6 @@ elastic_fleet_agent_policy_ids() {
     fi
 }

-elastic_fleet_agent_policy_names() {
-    curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies" | jq -r .items[].name
-    if [ $? -ne 0 ]; then
-        echo "Error: Failed to retrieve agent policies."
- exit 1 - fi -} - elastic_fleet_integration_policy_names() { AGENT_POLICY=$1 curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" | jq -r .item.package_policies[].name @@ -221,12 +208,3 @@ elastic_fleet_policy_create() { fi } - -elastic_fleet_policy_update() { - - POLICYID=$1 - JSON_STRING=$2 - - curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/agent_policies/$POLICYID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" -} - From 4418623f7314d9f3cddc6cc4b655d8ae2e16a0da Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 17 Sep 2025 14:20:44 -0400 Subject: [PATCH 014/124] bump version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 1ff799fad..3f8c50a50 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.4.180 +2.4.190 From e01d0f81eadf7004461fdb73342cfa8708aa509d Mon Sep 17 00:00:00 2001 From: Doug Burks Date: Wed, 17 Sep 2025 14:22:40 -0400 Subject: [PATCH 015/124] Update 2-4.yml --- .github/DISCUSSION_TEMPLATE/2-4.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/DISCUSSION_TEMPLATE/2-4.yml b/.github/DISCUSSION_TEMPLATE/2-4.yml index 273430e7d..c85249fac 100644 --- a/.github/DISCUSSION_TEMPLATE/2-4.yml +++ b/.github/DISCUSSION_TEMPLATE/2-4.yml @@ -31,6 +31,7 @@ body: - 2.4.160 - 2.4.170 - 2.4.180 + - 2.4.190 - Other (please provide detail below) validations: required: true From d0e875928db9131799e93e7d3777227d699caf0f Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:59:13 -0500 Subject: [PATCH 016/124] add error checking and retries for elastic_fleet_installed_packages & associated script --- salt/elasticfleet/tools/sbin/so-elastic-fleet-common | 4 +++- .../sbin_jinja/so-elastic-fleet-optional-integrations-load | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index ee174e159..8945c37d9 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -125,7 +125,9 @@ elastic_fleet_bulk_package_install() { } elastic_fleet_installed_packages() { - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET -H 'kbn-xsrf: true' -H 'Content-Type: application/json' "localhost:5601/api/fleet/epm/packages/installed?perPage=500" + if ! curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET -H 'kbn-xsrf: true' -H 'Content-Type: application/json' "localhost:5601/api/fleet/epm/packages/installed?perPage=500" --retry 3 --fail 2>/dev/null; then + return 1 + fi } elastic_fleet_agent_policy_ids() { diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load index 22ab543ac..833f3255d 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load @@ -172,8 +172,9 @@ if [[ -f $STATE_FILE_SUCCESS ]]; then echo "Elastic integrations don't appear to need installation/updating..." 
fi
 # Write out file for generating index/component/ilm templates
-    latest_installed_package_list=$(elastic_fleet_installed_packages)
-    echo $latest_installed_package_list | jq '[.items[] | {name: .name, es_index_patterns: .dataStreams}]' > $PACKAGE_COMPONENTS
+    if latest_installed_package_list=$(elastic_fleet_installed_packages); then
+        echo $latest_installed_package_list | jq '[.items[] | {name: .name, es_index_patterns: .dataStreams}]' > $PACKAGE_COMPONENTS
+    fi
     if retry 3 1 "so-elasticsearch-query / --fail --output /dev/null"; then
         # Refresh installed component template list
         latest_component_templates_list=$(so-elasticsearch-query _component_template | jq '.component_templates[] | .name' | jq -s '.')

From f3aaee1e414379fc22443d262f5231ec041e6a71 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Wed, 17 Sep 2025 14:59:41 -0500
Subject: [PATCH 017/124] update elastic_fleet_agent_policy_ids scripts
 already check rc

---
 salt/elasticfleet/tools/sbin/so-elastic-fleet-common | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
index 8945c37d9..bbc8dd85f 100644
--- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
+++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
@@ -131,10 +131,11 @@ elastic_fleet_installed_packages() {
 }

 elastic_fleet_agent_policy_ids() {
-    curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies" | jq -r .items[].id
-    if [ $? -ne 0 ]; then
+    if output=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies" --retry 3 --fail 2>/dev/null); then
+        echo "$output" | jq -r .items[].id
+    else
         echo "Error: Failed to retrieve agent policies."
-        exit 1
+        return 1
     fi
 }

From 5b70398c0ab6320f4affe6e9cd48966d6ee718e5 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Wed, 17 Sep 2025 15:35:20 -0500
Subject: [PATCH 018/124] add error check & retries to
 elastic_fleet_integration_policy_names and associated scripts

---
 salt/elasticfleet/enabled.sls                              | 3 +++
 salt/elasticfleet/tools/sbin/so-elastic-fleet-common       | 7 ++++---
 .../tools/sbin_jinja/so-elastic-fleet-integration-upgrade  | 5 ++++-
 .../sbin_jinja/so-elastic-fleet-optional-integrations-load | 7 ++++++-
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls
index 58409fb48..6b8be2f28 100644
--- a/salt/elasticfleet/enabled.sls
+++ b/salt/elasticfleet/enabled.sls
@@ -158,6 +158,9 @@ so-elastic-agent-grid-upgrade:
 so-elastic-fleet-integration-upgrade:
   cmd.run:
     - name: /usr/sbin/so-elastic-fleet-integration-upgrade
+    - retry:
+        attempts: 3
+        interval: 10

diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
index bbc8dd85f..9dbfbf752 100644
--- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
+++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
@@ -141,10 +141,11 @@ elastic_fleet_agent_policy_ids() {

 elastic_fleet_integration_policy_names() {
     AGENT_POLICY=$1
-    curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" | jq -r .item.package_policies[].name
-    if [ $?
-ne 0 ]; then
+    if output=$(curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" --retry 3 --fail 2>/dev/null); then
+        echo "$output" | jq -r .item.package_policies[].name
+    else
         echo "Error: Failed to retrieve integrations for '$AGENT_POLICY'."
-        exit 1
+        return 1
     fi
 }
diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade
index f25059f39..ec0011ea1 100644
--- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade
+++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade
@@ -25,7 +25,10 @@ fi
 default_packages=({% for pkg in SUPPORTED_PACKAGES %}"{{ pkg }}"{% if not loop.last %} {% endif %}{% endfor %})

 for AGENT_POLICY in $agent_policies; do
-    integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY")
+    if ! integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY"); then
+        # this script upgrades default integration packages, exit 1 and let salt handle retrying
+        exit 1
+    fi
     for INTEGRATION in $integrations; do
         if ! [[ "$INTEGRATION" == "elastic-defend-endpoints" ]] && ! [[ "$INTEGRATION" == "fleet_server-"* ]]; then
             # Get package name so we know what package to look for when checking the current and latest available version
diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load
index 833f3255d..a3e62df20 100644
--- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load
+++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load
@@ -62,7 +62,12 @@ default_packages=({% for pkg in SUPPORTED_PACKAGES %}"{{ pkg }}"{% if not loop.l
 in_use_integrations=()

 for AGENT_POLICY in $agent_policies; do
-    integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY")
+
+    if ! integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY"); then
+        # skip the agent policy if we can't get required info, let salt retry. Integrations loaded by this script are non-default integrations.
+        echo "Skipping $AGENT_POLICY..
" + continue + fi for INTEGRATION in $integrations; do PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION") # non-default integrations that are in-use in any policy From a5011b398d8accf80efb3f1bcf37aa18b819045e Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 09:39:56 -0500 Subject: [PATCH 019/124] add err check and retries to elastic_fleet_integration_policy_package_name and associated scripts --- salt/elasticfleet/enabled.sls | 1 + salt/elasticfleet/tools/sbin/so-elastic-fleet-common | 7 ++++--- .../tools/sbin_jinja/so-elastic-fleet-integration-upgrade | 4 +++- .../sbin_jinja/so-elastic-fleet-optional-integrations-load | 5 ++++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index 6b8be2f28..cef47168f 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -162,6 +162,7 @@ so-elastic-fleet-integration-upgrade: attempts: 3 interval: 10 +{# Optional integrations script doesn't need the retries like so-elastic-fleet-integration-upgrade which loads the default integrations #} so-elastic-fleet-addon-integrations: cmd.run: - name: /usr/sbin/so-elastic-fleet-optional-integrations-load diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 9dbfbf752..32e407487 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -152,10 +152,11 @@ elastic_fleet_integration_policy_names() { elastic_fleet_integration_policy_package_name() { AGENT_POLICY=$1 INTEGRATION=$2 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.name' - if [ $? -ne 0 ]; then + if output=$(curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" --retry 3 --fail 2>/dev/null); then + echo "$output" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.name' + else echo "Error: Failed to retrieve package name for '$INTEGRATION' in '$AGENT_POLICY'." - exit 1 + return 1 fi } diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade index ec0011ea1..8b9cf3639 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade @@ -32,7 +32,9 @@ for AGENT_POLICY in $agent_policies; do for INTEGRATION in $integrations; do if ! [[ "$INTEGRATION" == "elastic-defend-endpoints" ]] && ! [[ "$INTEGRATION" == "fleet_server-"* ]]; then # Get package name so we know what package to look for when checking the current and latest available version - PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION") + if ! 
PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION"); then
+                exit 1
+            fi
 {%- if not AUTO_UPGRADE_INTEGRATIONS %}
             if [[ " ${default_packages[@]} " =~ " $PACKAGE_NAME " ]]; then
 {%- endif %}
diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load
index a3e62df20..896f8adef 100644
--- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load
+++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load
@@ -69,7 +69,10 @@ for AGENT_POLICY in $agent_policies; do
         continue
     fi
     for INTEGRATION in $integrations; do
-        PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION")
+        if ! PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION"); then
+            echo "Not adding $INTEGRATION, couldn't get package name"
+            continue
+        fi
         # non-default integrations that are in-use in any policy
         if ! [[ " ${default_packages[@]} " =~ " $PACKAGE_NAME " ]]; then
             in_use_integrations+=("$PACKAGE_NAME")

From 24a0fa3f6d2a5784dc1fe5ef51d3a45f953bda68 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Thu, 18 Sep 2025 10:15:57 -0500
Subject: [PATCH 020/124] add fleet_api wrapper for curl retries

---
 .../tools/sbin/so-elastic-fleet-common        | 29 ++++++++++++-------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
index 32e407487..7334c409a 100644
--- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
+++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
@@ -23,6 +23,13 @@ fi
 # Define a banner to separate sections
 banner="========================================================================="

+fleet_api() {
+    local QUERYPATH=$1
+    shift
+
+    curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/${QUERYPATH}" "$@" --retry 3 --fail 2>/dev/null
+}
+
 elastic_fleet_integration_check() {

     AGENT_POLICY=$1
@@ -39,7 +46,7 @@ elastic_fleet_integration_create() {

     JSON_STRING=$1

-    if ! curl -sK /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then
+    if ! fleet_api "localhost:5601/api/fleet/package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -XPOST -d "$JSON_STRING"; then
         return 1
     fi
 }
@@ -67,7 +74,7 @@ elastic_fleet_integration_update() {

     JSON_STRING=$2

-    if ! curl -sK /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/package_policies/$UPDATE_ID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then
+    if ! fleet_api "localhost:5601/api/fleet/package_policies/$UPDATE_ID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -XPUT -d "$JSON_STRING"; then
         return 1
     fi
 }
@@ -81,7 +88,7 @@ elastic_fleet_integration_policy_upgrade() {
         '{"packagePolicyIds":[$INTEGRATIONID]}'
     )

-    if ! curl -sK /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies/upgrade" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then
+    if !
fleet_api "localhost:5601/api/fleet/package_policies/upgrade" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then return 1 fi } @@ -90,7 +97,7 @@ elastic_fleet_integration_policy_upgrade() { elastic_fleet_package_version_check() { PACKAGE=$1 - if output=$(curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/epm/packages/$PACKAGE" --retry 3 --fail 2>/dev/null); then + if output=$(fleet_api "localhost:5601/api/fleet/epm/packages/$PACKAGE"); then echo $output | jq -r '.item.version' else return 1 @@ -99,7 +106,7 @@ elastic_fleet_package_version_check() { elastic_fleet_package_latest_version_check() { PACKAGE=$1 - if output=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/epm/packages/$PACKAGE" --retry 3 --fail 2>/dev/null); then + if output=$(fleet_api "localhost:5601/api/fleet/epm/packages/$PACKAGE"); then if version=$(jq -e -r '.item.latestVersion' <<< $output); then echo "$version" fi @@ -112,26 +119,26 @@ elastic_fleet_package_latest_version_check() { elastic_fleet_package_install() { PKG=$1 VERSION=$2 - if ! curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X POST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d '{"force":true}' "localhost:5601/api/fleet/epm/packages/$PKG/$VERSION" --retry 3 --fail 2>/dev/null; then + if ! fleet_api "localhost:5601/api/fleet/epm/packages/$PKG/$VERSION" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d '{"force":true}'; then return 1 fi } elastic_fleet_bulk_package_install() { BULK_PKG_LIST=$1 - if ! curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X POST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d@$1 "localhost:5601/api/fleet/epm/packages/_bulk" --retry 3 --fail 2>/dev/null; then + if ! fleet_api "localhost:5601/api/fleet/epm/packages/_bulk" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d@$BULK_PKG_LIST; then return 1 fi } elastic_fleet_installed_packages() { - if ! curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET -H 'kbn-xsrf: true' -H 'Content-Type: application/json' "localhost:5601/api/fleet/epm/packages/installed?perPage=500" --retry 3 --fail 2>/dev/null; then + if ! fleet_api "localhost:5601/api/fleet/epm/packages/installed?perPage=500"; then return 1 fi } elastic_fleet_agent_policy_ids() { - if output=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies" --retry 3 --fail 2>/dev/null); + if output=$(fleet_api "localhost:5601/api/fleet/agent_policies"); echo "$output" | jq -r .items[].id else echo "Error: Failed to retrieve agent policies." @@ -141,7 +148,7 @@ elastic_fleet_agent_policy_ids() { elastic_fleet_integration_policy_names() { AGENT_POLICY=$1 - if output=$(curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" --retry 3 --fail 2>/dev/null); then + if output=$(fleet_api "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY"); then echo "$output" | jq -r .item.package_policies[].name else echo "Error: Failed to retrieve integrations for '$AGENT_POLICY'." 
@@ -152,7 +159,7 @@ elastic_fleet_integration_policy_names() { elastic_fleet_integration_policy_package_name() { AGENT_POLICY=$1 INTEGRATION=$2 - if output=$(curl -sK /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" --retry 3 --fail 2>/dev/null); then + if output=$(fleet_api "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY"); then echo "$output" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.name' else echo "Error: Failed to retrieve package name for '$INTEGRATION' in '$AGENT_POLICY'." From 8b07ff453d781329534bdd632c1de5b266b2f508 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 10:21:07 -0500 Subject: [PATCH 021/124] elastic_fleet_integration_policy_package_version --- salt/elasticfleet/tools/sbin/so-elastic-fleet-common | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 7334c409a..115055916 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -171,13 +171,13 @@ elastic_fleet_integration_policy_package_version() { AGENT_POLICY=$1 INTEGRATION=$2 - if output=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" --fail); then - if version=$(jq -e -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.version' <<< $output); then + if output=$(fleet_api "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY"); then + if version=$(jq -e -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.version' <<< "$output"); then echo "$version" fi else echo "Error: Failed to retrieve agent policy $AGENT_POLICY" - exit 1 + return 1 fi } From f663f22628ecf0d755d2882c60f0bfb8df5f63cd Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 10:27:54 -0500 Subject: [PATCH 022/124] elastic_fleet_integration_id --- salt/elasticfleet/tools/sbin/so-elastic-fleet-common | 7 ++++--- .../tools/sbin_jinja/so-elastic-fleet-integration-upgrade | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 115055916..80a016908 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -184,10 +184,11 @@ elastic_fleet_integration_policy_package_version() { elastic_fleet_integration_id() { AGENT_POLICY=$1 INTEGRATION=$2 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .id' - if [ $? -ne 0 ]; then + if output=$(fleet_api "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY"); then + echo "$output" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .id' + else echo "Error: Failed to retrieve integration ID for '$INTEGRATION' in '$AGENT_POLICY'." 
- exit 1 + return 1 fi } diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade index 8b9cf3639..f24d17210 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade @@ -53,7 +53,9 @@ for AGENT_POLICY in $agent_policies; do fi # Get integration ID - INTEGRATION_ID=$(elastic_fleet_integration_id "$AGENT_POLICY" "$INTEGRATION") + if ! INTEGRATION_ID=$(elastic_fleet_integration_id "$AGENT_POLICY" "$INTEGRATION"); + exit 1 + fi if [[ "$PACKAGE_VERSION" != "$AVAILABLE_VERSION" ]]; then # Dry run of the upgrade From faa112eddf80754ce873f87129b04a1d1a65f329 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 12:18:16 -0500 Subject: [PATCH 023/124] update last so-elastic-fleet-common functions --- .../tools/sbin/so-elastic-fleet-common | 45 ++++++++++--------- .../so-elastic-fleet-integration-upgrade | 4 +- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 80a016908..5d8781ba8 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -46,7 +46,7 @@ elastic_fleet_integration_create() { JSON_STRING=$1 - if ! fleet_api "localhost:5601/api/fleet/package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -XPOST -d "$JSON_STRING"; then + if ! fleet_api "package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -XPOST -d "$JSON_STRING"; then return 1 fi } @@ -65,7 +65,10 @@ elastic_fleet_integration_remove() { '{"packagePolicyIds":[$INTEGRATIONID]}' ) - curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies/delete" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! fleet_api "package_policies/delete" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo "Error: Unable to delete '$NAME' from '$AGENT_POLICY'" + return 1 + fi } elastic_fleet_integration_update() { @@ -74,7 +77,7 @@ elastic_fleet_integration_update() { JSON_STRING=$2 - if ! fleet_api "localhost:5601/api/fleet/package_policies/$UPDATE_ID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -XPUT -d "$JSON_STRING"; then + if ! fleet_api "package_policies/$UPDATE_ID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -XPUT -d "$JSON_STRING"; then return 1 fi } @@ -88,7 +91,7 @@ elastic_fleet_integration_policy_upgrade() { '{"packagePolicyIds":[$INTEGRATIONID]}' ) - if ! fleet_api "localhost:5601/api/fleet/package_policies/upgrade" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + if ! 
fleet_api "package_policies/upgrade" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then return 1 fi } @@ -97,21 +100,22 @@ elastic_fleet_integration_policy_upgrade() { elastic_fleet_package_version_check() { PACKAGE=$1 - if output=$(fleet_api "localhost:5601/api/fleet/epm/packages/$PACKAGE"); then - echo $output | jq -r '.item.version' + if output=$(fleet_api "epm/packages/$PACKAGE"); then + echo "$output" | jq -r '.item.version' else + echo "Error: Failed to get current package version for '$PACKAGE'" return 1 fi } elastic_fleet_package_latest_version_check() { PACKAGE=$1 - if output=$(fleet_api "localhost:5601/api/fleet/epm/packages/$PACKAGE"); then + if output=$(fleet_api "epm/packages/$PACKAGE"); then if version=$(jq -e -r '.item.latestVersion' <<< $output); then echo "$version" fi else - echo "Error: Failed to get latest version for $PACKAGE" + echo "Error: Failed to get latest version for '$PACKAGE'" return 1 fi } @@ -119,26 +123,26 @@ elastic_fleet_package_latest_version_check() { elastic_fleet_package_install() { PKG=$1 VERSION=$2 - if ! fleet_api "localhost:5601/api/fleet/epm/packages/$PKG/$VERSION" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d '{"force":true}'; then + if ! fleet_api "epm/packages/$PKG/$VERSION" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d '{"force":true}'; then return 1 fi } elastic_fleet_bulk_package_install() { BULK_PKG_LIST=$1 - if ! fleet_api "localhost:5601/api/fleet/epm/packages/_bulk" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d@$BULK_PKG_LIST; then + if ! fleet_api "epm/packages/_bulk" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d@$BULK_PKG_LIST; then return 1 fi } elastic_fleet_installed_packages() { - if ! fleet_api "localhost:5601/api/fleet/epm/packages/installed?perPage=500"; then + if ! fleet_api "epm/packages/installed?perPage=500"; then return 1 fi } elastic_fleet_agent_policy_ids() { - if output=$(fleet_api "localhost:5601/api/fleet/agent_policies"); + if output=$(fleet_api "agent_policies"); echo "$output" | jq -r .items[].id else echo "Error: Failed to retrieve agent policies." @@ -148,7 +152,7 @@ elastic_fleet_agent_policy_ids() { elastic_fleet_integration_policy_names() { AGENT_POLICY=$1 - if output=$(fleet_api "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY"); then + if output=$(fleet_api "agent_policies/$AGENT_POLICY"); then echo "$output" | jq -r .item.package_policies[].name else echo "Error: Failed to retrieve integrations for '$AGENT_POLICY'." @@ -159,7 +163,7 @@ elastic_fleet_integration_policy_names() { elastic_fleet_integration_policy_package_name() { AGENT_POLICY=$1 INTEGRATION=$2 - if output=$(fleet_api "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY"); then + if output=$(fleet_api "agent_policies/$AGENT_POLICY"); then echo "$output" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.name' else echo "Error: Failed to retrieve package name for '$INTEGRATION' in '$AGENT_POLICY'." 
@@ -171,12 +175,12 @@ elastic_fleet_integration_policy_package_version() { AGENT_POLICY=$1 INTEGRATION=$2 - if output=$(fleet_api "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY"); then + if output=$(fleet_api "agent_policies/$AGENT_POLICY"); then if version=$(jq -e -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.version' <<< "$output"); then echo "$version" fi else - echo "Error: Failed to retrieve agent policy $AGENT_POLICY" + echo "Error: Failed to retrieve integration version for '$INTEGRATION' in policy '$AGENT_POLICY'" return 1 fi } @@ -184,7 +188,7 @@ elastic_fleet_integration_policy_package_version() { elastic_fleet_integration_id() { AGENT_POLICY=$1 INTEGRATION=$2 - if output=$(fleet_api "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY"); then + if output=$(fleet_api "agent_policies/$AGENT_POLICY"); then echo "$output" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .id' else echo "Error: Failed to retrieve integration ID for '$INTEGRATION' in '$AGENT_POLICY'." @@ -194,10 +198,9 @@ elastic_fleet_integration_id() { elastic_fleet_integration_policy_dryrun_upgrade() { INTEGRATION_ID=$1 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -H "Content-Type: application/json" -H 'kbn-xsrf: true' -L -X POST "localhost:5601/api/fleet/package_policies/upgrade/dryrun" -d "{\"packagePolicyIds\":[\"$INTEGRATION_ID\"]}" - if [ $? -ne 0 ]; then + if ! fleet_api "package_policies/upgrade/dryrun" -H "Content-Type: application/json" -H 'kbn-xsrf: true' -XPOST -d "{\"packagePolicyIds\":[\"$INTEGRATION_ID\"]}"; then echo "Error: Failed to complete dry run for '$INTEGRATION_ID'." - exit 1 + return 1 fi } @@ -216,7 +219,7 @@ elastic_fleet_policy_create() { '{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER}' ) # Create Fleet Policy - if ! curl -sK /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/agent_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --fail 2>/dev/null; then + if ! fleet_api "agent_policies" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then return 1 fi diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade index f24d17210..318ce45e6 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade @@ -63,7 +63,9 @@ for AGENT_POLICY in $agent_policies; do echo "Current $PACKAGE_NAME package version ($PACKAGE_VERSION) is not the same as the latest available package ($AVAILABLE_VERSION)..." echo "Upgrading $INTEGRATION..." echo "Starting dry run..." - DRYRUN_OUTPUT=$(elastic_fleet_integration_policy_dryrun_upgrade "$INTEGRATION_ID") + if ! 
DRYRUN_OUTPUT=$(elastic_fleet_integration_policy_dryrun_upgrade "$INTEGRATION_ID") + exit 1 + fi DRYRUN_ERRORS=$(echo "$DRYRUN_OUTPUT" | jq .[].hasErrors) # If no errors with dry run, proceed with actual upgrade From cd5483623b789613bf343d51a4585e8f9a249c80 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 14:33:34 -0500 Subject: [PATCH 024/124] update import/eval fleet output config -- try to prevent corrupt dual 'default' output policies from having a successful installation --- .../tools/sbin_jinja/so-elastic-fleet-setup | 90 +++++++++++++++---- salt/manager/tools/sbin/soup | 28 ++++++ 2 files changed, 102 insertions(+), 16 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index 0510b6bfe..066edd2da 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -50,22 +50,54 @@ if [[ "$RETURN_CODE" != "0" ]]; then fi printf "\n### Create ES Token ###\n" -ESTOKEN=$(curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/service_tokens" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' | jq -r .value) +if ESTOKEN_RAW=$(fleet_api "service_tokens" -XPOST-H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then + ESTOKEN=$(echo "$ESTOKEN_RAW" | jq -r .value) +else + echo -e "\nFailed to create ES token..." + exit 1 +fi ### Create Outputs, Fleet Policy and Fleet URLs ### # Create the Manager Elasticsearch Output first and set it as the default output printf "\nAdd Manager Elasticsearch Output...\n" -ESCACRT=$(openssl x509 -in $INTCA) -JSON_STRING=$( jq -n \ --arg ESCACRT "$ESCACRT" \ '{"name":"so-manager_elasticsearch","id":"so-manager_elasticsearch","type":"elasticsearch","hosts":["https://{{ GLOBALS.manager_ip }}:9200","https://{{ GLOBALS.manager }}:9200"],"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl":{"certificate_authorities": [$ESCACRT]}}' ) -curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/outputs" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" +ESCACRT=$(openssl x509 -in "$INTCA" -outform DER | sha256sum | cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') +JSON_STRING=$(jq -n \ + --arg ESCACRT "$ESCACRT" \ + '{"name":"so-manager_elasticsearch","id":"so-manager_elasticsearch","type":"elasticsearch","hosts":["https://{{ GLOBALS.manager_ip }}:9200","https://{{ GLOBALS.manager }}:9200"],"is_default":true,"is_default_monitoring":true,"config_yaml":"","ca_trusted_fingerprint": $ESCACRT}') + +if ! fleet_api "outputs" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo -e "\nFailed to create so-elasticsearch_manager policy..." + exit 1 +fi printf "\n\n" +# At this point there should only be two policies. fleet-default-output & so-manager_elasticsearch +status "Verifying so-manager_elasticsearch policy is configured as the current default" + +# Grab the fleet-default-output policy instead of so-manager_elasticsearch, because a weird state can exist where both fleet-default-output & so-elasticsearch_manager can be set as the active default output for logs / metrics. 
Resulting in logs not ingesting on import/eval nodes +# Check that fleet-default-output isn't configured as a default for anything +if DEFAULTPOLICY=$(fleet_api "outputs/fleet-default-output"); then + fleet_default=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default') + fleet_default_monitoring=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default_monitoring') + if [[ ! $fleet_default ]] && [[ ! $fleet_default_monitoring ]]; then + echo -e "\nso-manager_elasticsearch is configured as the current default policy..." + else + echo -e "\nVerification of so-manager_elasticsearch policy failed... The default 'fleet-default-output' output is still active..." + exit 1 + fi +else + # fleet-output-policy is created automatically by fleet when started. Should always exist on any installation type + echo -e "\nDefault fleet-default-output policy doesn't exist...\n" + exit 1 +fi + # Create the Manager Fleet Server Host Agent Policy # This has to be done while the Elasticsearch Output is set to the default Output printf "Create Manager Fleet Server Policy...\n" -elastic_fleet_policy_create "FleetServer_{{ GLOBALS.hostname }}" "Fleet Server - {{ GLOBALS.hostname }}" "false" "120" +if ! elastic_fleet_policy_create "FleetServer_{{ GLOBALS.hostname }}" "Fleet Server - {{ GLOBALS.hostname }}" "false" "120"; then + echo -e "\n Failed to create Manager fleet server policy..." + exit 1 +fi # Modify the default integration policy to update the policy_id with the correct naming UPDATED_INTEGRATION_POLICY=$(jq --arg policy_id "FleetServer_{{ GLOBALS.hostname }}" --arg name "fleet_server-{{ GLOBALS.hostname }}" ' @@ -90,7 +122,10 @@ JSON_STRING=$( jq -n \ --arg LOGSTASHCA "$LOGSTASHCA" \ '{"name":"grid-logstash","is_default":true,"is_default_monitoring":true,"id":"so-manager_logstash","type":"logstash","hosts":["{{ GLOBALS.manager_ip }}:5055", "{{ GLOBALS.manager }}:5055"],"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]},"proxy_id":null}' ) -curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/outputs" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" +if ! fleet_api "outputs" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo -e "\nFailed to create logstash fleet output" + exit 1 +fi printf "\n\n" {%- endif %} @@ -108,7 +143,10 @@ else fi ## This array replaces whatever URLs are currently configured -curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/fleet_server_hosts" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" +if ! fleet_api "fleet_server_hosts" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo -e "\nFailed to add manager fleet URL" + exit 1 +fi printf "\n\n" ### Create Policies & Associated Integration Configuration ### @@ -120,19 +158,19 @@ printf "\n\n" # Initial Endpoints Policy if ! elastic_fleet_policy_create "endpoints-initial" "Initial Endpoint Policy" "false" "1209600"; then - echo -e "Failed to create endpoints-initial policy..." + echo -e "\nFailed to create endpoints-initial policy..." exit 1 fi # Grid Nodes - General Policy if ! elastic_fleet_policy_create "so-grid-nodes_general" "SO Grid Nodes - General Purpose" "false" "1209600"; then - echo -e "Failed to create so-grid-nodes_general policy..." + echo -e "\nFailed to create so-grid-nodes_general policy..." exit 1 fi # Grid Nodes - Heavy Node Policy if ! 
elastic_fleet_policy_create "so-grid-nodes_heavy" "SO Grid Nodes - Heavy Node" "false" "1209600"; then - echo -e "Failed to create so-grid-nodes_heavy policy..." + echo -e "\nFailed to create so-grid-nodes_heavy policy..." exit 1 fi @@ -146,14 +184,34 @@ JSON_STRING=$( jq -n \ '{"name":$NAME,"host":$URL,"is_default":true}' ) -curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/agent_download_sources" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" +if ! fleet_api "agent_download_sources" -XPOST-H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo -e "\nFailed to update Elastic Agent artifact URL" + exit 1 +fi ### Finalization ### # Query for Enrollment Tokens for default policies -ENDPOINTSENROLLMENTOKEN=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' | jq .list | jq -r -c '.[] | select(.policy_id | contains("endpoints-initial")) | .api_key') -GRIDNODESENROLLMENTOKENGENERAL=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' | jq .list | jq -r -c '.[] | select(.policy_id | contains("so-grid-nodes_general")) | .api_key') -GRIDNODESENROLLMENTOKENHEAVY=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' | jq .list | jq -r -c '.[] | select(.policy_id | contains("so-grid-nodes_heavy")) | .api_key') +if ENDPOINTSENROLLMENTOKEN_RAW=$(fleet_api "/enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then + ENDPOINTSENROLLMENTOKEN=$(echo "$ENDPOINTSENROLLMENTOKEN_RAW" | jq .list | jq -r -c '.[] | select(.policy_id | contains("endpoints-initial")) | .api_key') +else + echo -e "\nFailed to query for Endpoints enrollment token" + exit 1 +fi + +if GRIDNODESENROLLMENTOKENGENERAL_RAW=$(fleet_api "enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then + GRIDNODESENROLLMENTOKENGENERAL=$(echo "$GRIDNODESENROLLMENTOKENGENERAL_RAW" | jq .list | jq -r -c '.[] | select(.policy_id | contains("so-grid-nodes_general")) | .api_key') +else + echo -e "\nFailed to query for Grid nodes - General enrollment token" + exit 1 +fi + +if GRIDNODESENROLLMENTOKENHEAVY_RAW=$(fleet_api "enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then + GRIDNODESENROLLMENTOKENHEAVY=$(echo "$GRIDNODESENROLLMENTOKENHEAVY_RAW" | jq .list | jq -r -c '.[] | select(.policy_id | contains("so-grid-nodes_heavy")) | .api_key') +else + echo -e "\nFailed to query for Grid nodes - Heavy enrollment token" + exit 1 +fi # Store needed data in minion pillar pillar_file=/opt/so/saltstack/local/pillar/minions/{{ GLOBALS.minion_id }}.sls diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 86595c162..46010da61 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -450,6 +450,7 @@ postupgrade_changes() { [[ "$POSTVERSION" == 2.4.150 ]] && post_to_2.4.160 [[ "$POSTVERSION" == 2.4.160 ]] && post_to_2.4.170 [[ "$POSTVERSION" == 2.4.170 ]] && post_to_2.4.180 + [[ "$POSTVERSION" == 2.4.180 ]] && post_to_2.4.190 true } @@ -608,6 +609,15 @@ post_to_2.4.180() { POSTVERSION=2.4.180 } +post_to_2.4.190() { + # Only need to update import / eval nodes + if [[ "$MINIONID" =~ "_import" ]] || [[ ! 
"$MINIONID" =~ "_eval" ]]; then + update_import_fleet_output + fi + + POSTVERSION=2.4.190 +} + repo_sync() { echo "Sync the local repo." su socore -c '/usr/sbin/so-repo-sync' || fail "Unable to complete so-repo-sync." @@ -870,6 +880,11 @@ up_to_2.4.180() { INSTALLEDVERSION=2.4.180 } +up_to_2.4.190() { + echo "Nothing to do for 2.4.190" + INSTALLEDVERSION=2.4.190 +} + add_hydra_pillars() { mkdir -p /opt/so/saltstack/local/pillar/hydra touch /opt/so/saltstack/local/pillar/hydra/soc_hydra.sls @@ -1143,6 +1158,19 @@ update_elasticsearch_index_settings() { done } +update_import_fleet_output() { + if output=$(curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/outputs/so-manager_elasticsearch" --retry 3 --fail 2>/dev/null); then + # Update the current config of so-manager_elasticsearch output policy in place (leaving any customizations like having changed the preset value from 'balanced' to 'performance') + CAFINGERPRINT=$(openssl x509 -in /etc/pki/tls/certs/intca.crt -outform DER | sha256sum | cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') + updated_policy=$(jq --args CAFINGERPRINT "$CAFINGERPRINT" '.item | (del(.id) | .ca_trusted_fingerprint = $CAFINGERPRINT)' <<< "$output") + if curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/outputs/so-manager_elasticsearch" -XPUT -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$updated_policy" --retry 3 --fail 2>/dev/null; then + echo "Successfully updated so-manager_elasticsearch fleet output policy" + else + fail "Failed to update so-manager_elasticsearch fleet output policy" + fi + fi +} + update_salt_mine() { echo "Populating the mine with mine_functions for each host." set +e From d9eba3cd0eaacc3a8f4c516c2388c349ba163680 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:17:22 -0500 Subject: [PATCH 025/124] typo --- salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index 066edd2da..9033d7a0c 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -50,7 +50,7 @@ if [[ "$RETURN_CODE" != "0" ]]; then fi printf "\n### Create ES Token ###\n" -if ESTOKEN_RAW=$(fleet_api "service_tokens" -XPOST-H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then +if ESTOKEN_RAW=$(fleet_api "service_tokens" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then ESTOKEN=$(echo "$ESTOKEN_RAW" | jq -r .value) else echo -e "\nFailed to create ES token..." 
From 336ca0dbbdccc6e7d17268047c78e97efe2512fb Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:42:25 -0500 Subject: [PATCH 026/124] typos --- salt/elasticfleet/tools/sbin/so-elastic-fleet-common | 2 +- salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 5d8781ba8..4ca5030aa 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -142,7 +142,7 @@ elastic_fleet_installed_packages() { } elastic_fleet_agent_policy_ids() { - if output=$(fleet_api "agent_policies"); + if output=$(fleet_api "agent_policies"); then echo "$output" | jq -r .items[].id else echo "Error: Failed to retrieve agent policies." diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index 066edd2da..9033d7a0c 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -50,7 +50,7 @@ if [[ "$RETURN_CODE" != "0" ]]; then fi printf "\n### Create ES Token ###\n" -if ESTOKEN_RAW=$(fleet_api "service_tokens" -XPOST-H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then +if ESTOKEN_RAW=$(fleet_api "service_tokens" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then ESTOKEN=$(echo "$ESTOKEN_RAW" | jq -r .value) else echo -e "\nFailed to create ES token..." From 878a3f8962668570d0245ef3ae3cecd849c2863f Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 16:05:34 -0500 Subject: [PATCH 027/124] flip logic to check there aren't two default policies and fleet-default-output is disabled --- salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index 9033d7a0c..e4ce8be68 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -75,11 +75,11 @@ printf "\n\n" status "Verifying so-manager_elasticsearch policy is configured as the current default" # Grab the fleet-default-output policy instead of so-manager_elasticsearch, because a weird state can exist where both fleet-default-output & so-elasticsearch_manager can be set as the active default output for logs / metrics. Resulting in logs not ingesting on import/eval nodes -# Check that fleet-default-output isn't configured as a default for anything if DEFAULTPOLICY=$(fleet_api "outputs/fleet-default-output"); then fleet_default=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default') fleet_default_monitoring=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default_monitoring') - if [[ ! $fleet_default ]] && [[ ! $fleet_default_monitoring ]]; then +# Check that fleet-default-output isn't configured as a default for anything ( both variables return false ) + if [[ $fleet_default ]] && [[ $fleet_default_monitoring ]]; then echo -e "\nso-manager_elasticsearch is configured as the current default policy..." else echo -e "\nVerification of so-manager_elasticsearch policy failed... The default 'fleet-default-output' output is still active..." 
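One detail worth flagging in this verification: jq -er prints the booleans as the literal strings true and false, and [[ $var ]] in bash is only a non-empty-string test, so neither this flipped check nor the flip back in patch 043 actually inspects the true/false value; both hinge only on whether jq produced any output. A sketch of an unambiguous form that compares the strings directly (hypothetical, not part of the series):

# jq emits booleans as the strings "true"/"false"; compare them explicitly
# rather than relying on [[ $var ]], which only tests for a non-empty string.
if [[ "$fleet_default" == "false" && "$fleet_default_monitoring" == "false" ]]; then
    echo "fleet-default-output is no longer a default output"
fi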
From 29ac4f23c6166070ca91b7d1682809d20ea88883 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 16:26:37 -0500 Subject: [PATCH 028/124] typo --- salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index e4ce8be68..9fbd92bfd 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -184,7 +184,7 @@ JSON_STRING=$( jq -n \ '{"name":$NAME,"host":$URL,"is_default":true}' ) -if ! fleet_api "agent_download_sources" -XPOST-H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then +if ! fleet_api "agent_download_sources" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then echo -e "\nFailed to update Elastic Agent artifact URL" exit 1 fi From 87281efc248d089804adc622b5414b6b5463c0be Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 16:41:33 -0500 Subject: [PATCH 029/124] typo --- salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index e4ce8be68..586c68a80 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -184,7 +184,7 @@ JSON_STRING=$( jq -n \ '{"name":$NAME,"host":$URL,"is_default":true}' ) -if ! fleet_api "agent_download_sources" -XPOST-H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then +if ! fleet_api "agent_download_sources" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then echo -e "\nFailed to update Elastic Agent artifact URL" exit 1 fi @@ -192,7 +192,7 @@ fi ### Finalization ### # Query for Enrollment Tokens for default policies -if ENDPOINTSENROLLMENTOKEN_RAW=$(fleet_api "/enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then +if ENDPOINTSENROLLMENTOKEN_RAW=$(fleet_api "enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then ENDPOINTSENROLLMENTOKEN=$(echo "$ENDPOINTSENROLLMENTOKEN_RAW" | jq .list | jq -r -c '.[] | select(.policy_id | contains("endpoints-initial")) | .api_key') else echo -e "\nFailed to query for Endpoints enrollment token" From 138849d25891c8376482f5bb60af7a63fe07f256 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:33:42 -0500 Subject: [PATCH 030/124] more typos --- .../tools/sbin_jinja/so-elastic-fleet-integration-upgrade | 6 +++--- .../sbin_jinja/so-elastic-fleet-optional-integrations-load | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade index 318ce45e6..f1154af1e 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade @@ -25,7 +25,7 @@ fi default_packages=({% for pkg in SUPPORTED_PACKAGES %}"{{ pkg }}"{% if not loop.last %} {% endif %}{% endfor %}) for AGENT_POLICY in $agent_policies; do - if ! 
integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY") + if ! integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY"); then # this script upgrades default integration packages, exit 1 and let salt handle retrying exit 1 fi @@ -53,7 +53,7 @@ for AGENT_POLICY in $agent_policies; do fi # Get integration ID - if ! INTEGRATION_ID=$(elastic_fleet_integration_id "$AGENT_POLICY" "$INTEGRATION"); + if ! INTEGRATION_ID=$(elastic_fleet_integration_id "$AGENT_POLICY" "$INTEGRATION"); then exit 1 fi @@ -63,7 +63,7 @@ for AGENT_POLICY in $agent_policies; do echo "Current $PACKAGE_NAME package version ($PACKAGE_VERSION) is not the same as the latest available package ($AVAILABLE_VERSION)..." echo "Upgrading $INTEGRATION..." echo "Starting dry run..." - if ! DRYRUN_OUTPUT=$(elastic_fleet_integration_policy_dryrun_upgrade "$INTEGRATION_ID") + if ! DRYRUN_OUTPUT=$(elastic_fleet_integration_policy_dryrun_upgrade "$INTEGRATION_ID"); then exit 1 fi DRYRUN_ERRORS=$(echo "$DRYRUN_OUTPUT" | jq .[].hasErrors) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load index 896f8adef..01777e5da 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load @@ -69,7 +69,7 @@ for AGENT_POLICY in $agent_policies; do continue fi for INTEGRATION in $integrations; do - if ! PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION") + if ! PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION"); then echo "Not adding $INTEGRATION, couldn't get package name" continue fi From c9db52433f2e8dcc29c09d1705488c6ea0e2fe45 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 19 Sep 2025 11:08:42 -0500 Subject: [PATCH 031/124] add oom check to so-log-check Signed-off-by: reyesj2 <94730068+reyesj2@users.noreply.github.com> --- salt/common/tools/sbin/so-log-check | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index 72ece1919..eb0b84ac4 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -268,6 +268,13 @@ for log_file in $(cat /tmp/log_check_files); do tail -n $RECENT_LOG_LINES $log_file > /tmp/log_check check_for_errors done +# Look for OOM specific errors in /var/log/messages which can lead to odd behavior / test failures +if [[ -f /var/log/messages ]]; then + status "Checking log file /var/log/messages" + if cat /var/log/messages | grep -iE 'out of memory|oom-kill'; then + RESULT=1 + fi +fi # Cleanup temp files rm -f /tmp/log_check_files From d03dd7ac2d97e0f48382f1a134eb4f273d255d26 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 19 Sep 2025 11:32:13 -0500 Subject: [PATCH 032/124] check for oom kill only in the last 24 hours Signed-off-by: reyesj2 <94730068+reyesj2@users.noreply.github.com> --- salt/common/tools/sbin/so-log-check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index eb0b84ac4..e3768da46 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -271,7 +271,7 @@ done # Look for OOM specific errors in /var/log/messages which can lead to odd behavior / test 
failures if [[ -f /var/log/messages ]]; then status "Checking log file /var/log/messages" - if cat /var/log/messages | grep -iE 'out of memory|oom-kill'; then + if journalctl --since "24 hours ago" | grep -iE 'out of memory|oom-kill'; then RESULT=1 fi fi From ba710c994462b7cd291077396822faa10d97ddf4 Mon Sep 17 00:00:00 2001 From: Jorge Reyes <94730068+reyesj2@users.noreply.github.com> Date: Fri, 19 Sep 2025 12:26:08 -0500 Subject: [PATCH 033/124] import or eval should get updated --- salt/manager/tools/sbin/soup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 46010da61..93c35ac26 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -611,7 +611,7 @@ post_to_2.4.180() { post_to_2.4.190() { # Only need to update import / eval nodes - if [[ "$MINIONID" =~ "_import" ]] || [[ ! "$MINIONID" =~ "_eval" ]]; then + if [[ "$MINIONID" =~ "_import" ]] || [[ "$MINIONID" =~ "_eval" ]]; then update_import_fleet_output fi From f066baf6ba4c28137579153568a0ae8574c66a6b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 19 Sep 2025 12:54:04 -0500 Subject: [PATCH 034/124] use only the characters after the last '_' --- salt/manager/tools/sbin/soup | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 46010da61..81e5b9c18 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -611,7 +611,7 @@ post_to_2.4.180() { post_to_2.4.190() { # Only need to update import / eval nodes - if [[ "$MINIONID" =~ "_import" ]] || [[ "$MINIONID" =~ "_eval" ]]; then + if [[ "${MINIONID##*_}" == "import" ]] || [[ "${MINIONID##*_}" == "eval" ]]; then update_import_fleet_output fi @@ -1429,7 +1429,7 @@ main() { if [ "$is_hotfix" == "true" ]; then echo "Applying $HOTFIXVERSION hotfix" # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars - if [[ ! "$MINIONID" =~ "_import" ]]; then + if [[ ! "${MINIONID##*_}" == "import" ]]; then backup_old_states_pillars fi copy_new_files @@ -1492,7 +1492,7 @@ main() { fi # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars - if [[ ! "$MINIONID" =~ "_import" ]]; then + if [[ ! 
"${MINIONID##*_}" == "import" ]]; then echo "" echo "Creating snapshots of default and local Salt states and pillars and saving to /nsm/backup/" backup_old_states_pillars From c92dc580a2b119850b445f289b913f78a525746e Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 19 Sep 2025 13:17:52 -0500 Subject: [PATCH 035/124] centralize MINION_ROLE lookup_role --- salt/common/tools/sbin/so-common | 3 +-- salt/manager/tools/sbin/soup | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/salt/common/tools/sbin/so-common b/salt/common/tools/sbin/so-common index 203b54cd0..365852e63 100755 --- a/salt/common/tools/sbin/so-common +++ b/salt/common/tools/sbin/so-common @@ -441,8 +441,7 @@ lookup_grain() { lookup_role() { id=$(lookup_grain id) - pieces=($(echo $id | tr '_' ' ')) - echo ${pieces[1]} + echo "${id##*_}" } is_feature_enabled() { diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 81e5b9c18..f324924cb 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -611,7 +611,7 @@ post_to_2.4.180() { post_to_2.4.190() { # Only need to update import / eval nodes - if [[ "${MINIONID##*_}" == "import" ]] || [[ "${MINIONID##*_}" == "eval" ]]; then + if [[ "$MINION_ROLE" == "import" ]] || [[ "$MINION_ROLE" == "eval" ]]; then update_import_fleet_output fi @@ -1387,6 +1387,7 @@ main() { fi set_minionid + MINION_ROLE=$(lookup_role) echo "Found that Security Onion $INSTALLEDVERSION is currently installed." echo "" if [[ $is_airgap -eq 0 ]]; then @@ -1429,7 +1430,7 @@ main() { if [ "$is_hotfix" == "true" ]; then echo "Applying $HOTFIXVERSION hotfix" # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars - if [[ ! "${MINIONID##*_}" == "import" ]]; then + if [[ ! "$MINION_ROLE" == "import" ]]; then backup_old_states_pillars fi copy_new_files @@ -1492,7 +1493,7 @@ main() { fi # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars - if [[ ! "${MINIONID##*_}" == "import" ]]; then + if [[ ! "$MINION_ROLE" == "import" ]]; then echo "" echo "Creating snapshots of default and local Salt states and pillars and saving to /nsm/backup/" backup_old_states_pillars From 4599b95ae7d5b53d1ab047cc0540a9c268df0c7f Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 22 Sep 2025 16:37:16 -0400 Subject: [PATCH 036/124] separate salt-minion service file --- salt/libvirt/bridge.sls | 25 ++---- salt/salt/mine_functions.sls | 2 +- salt/salt/minion.sls | 124 ------------------------------ salt/salt/minion/service_file.sls | 26 +++++++ 4 files changed, 35 insertions(+), 142 deletions(-) delete mode 100644 salt/salt/minion.sls create mode 100644 salt/salt/minion/service_file.sls diff --git a/salt/libvirt/bridge.sls b/salt/libvirt/bridge.sls index b8f720993..ed405584e 100644 --- a/salt/libvirt/bridge.sls +++ b/salt/libvirt/bridge.sls @@ -4,6 +4,9 @@ # Elastic License 2.0. 
# We do not import GLOBALS in this state because it is called during setup +include: + - salt.mine_functions + - salt.minion.service_file down_original_mgmt_interface: cmd.run: @@ -28,29 +31,17 @@ wait_for_br0_ip: - timeout: 95 - onchanges: - cmd: down_original_mgmt_interface + - onchanges_in: + - file: salt_minion_service_unit_file + - file: mine_functions -{% if grains.role == 'so-hypervisor' %} - -update_mine_functions: - file.managed: - - name: /etc/salt/minion.d/mine_functions.conf - - contents: | - mine_interval: 25 - mine_functions: - network.ip_addrs: - - interface: br0 - {%- if role in ['so-eval','so-import','so-manager','so-managerhype','so-managersearch','so-standalone'] %} - x509.get_pem_entries: - - glob_path: '/etc/pki/ca.crt' - {% endif %} - - onchanges: - - cmd: wait_for_br0_ip +{% if grains.role in ['so-hypervisor', 'so-managerhype'] %} restart_salt_minion_service: service.running: - name: salt-minion - enable: True - listen: - - file: update_mine_functions + - file: mine_functions {% endif %} diff --git a/salt/salt/mine_functions.sls b/salt/salt/mine_functions.sls index ed786e997..ae3df1ce9 100644 --- a/salt/salt/mine_functions.sls +++ b/salt/salt/mine_functions.sls @@ -3,7 +3,7 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. -# this state was seperated from salt.minion state since it is called during setup +# this state was separated from salt.minion state since it is called during setup # GLOBALS are imported in the salt.minion state and that is not available at that point in setup # this state is included in the salt.minion state diff --git a/salt/salt/minion.sls b/salt/salt/minion.sls deleted file mode 100644 index b85fad1c0..000000000 --- a/salt/salt/minion.sls +++ /dev/null @@ -1,124 +0,0 @@ -{% from 'vars/globals.map.jinja' import GLOBALS %} -{% from 'salt/map.jinja' import UPGRADECOMMAND with context %} -{% from 'salt/map.jinja' import SALTVERSION %} -{% from 'salt/map.jinja' import INSTALLEDSALTVERSION %} -{% from 'salt/map.jinja' import SALTPACKAGES %} -{% from 'salt/map.jinja' import SYSTEMD_UNIT_FILE %} -{% import_yaml 'salt/minion.defaults.yaml' as SALTMINION %} - -include: - - salt.python_modules - - salt.patch.x509_v2 - - salt - - systemd.reload - - repo.client - - salt.mine_functions -{% if GLOBALS.role in GLOBALS.manager_roles %} - - ca -{% endif %} - -{% if INSTALLEDSALTVERSION|string != SALTVERSION|string %} - -{# this is added in 2.4.120 to remove salt repo files pointing to saltproject.io to accomodate the move to broadcom and new bootstrap-salt script #} -{% if salt['pkg.version_cmp'](GLOBALS.so_version, '2.4.120') == -1 %} -{% set saltrepofile = '/etc/yum.repos.d/salt.repo' %} -{% if grains.os_family == 'Debian' %} -{% set saltrepofile = '/etc/apt/sources.list.d/salt.list' %} -{% endif %} -remove_saltproject_io_repo_minion: - file.absent: - - name: {{ saltrepofile }} -{% endif %} - -unhold_salt_packages: - pkg.unheld: - - pkgs: -{% for package in SALTPACKAGES %} - - {{ package }} -{% endfor %} - -install_salt_minion: - cmd.run: - - name: /bin/sh -c '{{ UPGRADECOMMAND }}' - -# minion service is in failed state after upgrade. 
this command will start it after the state run for the upgrade completes -start_minion_post_upgrade: - cmd.run: - - name: | - exec 0>&- # close stdin - exec 1>&- # close stdout - exec 2>&- # close stderr - nohup /bin/sh -c 'sleep 30; systemctl start salt-minion' & - - require: - - cmd: install_salt_minion - - watch: - - cmd: install_salt_minion - - order: last - -{% endif %} - -{% if INSTALLEDSALTVERSION|string == SALTVERSION|string %} - -{% for package in SALTPACKAGES %} -# only hold the package if it is already installed -{% if salt['pkg.version'](package) %} -hold_{{ package }}_package: - pkg.held: - - name: {{ package }} - - version: {{SALTVERSION}}-0.* -{% endif %} -{% endfor %} - -remove_error_log_level_logfile: - file.line: - - name: /etc/salt/minion - - match: "log_level_logfile: error" - - mode: delete - -remove_error_log_level: - file.line: - - name: /etc/salt/minion - - match: "log_level: error" - - mode: delete - -set_log_levels: - file.append: - - name: /etc/salt/minion - - text: - - "log_level: info" - - "log_level_logfile: info" - -enable_startup_states: - file.uncomment: - - name: /etc/salt/minion - - regex: '^startup_states: highstate$' - - unless: pgrep so-setup - -# prior to 2.4.30 this managed file would restart the salt-minion service when updated -# since this file is currently only adding a delay service start -# it is not required to restart the service -salt_minion_service_unit_file: - file.managed: - - name: {{ SYSTEMD_UNIT_FILE }} - - source: salt://salt/service/salt-minion.service.jinja - - template: jinja - - onchanges_in: - - module: systemd_reload - -{% endif %} - -# this has to be outside the if statement above since there are _in calls to this state -salt_minion_service: - service.running: - - name: salt-minion - - enable: True - - onlyif: test "{{INSTALLEDSALTVERSION}}" == "{{SALTVERSION}}" - - listen: - - file: mine_functions -{% if INSTALLEDSALTVERSION|string == SALTVERSION|string %} - - file: set_log_levels -{% endif %} -{% if GLOBALS.role in GLOBALS.manager_roles %} - - file: /etc/salt/minion.d/signing_policies.conf -{% endif %} - - order: last diff --git a/salt/salt/minion/service_file.sls b/salt/salt/minion/service_file.sls new file mode 100644 index 000000000..8aded2d60 --- /dev/null +++ b/salt/salt/minion/service_file.sls @@ -0,0 +1,26 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +{% from 'salt/map.jinja' import SALTVERSION %} +{% from 'salt/map.jinja' import INSTALLEDSALTVERSION %} +{% from 'salt/map.jinja' import SYSTEMD_UNIT_FILE %} + +include: + - systemd.reload + +{% if INSTALLEDSALTVERSION|string == SALTVERSION|string %} + +# prior to 2.4.30 this managed file would restart the salt-minion service when updated +# since this file is currently only adding a delay service start +# it is not required to restart the service +salt_minion_service_unit_file: + file.managed: + - name: {{ SYSTEMD_UNIT_FILE }} + - source: salt://salt/service/salt-minion.service.jinja + - template: jinja + - onchanges_in: + - module: systemd_reload + +{% endif %} From 14ddbd32add8dab355c253a9aa3f31544979f5d6 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 22 Sep 2025 16:38:40 -0400 Subject: [PATCH 037/124] salt-minion service file changes for hypervisor and managerhype --- salt/libvirt/bridge.sls | 4 -- salt/salt/minion/init.sls | 117 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 4 deletions(-) create mode 100644 salt/salt/minion/init.sls diff --git a/salt/libvirt/bridge.sls b/salt/libvirt/bridge.sls index ed405584e..bd76f8ef4 100644 --- a/salt/libvirt/bridge.sls +++ b/salt/libvirt/bridge.sls @@ -35,13 +35,9 @@ wait_for_br0_ip: - file: salt_minion_service_unit_file - file: mine_functions -{% if grains.role in ['so-hypervisor', 'so-managerhype'] %} - restart_salt_minion_service: service.running: - name: salt-minion - enable: True - listen: - file: mine_functions - -{% endif %} diff --git a/salt/salt/minion/init.sls b/salt/salt/minion/init.sls new file mode 100644 index 000000000..374e6954c --- /dev/null +++ b/salt/salt/minion/init.sls @@ -0,0 +1,117 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +{% from 'vars/globals.map.jinja' import GLOBALS %} +{% from 'salt/map.jinja' import UPGRADECOMMAND with context %} +{% from 'salt/map.jinja' import SALTVERSION %} +{% from 'salt/map.jinja' import INSTALLEDSALTVERSION %} +{% from 'salt/map.jinja' import SALTPACKAGES %} +{% import_yaml 'salt/minion.defaults.yaml' as SALTMINION %} + +include: + - salt.python_modules + - salt.patch.x509_v2 + - salt + - repo.client + - salt.mine_functions + - salt.minion.service_file +{% if GLOBALS.role in GLOBALS.manager_roles %} + - ca +{% endif %} + +{% if INSTALLEDSALTVERSION|string != SALTVERSION|string %} + +{# this is added in 2.4.120 to remove salt repo files pointing to saltproject.io to accomodate the move to broadcom and new bootstrap-salt script #} +{% if salt['pkg.version_cmp'](GLOBALS.so_version, '2.4.120') == -1 %} +{% set saltrepofile = '/etc/yum.repos.d/salt.repo' %} +{% if grains.os_family == 'Debian' %} +{% set saltrepofile = '/etc/apt/sources.list.d/salt.list' %} +{% endif %} +remove_saltproject_io_repo_minion: + file.absent: + - name: {{ saltrepofile }} +{% endif %} + +unhold_salt_packages: + pkg.unheld: + - pkgs: +{% for package in SALTPACKAGES %} + - {{ package }} +{% endfor %} + +install_salt_minion: + cmd.run: + - name: /bin/sh -c '{{ UPGRADECOMMAND }}' + +# minion service is in failed state after upgrade. 
this command will start it after the state run for the upgrade completes +start_minion_post_upgrade: + cmd.run: + - name: | + exec 0>&- # close stdin + exec 1>&- # close stdout + exec 2>&- # close stderr + nohup /bin/sh -c 'sleep 30; systemctl start salt-minion' & + - require: + - cmd: install_salt_minion + - watch: + - cmd: install_salt_minion + - order: last + +{% endif %} + +{% if INSTALLEDSALTVERSION|string == SALTVERSION|string %} + +{% for package in SALTPACKAGES %} +# only hold the package if it is already installed +{% if salt['pkg.version'](package) %} +hold_{{ package }}_package: + pkg.held: + - name: {{ package }} + - version: {{SALTVERSION}}-0.* +{% endif %} +{% endfor %} + +remove_error_log_level_logfile: + file.line: + - name: /etc/salt/minion + - match: "log_level_logfile: error" + - mode: delete + +remove_error_log_level: + file.line: + - name: /etc/salt/minion + - match: "log_level: error" + - mode: delete + +set_log_levels: + file.append: + - name: /etc/salt/minion + - text: + - "log_level: info" + - "log_level_logfile: info" + +enable_startup_states: + file.uncomment: + - name: /etc/salt/minion + - regex: '^startup_states: highstate$' + - unless: pgrep so-setup + +{% endif %} + +# this has to be outside the if statement above since there are _in calls to this state +salt_minion_service: + service.running: + - name: salt-minion + - enable: True + - onlyif: test "{{INSTALLEDSALTVERSION}}" == "{{SALTVERSION}}" + - listen: + - file: mine_functions +{% if INSTALLEDSALTVERSION|string == SALTVERSION|string %} + - file: set_log_levels +{% endif %} +{% if GLOBALS.role in GLOBALS.manager_roles %} + - file: /etc/salt/minion.d/signing_policies.conf +{% endif %} + - order: last From 4587301cca382501eae464943c72b1dacde84170 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 23 Sep 2025 15:56:00 -0400 Subject: [PATCH 038/124] only update mine for managerhype during setup --- salt/libvirt/bridge.sls | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/salt/libvirt/bridge.sls b/salt/libvirt/bridge.sls index bd76f8ef4..096616e2e 100644 --- a/salt/libvirt/bridge.sls +++ b/salt/libvirt/bridge.sls @@ -5,7 +5,11 @@ # We do not import GLOBALS in this state because it is called during setup include: +{# If we update the mine functions for the so-hypervisor node, then it will not be able to update the mine when the state run. 
#} +{# This state is called from so-functions during setup and the so-hypervisor node would not have an accepted minion key and therefore couldn't update mine #} +{% if grains.role == 'so-managerhype '%} - salt.mine_functions +{% endif %} - salt.minion.service_file down_original_mgmt_interface: @@ -33,11 +37,15 @@ wait_for_br0_ip: - cmd: down_original_mgmt_interface - onchanges_in: - file: salt_minion_service_unit_file +{% if grains.role == 'so-managerhype '%} - file: mine_functions +{% endif %} +{% if grains.role == 'so-managerhype '%} restart_salt_minion_service: service.running: - name: salt-minion - enable: True - listen: - file: mine_functions +{% endif %} From 5a67b89a808e89167014aa6fc0d1c03e8a8b409d Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 24 Sep 2025 09:49:02 -0400 Subject: [PATCH 039/124] Update so-saltstack-update add -v -vv and test / dry run mode --- salt/manager/tools/sbin/so-saltstack-update | 160 +++++++++++++++++++- 1 file changed, 153 insertions(+), 7 deletions(-) diff --git a/salt/manager/tools/sbin/so-saltstack-update b/salt/manager/tools/sbin/so-saltstack-update index 4be8f095c..2f385ab89 100755 --- a/salt/manager/tools/sbin/so-saltstack-update +++ b/salt/manager/tools/sbin/so-saltstack-update @@ -5,10 +5,12 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. - default_salt_dir=/opt/so/saltstack/default -clone_to_tmp() { +VERBOSE=0 +VERY_VERBOSE=0 +TEST_MODE=0 +clone_to_tmp() { # TODO Need to add a air gap option # Make a temp location for the files mkdir /tmp/sogh @@ -16,19 +18,110 @@ clone_to_tmp() { #git clone -b dev https://github.com/Security-Onion-Solutions/securityonion.git git clone https://github.com/Security-Onion-Solutions/securityonion.git cd /tmp +} +show_file_changes() { + local source_dir="$1" + local dest_dir="$2" + local dir_type="$3" # "salt" or "pillar" + + if [ $VERBOSE -eq 0 ]; then + return + fi + + echo "=== Changes for $dir_type directory ===" + + # Find all files in source directory + if [ -d "$source_dir" ]; then + find "$source_dir" -type f | while read -r source_file; do + # Get relative path + rel_path="${source_file#$source_dir/}" + dest_file="$dest_dir/$rel_path" + + if [ ! -f "$dest_file" ]; then + echo "ADDED: $dest_file" + if [ $VERY_VERBOSE -eq 1 ]; then + echo " (New file - showing first 20 lines)" + head -n 20 "$source_file" | sed 's/^/ + /' + echo "" + fi + elif ! cmp -s "$source_file" "$dest_file"; then + echo "MODIFIED: $dest_file" + if [ $VERY_VERBOSE -eq 1 ]; then + echo " (Changes:)" + diff -u "$dest_file" "$source_file" | sed 's/^/ /' + echo "" + fi + fi + done + fi + + # Find deleted files (exist in dest but not in source) + if [ -d "$dest_dir" ]; then + find "$dest_dir" -type f | while read -r dest_file; do + # Get relative path + rel_path="${dest_file#$dest_dir/}" + source_file="$source_dir/$rel_path" + + if [ ! 
-f "$source_file" ]; then + echo "DELETED: $dest_file" + if [ $VERY_VERBOSE -eq 1 ]; then + echo " (File was deleted)" + echo "" + fi + fi + done + fi + + echo "" } copy_new_files() { - # Copy new files over to the salt dir cd /tmp/sogh/securityonion git checkout $BRANCH VERSION=$(cat VERSION) + + if [ $TEST_MODE -eq 1 ]; then + echo "=== TEST MODE: Showing what would change without making changes ===" + echo "Branch: $BRANCH" + echo "Version: $VERSION" + echo "" + fi + + # Show changes before copying if verbose mode is enabled OR if in test mode + if [ $VERBOSE -eq 1 ] || [ $TEST_MODE -eq 1 ]; then + if [ $TEST_MODE -eq 1 ]; then + # In test mode, force at least basic verbose output + local old_verbose=$VERBOSE + if [ $VERBOSE -eq 0 ]; then + VERBOSE=1 + fi + fi + + echo "Analyzing file changes..." + show_file_changes "$(pwd)/salt" "$default_salt_dir/salt" "salt" + show_file_changes "$(pwd)/pillar" "$default_salt_dir/pillar" "pillar" + + if [ $TEST_MODE -eq 1 ] && [ $old_verbose -eq 0 ]; then + # Restore original verbose setting + VERBOSE=$old_verbose + fi + fi + + # If in test mode, don't copy files + if [ $TEST_MODE -eq 1 ]; then + echo "=== TEST MODE: No files were modified ===" + echo "To apply these changes, run without --test option" + rm -rf /tmp/sogh + return + fi + # We need to overwrite if there is a repo file if [ -d /opt/so/repo ]; then tar -czf /opt/so/repo/"$VERSION".tar.gz -C "$(pwd)/.." . fi + rsync -a salt $default_salt_dir/ rsync -a pillar $default_salt_dir/ chown -R socore:socore $default_salt_dir/salt @@ -45,11 +138,64 @@ got_root(){ fi } -got_root -if [ $# -ne 1 ] ; then +show_usage() { + echo "Usage: $0 [-v] [-vv] [--test] [branch]" + echo " -v Show verbose output (files changed/added/deleted)" + echo " -vv Show very verbose output (includes file diffs)" + echo " --test Test mode - show what would change without making changes" + echo " branch Git branch to checkout (default: 2.4/main)" + echo "" + echo "Examples:" + echo " $0 # Normal operation" + echo " $0 -v # Show which files change" + echo " $0 -vv # Show files and their diffs" + echo " $0 --test # See what would change (dry run)" + echo " $0 --test -vv # Test mode with detailed diffs" + echo " $0 -v dev-branch # Use specific branch with verbose output" + exit 1 +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -v) + VERBOSE=1 + shift + ;; + -vv) + VERBOSE=1 + VERY_VERBOSE=1 + shift + ;; + --test) + TEST_MODE=1 + shift + ;; + -h|--help) + show_usage + ;; + -*) + echo "Unknown option $1" + show_usage + ;; + *) + # This should be the branch name + if [ -z "$BRANCH" ]; then + BRANCH="$1" + else + echo "Too many arguments" + show_usage + fi + shift + ;; + esac +done + +# Set default branch if not provided +if [ -z "$BRANCH" ]; then BRANCH=2.4/main -else - BRANCH=$1 fi + +got_root clone_to_tmp copy_new_files From a3401aad117c51e970718eb4874b0978b5e0cc99 Mon Sep 17 00:00:00 2001 From: Jorge Reyes <94730068+reyesj2@users.noreply.github.com> Date: Wed, 24 Sep 2025 08:56:40 -0500 Subject: [PATCH 040/124] typo --- salt/manager/tools/sbin/soup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index f324924cb..e49be133f 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1162,7 +1162,7 @@ update_import_fleet_output() { if output=$(curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/outputs/so-manager_elasticsearch" --retry 3 --fail 2>/dev/null); then # Update the 
current config of so-manager_elasticsearch output policy in place (leaving any customizations like having changed the preset value from 'balanced' to 'performance') CAFINGERPRINT=$(openssl x509 -in /etc/pki/tls/certs/intca.crt -outform DER | sha256sum | cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') - updated_policy=$(jq --args CAFINGERPRINT "$CAFINGERPRINT" '.item | (del(.id) | .ca_trusted_fingerprint = $CAFINGERPRINT)' <<< "$output") + updated_policy=$(jq --arg CAFINGERPRINT "$CAFINGERPRINT" '.item | (del(.id) | .ca_trusted_fingerprint = $CAFINGERPRINT)' <<< "$output") if curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/outputs/so-manager_elasticsearch" -XPUT -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$updated_policy" --retry 3 --fail 2>/dev/null; then echo "Successfully updated so-manager_elasticsearch fleet output policy" else From 3a87af805fffc3e508031b935b64b16317c6caa4 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 24 Sep 2025 15:19:46 -0400 Subject: [PATCH 041/124] update service file, use salt.minion state to update mine_functions --- salt/libvirt/bridge.sls | 12 +----------- setup/so-functions | 6 ++---- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/salt/libvirt/bridge.sls b/salt/libvirt/bridge.sls index 096616e2e..9e8c59f90 100644 --- a/salt/libvirt/bridge.sls +++ b/salt/libvirt/bridge.sls @@ -5,11 +5,6 @@ # We do not import GLOBALS in this state because it is called during setup include: -{# If we update the mine functions for the so-hypervisor node, then it will not be able to update the mine when the state run. #} -{# This state is called from so-functions during setup and the so-hypervisor node would not have an accepted minion key and therefore couldn't update mine #} -{% if grains.role == 'so-managerhype '%} - - salt.mine_functions -{% endif %} - salt.minion.service_file down_original_mgmt_interface: @@ -37,15 +32,10 @@ wait_for_br0_ip: - cmd: down_original_mgmt_interface - onchanges_in: - file: salt_minion_service_unit_file -{% if grains.role == 'so-managerhype '%} - - file: mine_functions -{% endif %} -{% if grains.role == 'so-managerhype '%} restart_salt_minion_service: service.running: - name: salt-minion - enable: True - listen: - - file: mine_functions -{% endif %} + - file: salt_minion_service_unit_file diff --git a/setup/so-functions b/setup/so-functions index 9ab11a904..4434dc908 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1194,10 +1194,8 @@ hypervisor_local_states() { info "Running libvirt states for hypervisor" logCmd "salt-call state.apply libvirt.64962 --local --file-root=../salt/ -l info queue=True" info "Setting up bridge for $MNIC" - salt-call state.apply libvirt.bridge --local --file-root=../salt/ -l info pillar='{"host": {"mainint": "'$MNIC'"}}' queue=True - if [ $is_managerhype ]; then - logCmd "salt-call state.apply salt.minion queue=True" - fi + salt-call state.apply libvirt.bridge --local --file-root=../salt/ -l info pillar='{"host": {"mainint": "'$MNIC'"}}' queue=True + logCmd "salt-call state.apply salt.minion queue=True" fi } From c836dd2acd43aebb95c9fe52f06880072dbeb700 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 24 Sep 2025 16:50:29 -0400 Subject: [PATCH 042/124] set interface for network.ip_addrs for hypervisors --- salt/libvirt/bridge.sls | 3 +++ salt/salt/map.jinja | 2 +- setup/so-functions | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/salt/libvirt/bridge.sls b/salt/libvirt/bridge.sls index 9e8c59f90..cc28bd8b7 100644 --- 
a/salt/libvirt/bridge.sls +++ b/salt/libvirt/bridge.sls @@ -6,6 +6,7 @@ # We do not import GLOBALS in this state because it is called during setup include: - salt.minion.service_file + - salt.mine_functions down_original_mgmt_interface: cmd.run: @@ -32,6 +33,7 @@ wait_for_br0_ip: - cmd: down_original_mgmt_interface - onchanges_in: - file: salt_minion_service_unit_file + - file: mine_functions restart_salt_minion_service: service.running: @@ -39,3 +41,4 @@ restart_salt_minion_service: - enable: True - listen: - file: salt_minion_service_unit_file + - file: mine_functions diff --git a/salt/salt/map.jinja b/salt/salt/map.jinja index 1e3b200f4..81baa100a 100644 --- a/salt/salt/map.jinja +++ b/salt/salt/map.jinja @@ -4,7 +4,7 @@ Elastic License 2.0. #} {% set role = salt['grains.get']('role', '') %} -{% if role in ['so-hypervisor','so-managerhype'] and salt['network.ip_addrs']('br0')|length > 0 %} +{% if role in ['so-hypervisor','so-managerhype'] %} {% set interface = 'br0' %} {% else %} {% set interface = pillar.host.mainint %} diff --git a/setup/so-functions b/setup/so-functions index 4434dc908..00f2e46c1 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1195,7 +1195,9 @@ hypervisor_local_states() { logCmd "salt-call state.apply libvirt.64962 --local --file-root=../salt/ -l info queue=True" info "Setting up bridge for $MNIC" salt-call state.apply libvirt.bridge --local --file-root=../salt/ -l info pillar='{"host": {"mainint": "'$MNIC'"}}' queue=True - logCmd "salt-call state.apply salt.minion queue=True" + if [ $is_managerhype ]; then + logCmd "salt-call state.apply salt.minion queue=True" + fi fi } From 23e12811a1d76305db5841a191a2f9e49e77c1c3 Mon Sep 17 00:00:00 2001 From: Jorge Reyes <94730068+reyesj2@users.noreply.github.com> Date: Thu, 25 Sep 2025 09:51:32 -0500 Subject: [PATCH 043/124] make sure fleet-default-output is not set as either default output policy --- salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index 586c68a80..ee74d1056 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -79,7 +79,7 @@ if DEFAULTPOLICY=$(fleet_api "outputs/fleet-default-output"); then fleet_default=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default') fleet_default_monitoring=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default_monitoring') # Check that fleet-default-output isn't configured as a default for anything ( both variables return false ) - if [[ $fleet_default ]] && [[ $fleet_default_monitoring ]]; then + if [[ ! $fleet_default ]] && [[ ! $fleet_default_monitoring ]]; then echo -e "\nso-manager_elasticsearch is configured as the current default policy..." else echo -e "\nVerification of so-manager_elasticsearch policy failed... The default 'fleet-default-output' output is still active..." 
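
A note on the check patched above: "jq -er '.item.is_default'" prints the JSON boolean as the literal string "true" or "false", and a bare [[ $var ]] test in bash only verifies that the string is non-empty, so the string "false" still evaluates as truthy. The minimal sketch below illustrates the pitfall and the explicit comparison that a later patch in this series settles on; it is a standalone example, not code from the patch:

    #!/bin/bash
    # Stand-in for the string jq prints when the JSON value is false.
    fleet_default="false"

    # Non-emptiness test: this branch is taken even though the value is "false".
    if [[ $fleet_default ]]; then
        echo 'bare [[ $var ]] treats any non-empty string as true'
    fi

    # Explicit string comparison is unambiguous.
    if [[ $fleet_default == "false" ]]; then
        echo 'explicit comparison correctly detects the disabled output'
    fi
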
From d81d9a0722f2dbcc247e6f8882e94ea93a79eda8 Mon Sep 17 00:00:00 2001 From: Matthew Wright Date: Thu, 25 Sep 2025 14:45:06 -0400 Subject: [PATCH 044/124] small tweak to investigation prompt --- salt/soc/defaults.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index 58b3a3827..6caeddbe3 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -2545,7 +2545,7 @@ soc: level: 'high' # info | low | medium | high | critical assistant: enabled: false - investigationPrompt: Investigate Alert ID {socid} + investigationPrompt: Investigate Alert ID {socId} contextLimitSmall: 200000 contextLimitLarge: 1000000 thresholdColorRatioLow: 0.5 From 3a2ceb0b6fb995d7e37d1e320268dbdf7eaf9a28 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 25 Sep 2025 15:40:00 -0400 Subject: [PATCH 045/124] retry kratos pulls since this is the first image to install during setup --- salt/kratos/enabled.sls | 3 +++ 1 file changed, 3 insertions(+) diff --git a/salt/kratos/enabled.sls b/salt/kratos/enabled.sls index 31097ccf4..f0345edec 100644 --- a/salt/kratos/enabled.sls +++ b/salt/kratos/enabled.sls @@ -54,6 +54,9 @@ so-kratos: - file: kratosconfig - file: kratoslogdir - file: kratosdir + - retry: + attempts: 10 + interval: 10 delete_so-kratos_so-status.disabled: file.uncomment: From 1fb558cc7764d2952d9cfa3ef5f81eeabe8c52b3 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 25 Sep 2025 16:06:25 -0400 Subject: [PATCH 046/124] managerhype br0 setup --- salt/salt/map.jinja | 5 ++++- setup/so-functions | 17 ++++++++++++----- setup/so-setup | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/salt/salt/map.jinja b/salt/salt/map.jinja index 81baa100a..62b7f1b18 100644 --- a/salt/salt/map.jinja +++ b/salt/salt/map.jinja @@ -4,7 +4,10 @@ Elastic License 2.0. #} {% set role = salt['grains.get']('role', '') %} -{% if role in ['so-hypervisor','so-managerhype'] %} +{# We are using usebr0 mostly for setup of the so-managerhype node and controlling when we use br0 vs the physical interface #} +{% set usebr0 = salt['pillar.get']('usebr0', True) %} + +{% if role in ['so-hypervisor','so-managerhype'] and usebr0 %} {% set interface = 'br0' %} {% else %} {% set interface = pillar.host.mainint %} diff --git a/setup/so-functions b/setup/so-functions index 00f2e46c1..0d7890d17 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -541,8 +541,15 @@ configure_minion() { "log_file: /opt/so/log/salt/minion"\ "#startup_states: highstate" >> "$minion_config" - info "Running: salt-call state.apply salt.mine_functions --local --file-root=../salt/ -l info pillar='{"host": {"mainint": "$MNIC"}}'" - salt-call state.apply salt.mine_functions --local --file-root=../salt/ -l info pillar="{'host': {'mainint': $MNIC}}" + # At the time the so-managerhype node does not yet have the bridge configured. + # The so-hypervisor node doesn't either, but it doesn't cause issues here. 
+ local usebr0=false + if [ "$minion_type" == 'hypervisor' ]; then + usebr0=true + fi + local pillar_json="{\"host\": {\"mainint\": \"$MNIC\"}, \"usebr0\": $usebr0}" + info "Running: salt-call state.apply salt.mine_functions --local --file-root=../salt/ -l info pillar='$pillar_json'" + salt-call state.apply salt.mine_functions --local --file-root=../salt/ -l info pillar="$pillar_json" { logCmd "systemctl enable salt-minion"; @@ -1195,9 +1202,9 @@ hypervisor_local_states() { logCmd "salt-call state.apply libvirt.64962 --local --file-root=../salt/ -l info queue=True" info "Setting up bridge for $MNIC" salt-call state.apply libvirt.bridge --local --file-root=../salt/ -l info pillar='{"host": {"mainint": "'$MNIC'"}}' queue=True - if [ $is_managerhype ]; then - logCmd "salt-call state.apply salt.minion queue=True" - fi + #if [ $is_managerhype ]; then + # logCmd "salt-call state.apply salt.minion queue=True" + #fi fi } diff --git a/setup/so-setup b/setup/so-setup index 347a7165c..ab055fd2d 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -762,6 +762,7 @@ if ! [[ -f $install_opt_file ]]; then fi logCmd "salt-call state.apply common.packages" logCmd "salt-call state.apply common" + hypervisor_local_states # this will apply the salt.minion state first since salt.master includes salt.minion logCmd "salt-call state.apply salt.master" # wait here until we get a response from the salt-master since it may have just restarted @@ -826,7 +827,6 @@ if ! [[ -f $install_opt_file ]]; then checkin_at_boot set_initial_firewall_access logCmd "salt-call schedule.enable -linfo --local" - hypervisor_local_states verify_setup else touch /root/accept_changes From 8e5fa9576ceee0d4a943747737e0011c26fbfaab Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 26 Sep 2025 11:32:25 -0500 Subject: [PATCH 047/124] create disabled so-manager_elasticsearch output policy first, update it then verify it is the only active output --- .../tools/sbin_jinja/so-elastic-fleet-setup | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index ee74d1056..ab6757893 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -63,7 +63,7 @@ printf "\nAdd Manager Elasticsearch Output...\n" ESCACRT=$(openssl x509 -in "$INTCA" -outform DER | sha256sum | cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') JSON_STRING=$(jq -n \ --arg ESCACRT "$ESCACRT" \ - '{"name":"so-manager_elasticsearch","id":"so-manager_elasticsearch","type":"elasticsearch","hosts":["https://{{ GLOBALS.manager_ip }}:9200","https://{{ GLOBALS.manager }}:9200"],"is_default":true,"is_default_monitoring":true,"config_yaml":"","ca_trusted_fingerprint": $ESCACRT}') + '{"name":"so-manager_elasticsearch","id":"so-manager_elasticsearch","type":"elasticsearch","hosts":["https://{{ GLOBALS.manager_ip }}:9200","https://{{ GLOBALS.manager }}:9200"],"is_default":false,"is_default_monitoring":false,"config_yaml":"","ca_trusted_fingerprint": $ESCACRT}') if ! fleet_api "outputs" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then echo -e "\nFailed to create so-elasticsearch_manager policy..." @@ -71,6 +71,13 @@ if ! fleet_api "outputs" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: applicatio fi printf "\n\n" +# so-manager_elasticsearch should exist and be disabled. 
Now update it before checking it's the only default policy
+MANAGER_OUTPUT_ENABLED=$(echo "$JSON_STRING" | jq 'del(.id) | .is_default = true | .is_default_monitoring = true')
+if ! curl -sK /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_elasticsearch" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$MANAGER_OUTPUT_ENABLED"; then
+  echo -e "\nFailed to update so-manager_elasticsearch"
+  exit 1
+fi
+
 # At this point there should only be two policies. fleet-default-output & so-manager_elasticsearch

 status "Verifying so-manager_elasticsearch policy is configured as the current default"

@@ -79,7 +86,7 @@ if DEFAULTPOLICY=$(fleet_api "outputs/fleet-default-output"); then
     fleet_default=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default')
     fleet_default_monitoring=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default_monitoring')
     # Check that fleet-default-output isn't configured as a default for anything (both variables return false)
-    if [[ $fleet_default == "false" ]] && [[ $fleet_default_monitoring == "false" ]]; then
+    if [[ $fleet_default == "false" ]] && [[ $fleet_default_monitoring == "false" ]]; then
        echo -e "\nso-manager_elasticsearch is configured as the current default policy..."
    else
        echo -e "\nVerification of so-manager_elasticsearch policy failed... The default 'fleet-default-output' output is still active..."

From e7752994804bee1f8eb9ba3d9094f1c73832f4b9 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Fri, 26 Sep 2025 15:43:49 -0400
Subject: [PATCH 048/124] so-user target minions with pillar elasticsearch:enabled:true

---
 salt/manager/tools/sbin/so-user | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/salt/manager/tools/sbin/so-user b/salt/manager/tools/sbin/so-user
index 92b3ba385..060dcf3a4 100755
--- a/salt/manager/tools/sbin/so-user
+++ b/salt/manager/tools/sbin/so-user
@@ -387,7 +387,7 @@ function syncElastic() {
       if [[ -z "$SKIP_STATE_APPLY" ]]; then
         echo "Elastic state will be re-applied to affected minions. This will run in the background and may take several minutes to complete."
         echo "Applying elastic state to elastic minions at $(date)" >> /opt/so/log/soc/sync.log 2>&1
-        salt --async -C 'G@role:so-standalone or G@role:so-eval or G@role:so-import or G@role:so-manager or G@role:so-managersearch or G@role:so-searchnode or G@role:so-heavynode' state.apply elasticsearch queue=True >> /opt/so/log/soc/sync.log 2>&1
+        salt --async -C 'I@elasticsearch:enabled:true' state.apply elasticsearch queue=True >> /opt/so/log/soc/sync.log 2>&1
       fi
   else
     echo "Newly generated users/roles files are incomplete; aborting."
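
For context on the targeting change above: G@ compound matchers select minions by grain values, so the hand-maintained role list silently falls out of date whenever a new Elasticsearch-capable role is introduced, while I@ matches on pillar data, so any minion whose pillar sets elasticsearch:enabled:true is covered automatically. A hedged sketch of the difference on the Salt CLI (test.ping is used here only as a harmless probe, and the job ID is illustrative):

    # Grain-based targeting: every Elasticsearch-capable role must be listed.
    salt -C 'G@role:so-standalone or G@role:so-searchnode' test.ping

    # Pillar-based targeting: follows the elasticsearch:enabled:true pillar key
    # regardless of which role set it.
    salt -C 'I@elasticsearch:enabled:true' test.ping

    # Because the state apply in the patch runs with --async, progress can be
    # checked later from the job ID that salt prints:
    salt-run jobs.lookup_jid 20250926154349000000
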
From 6c892fed78a243a4d9b3aab768baa15f3e4ab038 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Mon, 29 Sep 2025 16:47:05 -0400 Subject: [PATCH 049/124] restart registry after upgrading images (in airgap mode) --- salt/manager/tools/sbin/soup | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index e49be133f..52d6e92e9 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -169,6 +169,8 @@ airgap_update_dockers() { tar xf "$AGDOCKER/registry.tar" -C /nsm/docker-registry/docker echo "Add Registry back" docker load -i "$AGDOCKER/registry_image.tar" + echo "Restart registry container" + salt-call state.apply registry queue=True fi fi } From c8814d06326341c93e28cffcbbb779ce5dbcd3cb Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 29 Sep 2025 16:58:45 -0400 Subject: [PATCH 050/124] removed commented code --- setup/so-functions | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 0d7890d17..5847df704 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1202,9 +1202,6 @@ hypervisor_local_states() { logCmd "salt-call state.apply libvirt.64962 --local --file-root=../salt/ -l info queue=True" info "Setting up bridge for $MNIC" salt-call state.apply libvirt.bridge --local --file-root=../salt/ -l info pillar='{"host": {"mainint": "'$MNIC'"}}' queue=True - #if [ $is_managerhype ]; then - # logCmd "salt-call state.apply salt.minion queue=True" - #fi fi } From e9af46a8cbddba5e17d292d2776067c3523f2b51 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:28:42 -0500 Subject: [PATCH 051/124] less strict exits for fleet configuration --- .../tools/sbin/so-elastic-fleet-common | 2 +- ...ic-fleet-integration-policy-elastic-defend | 10 ++++++-- .../so-elastic-fleet-integration-policy-load | 25 ++++++++++++------- .../so-elastic-fleet-integration-upgrade | 10 ++++++-- 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 4ca5030aa..1a597b1db 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -27,7 +27,7 @@ fleet_api() { local QUERYPATH=$1 shift - curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/${QUERYPATH}" "$@" --retry 3 --fail 2>/dev/null + curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/${QUERYPATH}" "$@" --retry 3 --retry-delay 10 --fail 2>/dev/null } elastic_fleet_integration_check() { diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend index 9769f2f79..d036f0d94 100755 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend @@ -8,6 +8,7 @@ . /usr/sbin/so-elastic-fleet-common +ERROR=false # Manage Elastic Defend Integration for Initial Endpoints Policy for INTEGRATION in /opt/so/conf/elastic-fleet/integrations/elastic-defend/*.json do @@ -17,13 +18,18 @@ do printf "\n\nIntegration $NAME exists - Upgrading integration policy\n" if ! 
elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID"; then
            echo -e "\nFailed to upgrade integration policy for ${INTEGRATION##*/}"
-            exit 1
+            ERROR=true
+            continue
        fi
    else
        printf "\n\nIntegration does not exist - Creating integration\n"
        if ! elastic_fleet_integration_create "@$INTEGRATION"; then
            echo -e "\nFailed to create integration for ${INTEGRATION##*/}"
-            exit 1
+            ERROR=true
+            continue
        fi
    fi
done
+if [[ "$ERROR" == "true" ]]; then
+    exit 1
+fi
\ No newline at end of file
diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load
index 8427b47bc..ca260891f 100644
--- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load
+++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load
@@ -17,7 +17,6 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then

     # Third, configure Elastic Defend Integration separately
     /usr/sbin/so-elastic-fleet-integration-policy-elastic-defend
-
     # Initial Endpoints
     for INTEGRATION in /opt/so/conf/elastic-fleet/integrations/endpoints-initial/*.json
     do
@@ -27,13 +26,15 @@
             printf "\n\nIntegration $NAME exists - Updating integration\n"
             if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then
                 echo -e "\nFailed to update integration for ${INTEGRATION##*/}"
-                exit 1
+                RETURN_CODE=1
+                continue
             fi
         else
             printf "\n\nIntegration does not exist - Creating integration\n"
             if ! elastic_fleet_integration_create "@$INTEGRATION"; then
                 echo -e "\nFailed to create integration for ${INTEGRATION##*/}"
-                exit 1
+                RETURN_CODE=1
+                continue
             fi
         fi
     done
@@ -47,13 +48,15 @@
             printf "\n\nIntegration $NAME exists - Updating integration\n"
             if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then
                 echo -e "\nFailed to update integration for ${INTEGRATION##*/}"
-                exit 1
+                RETURN_CODE=1
+                continue
             fi
         else
             printf "\n\nIntegration does not exist - Creating integration\n"
             if ! elastic_fleet_integration_create "@$INTEGRATION"; then
                 echo -e "\nFailed to create integration for ${INTEGRATION##*/}"
-                exit 1
+                RETURN_CODE=1
+                continue
             fi
         fi
     done
@@ -70,14 +73,16 @@
             printf "\n\nIntegration $NAME exists - Updating integration\n"
             if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then
                 echo -e "\nFailed to update integration for ${INTEGRATION##*/}"
-                exit 1
+                RETURN_CODE=1
+                continue
             fi
         else
             printf "\n\nIntegration does not exist - Creating integration\n"
             if [ "$NAME" != "elasticsearch-logs" ]; then
                 if ! elastic_fleet_integration_create "@$INTEGRATION"; then
                     echo -e "\nFailed to create integration for ${INTEGRATION##*/}"
-                    exit 1
+                    RETURN_CODE=1
+                    continue
                 fi
             fi
         fi
@@ -97,14 +102,16 @@
             printf "\n\nIntegration $NAME exists - Updating integration\n"
             if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then
                 echo -e "\nFailed to update integration for ${INTEGRATION##*/}"
-                exit 1
+                RETURN_CODE=1
+                continue
             fi
         else
             printf "\n\nIntegration does not exist - Creating integration\n"
             if [ "$NAME" != "elasticsearch-logs" ]; then
                 if ! 
elastic_fleet_integration_create "@$INTEGRATION"; then echo -e "\nFailed to create integration for ${INTEGRATION##*/}" - exit 1 + RETURN_CODE=1 + continue fi fi fi diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade index f1154af1e..1a1448c53 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade @@ -24,6 +24,7 @@ fi default_packages=({% for pkg in SUPPORTED_PACKAGES %}"{{ pkg }}"{% if not loop.last %} {% endif %}{% endfor %}) +ERROR=false for AGENT_POLICY in $agent_policies; do if ! integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY"); then # this script upgrades default integration packages, exit 1 and let salt handle retrying @@ -73,11 +74,13 @@ for AGENT_POLICY in $agent_policies; do echo "No errors detected. Proceeding with upgrade..." if ! elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID"; then echo "Error: Upgrade failed for $PACKAGE_NAME with integration ID '$INTEGRATION_ID'." - exit 1 + ERROR=true + continue fi else echo "Errors detected during dry run for $PACKAGE_NAME policy upgrade..." - exit 1 + ERROR=true + continue fi fi {%- if not AUTO_UPGRADE_INTEGRATIONS %} @@ -86,4 +89,7 @@ for AGENT_POLICY in $agent_policies; do fi done done +if [[ "$ERROR" == "true" ]]; then + exit 1 +fi echo From 5a2e70490969e0c6b3a487b75f4cc3b753140203 Mon Sep 17 00:00:00 2001 From: Corey Ogburn Date: Tue, 30 Sep 2025 15:33:20 -0600 Subject: [PATCH 052/124] New field for assistant health check The health check has a smaller, configurable timeout. --- salt/soc/defaults.yaml | 1 + salt/soc/soc_soc.yaml | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index 6caeddbe3..d93b405b1 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -1493,6 +1493,7 @@ soc: folder: securityonion-normalized assistant: apiUrl: https://onionai.securityonion.net + healthTimeoutSeconds: 3 salt: queueDir: /opt/sensoroni/queue timeoutMs: 45000 diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index 4af20d444..aaa01b5c6 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -585,6 +585,10 @@ soc: description: The URL of the AI gateway. advanced: True global: True + healthTimeoutSeconds: + description: Timeout in seconds for the Onion AI health check. + global: True + advanced: True client: assistant: enabled: From 066e227325eafbfaf6faf75e2dd6ebe3cce1d2fb Mon Sep 17 00:00:00 2001 From: Matthew Wright Date: Wed, 1 Oct 2025 11:01:10 -0400 Subject: [PATCH 053/124] made lowBalanceColorAlert global --- salt/soc/soc_soc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index aaa01b5c6..3fa914227 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -619,6 +619,7 @@ soc: advanced: True lowBalanceColorAlert: description: Onion AI credit amount at which balance turns red. + global: True advanced: True apiTimeoutMs: description: Duration (in milliseconds) to wait for a response from the SOC server API before giving up and showing an error on the SOC UI. 
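
The soc_soc.yaml entries above all follow the same small annotation schema used throughout that file: a description string plus optional flags controlling where the setting is exposed. A minimal sketch using the lowBalanceColorAlert entry as the example (the comments describe inferred semantics, not text from the patch):

    lowBalanceColorAlert:
      description: Onion AI credit amount at which balance turns red.
      global: True      # one grid-wide value rather than a per-node override
      advanced: True    # only surfaced when advanced settings are shown in SOC
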
From 030e4961d7afa8bc091718d1ad985133009191ed Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 1 Oct 2025 12:13:56 -0400 Subject: [PATCH 054/124] updates for wiretap lib --- salt/common/tools/sbin_jinja/so-import-pcap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/salt/common/tools/sbin_jinja/so-import-pcap b/salt/common/tools/sbin_jinja/so-import-pcap index e8c2b84c8..b630df015 100755 --- a/salt/common/tools/sbin_jinja/so-import-pcap +++ b/salt/common/tools/sbin_jinja/so-import-pcap @@ -173,7 +173,7 @@ for PCAP in $INPUT_FILES; do status "- assigning unique identifier to import: $HASH" pcap_data=$(pcapinfo "${PCAP}") - if ! echo "$pcap_data" | grep -q "First packet time:" || echo "$pcap_data" |egrep -q "Last packet time: 1970-01-01|Last packet time: n/a"; then + if ! echo "$pcap_data" | grep -q "Earliest packet time:" || echo "$pcap_data" |egrep -q "Latest packet time: 1970-01-01|Latest packet time: n/a"; then status "- this PCAP file is invalid; skipping" INVALID_PCAPS_COUNT=$((INVALID_PCAPS_COUNT + 1)) else @@ -205,8 +205,8 @@ for PCAP in $INPUT_FILES; do HASHES="${HASHES} ${HASH}" fi - START=$(pcapinfo "${PCAP}" -a |grep "First packet time:" | awk '{print $4}') - END=$(pcapinfo "${PCAP}" -e |grep "Last packet time:" | awk '{print $4}') + START=$(pcapinfo "${PCAP}" -a |grep "Earliest packet time:" | awk '{print $4}') + END=$(pcapinfo "${PCAP}" -e |grep "Latest packet time:" | awk '{print $4}') status "- found PCAP data spanning dates $START through $END" # compare $START to $START_OLDEST From 86eca53d4b39b9370aa3790be366844be0d3c253 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 1 Oct 2025 14:57:25 -0400 Subject: [PATCH 055/124] support for byodmodel --- salt/hypervisor/map.jinja | 6 ++++-- salt/salt/engines/master/virtual_node_manager.py | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/salt/hypervisor/map.jinja b/salt/hypervisor/map.jinja index dae3985d4..3519f6078 100644 --- a/salt/hypervisor/map.jinja +++ b/salt/hypervisor/map.jinja @@ -13,6 +13,7 @@ {# Import defaults.yaml for model hardware capabilities #} {% import_yaml 'hypervisor/defaults.yaml' as DEFAULTS %} +{% set HYPERVISORMERGED = salt['pillar.get']('hypervisor', default=DEFAULTS.hypervisor, merge=True) %} {# Get hypervisor nodes from pillar #} {% set NODES = salt['pillar.get']('hypervisor:nodes', {}) %} @@ -30,9 +31,10 @@ {% set model = '' %} {% if grains %} {% set minion_id = grains.keys() | first %} - {% set model = grains[minion_id].get('sosmodel', '') %} + {% set model = grains[minion_id].get('sosmodel', grains[minion_id].get('byodmodel', '')) %} {% endif %} - {% set model_config = DEFAULTS.hypervisor.model.get(model, {}) %} + + {% set model_config = HYPERVISORMERGED.model.get(model, {}) %} {# Get VM list from VMs file #} {% set vms = {} %} diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index 88ccede9c..f09aca751 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -271,7 +271,7 @@ def parse_hardware_indices(hw_value: Any) -> List[int]: return indices def get_hypervisor_model(hypervisor: str) -> str: - """Get sosmodel from hypervisor grains.""" + """Get sosmodel or byodmodel from hypervisor grains.""" try: # Get cached grains using Salt runner grains = runner.cmd( @@ -283,9 +283,9 @@ def get_hypervisor_model(hypervisor: str) -> str: # Get the first minion ID that matches our hypervisor minion_id = next(iter(grains.keys())) - 
model = grains[minion_id].get('sosmodel') + model = grains[minion_id].get('sosmodel', grains[minion_id].get('byodmodel', '')) if not model: - raise ValueError(f"No sosmodel grain found for hypervisor {hypervisor}") + raise ValueError(f"No sosmodel or byodmodel grain found for hypervisor {hypervisor}") log.debug("Found model %s for hypervisor %s", model, hypervisor) return model From 6b8e2e2643542a5540aaf3df148348b7084cd903 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 1 Oct 2025 19:58:07 -0400 Subject: [PATCH 056/124] Add Filters --- .claude/settings.local.json | 9 +++++ salt/zeek/policy/custom/filters/dns | 30 +++++++++++++++++ salt/zeek/policy/custom/filters/files | 1 + salt/zeek/policy/custom/filters/httphost | 20 +++++++++++ salt/zeek/policy/custom/filters/httpuri | 14 ++++++++ salt/zeek/policy/custom/filters/ssl | 29 ++++++++++++++++ salt/zeek/policy/custom/filters/tunnel | 17 ++++++++++ salt/zeek/soc_zeek.yaml | 42 ++++++++++++++++++++++++ 8 files changed, 162 insertions(+) create mode 100644 .claude/settings.local.json create mode 100644 salt/zeek/policy/custom/filters/dns create mode 100644 salt/zeek/policy/custom/filters/files create mode 100644 salt/zeek/policy/custom/filters/httphost create mode 100644 salt/zeek/policy/custom/filters/httpuri create mode 100644 salt/zeek/policy/custom/filters/ssl create mode 100644 salt/zeek/policy/custom/filters/tunnel diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 000000000..9f305e068 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Bash(grep:*)" + ], + "deny": [] + }, + "enableAllProjectMcpServers": false +} \ No newline at end of file diff --git a/salt/zeek/policy/custom/filters/dns b/salt/zeek/policy/custom/filters/dns new file mode 100644 index 000000000..e79032c19 --- /dev/null +++ b/salt/zeek/policy/custom/filters/dns @@ -0,0 +1,30 @@ +hook DNS::log_policy(rec: DNS::Info, id: Log::ID, filter: Log::Filter) + { + # Only put a single name per line otherwise there will be memory issues! + # If the query comes back blank don't log + if (!rec?$query) + break; + + # If the query comes back with one of these don't log + if (rec?$query && /google.com$/ in rec$query) + break; + + # If the query comes back with one of these don't log + if (rec?$query && /.apple.com$/ in rec$query) + break; + + # Don't log reverse lookups + if (rec?$query && /.in-addr.arpa/ in to_lower(rec$query)) + break; + + # Don't log netbios lookups. 
This generates a crazy amount of logs
+    if (rec?$qtype_name && /NB/ in rec$qtype_name)
+        break;
+    }
+
+event zeek_init()
+{
+    Log::remove_default_filter(DNS::LOG);
+    local filter: Log::Filter = [$name="dns-filter"];
+    Log::add_filter(DNS::LOG, filter);
+}
\ No newline at end of file
diff --git a/salt/zeek/policy/custom/filters/files b/salt/zeek/policy/custom/filters/files
new file mode 100644
index 000000000..867e2c849
--- /dev/null
+++ b/salt/zeek/policy/custom/filters/files
@@ -0,0 +1 @@
+# Placeholder
\ No newline at end of file
diff --git a/salt/zeek/policy/custom/filters/httphost b/salt/zeek/policy/custom/filters/httphost
new file mode 100644
index 000000000..29c682d33
--- /dev/null
+++ b/salt/zeek/policy/custom/filters/httphost
@@ -0,0 +1,20 @@
+### HTTP filter by host entries by string #####
+
+module Filterhttp;
+
+export {
+    global remove_host_entries: set[string] = {"www.genevalab.com", "www.google.com"};
+    }
+
+hook HTTP::log_policy(rec: HTTP::Info, id: Log::ID, filter: Log::Filter)
+    {
+    # Remove HTTP host entries
+    if ( ! rec?$host || rec$host in remove_host_entries )
+        break;
+    }
+event zeek_init()
+{
+    Log::remove_default_filter(HTTP::LOG);
+    local filter: Log::Filter = [$name="http-filter"];
+    Log::add_filter(HTTP::LOG, filter);
+}
\ No newline at end of file
diff --git a/salt/zeek/policy/custom/filters/httpuri b/salt/zeek/policy/custom/filters/httpuri
new file mode 100644
index 000000000..9a57cc5ff
--- /dev/null
+++ b/salt/zeek/policy/custom/filters/httpuri
@@ -0,0 +1,14 @@
+### HTTP filter by uri using pattern ####
+
+hook HTTP::log_policy(rec: HTTP::Info, id: Log::ID, filter: Log::Filter)
+    {
+    # Remove HTTP uri entries by regex
+    if ( rec?$uri && /^\/kratos\// in rec$uri )
+        break;
+    }
+event zeek_init()
+{
+    Log::remove_default_filter(HTTP::LOG);
+    local filter: Log::Filter = [$name="http-filter"];
+    Log::add_filter(HTTP::LOG, filter);
+}
\ No newline at end of file
diff --git a/salt/zeek/policy/custom/filters/ssl b/salt/zeek/policy/custom/filters/ssl
new file mode 100644
index 000000000..e7be0f768
--- /dev/null
+++ b/salt/zeek/policy/custom/filters/ssl
@@ -0,0 +1,29 @@
+### Log filter by JA3S md5 hash:
+hook SSL::log_policy(rec: SSL::Info, id: Log::ID, filter: Log::Filter)
+    {
+    # SSL log filter Ja3s by md5
+    if (rec?$ja3s_cipher && ( /623de93db17d313345d7ea481e7443cf/ ) in rec$ja3s_cipher)
+        break;
+    }
+
+event zeek_init()
+{
+    Log::remove_default_filter(SSL::LOG);
+    local filter: Log::Filter = [$name="ssl-filter"];
+    Log::add_filter(SSL::LOG, filter);
+}
+
+### Log filter by server name:
+hook SSL::log_policy(rec: SSL::Info, id: Log::ID, filter: Log::Filter)
+    {
+    # SSL log filter by server name
+    if (rec?$server_name && ( /api.github.com$/ ) in rec$server_name)
+        break;
+    }
+
+event zeek_init()
+{
+    Log::remove_default_filter(SSL::LOG);
+    local filter: Log::Filter = [$name="ssl-filter"];
+    Log::add_filter(SSL::LOG, filter);
+}
\ No newline at end of file
diff --git a/salt/zeek/policy/custom/filters/tunnel b/salt/zeek/policy/custom/filters/tunnel
new file mode 100644
index 000000000..dd58caa4d
--- /dev/null
+++ b/salt/zeek/policy/custom/filters/tunnel
@@ -0,0 +1,17 @@
+global tunnel_subnet: set[subnet]={
+
+    10.19.0.0/24
+
+};
+
+hook Tunnel::log_policy(rec: Tunnel::Info, id: Log::ID, Filter: Log::Filter)
+    {
+    if (rec$id$orig_h in tunnel_subnet || rec$id$resp_h in tunnel_subnet)
+        break;
+    }
+event zeek_init()
+{
+    Log::remove_default_filter(Tunnel::LOG);
+    local filter: Log::Filter = [$name="tunnel-filter"];
+    Log::add_filter(Tunnel::LOG, filter);
+}
\ No 
newline at end of file diff --git a/salt/zeek/soc_zeek.yaml b/salt/zeek/soc_zeek.yaml index b3b655083..929b9debd 100644 --- a/salt/zeek/soc_zeek.yaml +++ b/salt/zeek/soc_zeek.yaml @@ -61,6 +61,48 @@ zeek: global: True advanced: True duplicates: True + dns: + description: DNS Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + files: + description: Files Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + httphost: + description: HTTP Hosts Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + httpuri: + description: HTTP URI Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + ssl: + description: SSL Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + tunnel: + description: Tunnel Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True file_extraction: description: Contains a list of file or MIME types Zeek will extract from the network streams. Values must adhere to the following format - {"MIME_TYPE":"FILE_EXTENSION"} forcedType: "[]{}" From 9752d6169916271c7dce7b630dfa81d1ee162cf7 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 1 Oct 2025 19:59:28 -0400 Subject: [PATCH 057/124] Add Filters --- .claude/settings.local.json | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 .claude/settings.local.json diff --git a/.claude/settings.local.json b/.claude/settings.local.json deleted file mode 100644 index 9f305e068..000000000 --- a/.claude/settings.local.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(grep:*)" - ], - "deny": [] - }, - "enableAllProjectMcpServers": false -} \ No newline at end of file From 7deef44ff61603d1bce8dad2a667681b2d3e020f Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 2 Oct 2025 11:55:50 -0400 Subject: [PATCH 058/124] check defaults or pillar file --- .../engines/master/virtual_node_manager.py | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index f09aca751..7783e7c35 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -161,6 +161,7 @@ DEFAULT_BASE_PATH = '/opt/so/saltstack/local/salt/hypervisor/hosts' VALID_ROLES = ['sensor', 'searchnode', 'idh', 'receiver', 'heavynode', 'fleet'] LICENSE_PATH = '/opt/so/saltstack/local/pillar/soc/license.sls' DEFAULTS_PATH = '/opt/so/saltstack/default/salt/hypervisor/defaults.yaml' +HYPERVISOR_PILLAR_PATH = '/opt/so/saltstack/local/pillar/hypervisor/soc_hypervisor.sls' # Define the retention period for destroyed VMs (in hours) DESTROYED_VM_RETENTION_HOURS = 48 @@ -295,16 +296,48 @@ def get_hypervisor_model(hypervisor: str) -> str: raise def load_hardware_defaults(model: str) -> dict: - """Load hardware configuration from defaults.yaml.""" + """Load hardware configuration 
from defaults.yaml and optionally override with pillar configuration.""" + config = None + config_source = None + try: + # First, try to load from defaults.yaml + log.debug("Checking for model %s in %s", model, DEFAULTS_PATH) defaults = read_yaml_file(DEFAULTS_PATH) if not defaults or 'hypervisor' not in defaults: raise ValueError("Invalid defaults.yaml structure") if 'model' not in defaults['hypervisor']: raise ValueError("No model configurations found in defaults.yaml") - if model not in defaults['hypervisor']['model']: - raise ValueError(f"Model {model} not found in defaults.yaml") - return defaults['hypervisor']['model'][model] + + # Check if model exists in defaults + if model in defaults['hypervisor']['model']: + config = defaults['hypervisor']['model'][model] + config_source = DEFAULTS_PATH + log.debug("Found model %s in %s", model, DEFAULTS_PATH) + + # Then, try to load from pillar file (if it exists) + try: + log.debug("Checking for model %s in %s", model, HYPERVISOR_PILLAR_PATH) + pillar_config = read_yaml_file(HYPERVISOR_PILLAR_PATH) + if pillar_config and 'hypervisor' in pillar_config: + if 'model' in pillar_config['hypervisor']: + if model in pillar_config['hypervisor']['model']: + # Override with pillar configuration + config = pillar_config['hypervisor']['model'][model] + config_source = HYPERVISOR_PILLAR_PATH + log.debug("Found model %s in %s (overriding defaults)", model, HYPERVISOR_PILLAR_PATH) + except FileNotFoundError: + log.debug("Pillar file %s not found, using defaults only", HYPERVISOR_PILLAR_PATH) + except Exception as e: + log.warning("Failed to read pillar file %s: %s (using defaults)", HYPERVISOR_PILLAR_PATH, str(e)) + + # If model was not found in either file, raise an error + if config is None: + raise ValueError(f"Model {model} not found in {DEFAULTS_PATH} or {HYPERVISOR_PILLAR_PATH}") + + log.debug("Using hardware configuration for model %s from %s", model, config_source) + return config + except Exception as e: log.error("Failed to load hardware defaults: %s", str(e)) raise From 05321cf1edf2becd89ada079e94ef136b68bdd52 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 2 Oct 2025 15:03:11 -0400 Subject: [PATCH 059/124] add --force-cleanup to nvme raid script --- salt/hypervisor/tools/sbin/so-nvme-raid1.sh | 122 +++++++++++++++++++- 1 file changed, 116 insertions(+), 6 deletions(-) diff --git a/salt/hypervisor/tools/sbin/so-nvme-raid1.sh b/salt/hypervisor/tools/sbin/so-nvme-raid1.sh index cc9916a4c..fe96c063b 100644 --- a/salt/hypervisor/tools/sbin/so-nvme-raid1.sh +++ b/salt/hypervisor/tools/sbin/so-nvme-raid1.sh @@ -30,7 +30,9 @@ # # WARNING: This script will DESTROY all data on the target drives! # -# USAGE: sudo ./so-nvme-raid1.sh +# USAGE: +# sudo ./so-nvme-raid1.sh # Normal operation +# sudo ./so-nvme-raid1.sh --force-cleanup # Force cleanup of existing RAID # ################################################################# @@ -41,6 +43,19 @@ set -e RAID_ARRAY_NAME="md0" RAID_DEVICE="/dev/${RAID_ARRAY_NAME}" MOUNT_POINT="/nsm" +FORCE_CLEANUP=false + +# Parse command line arguments +for arg in "$@"; do + case $arg in + --force-cleanup) + FORCE_CLEANUP=true + shift + ;; + *) + ;; + esac +done # Function to log messages log() { @@ -55,6 +70,91 @@ check_root() { fi } +# Function to force cleanup all RAID components +force_cleanup_raid() { + log "=== FORCE CLEANUP MODE ===" + log "This will destroy all RAID configurations and data on target drives!" 
+ + # Stop all MD arrays + log "Stopping all MD arrays" + mdadm --stop --scan 2>/dev/null || true + + # Wait for arrays to stop + sleep 2 + + # Remove any running md devices + for md in /dev/md*; do + if [ -b "$md" ]; then + log "Stopping $md" + mdadm --stop "$md" 2>/dev/null || true + fi + done + + # Force cleanup both NVMe drives + for device in "/dev/nvme0n1" "/dev/nvme1n1"; do + log "Force cleaning $device" + + # Kill any processes using the device + fuser -k "${device}"* 2>/dev/null || true + + # Unmount any mounted partitions + for part in "${device}"*; do + if [ -b "$part" ]; then + umount -f "$part" 2>/dev/null || true + fi + done + + # Force zero RAID superblocks on partitions + for part in "${device}"p*; do + if [ -b "$part" ]; then + log "Zeroing RAID superblock on $part" + mdadm --zero-superblock --force "$part" 2>/dev/null || true + fi + done + + # Zero superblock on the device itself + log "Zeroing RAID superblock on $device" + mdadm --zero-superblock --force "$device" 2>/dev/null || true + + # Remove LVM physical volumes + pvremove -ff -y "$device" 2>/dev/null || true + + # Wipe all filesystem and partition signatures + log "Wiping all signatures from $device" + wipefs -af "$device" 2>/dev/null || true + + # Overwrite the beginning of the drive (partition table area) + log "Clearing partition table on $device" + dd if=/dev/zero of="$device" bs=1M count=10 2>/dev/null || true + + # Clear the end of the drive (backup partition table area) + local device_size=$(blockdev --getsz "$device" 2>/dev/null || echo "0") + if [ "$device_size" -gt 0 ]; then + dd if=/dev/zero of="$device" bs=512 seek=$(( device_size - 2048 )) count=2048 2>/dev/null || true + fi + + # Force kernel to re-read partition table + blockdev --rereadpt "$device" 2>/dev/null || true + partprobe -s "$device" 2>/dev/null || true + done + + # Clear mdadm configuration + log "Clearing mdadm configuration" + echo "DEVICE partitions" > /etc/mdadm.conf + + # Remove any fstab entries for the RAID device or mount point + log "Cleaning fstab entries" + sed -i "\|${RAID_DEVICE}|d" /etc/fstab + sed -i "\|${MOUNT_POINT}|d" /etc/fstab + + # Wait for system to settle + udevadm settle + sleep 5 + + log "Force cleanup complete!" + log "Proceeding with RAID setup..." +} + # Function to find MD arrays using specific devices find_md_arrays_using_devices() { local target_devices=("$@") @@ -205,10 +305,15 @@ check_existing_raid() { fi log "Error: $device appears to be part of an existing RAID array" - log "To reuse this device, you must first:" - log "1. Unmount any filesystems" - log "2. Stop the RAID array: mdadm --stop $array_name" - log "3. Zero the superblock: mdadm --zero-superblock ${device}p1" + log "Old RAID metadata detected but array is not running." + log "" + log "To fix this, run the script with --force-cleanup:" + log " sudo $0 --force-cleanup" + log "" + log "Or manually clean up with:" + log "1. Stop any arrays: mdadm --stop --scan" + log "2. Zero superblocks: mdadm --zero-superblock --force ${device}p1" + log "3. 
Wipe signatures: wipefs -af $device" exit 1 fi done @@ -238,7 +343,7 @@ ensure_devices_free() { done # Clear MD superblock - mdadm --zero-superblock "${device}"* 2>/dev/null || true + mdadm --zero-superblock --force "${device}"* 2>/dev/null || true # Remove LVM PV if exists pvremove -ff -y "$device" 2>/dev/null || true @@ -263,6 +368,11 @@ main() { # Check if running as root check_root + # If force cleanup flag is set, do aggressive cleanup first + if [ "$FORCE_CLEANUP" = true ]; then + force_cleanup_raid + fi + # Check for existing RAID setup check_existing_raid From c8a360357781b7eeab87e4071c210792004f2cc0 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 2 Oct 2025 14:47:38 -0500 Subject: [PATCH 060/124] update logstash fleet output policy --- .../so-elastic-fleet-outputs-update | 14 ++++++-- .../tools/sbin_jinja/so-elastic-fleet-setup | 2 +- salt/manager/tools/sbin/soup | 34 +++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update index 43eef6ee9..24f38765a 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update @@ -15,8 +15,18 @@ if ! is_manager_node; then fi function update_logstash_outputs() { - # Generate updated JSON payload - JSON_STRING=$(jq -n --arg UPDATEDLIST $NEW_LIST_JSON '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":""}') + if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then + SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl') + if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST $NEW_LIST_JSON \ + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG,"secrets": $SECRETS}') + else + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST $NEW_LIST_JSON \ + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG}') + fi + fi # Update Logstash Outputs curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_logstash" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" | jq diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index ab6757893..446fc6c9a 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -127,7 +127,7 @@ JSON_STRING=$( jq -n \ --arg LOGSTASHCRT "$LOGSTASHCRT" \ --arg LOGSTASHKEY "$LOGSTASHKEY" \ --arg LOGSTASHCA "$LOGSTASHCA" \ - '{"name":"grid-logstash","is_default":true,"is_default_monitoring":true,"id":"so-manager_logstash","type":"logstash","hosts":["{{ GLOBALS.manager_ip }}:5055", "{{ GLOBALS.manager }}:5055"],"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]},"proxy_id":null}' + '{"name":"grid-logstash","is_default":true,"is_default_monitoring":true,"id":"so-manager_logstash","type":"logstash","hosts":["{{ 
GLOBALS.manager_ip }}:5055", "{{ GLOBALS.manager }}:5055"],"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets":{"ssl":{"key": $LOGSTASHKEY }},"proxy_id":null}' ) if ! fleet_api "outputs" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then echo -e "\nFailed to create logstash fleet output" diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 52d6e92e9..18ed1581f 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -422,6 +422,7 @@ preupgrade_changes() { [[ "$INSTALLEDVERSION" == 2.4.150 ]] && up_to_2.4.160 [[ "$INSTALLEDVERSION" == 2.4.160 ]] && up_to_2.4.170 [[ "$INSTALLEDVERSION" == 2.4.170 ]] && up_to_2.4.180 + [[ "$INSTALLEDVERSION" == 2.4.180 ]] && up_to_2.4.190 true } @@ -617,6 +618,16 @@ post_to_2.4.190() { update_import_fleet_output fi + # Check if expected default policy is logstash (global.pipeline is REDIS or "") + pipeline=$(lookup_pillar "pipeline" "global") + if [[ -z "$pipeline" ]] || [[ "$pipeline" == "REDIS" ]]; then + # Check if this grid is currently affected by corrupt fleet output policy + if elastic-agent status | grep "config: key file not configured" > /dev/null 2>&1; then + echo "Elastic Agent shows an ssl error connecting to logstash output. Updating output policy..." + update_default_logstash_output + fi + fi + POSTVERSION=2.4.190 } @@ -1173,6 +1184,29 @@ update_import_fleet_output() { fi } +update_default_logstash_output() { + echo "Updating fleet logstash output policy grid-logstash" + if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then + SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl') + # Keep already configured hosts for this update, subsequent host updates come from so-elastic-fleet-outputs-update + HOSTS=$(echo "$logstash_policy" | jq -r '.item.hosts') + DEFAULT_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default') + DEFAULT_MONITORING_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default_monitoring') + LOGSTASHKEY=$(openssl rsa -in /etc/pki/elasticfleet-logstash.key) + JSON_STRING=$(jq -n \ + --argjson HOSTS "$HOSTS" \ + --arg DEFAULT_ENABLED "$DEFAULT_ENABLED" \ + --arg DEFAULT_MONITORING_ENABLED "$DEFAULT_MONITORING_ENABLED" \ + --argjson SSL_CONFIG "$SSL_CONFIG" \ + --arg LOGSTASHKEY "$LOGSTASHKEY" \ + '{"name":"grid-logstash","type":"logstash","hosts": $HOSTS,"is_default": $DEFAULT_ENABLED,"is_default_monitoring": $DEFAULT_MONITORING_ENABLED,"config_yaml":"","ssl": $SSL_CONFIG,"secrets":{"ssl":{"key": $LOGSTASHKEY }}}') + fi + + if curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_logstash" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --retry-delay 10 --fail; then + echo "Successfully updated grid-logstash fleet output policy" + fi +} + update_salt_mine() { echo "Populating the mine with mine_functions for each host." 
set +e From e5563eb9b8d53025e82c899c4e33313c6ef88b94 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 2 Oct 2025 15:29:55 -0500 Subject: [PATCH 061/124] send full new ssl config --- salt/manager/tools/sbin/soup | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 18ed1581f..ff9414b2d 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1187,19 +1187,21 @@ update_import_fleet_output() { update_default_logstash_output() { echo "Updating fleet logstash output policy grid-logstash" if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then - SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl') # Keep already configured hosts for this update, subsequent host updates come from so-elastic-fleet-outputs-update HOSTS=$(echo "$logstash_policy" | jq -r '.item.hosts') DEFAULT_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default') DEFAULT_MONITORING_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default_monitoring') LOGSTASHKEY=$(openssl rsa -in /etc/pki/elasticfleet-logstash.key) + LOGSTASHCRT=$(openssl x509 -in /etc/pki/elasticfleet-logstash.crt) + LOGSTASHCA=$(openssl x509 -in /etc/pki/tls/certs/intca.crt) JSON_STRING=$(jq -n \ --argjson HOSTS "$HOSTS" \ --arg DEFAULT_ENABLED "$DEFAULT_ENABLED" \ --arg DEFAULT_MONITORING_ENABLED "$DEFAULT_MONITORING_ENABLED" \ - --argjson SSL_CONFIG "$SSL_CONFIG" \ --arg LOGSTASHKEY "$LOGSTASHKEY" \ - '{"name":"grid-logstash","type":"logstash","hosts": $HOSTS,"is_default": $DEFAULT_ENABLED,"is_default_monitoring": $DEFAULT_MONITORING_ENABLED,"config_yaml":"","ssl": $SSL_CONFIG,"secrets":{"ssl":{"key": $LOGSTASHKEY }}}') + --arg LOGSTASHCRT "$LOGSTASHCRT" \ + --arg LOGSTASHCA "$LOGSTASHCA" \ + '{"name":"grid-logstash","type":"logstash","hosts": $HOSTS,"is_default": $DEFAULT_ENABLED,"is_default_monitoring": $DEFAULT_MONITORING_ENABLED,"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets":{"ssl":{"key": $LOGSTASHKEY }}}') fi if curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_logstash" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --retry-delay 10 --fail; then From 9fd1b9aec1570acce995ad0025460256a3ae02da Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 2 Oct 2025 16:38:47 -0500 Subject: [PATCH 062/124] make sure to pass in variables to json_string.. 
--- .../tools/sbin_jinja/so-elastic-fleet-outputs-update | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update index 24f38765a..9efe8a19d 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update @@ -19,11 +19,14 @@ function update_logstash_outputs() { SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl') if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then JSON_STRING=$(jq -n \ - --arg UPDATEDLIST $NEW_LIST_JSON \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --argjson SECRETS "$SECRETS" \ + --argjson SSL_CONFIG "$SSL_CONFIG" \ '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG,"secrets": $SECRETS}') else JSON_STRING=$(jq -n \ - --arg UPDATEDLIST $NEW_LIST_JSON \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --argjson SSL_CONFIG "$SSL_CONFIG" \ '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG}') fi fi From ac0d6c57e1dfcd5255a467b473d2823525174d29 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 6 Oct 2025 11:52:35 -0400 Subject: [PATCH 063/124] create common.grains state and nsm_total grain --- salt/common/grains.sls | 21 +++++++++++++++++++++ salt/common/init.sls | 1 + 2 files changed, 22 insertions(+) create mode 100644 salt/common/grains.sls diff --git a/salt/common/grains.sls b/salt/common/grains.sls new file mode 100644 index 000000000..b8d3a4c90 --- /dev/null +++ b/salt/common/grains.sls @@ -0,0 +1,21 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +{% set nsm_exists = salt['file.directory_exists']('/nsm') %} +{% if nsm_exists %} +{% set nsm_total = salt['cmd.shell']('df -BG /nsm | tail -1 | awk \'{print $2}\'') %} + +nsm_total: + grains.present: + - name: nsm_total + - value: {{ nsm_total }} + +{% else %} + +nsm_missing: + test.succeed_without_changes: + - name: /nsm does not exist, skipping grain assignment + +{% endif %} diff --git a/salt/common/init.sls b/salt/common/init.sls index 7137ff11f..eba18f651 100644 --- a/salt/common/init.sls +++ b/salt/common/init.sls @@ -4,6 +4,7 @@ {% from 'vars/globals.map.jinja' import GLOBALS %} include: + - common.grains - common.packages {% if GLOBALS.role in GLOBALS.manager_roles %} - manager.elasticsearch # needed for elastic_curl_config state From 8675193d1f3d05b3e408bade186ae6cdea189cc7 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 6 Oct 2025 12:56:31 -0500 Subject: [PATCH 064/124] elasticsearch upgrade 8.18.8 --- salt/elasticsearch/defaults.yaml | 2 +- salt/kibana/defaults.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/elasticsearch/defaults.yaml b/salt/elasticsearch/defaults.yaml index 6ed55a936..23eee8df0 100644 --- a/salt/elasticsearch/defaults.yaml +++ b/salt/elasticsearch/defaults.yaml @@ -1,6 +1,6 @@ elasticsearch: enabled: false - version: 8.18.6 + version: 8.18.8 index_clean: true config: action: diff --git a/salt/kibana/defaults.yaml b/salt/kibana/defaults.yaml index 645821b6c..078f826a0 100644 --- a/salt/kibana/defaults.yaml +++ b/salt/kibana/defaults.yaml @@ -22,7 +22,7 @@ kibana: - default - file migrations: - discardCorruptObjects: "8.18.6" + discardCorruptObjects: "8.18.8" telemetry: enabled: False security: From 7af95317db79e0babc32d1d9d68fe3c1cb80be7c Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:23:22 -0500 Subject: [PATCH 065/124] es upgrade 8.18.8 pipeline updates --- .../files/integrations/grid-nodes_general/import-evtx-logs.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json index 8132f4a09..dd95e6337 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json @@ -20,7 +20,7 @@ ], "data_stream.dataset": "import", "custom": "", - "processors": "- dissect:\n tokenizer: \"/nsm/import/%{import.id}/evtx/%{import.file}\"\n field: \"log.file.path\"\n target_prefix: \"\"\n- decode_json_fields:\n fields: [\"message\"]\n target: \"\"\n- drop_fields:\n fields: [\"host\"]\n ignore_missing: true\n- add_fields:\n target: data_stream\n fields:\n type: logs\n dataset: system.security\n- add_fields:\n target: event\n fields:\n dataset: system.security\n module: system\n imported: true\n- add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.security-2.5.4\n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-Sysmon/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.sysmon_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.sysmon_operational\n module: windows\n imported: true\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.sysmon_operational-3.1.2\n- if:\n equals:\n winlog.channel: 'Application'\n then: \n - add_fields:\n target: data_stream\n fields:\n 
dataset: system.application\n - add_fields:\n target: event\n fields:\n dataset: system.application\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.application-2.5.4\n- if:\n equals:\n winlog.channel: 'System'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.system\n - add_fields:\n target: event\n fields:\n dataset: system.system\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.system-2.5.4\n \n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-PowerShell/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.powershell_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.powershell_operational\n module: windows\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.powershell_operational-3.1.2\n- add_fields:\n target: data_stream\n fields:\n dataset: import", + "processors": "- dissect:\n tokenizer: \"/nsm/import/%{import.id}/evtx/%{import.file}\"\n field: \"log.file.path\"\n target_prefix: \"\"\n- decode_json_fields:\n fields: [\"message\"]\n target: \"\"\n- drop_fields:\n fields: [\"host\"]\n ignore_missing: true\n- add_fields:\n target: data_stream\n fields:\n type: logs\n dataset: system.security\n- add_fields:\n target: event\n fields:\n dataset: system.security\n module: system\n imported: true\n- add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.security-2.6.1\n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-Sysmon/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.sysmon_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.sysmon_operational\n module: windows\n imported: true\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.sysmon_operational-3.1.2\n- if:\n equals:\n winlog.channel: 'Application'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.application\n - add_fields:\n target: event\n fields:\n dataset: system.application\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.application-2.6.1\n- if:\n equals:\n winlog.channel: 'System'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.system\n - add_fields:\n target: event\n fields:\n dataset: system.system\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.system-2.6.1\n \n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-PowerShell/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.powershell_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.powershell_operational\n module: windows\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.powershell_operational-3.1.2\n- add_fields:\n target: data_stream\n fields:\n dataset: import", "tags": [ "import" ] From 39432198cccdf7115fc64cd9f7d1bdfe36aa6e62 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:25:52 -0500 Subject: [PATCH 066/124] Elastic 8.18.8 elastic agent build --- salt/manager/tools/sbin/soup | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index e49be133f..6da34aa75 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -874,14 +874,14 @@ up_to_2.4.170() { } up_to_2.4.180() { - # Elastic Update for this release, so download Elastic Agent files - 
determine_elastic_agent_upgrade
-
+    echo "Nothing to do for 2.4.180"
     INSTALLEDVERSION=2.4.180
 }
 
 up_to_2.4.190() {
-    echo "Nothing to do for 2.4.190"
+    # Elastic Update for this release, so download Elastic Agent files
+    determine_elastic_agent_upgrade
+
     INSTALLEDVERSION=2.4.190
 }
 

From 60cccb21b4d40822040977122afec4d6f727cad6 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Tue, 7 Oct 2025 12:20:42 -0400
Subject: [PATCH 067/124] create volume

---
 salt/_modules/qcow2.py                        |  95 +++-
 .../tools/sbin_jinja/so-kvm-create-volume     | 533 ++++++++++++++++++
 salt/manager/tools/sbin_jinja/so-salt-cloud   |  96 +++-
 .../engines/master/virtual_node_manager.py    |  34 ++
 salt/soc/dyanno/hypervisor/hypervisor.yaml    |   6 +-
 5 files changed, 758 insertions(+), 6 deletions(-)
 create mode 100644 salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume

diff --git a/salt/_modules/qcow2.py b/salt/_modules/qcow2.py
index 6e71dc459..10c4d185b 100644
--- a/salt/_modules/qcow2.py
+++ b/salt/_modules/qcow2.py
@@ -7,12 +7,14 @@
 """
 Salt module for managing QCOW2 image configurations and VM hardware settings. This module provides functions
-for modifying network configurations within QCOW2 images and adjusting virtual machine hardware settings.
-It serves as a Salt interface to the so-qcow2-modify-network and so-kvm-modify-hardware scripts.
+for modifying network configurations within QCOW2 images, adjusting virtual machine hardware settings, and
+creating virtual storage volumes. It serves as a Salt interface to the so-qcow2-modify-network,
+so-kvm-modify-hardware, and so-kvm-create-volume scripts.
 
-The module offers two main capabilities:
+The module offers three main capabilities:
 1. Network Configuration: Modify network settings (DHCP/static IP) within QCOW2 images
 2. Hardware Configuration: Adjust VM hardware settings (CPU, memory, PCI passthrough)
+3. Volume Management: Create and attach virtual storage volumes for NSM data
 
 This module is intended to work with Security Onion's virtualization infrastructure and is typically
 used in conjunction with salt-cloud for VM provisioning and management.
@@ -244,3 +246,90 @@ def modify_hardware_config(vm_name, cpu=None, memory=None, pci=None, start=False
     except Exception as e:
         log.error('qcow2 module: An error occurred while executing the script: {}'.format(e))
         raise
+
+def create_volume_config(vm_name, size_gb, start=False):
+    '''
+    Usage:
+        salt '*' qcow2.create_volume_config vm_name=<vm_name> size_gb=<size_gb> [start=<True|False>]
+
+    Options:
+        vm_name
+            Name of the virtual machine to attach the volume to
+        size_gb
+            Volume size in GB (positive integer)
+            This determines the capacity of the virtual storage volume
+        start
+            Boolean flag to start the VM after volume creation
+            Optional - defaults to False
+
+    Examples:
+    1. **Create 500GB Volume:**
+       ```bash
+       salt '*' qcow2.create_volume_config vm_name='sensor1_sensor' size_gb=500
+       ```
+       This creates a 500GB virtual volume for NSM storage
+
+    2. 
**Create 1TB Volume and Start VM:**
+       ```bash
+       salt '*' qcow2.create_volume_config vm_name='sensor1_sensor' size_gb=1000 start=True
+       ```
+       This creates a 1TB volume and starts the VM after attachment
+
+    Notes:
+        - VM must be stopped before volume creation
+        - Volume is created as a pre-allocated raw disk image and attached to the VM
+        - This is an alternative to disk passthrough via modify_hardware_config
+        - Volume is automatically attached to the VM's libvirt configuration
+        - Requires so-kvm-create-volume script to be installed
+        - Volume files are stored in the hypervisor's VM storage directory
+
+    Description:
+        This function creates and attaches a virtual storage volume to a KVM virtual machine
+        using the so-kvm-create-volume script. It creates a pre-allocated raw disk image of the
+        specified size and attaches it to the VM for NSM (Network Security Monitoring) storage
+        purposes. This provides an alternative to physical disk passthrough, allowing flexible
+        storage allocation without requiring dedicated hardware. The VM can optionally be started
+        after the volume is successfully created and attached.
+
+    Exit Codes:
+        0: Success
+        1: Invalid parameters
+        2: VM state error (running when should be stopped)
+        3: Volume creation error
+        4: System command error
+        255: Unexpected error
+
+    Logging:
+        - All operations are logged to the salt minion log
+        - Log entries are prefixed with 'qcow2 module:'
+        - Volume creation and attachment operations are logged
+        - Errors include detailed messages and stack traces
+        - Final status of volume creation is logged
+    '''
+
+    # Validate size_gb parameter
+    if not isinstance(size_gb, int) or size_gb <= 0:
+        raise ValueError('size_gb must be a positive integer.')
+
+    cmd = ['/usr/sbin/so-kvm-create-volume', '-v', vm_name, '-s', str(size_gb)]
+
+    if start:
+        cmd.append('-S')
+
+    log.info('qcow2 module: Executing command: {}'.format(' '.join(shlex.quote(arg) for arg in cmd)))
+
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
+        ret = {
+            'retcode': result.returncode,
+            'stdout': result.stdout,
+            'stderr': result.stderr
+        }
+        if result.returncode != 0:
+            log.error('qcow2 module: Script execution failed with return code {}: {}'.format(result.returncode, result.stderr))
+        else:
+            log.info('qcow2 module: Script executed successfully.')
+        return ret
+    except Exception as e:
+        log.error('qcow2 module: An error occurred while executing the script: {}'.format(e))
+        raise
diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume
new file mode 100644
index 000000000..8b7cd8a23
--- /dev/null
+++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume
@@ -0,0 +1,533 @@
+#!/usr/bin/python3
+
+# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
+# https://securityonion.net/license; you may not use this file except in compliance with the
+# Elastic License 2.0.
+#
+# Note: Per the Elastic License 2.0, the second limitation states:
+#
+# "You may not move, change, disable, or circumvent the license key functionality
+# in the software, and you may not remove or obscure any functionality in the
+# software that is protected by the license key."
+
+{% if 'vrt' in salt['pillar.get']('features', []) %}
+
+"""
+Script for creating and attaching virtual volumes to KVM virtual machines for NSM storage.
+
+This script provides functionality to create pre-allocated raw disk images and attach them
+to VMs as virtio-blk devices for high-performance network security monitoring data storage.
+
+The script handles the complete volume lifecycle:
+1. Volume Creation: Creates pre-allocated raw disk images using qemu-img
+2. Volume Attachment: Attaches volumes to VMs as virtio-blk devices
+3. VM Management: Stops/starts VMs as needed during the process
+
+This script is designed to work with Security Onion's virtualization infrastructure and is typically
+used during VM provisioning to add dedicated NSM storage volumes.
+
+**Usage:**
+    so-kvm-create-volume -v <vm_name> -s <size_gb> [-S]
+
+**Options:**
+    -v, --vm     Name of the virtual machine to attach the volume to (required).
+    -s, --size   Size of the volume in GB (required, must be a positive integer).
+    -S, --start  Start the VM after volume creation and attachment (optional).
+
+**Examples:**
+
+1. **Create and Attach 500GB Volume:**
+
+   ```bash
+   so-kvm-create-volume -v vm1_sensor -s 500
+   ```
+
+   This command creates and attaches a volume with the following settings:
+   - VM Name: `vm1_sensor`
+   - Volume Size: `500` GB
+   - Volume Path: `/nsm/libvirt/volumes/vm1_sensor-nsm.img`
+   - Device: `/dev/vdb` (virtio-blk)
+   - VM remains stopped after attachment
+
+2. **Create Volume and Start VM:**
+
+   ```bash
+   so-kvm-create-volume -v vm2_sensor -s 1000 -S
+   ```
+
+   This command creates a volume and starts the VM:
+   - VM Name: `vm2_sensor`
+   - Volume Size: `1000` GB (1 TB)
+   - VM is started after volume attachment due to the `-S` flag
+
+3. **Create Large Volume for Heavy Traffic:**
+
+   ```bash
+   so-kvm-create-volume -v vm3_sensor -s 2000 -S
+   ```
+
+   This command creates a large volume for high-traffic environments:
+   - VM Name: `vm3_sensor`
+   - Volume Size: `2000` GB (2 TB)
+   - VM is started after attachment
+
+**Notes:**
+
+- The script automatically stops the VM if it's running before creating and attaching the volume.
+- Volumes are created with full pre-allocation for optimal performance.
+- Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `<vm_name>-nsm.img`.
+- Volumes are attached as `/dev/vdb` using virtio-blk for high performance.
+- The script checks available disk space before creating the volume.
+- Ownership is set to `socore:socore` with permissions `644`.
+- Without the `-S` flag, the VM remains stopped after volume attachment.
+
+**Description:**
+
+The `so-kvm-create-volume` script creates and attaches NSM storage volumes using the following process:
+
+1. **Pre-flight Checks:**
+   - Validates input parameters (VM name, size)
+   - Checks available disk space in `/nsm/libvirt/volumes/`
+   - Ensures sufficient space for the requested volume size
+
+2. **VM State Management:**
+   - Connects to the local libvirt daemon
+   - Stops the VM if it's currently running
+   - Retrieves current VM configuration
+
+3. **Volume Creation:**
+   - Creates volume directory if it doesn't exist
+   - Uses `qemu-img create` with full pre-allocation
+   - Sets proper ownership (socore:socore) and permissions (644)
+   - Validates volume creation success
+
+4. **Volume Attachment:**
+   - Modifies VM's libvirt XML configuration
+   - Adds disk element with virtio-blk driver
+   - Configures cache='none' and io='native' for performance
+   - Attaches volume as `/dev/vdb`
+
+5. **VM Redefinition:**
+   - Applies the new configuration by redefining the VM
+   - Optionally starts the VM if requested
+   - Emits deployment status events for monitoring
+
+6. 
**Error Handling:** + - Validates all input parameters + - Checks disk space before creation + - Handles volume creation failures + - Handles volume attachment failures + - Provides detailed error messages for troubleshooting + +**Exit Codes:** + +- `0`: Success +- `1`: An error occurred during execution + +**Logging:** + +- Logs are written to `/opt/so/log/hypervisor/so-kvm-create-volume.log` +- Both file and console logging are enabled for real-time monitoring +- Log entries include timestamps and severity levels +- Log prefixes: VOLUME:, VM:, HARDWARE:, SPACE: +- Detailed error messages are logged for troubleshooting +""" + +import argparse +import sys +import os +import libvirt +import logging +import socket +import subprocess +import pwd +import grp +import xml.etree.ElementTree as ET +from io import StringIO +from so_vm_utils import start_vm, stop_vm +from so_logging_utils import setup_logging + +# Get hypervisor name from local hostname +HYPERVISOR = socket.gethostname() + +# Volume storage directory +VOLUME_DIR = '/nsm/libvirt/volumes' + +# Custom exception classes +class InsufficientSpaceError(Exception): + """Raised when there is insufficient disk space for volume creation.""" + pass + +class VolumeCreationError(Exception): + """Raised when volume creation fails.""" + pass + +class VolumeAttachmentError(Exception): + """Raised when volume attachment fails.""" + pass + +# Custom log handler to capture output +class StringIOHandler(logging.Handler): + def __init__(self): + super().__init__() + self.strio = StringIO() + + def emit(self, record): + msg = self.format(record) + self.strio.write(msg + '\n') + + def get_value(self): + return self.strio.getvalue() + +def parse_arguments(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description='Create and attach a virtual volume to a KVM virtual machine for NSM storage.') + parser.add_argument('-v', '--vm', required=True, help='Name of the virtual machine to attach the volume to.') + parser.add_argument('-s', '--size', type=int, required=True, help='Size of the volume in GB (must be a positive integer).') + parser.add_argument('-S', '--start', action='store_true', help='Start the VM after volume creation and attachment.') + args = parser.parse_args() + + # Validate size is positive + if args.size <= 0: + parser.error("Volume size must be a positive integer.") + + return args + +def check_disk_space(size_gb, logger): + """ + Check if there is sufficient disk space available for volume creation. + + Args: + size_gb: Size of the volume in GB + logger: Logger instance + + Raises: + InsufficientSpaceError: If there is not enough disk space + """ + try: + stat = os.statvfs(VOLUME_DIR) + # Available space in bytes + available_bytes = stat.f_bavail * stat.f_frsize + # Required space in bytes (add 10% buffer) + required_bytes = size_gb * 1024 * 1024 * 1024 * 1.1 + + available_gb = available_bytes / (1024 * 1024 * 1024) + required_gb = required_bytes / (1024 * 1024 * 1024) + + logger.info(f"SPACE: Available: {available_gb:.2f} GB, Required: {required_gb:.2f} GB") + + if available_bytes < required_bytes: + raise InsufficientSpaceError( + f"Insufficient disk space. Available: {available_gb:.2f} GB, Required: {required_gb:.2f} GB" + ) + + logger.info(f"SPACE: Sufficient disk space available for {size_gb} GB volume") + + except OSError as e: + logger.error(f"SPACE: Failed to check disk space: {e}") + raise + +def create_volume_file(vm_name, size_gb, logger): + """ + Create a pre-allocated raw disk image for the VM. 
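+
+    A minimal sketch of the equivalent shell invocation (path and placeholder
+    names substituted per this script's VOLUME_DIR constant; illustrative only):
+
+        qemu-img create -f raw -o preallocation=full \
+            /nsm/libvirt/volumes/<vm_name>-nsm.img <size_gb>G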
+ + Args: + vm_name: Name of the VM + size_gb: Size of the volume in GB + logger: Logger instance + + Returns: + Path to the created volume file + + Raises: + VolumeCreationError: If volume creation fails + """ + # Create volume directory if it doesn't exist + try: + if not os.path.exists(VOLUME_DIR): + os.makedirs(VOLUME_DIR, mode=0o755) + logger.info(f"VOLUME: Created volume directory: {VOLUME_DIR}") + except OSError as e: + logger.error(f"VOLUME: Failed to create volume directory: {e}") + raise VolumeCreationError(f"Failed to create volume directory: {e}") + + # Define volume path + volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm.img") + + # Check if volume already exists + if os.path.exists(volume_path): + logger.error(f"VOLUME: Volume already exists: {volume_path}") + raise VolumeCreationError(f"Volume already exists: {volume_path}") + + logger.info(f"VOLUME: Creating {size_gb} GB volume at {volume_path}") + + # Create volume using qemu-img with full pre-allocation + try: + cmd = [ + 'qemu-img', 'create', + '-f', 'raw', + '-o', 'preallocation=full', + volume_path, + f"{size_gb}G" + ] + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=True + ) + + logger.info(f"VOLUME: Volume created successfully") + if result.stdout: + logger.debug(f"VOLUME: qemu-img output: {result.stdout.strip()}") + + except subprocess.CalledProcessError as e: + logger.error(f"VOLUME: Failed to create volume: {e}") + if e.stderr: + logger.error(f"VOLUME: qemu-img error: {e.stderr.strip()}") + raise VolumeCreationError(f"Failed to create volume: {e}") + + # Set ownership to socore:socore + try: + socore_uid = pwd.getpwnam('socore').pw_uid + socore_gid = grp.getgrnam('socore').gr_gid + os.chown(volume_path, socore_uid, socore_gid) + logger.info(f"VOLUME: Set ownership to socore:socore") + except (KeyError, OSError) as e: + logger.error(f"VOLUME: Failed to set ownership: {e}") + raise VolumeCreationError(f"Failed to set ownership: {e}") + + # Set permissions to 644 + try: + os.chmod(volume_path, 0o644) + logger.info(f"VOLUME: Set permissions to 644") + except OSError as e: + logger.error(f"VOLUME: Failed to set permissions: {e}") + raise VolumeCreationError(f"Failed to set permissions: {e}") + + # Verify volume was created + if not os.path.exists(volume_path): + logger.error(f"VOLUME: Volume file not found after creation: {volume_path}") + raise VolumeCreationError(f"Volume file not found after creation: {volume_path}") + + volume_size = os.path.getsize(volume_path) + logger.info(f"VOLUME: Volume created: {volume_path} ({volume_size} bytes)") + + return volume_path + +def attach_volume_to_vm(conn, vm_name, volume_path, logger): + """ + Attach the volume to the VM's libvirt XML configuration. 
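+
+    The disk element appended to the domain XML is roughly the following
+    (source path shown for a hypothetical VM named vm1_sensor):
+
+        <disk type='file' device='disk'>
+          <driver name='qemu' type='raw' cache='none' io='native'/>
+          <source file='/nsm/libvirt/volumes/vm1_sensor-nsm.img'/>
+          <target dev='vdb' bus='virtio'/>
+          <address type='pci' domain='0x0000' bus='0x00' slot='0x07' function='0x0'/>
+        </disk>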
+
+    Args:
+        conn: Libvirt connection
+        vm_name: Name of the VM
+        volume_path: Path to the volume file
+        logger: Logger instance
+
+    Raises:
+        VolumeAttachmentError: If volume attachment fails
+    """
+    try:
+        # Get the VM domain
+        dom = conn.lookupByName(vm_name)
+
+        # Get the XML description of the VM
+        xml_desc = dom.XMLDesc()
+        root = ET.fromstring(xml_desc)
+
+        # Find the devices element
+        devices_elem = root.find('./devices')
+        if devices_elem is None:
+            logger.error("VM: Could not find <devices> element in XML")
+            raise VolumeAttachmentError("Could not find <devices> element in VM XML")
+
+        # Check if vdb already exists
+        for disk in devices_elem.findall('./disk'):
+            target = disk.find('./target')
+            if target is not None and target.get('dev') == 'vdb':
+                logger.error("VM: Device vdb already exists in VM configuration")
+                raise VolumeAttachmentError("Device vdb already exists in VM configuration")
+
+        logger.info(f"VM: Attaching volume to {vm_name} as /dev/vdb")
+
+        # Create disk element
+        disk_elem = ET.SubElement(devices_elem, 'disk', attrib={
+            'type': 'file',
+            'device': 'disk'
+        })
+
+        # Add driver element
+        ET.SubElement(disk_elem, 'driver', attrib={
+            'name': 'qemu',
+            'type': 'raw',
+            'cache': 'none',
+            'io': 'native'
+        })
+
+        # Add source element
+        ET.SubElement(disk_elem, 'source', attrib={
+            'file': volume_path
+        })
+
+        # Add target element
+        ET.SubElement(disk_elem, 'target', attrib={
+            'dev': 'vdb',
+            'bus': 'virtio'
+        })
+
+        # Add address element
+        ET.SubElement(disk_elem, 'address', attrib={
+            'type': 'pci',
+            'domain': '0x0000',
+            'bus': '0x00',
+            'slot': '0x07',
+            'function': '0x0'
+        })
+
+        logger.info(f"HARDWARE: Added disk configuration for vdb")
+
+        # Convert XML back to string
+        new_xml_desc = ET.tostring(root, encoding='unicode')
+
+        # Redefine the VM with the new XML
+        conn.defineXML(new_xml_desc)
+        logger.info(f"VM: VM redefined with volume attached")
+
+    except libvirt.libvirtError as e:
+        logger.error(f"VM: Failed to attach volume: {e}")
+        raise VolumeAttachmentError(f"Failed to attach volume: {e}")
+    except Exception as e:
+        logger.error(f"VM: Failed to attach volume: {e}")
+        raise VolumeAttachmentError(f"Failed to attach volume: {e}")
+
+def emit_status_event(vm_name, status):
+    """
+    Emit a deployment status event.
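+
+    A thin wrapper around the status helper CLI; roughly equivalent to running:
+
+        so-salt-emit-vm-deployment-status-event -v <vm_name> -H <hypervisor> -s <status>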
+ + Args: + vm_name: Name of the VM + status: Status message + """ + try: + subprocess.run([ + 'so-salt-emit-vm-deployment-status-event', + '-v', vm_name, + '-H', HYPERVISOR, + '-s', status + ], check=True) + except subprocess.CalledProcessError as e: + # Don't fail the entire operation if status event fails + pass + +def main(): + """Main function to orchestrate volume creation and attachment.""" + # Set up logging using the so_logging_utils library + string_handler = StringIOHandler() + string_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logger = setup_logging( + logger_name='so-kvm-create-volume', + log_file_path='/opt/so/log/hypervisor/so-kvm-create-volume.log', + log_level=logging.INFO, + format_str='%(asctime)s - %(levelname)s - %(message)s' + ) + logger.addHandler(string_handler) + + vm_name = None + + try: + # Parse arguments + args = parse_arguments() + + vm_name = args.vm + size_gb = args.size + start_vm_flag = args.start + + logger.info(f"VOLUME: Starting volume creation for VM '{vm_name}' with size {size_gb} GB") + + # Emit start status event + emit_status_event(vm_name, 'Volume Creation') + + # Check disk space + check_disk_space(size_gb, logger) + + # Connect to libvirt + try: + conn = libvirt.open(None) + logger.info("VM: Connected to libvirt") + except libvirt.libvirtError as e: + logger.error(f"VM: Failed to open connection to libvirt: {e}") + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + # Stop VM if running + dom = stop_vm(conn, vm_name, logger) + + # Create volume file + volume_path = create_volume_file(vm_name, size_gb, logger) + + # Attach volume to VM + attach_volume_to_vm(conn, vm_name, volume_path, logger) + + # Start VM if -S or --start argument is provided + if start_vm_flag: + dom = conn.lookupByName(vm_name) + start_vm(dom, logger) + logger.info(f"VM: VM '{vm_name}' started successfully") + else: + logger.info("VM: Start flag not provided; VM will remain stopped") + + # Close connection + conn.close() + + # Emit success status event + emit_status_event(vm_name, 'Volume Configuration') + + logger.info(f"VOLUME: Volume creation and attachment completed successfully for VM '{vm_name}'") + + except KeyboardInterrupt: + error_msg = "Operation cancelled by user" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except InsufficientSpaceError as e: + error_msg = f"SPACE: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except VolumeCreationError as e: + error_msg = f"VOLUME: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except VolumeAttachmentError as e: + error_msg = f"VM: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except Exception as e: + error_msg = f"An error occurred: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + +if __name__ == '__main__': + main() + +{%- else -%} + +echo "Hypervisor nodes are a feature supported only for customers with a valid license. \ + Contact Security Onion Solutions, LLC via our website at https://securityonionsolutions.com \ + for more information about purchasing a license to enable this feature." 
+ +{% endif -%} diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud index daa92fa67..e8674cc2c 100644 --- a/salt/manager/tools/sbin_jinja/so-salt-cloud +++ b/salt/manager/tools/sbin_jinja/so-salt-cloud @@ -533,6 +533,64 @@ def run_qcow2_modify_hardware_config(profile, vm_name, cpu=None, memory=None, pc except Exception as e: logger.error(f"An error occurred while running qcow2.modify_hardware_config: {e}") +def run_qcow2_create_volume_config(profile, vm_name, size_gb, cpu=None, memory=None, start=False): + """Create a volume for the VM and optionally configure CPU/memory. + + Args: + profile (str): The cloud profile name + vm_name (str): The name of the VM + size_gb (int): Size of the volume in GB + cpu (int, optional): Number of CPUs to assign + memory (int, optional): Amount of memory in MiB + start (bool): Whether to start the VM after configuration + """ + hv_name = profile.split('-')[1] + target = hv_name + "_*" + + try: + # Step 1: Create the volume + logger.info(f"Creating {size_gb}GB volume for VM {vm_name}") + volume_result = local.cmd( + target, + 'qcow2.create_volume_config', + kwarg={ + 'vm_name': vm_name, + 'size_gb': size_gb, + 'start': False # Don't start yet if we need to configure CPU/memory + } + ) + format_qcow2_output('Volume creation', volume_result) + + # Step 2: Configure CPU and memory if specified + if cpu or memory: + logger.info(f"Configuring hardware for VM {vm_name}: CPU={cpu}, Memory={memory}MiB") + hw_result = local.cmd( + target, + 'qcow2.modify_hardware_config', + kwarg={ + 'vm_name': vm_name, + 'cpu': cpu, + 'memory': memory, + 'start': start + } + ) + format_qcow2_output('Hardware configuration', hw_result) + elif start: + # If no CPU/memory config needed but we need to start the VM + logger.info(f"Starting VM {vm_name}") + start_result = local.cmd( + target, + 'qcow2.modify_hardware_config', + kwarg={ + 'vm_name': vm_name, + 'start': True + } + ) + format_qcow2_output('VM startup', start_result) + + except Exception as e: + logger.error(f"An error occurred while creating volume and configuring hardware: {e}") + def run_qcow2_modify_network_config(profile, vm_name, mode, ip=None, gateway=None, dns=None, search_domain=None): hv_name = profile.split('-')[1] target = hv_name + "_*" @@ -586,6 +644,7 @@ def parse_arguments(): network_group.add_argument('-c', '--cpu', type=int, help='Number of virtual CPUs to assign.') network_group.add_argument('-m', '--memory', type=int, help='Amount of memory to assign in MiB.') network_group.add_argument('-P', '--pci', action='append', help='PCI hardware ID(s) to passthrough to the VM (e.g., 0000:c7:00.0). Can be specified multiple times.') + network_group.add_argument('--nsm-size', type=int, help='Size in GB for NSM volume creation. 
If both --pci and --nsm-size are specified, --pci takes precedence.') args = parser.parse_args() @@ -621,6 +680,8 @@ def main(): hw_config.append(f"{args.memory}MB RAM") if args.pci: hw_config.append(f"PCI devices: {', '.join(args.pci)}") + if args.nsm_size: + hw_config.append(f"NSM volume: {args.nsm_size}GB") hw_string = f" and hardware config: {', '.join(hw_config)}" if hw_config else "" logger.info(f"Received request to create VM '{args.vm_name}' using profile '{args.profile}' {network_config}{hw_string}") @@ -643,8 +704,39 @@ def main(): # Step 2: Provision the VM (without starting it) call_salt_cloud(args.profile, args.vm_name) - # Step 3: Modify hardware configuration - run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) + # Step 3: Determine storage configuration approach + # Priority: disk passthrough (--pci) > volume creation (--nsm-size) + use_disk_passthrough = False + use_volume_creation = False + + if args.pci: + use_disk_passthrough = True + logger.info("Using disk passthrough (--pci parameter specified)") + if args.nsm_size: + logger.warning(f"Both --pci and --nsm-size specified. Using --pci (disk passthrough) and ignoring --nsm-size={args.nsm_size}GB") + elif args.nsm_size: + use_volume_creation = True + # Validate nsm_size + if args.nsm_size <= 0: + logger.error(f"Invalid nsm_size value: {args.nsm_size}. Must be a positive integer.") + sys.exit(1) + logger.info(f"Using volume creation with size {args.nsm_size}GB (--nsm-size parameter specified)") + + # Step 4: Configure hardware based on storage approach + if use_disk_passthrough: + # Use existing disk passthrough logic via modify_hardware_config + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) + elif use_volume_creation: + # Use new volume creation logic + run_qcow2_create_volume_config(args.profile, args.vm_name, size_gb=args.nsm_size, cpu=args.cpu, memory=args.memory, start=True) + else: + # No storage configuration, just configure CPU/memory if specified + if args.cpu or args.memory: + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=None, start=True) + else: + # No hardware configuration needed, just start the VM + logger.info(f"No hardware configuration specified, starting VM {args.vm_name}") + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True) except KeyboardInterrupt: logger.error("so-salt-cloud: Operation cancelled by user.") diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index bc098d075..1e13c6022 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -633,6 +633,35 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: except subprocess.CalledProcessError as e: logger.error(f"Failed to emit success status event: {e}") + # Validate nsm_size if present + if 'nsm_size' in vm_config: + try: + size = int(vm_config['nsm_size']) + if size <= 0: + log.error("VM: %s - nsm_size must be a positive integer, got: %d", vm_name, size) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': 'Invalid nsm_size: must be positive integer'}) + return + if size > 10000: # 10TB reasonable maximum + log.error("VM: %s - nsm_size %dGB exceeds reasonable maximum (10000GB)", vm_name, size) + 
mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': f'Invalid nsm_size: {size}GB exceeds maximum (10000GB)'}) + return + log.debug("VM: %s - nsm_size validated: %dGB", vm_name, size) + except (ValueError, TypeError) as e: + log.error("VM: %s - nsm_size must be a valid integer, got: %s", vm_name, vm_config.get('nsm_size')) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': 'Invalid nsm_size: must be valid integer'}) + return + + # Check for conflicting storage configurations + has_disk = 'disk' in vm_config and vm_config['disk'] + has_nsm_size = 'nsm_size' in vm_config and vm_config['nsm_size'] + + if has_disk and has_nsm_size: + log.warning("VM: %s - Both disk and nsm_size specified. disk takes precedence, nsm_size will be ignored.", + vm_name) + # Initial hardware validation against model is_valid, errors = validate_hardware_request(model_config, vm_config) if not is_valid: @@ -668,6 +697,11 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: if 'memory' in vm_config: memory_mib = int(vm_config['memory']) * 1024 cmd.extend(['-m', str(memory_mib)]) + + # Add nsm_size if specified and disk is not specified + if 'nsm_size' in vm_config and vm_config['nsm_size'] and not ('disk' in vm_config and vm_config['disk']): + cmd.extend(['--nsm-size', str(vm_config['nsm_size'])]) + log.debug("VM: %s - Adding nsm_size parameter: %s", vm_name, vm_config['nsm_size']) # Add PCI devices for hw_type in ['disk', 'copper', 'sfp']: diff --git a/salt/soc/dyanno/hypervisor/hypervisor.yaml b/salt/soc/dyanno/hypervisor/hypervisor.yaml index d13c928ec..7ae0631cb 100644 --- a/salt/soc/dyanno/hypervisor/hypervisor.yaml +++ b/salt/soc/dyanno/hypervisor/hypervisor.yaml @@ -63,8 +63,12 @@ hypervisor: required: true readonly: true forcedType: int + - field: nsm_size + label: "Size of /nsm, in GB. Only used if there is not a passthrough disk." + forcedType: int + readonly: true - field: disk - label: "Disk(s) for passthrough. Free: FREE | Total: TOTAL" + label: "Disk(s) for passthrough to /nsm. 
Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' From 564374a8fb67c1c4bcca5edb84ac45edcd4d185c Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:21:26 -0500 Subject: [PATCH 068/124] generate new elastic agents in post soup --- salt/manager/tools/sbin/soup | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 6da34aa75..3d1a7504a 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -600,9 +600,6 @@ post_to_2.4.170() { } post_to_2.4.180() { - echo "Regenerating Elastic Agent Installers" - /sbin/so-elastic-agent-gen-installers - # Force update to Kafka output policy /usr/sbin/so-kafka-fleet-output-policy --force @@ -610,6 +607,9 @@ post_to_2.4.180() { } post_to_2.4.190() { + echo "Regenerating Elastic Agent Installers" + /sbin/so-elastic-agent-gen-installers + # Only need to update import / eval nodes if [[ "$MINION_ROLE" == "import" ]] || [[ "$MINION_ROLE" == "eval" ]]; then update_import_fleet_output From c16bf50493f673c50695dc1168fb4501e8a3504f Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 7 Oct 2025 14:20:25 -0400 Subject: [PATCH 069/124] Update files --- salt/zeek/policy/custom/filters/files | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/salt/zeek/policy/custom/filters/files b/salt/zeek/policy/custom/filters/files index 867e2c849..311f37cc2 100644 --- a/salt/zeek/policy/custom/filters/files +++ b/salt/zeek/policy/custom/filters/files @@ -1 +1,13 @@ -# Placeholder \ No newline at end of file +hook Files::log_policy(rec: Files::Info, id: Log::ID, filter: Log::Filter) + { + # Turn off a specific mimetype + if (rec?$mime_type && ( /soap+xml/ | /json/ | /xml/ | /x509/ )in rec$mime_type) + break; + } + +event zeek_init() +{ + Log::remove_default_filter(Files::LOG); + local filter: Log::Filter = [$name="files-filter"]; + Log::add_filter(Files::LOG, filter); +} From a1a8f75409f62157184f888a92ad556fd298a1c5 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Tue, 7 Oct 2025 16:36:23 -0400 Subject: [PATCH 070/124] create and mount volume. 
being mounted as vda
---
 .../tools/sbin_jinja/so-kvm-create-volume    | 23 +++++++++++--------
 salt/manager/tools/sbin_jinja/so-salt-cloud  |  2 +-
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume
index 8b7cd8a23..dc9b28c21 100644
--- a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume
+++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume
@@ -242,16 +242,7 @@ def create_volume_file(vm_name, size_gb, logger):
     Raises:
         VolumeCreationError: If volume creation fails
     """
-    # Create volume directory if it doesn't exist
-    try:
-        if not os.path.exists(VOLUME_DIR):
-            os.makedirs(VOLUME_DIR, mode=0o755)
-            logger.info(f"VOLUME: Created volume directory: {VOLUME_DIR}")
-    except OSError as e:
-        logger.error(f"VOLUME: Failed to create volume directory: {e}")
-        raise VolumeCreationError(f"Failed to create volume directory: {e}")
-
-    # Define volume path
+    # Define volume path (directory already created in main())
     volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm.img")
 
     # Check if volume already exists
@@ -449,6 +440,18 @@ def main():
         # Emit start status event
         emit_status_event(vm_name, 'Volume Creation')
 
+        # Ensure volume directory exists before checking disk space
+        try:
+            os.makedirs(VOLUME_DIR, mode=0o755, exist_ok=True)
+            socore_uid = pwd.getpwnam('socore').pw_uid
+            socore_gid = grp.getgrnam('socore').gr_gid
+            os.chown(VOLUME_DIR, socore_uid, socore_gid)
+            logger.debug(f"VOLUME: Ensured volume directory exists: {VOLUME_DIR}")
+        except Exception as e:
+            logger.error(f"VOLUME: Failed to create volume directory: {e}")
+            emit_status_event(vm_name, 'Volume Configuration Failed')
+            sys.exit(1)
+
         # Check disk space
         check_disk_space(size_gb, logger)
 
diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud
index b5e2d0ab1..2f99f769b 100644
--- a/salt/manager/tools/sbin_jinja/so-salt-cloud
+++ b/salt/manager/tools/sbin_jinja/so-salt-cloud
@@ -544,7 +544,7 @@ def run_qcow2_create_volume_config(profile, vm_name, size_gb, cpu=None, memory=N
         memory (int, optional): Amount of memory in MiB
         start (bool): Whether to start the VM after configuration
     """
-    hv_name = profile.split('-')[1]
+    hv_name = profile.split('_')[1]
     target = hv_name + "_*"
 
     try:

From c7edaac42a5ef21eb6011eba1e45f79e829fe179 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Tue, 7 Oct 2025 17:20:11 -0400
Subject: [PATCH 071/124] nsm volume as vdb, os vda by ordering pci slots

---
 .../tools/sbin_jinja/so-kvm-create-volume    | 54 ++++++++++++++++++-
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume
index dc9b28c21..57309ec8e 100644
--- a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume
+++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume
@@ -334,6 +334,34 @@ def attach_volume_to_vm(conn, vm_name, volume_path, logger):
             logger.error("VM: Could not find <devices> element in XML")
             raise VolumeAttachmentError("Could not find <devices> element in VM XML")
 
+        # Log ALL devices with PCI addresses to find conflicts
+        logger.info("DISK_DEBUG: Examining ALL devices with PCI addresses")
+        for device in devices_elem:
+            address = device.find('./address')
+            if address is not None and address.get('type') == 'pci':
+                bus = address.get('bus', 'unknown')
+                slot = address.get('slot', 'unknown')
+                function = address.get('function', 'unknown')
+                logger.info(f"DISK_DEBUG: Device {device.tag}: bus={bus}, slot={slot}, function={function}")
+
+        # Log existing disk configuration for debugging
+        logger.info("DISK_DEBUG: Examining existing disk configuration")
+        existing_disks = devices_elem.findall('./disk')
+        for idx, disk in enumerate(existing_disks):
+            target = disk.find('./target')
+            source = disk.find('./source')
+            address = disk.find('./address')
+
+            dev_name = target.get('dev') if target is not None else 'unknown'
+            source_file = source.get('file') if source is not None else 'unknown'
+
+            if address is not None:
+                slot = address.get('slot', 'unknown')
+                bus = address.get('bus', 'unknown')
+                logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, slot={slot}, bus={bus}")
+            else:
+                logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, no address element")
+
         # Check if vdb already exists
         for disk in devices_elem.findall('./disk'):
             target = disk.find('./target')
@@ -369,16 +397,38 @@ def attach_volume_to_vm(conn, vm_name, volume_path, logger):
         })
 
         # Add address element
+        # Use bus 0x07 with slot 0x00 to ensure NSM volume appears after OS disk (which is on bus 0x04)
+        # Bus 0x05 is used by memballoon, bus 0x06 is used by rng device
+        # Libvirt only accepts slot 0x00 on these non-zero (pcie-root-port) buses
+        # This ensures vda = OS disk, vdb = NSM volume
         ET.SubElement(disk_elem, 'address', attrib={
             'type': 'pci',
             'domain': '0x0000',
-            'bus': '0x00',
-            'slot': '0x07',
+            'bus': '0x07',
+            'slot': '0x00',
             'function': '0x0'
         })
 
         logger.info(f"HARDWARE: Added disk configuration for vdb")
 
+        # Log disk ordering after adding new disk
+        logger.info("DISK_DEBUG: Disk configuration after adding NSM volume")
+        all_disks = devices_elem.findall('./disk')
+        for idx, disk in enumerate(all_disks):
+            target = disk.find('./target')
+            source = disk.find('./source')
+            address = disk.find('./address')
+
+            dev_name = target.get('dev') if target is not None else 'unknown'
+            source_file = source.get('file') if source is not None else 'unknown'
+
+            if address is not None:
+                slot = address.get('slot', 'unknown')
+                bus = address.get('bus', 'unknown')
+                logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, slot={slot}, bus={bus}")
+            else:
+                logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, no address element")
+
         # Convert XML back to string
         new_xml_desc = ET.tostring(root, encoding='unicode')
 

From 659c039ba8035bbbdebc2ed8f9fe8c1256bd2fe7 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Wed, 8 Oct 2025 10:51:04 -0400
Subject: [PATCH 072/124] handle nsm volume size and non disk passthrough

---
 salt/manager/tools/sbin_jinja/so-salt-cloud | 61 ++++++++++++++-------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud
index 2f99f769b..6d1d2e529 100644
--- a/salt/manager/tools/sbin_jinja/so-salt-cloud
+++ b/salt/manager/tools/sbin_jinja/so-salt-cloud
@@ -516,17 +516,21 @@ def run_qcow2_modify_hardware_config(profile, vm_name, cpu=None, memory=None, pc
     target = hv_name + "_*"
 
     try:
-        args_list = [
-            'vm_name=' + vm_name,
-            'cpu=' + str(cpu) if cpu else '',
-            'memory=' + str(memory) if memory else '',
-            'start=' + str(start)
-        ]
-
+        args_list = ['vm_name=' + vm_name]
+
+        # Only add parameters that are actually specified
+        if cpu is not None:
+            args_list.append('cpu=' + str(cpu))
+        if memory is not None:
+            args_list.append('memory=' + str(memory))
 
         # Add PCI devices if provided
         if pci_list:
             # Pass all PCI devices as a comma-separated list
args_list.append('pci=' + ','.join(pci_list)) + + # Always add start parameter + args_list.append('start=' + str(start)) result = local.cmd(target, 'qcow2.modify_hardware_config', args_list) format_qcow2_output('Hardware configuration', result) @@ -644,7 +648,7 @@ def parse_arguments(): network_group.add_argument('-c', '--cpu', type=int, help='Number of virtual CPUs to assign.') network_group.add_argument('-m', '--memory', type=int, help='Amount of memory to assign in MiB.') network_group.add_argument('-P', '--pci', action='append', help='PCI hardware ID(s) to passthrough to the VM (e.g., 0000:c7:00.0). Can be specified multiple times.') - network_group.add_argument('--nsm-size', type=int, help='Size in GB for NSM volume creation. If both --pci and --nsm-size are specified, --pci takes precedence.') + network_group.add_argument('--nsm-size', type=int, help='Size in GB for NSM volume creation. Can be used with copper/sfp NICs (--pci). Only disk passthrough (without --nsm-size) prevents volume creation.') args = parser.parse_args() @@ -705,30 +709,47 @@ def main(): call_salt_cloud(args.profile, args.vm_name) # Step 3: Determine storage configuration approach - # Priority: disk passthrough (--pci) > volume creation (--nsm-size) + # Priority: disk passthrough > volume creation (but volume can coexist with copper/sfp NICs) + # Note: virtual_node_manager.py already filters out --nsm-size when disk is present, + # so if both --pci and --nsm-size are present here, the PCI devices are copper/sfp NICs use_disk_passthrough = False use_volume_creation = False + has_nic_passthrough = False - if args.pci: - use_disk_passthrough = True - logger.info("Using disk passthrough (--pci parameter specified)") - if args.nsm_size: - logger.warning(f"Both --pci and --nsm-size specified. Using --pci (disk passthrough) and ignoring --nsm-size={args.nsm_size}GB") - elif args.nsm_size: - use_volume_creation = True + if args.nsm_size: # Validate nsm_size if args.nsm_size <= 0: logger.error(f"Invalid nsm_size value: {args.nsm_size}. 
Must be a positive integer.") sys.exit(1) + use_volume_creation = True logger.info(f"Using volume creation with size {args.nsm_size}GB (--nsm-size parameter specified)") + + if args.pci: + # If both nsm_size and PCI are present, PCI devices are copper/sfp NICs + # (virtual_node_manager.py filters out nsm_size when disk is present) + has_nic_passthrough = True + logger.info(f"PCI devices (copper/sfp NICs) will be passed through along with volume: {', '.join(args.pci)}") + elif args.pci: + # Only PCI devices, no nsm_size - this is disk passthrough + use_disk_passthrough = True + logger.info(f"Using disk passthrough (--pci parameter specified without --nsm-size)") # Step 4: Configure hardware based on storage approach - if use_disk_passthrough: + if use_volume_creation: + # Create volume first + run_qcow2_create_volume_config(args.profile, args.vm_name, size_gb=args.nsm_size, cpu=args.cpu, memory=args.memory, start=False) + + # Then configure NICs if present + if has_nic_passthrough: + logger.info(f"Configuring NIC passthrough for VM {args.vm_name}") + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=args.pci, start=True) + else: + # No NICs, just start the VM + logger.info(f"Starting VM {args.vm_name}") + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True) + elif use_disk_passthrough: # Use existing disk passthrough logic via modify_hardware_config run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) - elif use_volume_creation: - # Use new volume creation logic - run_qcow2_create_volume_config(args.profile, args.vm_name, size_gb=args.nsm_size, cpu=args.cpu, memory=args.memory, start=True) else: # No storage configuration, just configure CPU/memory if specified if args.cpu or args.memory: From e45b0bf871278c33ecff4f0fb4f777c64c2df08e Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 8 Oct 2025 11:51:35 -0400 Subject: [PATCH 073/124] var and comment update --- salt/manager/tools/sbin_jinja/so-salt-cloud | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud index 6d1d2e529..a1f99712a 100644 --- a/salt/manager/tools/sbin_jinja/so-salt-cloud +++ b/salt/manager/tools/sbin_jinja/so-salt-cloud @@ -712,7 +712,7 @@ def main(): # Priority: disk passthrough > volume creation (but volume can coexist with copper/sfp NICs) # Note: virtual_node_manager.py already filters out --nsm-size when disk is present, # so if both --pci and --nsm-size are present here, the PCI devices are copper/sfp NICs - use_disk_passthrough = False + use_passthrough = False use_volume_creation = False has_nic_passthrough = False @@ -730,9 +730,11 @@ def main(): has_nic_passthrough = True logger.info(f"PCI devices (copper/sfp NICs) will be passed through along with volume: {', '.join(args.pci)}") elif args.pci: - # Only PCI devices, no nsm_size - this is disk passthrough - use_disk_passthrough = True - logger.info(f"Using disk passthrough (--pci parameter specified without --nsm-size)") + # Only PCI devices, no nsm_size - could be disk or NICs + # this script is called by virtual_node_manager and that strips any possibility that nsm_size and the disk pci slot is sent to this script + # we might have not specified a disk passthrough or nsm_size, but pass another pci slot and we end up here + use_passthrough = True + 
logger.info(f"Configuring PCI device passthrough.(--pci parameter specified without --nsm-size)") # Step 4: Configure hardware based on storage approach if use_volume_creation: @@ -747,8 +749,8 @@ def main(): # No NICs, just start the VM logger.info(f"Starting VM {args.vm_name}") run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True) - elif use_disk_passthrough: - # Use existing disk passthrough logic via modify_hardware_config + elif use_passthrough: + # Use existing passthrough logic via modify_hardware_config run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) else: # No storage configuration, just configure CPU/memory if specified From 7827e05c2458cbf51f5e77a7c31a24140674c0a8 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 8 Oct 2025 12:18:34 -0400 Subject: [PATCH 074/124] handle mounting vdb as nsm when nsm set in soc grid config --- salt/storage/init.sls | 11 +- .../{nsm_mount.sls => nsm_mount_nvme.sls} | 6 +- salt/storage/nsm_mount_virtio.sls | 39 ++++ .../sbin/{so-nsm-mount => so-nsm-mount-nvme} | 2 +- salt/storage/tools/sbin/so-nsm-mount-virtio | 171 ++++++++++++++++++ 5 files changed, 223 insertions(+), 6 deletions(-) rename salt/storage/{nsm_mount.sls => nsm_mount_nvme.sls} (87%) create mode 100644 salt/storage/nsm_mount_virtio.sls rename salt/storage/tools/sbin/{so-nsm-mount => so-nsm-mount-nvme} (99%) create mode 100644 salt/storage/tools/sbin/so-nsm-mount-virtio diff --git a/salt/storage/init.sls b/salt/storage/init.sls index 533366fd0..ab5926bf5 100644 --- a/salt/storage/init.sls +++ b/salt/storage/init.sls @@ -4,10 +4,17 @@ # Elastic License 2.0. -{% set nvme_devices = salt['cmd.shell']("find /dev -name 'nvme*n1' 2>/dev/null") %} +{% set nvme_devices = salt['cmd.shell']("ls /dev/nvme*n1 2>/dev/null || echo ''") %} +{% set virtio_devices = salt['cmd.shell']("test -b /dev/vdb && echo '/dev/vdb' || echo ''") %} + {% if nvme_devices %} include: - - storage.nsm_mount + - storage.nsm_mount_nvme + +{% elif virtio_devices %} + +include: + - storage.nsm_mount_virtio {% endif %} diff --git a/salt/storage/nsm_mount.sls b/salt/storage/nsm_mount_nvme.sls similarity index 87% rename from salt/storage/nsm_mount.sls rename to salt/storage/nsm_mount_nvme.sls index ed9e97c33..a0d317014 100644 --- a/salt/storage/nsm_mount.sls +++ b/salt/storage/nsm_mount_nvme.sls @@ -22,8 +22,8 @@ storage_nsm_mount_logdir: # Install the NSM mount script storage_nsm_mount_script: file.managed: - - name: /usr/sbin/so-nsm-mount - - source: salt://storage/tools/sbin/so-nsm-mount + - name: /usr/sbin/so-nsm-mount-nvme + - source: salt://storage/tools/sbin/so-nsm-mount-nvme - mode: 755 - user: root - group: root @@ -34,7 +34,7 @@ storage_nsm_mount_script: # Execute the mount script if not already mounted storage_nsm_mount_execute: cmd.run: - - name: /usr/sbin/so-nsm-mount + - name: /usr/sbin/so-nsm-mount-nvme - unless: mountpoint -q /nsm - require: - file: storage_nsm_mount_script diff --git a/salt/storage/nsm_mount_virtio.sls b/salt/storage/nsm_mount_virtio.sls new file mode 100644 index 000000000..34ca8a883 --- /dev/null +++ b/salt/storage/nsm_mount_virtio.sls @@ -0,0 +1,39 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. 
Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Install required packages +storage_nsm_mount_virtio_packages: + pkg.installed: + - pkgs: + - xfsprogs + +# Ensure log directory exists +storage_nsm_mount_virtio_logdir: + file.directory: + - name: /opt/so/log + - makedirs: True + - user: root + - group: root + - mode: 755 + +# Install the NSM mount script +storage_nsm_mount_virtio_script: + file.managed: + - name: /usr/sbin/so-nsm-mount-virtio + - source: salt://storage/tools/sbin/so-nsm-mount-virtio + - mode: 755 + - user: root + - group: root + - require: + - pkg: storage_nsm_mount_virtio_packages + - file: storage_nsm_mount_virtio_logdir + +# Execute the mount script if not already mounted +storage_nsm_mount_virtio_execute: + cmd.run: + - name: /usr/sbin/so-nsm-mount-virtio + - unless: mountpoint -q /nsm + - require: + - file: storage_nsm_mount_virtio_script diff --git a/salt/storage/tools/sbin/so-nsm-mount b/salt/storage/tools/sbin/so-nsm-mount-nvme similarity index 99% rename from salt/storage/tools/sbin/so-nsm-mount rename to salt/storage/tools/sbin/so-nsm-mount-nvme index 24125fc40..f612c9915 100644 --- a/salt/storage/tools/sbin/so-nsm-mount +++ b/salt/storage/tools/sbin/so-nsm-mount-nvme @@ -81,7 +81,7 @@ set -e -LOG_FILE="/opt/so/log/so-nsm-mount.log" +LOG_FILE="/opt/so/log/so-nsm-mount-nvme.log" VG_NAME="" LV_NAME="nsm" MOUNT_POINT="/nsm" diff --git a/salt/storage/tools/sbin/so-nsm-mount-virtio b/salt/storage/tools/sbin/so-nsm-mount-virtio new file mode 100644 index 000000000..8385d7c21 --- /dev/null +++ b/salt/storage/tools/sbin/so-nsm-mount-virtio @@ -0,0 +1,171 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Usage: +# so-nsm-mount-virtio +# +# Options: +# None - script automatically configures /dev/vdb +# +# Examples: +# 1. Configure and mount virtio-blk device: +# ```bash +# sudo so-nsm-mount-virtio +# ``` +# +# Notes: +# - Requires root privileges +# - Mounts /dev/vdb as /nsm +# - Creates XFS filesystem if needed +# - Configures persistent mount via /etc/fstab +# - Safe to run multiple times +# +# Description: +# This script automates the configuration and mounting of virtio-blk devices +# as /nsm in Security Onion virtual machines. It performs these steps: +# +# Dependencies: +# - xfsprogs: Required for XFS filesystem operations +# +# 1. Safety Checks: +# - Verifies root privileges +# - Checks if /nsm is already mounted +# - Verifies /dev/vdb exists +# +# 2. Filesystem Creation: +# - Creates XFS filesystem on /dev/vdb if not already formatted +# +# 3. 
Mount Configuration: +# - Creates /nsm directory if needed +# - Adds entry to /etc/fstab for persistence +# - Mounts the filesystem as /nsm +# +# Exit Codes: +# 0: Success conditions: +# - Device configured and mounted +# - Already properly mounted +# 1: Error conditions: +# - Must be run as root +# - Device /dev/vdb not found +# - Filesystem creation failed +# - Mount operation failed +# +# Logging: +# - All operations logged to /opt/so/log/so-nsm-mount-virtio.log + +set -e + +LOG_FILE="/opt/so/log/so-nsm-mount-virtio.log" +DEVICE="/dev/vdb" +MOUNT_POINT="/nsm" + +# Function to log messages +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') $1" | tee -a "$LOG_FILE" +} + +# Function to log errors +log_error() { + echo "$(date '+%Y-%m-%d %H:%M:%S') ERROR: $1" | tee -a "$LOG_FILE" >&2 +} + +# Function to check if running as root +check_root() { + if [ "$EUID" -ne 0 ]; then + log_error "Must be run as root" + exit 1 + fi +} + +# Main execution +main() { + log "==========================================" + log "Starting virtio-blk NSM mount process" + log "==========================================" + + # Check root privileges + check_root + + # Check if already mounted + if mountpoint -q "$MOUNT_POINT"; then + log "$MOUNT_POINT is already mounted" + log "==========================================" + exit 0 + fi + + # Check if device exists + if [ ! -b "$DEVICE" ]; then + log_error "Device $DEVICE not found" + log "==========================================" + exit 1 + fi + + log "Found device: $DEVICE" + + # Get device size + local size=$(lsblk -dbn -o SIZE "$DEVICE" 2>/dev/null | numfmt --to=iec) + log "Device size: $size" + + # Check if device has filesystem + if ! blkid "$DEVICE" | grep -q 'TYPE="xfs"'; then + log "Creating XFS filesystem on $DEVICE" + if ! mkfs.xfs -f "$DEVICE" 2>&1 | tee -a "$LOG_FILE"; then + log_error "Failed to create filesystem" + log "==========================================" + exit 1 + fi + log "Filesystem created successfully" + else + log "Device already has XFS filesystem" + fi + + # Create mount point + if [ ! -d "$MOUNT_POINT" ]; then + log "Creating mount point $MOUNT_POINT" + mkdir -p "$MOUNT_POINT" + fi + + # Add to fstab if not present + if ! 
grep -q "$DEVICE.*$MOUNT_POINT" /etc/fstab; then + log "Adding entry to /etc/fstab" + echo "$DEVICE $MOUNT_POINT xfs defaults 0 0" >> /etc/fstab + log "Entry added to /etc/fstab" + else + log "Entry already exists in /etc/fstab" + fi + + # Mount the filesystem + log "Mounting $DEVICE to $MOUNT_POINT" + if mount "$MOUNT_POINT" 2>&1 | tee -a "$LOG_FILE"; then + log "Successfully mounted $DEVICE to $MOUNT_POINT" + + # Verify mount + if mountpoint -q "$MOUNT_POINT"; then + log "Mount verified successfully" + + # Display mount information + log "Mount details:" + df -h "$MOUNT_POINT" | tail -n 1 | tee -a "$LOG_FILE" + else + log_error "Mount verification failed" + log "==========================================" + exit 1 + fi + else + log_error "Failed to mount $DEVICE" + log "==========================================" + exit 1 + fi + + log "==========================================" + log "Virtio-blk NSM mount process completed successfully" + log "==========================================" + exit 0 +} + +# Run main function +main From bad9a16ebbca0308f99f6eaa6a0c7d1649e4a85b Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 8 Oct 2025 13:02:44 -0400 Subject: [PATCH 075/124] support non-async state apply --- salt/soc/files/bin/salt-relay.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/salt/soc/files/bin/salt-relay.sh b/salt/soc/files/bin/salt-relay.sh index 16c387f86..4fc7d8d3d 100755 --- a/salt/soc/files/bin/salt-relay.sh +++ b/salt/soc/files/bin/salt-relay.sh @@ -237,10 +237,22 @@ function manage_salt() { case "$op" in state) - log "Performing '$op' for '$state' on minion '$minion'" state=$(echo "$request" | jq -r .state) - response=$(salt --async "$minion" state.apply "$state" queue=2) + async=$(echo "$request" | jq -r .async) + if [[ $async == "true" ]]; then + log "Performing async '$op' on minion $minion with state '$state'" + response=$(salt --async "$minion" state.apply "$state" queue=2) + else + log "Performing '$op' on minion $minion with state '$state'" + response=$(salt "$minion" state.apply "$state") + fi + exit_code=$? + if [[ $exit_code -ne 0 && "$response" =~ "is running as PID" ]]; then + log "Salt already running: $response ($exit_code)" + respond "$id" "ERROR_SALT_ALREADY_RUNNING" + return + fi ;; highstate) log "Performing '$op' on minion $minion" @@ -259,7 +271,7 @@ function manage_salt() { ;; esac - if [[ exit_code -eq 0 ]]; then + if [[ $exit_code -eq 0 ]]; then log "Successful command execution: $response" respond "$id" "true" else From af42c31740441863edad62aa9d68a338ee91ec0e Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 8 Oct 2025 13:24:54 -0400 Subject: [PATCH 076/124] update yaml for annotation --- salt/soc/dyanno/hypervisor/hypervisor.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/soc/dyanno/hypervisor/hypervisor.yaml b/salt/soc/dyanno/hypervisor/hypervisor.yaml index 7ae0631cb..b3b802d8e 100644 --- a/salt/soc/dyanno/hypervisor/hypervisor.yaml +++ b/salt/soc/dyanno/hypervisor/hypervisor.yaml @@ -64,21 +64,21 @@ hypervisor: readonly: true forcedType: int - field: nsm_size - label: "Size of /nsm, in GB. Only used if there is not a passthrough disk." + label: "Size of virtual disk to create and use for /nsm, in GB. Only applicable if no passthrough disk." forcedType: int readonly: true - field: disk - label: "Disk(s) for passthrough to /nsm. Free: FREE | Total: TOTAL" + label: "Disk(s) to passthrough for /nsm. 
Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: copper - label: "Copper port(s) for passthrough. Free: FREE | Total: TOTAL" + label: "Copper port(s) to passthrough. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: sfp - label: "SFP port(s) for passthrough. Free: FREE | Total: TOTAL" + label: "SFP port(s) to passthrough. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' From a3e7649a3c0ca1ecb2263e4c4a51baddbc0af4e5 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 8 Oct 2025 13:52:34 -0400 Subject: [PATCH 077/124] minor hypervisor annotation --- salt/soc/dyanno/hypervisor/hypervisor.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/soc/dyanno/hypervisor/hypervisor.yaml b/salt/soc/dyanno/hypervisor/hypervisor.yaml index b3b802d8e..143a2f5cb 100644 --- a/salt/soc/dyanno/hypervisor/hypervisor.yaml +++ b/salt/soc/dyanno/hypervisor/hypervisor.yaml @@ -64,21 +64,21 @@ hypervisor: readonly: true forcedType: int - field: nsm_size - label: "Size of virtual disk to create and use for /nsm, in GB. Only applicable if no passthrough disk." + label: "Size of virtual disk to create and use for /nsm, in GB. Only applicable if no pass-through disk." forcedType: int readonly: true - field: disk - label: "Disk(s) to passthrough for /nsm. Free: FREE | Total: TOTAL" + label: "Disk(s) to pass through for /nsm. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: copper - label: "Copper port(s) to passthrough. Free: FREE | Total: TOTAL" + label: "Copper port(s) to pass through. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: sfp - label: "SFP port(s) to passthrough. Free: FREE | Total: TOTAL" + label: "SFP port(s) to pass through. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' From 8a8ea04088cc2ad06a9341409a9162bb54b09ee2 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 8 Oct 2025 14:01:18 -0500 Subject: [PATCH 078/124] ignore error for elastic-fleet agent --- salt/common/tools/sbin/so-log-check | 1 + 1 file changed, 1 insertion(+) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index e3768da46..5960a7946 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -222,6 +222,7 @@ if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|Initialized license manager" # SOC log: before fields.status was changed to fields.licenseStatus EXCLUDED_ERRORS="$EXCLUDED_ERRORS|from NIC checksum offloading" # zeek reporter.log EXCLUDED_ERRORS="$EXCLUDED_ERRORS|marked for removal" # docker container getting recycled + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|tcp 127.0.0.1:6791: bind: address already in use" # so-elastic-fleet agent restarting. 
Seen starting w/ 8.18.8 https://github.com/elastic/kibana/issues/201459 fi RESULT=0 From e551c6e037579cc8c7cfa8f17874b612130a3a33 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 9 Oct 2025 10:19:25 -0400 Subject: [PATCH 079/124] owner and perms of volumes --- .../tools/sbin_jinja/so-kvm-create-volume | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume index 57309ec8e..2322c3a94 100644 --- a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume +++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume @@ -78,7 +78,7 @@ used during VM provisioning to add dedicated NSM storage volumes. - Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `-nsm.img`. - Volumes are attached as `/dev/vdb` using virtio-blk for high performance. - The script checks available disk space before creating the volume. -- Ownership is set to `socore:socore` with permissions `644`. +- Ownership is set to `qemu:qemu` with permissions `640`. - Without the `-S` flag, the VM remains stopped after volume attachment. **Description:** @@ -98,7 +98,7 @@ The `so-kvm-create-volume` script creates and attaches NSM storage volumes using 3. **Volume Creation:** - Creates volume directory if it doesn't exist - Uses `qemu-img create` with full pre-allocation - - Sets proper ownership (socore:socore) and permissions (644) + - Sets proper ownership (qemu:qemu) and permissions (640) - Validates volume creation success 4. **Volume Attachment:** @@ -279,20 +279,20 @@ def create_volume_file(vm_name, size_gb, logger): logger.error(f"VOLUME: qemu-img error: {e.stderr.strip()}") raise VolumeCreationError(f"Failed to create volume: {e}") - # Set ownership to socore:socore + # Set ownership to qemu:qemu try: - socore_uid = pwd.getpwnam('socore').pw_uid - socore_gid = grp.getgrnam('socore').gr_gid - os.chown(volume_path, socore_uid, socore_gid) - logger.info(f"VOLUME: Set ownership to socore:socore") + qemu_uid = pwd.getpwnam('qemu').pw_uid + qemu_gid = grp.getgrnam('qemu').gr_gid + os.chown(volume_path, qemu_uid, qemu_gid) + logger.info(f"VOLUME: Set ownership to qemu:qemu") except (KeyError, OSError) as e: logger.error(f"VOLUME: Failed to set ownership: {e}") raise VolumeCreationError(f"Failed to set ownership: {e}") - # Set permissions to 644 + # Set permissions to 640 try: - os.chmod(volume_path, 0o644) - logger.info(f"VOLUME: Set permissions to 644") + os.chmod(volume_path, 0o640) + logger.info(f"VOLUME: Set permissions to 640") except OSError as e: logger.error(f"VOLUME: Failed to set permissions: {e}") raise VolumeCreationError(f"Failed to set permissions: {e}") @@ -492,10 +492,10 @@ def main(): # Ensure volume directory exists before checking disk space try: - os.makedirs(VOLUME_DIR, mode=0o755, exist_ok=True) - socore_uid = pwd.getpwnam('socore').pw_uid - socore_gid = grp.getgrnam('socore').gr_gid - os.chown(VOLUME_DIR, socore_uid, socore_gid) + os.makedirs(VOLUME_DIR, mode=0o754, exist_ok=True) + qemu_uid = pwd.getpwnam('qemu').pw_uid + qemu_gid = grp.getgrnam('qemu').gr_gid + os.chown(VOLUME_DIR, qemu_uid, qemu_gid) logger.debug(f"VOLUME: Ensured volume directory exists: {VOLUME_DIR}") except Exception as e: logger.error(f"VOLUME: Failed to create volume directory: {e}") From 8f75bfb0a46019d2afa31f7a1146c6218613e22b Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Thu, 9 Oct 2025 13:02:02 -0400 Subject: [PATCH 080/124] csv delimiter --- 
salt/sensoroni/defaults.yaml | 6 ++++++ salt/sensoroni/files/sensoroni.json | 8 +++++++- salt/sensoroni/soc_sensoroni.yaml | 21 +++++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/salt/sensoroni/defaults.yaml b/salt/sensoroni/defaults.yaml index bd74da7ec..b4c76841f 100644 --- a/salt/sensoroni/defaults.yaml +++ b/salt/sensoroni/defaults.yaml @@ -5,6 +5,12 @@ sensoroni: enabled: False timeout_ms: 900000 parallel_limit: 5 + export: + timeoutMs: 1200000 + cacheRefreshIntervalMs: 10000 + exportMetricLimit: 10000 + exportEventLimit: 10000 + csvSeparator: ',' node_checkin_interval_ms: 10000 sensoronikey: soc_host: diff --git a/salt/sensoroni/files/sensoroni.json b/salt/sensoroni/files/sensoroni.json index c7079c08c..a0f512fa2 100644 --- a/salt/sensoroni/files/sensoroni.json +++ b/salt/sensoroni/files/sensoroni.json @@ -21,7 +21,13 @@ }, {%- endif %} "importer": {}, - "export": {}, + "export": { + "timeoutMs": {{ SENSORONIMERGED.config.export.timeout_ms }}, + "cacheRefreshIntervalMs": {{ SENSORONIMERGED.config.export.cache_refresh_interval_ms }}, + "exportMetricLimit": {{ SENSORONIMERGED.config.export.export_metric_limit }}, + "exportEventLimit": {{ SENSORONIMERGED.config.export.export_event_limit }}, + "csvSeparator": "{{ SENSORONIMERGED.config.export.csv_separator }}" + }, "statickeyauth": { "apiKey": "{{ GLOBALS.sensoroni_key }}" {% if GLOBALS.is_sensor %} diff --git a/salt/sensoroni/soc_sensoroni.yaml b/salt/sensoroni/soc_sensoroni.yaml index 2344655f6..cf69ec52a 100644 --- a/salt/sensoroni/soc_sensoroni.yaml +++ b/salt/sensoroni/soc_sensoroni.yaml @@ -17,6 +17,27 @@ sensoroni: description: Parallel limit for the analyzer. advanced: True helpLink: cases.html + export: + timeout_ms: + description: Timeout period for the exporter to finish export-related tasks. + advanced: True + helpLink: reports.html + cache_refresh_interval_ms: + description: Refresh interval for cache updates. Longer intervals result in less compute usage but risks stale data included in reports. + advanced: True + helpLink: reports.html + export_metric_limit: + description: Maximum number of metric values to include in each metric aggregation group. + advanced: True + helpLink: reports.html + export_event_limit: + description: Maximum number of events to include per event list. + advanced: True + helpLink: reports.html + csv_separator: + description: Separator character to use for CSV exports. + advanced: False + helpLink: reports.html node_checkin_interval_ms: description: Interval in ms to checkin to the soc_host. 
From feddd90e41bf23d2c1cd0089f6950a53aafd429f Mon Sep 17 00:00:00 2001
From: Jason Ertel
Date: Thu, 9 Oct 2025 20:50:09 -0400
Subject: [PATCH 081/124] missed commit

---
 salt/sensoroni/defaults.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/salt/sensoroni/defaults.yaml b/salt/sensoroni/defaults.yaml
index b4c76841f..acfae6766 100644
--- a/salt/sensoroni/defaults.yaml
+++ b/salt/sensoroni/defaults.yaml
@@ -6,11 +6,11 @@ sensoroni:
     timeout_ms: 900000
     parallel_limit: 5
     export:
-      timeoutMs: 1200000
-      cacheRefreshIntervalMs: 10000
-      exportMetricLimit: 10000
-      exportEventLimit: 10000
-      csvSeparator: ','
+      timeout_ms: 1200000
+      cache_refresh_interval_ms: 10000
+      export_metric_limit: 10000
+      export_event_limit: 10000
+      csv_separator: ','
   node_checkin_interval_ms: 10000
   sensoronikey:
   soc_host:
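Taken together, patches 080 and 081 leave snake_case keys in the pillar while the Jinja template renders them as camelCase JSON. A quick, hedged spot check of the rendered result (the config path is an assumption; locate the rendered sensoroni.json on the node first):

```bash
# Path is illustrative only; adjust to the actual rendered config location.
jq '.export' /opt/so/conf/sensoroni/sensoroni.json
# Expected shape with the defaults above:
# {"timeoutMs":1200000,"cacheRefreshIntervalMs":10000,
#  "exportMetricLimit":10000,"exportEventLimit":10000,"csvSeparator":","}
```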
From 09d699432a29e66163781703fb8a6b63a7fae733 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Fri, 10 Oct 2025 17:07:02 -0400
Subject: [PATCH 082/124] ui notification of nsm volume creation failure and
 cleanup of vm inventory in soc grid config for hypervisor

---
 salt/_modules/hypervisor.py                      |  91 ++++++++
 salt/hypervisor/map.jinja                        |  22 +-
 .../engines/master/virtual_node_manager.py       | 204 +++++++++++++++++-
 salt/soc/dyanno/hypervisor/map.jinja             |   1 +
 .../dyanno/hypervisor/remove_failed_vm.sls       |  51 +++++
 5 files changed, 355 insertions(+), 14 deletions(-)
 create mode 100644 salt/_modules/hypervisor.py
 create mode 100644 salt/soc/dyanno/hypervisor/remove_failed_vm.sls

diff --git a/salt/_modules/hypervisor.py b/salt/_modules/hypervisor.py
new file mode 100644
index 000000000..7119c8507
--- /dev/null
+++ b/salt/_modules/hypervisor.py
@@ -0,0 +1,91 @@
+#!/opt/saltstack/salt/bin/python3
+
+# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
+# https://securityonion.net/license; you may not use this file except in compliance with the
+# Elastic License 2.0.
+#
+# Note: Per the Elastic License 2.0, the second limitation states:
+#
+#   "You may not move, change, disable, or circumvent the license key functionality
+#    in the software, and you may not remove or obscure any functionality in the
+#    software that is protected by the license key."
+
+"""
+Salt execution module for hypervisor operations.
+
+This module provides functions for managing hypervisor configurations,
+including VM file management.
+"""
+
+import json
+import logging
+import os
+
+log = logging.getLogger(__name__)
+
+__virtualname__ = 'hypervisor'
+
+
+def __virtual__():
+    """
+    Only load this module if we're on a system that can manage hypervisors.
+    """
+    return __virtualname__
+
+
+def remove_vm_from_vms_file(vms_file_path, vm_hostname, vm_role):
+    """
+    Remove a VM entry from the hypervisorVMs file.
+
+    Args:
+        vms_file_path (str): Path to the hypervisorVMs file
+        vm_hostname (str): Hostname of the VM to remove (without role suffix)
+        vm_role (str): Role of the VM
+
+    Returns:
+        dict: Result dictionary with success status and message
+
+    CLI Example:
+        salt '*' hypervisor.remove_vm_from_vms_file /opt/so/saltstack/local/salt/hypervisor/hosts/hypervisor1VMs node1 nsm
+    """
+    try:
+        # Check if file exists
+        if not os.path.exists(vms_file_path):
+            msg = f"VMs file not found: {vms_file_path}"
+            log.error(msg)
+            return {'result': False, 'comment': msg}
+
+        # Read current VMs
+        with open(vms_file_path, 'r') as f:
+            content = f.read().strip()
+            vms = json.loads(content) if content else []
+
+        # Find and remove the VM entry
+        original_count = len(vms)
+        vms = [vm for vm in vms if not (vm.get('hostname') == vm_hostname and vm.get('role') == vm_role)]
+
+        if len(vms) < original_count:
+            # VM was found and removed, write back to file
+            with open(vms_file_path, 'w') as f:
+                json.dump(vms, f, indent=2)
+
+            # Set socore:socore ownership (939:939)
+            os.chown(vms_file_path, 939, 939)
+
+            msg = f"Removed VM {vm_hostname}_{vm_role} from {vms_file_path}"
+            log.info(msg)
+            return {'result': True, 'comment': msg}
+        else:
+            msg = f"VM {vm_hostname}_{vm_role} not found in {vms_file_path}"
+            log.warning(msg)
+            return {'result': False, 'comment': msg}
+
+    except json.JSONDecodeError as e:
+        msg = f"Failed to parse JSON in {vms_file_path}: {str(e)}"
+        log.error(msg)
+        return {'result': False, 'comment': msg}
+    except Exception as e:
+        msg = f"Failed to remove VM {vm_hostname}_{vm_role} from {vms_file_path}: {str(e)}"
+        log.error(msg)
+        return {'result': False, 'comment': msg}
diff --git a/salt/hypervisor/map.jinja b/salt/hypervisor/map.jinja
index 3519f6078..087fd7bf7 100644
--- a/salt/hypervisor/map.jinja
+++ b/salt/hypervisor/map.jinja
@@ -58,10 +58,26 @@
     {% set role = vm.get('role', '') %}
     {% do salt.log.debug('salt/hypervisor/map.jinja: Processing VM - hostname: ' ~ hostname ~ ', role: ' ~ role) %}
 
-    {# Load VM configuration from config file #}
+    {# Try to load VM configuration from config file first, then .error file if config doesn't exist #}
     {% set vm_file = 'hypervisor/hosts/' ~ hypervisor ~ '/' ~ hostname ~ '_' ~ role %}
+    {% set vm_error_file = vm_file ~ '.error' %}
     {% do salt.log.debug('salt/hypervisor/map.jinja: VM config file: ' ~ vm_file) %}
-    {% import_json vm_file as vm_state %}
+
+    {# Check if base config file exists #}
+    {% set config_exists = salt['file.file_exists']('/opt/so/saltstack/local/salt/' ~ vm_file) %}
+    {% set error_exists = salt['file.file_exists']('/opt/so/saltstack/local/salt/' ~ vm_error_file) %}
+
+    {% set vm_state = none %}
+    {% if config_exists %}
+      {% import_json vm_file as vm_state %}
+      {% do salt.log.debug('salt/hypervisor/map.jinja: Loaded VM config from base file') %}
+    {% elif error_exists %}
+      {% import_json vm_error_file as vm_state %}
+      {% do salt.log.debug('salt/hypervisor/map.jinja: Loaded VM config from .error file') %}
+    {% else %}
+      {% do salt.log.warning('salt/hypervisor/map.jinja: No config or error file found for VM ' ~ hostname ~ '_' ~ role) %}
+    {% endif %}
+
+    {% if vm_state %}
     {% do salt.log.debug('salt/hypervisor/map.jinja: VM config content: ' ~ vm_state | tojson) %}
     {% set vm_data = {'config': vm_state.config} %}
@@ -85,7 +101,7 @@
       {% endif %}
       {% do vms.update({hostname ~ '_' ~ role: vm_data}) %}
     {% else %}
-      {% do salt.log.debug('salt/hypervisor/map.jinja: Config file empty: ' ~ vm_file) %}
+      {% do salt.log.debug('salt/hypervisor/map.jinja: Skipping VM ' ~ hostname ~ '_' ~ role ~ ' - no config available') %}
     {% endif %}
   {% endfor %}
diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py
index 1c4eae7ea..270a93c11 100644
--- a/salt/salt/engines/master/virtual_node_manager.py
+++ b/salt/salt/engines/master/virtual_node_manager.py
@@ -117,7 +117,7 @@ Exit Codes:
     4: VM provisioning failure (so-salt-cloud execution failed)
 
 Logging:
-    Log files are written to /opt/so/log/salt/engines/virtual_node_manager.log
+    Log files are written to /opt/so/log/salt/engines/virtual_node_manager
     Comprehensive logging includes:
     - Hardware validation details
     - PCI ID conversion process
@@ -138,23 +138,49 @@ import pwd
 import grp
 import salt.config
 import salt.runner
+import salt.client
 from typing import Dict, List, Optional, Tuple, Any
 from datetime import datetime, timedelta
 from threading import Lock
 
-# Get socore uid/gid
-SOCORE_UID = pwd.getpwnam('socore').pw_uid
-SOCORE_GID = grp.getgrnam('socore').gr_gid
-
-# Initialize Salt runner once
+# Initialize Salt runner and local client once
 opts = salt.config.master_config('/etc/salt/master')
 opts['output'] = 'json'
 runner = salt.runner.RunnerClient(opts)
+local = salt.client.LocalClient()
+
+# Get socore uid/gid for file ownership
+SOCORE_UID = pwd.getpwnam('socore').pw_uid
+SOCORE_GID = grp.getgrnam('socore').gr_gid
 
 # Configure logging
 log = logging.getLogger(__name__)
 log.setLevel(logging.DEBUG)
+# Prevent propagation to parent loggers to avoid duplicate log entries
+log.propagate = False
+
+# Add file handler for dedicated log file
+log_dir = '/opt/so/log/salt'
+log_file = os.path.join(log_dir, 'virtual_node_manager')
+
+# Create log directory if it doesn't exist
+os.makedirs(log_dir, exist_ok=True)
+
+# Create file handler
+file_handler = logging.FileHandler(log_file)
+file_handler.setLevel(logging.DEBUG)
+
+# Create formatter
+formatter = logging.Formatter(
+    '%(asctime)s [%(name)s:%(lineno)d][%(levelname)-8s][%(process)d] %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
+file_handler.setFormatter(formatter)
+
+# Add handler to logger
+log.addHandler(file_handler)
+
 # Constants
 DEFAULT_INTERVAL = 30
 DEFAULT_BASE_PATH = '/opt/so/saltstack/local/salt/hypervisor/hosts'
@@ -203,6 +229,39 @@ def write_json_file(file_path: str, data: Any) -> None:
     except Exception as e:
         log.error("Failed to write JSON file %s: %s", file_path, str(e))
         raise
+
+def remove_vm_from_vms_file(vms_file_path: str, vm_hostname: str, vm_role: str) -> bool:
+    """
+    Remove a VM entry from the hypervisorVMs file.
+
+    Args:
+        vms_file_path: Path to the hypervisorVMs file
+        vm_hostname: Hostname of the VM to remove (without role suffix)
+        vm_role: Role of the VM
+
+    Returns:
+        bool: True if VM was removed, False otherwise
+    """
+    try:
+        # Read current VMs
+        vms = read_json_file(vms_file_path)
+
+        # Find and remove the VM entry
+        original_count = len(vms)
+        vms = [vm for vm in vms if not (vm.get('hostname') == vm_hostname and vm.get('role') == vm_role)]
+
+        if len(vms) < original_count:
+            # VM was found and removed, write back to file
+            write_json_file(vms_file_path, vms)
+            log.info("Removed VM %s_%s from %s", vm_hostname, vm_role, vms_file_path)
+            return True
+        else:
+            log.warning("VM %s_%s not found in %s", vm_hostname, vm_role, vms_file_path)
+            return False
+
+    except Exception as e:
+        log.error("Failed to remove VM %s_%s from %s: %s", vm_hostname, vm_role, vms_file_path, str(e))
+        return False
+
 
 def read_yaml_file(file_path: str) -> dict:
     """Read and parse a YAML file."""
@@ -558,6 +617,13 @@ def mark_vm_failed(vm_file: str, error_code: int, message: str) -> None:
         # Remove the original file since we'll create an error file
         os.remove(vm_file)
 
+        # Clear hardware resource claims so failed VMs don't consume resources
+        # Keep nsm_size for reference but clear cpu, memory, sfp, copper
+        config.pop('cpu', None)
+        config.pop('memory', None)
+        config.pop('sfp', None)
+        config.pop('copper', None)
+
         # Create error file
         error_file = f"{vm_file}.error"
         data = {
@@ -586,8 +652,16 @@ def mark_invalid_hardware(hypervisor_path: str, vm_name: str, config: dict, erro
     # Join all messages with proper sentence structure
     full_message = "Hardware validation failure: " + " ".join(error_messages)
 
+    # Clear hardware resource claims so failed VMs don't consume resources
+    # Keep nsm_size for reference but clear cpu, memory, sfp, copper
+    config_copy = config.copy()
+    config_copy.pop('cpu', None)
+    config_copy.pop('memory', None)
+    config_copy.pop('sfp', None)
+    config_copy.pop('copper', None)
+
     data = {
-        'config': config,
+        'config': config_copy,
         'status': 'error',
         'timestamp': datetime.now().isoformat(),
         'error_details': {
@@ -634,6 +708,61 @@ def validate_vrt_license() -> bool:
         log.error("Error reading license file: %s", str(e))
         return False
 
+def check_hypervisor_disk_space(hypervisor: str, size_gb: int) -> Tuple[bool, Optional[str]]:
+    """
+    Check if hypervisor has sufficient disk space for volume creation.
+
+    Args:
+        hypervisor: Hypervisor hostname
+        size_gb: Required size in GB
+
+    Returns:
+        Tuple of (has_space, error_message)
+    """
+    try:
+        # Get hypervisor minion ID
+        hypervisor_minion = f"{hypervisor}_hypervisor"
+
+        # Check disk space on /nsm/libvirt/volumes using LocalClient
+        result = local.cmd(
+            hypervisor_minion,
+            'cmd.run',
+            ["df -BG /nsm/libvirt/volumes | tail -1 | awk '{print $4}' | sed 's/G//'"]
+        )
+
+        if not result or hypervisor_minion not in result:
+            log.error("Failed to check disk space on hypervisor %s", hypervisor)
+            return False, "Failed to check disk space on hypervisor"
+
+        available_gb_str = result[hypervisor_minion].strip()
+        if not available_gb_str:
+            log.error("Empty disk space response from hypervisor %s", hypervisor)
+            return False, "Failed to get disk space information"
+
+        try:
+            available_gb = float(available_gb_str)
+        except ValueError:
+            log.error("Invalid disk space value from hypervisor %s: %s", hypervisor, available_gb_str)
+            return False, f"Invalid disk space value: {available_gb_str}"
+
+        # Add 10% buffer for filesystem overhead
+        required_gb = size_gb * 1.1
+
+        log.debug("Hypervisor %s disk space check: Available=%.2fGB, Required=%.2fGB",
+                  hypervisor, available_gb, required_gb)
+
+        if available_gb < required_gb:
+            error_msg = f"Insufficient disk space on hypervisor {hypervisor}. Available: {available_gb:.2f}GB, Required: {required_gb:.2f}GB (including 10% overhead)"
+            log.error(error_msg)
+            return False, error_msg
+
+        log.info("Hypervisor %s has sufficient disk space for %dGB volume", hypervisor, size_gb)
+        return True, None
+
+    except Exception as e:
+        log.error("Error checking disk space on hypervisor %s: %s", hypervisor, str(e))
+        return False, f"Error checking disk space: {str(e)}"
+
 def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None:
     """
     Process a single VM creation request.
@@ -695,6 +824,33 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None:
         log.warning("VM: %s - Both disk and nsm_size specified. disk takes precedence, nsm_size will be ignored.", vm_name)
 
+    # Check disk space BEFORE creating VM if nsm_size is specified
+    if has_nsm_size and not has_disk:
+        size_gb = int(vm_config['nsm_size'])
+        has_space, space_error = check_hypervisor_disk_space(hypervisor, size_gb)
+        if not has_space:
+            log.error("VM: %s - %s", vm_name, space_error)
+
+            # Send Volume nsm Create Failed status event
+            try:
+                subprocess.run([
+                    'so-salt-emit-vm-deployment-status-event',
+                    '-v', vm_name,
+                    '-H', hypervisor,
+                    '-s', 'Volume nsm Create Failed'
+                ], check=True)
+            except subprocess.CalledProcessError as e:
+                log.error("Failed to emit volume create failed event for %s: %s", vm_name, str(e))
+
+            mark_invalid_hardware(
+                hypervisor_path,
+                vm_name,
+                vm_config,
+                {'disk_space': f"Insufficient disk space for {size_gb}GB volume: {space_error}"}
+            )
+            return
+        log.debug("VM: %s - Hypervisor has sufficient space for %dGB volume", vm_name, size_gb)
+
     # Initial hardware validation against model
     is_valid, errors = validate_hardware_request(model_config, vm_config)
     if not is_valid:
@@ -967,12 +1123,21 @@ def process_hypervisor(hypervisor_path: str) -> None:
         if not nodes_config:
             log.debug("Empty VMs configuration in %s", vms_file)
 
-        # Get existing VMs
+        # Get existing VMs and track failed VMs separately
         existing_vms = set()
+        failed_vms = set()  # VMs with .error files
         for file_path in glob.glob(os.path.join(hypervisor_path, '*_*')):
             basename = os.path.basename(file_path)
-            # Skip error and status files
-            if not basename.endswith('.error') and not basename.endswith('.status'):
+            # Skip status files
+            if basename.endswith('.status'):
+                continue
+            # Track VMs with .error files separately
+            if basename.endswith('.error'):
+                vm_name = basename[:-6]  # Remove '.error' suffix
+                failed_vms.add(vm_name)
+                existing_vms.add(vm_name)  # Also add to existing to prevent recreation
+                log.debug(f"Found failed VM with .error file: {vm_name}")
+            else:
                 existing_vms.add(basename)
 
         # Process new VMs
@@ -989,12 +1154,29 @@ def process_hypervisor(hypervisor_path: str) -> None:
             # process_vm_creation handles its own locking
             process_vm_creation(hypervisor_path, vm_config)
 
-        # Process VM deletions
+        # Process VM deletions (but skip failed VMs that only have .error files)
         vms_to_delete = existing_vms - configured_vms
         log.debug(f"Existing VMs: {existing_vms}")
         log.debug(f"Configured VMs: {configured_vms}")
+        log.debug(f"Failed VMs: {failed_vms}")
         log.debug(f"VMs to delete: {vms_to_delete}")
         for vm_name in vms_to_delete:
+            # Skip deletion if VM only has .error file (no actual VM to delete)
+            if vm_name in failed_vms:
+                error_file = os.path.join(hypervisor_path, f"{vm_name}.error")
+                base_file = os.path.join(hypervisor_path, vm_name)
+                # Only skip if there's no base file (VM never successfully created)
+                if not os.path.exists(base_file):
+                    log.info(f"Skipping deletion of failed VM {vm_name} (VM never successfully created)")
+                    # Clean up the .error and .status files since VM is no longer configured
+                    if os.path.exists(error_file):
+                        os.remove(error_file)
+                        log.info(f"Removed .error file for unconfigured VM: {vm_name}")
+                    status_file = os.path.join(hypervisor_path, f"{vm_name}.status")
+                    if os.path.exists(status_file):
+                        os.remove(status_file)
+                        log.info(f"Removed .status file for unconfigured VM: {vm_name}")
+                    continue
             log.info(f"Initiating deletion process for VM: {vm_name}")
             process_vm_deletion(hypervisor_path, vm_name)
diff --git a/salt/soc/dyanno/hypervisor/map.jinja b/salt/soc/dyanno/hypervisor/map.jinja
index 4a5107371..139003f17 100644
--- a/salt/soc/dyanno/hypervisor/map.jinja
+++ b/salt/soc/dyanno/hypervisor/map.jinja
@@ -3,6 +3,7 @@
 {# Define the list of process steps in order (case-sensitive) #}
 {% set PROCESS_STEPS = [
     'Processing',
+    'Volume nsm Create Failed',
     'IP Configuration',
     'Starting Create',
     'Executing Deploy Script',
diff --git a/salt/soc/dyanno/hypervisor/remove_failed_vm.sls b/salt/soc/dyanno/hypervisor/remove_failed_vm.sls
new file mode 100644
index 000000000..a47eff595
--- /dev/null
+++ b/salt/soc/dyanno/hypervisor/remove_failed_vm.sls
@@ -0,0 +1,51 @@
+# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
+# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
+# https://securityonion.net/license; you may not use this file except in compliance with the
+# Elastic License 2.0.
+#
+# Note: Per the Elastic License 2.0, the second limitation states:
+#
+#   "You may not move, change, disable, or circumvent the license key functionality
+#    in the software, and you may not remove or obscure any functionality in the
+#    software that is protected by the license key."
+
+{% if 'vrt' in salt['pillar.get']('features', []) %}
+
+{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Running') %}
+{% set vm_name = pillar.get('vm_name') %}
+{% set hypervisor = pillar.get('hypervisor') %}
+
+{% if vm_name and hypervisor %}
+{% set vm_parts = vm_name.split('_') %}
+{% if vm_parts | length >= 2 %}
+{% set vm_role = vm_parts[-1] %}
+{% set vm_hostname = '_'.join(vm_parts[:-1]) %}
+{% set vms_file = '/opt/so/saltstack/local/salt/hypervisor/hosts/' ~ hypervisor ~ 'VMs' %}
+
+{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Removing VM ' ~ vm_name ~ ' from ' ~ vms_file) %}
+
+remove_vm_{{ vm_name }}_from_vms_file:
+  module.run:
+    - name: hypervisor.remove_vm_from_vms_file
+    - vms_file_path: {{ vms_file }}
+    - vm_hostname: {{ vm_hostname }}
+    - vm_role: {{ vm_role }}
+
+{% else %}
+{% do salt.log.error('soc/dyanno/hypervisor/remove_failed_vm: Invalid vm_name format: ' ~ vm_name) %}
+{% endif %}
+{% else %}
+{% do salt.log.error('soc/dyanno/hypervisor/remove_failed_vm: Missing required pillar data (vm_name or hypervisor)') %}
+{% endif %}
+
+{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Completed') %}
+
+{% else %}
+
+{% do salt.log.error(
+    'Hypervisor nodes are a feature supported only for customers with a valid license. '
+    'Contact Security Onion Solutions, LLC via our website at https://securityonionsolutions.com '
+    'for more information about purchasing a license to enable this feature.'
+) %}
+
+{% endif %}
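The new execution module can be exercised by hand using the CLI example embedded in its docstring; the hostnames below are placeholders, not names from a real grid:

```bash
# Mirrors the docstring's CLI example; "hypervisor1" and "node1" are
# placeholder names for a hypervisor host and a VM entry.
salt '*' hypervisor.remove_vm_from_vms_file \
    /opt/so/saltstack/local/salt/hypervisor/hosts/hypervisor1VMs node1 nsm
```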
From fe3caf66a112e032107c1cc8f480713c502615ac Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Fri, 10 Oct 2025 17:21:09 -0400
Subject: [PATCH 083/124] update failure description

---
 salt/salt/engines/master/virtual_node_manager.py | 4 ++--
 salt/soc/dyanno/hypervisor/map.jinja             | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py
index 270a93c11..cc3a3fd81 100644
--- a/salt/salt/engines/master/virtual_node_manager.py
+++ b/salt/salt/engines/master/virtual_node_manager.py
@@ -831,13 +831,13 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None:
         if not has_space:
             log.error("VM: %s - %s", vm_name, space_error)
 
-            # Send Volume nsm Create Failed status event
+            # Send Hypervisor NSM Disk Full status event
             try:
                 subprocess.run([
                     'so-salt-emit-vm-deployment-status-event',
                     '-v', vm_name,
                     '-H', hypervisor,
-                    '-s', 'Volume nsm Create Failed'
+                    '-s', 'Hypervisor NSM Disk Full'
                 ], check=True)
             except subprocess.CalledProcessError as e:
                 log.error("Failed to emit volume create failed event for %s: %s", vm_name, str(e))
diff --git a/salt/soc/dyanno/hypervisor/map.jinja b/salt/soc/dyanno/hypervisor/map.jinja
index 139003f17..8fa54c146 100644
--- a/salt/soc/dyanno/hypervisor/map.jinja
+++ b/salt/soc/dyanno/hypervisor/map.jinja
@@ -3,7 +3,7 @@
 {# Define the list of process steps in order (case-sensitive) #}
 {% set PROCESS_STEPS = [
     'Processing',
-    'Volume nsm Create Failed',
+    'Hypervisor NSM Disk Full',
     'IP Configuration',
     'Starting Create',
     'Executing Deploy Script',

From 254e782da67c02436e089dfdafa73fe3c88af937 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Fri, 10 Oct 2025 22:15:20 -0400
Subject: [PATCH 084/124] add volume creation and configuration process steps

---
 salt/soc/dyanno/hypervisor/map.jinja | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/salt/soc/dyanno/hypervisor/map.jinja b/salt/soc/dyanno/hypervisor/map.jinja
index 8fa54c146..cb0810959 100644
--- a/salt/soc/dyanno/hypervisor/map.jinja
+++ b/salt/soc/dyanno/hypervisor/map.jinja
@@ -9,6 +9,8 @@
     'Executing Deploy Script',
     'Initialize Minion Pillars',
     'Created Instance',
+    'Volume Creation',
+    'Volume Configuration',
     'Hardware Configuration',
     'Highstate Initiated',
     'Destroyed Instance'

From f9c5aa3fefa50af99a4722005adfbb5c9f1a959e Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Tue, 14 Oct 2025 09:36:05 -0400
Subject: [PATCH 085/124] remove PROCESS_STEPS from hypervisor annotation

---
 salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
index 926263b9d..8e49b60b5 100644
--- a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
+++ b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
@@ -13,7 +13,6 @@
 
 {%- import_yaml 'soc/dyanno/hypervisor/hypervisor.yaml' as ANNOTATION -%}
 {%- from 'hypervisor/map.jinja' import HYPERVISORS -%}
-{%- from 'soc/dyanno/hypervisor/map.jinja' import PROCESS_STEPS -%}
 
 {%- set TEMPLATE = ANNOTATION.hypervisor.hosts.pop('defaultHost') -%}
 
@@ -27,7 +26,6 @@
 {%- if baseDomainStatus == 'Initialized' %}
 {%- if vm_list %}
 #### Virtual Machines
-Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. "Last Updated" shows when status changed. After "Highstate Initiated", only "Destroyed Instance" updates the timestamp.
 
 | Name               | Status             | CPU Cores | Memory (GB)| Disk | Copper | SFP  | Last Updated        |
 |--------------------|--------------------|-----------|------------|------|--------|------|---------------------|
@@ -42,7 +40,6 @@ Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %},
 {%- endfor %}
 {%- else %}
 #### Virtual Machines
-Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. "Last Updated" shows when status changed. After "Highstate Initiated", only "Destroyed Instance" updates the timestamp.
 
 No Virtual Machines Found
 {%- endif %}

From 793e98f75ce3c4e939606d3b80ce69da31d8b8d2 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Tue, 14 Oct 2025 10:37:16 -0400
Subject: [PATCH 086/124] update annotation after failed vm removal from VMs
 file

---
 salt/salt/engines/master/virtual_node_manager.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py
index cc3a3fd81..6d88bd688 100644
--- a/salt/salt/engines/master/virtual_node_manager.py
+++ b/salt/salt/engines/master/virtual_node_manager.py
@@ -1176,6 +1176,14 @@ def process_hypervisor(hypervisor_path: str) -> None:
                     if os.path.exists(status_file):
                         os.remove(status_file)
                         log.info(f"Removed .status file for unconfigured VM: {vm_name}")
+
+                    # Trigger hypervisor annotation update to reflect the removal
+                    try:
+                        log.info(f"Triggering hypervisor annotation update after removing failed VM: {vm_name}")
+                        runner.cmd('state.orch', ['orch.dyanno_hypervisor'])
+                    except Exception as e:
+                        log.error(f"Failed to trigger hypervisor annotation update for {vm_name}: {str(e)}")
+
                     continue
             log.info(f"Initiating deletion process for VM: {vm_name}")
             process_vm_deletion(hypervisor_path, vm_name)

From d56af4acab4a13a3f484f76980900483c6634432 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Tue, 14 Oct 2025 10:58:57 -0400
Subject: [PATCH 087/124] remove .log extension

---
 salt/manager/tools/sbin_jinja/so-salt-cloud | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud
index a1f99712a..c8177e1bc 100644
--- a/salt/manager/tools/sbin_jinja/so-salt-cloud
+++ b/salt/manager/tools/sbin_jinja/so-salt-cloud
@@ -211,7 +211,7 @@ Exit Codes:
 
 Logging:
 
-- Logs are written to /opt/so/log/salt/so-salt-cloud.log.
+- Logs are written to /opt/so/log/salt/so-salt-cloud.
 - Both file and console logging are enabled for real-time monitoring.
 
 """
@@ -233,7 +233,7 @@ local = salt.client.LocalClient()
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
-file_handler = logging.FileHandler('/opt/so/log/salt/so-salt-cloud.log')
+file_handler = logging.FileHandler('/opt/so/log/salt/so-salt-cloud')
 console_handler = logging.StreamHandler()
 
 formatter = logging.Formatter('%(asctime)s %(message)s')

From 860710f5f922cad8ab82ce4dcc3495d18fd3529c Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Tue, 14 Oct 2025 11:03:00 -0400
Subject: [PATCH 088/124] remove .log extension

---
 salt/storage/tools/sbin/so-nsm-mount-nvme   | 2 +-
 salt/storage/tools/sbin/so-nsm-mount-virtio | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/salt/storage/tools/sbin/so-nsm-mount-nvme b/salt/storage/tools/sbin/so-nsm-mount-nvme
index f612c9915..fdde0c2e9 100644
--- a/salt/storage/tools/sbin/so-nsm-mount-nvme
+++ b/salt/storage/tools/sbin/so-nsm-mount-nvme
@@ -81,7 +81,7 @@
 
 set -e
 
-LOG_FILE="/opt/so/log/so-nsm-mount-nvme.log"
+LOG_FILE="/opt/so/log/so-nsm-mount-nvme"
 VG_NAME=""
 LV_NAME="nsm"
 MOUNT_POINT="/nsm"
diff --git a/salt/storage/tools/sbin/so-nsm-mount-virtio b/salt/storage/tools/sbin/so-nsm-mount-virtio
index 8385d7c21..03476e378 100644
--- a/salt/storage/tools/sbin/so-nsm-mount-virtio
+++ b/salt/storage/tools/sbin/so-nsm-mount-virtio
@@ -55,11 +55,11 @@
 #      - Mount operation failed
 #
 # Logging:
-#   - All operations logged to /opt/so/log/so-nsm-mount-virtio.log
+#   - All operations logged to /opt/so/log/so-nsm-mount-virtio
 
 set -e
 
-LOG_FILE="/opt/so/log/so-nsm-mount-virtio.log"
+LOG_FILE="/opt/so/log/so-nsm-mount-virtio"
 DEVICE="/dev/vdb"
 MOUNT_POINT="/nsm"
From 378d37d74ebb0530fd070eddde2a196c01fab6e5 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Tue, 14 Oct 2025 12:44:51 -0500
Subject: [PATCH 089/124] add event.module to elasticsearch server logs

---
 salt/elasticsearch/defaults.yaml              | 64 +++++++++++++++++++
 salt/elasticsearch/files/ingest/global@custom |  1 +
 salt/elasticsearch/soc_elasticsearch.yaml     |  1 +
 salt/manager/tools/sbin/soup                  |  2 +
 4 files changed, 68 insertions(+)

diff --git a/salt/elasticsearch/defaults.yaml b/salt/elasticsearch/defaults.yaml
index 23eee8df0..592f47a2b 100644
--- a/salt/elasticsearch/defaults.yaml
+++ b/salt/elasticsearch/defaults.yaml
@@ -1991,6 +1991,70 @@ elasticsearch:
               set_priority:
                 priority: 50
             min_age: 30d
+    so-logs-elasticsearch_x_server:
+      index_sorting: false
+      index_template:
+        composed_of:
+          - logs-elasticsearch.server@package
+          - logs-elasticsearch.server@custom
+          - so-fleet_integrations.ip_mappings-1
+          - so-fleet_globals-1
+          - so-fleet_agent_id_verification-1
+        data_stream:
+          allow_custom_routing: false
+          hidden: false
+        ignore_missing_component_templates:
+          - logs-elasticsearch.server@custom
+        index_patterns:
+          - logs-elasticsearch.server-*
+        priority: 501
+        template:
+          mappings:
+            _meta:
+              managed: true
+              managed_by: security_onion
+              package:
+                name: elastic_agent
+          settings:
+            index:
+              lifecycle:
+                name: so-logs-elasticsearch.server-logs
+              mapping:
+                total_fields:
+                  limit: 5000
+              number_of_replicas: 0
+              sort:
+                field: '@timestamp'
+                order: desc
+      policy:
+        _meta:
+          managed: true
+          managed_by: security_onion
+          package:
+            name: elastic_agent
+        phases:
+          cold:
+            actions:
+              set_priority:
+                priority: 0
+            min_age: 60d
+          delete:
+            actions:
+              delete: {}
+            min_age: 365d
+          hot:
+            actions:
+              rollover:
+                max_age: 30d
+                max_primary_shard_size: 50gb
+              set_priority:
+                priority: 100
+            min_age: 0ms
+          warm:
+            actions:
+              set_priority:
+                priority: 50
+            min_age: 30d
     so-logs-endpoint_x_actions:
       index_sorting: false
       index_template:
diff --git a/salt/elasticsearch/files/ingest/global@custom b/salt/elasticsearch/files/ingest/global@custom
index c92c15612..8e48eb0b9 100644
--- a/salt/elasticsearch/files/ingest/global@custom
+++ b/salt/elasticsearch/files/ingest/global@custom
@@ -23,6 +23,7 @@
     { "set": { "if": "ctx.event?.module == 'fim'", "override": true, "field": "event.module", "value": "file_integrity" } },
     { "rename": { "if": "ctx.winlog?.provider_name == 'Microsoft-Windows-Windows Defender'", "ignore_missing": true, "field": "winlog.event_data.Threat Name", "target_field": "winlog.event_data.threat_name" } },
     { "set": { "if": "ctx?.metadata?.kafka != null" , "field": "kafka.id", "value": "{{metadata.kafka.partition}}{{metadata.kafka.offset}}{{metadata.kafka.timestamp}}", "ignore_failure": true } },
+    { "set": { "if": "ctx.event?.dataset != null && ctx.event?.dataset == 'elasticsearch.server'", "field": "event.module", "value":"elasticsearch" }},
     {"append": {"field":"related.ip","value":["{{source.ip}}","{{destination.ip}}"],"allow_duplicates":false,"if":"ctx?.event?.dataset == 'endpoint.events.network' && ctx?.source?.ip != null","ignore_failure":true}},
     {"foreach": {"field":"host.ip","processor":{"append":{"field":"related.ip","value":"{{_ingest._value}}","allow_duplicates":false}},"if":"ctx?.event?.module == 'endpoint' && ctx?.host?.ip != null","ignore_missing":true, "description":"Extract IPs from Elastic Agent events (host.ip) and adds them to related.ip"}},
     { "remove": { "field": [ "message2", "type", "fields", "category", "module", "dataset", "event.dataset_temp", "dataset_tag_temp", "module_temp", "datastream_dataset_temp" ], "ignore_missing": true, "ignore_failure": true } }
diff --git a/salt/elasticsearch/soc_elasticsearch.yaml b/salt/elasticsearch/soc_elasticsearch.yaml
index c268cc493..097a53296 100644
--- a/salt/elasticsearch/soc_elasticsearch.yaml
+++ b/salt/elasticsearch/soc_elasticsearch.yaml
@@ -392,6 +392,7 @@ elasticsearch:
       so-logs-elastic_agent_x_metricbeat: *indexSettings
       so-logs-elastic_agent_x_osquerybeat: *indexSettings
       so-logs-elastic_agent_x_packetbeat: *indexSettings
+      so-logs-elasticsearch_x_server: *indexSettings
      so-metrics-endpoint_x_metadata: *indexSettings
       so-metrics-endpoint_x_metrics: *indexSettings
       so-metrics-endpoint_x_policy: *indexSettings
diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup
index 8c607963f..952645c61 100755
--- a/salt/manager/tools/sbin/soup
+++ b/salt/manager/tools/sbin/soup
@@ -627,6 +627,8 @@ post_to_2.4.190() {
       update_default_logstash_output
     fi
   fi
+  # Apply new elasticsearch.server index template
+  rollover_index "logs-elasticsearch.server-default"
 
   POSTVERSION=2.4.190
 }
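A hedged way to sanity-check the new processor is Elasticsearch's pipeline simulate API run against global@custom; the curl.config authentication mirrors other scripts in this series, and the sample document is fabricated:

```bash
# Simulates only the global@custom pipeline against a made-up event.
curl -K /opt/so/conf/kibana/curl.config -s -k -L \
  -H "Content-Type: application/json" \
  "https://localhost:9200/_ingest/pipeline/global@custom/_simulate" \
  -d '{"docs":[{"_source":{"event":{"dataset":"elasticsearch.server"}}}]}'
# The response should show event.module set to "elasticsearch".
```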
From 2baf2478da38822a2c91b725b20ff5db7b6d51d4 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Tue, 14 Oct 2025 12:47:03 -0500
Subject: [PATCH 090/124] add additional elasticsearch log output in json
 format for elasticsearch log integration to parse

---
 .../grid-nodes_general/elasticsearch-logs.json |  2 +-
 salt/elasticsearch/files/log4j2.properties     | 21 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/elasticsearch-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/elasticsearch-logs.json
index a2aaf5e0a..0c74a7fd5 100644
--- a/salt/elasticfleet/files/integrations/grid-nodes_general/elasticsearch-logs.json
+++ b/salt/elasticfleet/files/integrations/grid-nodes_general/elasticsearch-logs.json
@@ -40,7 +40,7 @@
           "enabled": true,
           "vars": {
             "paths": [
-              "/opt/so/log/elasticsearch/*.log"
+              "/opt/so/log/elasticsearch/*.json"
             ]
           }
         },
diff --git a/salt/elasticsearch/files/log4j2.properties b/salt/elasticsearch/files/log4j2.properties
index 014fa61a1..0a337e751 100644
--- a/salt/elasticsearch/files/log4j2.properties
+++ b/salt/elasticsearch/files/log4j2.properties
@@ -23,5 +23,26 @@ appender.rolling.strategy.action.condition.type = IfFileName
 appender.rolling.strategy.action.condition.glob = *.gz
 appender.rolling.strategy.action.condition.nested_condition.type = IfLastModified
 appender.rolling.strategy.action.condition.nested_condition.age = 7D
+
+appender.rolling_json.type = RollingFile
+appender.rolling_json.name = rolling_json
+appender.rolling_json.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}.json
+appender.rolling_json.layout.type = ECSJsonLayout
+appender.rolling_json.layout.dataset = elasticsearch.server
+appender.rolling_json.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}-%i.json.gz
+appender.rolling_json.policies.type = Policies
+appender.rolling_json.policies.time.type = TimeBasedTriggeringPolicy
+appender.rolling_json.policies.time.interval = 1
+appender.rolling_json.policies.time.modulate = true
+appender.rolling_json.strategy.type = DefaultRolloverStrategy
+appender.rolling_json.strategy.action.type = Delete
+appender.rolling_json.strategy.action.basepath = /var/log/elasticsearch
+appender.rolling_json.strategy.action.condition.type = IfFileName
+appender.rolling_json.strategy.action.condition.glob = *.gz
+appender.rolling_json.strategy.action.condition.nested_condition.type = IfLastModified
+appender.rolling_json.strategy.action.condition.nested_condition.age = 7D
+
+
 rootLogger.level = info
 rootLogger.appenderRef.rolling.ref = rolling
+rootLogger.appenderRef.rolling_json.ref = rolling_json

From 8773ebc3dcf08b0554b24ab42d5ce62ec071f189 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Tue, 14 Oct 2025 13:34:33 -0500
Subject: [PATCH 091/124] logstash wrappers for troubleshooting

---
 salt/logstash/tools/sbin/so-logstash-flow-stats | 3 +++
 salt/logstash/tools/sbin/so-logstash-health     | 3 +++
 salt/logstash/tools/sbin/so-logstash-jvm-stats  | 3 +++
 3 files changed, 9 insertions(+)
 create mode 100644 salt/logstash/tools/sbin/so-logstash-flow-stats
 create mode 100644 salt/logstash/tools/sbin/so-logstash-health
 create mode 100644 salt/logstash/tools/sbin/so-logstash-jvm-stats

diff --git a/salt/logstash/tools/sbin/so-logstash-flow-stats b/salt/logstash/tools/sbin/so-logstash-flow-stats
new file mode 100644
index 000000000..70f9852e9
--- /dev/null
+++ b/salt/logstash/tools/sbin/so-logstash-flow-stats
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+curl -s -L http://localhost:9600/_node/stats/flow | jq
\ No newline at end of file
diff --git a/salt/logstash/tools/sbin/so-logstash-health b/salt/logstash/tools/sbin/so-logstash-health
new file mode 100644
index 000000000..9520ca507
--- /dev/null
+++ b/salt/logstash/tools/sbin/so-logstash-health
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+curl -s -L http://localhost:9600/_health_report | jq
\ No newline at end of file
diff --git a/salt/logstash/tools/sbin/so-logstash-jvm-stats b/salt/logstash/tools/sbin/so-logstash-jvm-stats
new file mode 100644
index 000000000..5c0e4f59f
--- /dev/null
+++ b/salt/logstash/tools/sbin/so-logstash-jvm-stats
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+curl -s -L http://localhost:9600/_node/stats/jvm | jq
\ No newline at end of file
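These wrappers simply pretty-print Logstash's monitoring API, so they assume jq is installed on the node and the API is listening on its default port 9600. A quick spot check might look like the following (the jq paths reflect the usual Logstash node stats layout and are assumptions, not output guaranteed by this repo):

```bash
# Run on a node where Logstash exposes port 9600 locally.
so-logstash-health | jq '.status'                       # overall health
so-logstash-jvm-stats | jq '.jvm.mem.heap_used_percent' # heap pressure
so-logstash-flow-stats | jq '.flow'                     # throughput/backpressure
```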
From c8aad2b03b9e668086696464aa24b202b4462bc7 Mon Sep 17 00:00:00 2001
From: Corey Ogburn
Date: Tue, 14 Oct 2025 13:24:43 -0600
Subject: [PATCH 092/124] New Config Entries

---
 salt/soc/defaults.yaml | 2 ++
 salt/soc/soc_soc.yaml  | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml
index d93b405b1..0a6285d34 100644
--- a/salt/soc/defaults.yaml
+++ b/salt/soc/defaults.yaml
@@ -1494,6 +1494,8 @@ soc:
       assistant:
         apiUrl: https://onionai.securityonion.net
         healthTimeoutSeconds: 3
+        systemPromptAddendum: ""
+        systemPromptAddendumMaxLength: 50000
       salt:
         queueDir: /opt/sensoroni/queue
         timeoutMs: 45000
diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml
index 3fa914227..589b995ef 100644
--- a/salt/soc/soc_soc.yaml
+++ b/salt/soc/soc_soc.yaml
@@ -589,6 +589,14 @@ soc:
           description: Timeout in seconds for the Onion AI health check.
           global: True
          advanced: True
+        systemPromptAddendum:
+          description: Additional context to provide to the AI assistant about this SOC deployment. This can include information about your environment, policies, or any other relevant details that can help the AI provide more accurate and tailored assistance. Long prompts may be shortened.
+          global: True
+          advanced: False
+        systemPromptAddendumMaxLength:
+          description: Maximum length of the system prompt addendum. Longer prompts will be truncated.
+          global: True
+          advanced: True
       client:
         assistant:
           enabled:

From 3e22043ea6b6f7a691d986f5c4fb7176149ed1db Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Tue, 14 Oct 2025 15:08:51 -0500
Subject: [PATCH 093/124] es logging retention

---
 salt/elasticsearch/files/log4j2.properties | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/salt/elasticsearch/files/log4j2.properties b/salt/elasticsearch/files/log4j2.properties
index 0a337e751..305069882 100644
--- a/salt/elasticsearch/files/log4j2.properties
+++ b/salt/elasticsearch/files/log4j2.properties
@@ -20,7 +20,7 @@ appender.rolling.strategy.type = DefaultRolloverStrategy
 appender.rolling.strategy.action.type = Delete
 appender.rolling.strategy.action.basepath = /var/log/elasticsearch
 appender.rolling.strategy.action.condition.type = IfFileName
-appender.rolling.strategy.action.condition.glob = *.gz
+appender.rolling.strategy.action.condition.glob = *.log.gz
 appender.rolling.strategy.action.condition.nested_condition.type = IfLastModified
 appender.rolling.strategy.action.condition.nested_condition.age = 7D
 
@@ -29,19 +29,13 @@ appender.rolling_json.name = rolling_json
 appender.rolling_json.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}.json
 appender.rolling_json.layout.type = ECSJsonLayout
 appender.rolling_json.layout.dataset = elasticsearch.server
-appender.rolling_json.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}-%i.json.gz
+appender.rolling_json.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}.json.gz
 appender.rolling_json.policies.type = Policies
 appender.rolling_json.policies.time.type = TimeBasedTriggeringPolicy
 appender.rolling_json.policies.time.interval = 1
 appender.rolling_json.policies.time.modulate = true
 appender.rolling_json.strategy.type = DefaultRolloverStrategy
-appender.rolling_json.strategy.action.type = Delete
-appender.rolling_json.strategy.action.basepath = /var/log/elasticsearch
-appender.rolling_json.strategy.action.condition.type = IfFileName
-appender.rolling_json.strategy.action.condition.glob = *.gz
-appender.rolling_json.strategy.action.condition.nested_condition.type = IfLastModified
-appender.rolling_json.strategy.action.condition.nested_condition.age = 7D
-
+appender.rolling_json.strategy.max = 1
 
 rootLogger.level = info
 rootLogger.appenderRef.rolling.ref = rolling

From 348809bdb15404176cf223be68d636acbaa1c83 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Wed, 15 Oct 2025 10:30:14 -0400
Subject: [PATCH 094/124] implement host os overhead based on role

---
 .../hypervisor/soc_hypervisor.yaml.jinja | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
index 8e49b60b5..ac2fd6fea 100644
--- a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
+++ b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja
@@ -93,9 +93,21 @@
 {%- endif -%}
 {%- endfor -%}
 
-{# Calculate available resources #}
-{%- set cpu_free = hw_config.cpu - ns.used_cpu -%}
-{%- set mem_free = hw_config.memory - ns.used_memory -%}
+{# Determine host OS overhead based on role #}
+{%- if role == 'hypervisor' -%}
+{%- set host_os_cpu = 8 -%}
+{%- set host_os_memory = 16 -%}
+{%- elif role == 'managerhype' -%}
+{%- set host_os_cpu = 16 -%}
+{%- set host_os_memory = 32 -%}
+{%- else -%}
+{%- set host_os_cpu = 0 -%}
+{%- set host_os_memory = 0 -%}
+{%- endif -%}
+
+{# Calculate available resources (subtract both VM usage and host OS overhead) #}
+{%- set cpu_free = hw_config.cpu - ns.used_cpu - host_os_cpu -%}
+{%- set mem_free = hw_config.memory - ns.used_memory - host_os_memory -%}
 
 {# Get used PCI indices #}
 {%- set used_disk = [] -%}
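As a concrete reading of the overhead logic (hardware numbers assumed for illustration): a dedicated hypervisor with 128 cores and 256 GB RAM whose VMs already claim 32 cores and 64 GB would now be annotated with 128 - 32 - 8 = 88 free cores and 256 - 64 - 16 = 176 GB free. A standalone sketch of the same arithmetic:

```bash
#!/bin/bash
# Mirrors the Jinja overhead logic above with made-up hardware numbers.
role="hypervisor"; total_cpu=128; total_mem=256; used_cpu=32; used_mem=64

case "$role" in
  hypervisor)  host_os_cpu=8;  host_os_mem=16 ;;
  managerhype) host_os_cpu=16; host_os_mem=32 ;;
  *)           host_os_cpu=0;  host_os_mem=0  ;;
esac

echo "cpu_free=$(( total_cpu - used_cpu - host_os_cpu ))"  # 88
echo "mem_free=$(( total_mem - used_mem - host_os_mem ))"  # 176
```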
From ed5bd19e77e24030445f71d5d4d0aa6b92a73 Mon Sep 17 00:00:00 2001
From: Corey Ogburn
Date: Wed, 15 Oct 2025 09:00:27 -0600
Subject: [PATCH 095/124] Should be multiline

---
 salt/soc/soc_soc.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml
index 589b995ef..0a063f53e 100644
--- a/salt/soc/soc_soc.yaml
+++ b/salt/soc/soc_soc.yaml
@@ -593,6 +593,7 @@ soc:
           description: Additional context to provide to the AI assistant about this SOC deployment. This can include information about your environment, policies, or any other relevant details that can help the AI provide more accurate and tailored assistance. Long prompts may be shortened.
           global: True
           advanced: False
+          multiline: True
         systemPromptAddendumMaxLength:
           description: Maximum length of the system prompt addendum. Longer prompts will be truncated.
           global: True
           advanced: True

From d9f70898dd8960c104b95ef4e95c170a639db109 Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Wed, 15 Oct 2025 14:59:37 -0400
Subject: [PATCH 096/124] omit new hypervisor state name fp

---
 setup/so-verify | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup/so-verify b/setup/so-verify
index d22b80fc2..f99c6e418 100755
--- a/setup/so-verify
+++ b/setup/so-verify
@@ -68,6 +68,7 @@ log_has_errors() {
     grep -vE "Command failed with exit code" | \
     grep -vE "Running scope as unit" | \
     grep -vE "securityonion-resources/sigma/stable" | \
+    grep -vE "remove_failed_vm.sls" | \
     grep -vE "log-.*-pipeline_failed_attempts" &> "$error_log"
 
   if [[ $? -eq 0 ]]; then

From ee617eeff44835452d99c9e9cdc432d53b6ef1bb Mon Sep 17 00:00:00 2001
From: Josh Patterson
Date: Wed, 15 Oct 2025 16:44:24 -0400
Subject: [PATCH 097/124] do not log set_timezone in setup; logging it creates
 an additional sosetup.log file

---
 setup/so-functions | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup/so-functions b/setup/so-functions
index 5847df704..9b65c9f0e 100755
--- a/setup/so-functions
+++ b/setup/so-functions
@@ -2305,7 +2305,7 @@ set_redirect() {
 }
 
 set_timezone() {
-  logCmd "timedatectl set-timezone Etc/UTC"
+  timedatectl set-timezone Etc/UTC
 }

From e910de0a066dcf719300336990648de1a2597ea2 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Thu, 16 Oct 2025 16:19:55 -0500
Subject: [PATCH 098/124] update log4j2 policy for ES json output

Signed-off-by: reyesj2 <94730068+reyesj2@users.noreply.github.com>
---
 salt/elasticsearch/files/log4j2.properties | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/salt/elasticsearch/files/log4j2.properties b/salt/elasticsearch/files/log4j2.properties
index 305069882..45d3d852b 100644
--- a/salt/elasticsearch/files/log4j2.properties
+++ b/salt/elasticsearch/files/log4j2.properties
@@ -35,7 +35,12 @@ appender.rolling_json.policies.time.type = TimeBasedTriggeringPolicy
 appender.rolling_json.policies.time.interval = 1
 appender.rolling_json.policies.time.modulate = true
 appender.rolling_json.strategy.type = DefaultRolloverStrategy
-appender.rolling_json.strategy.max = 1
+appender.rolling_json.strategy.action.type = Delete
+appender.rolling_json.strategy.action.basepath = /var/log/elasticsearch
+appender.rolling_json.strategy.action.condition.type = IfFileName
+appender.rolling_json.strategy.action.condition.glob = *.json.gz
+appender.rolling_json.strategy.action.condition.nested_condition.type = IfLastModified
+appender.rolling_json.strategy.action.condition.nested_condition.exceeds = 1D
 
 rootLogger.level = info
 rootLogger.appenderRef.rolling.ref = rolling

From d2aa60b96129a568452f6f354fbefd618ba173ac Mon Sep 17 00:00:00 2001
From: Jorge Reyes <94730068+reyesj2@users.noreply.github.com>
Date: Fri, 17 Oct 2025 07:40:44 -0500
Subject: [PATCH 099/124] log4j2 settings

---
 salt/elasticsearch/files/log4j2.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/salt/elasticsearch/files/log4j2.properties b/salt/elasticsearch/files/log4j2.properties
index 45d3d852b..b29378d6a 100644
--- a/salt/elasticsearch/files/log4j2.properties
+++ b/salt/elasticsearch/files/log4j2.properties
@@ -40,7 +40,7 @@ appender.rolling_json.strategy.action.basepath = /var/log/elasticsearch
 appender.rolling_json.strategy.action.condition.type = IfFileName
 appender.rolling_json.strategy.action.condition.glob = *.json.gz
 appender.rolling_json.strategy.action.condition.nested_condition.type = IfLastModified
-appender.rolling_json.strategy.action.condition.nested_condition.exceeds = 1D
+appender.rolling_json.strategy.action.condition.nested_condition.age = 1D
 
 rootLogger.level = info
 rootLogger.appenderRef.rolling.ref = rolling
event.module:"soc"' enabled: true + - name: onionaiExcludeToggle + filter: 'NOT _index:"*:so-assistant-*"' + enabled: true queries: - name: Default Query description: Show all events grouped by the observer host From 39572f36f43289fa700d84d7453f682fbf1246be Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 23 Oct 2025 14:07:05 -0400 Subject: [PATCH 101/124] 2.4.190 --- DOWNLOAD_AND_VERIFY_ISO.md | 22 ++++++++++---------- sigs/securityonion-2.4.190-20251024.iso.sig | Bin 0 -> 566 bytes 2 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 sigs/securityonion-2.4.190-20251024.iso.sig diff --git a/DOWNLOAD_AND_VERIFY_ISO.md b/DOWNLOAD_AND_VERIFY_ISO.md index ec4e4657c..f354ed191 100644 --- a/DOWNLOAD_AND_VERIFY_ISO.md +++ b/DOWNLOAD_AND_VERIFY_ISO.md @@ -1,17 +1,17 @@ -### 2.4.180-20250916 ISO image released on 2025/09/17 +### 2.4.190-20251024 ISO image released on 2025/10/24 ### Download and Verify -2.4.180-20250916 ISO image: -https://download.securityonion.net/file/securityonion/securityonion-2.4.180-20250916.iso +2.4.190-20251024 ISO image: +https://download.securityonion.net/file/securityonion/securityonion-2.4.190-20251024.iso -MD5: DE93880E38DE4BE45D05A41E1745CB1F -SHA1: AEA6948911E50A4A38E8729E0E965C565402E3FC -SHA256: C9BD8CA071E43B048ABF9ED145B87935CB1D4BB839B2244A06FAD1BBA8EAC84A +MD5: 25358481FB876226499C011FC0710358 +SHA1: 0B26173C0CE136F2CA40A15046D1DFB78BCA1165 +SHA256: 4FD9F62EDA672408828B3C0C446FE5EA9FF3C4EE8488A7AB1101544A3C487872 Signature for ISO image: -https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.180-20250916.iso.sig +https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.190-20251024.iso.sig Signing key: https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/2.4/main/KEYS @@ -25,22 +25,22 @@ wget https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/2. Download the signature file for the ISO: ``` -wget https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.180-20250916.iso.sig +wget https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.190-20251024.iso.sig ``` Download the ISO image: ``` -wget https://download.securityonion.net/file/securityonion/securityonion-2.4.180-20250916.iso +wget https://download.securityonion.net/file/securityonion/securityonion-2.4.190-20251024.iso ``` Verify the downloaded ISO image using the signature file: ``` -gpg --verify securityonion-2.4.180-20250916.iso.sig securityonion-2.4.180-20250916.iso +gpg --verify securityonion-2.4.190-20251024.iso.sig securityonion-2.4.190-20251024.iso ``` The output should show "Good signature" and the Primary key fingerprint should match what's shown below: ``` -gpg: Signature made Tue 16 Sep 2025 06:30:19 PM EDT using RSA key ID FE507013 +gpg: Signature made Thu 23 Oct 2025 07:21:46 AM EDT using RSA key ID FE507013 gpg: Good signature from "Security Onion Solutions, LLC " gpg: WARNING: This key is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. 
diff --git a/sigs/securityonion-2.4.190-20251024.iso.sig b/sigs/securityonion-2.4.190-20251024.iso.sig new file mode 100644 index 0000000000000000000000000000000000000000..430c09e474e261c58a3c3c9124e13566062228eb GIT binary patch literal 566 zcmV-60?GY}0y6{v0SEvc79j-41gSkXz6^6dp_W8^5Ma0dP;e6k0%-aV$^Z%p5PT3| zxBgIY6ScSx{v@iq7-Hvr)3(JTE$&o~0Y7ac^su*jCKwb&+nnETC&)=VeFA|hr2AhX z?uZ#1LYT3%m(Ckn11{l1>A0~dkY4zkIuYptrekHO#f!**1zwbSwXv`V8CBXCUi=KpZD6kx8iY4Sjc8d*5?PivK@=65U4Vlz0S(eePjZ3_;hzzQ)deQi-&Aw9^J^m z*RvncKV{-$Oi);MxWjH0;caVB@${|sB+$hFTgoVWBUNuS3+V$iH Date: Fri, 24 Oct 2025 16:11:50 -0400 Subject: [PATCH 102/124] bump version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 3f8c50a50..86df31761 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.4.190 +2.4.200 From f348c7168feff3558ef5470cd9f6b7e287048535 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Fri, 24 Oct 2025 16:19:24 -0400 Subject: [PATCH 103/124] bump version --- .github/DISCUSSION_TEMPLATE/2-4.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/DISCUSSION_TEMPLATE/2-4.yml b/.github/DISCUSSION_TEMPLATE/2-4.yml index c85249fac..229e9f612 100644 --- a/.github/DISCUSSION_TEMPLATE/2-4.yml +++ b/.github/DISCUSSION_TEMPLATE/2-4.yml @@ -32,6 +32,7 @@ body: - 2.4.170 - 2.4.180 - 2.4.190 + - 2.4.200 - Other (please provide detail below) validations: required: true From 10ae53f1089637a91b752258b1b53a0fa717badd Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 29 Oct 2025 10:23:44 -0400 Subject: [PATCH 104/124] upgrade salt 3006.16 --- salt/salt/master.defaults.yaml | 2 +- salt/salt/minion.defaults.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/salt/master.defaults.yaml b/salt/salt/master.defaults.yaml index 8e1a618fd..9dfe8587f 100644 --- a/salt/salt/master.defaults.yaml +++ b/salt/salt/master.defaults.yaml @@ -1,4 +1,4 @@ # version cannot be used elsewhere in this pillar as soup is grepping for it to determine if Salt needs to be patched salt: master: - version: '3006.9' + version: '3006.16' diff --git a/salt/salt/minion.defaults.yaml b/salt/salt/minion.defaults.yaml index 7ec839950..e897313d2 100644 --- a/salt/salt/minion.defaults.yaml +++ b/salt/salt/minion.defaults.yaml @@ -1,5 +1,5 @@ # version cannot be used elsewhere in this pillar as soup is grepping for it to determine if Salt needs to be patched salt: minion: - version: '3006.9' + version: '3006.16' check_threshold: 3600 # in seconds, threshold used for so-salt-minion-check. 
any value less than 600 seconds may cause a lot of salt-minion restarts since the job to touch the file occurs every 5-8 minutes by default From 835b2609b676c2138b4232305d9a0b05b22db6aa Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 29 Oct 2025 13:45:55 -0500 Subject: [PATCH 105/124] telegraf - increase esindexsize.sh script timeout --- salt/telegraf/etc/telegraf.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index b358c178f..d2cb87057 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -337,4 +337,5 @@ ] data_format = "influx" interval = "1h" + timeout = "120s" {%- endif %} From 2fb41c8d6529ba6a161a09301f60b999590c25eb Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 29 Oct 2025 14:24:43 -0500 Subject: [PATCH 106/124] elasticsearch retention estimate --- .../sbin/so-elasticsearch-retention-estimate | 1159 +++++++++++++++++ 1 file changed, 1159 insertions(+) create mode 100755 salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate b/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate new file mode 100755 index 000000000..4c34d3a02 --- /dev/null +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate @@ -0,0 +1,1159 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +INFLUX_URL="https://localhost:8086/api/v2" +JSON_OUTPUT=false +VERBOSE=false +TEMP_FILES=() + +. /usr/sbin/so-common + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +BOLD='\033[1;37m' +NC='\033[0m' +REDBOLD='\033[1;31m' +YELLOWBOLD='\033[1;33m' + +declare -a recommendation_lines +declare -a recommendation_records + +cleanup_temp_files() { + local file + for file in "${TEMP_FILES[@]}"; do + [ -f "$file" ] && rm -f "$file" 2>/dev/null + done +} + +trap cleanup_temp_files EXIT INT TERM + +create_temp_file() { + local tmpfile + tmpfile=$(mktemp) + TEMP_FILES+=("$tmpfile") + echo "$tmpfile" +} + +log_title() { + if [ $1 == "LOG" ]; then + echo -e "\n${BOLD}================ $2 ================${NC}\n" + elif [ $1 == "OK" ]; then + echo -e "${GREEN} $2 ${NC}" + elif [ $1 == "WARN" ]; then + echo -e "${YELLOW} $2 ${NC}" + elif [ $1 == "ERROR" ]; then + echo -e "${RED} $2 ${NC}" + fi +} + +usage() { + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +Estimate remaining days until Elasticsearch cluster reaches low watermark threshold. 
+ +OPTIONS: + --json Output results in JSON format + -v, --verbose Show additional output + -h, --help Show this help message + +EOF + exit 0 +} + +while [[ $# -gt 0 ]]; do + case $1 in + --json) + JSON_OUTPUT=true + shift + ;; + -v|--verbose) + VERBOSE=true + shift + ;; + -h|--help) + usage + ;; + *) + echo "Unknown option: $1" >&2 + usage + ;; + esac +done + +request() { + curl -skK /opt/so/conf/influxdb/curl.config "$INFLUX_URL/$@" +} + +lookup_org_id() { + request "orgs?org=Security+Onion" | jq -r '.orgs[] | select(.name == "Security Onion").id' +} + +run_flux_query() { + local query=$1 + request "query?org=$ORG_ID" \ + -H 'Accept:application/csv' \ + -H 'Content-type:application/vnd.flux' \ + -d "$query" -XPOST 2>/dev/null +} + +read_csv_value() { + local input="$1" + + printf '%s\n' "$input" | awk -F',' ' + $0 ~ /^#/ { next } + NF < 1 { next } + { + gsub(/\r|\t/, "") + for (i = 1; i <= NF; i++) { + sub(/^[[:space:]]+/, "", $i) + sub(/[[:space:]]+$/, "", $i) + } + if (($2 == "_result" || $2 == "result") && $3 != "table" && $NF != "") { + print $NF + exit + } + } + ' +} + +normalize_number() { + local value="${1:-0}" + awk -v val="$value" 'BEGIN { + if (val == "" || val == "null") { printf "0"; exit } + if (val == val + 0) { printf "%.0f", val + 0; exit } + printf "0" + }' +} + +bytes_to_gb() { + local bytes="${1:-0}" + awk -v b="$bytes" 'BEGIN { + if (b == "" || b == "null") { printf "0.00"; exit } + printf "%.2f", b / 1024 / 1024 / 1024 + }' +} + +expand_node_roles() { + local role_string="$1" + local -a roles=() + + # Only show data-related roles: d=data, h=data_hot, w=data_warm, c=data_cold, s=data_content, f=data_frozen + [[ "$role_string" =~ h ]] && roles+=("data_hot") + [[ "$role_string" =~ w ]] && roles+=("data_warm") + [[ "$role_string" =~ c ]] && roles+=("data_cold") + [[ "$role_string" =~ s ]] && roles+=("data_content") + [[ "$role_string" =~ f ]] && roles+=("data_frozen") + [[ "$role_string" =~ d ]] && roles+=("data") + + local IFS=',' + echo "${roles[*]}" +} + +run_indices_growth() { + if ! command -v so-elasticsearch-indices-growth >/dev/null 2>&1; then + return 1 + fi + + if [ "$EUID" -ne 0 ] && command -v sudo >/dev/null 2>&1; then + sudo -n so-elasticsearch-indices-growth 2>/dev/null || so-elasticsearch-indices-growth 2>/dev/null + else + so-elasticsearch-indices-growth 2>/dev/null + fi +} + +fetch_total_bytes() { + local start="$1" + local stop="$2" + local range_line + + if [ -n "$stop" ]; then + range_line=" |> range(start: ${start}, stop: ${stop})" + else + range_line=" |> range(start: ${start})" + fi + + local query + query=$(cat <<-EOF +from(bucket: "telegraf/so_long_term") +${range_line} + |> filter(fn: (r) => r._measurement == "elasticsearch_index_size") + |> last() + |> group() + |> sum() + |> keep(columns: ["_value"]) +EOF + ) + + local result value + result=$(run_flux_query "$query") + value=$(read_csv_value "$result") + normalize_number "$value" +} + +fail() { + if [ "$JSON_OUTPUT" = true ]; then + jq -n --arg error "$1" '{error: $error}' + else + echo "ERROR: $1" >&2 + fi + exit 1 +} + +echo -e "\nDISCLAIMER: Script output is based on current data patterns; the results are approximations solely intended to assist with getting a general ILM policy configured."
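+# Data sources for the calculations below (sketch): the current indexed size
+# comes from the ES _cluster/stats API, while historical totals come from the
+# telegraf/so_long_term InfluxDB bucket via fetch_total_bytes, e.g.
+#   thirty_day_total=$(fetch_total_bytes "-30d8h" "-30d")
+# returns the summed store size (in bytes) from roughly 30 days ago, or 0 when
+# no data point exists in that window.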
+ +ORG_ID=$(lookup_org_id) +[ -n "$ORG_ID" ] || fail "Unable to resolve InfluxDB org id" + +cluster_storage_size=0 +indexed_storage_source="elasticsearch" +cluster_storage_size_output=$(so-elasticsearch-query '_cluster/stats?filter_path=indices.store.size_in_bytes' --fail 2>/dev/null || true) +if [ -n "$cluster_storage_size_output" ]; then + cluster_storage_size=$(echo "$cluster_storage_size_output" | jq -r '.indices.store.size_in_bytes // 0' 2>/dev/null) + if ! printf '%s' "$cluster_storage_size" | grep -Eq '^[0-9]+$'; then + cluster_storage_size=0 + fi +fi + +# historical data from influxdb for growth calculation +one_day_total=$(fetch_total_bytes "-25h" "-23h") +seven_day_total=$(fetch_total_bytes "-7d8h" "-7d") +thirty_day_total=$(fetch_total_bytes "-30d8h" "-30d") + +# available historical windows (prefer 30d/7d when available, to avoid using recent 24h traffic spike as true daily ingest rate) +history_days=0 +historical_total=0 + +if [ "$thirty_day_total" -gt 0 ]; then + history_days=30 + history_label="30-day" + historical_total=$thirty_day_total +elif [ "$seven_day_total" -gt 0 ]; then + history_days=7 + history_label="7-day" + historical_total=$seven_day_total +elif [ "$one_day_total" -gt 0 ]; then + history_days=1 + history_label="24-hour" + historical_total=$one_day_total +fi + +[ "$history_days" -gt 0 ] || fail "Historical InfluxDB data unavailable for growth calculation. If this is a newer grid, try re-running this script in a few days. Otherwise review /opt/so/log/telegraf/telegraf.log for errors with collecting required ES metrics." + +# Daily growth rate +growth_bytes=$(( cluster_storage_size - historical_total )) +daily_growth_bytes=$(awk -v diff="$growth_bytes" -v days="$history_days" 'BEGIN { + if (days <= 0) { print 0; exit } + printf "%.0f", diff / days +}') + +# Daily shard creation rate using same time window (30d / 7d / 24h) +daily_shard_creation=0 +now_ms=$(date +%s)000 +history_ago_ms=$(awk -v now="$now_ms" -v days="$history_days" 'BEGIN { printf "%.0f", now - (days * 86400 * 1000) }') +shard_creation_output=$(so-elasticsearch-query "_cat/indices/.ds-*?format=json&h=index,pri,rep,creation.date" --fail 2>/dev/null || true) +if [ -n "$shard_creation_output" ]; then + recent_shards=$(echo "$shard_creation_output" | jq --argjson cutoff "$history_ago_ms" ' + [.[] | + select(.["creation.date"] != null and (.["creation.date"] | tonumber) >= $cutoff) | + (.pri | tonumber) + ((.pri | tonumber) * (.rep | tonumber)) + ] | add // 0 + ' 2>/dev/null) + if [ -n "$recent_shards" ] && [[ "$recent_shards" =~ ^[0-9]+$ ]]; then + daily_shard_creation=$(awk -v total="$recent_shards" -v days="$history_days" 'BEGIN { + if (days <= 0) { print 0; exit } + printf "%.1f", total / days + }') + fi +fi + +# Find expected ILM deletions +ilm_delete_7d=0 +ilm_delete_30d=0 +ilm_indices_7d=0 +ilm_indices_30d=0 +ilm_delete_immediate=0 +ilm_indices_immediate=0 +ilm_delete_scheduled_7d=0 +ilm_indices_scheduled_7d=0 +ilm_delete_scheduled_30d=0 +ilm_indices_scheduled_30d=0 +ilm_shards_7d=0 +ilm_shards_30d=0 +ilm_shards_immediate=0 +ilm_shards_scheduled_7d=0 +ilm_shards_scheduled_30d=0 + + # For verbose output +declare -a scheduled_indices_names +declare -a scheduled_indices_sizes +declare -a scheduled_indices_days +declare -a immediate_indices_names +declare -a immediate_indices_sizes + +# Get ilm policy delete ages per policy + # example output 'so-logs-1password.audit_events-logs|365' +tmpfile_policies=$(create_temp_file) +so-elasticsearch-query '_ilm/policy' --fail 2>/dev/null | jq -r ' + def
age_to_days: + if type == "number" then . + elif type == "string" then + (ascii_downcase) as $s | + (try ($s | capture("^(?<num>-?[0-9.]+)(?<unit>[smhd]?)$")) catch {num:"0", unit:""}) as $m | + (($m.num | tonumber? // 0)) as $val | + (if $m.unit == "d" or $m.unit == "" then $val + elif $m.unit == "h" then $val / 24 + elif $m.unit == "m" then $val / 1440 + elif $m.unit == "s" then $val / 86400 + else $val end) + else 0 end; + to_entries[] | + select(.value.policy.phases.delete.min_age?) | + "\(.key)|\((.value.policy.phases.delete.min_age | age_to_days))" +' > "$tmpfile_policies" 2>/dev/null || true + +declare -A policy_ages + +if [ -s "$tmpfile_policies" ]; then + # create associative array of policy -> delete_age + while IFS='|' read -r policy age; do + policy_ages["$policy"]=$age + done < "$tmpfile_policies" + + # Get ILM managed indices with their age and policy, figure days until deletion + tmpfile_indices=$(create_temp_file) + so-elasticsearch-query '_all/_ilm/explain' --fail 2>/dev/null | jq -r ' + def age_to_days: + if type == "number" then . + elif type == "string" then + (ascii_downcase) as $s | + (try ($s | capture("^(?<num>-?[0-9.]+)(?<unit>[smhd]?)$")) catch {num:"0", unit:""}) as $m | + (($m.num | tonumber? // 0)) as $val | + (if $m.unit == "d" or $m.unit == "" then $val + elif $m.unit == "h" then $val / 24 + elif $m.unit == "m" then $val / 1440 + elif $m.unit == "s" then $val / 86400 + else $val end) + else 0 end; + .indices | to_entries[] | + select(.value.managed == true and .value.policy) | + "\(.key)|\(.value.policy)|\(((.value.age? // "0") | age_to_days))|\(.value.phase? // "")" + ' > "$tmpfile_indices" 2>/dev/null || true + + # Process each index and calculate totals + tmpfile_all=$(create_temp_file) + while IFS='|' read -r index policy age phase; do + if [ -n "${policy_ages[$policy]:-}" ]; then + delete_age=${policy_ages[$policy]} + delete_age=${delete_age:-0} + age=${age:-0} + days_until_ceiling=$(awk -v del="$delete_age" -v aged="$age" 'BEGIN { + diff = del - aged; + if (diff <= 0) { + print 0; + exit + } + base = int(diff); + if (diff > base) { base = base + 1 } + print base; + }') + if [ -z "$days_until_ceiling" ]; then + days_until_ceiling=0 + fi + if [ "$days_until_ceiling" -lt 0 ]; then + days_until_ceiling=0 + fi + bucket="scheduled" + if [ "$phase" = "delete" ]; then + days_until_ceiling=0 + bucket="immediate" + fi + if [ "$days_until_ceiling" -le 30 ] 2>/dev/null; then + echo "$index|$days_until_ceiling|$bucket" >> "$tmpfile_all" + fi + fi + done < "$tmpfile_indices" + + # Get size and shard counts for indices + if [ -s "$tmpfile_all" ]; then + candidate_indices=$(cut -d'|' -f1 "$tmpfile_all" | tr '\n' ',' | sed 's/,$//') + if [ -n "$candidate_indices" ]; then + tmpfile_sizes=$(create_temp_file) + so-elasticsearch-query "_cat/indices/${candidate_indices}?format=json&h=index,pri.store.size,pri,rep&bytes=b" --fail 2>/dev/null | \ + jq -r '.[] | "\(.index)|\(.["pri.store.size"])|\(.pri)|\(.rep)"' > "$tmpfile_sizes" 2>/dev/null || true + + # Build size and shard lookup + declare -A index_sizes + declare -A index_shards + while IFS='|' read -r idx size pri rep; do + index_sizes["$idx"]=$size + # Total shards = pri + (pri * rep) + total_shards=$(awk -v p="$pri" -v r="$rep" 'BEGIN { printf "%.0f", p + (p * r) }') + index_shards["$idx"]=$total_shards + done < "$tmpfile_sizes" + + # Calculate totals for ilm deletes + while IFS='|' read -r index days_until bucket; do + size=${index_sizes[$index]:-0} + shards=${index_shards[$index]:-0} + if [ "$bucket" = "immediate" ]; then
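+                # "immediate" = the index is already in its ILM delete phase and is
+                # counted as deleting now; otherwise it stays "scheduled" and is
+                # bucketed below by the days remaining until its policy delete age.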
ilm_delete_immediate=$((ilm_delete_immediate + size)) + ilm_indices_immediate=$((ilm_indices_immediate + 1)) + ilm_shards_immediate=$((ilm_shards_immediate + shards)) + if [ "$VERBOSE" = true ]; then + immediate_indices_names+=("$index") + immediate_indices_sizes+=("$size") + fi + else + if [ "$days_until" -le 7 ] 2>/dev/null; then + ilm_delete_scheduled_7d=$((ilm_delete_scheduled_7d + size)) + ilm_indices_scheduled_7d=$((ilm_indices_scheduled_7d + 1)) + ilm_shards_scheduled_7d=$((ilm_shards_scheduled_7d + shards)) + if [ "$VERBOSE" = true ]; then + scheduled_indices_names+=("$index") + scheduled_indices_sizes+=("$size") + scheduled_indices_days+=("$days_until") + fi + fi + ilm_delete_scheduled_30d=$((ilm_delete_scheduled_30d + size)) + ilm_indices_scheduled_30d=$((ilm_indices_scheduled_30d + 1)) + ilm_shards_scheduled_30d=$((ilm_shards_scheduled_30d + shards)) + fi + + if [ "$days_until" -le 7 ] 2>/dev/null; then + ilm_delete_7d=$((ilm_delete_7d + size)) + ilm_indices_7d=$((ilm_indices_7d + 1)) + ilm_shards_7d=$((ilm_shards_7d + shards)) + fi + ilm_delete_30d=$((ilm_delete_30d + size)) + ilm_indices_30d=$((ilm_indices_30d + 1)) + ilm_shards_30d=$((ilm_shards_30d + shards)) + done < "$tmpfile_all" + fi + fi +fi + +# Get the average daily ILM deletion rate (smooth out over 30d / 7d for consistency) +daily_ilm_delete_bytes=0 +if [ "$ilm_delete_scheduled_30d" -gt 0 ] && [ "$ilm_indices_scheduled_30d" -gt 0 ]; then + daily_ilm_delete_bytes=$(awk -v total="$ilm_delete_scheduled_30d" 'BEGIN { printf "%.0f", total / 30 }') +elif [ "$ilm_delete_scheduled_7d" -gt 0 ] && [ "$ilm_indices_scheduled_7d" -gt 0 ]; then + daily_ilm_delete_bytes=$(awk -v total="$ilm_delete_scheduled_7d" 'BEGIN { printf "%.0f", total / 7 }') +fi + +# Net storage growth (growth - deletions) +net_growth_bytes=$(awk -v growth="$daily_growth_bytes" -v deletions="$daily_ilm_delete_bytes" 'BEGIN { + printf "%.0f", growth - deletions +}') + +ilm_delete_7d_gb=$(bytes_to_gb "$ilm_delete_7d") +ilm_delete_30d_gb=$(bytes_to_gb "$ilm_delete_30d") +ilm_delete_immediate_gb=$(bytes_to_gb "$ilm_delete_immediate") +ilm_delete_scheduled_7d_gb=$(bytes_to_gb "$ilm_delete_scheduled_7d") +ilm_delete_scheduled_30d_gb=$(bytes_to_gb "$ilm_delete_scheduled_30d") +daily_ilm_delete_gb=$(bytes_to_gb "$daily_ilm_delete_bytes") + +ilm_impact_pct="0.0" +if [ "$cluster_storage_size" -gt 0 ] && [ "$ilm_delete_7d" -gt 0 ]; then + ilm_impact_pct=$(awk -v ilm="$ilm_delete_7d" -v total="$cluster_storage_size" 'BEGIN { + if (total <= 0) { printf "0.0"; exit } + printf "%.1f", (ilm / total) * 100 + }') +fi + +ilm_window_daily_bytes=0 +ilm_window_daily_gb="0.00" +if [ "$ilm_delete_7d" -gt 0 ]; then + ilm_window_daily_bytes=$(awk -v total="$ilm_delete_7d" 'BEGIN { printf "%.0f", total / 7 }') + ilm_window_daily_gb=$(awk -v total="$ilm_delete_7d" 'BEGIN { printf "%.2f", total / 7 / 1024 / 1024 / 1024 }') +fi + +ilm_rate_variance_pct="" +ilm_rate_variance_warning=false +if [ "$daily_ilm_delete_bytes" -gt 0 ] && [ "$ilm_window_daily_bytes" -gt 0 ]; then + ilm_rate_variance_pct=$(awk -v window="$ilm_window_daily_bytes" -v daily="$daily_ilm_delete_bytes" 'BEGIN { + if (daily == 0) { print ""; exit } + diff = window - daily; + if (diff < 0) diff = -diff; + pct = diff / daily * 100; + if (pct < 0) pct = -pct; + printf "%.0f", pct + }') + if [ -n "$ilm_rate_variance_pct" ]; then + ilm_rate_flag=$(awk -v v="$ilm_rate_variance_pct" 'BEGIN { if (v + 0 > 30) print 1; else print 0 }') + if [ "$ilm_rate_flag" -eq 1 ] 2>/dev/null; then + ilm_rate_variance_warning=true + fi + fi 
+fi + +ilm_rate_variance_warning_json="false" +if [ "$ilm_rate_variance_warning" = true ]; then + ilm_rate_variance_warning_json="true" +fi + +# Elasticsearch cluster disk watermark settings (fallback to 85/90/95 defaults) +watermark_output=$(so-elasticsearch-query '_cluster/settings?include_defaults=true&filter_path=*.cluster.routing.allocation.disk.*' --fail 2>/dev/null) || fail "Failed to query Elasticsearch cluster settings" + +low=$(echo "$watermark_output" | jq -r '.transient.cluster.routing.allocation.disk.watermark.low // .persistent.cluster.routing.allocation.disk.watermark.low // .defaults.cluster.routing.allocation.disk.watermark.low // empty') +high=$(echo "$watermark_output" | jq -r '.transient.cluster.routing.allocation.disk.watermark.high // .persistent.cluster.routing.allocation.disk.watermark.high // .defaults.cluster.routing.allocation.disk.watermark.high // empty') +flood=$(echo "$watermark_output" | jq -r '.transient.cluster.routing.allocation.disk.watermark.flood_stage // .persistent.cluster.routing.allocation.disk.watermark.flood_stage // .defaults.cluster.routing.allocation.disk.watermark.flood_stage // empty') + +low=${low:-"85%"} +high=${high:-"90%"} +flood=${flood:-"95%"} + +low_percent=${low%\%} +low_fraction=$(awk -v p="$low_percent" 'BEGIN { + if (p == "" || p + 0 <= 0) { printf "%.6f", 0.85; exit } + printf "%.6f", p / 100 +}') + +high_percent=${high%\%} +high_fraction=$(awk -v p="$high_percent" 'BEGIN { + if (p == "" || p + 0 <= 0) { printf "%.6f", 0.90; exit } + printf "%.6f", p / 100 +}') + +# Cluster shard total +cluster_shards_output=$(so-elasticsearch-query '_cluster/stats?filter_path=indices.shards.total' --fail 2>/dev/null) || fail "Failed to query cluster shard stats" +total_shards=$(echo "$cluster_shards_output" | jq -r '.indices.shards.total // 0' 2>/dev/null) + +# Get max shards per node setting (with default 1000) +max_shards_per_node_output=$(so-elasticsearch-query '_cluster/settings?include_defaults=true&filter_path=*.cluster.max_shards_per_node' --fail 2>/dev/null) || fail "Failed to query cluster shard settings" +max_shards_per_node=$(echo "$max_shards_per_node_output" | jq -r '.transient.cluster.max_shards_per_node // .persistent.cluster.max_shards_per_node // .defaults.cluster.max_shards_per_node // "1000"' 2>/dev/null) +max_shards_per_node=${max_shards_per_node:-1000} + +# Get same disk usage metric ES uses for watermark (not only ES used storage, but OS level storage usage) +nodes_output=$(so-elasticsearch-query '_cat/nodes?format=json&h=name,ip,node.role,disk.total,disk.used,disk.avail&bytes=b' --fail 2>/dev/null) || fail "Failed to query Elasticsearch node disk usage" + +# Parse nodes with data roles and calculate cluster totals +# Only include nodes with data roles: d=data, h=data_hot, w=data_warm, c=data_cold, s=data_content, f=data_frozen +cluster_stats=$(echo "$nodes_output" | jq --argjson low "$low_fraction" ' + [ .[] + | select(.["node.role"] | test("[dhwcsf]")) + | .total = (.["disk.total"] | tostring | gsub("[^0-9.]"; "") | tonumber) + | .used = (.["disk.used"] | tostring | gsub("[^0-9.]"; "") | tonumber) + | .avail = (.["disk.avail"] | tostring | gsub("[^0-9.]"; "") | tonumber) + | select(.total? and .used?) 
+ | .low_threshold = (.total * $low) + | .remaining = (.low_threshold - .used) + ] + | { + total: ([.[].total] | add // 0), + used: ([.[].used] | add // 0), + low_threshold: ([.[].low_threshold] | add // 0), + remaining: ([.[].remaining] | add // 0) + } +') + +cluster_total=$(echo "$cluster_stats" | jq -r '.total') +cluster_used=$(echo "$cluster_stats" | jq -r '.used') +cluster_low_threshold=$(echo "$cluster_stats" | jq -r '.low_threshold') +cluster_remaining=$(echo "$cluster_stats" | jq -r '.remaining') + +cluster_high_threshold=$(awk -v total="$cluster_total" -v frac="$high_fraction" 'BEGIN { + if (total == "" || frac == "" || total + 0 <= 0 || frac + 0 <= 0) { printf "0"; exit } + printf "%.0f", total * frac +}') +cluster_over_low_bytes=$(awk -v used="$cluster_used" -v threshold="$cluster_low_threshold" 'BEGIN { + if (used == "" || threshold == "") { printf "0"; exit } + diff = used - threshold; + if (diff < 0) diff = 0; + printf "%.0f", diff +}') +cluster_over_high_bytes=$(awk -v used="$cluster_used" -v threshold="$cluster_high_threshold" 'BEGIN { + if (used == "" || threshold == "") { printf "0"; exit } + diff = used - threshold; + if (diff < 0) diff = 0; + printf "%.0f", diff +}') + +# Count data nodes and calculate shard capacity +# Only count nodes with data roles: d=data, h=data_hot, w=data_warm, c=data_cold, s=data_content f=data_frozen +data_node_count=$(echo "$nodes_output" | jq '[.[] | select(.["node.role"] | test("[dhwcsf]"))] | length') +max_shard_capacity=$((data_node_count * max_shards_per_node)) + +declare -a data_node_names +declare -a data_node_roles +if [ "$data_node_count" -gt 0 ]; then + while IFS='|' read -r node_name node_role; do + data_node_names+=("$node_name") + data_node_roles+=("$node_role") + done < <(echo "$nodes_output" | jq -r '.[] | select(.["node.role"] | test("[dhwcsf]")) | "\(.name)|\(.["node.role"])"') +fi +shard_usage_percent="0.0" +if [ "$max_shard_capacity" -gt 0 ]; then + shard_usage_percent=$(awk -v current="$total_shards" -v max="$max_shard_capacity" 'BEGIN { + if (max <= 0) { printf "0.0"; exit } + printf "%.1f", (current / max) * 100 + }') +fi + +recommendations_triggered=false +recommendations_ready=false +recommendations_message="" +recommendations_json='[]' +recommendations_triggered_json=false +recommendation_lines=() +recommendation_records=() +should_trigger_recommendations=false +recommendations_reason="" + +days_to_low_numeric="" +days_to_low_gross_numeric="" + +[ "$cluster_total" -gt 0 ] || fail "No Elasticsearch data nodes retrieved from _cat/nodes" + +# Calculate current retention period (age of oldest .ds-logs-* index) +oldest_index_days="" +oldest_index_name="" +oldest_index_output=$(so-elasticsearch-query '_cat/indices/.ds-logs-*?format=json&h=index,creation.date&s=creation.date:asc' --fail 2>/dev/null | jq -r '.[0] // empty' 2>/dev/null || true) +if [ -n "$oldest_index_output" ]; then + oldest_index_name=$(echo "$oldest_index_output" | jq -r '.index // empty' 2>/dev/null) + oldest_creation_ms=$(echo "$oldest_index_output" | jq -r '.["creation.date"] // empty' 2>/dev/null) + if [ -n "$oldest_creation_ms" ] && [[ "$oldest_creation_ms" =~ ^[0-9]+$ ]]; then + oldest_creation_sec=$((oldest_creation_ms / 1000)) + if [ "$oldest_creation_sec" -gt 0 ]; then + now_sec=$(date +%s) + if [ "$now_sec" -ge "$oldest_creation_sec" ]; then + age_sec=$((now_sec - oldest_creation_sec)) + oldest_index_days=$(awk -v age="$age_sec" 'BEGIN { printf "%.1f", age / 86400 }') + fi + fi + fi +fi + +# Calculate days until low watermark using net growth 
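+# Sketch of the projection below (illustrative numbers only): with 2048 GB of
+# headroom before the low watermark and a net growth of 50 GB/day (80 GB/day
+# of ingest minus 30 GB/day of scheduled ILM deletions), days_to_low works
+# out to 2048 / 50 = ~41 days.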
+days_to_low="" +days_to_low_gross="" +target_date="" + +# Calculate with gross growth +if [ "$daily_growth_bytes" -gt 0 ] && [ "$(echo "$cluster_remaining > 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r > 0) print 1; else print 0 }')" -eq 1 ]; then + days_to_low_gross=$(awk -v rem="$cluster_remaining" -v perday="$daily_growth_bytes" 'BEGIN { + printf "%.2f", rem / perday + }') +fi + +# Calculate with net growth (minus ILM deletions) +if [ "$net_growth_bytes" -gt 0 ] && [ "$(echo "$cluster_remaining > 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r > 0) print 1; else print 0 }')" -eq 1 ]; then + days_to_low=$(awk -v rem="$cluster_remaining" -v perday="$net_growth_bytes" 'BEGIN { + printf "%.2f", rem / perday + }') + ceil_days=$(awk -v d="$days_to_low" 'BEGIN { + base = int(d); + if (d > base) { base = base + 1 } + if (base < 0) { base = 0 } + printf "%d", base + }') + target_date=$(date -d "+${ceil_days} days" +%F 2>/dev/null) +elif [ "$(echo "$cluster_remaining > 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r > 0) print 1; else print 0 }')" -eq 1 ]; then + # Net growth is zero or negative, cluster is in equilibrium or shrinking + days_to_low="stable" +fi + +if [ -n "$days_to_low" ] && [ "$days_to_low" != "stable" ] && [[ "$days_to_low" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then + days_to_low_numeric="$days_to_low" +fi + +if [ -n "$days_to_low_gross" ] && [[ "$days_to_low_gross" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then + days_to_low_gross_numeric="$days_to_low_gross" +fi + +# Calculate estimated retention (oldest index age + days until low watermark) +estimated_retention_days="" +if [ -n "$oldest_index_days" ] && [ -n "$days_to_low_numeric" ]; then + estimated_retention_days=$(awk -v oldest="$oldest_index_days" -v remaining="$days_to_low_numeric" 'BEGIN { + printf "%.1f", oldest + remaining + }') +fi + +cluster_at_or_below_low=$(echo "$cluster_remaining <= 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r <= 0) print 1; else print 0 }') + +if [ "$cluster_at_or_below_low" -eq 1 ]; then + should_trigger_recommendations=true + if [ "$cluster_over_high_bytes" -gt 0 ] 2>/dev/null; then + recommendations_reason="Cluster is beyond the high watermark threshold. Reduce retention on the fastest-growing indices immediately." + else + recommendations_reason="Cluster is at or beyond the low watermark threshold. Reduce retention on the fastest-growing indices immediately." + fi +elif [ -n "$days_to_low_numeric" ]; then + within_seven=$(awk -v d="$days_to_low_numeric" 'BEGIN { if (d <= 7) print 1; else print 0 }') + if [ "$within_seven" -eq 1 ]; then + should_trigger_recommendations=true + recommendations_reason="Projected low watermark breach in ~${days_to_low_numeric} days (${target_date:-N/A}). Reduce retention on the fastest-growing indices." + fi +elif [ -n "$days_to_low_gross_numeric" ]; then + within_seven_gross=$(awk -v d="$days_to_low_gross_numeric" 'BEGIN { if (d <= 7) print 1; else print 0 }') + if [ "$within_seven_gross" -eq 1 ]; then + should_trigger_recommendations=true + recommendations_reason="Gross growth trend indicates a low watermark breach in ~${days_to_low_gross_numeric} days (${target_date:-N/A}). Reduce retention on the fastest-growing indices before ILM deletions." 
+ fi +fi + +cluster_over_high_flag=0 +if [ "$cluster_over_high_bytes" -gt 0 ] 2>/dev/null; then + cluster_over_high_flag=1 +fi + +cluster_over_low_flag=0 +if [ "$cluster_over_low_bytes" -gt 0 ] 2>/dev/null; then + cluster_over_low_flag=1 +fi + +cluster_high_threshold_gb=$(bytes_to_gb "$cluster_high_threshold") +cluster_over_low_gb=$(bytes_to_gb "$cluster_over_low_bytes") +cluster_over_high_gb=$(bytes_to_gb "$cluster_over_high_bytes") + +if [ "$should_trigger_recommendations" = true ]; then + recommendations_triggered=true + recommendations_triggered_json=true + if [ -n "$recommendations_reason" ]; then + recommendations_message="$recommendations_reason" + else + recommendations_message="Cluster is nearing the low watermark threshold. Reduce retention on the fastest-growing indices." + fi + + growth_output=$(run_indices_growth || true) + if [ -n "${growth_output//[[:space:]]/}" ]; then + mapfile -t recommendation_source_lines < <(printf '%s\n' "$growth_output" | tail -n +3 | awk 'NF' | head -n 3) + for line in "${recommendation_source_lines[@]}"; do + index=$(echo "$line" | awk '{print $1}') + [ -n "$index" ] || continue + + growth_24h_gb=$(echo "$line" | awk '{print $(NF-2)}') + + creation_date_display="" + retention_days="" + policy="" + delete_min_age="" + + index_info=$(so-elasticsearch-query "_cat/indices/${index}?format=json&h=index,creation.date,creation.date.string" --fail 2>/dev/null) || true + if [ -n "$index_info" ]; then + creation_epoch=$(echo "$index_info" | jq -r '.[0]."creation.date" // empty' 2>/dev/null) + creation_readable=$(echo "$index_info" | jq -r '.[0]."creation.date.string" // empty' 2>/dev/null) + if [ -n "$creation_epoch" ] && [[ "$creation_epoch" =~ ^[0-9]+$ ]]; then + creation_seconds=$((creation_epoch / 1000)) + if [ "$creation_seconds" -gt 0 ]; then + creation_date_display=$(date -u -d "@$creation_seconds" +%FT%TZ 2>/dev/null) + now_seconds=$(date +%s) + if [ "$now_seconds" -ge "$creation_seconds" ]; then + retention_days=$(awk -v now="$now_seconds" -v created="$creation_seconds" 'BEGIN { diff = now - created; if (diff < 0) diff = 0; printf "%.1f", diff / 86400 }') + fi + fi + fi + if [ -z "$creation_date_display" ] && [ -n "$creation_readable" ] && [ "$creation_readable" != "null" ]; then + creation_date_display="$creation_readable" + fi + fi + + ilm_output=$(so-elasticsearch-query "${index}/_ilm/explain" --fail 2>/dev/null) || true + if [ -n "$ilm_output" ]; then + policy=$(echo "$ilm_output" | jq -r ".indices.\"$index\".policy // empty" 2>/dev/null) + fi + if [ -n "$policy" ] && [ -n "${policy_ages[$policy]:-}" ]; then + delete_min_age=${policy_ages[$policy]} + fi + + retention_days_display=${retention_days:-unknown} + retention_days_floor="" + if [ -n "$retention_days" ]; then + retention_days_floor=$(awk -v v="$retention_days" 'BEGIN { if (v == "" || v == "null") { print ""; exit } val = v + 0; if (val < 1) val = 1; printf "%d", int(val) }') + if [ -n "$retention_days_floor" ] && [ "$retention_days_floor" -lt 1 ]; then + retention_days_floor=1 + fi + fi + + delete_min_age_numeric="" + if [ -n "$delete_min_age" ]; then + delete_min_age_numeric=$(awk -v v="$delete_min_age" 'BEGIN { if (v == "" || v == "null") { print ""; exit } val = v + 0; if (val < 1) val = 1; printf "%d", int(val) }') + fi + + recommended_delete_min_age="" + if [ -n "$retention_days_floor" ]; then + recommended_delete_min_age="$retention_days_floor" + fi + if [ -n "$delete_min_age_numeric" ]; then + if [ -n "$recommended_delete_min_age" ]; then + recommended_delete_min_age=$(awk -v 
rec="$recommended_delete_min_age" -v cur="$delete_min_age_numeric" 'BEGIN { rec += 0; cur += 0; if (cur < rec) printf "%d", cur; else printf "%d", rec }') + else + recommended_delete_min_age="$delete_min_age_numeric" + fi + fi + if [ -z "$recommended_delete_min_age" ] && [ -n "$retention_days_floor" ]; then + recommended_delete_min_age="$retention_days_floor" + fi + + action_phrase="" + if [ -n "$recommended_delete_min_age" ]; then + if [ -n "$delete_min_age_numeric" ] && [ "$recommended_delete_min_age" -lt "$delete_min_age_numeric" ]; then + action_phrase="Lower delete.min_age to ~${recommended_delete_min_age}d" + else + action_phrase="Cap delete.min_age at ~${recommended_delete_min_age}d" + fi + if [ -n "$retention_days_floor" ]; then + action_phrase="${action_phrase} (observed retention ~${retention_days_floor}d)" + fi + action_phrase="${action_phrase}; consider whether a tighter cap (e.g., 30d) fits requirements." + else + action_phrase="Review ILM delete.min_age for this index; consider more aggressive retention if throughput stays high." + fi + + policy_clause="" + if [ -n "$policy" ]; then + policy_clause=", policy ${policy}" + fi + if [ -n "$delete_min_age" ]; then + policy_clause="${policy_clause} (current delete.min_age ${delete_min_age}d)" + fi + + recommendation_lines+=(" - ${BOLD}${index}${NC}: ~${growth_24h_gb} GB growth in last 24h, retention ~${retention_days_display} days (created ${creation_date_display:-unknown})${policy_clause}. ${action_phrase}") + record=$(jq -nc \ + --arg index "$index" \ + --arg growth "$growth_24h_gb" \ + --arg retention "${retention_days:-}" \ + --arg created "${creation_date_display:-}" \ + --arg policy "$policy" \ + --arg delete_age "${delete_min_age:-}" \ + --arg suggested "${recommended_delete_min_age:-}" \ + --arg action "$action_phrase" \ + '{ + index: $index, + growth_gb_last_24h: (if ($growth | length) > 0 then ($growth | tonumber) else null end), + retention_days: (if ($retention | length) > 0 then ($retention | tonumber) else null end), + creation_date: (if ($created | length) > 0 then $created else null end), + ilm_policy: (if ($policy | length) > 0 then $policy else null end), + delete_min_age_days: (if ($delete_age | length) > 0 then ($delete_age | tonumber) else null end), + suggested_delete_min_age_days: (if ($suggested | length) > 0 then ($suggested | tonumber) else null end), + recommendation: (if ($action | length) > 0 then $action else null end) + }') + recommendation_records+=("$record") + done + fi + + if [ ${#recommendation_records[@]} -gt 0 ]; then + recommendations_ready=true + recommendations_json=$(printf '%s\n' "${recommendation_records[@]}" | jq -s '.') + else + if [ -n "$recommendations_reason" ]; then + recommendations_message="$recommendations_reason Unable to retrieve detailed growth data from so-elasticsearch-indices-growth." + else + recommendations_message="Unable to retrieve growth data from so-elasticsearch-indices-growth while near the low watermark threshold." 
+ fi + fi +fi + +if [ "$JSON_OUTPUT" = true ]; then + jq -n \ + --arg indexed_storage_source "$indexed_storage_source" \ + --arg current_gb "$(bytes_to_gb "$cluster_storage_size")" \ + --arg oldest_index_days "$oldest_index_days" \ + --arg estimated_retention_days "$estimated_retention_days" \ + --arg daily_growth_gb "$(bytes_to_gb "$daily_growth_bytes")" \ + --arg daily_ilm_delete_gb "$daily_ilm_delete_gb" \ + --arg net_growth_gb "$(bytes_to_gb "$net_growth_bytes")" \ + --arg ilm_delete_7d_gb "$ilm_delete_7d_gb" \ + --arg ilm_delete_immediate_gb "$ilm_delete_immediate_gb" \ + --arg ilm_delete_scheduled_7d_gb "$ilm_delete_scheduled_7d_gb" \ + --arg ilm_delete_scheduled_30d_gb "$ilm_delete_scheduled_30d_gb" \ + --arg ilm_delete_30d_gb "$ilm_delete_30d_gb" \ + --arg ilm_window_daily_gb "$ilm_window_daily_gb" \ + --arg ilm_impact_pct "$ilm_impact_pct" \ + --arg ilm_rate_variance_pct "$ilm_rate_variance_pct" \ + --arg growth_window "$history_label" \ + --arg cluster_total_gb "$(bytes_to_gb "$cluster_total")" \ + --arg cluster_used_gb "$(bytes_to_gb "$cluster_used")" \ + --arg cluster_remaining_gb "$(bytes_to_gb "$cluster_remaining")" \ + --arg cluster_low_threshold_gb "$(bytes_to_gb "$cluster_low_threshold")" \ + --arg cluster_high_threshold_gb "$cluster_high_threshold_gb" \ + --arg cluster_over_low_gb "$cluster_over_low_gb" \ + --arg cluster_over_high_gb "$cluster_over_high_gb" \ + --arg shard_usage_percent "$shard_usage_percent" \ + --arg low_watermark "$low" \ + --arg high_watermark "$high" \ + --arg flood_watermark "$flood" \ + --arg days_to_low "${days_to_low:-null}" \ + --arg days_to_low_gross "${days_to_low_gross:-null}" \ + --arg estimated_date "${target_date:-null}" \ + --arg recommendation_message "$recommendations_message" \ + --argjson total_shards "$total_shards" \ + --argjson max_shard_capacity "$max_shard_capacity" \ + --argjson data_node_count "$data_node_count" \ + --argjson max_shards_per_node "$max_shards_per_node" \ + --argjson ilm_indices_7d "$ilm_indices_7d" \ + --argjson ilm_indices_immediate "$ilm_indices_immediate" \ + --argjson ilm_indices_scheduled_7d "$ilm_indices_scheduled_7d" \ + --argjson ilm_indices_scheduled_30d "$ilm_indices_scheduled_30d" \ + --argjson ilm_indices_30d "$ilm_indices_30d" \ + --argjson ilm_shards_7d "$ilm_shards_7d" \ + --argjson ilm_shards_30d "$ilm_shards_30d" \ + --argjson ilm_shards_immediate "$ilm_shards_immediate" \ + --argjson ilm_shards_scheduled_7d "$ilm_shards_scheduled_7d" \ + --argjson ilm_shards_scheduled_30d "$ilm_shards_scheduled_30d" \ + --arg daily_shard_creation "$daily_shard_creation" \ + --argjson recommendations "$recommendations_json" \ + --argjson recommendations_triggered "$recommendations_triggered_json" \ + ' { + indexed_storage_gb: ($current_gb | tonumber), + indexed_storage_source: $indexed_storage_source, + oldest_index_days: (if ($oldest_index_days | length) > 0 then ($oldest_index_days | tonumber) else null end), + estimated_retention_days: (if ($estimated_retention_days | length) > 0 then ($estimated_retention_days | tonumber) else null end), + growth: { + daily_growth_gb: ($daily_growth_gb | tonumber), + daily_ilm_delete_gb: (if ($daily_ilm_delete_gb | length) > 0 then ($daily_ilm_delete_gb | tonumber) else null end), + net_growth_gb: (if ($net_growth_gb | length) > 0 then ($net_growth_gb | tonumber) else null end), + daily_shard_creation: (if ($daily_shard_creation | length) > 0 then ($daily_shard_creation | tonumber) else null end), + }, + ilm: { + deleting_now: { + indices: $ilm_indices_immediate, + 
storage_gb: (if ($ilm_delete_immediate_gb | length) > 0 then ($ilm_delete_immediate_gb | tonumber) else null end), + shards: $ilm_shards_immediate + }, + scheduled_7d: { + indices: $ilm_indices_scheduled_7d, + storage_gb: (if ($ilm_delete_scheduled_7d_gb | length) > 0 then ($ilm_delete_scheduled_7d_gb | tonumber) else null end), + shards: $ilm_shards_scheduled_7d + }, + scheduled_30d: { + indices: $ilm_indices_scheduled_30d, + storage_gb: (if ($ilm_delete_scheduled_30d_gb | length) > 0 then ($ilm_delete_scheduled_30d_gb | tonumber) else null end), + shards: $ilm_shards_scheduled_30d + }, + indices_to_delete_7d: $ilm_indices_7d, + storage_to_delete_7d_gb: (if ($ilm_delete_7d_gb | length) > 0 then ($ilm_delete_7d_gb | tonumber) else null end), + shards_to_delete_7d: $ilm_shards_7d, + total_30d_indices: $ilm_indices_30d, + total_30d_storage_gb: (if ($ilm_delete_30d_gb | length) > 0 then ($ilm_delete_30d_gb | tonumber) else null end), + total_30d_shards: $ilm_shards_30d, + percent_of_current_data: (if ($ilm_impact_pct | length) > 0 then ($ilm_impact_pct | tonumber) else null end), + windowed_daily_avg_gb: (if ($ilm_window_daily_gb | length) > 0 then ($ilm_window_daily_gb | tonumber) else null end), + }, + cluster: { + total_gb: ($cluster_total_gb | tonumber), + used_gb: ($cluster_used_gb | tonumber), + remaining_before_low_watermark_gb: (if ($cluster_remaining_gb | length) > 0 then ($cluster_remaining_gb | tonumber) else null end), + low_watermark_threshold_gb: (if ($cluster_low_threshold_gb | length) > 0 then ($cluster_low_threshold_gb | tonumber) else null end), + high_watermark_threshold_gb: (if ($cluster_high_threshold_gb | length) > 0 then ($cluster_high_threshold_gb | tonumber) else null end), + over_low_watermark_gb: (if ($cluster_over_low_gb | length) > 0 then ($cluster_over_low_gb | tonumber) else null end), + over_high_watermark_gb: (if ($cluster_over_high_gb | length) > 0 then ($cluster_over_high_gb | tonumber) else null end), + low_watermark_setting: $low_watermark, + high_watermark_setting: $high_watermark, + flood_watermark_setting: $flood_watermark, + shards: { + current: $total_shards, + max_capacity: $max_shard_capacity, + usage_percent: (if ($shard_usage_percent | length) > 0 then ($shard_usage_percent | tonumber) else null end), + data_nodes: $data_node_count, + max_shards_per_node: $max_shards_per_node + } + }, + projection: { + days_to_low_watermark_net: (if $days_to_low == "null" or $days_to_low == "stable" then $days_to_low else ($days_to_low | tonumber) end), + days_to_low_watermark_gross: (if $days_to_low_gross == "null" then null else ($days_to_low_gross | tonumber) end), + estimated_breach_date: (if $estimated_date == "null" then null else $estimated_date end) + }, + recommendations: { + triggered: $recommendations_triggered, + message: (if ($recommendation_message | length) > 0 then $recommendation_message else null end), + indices: $recommendations + } + }' +else + log_title "LOG" "Storage Overview" + + indexed_gb_display=$(bytes_to_gb "$cluster_storage_size") + echo -e "${BOLD}Indexed data size:${NC} ${indexed_gb_display} GB (Elasticsearch)" + echo -e "${BOLD}Cluster capacity:${NC} $(bytes_to_gb "$cluster_total") GB total" + echo -e "${BOLD}Cluster used:${NC} $(bytes_to_gb "$cluster_used") GB" + echo -e "${BOLD}Low watermark:${NC} $low ($(bytes_to_gb "$cluster_low_threshold") GB threshold)" + if [ "$cluster_over_low_flag" -eq 1 ]; then + if [ "$cluster_over_high_flag" -eq 1 ]; then + echo -e "${BOLD}Remaining space:${NC} ${REDBOLD}${cluster_over_high_gb} GB${NC} 
OVER the high watermark" + else + echo -e "${BOLD}Remaining space:${NC} ${YELLOWBOLD}${cluster_over_low_gb} GB${NC} OVER the low watermark" + fi + else + echo -e "${BOLD}Remaining space:${NC} $(bytes_to_gb "$cluster_remaining") GB before low watermark" + fi + + # Display shard capacity information + shard_warning_flag=$(awk -v pct="$shard_usage_percent" 'BEGIN { if (pct + 0 >= 80) print 1; else print 0 }') + if [ "$shard_warning_flag" -eq 1 ]; then + echo -e "${BOLD}Cluster shards:${NC} ${YELLOW}${total_shards} / ${max_shard_capacity} (${shard_usage_percent}%)${NC}" + else + echo -e "${BOLD}Cluster shards:${NC} ${total_shards} / ${max_shard_capacity} (${shard_usage_percent}%)" + fi + + # Display data nodes with roles (only data-related roles) + if [ "$data_node_count" -gt 0 ]; then + echo -e "${BOLD}Cluster data nodes:${NC} ${data_node_count}" + for i in "${!data_node_names[@]}"; do + node_name="${data_node_names[$i]}" + node_role="${data_node_roles[$i]}" + expanded_roles=$(expand_node_roles "$node_role") + echo -e " ${node_name}: ${expanded_roles}" + done + fi + + log_title "LOG" "ES Growth" + + echo -e "${BOLD}Daily growth rate:${NC} $(bytes_to_gb "$daily_growth_bytes") GB/day" + + if [ "$daily_ilm_delete_bytes" -gt 0 ]; then + echo -e "${BOLD}ILM deletion rate:${NC} ${daily_ilm_delete_gb} GB/day (scheduled)" + echo -e "${BOLD}Net growth rate:${NC} $(bytes_to_gb "$net_growth_bytes") GB/day" + else + echo -e "${BOLD}ILM deletion rate:${NC} 0.00 GB/day (scheduled)" + echo -e "${BOLD}Net growth rate:${NC} $(bytes_to_gb "$net_growth_bytes") GB/day" + fi + + # Display daily shards + if [ -n "$daily_shard_creation" ] && [ "$(awk -v d="$daily_shard_creation" 'BEGIN { if (d > 0) print 1; else print 0 }')" -eq 1 ]; then + daily_shard_creation_rounded=$(awk -v d="$daily_shard_creation" 'BEGIN { printf "%.0f", d }') + echo -e "${BOLD}Daily shard creation:${NC} ~${daily_shard_creation_rounded} shards/day" + fi + + if [ "$ilm_indices_immediate" -gt 0 ]; then + echo -e "${BOLD}Deleting now:${NC} $ilm_indices_immediate indices (~${ilm_delete_immediate_gb} GB, $ilm_shards_immediate shards)" + fi + if [ "$ilm_indices_7d" -gt 0 ]; then + echo -e "${BOLD}Storage to be freed (7d):${NC} $ilm_indices_7d indices (~${ilm_delete_7d_gb} GB, $ilm_shards_7d shards)" + fi + + log_title "LOG" "Retention Projection" + + if [ -n "$oldest_index_days" ]; then + oldest_days_rounded=$(awk -v d="$oldest_index_days" 'BEGIN { printf "%.0f", d }') + if [ -n "$oldest_index_name" ]; then + echo -e "${BOLD}Oldest index:${NC} ~${oldest_days_rounded} days (${oldest_index_name})" + else + echo -e "${BOLD}Oldest index:${NC} ~${oldest_days_rounded} days (.ds-logs-* only)" + fi + + if [ -n "$estimated_retention_days" ]; then + estimated_days_rounded=$(awk -v d="$estimated_retention_days" 'BEGIN { printf "%.0f", d }') + echo -e "${BOLD}Estimated retention:${NC} ~${estimated_days_rounded} days (until configured low watermark setting)" + fi + echo + fi + + if [ "$days_to_low" = "stable" ]; then + if [ "$net_growth_bytes" -lt 0 ]; then + shrink_rate_gb=$(bytes_to_gb "${net_growth_bytes#-}") + log_title "OK" "Cluster is shrinking - ILM deletions exceed growth" + echo + echo -e "${BOLD}Storage trend:${NC} Decreasing at ~${shrink_rate_gb} GB/day" + echo -e "${BOLD}Note:${NC} Current ILM policies are reclaiming more space than incoming data consumes." 
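+            # Recovery arithmetic (illustrative numbers only): a cluster sitting
+            # 100 GB over the low watermark while shrinking at 20 GB/day reports
+            # a recovery time of 100 / 20 = 5.0 days below.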
+ if [ "$cluster_over_low_bytes" -gt 0 ] 2>/dev/null; then + recovery_days=$(awk -v excess="$cluster_over_low_bytes" -v rate="${net_growth_bytes#-}" 'BEGIN { + if (rate <= 0) { print ""; exit } + printf "%.1f", excess / rate + }') + if [ -n "$recovery_days" ]; then + echo -e "${BOLD}Recovery time:${NC} Estimated ${recovery_days} days to fall below the low watermark if trend continues" + fi + fi + else + log_title "OK" "Cluster is in equilibrium - ILM deletions balance growth" + echo + echo -e "${BOLD}Storage trend:${NC} Stable (net growth ~0 GB/day)" + echo -e "${BOLD}Note:${NC} Current ILM policies are keeping storage steady." + fi + elif [ -z "$days_to_low" ]; then + if [ "$net_growth_bytes" -lt 0 ] && [ "$daily_ilm_delete_bytes" -gt 0 ]; then + shrink_rate_gb=$(bytes_to_gb "${net_growth_bytes#-}") + log_title "OK" "Cluster is shrinking - ILM deletions exceed growth" + echo + echo -e "${BOLD}Storage trend:${NC} Decreasing at ~${shrink_rate_gb} GB/day" + echo -e "${BOLD}Note:${NC} Storage is expected to continue decreasing due to ILM policies." + elif [ "$daily_growth_bytes" -le 0 ]; then + log_title "WARN" "Unable to project: Growth rate is zero or negative" + elif [ "$(echo "$cluster_remaining <= 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r <= 0) print 1; else print 0 }')" -eq 1 ]; then + log_title "ERROR" "Cluster already at low watermark threshold! Review recommendations below and consider updating ILM." + else + log_title "WARN" "Unable to calculate projection" + fi + else + if (( $(echo "$days_to_low < 7" | bc -l 2>/dev/null || awk -v d="$days_to_low" 'BEGIN { if (d < 7) print 1; else print 0 }') )); then + log_title "ERROR" "Low watermark breach estimated in ~$days_to_low days (${target_date:-N/A})" + elif (( $(echo "$days_to_low < 14" | bc -l 2>/dev/null || awk -v d="$days_to_low" 'BEGIN { if (d < 14) print 1; else print 0 }') )); then + log_title "WARN" "Low watermark breach estimated in ~$days_to_low days (${target_date:-N/A})" + else + log_title "OK" "Low watermark breach estimated in ~$days_to_low days (${target_date:-N/A})" + fi + echo + fi + + if [ "$recommendations_triggered" = true ]; then + log_title "LOG" "Recommendations" + if [ "$recommendations_ready" = true ]; then + echo -e "${BOLD}Action:${NC} Reduce retention on the fastest-growing indices to reduce overall storage usage." 
+ for rec_line in "${recommendation_lines[@]}"; do + echo -e "$rec_line" + done + else + if [ -n "$recommendations_message" ]; then + echo -e "${BOLD}Note:${NC} $recommendations_message" + fi + fi + echo + fi + + if [ "$VERBOSE" = true ]; then + log_title "LOG" "Scheduled Deletions (Detailed)" + + if [ ${#immediate_indices_names[@]} -gt 0 ]; then + echo -e "${BOLD}Deleting Now (in delete phase):${NC}" + echo + total_immediate_mb=0 + for i in "${!immediate_indices_names[@]}"; do + index_name="${immediate_indices_names[$i]}" + size_bytes="${immediate_indices_sizes[$i]}" + size_mb=$(awk -v b="$size_bytes" 'BEGIN { printf "%.2f", b / 1024 / 1024 }') + total_immediate_mb=$(awk -v total="$total_immediate_mb" -v size="$size_mb" 'BEGIN { printf "%.2f", total + size }') + printf " %-60s %10s MB\n" "$index_name" "$size_mb" + done + echo -e "${BOLD}Total:${NC} ${total_immediate_mb} MB (${#immediate_indices_names[@]} indices)" + echo + fi + + if [ ${#scheduled_indices_names[@]} -gt 0 ]; then + echo -e "${BOLD}Scheduled for Deletion (≤7 days):${NC}" + echo + total_scheduled_mb=0 + # Sort by days_until deletion + sorted_indices=() + for i in "${!scheduled_indices_names[@]}"; do + sorted_indices+=("${scheduled_indices_days[$i]}|${scheduled_indices_names[$i]}|${scheduled_indices_sizes[$i]}") + done + IFS=$'\n' sorted_indices=($(sort -t'|' -k1 -n <<<"${sorted_indices[*]}")) + unset IFS + + for entry in "${sorted_indices[@]}"; do + IFS='|' read -r days_until index_name size_bytes <<< "$entry" + size_mb=$(awk -v b="$size_bytes" 'BEGIN { printf "%.2f", b / 1024 / 1024 }') + total_scheduled_mb=$(awk -v total="$total_scheduled_mb" -v size="$size_mb" 'BEGIN { printf "%.2f", total + size }') + days_display=$(awk -v d="$days_until" 'BEGIN { printf "%.1f", d }') + printf " %-55s %10s MB (in ~%s days)\n" "$index_name" "$size_mb" "$days_display" + done + echo -e "${BOLD}Total:${NC} ${total_scheduled_mb} MB (${#scheduled_indices_names[@]} indices)" + echo + fi + + if [ ${#immediate_indices_names[@]} -eq 0 ] && [ ${#scheduled_indices_names[@]} -eq 0 ]; then + echo -e "No indices scheduled for deletion within the next 7 days." + echo + fi + fi + echo +fi + +exit 0 \ No newline at end of file From 6d12a8bfa16b5f9438e7457d0b1143af361e00ed Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 29 Oct 2025 15:31:46 -0400 Subject: [PATCH 107/124] handle salt-cloud upgrade during soup --- salt/manager/tools/sbin/soup | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 952645c61..3bc4e9ca9 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1265,19 +1265,36 @@ upgrade_salt() { echo "" # If rhel family if [[ $is_rpm ]]; then + # Check if salt-cloud is installed + local salt_cloud_installed=false + if rpm -q salt-cloud &>/dev/null; then + salt_cloud_installed=true + fi + echo "Removing yum versionlock for Salt." echo "" yum versionlock delete "salt" yum versionlock delete "salt-minion" yum versionlock delete "salt-master" + # Remove salt-cloud versionlock if installed + if [[ $salt_cloud_installed == true ]]; then + yum versionlock delete "salt-cloud" + fi echo "Updating Salt packages." echo "" set +e # if oracle run with -r to ignore repos set by bootstrap if [[ $OS == 'oracle' ]]; then - run_check_net_err \ - "sh $UPDATE_DIR/salt/salt/scripts/bootstrap-salt.sh -X -r -F -M stable \"$NEWSALTVERSION\"" \ - "Could not update salt, please check $SOUP_LOG for details." 
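+            # Note: -L tells bootstrap-salt.sh to also install salt-cloud and its
+            # python libcloud dependency, so it is only passed when salt-cloud was
+            # already present before the upgrade.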
+ # Add -L flag only if salt-cloud is already installed + if [[ $salt_cloud_installed == true ]]; then + run_check_net_err \ + "sh $UPDATE_DIR/salt/salt/scripts/bootstrap-salt.sh -X -r -L -F -M stable \"$NEWSALTVERSION\"" \ + "Could not update salt, please check $SOUP_LOG for details." + else + run_check_net_err \ + "sh $UPDATE_DIR/salt/salt/scripts/bootstrap-salt.sh -X -r -F -M stable \"$NEWSALTVERSION\"" \ + "Could not update salt, please check $SOUP_LOG for details." + fi # if another rhel family variant we want to run without -r to allow the bootstrap script to manage repos else run_check_net_err \ @@ -1290,6 +1307,10 @@ upgrade_salt() { yum versionlock add "salt-0:$NEWSALTVERSION-0.*" yum versionlock add "salt-minion-0:$NEWSALTVERSION-0.*" yum versionlock add "salt-master-0:$NEWSALTVERSION-0.*" + # Add salt-cloud versionlock if installed + if [[ $salt_cloud_installed == true ]]; then + yum versionlock add "salt-cloud-0:$NEWSALTVERSION-0.*" + fi # Else do Ubuntu things elif [[ $is_deb ]]; then echo "Removing apt hold for Salt." From 30970acfafd18d8ba1588b5e66bf6e0104253259 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 29 Oct 2025 16:05:12 -0400 Subject: [PATCH 108/124] var for SALTVERSION in cloud config --- salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja | 2 +- salt/salt/cloud/config.sls | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja b/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja index 025e23d89..23fd15983 100644 --- a/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja +++ b/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja @@ -14,7 +14,7 @@ sool9_{{host}}: private_key: /etc/ssh/auth_keys/soqemussh/id_ecdsa sudo: True deploy_command: sh /tmp/.saltcloud-*/deploy.sh - script_args: -r -F -x python3 stable 3006.9 + script_args: -r -F -x python3 stable {{ SALTVERSION }} minion: master: {{ grains.host }} master_port: 4506 diff --git a/salt/salt/cloud/config.sls b/salt/salt/cloud/config.sls index dfbfda56b..8dfbf325e 100644 --- a/salt/salt/cloud/config.sls +++ b/salt/salt/cloud/config.sls @@ -13,6 +13,7 @@ {% if '.'.join(sls.split('.')[:2]) in allowed_states %} {% if 'vrt' in salt['pillar.get']('features', []) %} {% set HYPERVISORS = salt['pillar.get']('hypervisor:nodes', {} ) %} +{% from 'salt/map.jinja' import SALTVERSION %} {% if HYPERVISORS %} cloud_providers: @@ -32,6 +33,7 @@ cloud_profiles: HYPERVISORS: {{HYPERVISORS}} MANAGERHOSTNAME: {{ grains.host }} MANAGERIP: {{ pillar.host.mainip }} + SALTVERSION: {{ SALTVERSION }} - template: jinja - makedirs: True {% endif %} From 1949be90c23a19611c54d3efa69c5028f735d29f Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 29 Oct 2025 16:49:59 -0400 Subject: [PATCH 109/124] allow to preserve files --- salt/common/tools/sbin/so-common | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/salt/common/tools/sbin/so-common b/salt/common/tools/sbin/so-common index 365852e63..ba2cb5ae7 100755 --- a/salt/common/tools/sbin/so-common +++ b/salt/common/tools/sbin/so-common @@ -220,12 +220,22 @@ compare_es_versions() { } copy_new_files() { + # Define files to exclude from deletion (relative to their respective base directories) + local EXCLUDE_FILES=( + "salt/hypervisor/soc_hypervisor.yaml" + ) + + # Build rsync exclude arguments + local EXCLUDE_ARGS=() + for file in "${EXCLUDE_FILES[@]}"; do + EXCLUDE_ARGS+=(--exclude="$file") + done + # Copy new files over to the salt dir cd $UPDATE_DIR - rsync -a salt $DEFAULT_SALT_DIR/ 
--delete - rsync -a pillar $DEFAULT_SALT_DIR/ --delete + rsync -a salt $DEFAULT_SALT_DIR/ --delete "${EXCLUDE_ARGS[@]}" + rsync -a pillar $DEFAULT_SALT_DIR/ --delete "${EXCLUDE_ARGS[@]}" chown -R socore:socore $DEFAULT_SALT_DIR/ - chmod 755 $DEFAULT_SALT_DIR/pillar/firewall/addfirewall.sh cd /tmp } From 8ea66bb0e92f44791deacf0630b61ce0bde8e4be Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 30 Oct 2025 11:02:36 -0400 Subject: [PATCH 110/124] create libvirt volumes directory --- salt/libvirt/init.sls | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/salt/libvirt/init.sls b/salt/libvirt/init.sls index e25a3bcc6..096e0f55c 100644 --- a/salt/libvirt/init.sls +++ b/salt/libvirt/init.sls @@ -31,6 +31,19 @@ libvirt_conf_dir: - group: 939 - makedirs: True +libvirt_volumes: + file.directory: + - name: /nsm/libvirt/volumes + - user: qemu + - group: qemu + - dir_mode: 755 + - file_mode: 640 + - recurse: + - user + - group + - mode + - makedirs: True + libvirt_config: file.managed: - name: /opt/so/conf/libvirt/libvirtd.conf From 78c951cb70bcb938ebdf33288814eb5a67d27a61 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:15:58 -0500 Subject: [PATCH 111/124] add manager role to elastic ingest time spent --- .../templates/dashboard-security_onion_performance.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/influxdb/templates/dashboard-security_onion_performance.json b/salt/influxdb/templates/dashboard-security_onion_performance.json index 835aedb03..1e66b2b40 100644 --- a/salt/influxdb/templates/dashboard-security_onion_performance.json +++ b/salt/influxdb/templates/dashboard-security_onion_performance.json @@ -1 +1 @@ -[{"apiVersion":"influxdata.com/v2alpha1","kind":"Dashboard","metadata":{"name":"vivid-wilson-002001"},"spec":{"charts":[{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Uptime","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> map(fn: (r) => ({r with _value: r._value / (24 * 60 * 60)}))\n |> group(columns: [\"host\"])\n |> last()\n |> lowestMin(n:1)"}],"staticLegend":{},"suffix":" days","width":1},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"ruby","type":"text","hex":"#BF3D5E","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Critical Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"crit\")\n |> count()"}],"staticLegend":{},"suffix":" 
","width":1,"yPos":2},{"colors":[{"id":"base","name":"rainforest","type":"text","hex":"#4ED8A0"},{"id":"QCTYWuGuHkikYFsZSKMzQ","name":"rainforest","type":"text","hex":"#4ED8A0"},{"id":"QdpMyTRBb0LJ56-P5wfAW","name":"laser","type":"text","hex":"#00C9FF","value":1},{"id":"VQGwCoMrxZyP8asiOW5Cq","name":"tiger","type":"text","hex":"#F48D38","value":2},{"id":"zSO9QkesSIxrU_ntCBx2i","name":"ruby","type":"text","hex":"#BF3D5E","value":3}],"fieldOptions":[{"fieldName":"_time","visible":true},{"displayName":"Alarm","fieldName":"_check_name","visible":true},{"displayName":"Severity","fieldName":"_value","visible":true},{"displayName":"Status","fieldName":"_level","visible":true}],"height":6,"kind":"Table","name":"Alarm Status","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> drop(columns: [\"_value\"])\n |> duplicate(column: \"_level\", as: \"_value\")\n |> map(fn: (r) => ({ r with _value: if r._value == \"ok\" then 0 else if r._value == \"info\" then 1 else if r._value == \"warn\" then 2 else 3 }))\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> keep(columns: [\"_check_name\",\"_level\",\"_value\"])"}],"staticLegend":{},"tableOptions":{"sortBy":"_check_name","verticalTimeAxis":true},"timeFormat":"YYYY-MM-DD HH:mm:ss","width":3,"yPos":4},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"B"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elasticsearch Storage Size","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"store_size_in_bytes\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"store_size_in_bytes\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"B"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"InfluxDB Size","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"influxsize\")\n |> filter(fn: (r) => r[\"_field\"] == \"kbytes\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 1000.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"influxsize\")\n |> filter(fn: (r) => r[\"_field\"] == \"kbytes\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 1000.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":14},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System Uptime","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60)}))\n |> yield(name: \"last\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: 
v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60)}))\n |> yield(name: \"Trend\")"}],"shade":true,"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"lQ75rvTyd2Lq5pZjzy6LB","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"KLfpRZtiEnU2GxjPtrrzQ","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"1kLynwKxvJ3B5IeJnrBqp","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka EPS","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"controllerHosts = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> filter(fn: (r) => r[\"_value\"] == 1)\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n |> map(fn: (r) => ({r with _value: r.host}))\n |> keep(columns: [\"_value\"])\n\ncontrollerHostNames = controllerHosts |> findColumn(fn: (key) => true, column: \"_value\")\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_topics\")\n |> filter(fn: (r) => r[\"_field\"] == \"MessagesInPerSec.Count\")\n |> filter(fn: (r) => not contains(value: r.host, set: controllerHostNames))\n |> derivative(unit: 1s, nonNegative: true)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"controllerHosts = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> filter(fn: (r) => r[\"_value\"] == 1)\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n |> map(fn: (r) => ({r with _value: r.host}))\n |> keep(columns: [\"_value\"])\n\ncontrollerHostNames = controllerHosts |> findColumn(fn: (key) => true, column: \"_value\")\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_topics\")\n |> filter(fn: (r) => r[\"_field\"] == \"MessagesInPerSec.Count\")\n |> filter(fn: (r) => not contains(value: r.host, set: controllerHostNames))\n |> derivative(unit: 1s, nonNegative: true)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: 
\"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System CPU Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\",\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System Memory Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: 
v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Monitor Interface Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_recv\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == 
\"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Management Interface Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_recv\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: 
\"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":6,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":38},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Stenographer Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":42},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Disk Usage 
/","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"5m Load Average","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load5\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":1},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"tiger","type":"text","hex":"#F48D38","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Warning Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"warn\")\n |> count()"}],"staticLegend":{},"suffix":" ","width":1,"xPos":1,"yPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"IO Wait","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> 
filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"laser","type":"text","hex":"#00C9FF","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Informative Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"info\")\n |> count()"}],"staticLegend":{},"suffix":" ","width":1,"xPos":2,"yPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Estimated EPS In","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> hostFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":3},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":70},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":80},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"CPU Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":3,"yPos":2},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"kOQLOg2H4FVEE-E1_L8Kq","name":"laser","type":"threshold","hex":"#00C9FF","value":85},{"id":"5IArg2lDb8KvnphywgUXa","name":"tiger","type":"threshold","hex":"#F48D38","value":90},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"ruby","type":"threshold","hex":"#BF3D5E","value":95},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Root Disk Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: 
v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":3,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Suricata Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":3,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Redis Queue","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":4},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric 
Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elasticsearch Document Count","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"docs_count\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"docs_count\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Redis Queue","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\", \"_field\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\", \"_field\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":14},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Uptime","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime_ns\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> group(columns: [\"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60 * 1000000000)}))\n |> yield(name: \"last\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime_ns\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> group(columns: [\"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24.0 * 60.0 * 60.0 * 1000000000.0)}))\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Active Controllers","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: 
\"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"current\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Active Brokers","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveBrokerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"trend\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveBrokerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"current\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":24},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"yT5vTIlaaFChSrQvKLfqf","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"mzzUVSu3ibTph1JmQmDAQ","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"mOcnDo7l8ii6qNLFIB5rs","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container CPU Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: 
v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Memory Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n 
else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b"}],"colorizeRows":true,"colors":[{"id":"0ynR6Zs0wuQ3WY0Lz-_KC","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"YiArehCNBwFm9mn8DSXSG","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"DxByY_EQW9Xs2jD5ktkG5","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_net\")\n |> filter(fn: (r) => r[\"_field\"] == \"rx_bytes\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_net\")\n |> filter(fn: (r) => r[\"_field\"] == \"rx_bytes\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with _value: r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", 
\"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Disk Usage /nsm","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xPos":4,"yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Inbound Traffic","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\") \n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0 / (1000.0 * 1000.0)}))\n |> group(columns: [\"host\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" Mb/s","width":1,"xPos":5},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Inbound Drops","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => 
r[\"_field\"] == \"drop_in\") \n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0 / (1000.0 * 1000.0)}))\n |> group(columns: [\"host\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" Mb/s","width":1,"xPos":6},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":70},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":80},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Memory Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":6,"yPos":2},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"laser","type":"threshold","hex":"#00C9FF","value":85},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"tiger","type":"threshold","hex":"#F48D38","value":90},{"id":"H7uprvKmMEh39en6X-ms_","name":"ruby","type":"threshold","hex":"#BF3D5E","value":95},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"NSM Disk Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":6,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Management Interface Traffic - Outbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: 
\"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_sent\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n \n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_sent\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_sent\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n \n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":6,"widthRatio":1,"xCol":"_time","xPos":6,"yCol":"_value","yPos":38},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Zeek Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r 
with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":6,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Capture Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":7},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Zeek Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":8},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"s"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elastic Ingest Time Spent","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == 
\"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_community_id_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"community.id_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_conditional_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"conditional_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_date_index_name_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"date.index.name_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_date_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"date_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_dissect_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"dissect_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => 
r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_dot_expander_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"dot.expander_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_geoip_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"geoip_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_grok_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"grok_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_json_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"json_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_kv_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"kv_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> 
range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_lowercase_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"lowercase_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_rename_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"rename_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_script_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"script_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_user_agent_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"user.agent_time\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty 
Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"1m Load Average","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load1\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load1\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\",\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":14,"yTickStep":1},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":" e/s"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Logstash EPS","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"out\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: -r._value}))\n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: 
false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"out\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: -r._value}))\n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":4,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Under Replicated Partitions","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_partition\")\n |> filter(fn: (r) => r[\"_field\"] == \"UnderReplicatedPartitions\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"partition\",\"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_partition\")\n |> filter(fn: (r) => r[\"_field\"] == \"UnderReplicatedPartitions\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"partition\",\"host\", \"role\"])\n |> yield(name: \"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"UAehjIsi65P8u92M_3sQY","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"_SCP8Npp4NVMx2N4mfuzX","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"BoMPg4R1KDp_UsRORdV3_","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"IO Wait","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = 
(tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Swap Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Monitor Interface Drops - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"drop_in\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: 
\"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Stenographer PCAP Retention","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])"},{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Suricata Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":9},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":50},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":70},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Swap Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, 
createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":9,"yPos":2},{"colors":[{"id":"base","name":"white","type":"text","hex":"#ffffff"}],"fieldOptions":[{"displayName":"Host","fieldName":"host","visible":true},{"displayName":"Name","fieldName":"container_name","visible":true},{"displayName":"Status","fieldName":"container_status","visible":true},{"displayName":"OOM Killed","fieldName":"_value","visible":true},{"displayName":"_start","fieldName":"_start","visible":true},{"displayName":"_stop","fieldName":"_stop","visible":true},{"displayName":"_time","fieldName":"_time","visible":true},{"displayName":"_field","fieldName":"_field","visible":true},{"displayName":"_measurement","fieldName":"_measurement","visible":true},{"displayName":"engine_host","fieldName":"engine_host","visible":true},{"displayName":"role","fieldName":"role","visible":true},{"displayName":"server_version","fieldName":"server_version","visible":true},{"displayName":"container_image","fieldName":"container_image","visible":true},{"displayName":"container_version","fieldName":"container_version","visible":true},{"displayName":"description","fieldName":"description","visible":true},{"displayName":"maintainer","fieldName":"maintainer","visible":true},{"displayName":"io.k8s.description","fieldName":"io.k8s.description","visible":true},{"displayName":"io.k8s.display-name","fieldName":"io.k8s.display-name","visible":true},{"displayName":"license","fieldName":"license","visible":true},{"displayName":"name","fieldName":"name","visible":true},{"displayName":"org.label-schema.build-date","fieldName":"org.label-schema.build-date","visible":true},{"displayName":"org.label-schema.license","fieldName":"org.label-schema.license","visible":true},{"displayName":"org.label-schema.name","fieldName":"org.label-schema.name","visible":true},{"displayName":"org.label-schema.schema-version","fieldName":"org.label-schema.schema-version","visible":true},{"displayName":"org.label-schema.url","fieldName":"org.label-schema.url","visible":true},{"displayName":"org.label-schema.vcs-ref","fieldName":"org.label-schema.vcs-ref","visible":true},{"displayName":"org.label-schema.vcs-url","fieldName":"org.label-schema.vcs-url","visible":true},{"displayName":"org.label-schema.vendor","fieldName":"org.label-schema.vendor","visible":true},{"displayName":"org.label-schema.version","fieldName":"org.label-schema.version","visible":true},{"displayName":"org.opencontainers.image.created","fieldName":"org.opencontainers.image.created","visible":true},{"displayName":"org.opencontainers.image.licenses","fieldName":"org.opencontainers.image.licenses","visible":true},{"displayName":"org.opencontainers.image.title","fieldName":"org.opencontainers.image.title","visible":true},{"displayName":"org.opencontainers.image.vendor","fieldName":"org.opencontainers.image.vendor","visible":true},{"displayName":"release","fieldName":"release","visible":true},{"displayName":"summary","fieldName":"summary","visible":true},{"displayName":"url","fieldName":"url","visible":true},{"displayName":"vendor","fieldName":"vendor","visible":true},{"displayName":"version","fieldName":"version","visible":true},{"displayName":"org.label-schema.usage","fieldName":"org.label-schema.usage","visible":true},{"displayName":"org.opencontainers.image.documentation","fieldName":"org.opencontainers.image.documentation","visible":true},{"displayName":"org.opencontainers.image.revision","fieldName":"org.opencontainers.image.revision","visible":true},{"disp
layName":"org.opencontainers.image.source","fieldName":"org.opencontainers.image.source","visible":true},{"displayName":"org.opencontainers.image.url","fieldName":"org.opencontainers.image.url","visible":true},{"displayName":"org.opencontainers.image.version","fieldName":"org.opencontainers.image.version","visible":true},{"displayName":"org.opencontainers.image.description","fieldName":"org.opencontainers.image.description","visible":true}],"height":4,"kind":"Table","name":"Most Recent Container Events","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"oomkilled\")\n |> filter(fn: (r) => r[\"container_status\"] != \"running\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"container_name\", \"host\"])\n |> last()\n |> group()\n |> keep(columns: [\"_value\", \"container_name\", \"host\", \"container_status\"])"}],"staticLegend":{},"tableOptions":{"sortBy":"container_name","verticalTimeAxis":true},"timeFormat":"YYYY-MM-DD HH:mm:ss","width":3,"xPos":9,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Zeek Capture Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":9,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Stenographer Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":10},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"PCAP Retention","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> hostFilter()\n |> map(fn: (r) => ({r with _value: r._value / (24.0 * 60.0 * 60.0)}))\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" days","width":1,"xPos":11}],"description":"Visualize the Security Onion grid performance metrics and alarm statuses.","name":"Security Onion Performance"}}] \ No newline at end of file +[{"apiVersion":"influxdata.com/v2alpha1","kind":"Dashboard","metadata":{"name":"vivid-wilson-002001"},"spec":{"charts":[{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Uptime","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> map(fn: (r) => ({r with _value: r._value / (24 * 60 * 60)}))\n |> group(columns: [\"host\"])\n |> last()\n |> lowestMin(n:1)"}],"staticLegend":{},"suffix":" days","width":1},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"ruby","type":"text","hex":"#BF3D5E","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Critical Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"crit\")\n |> count()"}],"staticLegend":{},"suffix":" 
","width":1,"yPos":2},{"colors":[{"id":"base","name":"rainforest","type":"text","hex":"#4ED8A0"},{"id":"QCTYWuGuHkikYFsZSKMzQ","name":"rainforest","type":"text","hex":"#4ED8A0"},{"id":"QdpMyTRBb0LJ56-P5wfAW","name":"laser","type":"text","hex":"#00C9FF","value":1},{"id":"VQGwCoMrxZyP8asiOW5Cq","name":"tiger","type":"text","hex":"#F48D38","value":2},{"id":"zSO9QkesSIxrU_ntCBx2i","name":"ruby","type":"text","hex":"#BF3D5E","value":3}],"fieldOptions":[{"fieldName":"_time","visible":true},{"displayName":"Alarm","fieldName":"_check_name","visible":true},{"displayName":"Severity","fieldName":"_value","visible":true},{"displayName":"Status","fieldName":"_level","visible":true}],"height":6,"kind":"Table","name":"Alarm Status","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> drop(columns: [\"_value\"])\n |> duplicate(column: \"_level\", as: \"_value\")\n |> map(fn: (r) => ({ r with _value: if r._value == \"ok\" then 0 else if r._value == \"info\" then 1 else if r._value == \"warn\" then 2 else 3 }))\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> keep(columns: [\"_check_name\",\"_level\",\"_value\"])"}],"staticLegend":{},"tableOptions":{"sortBy":"_check_name","verticalTimeAxis":true},"timeFormat":"YYYY-MM-DD HH:mm:ss","width":3,"yPos":4},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"B"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elasticsearch Storage Size","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"store_size_in_bytes\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"store_size_in_bytes\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"B"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"InfluxDB Size","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"influxsize\")\n |> filter(fn: (r) => r[\"_field\"] == \"kbytes\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 1000.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"influxsize\")\n |> filter(fn: (r) => r[\"_field\"] == \"kbytes\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 1000.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":14},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System Uptime","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60)}))\n |> yield(name: \"last\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: 
v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60)}))\n |> yield(name: \"Trend\")"}],"shade":true,"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"lQ75rvTyd2Lq5pZjzy6LB","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"KLfpRZtiEnU2GxjPtrrzQ","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"1kLynwKxvJ3B5IeJnrBqp","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka EPS","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"controllerHosts = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> filter(fn: (r) => r[\"_value\"] == 1)\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n |> map(fn: (r) => ({r with _value: r.host}))\n |> keep(columns: [\"_value\"])\n\ncontrollerHostNames = controllerHosts |> findColumn(fn: (key) => true, column: \"_value\")\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_topics\")\n |> filter(fn: (r) => r[\"_field\"] == \"MessagesInPerSec.Count\")\n |> filter(fn: (r) => not contains(value: r.host, set: controllerHostNames))\n |> derivative(unit: 1s, nonNegative: true)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"controllerHosts = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> filter(fn: (r) => r[\"_value\"] == 1)\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n |> map(fn: (r) => ({r with _value: r.host}))\n |> keep(columns: [\"_value\"])\n\ncontrollerHostNames = controllerHosts |> findColumn(fn: (key) => true, column: \"_value\")\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_topics\")\n |> filter(fn: (r) => r[\"_field\"] == \"MessagesInPerSec.Count\")\n |> filter(fn: (r) => not contains(value: r.host, set: controllerHostNames))\n |> derivative(unit: 1s, nonNegative: true)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: 
\"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System CPU Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\",\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System Memory Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: 
v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Monitor Interface Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_recv\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == 
\"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Management Interface Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_recv\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: 
\"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":6,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":38},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Stenographer Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":42},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Disk Usage 
/","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"5m Load Average","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load5\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":1},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"tiger","type":"text","hex":"#F48D38","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Warning Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"warn\")\n |> count()"}],"staticLegend":{},"suffix":" ","width":1,"xPos":1,"yPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"IO Wait","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> 
filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"laser","type":"text","hex":"#00C9FF","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Informative Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"info\")\n |> count()"}],"staticLegend":{},"suffix":" ","width":1,"xPos":2,"yPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Estimated EPS In","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> hostFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":3},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":70},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":80},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"CPU Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":3,"yPos":2},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"kOQLOg2H4FVEE-E1_L8Kq","name":"laser","type":"threshold","hex":"#00C9FF","value":85},{"id":"5IArg2lDb8KvnphywgUXa","name":"tiger","type":"threshold","hex":"#F48D38","value":90},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"ruby","type":"threshold","hex":"#BF3D5E","value":95},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Root Disk Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: 
v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":3,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Suricata Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":3,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Redis Queue","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":4},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric 
Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elasticsearch Document Count","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"docs_count\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"docs_count\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Redis Queue","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\", \"_field\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\", \"_field\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":14},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Uptime","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime_ns\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> group(columns: [\"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60 * 1000000000)}))\n |> yield(name: \"last\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime_ns\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> group(columns: [\"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24.0 * 60.0 * 60.0 * 1000000000.0)}))\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Active Controllers","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: 
\"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"current\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Active Brokers","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveBrokerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"trend\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveBrokerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"current\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":24},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"yT5vTIlaaFChSrQvKLfqf","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"mzzUVSu3ibTph1JmQmDAQ","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"mOcnDo7l8ii6qNLFIB5rs","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container CPU Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: 
v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Memory Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n 
else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b"}],"colorizeRows":true,"colors":[{"id":"0ynR6Zs0wuQ3WY0Lz-_KC","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"YiArehCNBwFm9mn8DSXSG","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"DxByY_EQW9Xs2jD5ktkG5","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_net\")\n |> filter(fn: (r) => r[\"_field\"] == \"rx_bytes\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_net\")\n |> filter(fn: (r) => r[\"_field\"] == \"rx_bytes\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with _value: r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", 
\"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Disk Usage /nsm","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xPos":4,"yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Inbound Traffic","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\") \n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0 / (1000.0 * 1000.0)}))\n |> group(columns: [\"host\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" Mb/s","width":1,"xPos":5},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Inbound Drops","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => 
r[\"_field\"] == \"drop_in\") \n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0 / (1000.0 * 1000.0)}))\n |> group(columns: [\"host\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" Mb/s","width":1,"xPos":6},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":70},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":80},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Memory Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":6,"yPos":2},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"laser","type":"threshold","hex":"#00C9FF","value":85},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"tiger","type":"threshold","hex":"#F48D38","value":90},{"id":"H7uprvKmMEh39en6X-ms_","name":"ruby","type":"threshold","hex":"#BF3D5E","value":95},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"NSM Disk Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":6,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Management Interface Traffic - Outbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: 
\"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_sent\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n \n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_sent\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_sent\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n \n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":6,"widthRatio":1,"xCol":"_time","xPos":6,"yCol":"_value","yPos":38},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Zeek Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r 
with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":6,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Capture Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":7},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Zeek Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":8},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"s"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elastic Ingest Time Spent","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == 
\"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_community_id_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"community.id_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_conditional_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"conditional_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_date_index_name_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"date.index.name_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_date_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"date_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_dissect_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: 
\"dissect_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_dot_expander_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"dot.expander_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_geoip_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"geoip_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_grok_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"grok_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_json_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"json_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_kv_time_in_millis\")\n |> 
derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"kv_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_lowercase_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"lowercase_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_rename_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"rename_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_script_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"script_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_user_agent_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: 
\"user.agent_time\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"1m Load Average","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load1\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load1\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\",\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":14,"yTickStep":1},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":" e/s"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Logstash EPS","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: 
v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"out\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: -r._value}))\n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"out\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: -r._value}))\n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":4,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Under Replicated Partitions","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_partition\")\n |> filter(fn: (r) => r[\"_field\"] == \"UnderReplicatedPartitions\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"partition\",\"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_partition\")\n |> filter(fn: (r) => r[\"_field\"] == \"UnderReplicatedPartitions\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"partition\",\"host\", \"role\"])\n |> yield(name: \"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"UAehjIsi65P8u92M_3sQY","name":"Nineteen Eighty 
Four","type":"scale","hex":"#31C0F6"},{"id":"_SCP8Npp4NVMx2N4mfuzX","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"BoMPg4R1KDp_UsRORdV3_","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"IO Wait","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Swap Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if 
v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Monitor Interface Drops - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"drop_in\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r 
with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Stenographer PCAP Retention","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])"},{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Suricata Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":9},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":50},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":70},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Swap Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: 
v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":9,"yPos":2},{"colors":[{"id":"base","name":"white","type":"text","hex":"#ffffff"}],"fieldOptions":[{"displayName":"Host","fieldName":"host","visible":true},{"displayName":"Name","fieldName":"container_name","visible":true},{"displayName":"Status","fieldName":"container_status","visible":true},{"displayName":"OOM Killed","fieldName":"_value","visible":true},{"displayName":"_start","fieldName":"_start","visible":true},{"displayName":"_stop","fieldName":"_stop","visible":true},{"displayName":"_time","fieldName":"_time","visible":true},{"displayName":"_field","fieldName":"_field","visible":true},{"displayName":"_measurement","fieldName":"_measurement","visible":true},{"displayName":"engine_host","fieldName":"engine_host","visible":true},{"displayName":"role","fieldName":"role","visible":true},{"displayName":"server_version","fieldName":"server_version","visible":true},{"displayName":"container_image","fieldName":"container_image","visible":true},{"displayName":"container_version","fieldName":"container_version","visible":true},{"displayName":"description","fieldName":"description","visible":true},{"displayName":"maintainer","fieldName":"maintainer","visible":true},{"displayName":"io.k8s.description","fieldName":"io.k8s.description","visible":true},{"displayName":"io.k8s.display-name","fieldName":"io.k8s.display-name","visible":true},{"displayName":"license","fieldName":"license","visible":true},{"displayName":"name","fieldName":"name","visible":true},{"displayName":"org.label-schema.build-date","fieldName":"org.label-schema.build-date","visible":true},{"displayName":"org.label-schema.license","fieldName":"org.label-schema.license","visible":true},{"displayName":"org.label-schema.name","fieldName":"org.label-schema.name","visible":true},{"displayName":"org.label-schema.schema-version","fieldName":"org.label-schema.schema-version","visible":true},{"displayName":"org.label-schema.url","fieldName":"org.label-schema.url","visible":true},{"displayName":"org.label-schema.vcs-ref","fieldName":"org.label-schema.vcs-ref","visible":true},{"displayName":"org.label-schema.vcs-url","fieldName":"org.label-schema.vcs-url","visible":true},{"displayName":"org.label-schema.vendor","fieldName":"org.label-schema.vendor","visible":true},{"displayName":"org.label-schema.version","fieldName":"org.label-schema.version","visible":true},{"displayName":"org.opencontainers.image.created","fieldName":"org.opencontainers.image.created","visible":true},{"displayName":"org.opencontainers.image.licenses","fieldName":"org.opencontainers.image.licenses","visible":true},{"displayName":"org.opencontainers.image.title","fieldName":"org.opencontainers.image.title","visible":true},{"displayName":"org.opencontainers.image.vendor","fieldName":"org.opencontainers.image.vendor","visible":true},{"displayName":"release","fieldName":"release","visible":true},{"displayName":"summary","fieldName":"summary","visible":true},{"displayName":"url","fieldName":"url","visible":true},{"displayName":"vendor","fieldName":"vendor","visible":true},{"displayName":"version","fieldName":"version","visible":true},{"displayName":"org.label-schema.usage","field
Name":"org.label-schema.usage","visible":true},{"displayName":"org.opencontainers.image.documentation","fieldName":"org.opencontainers.image.documentation","visible":true},{"displayName":"org.opencontainers.image.revision","fieldName":"org.opencontainers.image.revision","visible":true},{"displayName":"org.opencontainers.image.source","fieldName":"org.opencontainers.image.source","visible":true},{"displayName":"org.opencontainers.image.url","fieldName":"org.opencontainers.image.url","visible":true},{"displayName":"org.opencontainers.image.version","fieldName":"org.opencontainers.image.version","visible":true},{"displayName":"org.opencontainers.image.description","fieldName":"org.opencontainers.image.description","visible":true}],"height":4,"kind":"Table","name":"Most Recent Container Events","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"oomkilled\")\n |> filter(fn: (r) => r[\"container_status\"] != \"running\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"container_name\", \"host\"])\n |> last()\n |> group()\n |> keep(columns: [\"_value\", \"container_name\", \"host\", \"container_status\"])"}],"staticLegend":{},"tableOptions":{"sortBy":"container_name","verticalTimeAxis":true},"timeFormat":"YYYY-MM-DD HH:mm:ss","width":3,"xPos":9,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Zeek Capture Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> roleFilter()\n |> 
group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":9,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Stenographer Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":10},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"PCAP Retention","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> hostFilter()\n |> map(fn: (r) => ({r with _value: r._value / (24.0 * 60.0 * 60.0)}))\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" days","width":1,"xPos":11}],"description":"Visualize the Security Onion grid performance metrics and alarm statuses.","name":"Security Onion Performance"}}] \ No newline at end of file From 70d4223a7588121ec1c5283646eb09433760de0b Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 30 Oct 2025 13:13:16 -0400 Subject: [PATCH 112/124] update salt-cloud config if salt was upgraded --- salt/manager/tools/sbin/soup | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 3bc4e9ca9..8f399707c 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1345,6 +1345,10 @@ upgrade_salt() { else echo "Salt upgrade success." 
echo "" + if [[ $salt_cloud_installed == true ]]; then + echo "Updating salt-cloud config to use the new Salt version" + salt-call state.apply salt.cloud.config pillar='{"passedVersion": "'$NEWSALTVERSION'"}' concurrent=True + fi fi } From 8ca5276a0e97043564f7a85b91063c05cb1e8ca3 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 30 Oct 2025 13:59:08 -0400 Subject: [PATCH 113/124] update cloud profile with local and point to new code --- salt/manager/tools/sbin/soup | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 8f399707c..c5a94af86 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -1347,7 +1347,9 @@ upgrade_salt() { echo "" if [[ $salt_cloud_installed == true ]]; then echo "Updating salt-cloud config to use the new Salt version" - salt-call state.apply salt.cloud.config pillar='{"passedVersion": "'$NEWSALTVERSION'"}' concurrent=True + # neither salt-minion or salt-master is running so we need to run with --local + # the Salt upgrade happens before we copy the new code to default so we need to point the file root to the new code + salt-call state.apply salt.cloud.config --local --file-root=$UPDATE_DIR/salt pillar='{"passedVersion": "'$NEWSALTVERSION'"}' concurrent=True fi fi From 9027e4e06558ba61b6a4a7d58198639dfca33102 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 30 Oct 2025 14:48:48 -0400 Subject: [PATCH 114/124] update salt-cloud profile after new code copied --- salt/salt/cloud/config.sls | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/salt/cloud/config.sls b/salt/salt/cloud/config.sls index 8dfbf325e..8b5e15fe1 100644 --- a/salt/salt/cloud/config.sls +++ b/salt/salt/cloud/config.sls @@ -21,7 +21,7 @@ cloud_providers: - name: /etc/salt/cloud.providers.d/libvirt.conf - source: salt://salt/cloud/cloud.providers.d/libvirt.conf.jinja - defaults: - HYPERVISORS: {{HYPERVISORS}} + HYPERVISORS: {{ HYPERVISORS }} - template: jinja - makedirs: True @@ -30,7 +30,7 @@ cloud_profiles: - name: /etc/salt/cloud.profiles.d/socloud.conf - source: salt://salt/cloud/cloud.profiles.d/socloud.conf.jinja - defaults: - HYPERVISORS: {{HYPERVISORS}} + HYPERVISORS: {{ HYPERVISORS }} MANAGERHOSTNAME: {{ grains.host }} MANAGERIP: {{ pillar.host.mainip }} SALTVERSION: {{ SALTVERSION }} From b8c2808abe8629a736efef67c7b8a283b6f1d13d Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Thu, 30 Oct 2025 15:09:40 -0400 Subject: [PATCH 115/124] update salt-cloud profile after new code copied --- salt/manager/tools/sbin/soup | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index c5a94af86..8fd3f0b64 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -21,6 +21,8 @@ whiptail_title='Security Onion UPdater' NOTIFYCUSTOMELASTICCONFIG=false TOPFILE=/opt/so/saltstack/default/salt/top.sls BACKUPTOPFILE=/opt/so/saltstack/default/salt/top.sls.backup +SALTUPGRADED=false +SALT_CLOUD_INSTALLED=false # used to display messages to the user at the end of soup declare -a FINAL_MESSAGE_QUEUE=() @@ -1260,15 +1262,13 @@ upgrade_check_salt() { } upgrade_salt() { - SALTUPGRADED=True echo "Performing upgrade of Salt from $INSTALLEDSALTVERSION to $NEWSALTVERSION." 
echo "" # If rhel family if [[ $is_rpm ]]; then # Check if salt-cloud is installed - local salt_cloud_installed=false if rpm -q salt-cloud &>/dev/null; then - salt_cloud_installed=true + SALT_CLOUD_INSTALLED=true fi echo "Removing yum versionlock for Salt." @@ -1277,7 +1277,7 @@ upgrade_salt() { yum versionlock delete "salt-minion" yum versionlock delete "salt-master" # Remove salt-cloud versionlock if installed - if [[ $salt_cloud_installed == true ]]; then + if [[ $SALT_CLOUD_INSTALLED == true ]]; then yum versionlock delete "salt-cloud" fi echo "Updating Salt packages." @@ -1286,7 +1286,7 @@ upgrade_salt() { # if oracle run with -r to ignore repos set by bootstrap if [[ $OS == 'oracle' ]]; then # Add -L flag only if salt-cloud is already installed - if [[ $salt_cloud_installed == true ]]; then + if [[ $SALT_CLOUD_INSTALLED == true ]]; then run_check_net_err \ "sh $UPDATE_DIR/salt/salt/scripts/bootstrap-salt.sh -X -r -L -F -M stable \"$NEWSALTVERSION\"" \ "Could not update salt, please check $SOUP_LOG for details." @@ -1308,7 +1308,7 @@ upgrade_salt() { yum versionlock add "salt-minion-0:$NEWSALTVERSION-0.*" yum versionlock add "salt-master-0:$NEWSALTVERSION-0.*" # Add salt-cloud versionlock if installed - if [[ $salt_cloud_installed == true ]]; then + if [[ $SALT_CLOUD_INSTALLED == true ]]; then yum versionlock add "salt-cloud-0:$NEWSALTVERSION-0.*" fi # Else do Ubuntu things @@ -1343,14 +1343,9 @@ upgrade_salt() { echo "" exit 1 else + SALTUPGRADED=true echo "Salt upgrade success." echo "" - if [[ $salt_cloud_installed == true ]]; then - echo "Updating salt-cloud config to use the new Salt version" - # neither salt-minion or salt-master is running so we need to run with --local - # the Salt upgrade happens before we copy the new code to default so we need to point the file root to the new code - salt-call state.apply salt.cloud.config --local --file-root=$UPDATE_DIR/salt pillar='{"passedVersion": "'$NEWSALTVERSION'"}' concurrent=True - fi fi } @@ -1592,6 +1587,11 @@ main() { # ensure the mine is updated and populated before highstates run, following the salt-master restart update_salt_mine + if [[ $SALT_CLOUD_INSTALLED == true && $SALTUPGRADED == true ]]; then + echo "Updating salt-cloud config to use the new Salt version" + salt-call state.apply salt.cloud.config concurrent=True + fi + enable_highstate echo "" From 806173f7e31cc3f5afba3b20d251d8d0a7e1465e Mon Sep 17 00:00:00 2001 From: Corey Ogburn Date: Fri, 31 Oct 2025 14:07:11 -0600 Subject: [PATCH 116/124] Available Models Utilizes Jason's new Array of Objects UI. 
--- salt/soc/defaults.yaml | 19 ++++++++++++++++--- salt/soc/soc_soc.yaml | 34 ++++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index e9e65f7f4..d6b27b24e 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -2552,9 +2552,22 @@ soc: assistant: enabled: false investigationPrompt: Investigate Alert ID {socId} - contextLimitSmall: 200000 - contextLimitLarge: 1000000 thresholdColorRatioLow: 0.5 thresholdColorRatioMed: 0.75 thresholdColorRatioMax: 1 - lowBalanceColorAlert: 500000 \ No newline at end of file + availableModels: + - id: sonnet-4 + displayName: Claude Sonnet 4 + contextLimitSmall: 200000 + contextLimitLarge: 1000000 + lowBalanceColorAlert: 500000 + - id: sonnet-4.5 + displayName: Claude Sonnet 4.5 + contextLimitSmall: 200000 + contextLimitLarge: 1000000 + lowBalanceColorAlert: 500000 + - id: gptoss-120b + displayName: GPT-OSS 120B + contextLimitSmall: 200000 + contextLimitLarge: 1000000 + lowBalanceColorAlert: 500000 \ No newline at end of file diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index 0a063f53e..8a31c977d 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -606,14 +606,6 @@ soc: investigationPrompt: description: Prompt given to Onion AI when beginning an investigation. global: True - contextLimitSmall: - description: Smaller context limit for Onion AI. - global: True - advanced: True - contextLimitLarge: - description: Larger context limit for Onion AI. - global: True - advanced: True thresholdColorRatioLow: description: Lower visual context color change threshold. global: True @@ -630,6 +622,32 @@ soc: description: Onion AI credit amount at which balance turns red. global: True advanced: True + availableModels: + description: List of AI models available for use in SOC as well as model specific warning thresholds. + global: True + advanced: True + forcedType: "[]{}" + helpLink: assistant.html + syntax: json + uiElements: + - field: id + label: Model ID + required: True + - field: displayName + label: Display Name + required: True + - field: contextLimitSmall + label: Context Limit (Small) + forcedType: int + required: True + - field: contextLimitLarge + label: Context Limit (Large) + forcedType: int + required: True + - field: lowBalanceColorAlert + label: Low Balance Color Alert + forcedType: int + required: True apiTimeoutMs: description: Duration (in milliseconds) to wait for a response from the SOC server API before giving up and showing an error on the SOC UI. 
global: True From f80b090c932e04ece75003b5c8c01a78f40aac3b Mon Sep 17 00:00:00 2001 From: Corey Ogburn Date: Fri, 31 Oct 2025 14:48:30 -0600 Subject: [PATCH 117/124] Update limits --- salt/soc/defaults.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index d6b27b24e..813716f39 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -2568,6 +2568,6 @@ soc: lowBalanceColorAlert: 500000 - id: gptoss-120b displayName: GPT-OSS 120B - contextLimitSmall: 200000 - contextLimitLarge: 1000000 + contextLimitSmall: 128000 + contextLimitLarge: 128000 lowBalanceColorAlert: 500000 \ No newline at end of file From 635545630bb19a66ec0a09c3bf6fd5fe068e9e93 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 3 Nov 2025 09:36:46 -0600 Subject: [PATCH 118/124] strelka use single master image --- salt/common/tools/sbin/so-image-common | 2 -- salt/strelka/filestream/enabled.sls | 2 +- salt/strelka/frontend/enabled.sls | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/salt/common/tools/sbin/so-image-common b/salt/common/tools/sbin/so-image-common index 7fd35d5ac..588c767f1 100755 --- a/salt/common/tools/sbin/so-image-common +++ b/salt/common/tools/sbin/so-image-common @@ -62,8 +62,6 @@ container_list() { "so-soc" "so-steno" "so-strelka-backend" - "so-strelka-filestream" - "so-strelka-frontend" "so-strelka-manager" "so-suricata" "so-telegraf" diff --git a/salt/strelka/filestream/enabled.sls b/salt/strelka/filestream/enabled.sls index c90b1e83f..ef5d593ba 100644 --- a/salt/strelka/filestream/enabled.sls +++ b/salt/strelka/filestream/enabled.sls @@ -14,7 +14,7 @@ include: strelka_filestream: docker_container.running: - - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-filestream:{{ GLOBALS.so_version }} + - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-manager:{{ GLOBALS.so_version }} - binds: - /opt/so/conf/strelka/filestream/:/etc/strelka/:ro - /nsm/strelka:/nsm/strelka diff --git a/salt/strelka/frontend/enabled.sls b/salt/strelka/frontend/enabled.sls index f95a31a7e..709b3e71c 100644 --- a/salt/strelka/frontend/enabled.sls +++ b/salt/strelka/frontend/enabled.sls @@ -14,7 +14,7 @@ include: strelka_frontend: docker_container.running: - - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-frontend:{{ GLOBALS.so_version }} + - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-manager:{{ GLOBALS.so_version }} - binds: - /opt/so/conf/strelka/frontend/:/etc/strelka/:ro - /nsm/strelka/log/:/var/log/strelka/:rw From fa154f1a8f86793b6afaf61762baf27131b1f4f5 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 3 Nov 2025 14:12:19 -0500 Subject: [PATCH 119/124] update salt cloud config if configured --- salt/manager/tools/sbin/soup | 16 ++++++++-------- salt/salt/cloud/config.sls | 5 +++++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 8fd3f0b64..291744e50 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -22,7 +22,7 @@ NOTIFYCUSTOMELASTICCONFIG=false TOPFILE=/opt/so/saltstack/default/salt/top.sls BACKUPTOPFILE=/opt/so/saltstack/default/salt/top.sls.backup SALTUPGRADED=false -SALT_CLOUD_INSTALLED=false +SALT_CLOUD_CONFIGURED=false # used to display messages to the user at the end of soup declare -a FINAL_MESSAGE_QUEUE=() @@ -1266,9 +1266,9 @@ upgrade_salt() { 
echo "" # If rhel family if [[ $is_rpm ]]; then - # Check if salt-cloud is installed - if rpm -q salt-cloud &>/dev/null; then - SALT_CLOUD_INSTALLED=true + # Check if salt-cloud is configured + if [[ -f /etc/salt/cloud.profiles.d/socloud.conf ]]; then + SALT_CLOUD_CONFIGURED=true fi echo "Removing yum versionlock for Salt." @@ -1277,7 +1277,7 @@ upgrade_salt() { yum versionlock delete "salt-minion" yum versionlock delete "salt-master" # Remove salt-cloud versionlock if installed - if [[ $SALT_CLOUD_INSTALLED == true ]]; then + if [[ $SALT_CLOUD_CONFIGURED == true ]]; then yum versionlock delete "salt-cloud" fi echo "Updating Salt packages." @@ -1286,7 +1286,7 @@ upgrade_salt() { # if oracle run with -r to ignore repos set by bootstrap if [[ $OS == 'oracle' ]]; then # Add -L flag only if salt-cloud is already installed - if [[ $SALT_CLOUD_INSTALLED == true ]]; then + if [[ $SALT_CLOUD_CONFIGURED == true ]]; then run_check_net_err \ "sh $UPDATE_DIR/salt/salt/scripts/bootstrap-salt.sh -X -r -L -F -M stable \"$NEWSALTVERSION\"" \ "Could not update salt, please check $SOUP_LOG for details." @@ -1308,7 +1308,7 @@ upgrade_salt() { yum versionlock add "salt-minion-0:$NEWSALTVERSION-0.*" yum versionlock add "salt-master-0:$NEWSALTVERSION-0.*" # Add salt-cloud versionlock if installed - if [[ $SALT_CLOUD_INSTALLED == true ]]; then + if [[ $SALT_CLOUD_CONFIGURED == true ]]; then yum versionlock add "salt-cloud-0:$NEWSALTVERSION-0.*" fi # Else do Ubuntu things @@ -1587,7 +1587,7 @@ main() { # ensure the mine is updated and populated before highstates run, following the salt-master restart update_salt_mine - if [[ $SALT_CLOUD_INSTALLED == true && $SALTUPGRADED == true ]]; then + if [[ $SALT_CLOUD_CONFIGURED == true && $SALTUPGRADED == true ]]; then echo "Updating salt-cloud config to use the new Salt version" salt-call state.apply salt.cloud.config concurrent=True fi diff --git a/salt/salt/cloud/config.sls b/salt/salt/cloud/config.sls index 8b5e15fe1..dce0e873a 100644 --- a/salt/salt/cloud/config.sls +++ b/salt/salt/cloud/config.sls @@ -36,6 +36,11 @@ cloud_profiles: SALTVERSION: {{ SALTVERSION }} - template: jinja - makedirs: True +{% else %} +no_hypervisors_configured: + test.succeed_without_changes: + - name: no_hypervisors_configured + - comment: No hypervisors are configured {% endif %} {% else %} From 574703e5517ca56a97c2fafd76d3884e60db3da6 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 3 Nov 2025 15:39:19 -0500 Subject: [PATCH 120/124] unlock/lock salt-cloud if installed --- salt/manager/tools/sbin/soup | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 291744e50..f32b6edf8 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -22,6 +22,7 @@ NOTIFYCUSTOMELASTICCONFIG=false TOPFILE=/opt/so/saltstack/default/salt/top.sls BACKUPTOPFILE=/opt/so/saltstack/default/salt/top.sls.backup SALTUPGRADED=false +SALT_CLOUD_INSTALLED=false SALT_CLOUD_CONFIGURED=false # used to display messages to the user at the end of soup declare -a FINAL_MESSAGE_QUEUE=() @@ -1266,6 +1267,10 @@ upgrade_salt() { echo "" # If rhel family if [[ $is_rpm ]]; then + # Check if salt-cloud is installed + if rpm -q salt-cloud &>/dev/null; then + SALT_CLOUD_INSTALLED=true + fi # Check if salt-cloud is configured if [[ -f /etc/salt/cloud.profiles.d/socloud.conf ]]; then SALT_CLOUD_CONFIGURED=true @@ -1277,7 +1282,7 @@ upgrade_salt() { yum versionlock delete "salt-minion" yum versionlock delete "salt-master" # 
Remove salt-cloud versionlock if installed - if [[ $SALT_CLOUD_CONFIGURED == true ]]; then + if [[ $SALT_CLOUD_INSTALLED == true ]]; then yum versionlock delete "salt-cloud" fi echo "Updating Salt packages." @@ -1286,7 +1291,7 @@ upgrade_salt() { # if oracle run with -r to ignore repos set by bootstrap if [[ $OS == 'oracle' ]]; then # Add -L flag only if salt-cloud is already installed - if [[ $SALT_CLOUD_CONFIGURED == true ]]; then + if [[ $SALT_CLOUD_INSTALLED == true ]]; then run_check_net_err \ "sh $UPDATE_DIR/salt/salt/scripts/bootstrap-salt.sh -X -r -L -F -M stable \"$NEWSALTVERSION\"" \ "Could not update salt, please check $SOUP_LOG for details." @@ -1308,7 +1313,7 @@ upgrade_salt() { yum versionlock add "salt-minion-0:$NEWSALTVERSION-0.*" yum versionlock add "salt-master-0:$NEWSALTVERSION-0.*" # Add salt-cloud versionlock if installed - if [[ $SALT_CLOUD_CONFIGURED == true ]]; then + if [[ $SALT_CLOUD_INSTALLED == true ]]; then yum versionlock add "salt-cloud-0:$NEWSALTVERSION-0.*" fi # Else do Ubuntu things From 5a8ea57a1b663abd0bfdbc6e92ed6441e547e7c8 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 3 Nov 2025 15:31:14 -0600 Subject: [PATCH 121/124] move off of cmd.script with args \ https://github.com/saltstack/salt/issues/68298 --- salt/elasticfleet/install_agent_grid.sls | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/salt/elasticfleet/install_agent_grid.sls b/salt/elasticfleet/install_agent_grid.sls index 8b5bb4c3e..57164b74d 100644 --- a/salt/elasticfleet/install_agent_grid.sls +++ b/salt/elasticfleet/install_agent_grid.sls @@ -8,20 +8,28 @@ {% set AGENT_STATUS = salt['service.available']('elastic-agent') %} {% if not AGENT_STATUS %} +pull_agent_installer: + file.managed: + - name: /opt/so/so-elastic-agent_linux_amd64 + - source: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 + - mode: 755 + - makedirs: True + {% if grains.role not in ['so-heavynode'] %} run_installer: - cmd.script: - - name: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 + cmd.run: + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKENGENERAL }} - cwd: /opt/so - - args: -token={{ GRIDNODETOKENGENERAL }} - retry: True {% else %} run_installer: - cmd.script: - - name: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 + cmd.run: + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKENHEAVY }} - cwd: /opt/so - - args: -token={{ GRIDNODETOKENHEAVY }} - retry: True {% endif %} +cleanup_agent_installer: + file.absent: + - name: /opt/so/so-elastic-agent_linux_amd64 {% endif %} From ccb8ffd6eb2bba8926649460ad9fc0ed0dd473fa Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 3 Nov 2025 17:05:48 -0500 Subject: [PATCH 122/124] Update install_agent_grid.sls --- salt/elasticfleet/install_agent_grid.sls | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/salt/elasticfleet/install_agent_grid.sls b/salt/elasticfleet/install_agent_grid.sls index 57164b74d..4a185e0bb 100644 --- a/salt/elasticfleet/install_agent_grid.sls +++ b/salt/elasticfleet/install_agent_grid.sls @@ -2,8 +2,10 @@ # or more contributor license agreements. Licensed under the Elastic License 2.0; you may not use # this file except in compliance with the Elastic License 2.0. 
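Patch 121 above works around the linked Salt issue (cmd.script mishandling its args parameter) by splitting the operation into three explicit states — file.managed to stage the installer, cmd.run to execute it, file.absent to clean up — and patch 122 below then collapses the duplicated heavynode branch by resolving the enrollment token once in Jinja. Distilled to a hypothetical shell equivalent under the same names:

    # Sketch of the staged install flow (shell analogue of the Salt states)
    TOKEN="$GRIDNODETOKENGENERAL"
    [ "$ROLE" = "so-heavynode" ] && TOKEN="$GRIDNODETOKENHEAVY"
    install -m 755 so-elastic-agent_linux_amd64 /opt/so/            # file.managed
    (cd /opt/so && ./so-elastic-agent_linux_amd64 -token="$TOKEN")  # cmd.run, retried 3x every 20s
    rm -f /opt/so/so-elastic-agent_linux_amd64                      # file.absent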
-{%- set GRIDNODETOKENGENERAL = salt['pillar.get']('global:fleet_grid_enrollment_token_general') -%} -{%- set GRIDNODETOKENHEAVY = salt['pillar.get']('global:fleet_grid_enrollment_token_heavy') -%} +{% set GRIDNODETOKEN = salt['pillar.get']('global:fleet_grid_enrollment_token_general') -%} +{% if grains.role == 'so-heavynode' %} +{% set GRIDNODETOKEN = salt['pillar.get']('global:fleet_grid_enrollment_token_heavy') -%} +{% endif %} {% set AGENT_STATUS = salt['service.available']('elastic-agent') %} {% if not AGENT_STATUS %} @@ -15,19 +17,13 @@ pull_agent_installer: - mode: 755 - makedirs: True -{% if grains.role not in ['so-heavynode'] %} run_installer: cmd.run: - - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKENGENERAL }} + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} - cwd: /opt/so - - retry: True -{% else %} -run_installer: - cmd.run: - - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKENHEAVY }} - - cwd: /opt/so - - retry: True -{% endif %} + - retry: + attempts: 3 + interval: 20 cleanup_agent_installer: file.absent: From d95122ca01c3bf73174e3f4e6ed19d660cf55b49 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Tue, 4 Nov 2025 16:02:39 -0500 Subject: [PATCH 123/124] ensure previous setup outcomes are cleared --- setup/so-functions | 10 ++++++---- setup/so-setup | 4 ++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index 9b65c9f0e..88da7ee9e 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1646,6 +1646,12 @@ reserve_ports() { fi } +clear_previous_setup_results() { + # Disregard previous setup outcomes. + rm -f /root/failure + rm -f /root/success +} + reinstall_init() { info "Putting system in state to run setup again" @@ -1657,10 +1663,6 @@ reinstall_init() { local service_retry_count=20 - # Disregard previous install outcomes - rm -f /root/failure - rm -f /root/success - { # remove all of root's cronjobs logCmd "crontab -r -u root" diff --git a/setup/so-setup b/setup/so-setup index ab055fd2d..bdb1c38e2 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -132,6 +132,10 @@ if [[ -f /root/accept_changes ]]; then reset_proxy fi +# Previous setup attempts, even if setup doesn't actually start the installation, +# can leave behind results that may interfere with the current setup attempt. 
+clear_previous_setup_results + title "Parsing Username for Install" parse_install_username From 1aa871ec94aede74dcb8f528726168070c24a1f1 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 5 Nov 2025 17:55:57 -0600 Subject: [PATCH 124/124] small fixes --- .../tools/sbin/so-elasticsearch-retention-estimate | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate b/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate index 4c34d3a02..96219c50c 100755 --- a/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate @@ -41,13 +41,13 @@ create_temp_file() { } log_title() { - if [ $1 == "LOG" ]; then + if [ "$1" == "LOG" ]; then echo -e "\n${BOLD}================ $2 ================${NC}\n" - elif [ $1 == "OK" ]; then + elif [ "$1" == "OK" ]; then echo -e "${GREEN} $2 ${NC}" - elif [ $1 == "WARN" ]; then + elif [ "$1" == "WARN" ]; then echo -e "${YELLOW} $2 ${NC}" - elif [ $1 == "ERROR" ]; then + elif [ "$1" == "ERROR" ]; then echo -e "${RED} $2 ${NC}" fi } @@ -756,7 +756,7 @@ if [ "$should_trigger_recommendations" = true ]; then ilm_output=$(so-elasticsearch-query "${index}/_ilm/explain" --fail 2>/dev/null) || true if [ -n "$ilm_output" ]; then - policy=$(echo "$ilm_output" | jq -r ".indices.\"$index\".policy // empty" 2>/dev/null) + policy=$(echo "$ilm_output" | jq --arg idx "$index" -r ".indices[$idx].policy // empty" 2>/dev/null) fi if [ -n "$policy" ] && [ -n "${policy_ages[$policy]:-}" ]; then delete_min_age=${policy_ages[$policy]} @@ -1134,8 +1134,9 @@ else for i in "${!scheduled_indices_names[@]}"; do sorted_indices+=("${scheduled_indices_days[$i]}|${scheduled_indices_names[$i]}|${scheduled_indices_sizes[$i]}") done + OLD_IFS="$IFS" IFS=$'\n' sorted_indices=($(sort -t'|' -k1 -n <<<"${sorted_indices[*]}")) - unset IFS + IFS="$OLD_IFS" for entry in "${sorted_indices[@]}"; do IFS='|' read -r days_until index_name size_bytes <<< "$entry"
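Two of the "small fixes" in this last patch are worth spelling out. Quoting "$1" in the log_title tests keeps them from breaking on empty or multi-word arguments, and passing the index name to jq via --arg keeps it out of the jq program text: jq receives the name as the string variable $idx, and .indices[$idx] indexes by that exact key, whereas splicing "$index" into the filter breaks whenever the name contains quotes or other jq-significant characters. A small sketch of the difference, with a hypothetical index name:

    index=".ds-logs-so-2025.11.05-000001"    # hypothetical
    # fragile: shell interpolation inside the jq program
    echo "$ilm_output" | jq -r ".indices.\"$index\".policy // empty"
    # robust: bind the name as a jq variable instead
    echo "$ilm_output" | jq --arg idx "$index" -r '.indices[$idx].policy // empty'

Likewise, saving IFS to OLD_IFS and restoring it afterward is safer than unset IFS, since unset reverts to the default space/tab/newline splitting rather than to whatever value the surrounding code had set.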