diff --git a/.github/DISCUSSION_TEMPLATE/2-4.yml b/.github/DISCUSSION_TEMPLATE/2-4.yml index 273430e7d..229e9f612 100644 --- a/.github/DISCUSSION_TEMPLATE/2-4.yml +++ b/.github/DISCUSSION_TEMPLATE/2-4.yml @@ -31,6 +31,8 @@ body: - 2.4.160 - 2.4.170 - 2.4.180 + - 2.4.190 + - 2.4.200 - Other (please provide detail below) validations: required: true diff --git a/DOWNLOAD_AND_VERIFY_ISO.md b/DOWNLOAD_AND_VERIFY_ISO.md index 6b966957c..f354ed191 100644 --- a/DOWNLOAD_AND_VERIFY_ISO.md +++ b/DOWNLOAD_AND_VERIFY_ISO.md @@ -1,17 +1,17 @@ -### 2.4.170-20250812 ISO image released on 2025/08/12 +### 2.4.190-20251024 ISO image released on 2025/10/24 ### Download and Verify -2.4.170-20250812 ISO image: -https://download.securityonion.net/file/securityonion/securityonion-2.4.170-20250812.iso +2.4.190-20251024 ISO image: +https://download.securityonion.net/file/securityonion/securityonion-2.4.190-20251024.iso -MD5: 50ECAAD05736298452DECEAE074FA773 -SHA1: 1B1EB520DE61ECC4BF34E512DAFE307317D7666A -SHA256: 87D176A48A58BAD1C2D57196F999BED23DE9B526226E3754F0C166C866CCDC1A +MD5: 25358481FB876226499C011FC0710358 +SHA1: 0B26173C0CE136F2CA40A15046D1DFB78BCA1165 +SHA256: 4FD9F62EDA672408828B3C0C446FE5EA9FF3C4EE8488A7AB1101544A3C487872 Signature for ISO image: -https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.170-20250812.iso.sig +https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.190-20251024.iso.sig Signing key: https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/2.4/main/KEYS @@ -25,22 +25,22 @@ wget https://raw.githubusercontent.com/Security-Onion-Solutions/securityonion/2. 
Download the signature file for the ISO: ``` -wget https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.170-20250812.iso.sig +wget https://github.com/Security-Onion-Solutions/securityonion/raw/2.4/main/sigs/securityonion-2.4.190-20251024.iso.sig ``` Download the ISO image: ``` -wget https://download.securityonion.net/file/securityonion/securityonion-2.4.170-20250812.iso +wget https://download.securityonion.net/file/securityonion/securityonion-2.4.190-20251024.iso ``` Verify the downloaded ISO image using the signature file: ``` -gpg --verify securityonion-2.4.170-20250812.iso.sig securityonion-2.4.170-20250812.iso +gpg --verify securityonion-2.4.190-20251024.iso.sig securityonion-2.4.190-20251024.iso ``` The output should show "Good signature" and the Primary key fingerprint should match what's shown below: ``` -gpg: Signature made Fri 08 Aug 2025 06:24:56 PM EDT using RSA key ID FE507013 +gpg: Signature made Thu 23 Oct 2025 07:21:46 AM EDT using RSA key ID FE507013 gpg: Good signature from "Security Onion Solutions, LLC " gpg: WARNING: This key is not certified with a trusted signature! gpg: There is no indication that the signature belongs to the owner. diff --git a/VERSION b/VERSION index 1ff799fad..86df31761 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.4.180 +2.4.200 diff --git a/salt/_modules/hypervisor.py b/salt/_modules/hypervisor.py new file mode 100644 index 000000000..7119c8507 --- /dev/null +++ b/salt/_modules/hypervisor.py @@ -0,0 +1,91 @@ +#!/opt/saltstack/salt/bin/python3 + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+# +# Note: Per the Elastic License 2.0, the second limitation states: +# +# "You may not move, change, disable, or circumvent the license key functionality +# in the software, and you may not remove or obscure any functionality in the +# software that is protected by the license key." + +""" +Salt execution module for hypervisor operations. + +This module provides functions for managing hypervisor configurations, +including VM file management. +""" + +import json +import logging +import os + +log = logging.getLogger(__name__) + +__virtualname__ = 'hypervisor' + + +def __virtual__(): + """ + Only load this module if we're on a system that can manage hypervisors. + """ + return __virtualname__ + + +def remove_vm_from_vms_file(vms_file_path, vm_hostname, vm_role): + """ + Remove a VM entry from the hypervisorVMs file. + + Args: + vms_file_path (str): Path to the hypervisorVMs file + vm_hostname (str): Hostname of the VM to remove (without role suffix) + vm_role (str): Role of the VM + + Returns: + dict: Result dictionary with success status and message + + CLI Example: + salt '*' hypervisor.remove_vm_from_vms_file /opt/so/saltstack/local/salt/hypervisor/hosts/hypervisor1VMs node1 nsm + """ + try: + # Check if file exists + if not os.path.exists(vms_file_path): + msg = f"VMs file not found: {vms_file_path}" + log.error(msg) + return {'result': False, 'comment': msg} + + # Read current VMs + with open(vms_file_path, 'r') as f: + content = f.read().strip() + vms = json.loads(content) if content else [] + + # Find and remove the VM entry + original_count = len(vms) + vms = [vm for vm in vms if not (vm.get('hostname') == vm_hostname and vm.get('role') == vm_role)] + + if len(vms) < original_count: + # VM was found and removed, write back to file + with open(vms_file_path, 'w') as f: + json.dump(vms, f, indent=2) + + # Set socore:socore ownership (939:939) + os.chown(vms_file_path, 939, 939) + + msg = f"Removed VM {vm_hostname}_{vm_role} from {vms_file_path}" + 
log.info(msg) + return {'result': True, 'comment': msg} + else: + msg = f"VM {vm_hostname}_{vm_role} not found in {vms_file_path}" + log.warning(msg) + return {'result': False, 'comment': msg} + + except json.JSONDecodeError as e: + msg = f"Failed to parse JSON in {vms_file_path}: {str(e)}" + log.error(msg) + return {'result': False, 'comment': msg} + except Exception as e: + msg = f"Failed to remove VM {vm_hostname}_{vm_role} from {vms_file_path}: {str(e)}" + log.error(msg) + return {'result': False, 'comment': msg} diff --git a/salt/_modules/qcow2.py b/salt/_modules/qcow2.py index 6e71dc459..10c4d185b 100644 --- a/salt/_modules/qcow2.py +++ b/salt/_modules/qcow2.py @@ -7,12 +7,14 @@ """ Salt module for managing QCOW2 image configurations and VM hardware settings. This module provides functions -for modifying network configurations within QCOW2 images and adjusting virtual machine hardware settings. -It serves as a Salt interface to the so-qcow2-modify-network and so-kvm-modify-hardware scripts. +for modifying network configurations within QCOW2 images, adjusting virtual machine hardware settings, and +creating virtual storage volumes. It serves as a Salt interface to the so-qcow2-modify-network, +so-kvm-modify-hardware, and so-kvm-create-volume scripts. -The module offers two main capabilities: +The module offers three main capabilities: 1. Network Configuration: Modify network settings (DHCP/static IP) within QCOW2 images 2. Hardware Configuration: Adjust VM hardware settings (CPU, memory, PCI passthrough) +3. Volume Management: Create and attach virtual storage volumes for NSM data This module is intended to work with Security Onion's virtualization infrastructure and is typically used in conjunction with salt-cloud for VM provisioning and management. 
@@ -244,3 +246,90 @@ def modify_hardware_config(vm_name, cpu=None, memory=None, pci=None, start=False except Exception as e: log.error('qcow2 module: An error occurred while executing the script: {}'.format(e)) raise + +def create_volume_config(vm_name, size_gb, start=False): + ''' + Usage: + salt '*' qcow2.create_volume_config vm_name= size_gb= [start=] + + Options: + vm_name + Name of the virtual machine to attach the volume to + size_gb + Volume size in GB (positive integer) + This determines the capacity of the virtual storage volume + start + Boolean flag to start the VM after volume creation + Optional - defaults to False + + Examples: + 1. **Create 500GB Volume:** + ```bash + salt '*' qcow2.create_volume_config vm_name='sensor1_sensor' size_gb=500 + ``` + This creates a 500GB virtual volume for NSM storage + + 2. **Create 1TB Volume and Start VM:** + ```bash + salt '*' qcow2.create_volume_config vm_name='sensor1_sensor' size_gb=1000 start=True + ``` + This creates a 1TB volume and starts the VM after attachment + + Notes: + - VM must be stopped before volume creation + - Volume is created as a qcow2 image and attached to the VM + - This is an alternative to disk passthrough via modify_hardware_config + - Volume is automatically attached to the VM's libvirt configuration + - Requires so-kvm-create-volume script to be installed + - Volume files are stored in the hypervisor's VM storage directory + + Description: + This function creates and attaches a virtual storage volume to a KVM virtual machine + using the so-kvm-create-volume script. It creates a qcow2 disk image of the specified + size and attaches it to the VM for NSM (Network Security Monitoring) storage purposes. + This provides an alternative to physical disk passthrough, allowing flexible storage + allocation without requiring dedicated hardware. The VM can optionally be started + after the volume is successfully created and attached. 
+ + Exit Codes: + 0: Success + 1: Invalid parameters + 2: VM state error (running when should be stopped) + 3: Volume creation error + 4: System command error + 255: Unexpected error + + Logging: + - All operations are logged to the salt minion log + - Log entries are prefixed with 'qcow2 module:' + - Volume creation and attachment operations are logged + - Errors include detailed messages and stack traces + - Final status of volume creation is logged + ''' + + # Validate size_gb parameter + if not isinstance(size_gb, int) or size_gb <= 0: + raise ValueError('size_gb must be a positive integer.') + + cmd = ['/usr/sbin/so-kvm-create-volume', '-v', vm_name, '-s', str(size_gb)] + + if start: + cmd.append('-S') + + log.info('qcow2 module: Executing command: {}'.format(' '.join(shlex.quote(arg) for arg in cmd))) + + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=False) + ret = { + 'retcode': result.returncode, + 'stdout': result.stdout, + 'stderr': result.stderr + } + if result.returncode != 0: + log.error('qcow2 module: Script execution failed with return code {}: {}'.format(result.returncode, result.stderr)) + else: + log.info('qcow2 module: Script executed successfully.') + return ret + except Exception as e: + log.error('qcow2 module: An error occurred while executing the script: {}'.format(e)) + raise diff --git a/salt/common/grains.sls b/salt/common/grains.sls new file mode 100644 index 000000000..b8d3a4c90 --- /dev/null +++ b/salt/common/grains.sls @@ -0,0 +1,21 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +{% set nsm_exists = salt['file.directory_exists']('/nsm') %} +{% if nsm_exists %} +{% set nsm_total = salt['cmd.shell']('df -BG /nsm | tail -1 | awk \'{print $2}\'') %} + +nsm_total: + grains.present: + - name: nsm_total + - value: {{ nsm_total }} + +{% else %} + +nsm_missing: + test.succeed_without_changes: + - name: /nsm does not exist, skipping grain assignment + +{% endif %} diff --git a/salt/common/init.sls b/salt/common/init.sls index 7137ff11f..eba18f651 100644 --- a/salt/common/init.sls +++ b/salt/common/init.sls @@ -4,6 +4,7 @@ {% from 'vars/globals.map.jinja' import GLOBALS %} include: + - common.grains - common.packages {% if GLOBALS.role in GLOBALS.manager_roles %} - manager.elasticsearch # needed for elastic_curl_config state diff --git a/salt/common/tools/sbin/so-common b/salt/common/tools/sbin/so-common index 203b54cd0..ba2cb5ae7 100755 --- a/salt/common/tools/sbin/so-common +++ b/salt/common/tools/sbin/so-common @@ -220,12 +220,22 @@ compare_es_versions() { } copy_new_files() { + # Define files to exclude from deletion (relative to their respective base directories) + local EXCLUDE_FILES=( + "salt/hypervisor/soc_hypervisor.yaml" + ) + + # Build rsync exclude arguments + local EXCLUDE_ARGS=() + for file in "${EXCLUDE_FILES[@]}"; do + EXCLUDE_ARGS+=(--exclude="$file") + done + # Copy new files over to the salt dir cd $UPDATE_DIR - rsync -a salt $DEFAULT_SALT_DIR/ --delete - rsync -a pillar $DEFAULT_SALT_DIR/ --delete + rsync -a salt $DEFAULT_SALT_DIR/ --delete "${EXCLUDE_ARGS[@]}" + rsync -a pillar $DEFAULT_SALT_DIR/ --delete "${EXCLUDE_ARGS[@]}" chown -R socore:socore $DEFAULT_SALT_DIR/ - chmod 755 $DEFAULT_SALT_DIR/pillar/firewall/addfirewall.sh cd /tmp } @@ -441,8 +451,7 @@ lookup_grain() { lookup_role() { id=$(lookup_grain id) - pieces=($(echo $id | tr '_' ' ')) - echo ${pieces[1]} + echo "${id##*_}" } is_feature_enabled() { diff --git a/salt/common/tools/sbin/so-image-common b/salt/common/tools/sbin/so-image-common index e2fe4f715..4358e6d00 
100755 --- a/salt/common/tools/sbin/so-image-common +++ b/salt/common/tools/sbin/so-image-common @@ -60,8 +60,6 @@ container_list() { "so-soc" "so-steno" "so-strelka-backend" - "so-strelka-filestream" - "so-strelka-frontend" "so-strelka-manager" "so-suricata" "so-telegraf" diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index 72ece1919..5960a7946 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -222,6 +222,7 @@ if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|Initialized license manager" # SOC log: before fields.status was changed to fields.licenseStatus EXCLUDED_ERRORS="$EXCLUDED_ERRORS|from NIC checksum offloading" # zeek reporter.log EXCLUDED_ERRORS="$EXCLUDED_ERRORS|marked for removal" # docker container getting recycled + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|tcp 127.0.0.1:6791: bind: address already in use" # so-elastic-fleet agent restarting. Seen starting w/ 8.18.8 https://github.com/elastic/kibana/issues/201459 fi RESULT=0 @@ -268,6 +269,13 @@ for log_file in $(cat /tmp/log_check_files); do tail -n $RECENT_LOG_LINES $log_file > /tmp/log_check check_for_errors done +# Look for OOM specific errors in the systemd journal which can lead to odd behavior / test failures +if command -v journalctl >/dev/null 2>&1; then + status "Checking systemd journal for OOM errors" + if journalctl --since "24 hours ago" | grep -iE 'out of memory|oom-kill'; then + RESULT=1 + fi +fi # Cleanup temp files rm -f /tmp/log_check_files diff --git a/salt/common/tools/sbin_jinja/so-import-pcap b/salt/common/tools/sbin_jinja/so-import-pcap index e8c2b84c8..b630df015 100755 --- a/salt/common/tools/sbin_jinja/so-import-pcap +++ b/salt/common/tools/sbin_jinja/so-import-pcap @@ -173,7 +173,7 @@ for PCAP in $INPUT_FILES; do status "- assigning unique identifier to import: $HASH" pcap_data=$(pcapinfo "${PCAP}") - if ! 
echo "$pcap_data" | grep -q "First packet time:" || echo "$pcap_data" |egrep -q "Last packet time: 1970-01-01|Last packet time: n/a"; then + if ! echo "$pcap_data" | grep -q "Earliest packet time:" || echo "$pcap_data" |egrep -q "Latest packet time: 1970-01-01|Latest packet time: n/a"; then status "- this PCAP file is invalid; skipping" INVALID_PCAPS_COUNT=$((INVALID_PCAPS_COUNT + 1)) else @@ -205,8 +205,8 @@ for PCAP in $INPUT_FILES; do HASHES="${HASHES} ${HASH}" fi - START=$(pcapinfo "${PCAP}" -a |grep "First packet time:" | awk '{print $4}') - END=$(pcapinfo "${PCAP}" -e |grep "Last packet time:" | awk '{print $4}') + START=$(pcapinfo "${PCAP}" -a |grep "Earliest packet time:" | awk '{print $4}') + END=$(pcapinfo "${PCAP}" -e |grep "Latest packet time:" | awk '{print $4}') status "- found PCAP data spanning dates $START through $END" # compare $START to $START_OLDEST diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index 0ca54ccb8..cef47168f 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -135,12 +135,18 @@ so-elastic-fleet-package-statefile: so-elastic-fleet-package-upgrade: cmd.run: - name: /usr/sbin/so-elastic-fleet-package-upgrade + - retry: + attempts: 3 + interval: 10 - onchanges: - file: /opt/so/state/elastic_fleet_packages.txt so-elastic-fleet-integrations: cmd.run: - name: /usr/sbin/so-elastic-fleet-integration-policy-load + - retry: + attempts: 3 + interval: 10 so-elastic-agent-grid-upgrade: cmd.run: @@ -152,7 +158,11 @@ so-elastic-agent-grid-upgrade: so-elastic-fleet-integration-upgrade: cmd.run: - name: /usr/sbin/so-elastic-fleet-integration-upgrade + - retry: + attempts: 3 + interval: 10 +{# Optional integrations script doesn't need the retries like so-elastic-fleet-integration-upgrade which loads the default integrations #} so-elastic-fleet-addon-integrations: cmd.run: - name: /usr/sbin/so-elastic-fleet-optional-integrations-load diff --git 
a/salt/elasticfleet/files/integrations/grid-nodes_general/elasticsearch-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/elasticsearch-logs.json index a2aaf5e0a..0c74a7fd5 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/elasticsearch-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/elasticsearch-logs.json @@ -40,7 +40,7 @@ "enabled": true, "vars": { "paths": [ - "/opt/so/log/elasticsearch/*.log" + "/opt/so/log/elasticsearch/*.json" ] } }, diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json index 8132f4a09..dd95e6337 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json @@ -20,7 +20,7 @@ ], "data_stream.dataset": "import", "custom": "", - "processors": "- dissect:\n tokenizer: \"/nsm/import/%{import.id}/evtx/%{import.file}\"\n field: \"log.file.path\"\n target_prefix: \"\"\n- decode_json_fields:\n fields: [\"message\"]\n target: \"\"\n- drop_fields:\n fields: [\"host\"]\n ignore_missing: true\n- add_fields:\n target: data_stream\n fields:\n type: logs\n dataset: system.security\n- add_fields:\n target: event\n fields:\n dataset: system.security\n module: system\n imported: true\n- add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.security-2.5.4\n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-Sysmon/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.sysmon_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.sysmon_operational\n module: windows\n imported: true\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.sysmon_operational-3.1.2\n- if:\n equals:\n winlog.channel: 'Application'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: 
system.application\n - add_fields:\n target: event\n fields:\n dataset: system.application\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.application-2.5.4\n- if:\n equals:\n winlog.channel: 'System'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.system\n - add_fields:\n target: event\n fields:\n dataset: system.system\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.system-2.5.4\n \n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-PowerShell/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.powershell_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.powershell_operational\n module: windows\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.powershell_operational-3.1.2\n- add_fields:\n target: data_stream\n fields:\n dataset: import", + "processors": "- dissect:\n tokenizer: \"/nsm/import/%{import.id}/evtx/%{import.file}\"\n field: \"log.file.path\"\n target_prefix: \"\"\n- decode_json_fields:\n fields: [\"message\"]\n target: \"\"\n- drop_fields:\n fields: [\"host\"]\n ignore_missing: true\n- add_fields:\n target: data_stream\n fields:\n type: logs\n dataset: system.security\n- add_fields:\n target: event\n fields:\n dataset: system.security\n module: system\n imported: true\n- add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.security-2.6.1\n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-Sysmon/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.sysmon_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.sysmon_operational\n module: windows\n imported: true\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.sysmon_operational-3.1.2\n- if:\n equals:\n winlog.channel: 'Application'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.application\n - add_fields:\n target: 
event\n fields:\n dataset: system.application\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.application-2.6.1\n- if:\n equals:\n winlog.channel: 'System'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.system\n - add_fields:\n target: event\n fields:\n dataset: system.system\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.system-2.6.1\n \n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-PowerShell/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.powershell_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.powershell_operational\n module: windows\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.powershell_operational-3.1.2\n- add_fields:\n target: data_stream\n fields:\n dataset: import", "tags": [ "import" ] diff --git a/salt/elasticfleet/install_agent_grid.sls b/salt/elasticfleet/install_agent_grid.sls index 8b5bb4c3e..4a185e0bb 100644 --- a/salt/elasticfleet/install_agent_grid.sls +++ b/salt/elasticfleet/install_agent_grid.sls @@ -2,26 +2,30 @@ # or more contributor license agreements. Licensed under the Elastic License 2.0; you may not use # this file except in compliance with the Elastic License 2.0. 
-{%- set GRIDNODETOKENGENERAL = salt['pillar.get']('global:fleet_grid_enrollment_token_general') -%} -{%- set GRIDNODETOKENHEAVY = salt['pillar.get']('global:fleet_grid_enrollment_token_heavy') -%} +{% set GRIDNODETOKEN = salt['pillar.get']('global:fleet_grid_enrollment_token_general') -%} +{% if grains.role == 'so-heavynode' %} +{% set GRIDNODETOKEN = salt['pillar.get']('global:fleet_grid_enrollment_token_heavy') -%} +{% endif %} {% set AGENT_STATUS = salt['service.available']('elastic-agent') %} {% if not AGENT_STATUS %} -{% if grains.role not in ['so-heavynode'] %} -run_installer: - cmd.script: - - name: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 - - cwd: /opt/so - - args: -token={{ GRIDNODETOKENGENERAL }} - - retry: True -{% else %} -run_installer: - cmd.script: - - name: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 - - cwd: /opt/so - - args: -token={{ GRIDNODETOKENHEAVY }} - - retry: True -{% endif %} +pull_agent_installer: + file.managed: + - name: /opt/so/so-elastic-agent_linux_amd64 + - source: salt://elasticfleet/files/so_agent-installers/so-elastic-agent_linux_amd64 + - mode: 755 + - makedirs: True +run_installer: + cmd.run: + - name: ./so-elastic-agent_linux_amd64 -token={{ GRIDNODETOKEN }} + - cwd: /opt/so + - retry: + attempts: 3 + interval: 20 + +cleanup_agent_installer: + file.absent: + - name: /opt/so/so-elastic-agent_linux_amd64 {% endif %} diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 9780c8b12..1a597b1db 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -23,6 +23,13 @@ fi # Define a banner to separate sections banner="=========================================================================" +fleet_api() { + local QUERYPATH=$1 + shift + + curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/${QUERYPATH}" 
"$@" --retry 3 --retry-delay 10 --fail 2>/dev/null +} + elastic_fleet_integration_check() { AGENT_POLICY=$1 @@ -39,7 +46,9 @@ elastic_fleet_integration_create() { JSON_STRING=$1 - curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! fleet_api "package_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -XPOST -d "$JSON_STRING"; then + return 1 + fi } @@ -56,7 +65,10 @@ elastic_fleet_integration_remove() { '{"packagePolicyIds":[$INTEGRATIONID]}' ) - curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies/delete" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! fleet_api "package_policies/delete" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo "Error: Unable to delete '$NAME' from '$AGENT_POLICY'" + return 1 + fi } elastic_fleet_integration_update() { @@ -65,7 +77,9 @@ elastic_fleet_integration_update() { JSON_STRING=$2 - curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/package_policies/$UPDATE_ID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! fleet_api "package_policies/$UPDATE_ID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -XPUT -d "$JSON_STRING"; then + return 1 + fi } elastic_fleet_integration_policy_upgrade() { @@ -77,78 +91,83 @@ elastic_fleet_integration_policy_upgrade() { '{"packagePolicyIds":[$INTEGRATIONID]}' ) - curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/package_policies/upgrade" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! 
fleet_api "package_policies/upgrade" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + return 1 + fi } elastic_fleet_package_version_check() { PACKAGE=$1 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/epm/packages/$PACKAGE" | jq -r '.item.version' + + if output=$(fleet_api "epm/packages/$PACKAGE"); then + echo "$output" | jq -r '.item.version' + else + echo "Error: Failed to get current package version for '$PACKAGE'" + return 1 + fi } elastic_fleet_package_latest_version_check() { PACKAGE=$1 - if output=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/epm/packages/$PACKAGE" --fail); then + if output=$(fleet_api "epm/packages/$PACKAGE"); then if version=$(jq -e -r '.item.latestVersion' <<< $output); then echo "$version" fi else - echo "Error: Failed to get latest version for $PACKAGE" + echo "Error: Failed to get latest version for '$PACKAGE'" + return 1 fi } elastic_fleet_package_install() { PKG=$1 VERSION=$2 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X POST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d '{"force":true}' "localhost:5601/api/fleet/epm/packages/$PKG/$VERSION" + if ! 
fleet_api "epm/packages/$PKG/$VERSION" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d '{"force":true}'; then + return 1 + fi } elastic_fleet_bulk_package_install() { BULK_PKG_LIST=$1 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X POST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d@$1 "localhost:5601/api/fleet/epm/packages/_bulk" -} - -elastic_fleet_package_is_installed() { - PACKAGE=$1 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET -H 'kbn-xsrf: true' "localhost:5601/api/fleet/epm/packages/$PACKAGE" | jq -r '.item.status' -} - -elastic_fleet_installed_packages() { - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET -H 'kbn-xsrf: true' -H 'Content-Type: application/json' "localhost:5601/api/fleet/epm/packages/installed?perPage=500" -} - -elastic_fleet_agent_policy_ids() { - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies" | jq -r .items[].id - if [ $? -ne 0 ]; then - echo "Error: Failed to retrieve agent policies." - exit 1 + if ! fleet_api "epm/packages/_bulk" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d@$BULK_PKG_LIST; then + return 1 fi } -elastic_fleet_agent_policy_names() { - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies" | jq -r .items[].name - if [ $? -ne 0 ]; then +elastic_fleet_installed_packages() { + if ! fleet_api "epm/packages/installed?perPage=500"; then + return 1 + fi +} + +elastic_fleet_agent_policy_ids() { + if output=$(fleet_api "agent_policies"); then + echo "$output" | jq -r .items[].id + else echo "Error: Failed to retrieve agent policies." 
- exit 1 + return 1 fi } elastic_fleet_integration_policy_names() { AGENT_POLICY=$1 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" | jq -r .item.package_policies[].name - if [ $? -ne 0 ]; then + if output=$(fleet_api "agent_policies/$AGENT_POLICY"); then + echo "$output" | jq -r .item.package_policies[].name + else echo "Error: Failed to retrieve integrations for '$AGENT_POLICY'." - exit 1 + return 1 fi } elastic_fleet_integration_policy_package_name() { AGENT_POLICY=$1 INTEGRATION=$2 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.name' - if [ $? -ne 0 ]; then + if output=$(fleet_api "agent_policies/$AGENT_POLICY"); then + echo "$output" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.name' + else echo "Error: Failed to retrieve package name for '$INTEGRATION' in '$AGENT_POLICY'." 
- exit 1 + return 1 fi } @@ -156,32 +175,32 @@ elastic_fleet_integration_policy_package_version() { AGENT_POLICY=$1 INTEGRATION=$2 - if output=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" --fail); then - if version=$(jq -e -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.version' <<< $output); then + if output=$(fleet_api "agent_policies/$AGENT_POLICY"); then + if version=$(jq -e -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .package.version' <<< "$output"); then echo "$version" fi else - echo "Error: Failed to retrieve agent policy $AGENT_POLICY" - exit 1 + echo "Error: Failed to retrieve integration version for '$INTEGRATION' in policy '$AGENT_POLICY'" + return 1 fi } elastic_fleet_integration_id() { AGENT_POLICY=$1 INTEGRATION=$2 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X GET "localhost:5601/api/fleet/agent_policies/$AGENT_POLICY" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .id' - if [ $? -ne 0 ]; then + if output=$(fleet_api "agent_policies/$AGENT_POLICY"); then + echo "$output" | jq -r --arg INTEGRATION "$INTEGRATION" '.item.package_policies[] | select(.name==$INTEGRATION)| .id' + else echo "Error: Failed to retrieve integration ID for '$INTEGRATION' in '$AGENT_POLICY'." - exit 1 + return 1 fi } elastic_fleet_integration_policy_dryrun_upgrade() { INTEGRATION_ID=$1 - curl -s -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -H "Content-Type: application/json" -H 'kbn-xsrf: true' -L -X POST "localhost:5601/api/fleet/package_policies/upgrade/dryrun" -d "{\"packagePolicyIds\":[\"$INTEGRATION_ID\"]}" - if [ $? -ne 0 ]; then + if ! 
fleet_api "package_policies/upgrade/dryrun" -H "Content-Type: application/json" -H 'kbn-xsrf: true' -XPOST -d "{\"packagePolicyIds\":[\"$INTEGRATION_ID\"]}"; then echo "Error: Failed to complete dry run for '$INTEGRATION_ID'." - exit 1 + return 1 fi } @@ -190,25 +209,18 @@ elastic_fleet_policy_create() { NAME=$1 DESC=$2 FLEETSERVER=$3 - TIMEOUT=$4 + TIMEOUT=$4 JSON_STRING=$( jq -n \ - --arg NAME "$NAME" \ - --arg DESC "$DESC" \ - --arg TIMEOUT $TIMEOUT \ - --arg FLEETSERVER "$FLEETSERVER" \ - '{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER}' - ) + --arg NAME "$NAME" \ + --arg DESC "$DESC" \ + --arg TIMEOUT $TIMEOUT \ + --arg FLEETSERVER "$FLEETSERVER" \ + '{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER}' + ) # Create Fleet Policy - curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/agent_policies" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! fleet_api "agent_policies" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + return 1 + fi } - -elastic_fleet_policy_update() { - - POLICYID=$1 - JSON_STRING=$2 - - curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/agent_policies/$POLICYID" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" -} - diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend index 636942490..d036f0d94 100755 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-defend @@ -8,6 +8,7 @@ . 
/usr/sbin/so-elastic-fleet-common +ERROR=false # Manage Elastic Defend Integration for Initial Endpoints Policy for INTEGRATION in /opt/so/conf/elastic-fleet/integrations/elastic-defend/*.json do @@ -15,9 +16,20 @@ do elastic_fleet_integration_check "endpoints-initial" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Upgrading integration policy\n" - elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID" + if ! elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID"; then + echo -e "\nFailed to upgrade integration policy for ${INTEGRATION##*/}" + ERROR=true + continue + fi else printf "\n\nIntegration does not exist - Creating integration\n" - elastic_fleet_integration_create "@$INTEGRATION" + if ! elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + ERROR=true + continue + fi fi done +if [[ "$ERROR" == "true" ]]; then + exit 1 +fi \ No newline at end of file diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server index 8f7c8b8b4..caa684829 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server @@ -25,5 +25,9 @@ for POLICYNAME in $POLICY; do .name = $name' /opt/so/conf/elastic-fleet/integrations/fleet-server/fleet-server.json) # Now update the integration policy using the modified JSON - elastic_fleet_integration_update "$INTEGRATION_ID" "$UPDATED_INTEGRATION_POLICY" + if ! elastic_fleet_integration_update "$INTEGRATION_ID" "$UPDATED_INTEGRATION_POLICY"; then + # exit 1 on failure to update fleet integration policies, let salt handle retries + echo "Failed to update $POLICYNAME.." 
+ exit 1 + fi done \ No newline at end of file diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load index 26414a94b..ca260891f 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-integration-policy-load @@ -13,11 +13,10 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then /usr/sbin/so-elastic-fleet-package-upgrade # Second, update Fleet Server policies - /sbin/so-elastic-fleet-integration-policy-elastic-fleet-server + /usr/sbin/so-elastic-fleet-integration-policy-elastic-fleet-server # Third, configure Elastic Defend Integration seperately /usr/sbin/so-elastic-fleet-integration-policy-elastic-defend - # Initial Endpoints for INTEGRATION in /opt/so/conf/elastic-fleet/integrations/endpoints-initial/*.json do @@ -25,10 +24,18 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_check "endpoints-initial" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Updating integration\n" - elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" + if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then + echo -e "\nFailed to update integration for ${INTEGRATION##*/}" + RETURN_CODE=1 + continue + fi else printf "\n\nIntegration does not exist - Creating integration\n" - elastic_fleet_integration_create "@$INTEGRATION" + if ! elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + RETURN_CODE=1 + continue + fi fi done @@ -39,10 +46,18 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_check "so-grid-nodes_general" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Updating integration\n" - elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" + if ! 
elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then + echo -e "\nFailed to update integration for ${INTEGRATION##*/}" + RETURN_CODE=1 + continue + fi else printf "\n\nIntegration does not exist - Creating integration\n" - elastic_fleet_integration_create "@$INTEGRATION" + if ! elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + RETURN_CODE=1 + continue + fi fi done if [[ "$RETURN_CODE" != "1" ]]; then @@ -56,11 +71,19 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_check "so-grid-nodes_heavy" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Updating integration\n" - elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" + if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then + echo -e "\nFailed to update integration for ${INTEGRATION##*/}" + RETURN_CODE=1 + continue + fi else printf "\n\nIntegration does not exist - Creating integration\n" if [ "$NAME" != "elasticsearch-logs" ]; then - elastic_fleet_integration_create "@$INTEGRATION" + if ! elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + RETURN_CODE=1 + continue + fi fi fi done @@ -77,11 +100,19 @@ if [ ! -f /opt/so/state/eaintegrations.txt ]; then elastic_fleet_integration_check "$FLEET_POLICY" "$INTEGRATION" if [ -n "$INTEGRATION_ID" ]; then printf "\n\nIntegration $NAME exists - Updating integration\n" - elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION" + if ! elastic_fleet_integration_update "$INTEGRATION_ID" "@$INTEGRATION"; then + echo -e "\nFailed to update integration for ${INTEGRATION##*/}" + RETURN_CODE=1 + continue + fi else printf "\n\nIntegration does not exist - Creating integration\n" if [ "$NAME" != "elasticsearch-logs" ]; then - elastic_fleet_integration_create "@$INTEGRATION" + if ! 
elastic_fleet_integration_create "@$INTEGRATION"; then + echo -e "\nFailed to create integration for ${INTEGRATION##*/}" + RETURN_CODE=1 + continue + fi fi fi fi diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade index 68a644798..1a1448c53 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-integration-upgrade @@ -24,12 +24,18 @@ fi default_packages=({% for pkg in SUPPORTED_PACKAGES %}"{{ pkg }}"{% if not loop.last %} {% endif %}{% endfor %}) +ERROR=false for AGENT_POLICY in $agent_policies; do - integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY") + if ! integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY"); then + # this script upgrades default integration packages, exit 1 and let salt handle retrying + exit 1 + fi for INTEGRATION in $integrations; do if ! [[ "$INTEGRATION" == "elastic-defend-endpoints" ]] && ! [[ "$INTEGRATION" == "fleet_server-"* ]]; then # Get package name so we know what package to look for when checking the current and latest available version - PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION") + if ! PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION"); then + exit 1 + fi {%- if not AUTO_UPGRADE_INTEGRATIONS %} if [[ " ${default_packages[@]} " =~ " $PACKAGE_NAME " ]]; then {%- endif %} @@ -48,7 +54,9 @@ for AGENT_POLICY in $agent_policies; do fi # Get integration ID - INTEGRATION_ID=$(elastic_fleet_integration_id "$AGENT_POLICY" "$INTEGRATION") + if ! 
INTEGRATION_ID=$(elastic_fleet_integration_id "$AGENT_POLICY" "$INTEGRATION"); then + exit 1 + fi if [[ "$PACKAGE_VERSION" != "$AVAILABLE_VERSION" ]]; then # Dry run of the upgrade @@ -56,20 +64,23 @@ for AGENT_POLICY in $agent_policies; do echo "Current $PACKAGE_NAME package version ($PACKAGE_VERSION) is not the same as the latest available package ($AVAILABLE_VERSION)..." echo "Upgrading $INTEGRATION..." echo "Starting dry run..." - DRYRUN_OUTPUT=$(elastic_fleet_integration_policy_dryrun_upgrade "$INTEGRATION_ID") + if ! DRYRUN_OUTPUT=$(elastic_fleet_integration_policy_dryrun_upgrade "$INTEGRATION_ID"); then + exit 1 + fi DRYRUN_ERRORS=$(echo "$DRYRUN_OUTPUT" | jq .[].hasErrors) # If no errors with dry run, proceed with actual upgrade if [[ "$DRYRUN_ERRORS" == "false" ]]; then echo "No errors detected. Proceeding with upgrade..." - elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID" - if [ $? -ne 0 ]; then + if ! elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID"; then echo "Error: Upgrade failed for $PACKAGE_NAME with integration ID '$INTEGRATION_ID'." - exit 1 + ERROR=true + continue fi else echo "Errors detected during dry run for $PACKAGE_NAME policy upgrade..." 
- exit 1 + ERROR=true + continue fi fi {%- if not AUTO_UPGRADE_INTEGRATIONS %} @@ -78,4 +89,7 @@ for AGENT_POLICY in $agent_policies; do fi done done +if [[ "$ERROR" == "true" ]]; then + exit 1 +fi echo diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load index 886bbf75c..01777e5da 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load @@ -62,9 +62,17 @@ default_packages=({% for pkg in SUPPORTED_PACKAGES %}"{{ pkg }}"{% if not loop.l in_use_integrations=() for AGENT_POLICY in $agent_policies; do - integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY") + + if ! integrations=$(elastic_fleet_integration_policy_names "$AGENT_POLICY"); then + # skip the agent policy if we can't get required info, let salt retry. Integrations loaded by this script are non-default integrations. + echo "Skipping $AGENT_POLICY.. " + continue + fi for INTEGRATION in $integrations; do - PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION") + if ! PACKAGE_NAME=$(elastic_fleet_integration_policy_package_name "$AGENT_POLICY" "$INTEGRATION"); then + echo "Not adding $INTEGRATION, couldn't get package name" + continue + fi # non-default integrations that are in-use in any policy if ! [[ " ${default_packages[@]} " =~ " $PACKAGE_NAME " ]]; then in_use_integrations+=("$PACKAGE_NAME") @@ -160,7 +168,11 @@ if [[ -f $STATE_FILE_SUCCESS ]]; then for file in "${pkg_filename}_"*.json; do [ -e "$file" ] || continue - elastic_fleet_bulk_package_install $file >> $BULK_INSTALL_OUTPUT + if ! 
elastic_fleet_bulk_package_install $file >> $BULK_INSTALL_OUTPUT; then + # integrations loaded my this script are non-essential and shouldn't cause exit, skip them for now next highstate run can retry + echo "Failed to complete a chunk of bulk package installs -- $file " + continue + fi done # cleanup any temp files for chunked package install rm -f ${pkg_filename}_*.json $BULK_INSTALL_PACKAGE_LIST @@ -168,8 +180,9 @@ if [[ -f $STATE_FILE_SUCCESS ]]; then echo "Elastic integrations don't appear to need installation/updating..." fi # Write out file for generating index/component/ilm templates - latest_installed_package_list=$(elastic_fleet_installed_packages) - echo $latest_installed_package_list | jq '[.items[] | {name: .name, es_index_patterns: .dataStreams}]' > $PACKAGE_COMPONENTS + if latest_installed_package_list=$(elastic_fleet_installed_packages); then + echo $latest_installed_package_list | jq '[.items[] | {name: .name, es_index_patterns: .dataStreams}]' > $PACKAGE_COMPONENTS + fi if retry 3 1 "so-elasticsearch-query / --fail --output /dev/null"; then # Refresh installed component template list latest_component_templates_list=$(so-elasticsearch-query _component_template | jq '.component_templates[] | .name' | jq -s '.') diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update index 43eef6ee9..9efe8a19d 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update @@ -15,8 +15,21 @@ if ! 
is_manager_node; then fi function update_logstash_outputs() { - # Generate updated JSON payload - JSON_STRING=$(jq -n --arg UPDATEDLIST $NEW_LIST_JSON '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":""}') + if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then + SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl') + if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --argjson SECRETS "$SECRETS" \ + --argjson SSL_CONFIG "$SSL_CONFIG" \ + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG,"secrets": $SECRETS}') + else + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --argjson SSL_CONFIG "$SSL_CONFIG" \ + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG}') + fi + fi # Update Logstash Outputs curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_logstash" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" | jq diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-load b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-load index 819d7ecff..52fa96cd5 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-load +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-load @@ -10,8 +10,16 @@ {%- for PACKAGE in SUPPORTED_PACKAGES %} echo "Setting up {{ PACKAGE }} package..." 
-VERSION=$(elastic_fleet_package_version_check "{{ PACKAGE }}") -elastic_fleet_package_install "{{ PACKAGE }}" "$VERSION" +if VERSION=$(elastic_fleet_package_version_check "{{ PACKAGE }}"); then + if ! elastic_fleet_package_install "{{ PACKAGE }}" "$VERSION"; then + # packages loaded by this script should never fail to install and REQUIRED before an installation of SO can be considered successful + echo -e "\nERROR: Failed to install default integration package -- $PACKAGE $VERSION" + exit 1 + fi +else + echo -e "\nERROR: Failed to get version information for integration $PACKAGE" + exit 1 +fi echo {%- endfor %} echo diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-upgrade index a092e3ecb..18211a7c6 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-package-upgrade @@ -10,8 +10,15 @@ {%- for PACKAGE in SUPPORTED_PACKAGES %} echo "Upgrading {{ PACKAGE }} package..." -VERSION=$(elastic_fleet_package_latest_version_check "{{ PACKAGE }}") -elastic_fleet_package_install "{{ PACKAGE }}" "$VERSION" +if VERSION=$(elastic_fleet_package_latest_version_check "{{ PACKAGE }}"); then + if ! 
elastic_fleet_package_install "{{ PACKAGE }}" "$VERSION"; then + # exit 1 on failure to upgrade a default package, allow salt to handle retries + echo -e "\nERROR: Failed to upgrade $PACKAGE to version: $VERSION" + exit 1 + fi +else + echo -e "\nERROR: Failed to get version information for integration $PACKAGE" +fi echo {%- endfor %} echo diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup index deb16dadf..446fc6c9a 100755 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-setup @@ -23,18 +23,17 @@ if [[ "$RETURN_CODE" != "0" ]]; then exit 1 fi -ALIASES=".fleet-servers .fleet-policies-leader .fleet-policies .fleet-agents .fleet-artifacts .fleet-enrollment-api-keys .kibana_ingest" -for ALIAS in ${ALIASES} -do +ALIASES=(.fleet-servers .fleet-policies-leader .fleet-policies .fleet-agents .fleet-artifacts .fleet-enrollment-api-keys .kibana_ingest) +for ALIAS in "${ALIASES[@]}"; do # Get all concrete indices from alias - INDXS=$(curl -K /opt/so/conf/kibana/curl.config -s -k -L -H "Content-Type: application/json" "https://localhost:9200/_resolve/index/${ALIAS}" | jq -r '.aliases[].indices[]') - - # Delete all resolved indices - for INDX in ${INDXS} - do + if INDXS_RAW=$(curl -sK /opt/so/conf/kibana/curl.config -s -k -L -H "Content-Type: application/json" "https://localhost:9200/_resolve/index/${ALIAS}" --fail 2>/dev/null); then + INDXS=$(echo "$INDXS_RAW" | jq -r '.aliases[].indices[]') + # Delete all resolved indices + for INDX in ${INDXS}; do status "Deleting $INDX" curl -K /opt/so/conf/kibana/curl.config -s -k -L -H "Content-Type: application/json" "https://localhost:9200/${INDX}" -XDELETE - done + done + fi done # Restarting Kibana... 
@@ -51,22 +50,61 @@ if [[ "$RETURN_CODE" != "0" ]]; then fi printf "\n### Create ES Token ###\n" -ESTOKEN=$(curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/service_tokens" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' | jq -r .value) +if ESTOKEN_RAW=$(fleet_api "service_tokens" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then + ESTOKEN=$(echo "$ESTOKEN_RAW" | jq -r .value) +else + echo -e "\nFailed to create ES token..." + exit 1 +fi ### Create Outputs, Fleet Policy and Fleet URLs ### # Create the Manager Elasticsearch Output first and set it as the default output printf "\nAdd Manager Elasticsearch Output...\n" -ESCACRT=$(openssl x509 -in $INTCA) -JSON_STRING=$( jq -n \ - --arg ESCACRT "$ESCACRT" \ - '{"name":"so-manager_elasticsearch","id":"so-manager_elasticsearch","type":"elasticsearch","hosts":["https://{{ GLOBALS.manager_ip }}:9200","https://{{ GLOBALS.manager }}:9200"],"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl":{"certificate_authorities": [$ESCACRT]}}' ) -curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/outputs" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" +ESCACRT=$(openssl x509 -in "$INTCA" -outform DER | sha256sum | cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') +JSON_STRING=$(jq -n \ + --arg ESCACRT "$ESCACRT" \ + '{"name":"so-manager_elasticsearch","id":"so-manager_elasticsearch","type":"elasticsearch","hosts":["https://{{ GLOBALS.manager_ip }}:9200","https://{{ GLOBALS.manager }}:9200"],"is_default":false,"is_default_monitoring":false,"config_yaml":"","ca_trusted_fingerprint": $ESCACRT}') + +if ! fleet_api "outputs" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo -e "\nFailed to create so-elasticsearch_manager policy..." + exit 1 +fi printf "\n\n" +# so-manager_elasticsearch should exist and be disabled. 
Now update it before checking its the only default policy +MANAGER_OUTPUT_ENABLED=$(echo "$JSON_STRING" | jq 'del(.id) | .is_default = true | .is_default_monitoring = true') +if ! curl -sK /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_elasticsearch" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$MANAGER_OUTPUT_ENABLED"; then + echo -e "\n failed to update so-manager_elasticsearch" + exit 1 +fi + +# At this point there should only be two policies. fleet-default-output & so-manager_elasticsearch +status "Verifying so-manager_elasticsearch policy is configured as the current default" + +# Grab the fleet-default-output policy instead of so-manager_elasticsearch, because a weird state can exist where both fleet-default-output & so-elasticsearch_manager can be set as the active default output for logs / metrics. Resulting in logs not ingesting on import/eval nodes +if DEFAULTPOLICY=$(fleet_api "outputs/fleet-default-output"); then + fleet_default=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default') + fleet_default_monitoring=$(echo "$DEFAULTPOLICY" | jq -er '.item.is_default_monitoring') +# Check that fleet-default-output isn't configured as a default for anything ( both variables return false ) + if [[ $fleet_default == "false" ]] && [[ $fleet_default_monitoring == "false" ]]; then + echo -e "\nso-manager_elasticsearch is configured as the current default policy..." + else + echo -e "\nVerification of so-manager_elasticsearch policy failed... The default 'fleet-default-output' output is still active..." + exit 1 + fi +else + # fleet-output-policy is created automatically by fleet when started. 
Should always exist on any installation type + echo -e "\nDefault fleet-default-output policy doesn't exist...\n" + exit 1 +fi + # Create the Manager Fleet Server Host Agent Policy # This has to be done while the Elasticsearch Output is set to the default Output printf "Create Manager Fleet Server Policy...\n" -elastic_fleet_policy_create "FleetServer_{{ GLOBALS.hostname }}" "Fleet Server - {{ GLOBALS.hostname }}" "false" "120" +if ! elastic_fleet_policy_create "FleetServer_{{ GLOBALS.hostname }}" "Fleet Server - {{ GLOBALS.hostname }}" "false" "120"; then + echo -e "\n Failed to create Manager fleet server policy..." + exit 1 +fi # Modify the default integration policy to update the policy_id with the correct naming UPDATED_INTEGRATION_POLICY=$(jq --arg policy_id "FleetServer_{{ GLOBALS.hostname }}" --arg name "fleet_server-{{ GLOBALS.hostname }}" ' @@ -74,7 +112,10 @@ UPDATED_INTEGRATION_POLICY=$(jq --arg policy_id "FleetServer_{{ GLOBALS.hostname .name = $name' /opt/so/conf/elastic-fleet/integrations/fleet-server/fleet-server.json) # Add the Fleet Server Integration to the new Fleet Policy -elastic_fleet_integration_create "$UPDATED_INTEGRATION_POLICY" +if ! elastic_fleet_integration_create "$UPDATED_INTEGRATION_POLICY"; then + echo -e "\nFailed to create Fleet server integration for Manager.." 
+ exit 1 +fi # Now we can create the Logstash Output and set it to to be the default Output printf "\n\nCreate Logstash Output Config if node is not an Import or Eval install\n" @@ -86,9 +127,12 @@ JSON_STRING=$( jq -n \ --arg LOGSTASHCRT "$LOGSTASHCRT" \ --arg LOGSTASHKEY "$LOGSTASHKEY" \ --arg LOGSTASHCA "$LOGSTASHCA" \ - '{"name":"grid-logstash","is_default":true,"is_default_monitoring":true,"id":"so-manager_logstash","type":"logstash","hosts":["{{ GLOBALS.manager_ip }}:5055", "{{ GLOBALS.manager }}:5055"],"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]},"proxy_id":null}' + '{"name":"grid-logstash","is_default":true,"is_default_monitoring":true,"id":"so-manager_logstash","type":"logstash","hosts":["{{ GLOBALS.manager_ip }}:5055", "{{ GLOBALS.manager }}:5055"],"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets":{"ssl":{"key": $LOGSTASHKEY }},"proxy_id":null}' ) -curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/outputs" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" +if ! fleet_api "outputs" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo -e "\nFailed to create logstash fleet output" + exit 1 +fi printf "\n\n" {%- endif %} @@ -106,7 +150,10 @@ else fi ## This array replaces whatever URLs are currently configured -curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/fleet_server_hosts" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" +if ! 
fleet_api "fleet_server_hosts" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo -e "\nFailed to add manager fleet URL" + exit 1 +fi printf "\n\n" ### Create Policies & Associated Integration Configuration ### @@ -117,13 +164,22 @@ printf "\n\n" /usr/sbin/so-elasticsearch-templates-load # Initial Endpoints Policy -elastic_fleet_policy_create "endpoints-initial" "Initial Endpoint Policy" "false" "1209600" +if ! elastic_fleet_policy_create "endpoints-initial" "Initial Endpoint Policy" "false" "1209600"; then + echo -e "\nFailed to create endpoints-initial policy..." + exit 1 +fi # Grid Nodes - General Policy -elastic_fleet_policy_create "so-grid-nodes_general" "SO Grid Nodes - General Purpose" "false" "1209600" +if ! elastic_fleet_policy_create "so-grid-nodes_general" "SO Grid Nodes - General Purpose" "false" "1209600"; then + echo -e "\nFailed to create so-grid-nodes_general policy..." + exit 1 +fi # Grid Nodes - Heavy Node Policy -elastic_fleet_policy_create "so-grid-nodes_heavy" "SO Grid Nodes - Heavy Node" "false" "1209600" +if ! elastic_fleet_policy_create "so-grid-nodes_heavy" "SO Grid Nodes - Heavy Node" "false" "1209600"; then + echo -e "\nFailed to create so-grid-nodes_heavy policy..." + exit 1 +fi # Load Integrations for default policies so-elastic-fleet-integration-policy-load @@ -135,14 +191,34 @@ JSON_STRING=$( jq -n \ '{"name":$NAME,"host":$URL,"is_default":true}' ) -curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "localhost:5601/api/fleet/agent_download_sources" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" +if ! 
fleet_api "agent_download_sources" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + echo -e "\nFailed to update Elastic Agent artifact URL" + exit 1 +fi ### Finalization ### # Query for Enrollment Tokens for default policies -ENDPOINTSENROLLMENTOKEN=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' | jq .list | jq -r -c '.[] | select(.policy_id | contains("endpoints-initial")) | .api_key') -GRIDNODESENROLLMENTOKENGENERAL=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' | jq .list | jq -r -c '.[] | select(.policy_id | contains("so-grid-nodes_general")) | .api_key') -GRIDNODESENROLLMENTOKENHEAVY=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' | jq .list | jq -r -c '.[] | select(.policy_id | contains("so-grid-nodes_heavy")) | .api_key') +if ENDPOINTSENROLLMENTOKEN_RAW=$(fleet_api "enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then + ENDPOINTSENROLLMENTOKEN=$(echo "$ENDPOINTSENROLLMENTOKEN_RAW" | jq .list | jq -r -c '.[] | select(.policy_id | contains("endpoints-initial")) | .api_key') +else + echo -e "\nFailed to query for Endpoints enrollment token" + exit 1 +fi + +if GRIDNODESENROLLMENTOKENGENERAL_RAW=$(fleet_api "enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then + GRIDNODESENROLLMENTOKENGENERAL=$(echo "$GRIDNODESENROLLMENTOKENGENERAL_RAW" | jq .list | jq -r -c '.[] | select(.policy_id | contains("so-grid-nodes_general")) | .api_key') +else + echo -e "\nFailed to query for Grid nodes - General enrollment token" + exit 1 +fi + +if GRIDNODESENROLLMENTOKENHEAVY_RAW=$(fleet_api "enrollment_api_keys" -H 'kbn-xsrf: true' -H 'Content-Type: 
application/json'); then + GRIDNODESENROLLMENTOKENHEAVY=$(echo "$GRIDNODESENROLLMENTOKENHEAVY_RAW" | jq .list | jq -r -c '.[] | select(.policy_id | contains("so-grid-nodes_heavy")) | .api_key') +else + echo -e "\nFailed to query for Grid nodes - Heavy enrollment token" + exit 1 +fi # Store needed data in minion pillar pillar_file=/opt/so/saltstack/local/pillar/minions/{{ GLOBALS.minion_id }}.sls diff --git a/salt/elasticsearch/defaults.yaml b/salt/elasticsearch/defaults.yaml index 6ed55a936..592f47a2b 100644 --- a/salt/elasticsearch/defaults.yaml +++ b/salt/elasticsearch/defaults.yaml @@ -1,6 +1,6 @@ elasticsearch: enabled: false - version: 8.18.6 + version: 8.18.8 index_clean: true config: action: @@ -1991,6 +1991,70 @@ elasticsearch: set_priority: priority: 50 min_age: 30d + so-logs-elasticsearch_x_server: + index_sorting: false + index_template: + composed_of: + - logs-elasticsearch.server@package + - logs-elasticsearch.server@custom + - so-fleet_integrations.ip_mappings-1 + - so-fleet_globals-1 + - so-fleet_agent_id_verification-1 + data_stream: + allow_custom_routing: false + hidden: false + ignore_missing_component_templates: + - logs-elasticsearch.server@custom + index_patterns: + - logs-elasticsearch.server-* + priority: 501 + template: + mappings: + _meta: + managed: true + managed_by: security_onion + package: + name: elastic_agent + settings: + index: + lifecycle: + name: so-logs-elasticsearch.server-logs + mapping: + total_fields: + limit: 5000 + number_of_replicas: 0 + sort: + field: '@timestamp' + order: desc + policy: + _meta: + managed: true + managed_by: security_onion + package: + name: elastic_agent + phases: + cold: + actions: + set_priority: + priority: 0 + min_age: 60d + delete: + actions: + delete: {} + min_age: 365d + hot: + actions: + rollover: + max_age: 30d + max_primary_shard_size: 50gb + set_priority: + priority: 100 + min_age: 0ms + warm: + actions: + set_priority: + priority: 50 + min_age: 30d so-logs-endpoint_x_actions: 
index_sorting: false index_template: diff --git a/salt/elasticsearch/files/ingest/global@custom b/salt/elasticsearch/files/ingest/global@custom index c92c15612..8e48eb0b9 100644 --- a/salt/elasticsearch/files/ingest/global@custom +++ b/salt/elasticsearch/files/ingest/global@custom @@ -23,6 +23,7 @@ { "set": { "if": "ctx.event?.module == 'fim'", "override": true, "field": "event.module", "value": "file_integrity" } }, { "rename": { "if": "ctx.winlog?.provider_name == 'Microsoft-Windows-Windows Defender'", "ignore_missing": true, "field": "winlog.event_data.Threat Name", "target_field": "winlog.event_data.threat_name" } }, { "set": { "if": "ctx?.metadata?.kafka != null" , "field": "kafka.id", "value": "{{metadata.kafka.partition}}{{metadata.kafka.offset}}{{metadata.kafka.timestamp}}", "ignore_failure": true } }, + { "set": { "if": "ctx.event?.dataset != null && ctx.event?.dataset == 'elasticsearch.server'", "field": "event.module", "value":"elasticsearch" }}, {"append": {"field":"related.ip","value":["{{source.ip}}","{{destination.ip}}"],"allow_duplicates":false,"if":"ctx?.event?.dataset == 'endpoint.events.network' && ctx?.source?.ip != null","ignore_failure":true}}, {"foreach": {"field":"host.ip","processor":{"append":{"field":"related.ip","value":"{{_ingest._value}}","allow_duplicates":false}},"if":"ctx?.event?.module == 'endpoint' && ctx?.host?.ip != null","ignore_missing":true, "description":"Extract IPs from Elastic Agent events (host.ip) and adds them to related.ip"}}, { "remove": { "field": [ "message2", "type", "fields", "category", "module", "dataset", "event.dataset_temp", "dataset_tag_temp", "module_temp", "datastream_dataset_temp" ], "ignore_missing": true, "ignore_failure": true } } diff --git a/salt/elasticsearch/files/log4j2.properties b/salt/elasticsearch/files/log4j2.properties index 014fa61a1..b29378d6a 100644 --- a/salt/elasticsearch/files/log4j2.properties +++ b/salt/elasticsearch/files/log4j2.properties @@ -20,8 +20,28 @@ 
appender.rolling.strategy.type = DefaultRolloverStrategy appender.rolling.strategy.action.type = Delete appender.rolling.strategy.action.basepath = /var/log/elasticsearch appender.rolling.strategy.action.condition.type = IfFileName -appender.rolling.strategy.action.condition.glob = *.gz +appender.rolling.strategy.action.condition.glob = *.log.gz appender.rolling.strategy.action.condition.nested_condition.type = IfLastModified appender.rolling.strategy.action.condition.nested_condition.age = 7D + +appender.rolling_json.type = RollingFile +appender.rolling_json.name = rolling_json +appender.rolling_json.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}.json +appender.rolling_json.layout.type = ECSJsonLayout +appender.rolling_json.layout.dataset = elasticsearch.server +appender.rolling_json.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}.json.gz +appender.rolling_json.policies.type = Policies +appender.rolling_json.policies.time.type = TimeBasedTriggeringPolicy +appender.rolling_json.policies.time.interval = 1 +appender.rolling_json.policies.time.modulate = true +appender.rolling_json.strategy.type = DefaultRolloverStrategy +appender.rolling_json.strategy.action.type = Delete +appender.rolling_json.strategy.action.basepath = /var/log/elasticsearch +appender.rolling_json.strategy.action.condition.type = IfFileName +appender.rolling_json.strategy.action.condition.glob = *.json.gz +appender.rolling_json.strategy.action.condition.nested_condition.type = IfLastModified +appender.rolling_json.strategy.action.condition.nested_condition.age = 1D + rootLogger.level = info rootLogger.appenderRef.rolling.ref = rolling +rootLogger.appenderRef.rolling_json.ref = rolling_json diff --git a/salt/elasticsearch/soc_elasticsearch.yaml b/salt/elasticsearch/soc_elasticsearch.yaml index c268cc493..097a53296 100644 --- a/salt/elasticsearch/soc_elasticsearch.yaml +++ 
b/salt/elasticsearch/soc_elasticsearch.yaml @@ -392,6 +392,7 @@ elasticsearch: so-logs-elastic_agent_x_metricbeat: *indexSettings so-logs-elastic_agent_x_osquerybeat: *indexSettings so-logs-elastic_agent_x_packetbeat: *indexSettings + so-logs-elasticsearch_x_server: *indexSettings so-metrics-endpoint_x_metadata: *indexSettings so-metrics-endpoint_x_metrics: *indexSettings so-metrics-endpoint_x_policy: *indexSettings diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate b/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate new file mode 100755 index 000000000..96219c50c --- /dev/null +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-retention-estimate @@ -0,0 +1,1160 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +INFLUX_URL="https://localhost:8086/api/v2" +JSON_OUTPUT=false +VERBOSE=false +TEMP_FILES=() + +. 
/usr/sbin/so-common + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +BOLD='\033[1;37m' +NC='\033[0m' +REDBOLD='\033[1;31m' +YELLOWBOLD='\033[1;33m' + +declare -a recommendation_lines +declare -a recommendation_records + +cleanup_temp_files() { + local file + for file in "${TEMP_FILES[@]}"; do + [ -f "$file" ] && rm -f "$file" 2>/dev/null + done +} + +trap cleanup_temp_files EXIT INT TERM + +create_temp_file() { + local tmpfile + tmpfile=$(mktemp) + TEMP_FILES+=("$tmpfile") + echo "$tmpfile" +} + +log_title() { + if [ "$1" == "LOG" ]; then + echo -e "\n${BOLD}================ $2 ================${NC}\n" + elif [ "$1" == "OK" ]; then + echo -e "${GREEN} $2 ${NC}" + elif [ "$1" == "WARN" ]; then + echo -e "${YELLOW} $2 ${NC}" + elif [ "$1" == "ERROR" ]; then + echo -e "${RED} $2 ${NC}" + fi +} + +usage() { + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +Estimate remaining days until Elasticsearch cluster reaches low watermark threshold. + +OPTIONS: + --json Output results in JSON format + -v, --verbose Show additional output + -h, --help Show this help message + +EOF + exit 0 +} + +while [[ $# -gt 0 ]]; do + case $1 in + --json) + JSON_OUTPUT=true + shift + ;; + -v|--verbose) + VERBOSE=true + shift + ;; + -h|--help) + usage + ;; + *) + echo "Unknown option: $1" >&2 + usage + ;; + esac +done + +request() { + curl -skK /opt/so/conf/influxdb/curl.config "$INFLUX_URL/$@" +} + +lookup_org_id() { + request "orgs?org=Security+Onion" | jq -r '.orgs[] | select(.name == "Security Onion").id' +} + +run_flux_query() { + local query=$1 + request "query?org=$ORG_ID" \ + -H 'Accept:application/csv' \ + -H 'Content-type:application/vnd.flux' \ + -d "$query" -XPOST 2>/dev/null +} + +read_csv_value() { + local input="$1" + + printf '%s\n' "$input" | awk -F',' ' + $0 ~ /^#/ { next } + NF < 1 { next } + { + gsub(/\r|\t/, "") + for (i = 1; i <= NF; i++) { + sub(/^[[:space:]]+/, "", $i) + sub(/[[:space:]]+$/, "", $i) + } + if (($2 == "_result" || $2 == 
"result") && $3 != "table" && $NF != "") { + print $NF + exit + } + } + ' +} + +normalize_number() { + local value="${1:-0}" + awk -v val="$value" 'BEGIN { + if (val == "" || val == "null") { printf "0"; exit } + if (val == val + 0) { printf "%.0f", val + 0; exit } + printf "0" + }' +} + +bytes_to_gb() { + local bytes="${1:-0}" + awk -v b="$bytes" 'BEGIN { + if (b == "" || b == "null") { printf "0.00"; exit } + printf "%.2f", b / 1024 / 1024 / 1024 + }' +} + +expand_node_roles() { + local role_string="$1" + local -a roles=() + + # Only show data-related roles: d=data, h=data_hot, w=data_warm, c=data_cold, s=data_content f=data_frozen + [[ "$role_string" =~ h ]] && roles+=("data_hot") + [[ "$role_string" =~ w ]] && roles+=("data_warm") + [[ "$role_string" =~ c ]] && roles+=("data_cold") + [[ "$role_string" =~ s ]] && roles+=("data_content") + [[ "$role_string" =~ f ]] && roles+=("data_frozen") + [[ "$role_string" =~ d ]] && roles+=("data") + + local IFS=',' + echo "${roles[*]}" +} + +run_indices_growth() { + if ! 
command -v so-elasticsearch-indices-growth >/dev/null 2>&1; then + return 1 + fi + + if [ "$EUID" -ne 0 ] && command -v sudo >/dev/null 2>&1; then + sudo -n so-elasticsearch-indices-growth 2>/dev/null || so-elasticsearch-indices-growth 2>/dev/null + else + so-elasticsearch-indices-growth 2>/dev/null + fi +} + +fetch_total_bytes() { + local start="$1" + local stop="$2" + local range_line + + if [ -n "$stop" ]; then + range_line=" |> range(start: ${start}, stop: ${stop})" + else + range_line=" |> range(start: ${start})" + fi + + local query + query=$(cat <<-EOF +from(bucket: "telegraf/so_long_term") +${range_line} + |> filter(fn: (r) => r._measurement == "elasticsearch_index_size") + |> last() + |> group() + |> sum() + |> keep(columns: ["_value"]) +EOF + ) + + local result value + result=$(run_flux_query "$query") + value=$(read_csv_value "$result") + normalize_number "$value" +} + +fail() { + if [ "$JSON_OUTPUT" = true ]; then + jq -n --arg error "$1" '{error: $error}' + else + echo "ERROR: $1" >&2 + fi + exit 1 +} + +echo -e "\nDISCLAIMER: Script output is based on current data patterns, but are approximations solely intended to assist with getting a general ILM policy configured." + +ORG_ID=$(lookup_org_id) +[ -n "$ORG_ID" ] || fail "Unable to resolve InfluxDB org id" + +cluster_storage_size=0 +indexed_storage_source="elasticsearch" +cluster_storage_size_output=$(so-elasticsearch-query '_cluster/stats?filter_path=indices.store.size_in_bytes' --fail 2>/dev/null || true) +if [ -n "$cluster_storage_size_output" ]; then + cluster_storage_size=$(echo "$cluster_storage_size_output" | jq -r '.indices.store.size_in_bytes // 0' 2>/dev/null) + if ! 
printf '%s' "$cluster_storage_size" | grep -Eq '^[0-9]+$'; then + cluster_storage_size=0 + fi +fi + +# historical data from influxdb for growth calculation +one_day_total=$(fetch_total_bytes "-25h" "-23h") +seven_day_total=$(fetch_total_bytes "-7d8h" "-7d") +thirty_day_total=$(fetch_total_bytes "-30d8h" "-30d") + +# available historical windows (prefer 30d/7d when available, to avoid using recent 24h traffic spike as true daily ingest rate) +history_days=0 +historical_total=0 + +if [ "$thirty_day_total" -gt 0 ]; then + history_days=30 + history_label="30-day" + historical_total=$thirty_day_total +elif [ "$seven_day_total" -gt 0 ]; then + history_days=7 + history_label="7-day" + historical_total=$seven_day_total +elif [ "$one_day_total" -gt 0 ]; then + history_days=1 + history_label="24-hour" + historical_total=$one_day_total +fi + +[ "$history_days" -gt 0 ] || fail "Historical InfluxDB data unavailable for growth calculation. If this is a newer grid, try re-running this script in a few days. Otherwise review /opt/so/log/telegraf/telegraf.log for errors with collecting required ES metrics." 
+ +# Daily growth rate +growth_bytes=$(( cluster_storage_size - historical_total )) +daily_growth_bytes=$(awk -v diff="$growth_bytes" -v days="$history_days" 'BEGIN { + if (days <= 0) { print 0; exit } + printf "%.0f", diff / days +}') + +# Daily shard creation rate using same time window (30d / 7d / 24h) +daily_shard_creation=0 +now_ms=$(date +%s)000 +history_ago_ms=$(awk -v now="$now_ms" -v days="$history_days" 'BEGIN { printf "%.0f", now - (days * 86400 * 1000) }') +shard_creation_output=$(so-elasticsearch-query "_cat/indices/.ds-*?format=json&h=index,pri,rep,creation.date" --fail 2>/dev/null || true) +if [ -n "$shard_creation_output" ]; then + recent_shards=$(echo "$shard_creation_output" | jq --argjson cutoff "$history_ago_ms" ' + [.[] | + select(.["creation.date"] != null and (.["creation.date"] | tonumber) >= $cutoff) | + (.pri | tonumber) + ((.pri | tonumber) * (.rep | tonumber)) + ] | add // 0 + ' 2>/dev/null) + if [ -n "$recent_shards" ] && [[ "$recent_shards" =~ ^[0-9]+$ ]]; then + daily_shard_creation=$(awk -v total="$recent_shards" -v days="$history_days" 'BEGIN { + if (days <= 0) { print 0; exit } + printf "%.1f", total / days + }') + fi +fi + +# Find expected ILM deletions +ilm_delete_7d=0 +ilm_delete_30d=0 +ilm_indices_7d=0 +ilm_indices_30d=0 +ilm_delete_immediate=0 +ilm_indices_immediate=0 +ilm_delete_scheduled_7d=0 +ilm_indices_scheduled_7d=0 +ilm_delete_scheduled_30d=0 +ilm_indices_scheduled_30d=0 +ilm_shards_7d=0 +ilm_shards_30d=0 +ilm_shards_immediate=0 +ilm_shards_scheduled_7d=0 +ilm_shards_scheduled_30d=0 + + # For verbose output +declare -a scheduled_indices_names +declare -a scheduled_indices_sizes +declare -a scheduled_indices_days +declare -a immediate_indices_names +declare -a immediate_indices_sizes + +# Get ilm policy delete ages per policy + # example output 'so-logs-1password.audit_events-logs|365' +tmpfile_policies=$(create_temp_file) +so-elasticsearch-query '_ilm/policy' --fail 2>/dev/null | jq -r ' + def age_to_days: + if type == 
"number" then . + elif type == "string" then + (ascii_downcase) as $s | + (try ($s | capture("^(?<num>-?[0-9.]+)(?<unit>[smhd]?)$")) catch {num:"0", unit:""}) as $m | + (($m.num | tonumber? // 0)) as $val | + (if $m.unit == "d" or $m.unit == "" then $val + elif $m.unit == "h" then $val / 24 + elif $m.unit == "m" then $val / 1440 + elif $m.unit == "s" then $val / 86400 + else $val end) + else 0 end; + to_entries[] | + select(.value.policy.phases.delete.min_age?) | + "\(.key)|\((.value.policy.phases.delete.min_age | age_to_days))" +' > "$tmpfile_policies" 2>/dev/null || true + +declare -A policy_ages + +if [ -s "$tmpfile_policies" ]; then + # create associative array of policy -> delete_age + while IFS='|' read -r policy age; do + policy_ages["$policy"]=$age + done < "$tmpfile_policies" + + # Get ILM managed indices with their age and policy, figure days until deletion + tmpfile_indices=$(create_temp_file) + so-elasticsearch-query '_all/_ilm/explain' --fail 2>/dev/null | jq -r ' + def age_to_days: + if type == "number" then . + elif type == "string" then + (ascii_downcase) as $s | + (try ($s | capture("^(?<num>-?[0-9.]+)(?<unit>[smhd]?)$")) catch {num:"0", unit:""}) as $m | + (($m.num | tonumber? // 0)) as $val | + (if $m.unit == "d" or $m.unit == "" then $val + elif $m.unit == "h" then $val / 24 + elif $m.unit == "m" then $val / 1440 + elif $m.unit == "s" then $val / 86400 + else $val end) + else 0 end; + .indices | to_entries[] | + select(.value.managed == true and .value.policy) | + "\(.key)|\(.value.policy)|\(((.value.age? // "0") | age_to_days))|\(.value.phase? 
// "")" + ' > "$tmpfile_indices" 2>/dev/null || true + + # Process each index and calculate totals + tmpfile_all=$(create_temp_file) + while IFS='|' read -r index policy age phase; do + if [ -n "${policy_ages[$policy]:-}" ]; then + delete_age=${policy_ages[$policy]} + delete_age=${delete_age:-0} + age=${age:-0} + days_until_ceiling=$(awk -v del="$delete_age" -v aged="$age" 'BEGIN { + diff = del - aged; + if (diff <= 0) { + print 0; + exit + } + base = int(diff); + if (diff > base) { base = base + 1 } + print base; + }') + if [ -z "$days_until_ceiling" ]; then + days_until_ceiling=0 + fi + if [ "$days_until_ceiling" -lt 0 ]; then + days_until_ceiling=0 + fi + bucket="scheduled" + if [ "$phase" = "delete" ]; then + days_until_ceiling=0 + bucket="immediate" + fi + if [ "$days_until_ceiling" -le 30 ] 2>/dev/null; then + echo "$index|$days_until_ceiling|$bucket" >> "$tmpfile_all" + fi + fi + done < "$tmpfile_indices" + + # Get size and shard counts for indices + if [ -s "$tmpfile_all" ]; then + candidate_indices=$(cut -d'|' -f1 "$tmpfile_all" | tr '\n' ',' | sed 's/,$//') + if [ -n "$candidate_indices" ]; then + tmpfile_sizes=$(create_temp_file) + so-elasticsearch-query "_cat/indices/${candidate_indices}?format=json&h=index,pri.store.size,pri,rep&bytes=b" --fail 2>/dev/null | \ + jq -r '.[] | "\(.index)|\(.["pri.store.size"])|\(.pri)|\(.rep)"' > "$tmpfile_sizes" 2>/dev/null || true + + # Build size and shard lookup + declare -A index_sizes + declare -A index_shards + while IFS='|' read -r idx size pri rep; do + index_sizes["$idx"]=$size + # Total shards = pri + (pri * rep) + total_shards=$(awk -v p="$pri" -v r="$rep" 'BEGIN { printf "%.0f", p + (p * r) }') + index_shards["$idx"]=$total_shards + done < "$tmpfile_sizes" + + # Calculate totals for ilm deletes + while IFS='|' read -r index days_until bucket; do + size=${index_sizes[$index]:-0} + shards=${index_shards[$index]:-0} + if [ "$bucket" = "immediate" ]; then + ilm_delete_immediate=$((ilm_delete_immediate + size)) + 
ilm_indices_immediate=$((ilm_indices_immediate + 1)) + ilm_shards_immediate=$((ilm_shards_immediate + shards)) + if [ "$VERBOSE" = true ]; then + immediate_indices_names+=("$index") + immediate_indices_sizes+=("$size") + fi + else + if [ "$days_until" -le 7 ] 2>/dev/null; then + ilm_delete_scheduled_7d=$((ilm_delete_scheduled_7d + size)) + ilm_indices_scheduled_7d=$((ilm_indices_scheduled_7d + 1)) + ilm_shards_scheduled_7d=$((ilm_shards_scheduled_7d + shards)) + if [ "$VERBOSE" = true ]; then + scheduled_indices_names+=("$index") + scheduled_indices_sizes+=("$size") + scheduled_indices_days+=("$days_until") + fi + fi + ilm_delete_scheduled_30d=$((ilm_delete_scheduled_30d + size)) + ilm_indices_scheduled_30d=$((ilm_indices_scheduled_30d + 1)) + ilm_shards_scheduled_30d=$((ilm_shards_scheduled_30d + shards)) + fi + + if [ "$days_until" -le 7 ] 2>/dev/null; then + ilm_delete_7d=$((ilm_delete_7d + size)) + ilm_indices_7d=$((ilm_indices_7d + 1)) + ilm_shards_7d=$((ilm_shards_7d + shards)) + fi + ilm_delete_30d=$((ilm_delete_30d + size)) + ilm_indices_30d=$((ilm_indices_30d + 1)) + ilm_shards_30d=$((ilm_shards_30d + shards)) + done < "$tmpfile_all" + fi + fi +fi + +# Get the average daily ILM deletion rate (smooth out over 30d / 7d for consistency) +daily_ilm_delete_bytes=0 +if [ "$ilm_delete_scheduled_30d" -gt 0 ] && [ "$ilm_indices_scheduled_30d" -gt 0 ]; then + daily_ilm_delete_bytes=$(awk -v total="$ilm_delete_scheduled_30d" 'BEGIN { printf "%.0f", total / 30 }') +elif [ "$ilm_delete_scheduled_7d" -gt 0 ] && [ "$ilm_indices_scheduled_7d" -gt 0 ]; then + daily_ilm_delete_bytes=$(awk -v total="$ilm_delete_scheduled_7d" 'BEGIN { printf "%.0f", total / 7 }') +fi + +# Net storage growth (growth - deletions) +net_growth_bytes=$(awk -v growth="$daily_growth_bytes" -v deletions="$daily_ilm_delete_bytes" 'BEGIN { + printf "%.0f", growth - deletions +}') + +ilm_delete_7d_gb=$(bytes_to_gb "$ilm_delete_7d") +ilm_delete_30d_gb=$(bytes_to_gb "$ilm_delete_30d") 
+ilm_delete_immediate_gb=$(bytes_to_gb "$ilm_delete_immediate") +ilm_delete_scheduled_7d_gb=$(bytes_to_gb "$ilm_delete_scheduled_7d") +ilm_delete_scheduled_30d_gb=$(bytes_to_gb "$ilm_delete_scheduled_30d") +daily_ilm_delete_gb=$(bytes_to_gb "$daily_ilm_delete_bytes") + +ilm_impact_pct="0.0" +if [ "$cluster_storage_size" -gt 0 ] && [ "$ilm_delete_7d" -gt 0 ]; then + ilm_impact_pct=$(awk -v ilm="$ilm_delete_7d" -v total="$cluster_storage_size" 'BEGIN { + if (total <= 0) { printf "0.0"; exit } + printf "%.1f", (ilm / total) * 100 + }') +fi + +ilm_window_daily_bytes=0 +ilm_window_daily_gb="0.00" +if [ "$ilm_delete_7d" -gt 0 ]; then + ilm_window_daily_bytes=$(awk -v total="$ilm_delete_7d" 'BEGIN { printf "%.0f", total / 7 }') + ilm_window_daily_gb=$(awk -v total="$ilm_delete_7d" 'BEGIN { printf "%.2f", total / 7 / 1024 / 1024 / 1024 }') +fi + +ilm_rate_variance_pct="" +ilm_rate_variance_warning=false +if [ "$daily_ilm_delete_bytes" -gt 0 ] && [ "$ilm_window_daily_bytes" -gt 0 ]; then + ilm_rate_variance_pct=$(awk -v window="$ilm_window_daily_bytes" -v daily="$daily_ilm_delete_bytes" 'BEGIN { + if (daily == 0) { print ""; exit } + diff = window - daily; + if (diff < 0) diff = -diff; + pct = diff / daily * 100; + if (pct < 0) pct = -pct; + printf "%.0f", pct + }') + if [ -n "$ilm_rate_variance_pct" ]; then + ilm_rate_flag=$(awk -v v="$ilm_rate_variance_pct" 'BEGIN { if (v + 0 > 30) print 1; else print 0 }') + if [ "$ilm_rate_flag" -eq 1 ] 2>/dev/null; then + ilm_rate_variance_warning=true + fi + fi +fi + +ilm_rate_variance_warning_json="false" +if [ "$ilm_rate_variance_warning" = true ]; then + ilm_rate_variance_warning_json="true" +fi + +# Elasticsearch cluster disk watermark settings (fallback to 85/90/95 defaults) +watermark_output=$(so-elasticsearch-query '_cluster/settings?include_defaults=true&filter_path=*.cluster.routing.allocation.disk.*' --fail 2>/dev/null) || fail "Failed to query Elasticsearch cluster settings" + +low=$(echo "$watermark_output" | jq -r 
'.transient.cluster.routing.allocation.disk.watermark.low // .persistent.cluster.routing.allocation.disk.watermark.low // .defaults.cluster.routing.allocation.disk.watermark.low // empty') +high=$(echo "$watermark_output" | jq -r '.transient.cluster.routing.allocation.disk.watermark.high // .persistent.cluster.routing.allocation.disk.watermark.high // .defaults.cluster.routing.allocation.disk.watermark.high // empty') +flood=$(echo "$watermark_output" | jq -r '.transient.cluster.routing.allocation.disk.watermark.flood_stage // .persistent.cluster.routing.allocation.disk.watermark.flood_stage // .defaults.cluster.routing.allocation.disk.watermark.flood_stage // empty') + +low=${low:-"85%"} +high=${high:-"90%"} +flood=${flood:-"95%"} + +low_percent=${low%\%} +low_fraction=$(awk -v p="$low_percent" 'BEGIN { + if (p == "" || p + 0 <= 0) { printf "%.6f", 0.85; exit } + printf "%.6f", p / 100 +}') + +high_percent=${high%\%} +high_fraction=$(awk -v p="$high_percent" 'BEGIN { + if (p == "" || p + 0 <= 0) { printf "%.6f", 0.90; exit } + printf "%.6f", p / 100 +}') + +# Cluster shard total +cluster_shards_output=$(so-elasticsearch-query '_cluster/stats?filter_path=indices.shards.total' --fail 2>/dev/null) || fail "Failed to query cluster shard stats" +total_shards=$(echo "$cluster_shards_output" | jq -r '.indices.shards.total // 0' 2>/dev/null) + +# Get max shards per node setting (with default 1000) +max_shards_per_node_output=$(so-elasticsearch-query '_cluster/settings?include_defaults=true&filter_path=*.cluster.max_shards_per_node' --fail 2>/dev/null) || fail "Failed to query cluster shard settings" +max_shards_per_node=$(echo "$max_shards_per_node_output" | jq -r '.transient.cluster.max_shards_per_node // .persistent.cluster.max_shards_per_node // .defaults.cluster.max_shards_per_node // "1000"' 2>/dev/null) +max_shards_per_node=${max_shards_per_node:-1000} + +# Get same disk usage metric ES uses for watermark (not only ES used storage, but OS level storage usage) 
+nodes_output=$(so-elasticsearch-query '_cat/nodes?format=json&h=name,ip,node.role,disk.total,disk.used,disk.avail&bytes=b' --fail 2>/dev/null) || fail "Failed to query Elasticsearch node disk usage" + +# Parse nodes with data roles and calculate cluster totals +# Only include nodes with data roles: d=data, h=data_hot, w=data_warm, c=data_cold, s=data_content, f=data_frozen +cluster_stats=$(echo "$nodes_output" | jq --argjson low "$low_fraction" ' + [ .[] + | select(.["node.role"] | test("[dhwcsf]")) + | .total = (.["disk.total"] | tostring | gsub("[^0-9.]"; "") | tonumber) + | .used = (.["disk.used"] | tostring | gsub("[^0-9.]"; "") | tonumber) + | .avail = (.["disk.avail"] | tostring | gsub("[^0-9.]"; "") | tonumber) + | select(.total? and .used?) + | .low_threshold = (.total * $low) + | .remaining = (.low_threshold - .used) + ] + | { + total: ([.[].total] | add // 0), + used: ([.[].used] | add // 0), + low_threshold: ([.[].low_threshold] | add // 0), + remaining: ([.[].remaining] | add // 0) + } +') + +cluster_total=$(echo "$cluster_stats" | jq -r '.total') +cluster_used=$(echo "$cluster_stats" | jq -r '.used') +cluster_low_threshold=$(echo "$cluster_stats" | jq -r '.low_threshold') +cluster_remaining=$(echo "$cluster_stats" | jq -r '.remaining') + +cluster_high_threshold=$(awk -v total="$cluster_total" -v frac="$high_fraction" 'BEGIN { + if (total == "" || frac == "" || total + 0 <= 0 || frac + 0 <= 0) { printf "0"; exit } + printf "%.0f", total * frac +}') +cluster_over_low_bytes=$(awk -v used="$cluster_used" -v threshold="$cluster_low_threshold" 'BEGIN { + if (used == "" || threshold == "") { printf "0"; exit } + diff = used - threshold; + if (diff < 0) diff = 0; + printf "%.0f", diff +}') +cluster_over_high_bytes=$(awk -v used="$cluster_used" -v threshold="$cluster_high_threshold" 'BEGIN { + if (used == "" || threshold == "") { printf "0"; exit } + diff = used - threshold; + if (diff < 0) diff = 0; + printf "%.0f", diff +}') + +# Count data nodes and 
calculate shard capacity +# Only count nodes with data roles: d=data, h=data_hot, w=data_warm, c=data_cold, s=data_content f=data_frozen +data_node_count=$(echo "$nodes_output" | jq '[.[] | select(.["node.role"] | test("[dhwcsf]"))] | length') +max_shard_capacity=$((data_node_count * max_shards_per_node)) + +declare -a data_node_names +declare -a data_node_roles +if [ "$data_node_count" -gt 0 ]; then + while IFS='|' read -r node_name node_role; do + data_node_names+=("$node_name") + data_node_roles+=("$node_role") + done < <(echo "$nodes_output" | jq -r '.[] | select(.["node.role"] | test("[dhwcsf]")) | "\(.name)|\(.["node.role"])"') +fi +shard_usage_percent="0.0" +if [ "$max_shard_capacity" -gt 0 ]; then + shard_usage_percent=$(awk -v current="$total_shards" -v max="$max_shard_capacity" 'BEGIN { + if (max <= 0) { printf "0.0"; exit } + printf "%.1f", (current / max) * 100 + }') +fi + +recommendations_triggered=false +recommendations_ready=false +recommendations_message="" +recommendations_json='[]' +recommendations_triggered_json=false +recommendation_lines=() +recommendation_records=() +should_trigger_recommendations=false +recommendations_reason="" + +days_to_low_numeric="" +days_to_low_gross_numeric="" + +[ "$cluster_total" -gt 0 ] || fail "No Elasticsearch data nodes retrieved from _cat/nodes" + +# Calculate current retention period (age of oldest .ds-logs-* index) +oldest_index_days="" +oldest_index_name="" +oldest_index_output=$(so-elasticsearch-query '_cat/indices/.ds-logs-*?format=json&h=index,creation.date&s=creation.date:asc' --fail 2>/dev/null | jq -r '.[0] // empty' 2>/dev/null || true) +if [ -n "$oldest_index_output" ]; then + oldest_index_name=$(echo "$oldest_index_output" | jq -r '.index // empty' 2>/dev/null) + oldest_creation_ms=$(echo "$oldest_index_output" | jq -r '.["creation.date"] // empty' 2>/dev/null) + if [ -n "$oldest_creation_ms" ] && [[ "$oldest_creation_ms" =~ ^[0-9]+$ ]]; then + oldest_creation_sec=$((oldest_creation_ms / 1000)) + if 
[ "$oldest_creation_sec" -gt 0 ]; then + now_sec=$(date +%s) + if [ "$now_sec" -ge "$oldest_creation_sec" ]; then + age_sec=$((now_sec - oldest_creation_sec)) + oldest_index_days=$(awk -v age="$age_sec" 'BEGIN { printf "%.1f", age / 86400 }') + fi + fi + fi +fi + +# Calculate days until low watermark using net growth +days_to_low="" +days_to_low_gross="" +target_date="" + +# Calculate with gross growth +if [ "$daily_growth_bytes" -gt 0 ] && [ "$(echo "$cluster_remaining > 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r > 0) print 1; else print 0 }')" -eq 1 ]; then + days_to_low_gross=$(awk -v rem="$cluster_remaining" -v perday="$daily_growth_bytes" 'BEGIN { + printf "%.2f", rem / perday + }') +fi + +# Calculate with net growth (minus ILM deletions) +if [ "$net_growth_bytes" -gt 0 ] && [ "$(echo "$cluster_remaining > 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r > 0) print 1; else print 0 }')" -eq 1 ]; then + days_to_low=$(awk -v rem="$cluster_remaining" -v perday="$net_growth_bytes" 'BEGIN { + printf "%.2f", rem / perday + }') + ceil_days=$(awk -v d="$days_to_low" 'BEGIN { + base = int(d); + if (d > base) { base = base + 1 } + if (base < 0) { base = 0 } + printf "%d", base + }') + target_date=$(date -d "+${ceil_days} days" +%F 2>/dev/null) +elif [ "$(echo "$cluster_remaining > 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r > 0) print 1; else print 0 }')" -eq 1 ]; then + # Net growth is zero or negative, cluster is in equilibrium or shrinking + days_to_low="stable" +fi + +if [ -n "$days_to_low" ] && [ "$days_to_low" != "stable" ] && [[ "$days_to_low" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then + days_to_low_numeric="$days_to_low" +fi + +if [ -n "$days_to_low_gross" ] && [[ "$days_to_low_gross" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then + days_to_low_gross_numeric="$days_to_low_gross" +fi + +# Calculate estimated retention (oldest index age + days until low watermark) +estimated_retention_days="" +if [ -n 
"$oldest_index_days" ] && [ -n "$days_to_low_numeric" ]; then + estimated_retention_days=$(awk -v oldest="$oldest_index_days" -v remaining="$days_to_low_numeric" 'BEGIN { + printf "%.1f", oldest + remaining + }') +fi + +cluster_at_or_below_low=$(echo "$cluster_remaining <= 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r <= 0) print 1; else print 0 }') + +if [ "$cluster_at_or_below_low" -eq 1 ]; then + should_trigger_recommendations=true + if [ "$cluster_over_high_bytes" -gt 0 ] 2>/dev/null; then + recommendations_reason="Cluster is beyond the high watermark threshold. Reduce retention on the fastest-growing indices immediately." + else + recommendations_reason="Cluster is at or beyond the low watermark threshold. Reduce retention on the fastest-growing indices immediately." + fi +elif [ -n "$days_to_low_numeric" ]; then + within_seven=$(awk -v d="$days_to_low_numeric" 'BEGIN { if (d <= 7) print 1; else print 0 }') + if [ "$within_seven" -eq 1 ]; then + should_trigger_recommendations=true + recommendations_reason="Projected low watermark breach in ~${days_to_low_numeric} days (${target_date:-N/A}). Reduce retention on the fastest-growing indices." + fi +elif [ -n "$days_to_low_gross_numeric" ]; then + within_seven_gross=$(awk -v d="$days_to_low_gross_numeric" 'BEGIN { if (d <= 7) print 1; else print 0 }') + if [ "$within_seven_gross" -eq 1 ]; then + should_trigger_recommendations=true + recommendations_reason="Gross growth trend indicates a low watermark breach in ~${days_to_low_gross_numeric} days (${target_date:-N/A}). Reduce retention on the fastest-growing indices before ILM deletions." 
+ fi +fi + +cluster_over_high_flag=0 +if [ "$cluster_over_high_bytes" -gt 0 ] 2>/dev/null; then + cluster_over_high_flag=1 +fi + +cluster_over_low_flag=0 +if [ "$cluster_over_low_bytes" -gt 0 ] 2>/dev/null; then + cluster_over_low_flag=1 +fi + +cluster_high_threshold_gb=$(bytes_to_gb "$cluster_high_threshold") +cluster_over_low_gb=$(bytes_to_gb "$cluster_over_low_bytes") +cluster_over_high_gb=$(bytes_to_gb "$cluster_over_high_bytes") + +if [ "$should_trigger_recommendations" = true ]; then + recommendations_triggered=true + recommendations_triggered_json=true + if [ -n "$recommendations_reason" ]; then + recommendations_message="$recommendations_reason" + else + recommendations_message="Cluster is nearing the low watermark threshold. Reduce retention on the fastest-growing indices." + fi + + growth_output=$(run_indices_growth || true) + if [ -n "${growth_output//[[:space:]]/}" ]; then + mapfile -t recommendation_source_lines < <(printf '%s\n' "$growth_output" | tail -n +3 | awk 'NF' | head -n 3) + for line in "${recommendation_source_lines[@]}"; do + index=$(echo "$line" | awk '{print $1}') + [ -n "$index" ] || continue + + growth_24h_gb=$(echo "$line" | awk '{print $(NF-2)}') + + creation_date_display="" + retention_days="" + policy="" + delete_min_age="" + + index_info=$(so-elasticsearch-query "_cat/indices/${index}?format=json&h=index,creation.date,creation.date.string" --fail 2>/dev/null) || true + if [ -n "$index_info" ]; then + creation_epoch=$(echo "$index_info" | jq -r '.[0]."creation.date" // empty' 2>/dev/null) + creation_readable=$(echo "$index_info" | jq -r '.[0]."creation.date.string" // empty' 2>/dev/null) + if [ -n "$creation_epoch" ] && [[ "$creation_epoch" =~ ^[0-9]+$ ]]; then + creation_seconds=$((creation_epoch / 1000)) + if [ "$creation_seconds" -gt 0 ]; then + creation_date_display=$(date -u -d "@$creation_seconds" +%FT%TZ 2>/dev/null) + now_seconds=$(date +%s) + if [ "$now_seconds" -ge "$creation_seconds" ]; then + retention_days=$(awk -v 
now="$now_seconds" -v created="$creation_seconds" 'BEGIN { diff = now - created; if (diff < 0) diff = 0; printf "%.1f", diff / 86400 }') + fi + fi + fi + if [ -z "$creation_date_display" ] && [ -n "$creation_readable" ] && [ "$creation_readable" != "null" ]; then + creation_date_display="$creation_readable" + fi + fi + + ilm_output=$(so-elasticsearch-query "${index}/_ilm/explain" --fail 2>/dev/null) || true + if [ -n "$ilm_output" ]; then + policy=$(echo "$ilm_output" | jq --arg idx "$index" -r ".indices[$idx].policy // empty" 2>/dev/null) + fi + if [ -n "$policy" ] && [ -n "${policy_ages[$policy]:-}" ]; then + delete_min_age=${policy_ages[$policy]} + fi + + retention_days_display=${retention_days:-unknown} + retention_days_floor="" + if [ -n "$retention_days" ]; then + retention_days_floor=$(awk -v v="$retention_days" 'BEGIN { if (v == "" || v == "null") { print ""; exit } val = v + 0; if (val < 1) val = 1; printf "%d", int(val) }') + if [ -n "$retention_days_floor" ] && [ "$retention_days_floor" -lt 1 ]; then + retention_days_floor=1 + fi + fi + + delete_min_age_numeric="" + if [ -n "$delete_min_age" ]; then + delete_min_age_numeric=$(awk -v v="$delete_min_age" 'BEGIN { if (v == "" || v == "null") { print ""; exit } val = v + 0; if (val < 1) val = 1; printf "%d", int(val) }') + fi + + recommended_delete_min_age="" + if [ -n "$retention_days_floor" ]; then + recommended_delete_min_age="$retention_days_floor" + fi + if [ -n "$delete_min_age_numeric" ]; then + if [ -n "$recommended_delete_min_age" ]; then + recommended_delete_min_age=$(awk -v rec="$recommended_delete_min_age" -v cur="$delete_min_age_numeric" 'BEGIN { rec += 0; cur += 0; if (cur < rec) printf "%d", cur; else printf "%d", rec }') + else + recommended_delete_min_age="$delete_min_age_numeric" + fi + fi + if [ -z "$recommended_delete_min_age" ] && [ -n "$retention_days_floor" ]; then + recommended_delete_min_age="$retention_days_floor" + fi + + action_phrase="" + if [ -n "$recommended_delete_min_age" ]; 
then + if [ -n "$delete_min_age_numeric" ] && [ "$recommended_delete_min_age" -lt "$delete_min_age_numeric" ]; then + action_phrase="Lower delete.min_age to ~${recommended_delete_min_age}d" + else + action_phrase="Cap delete.min_age at ~${recommended_delete_min_age}d" + fi + if [ -n "$retention_days_floor" ]; then + action_phrase="${action_phrase} (observed retention ~${retention_days_floor}d)" + fi + action_phrase="${action_phrase}; consider whether a tighter cap (e.g., 30d) fits requirements." + else + action_phrase="Review ILM delete.min_age for this index; consider more aggressive retention if throughput stays high." + fi + + policy_clause="" + if [ -n "$policy" ]; then + policy_clause=", policy ${policy}" + fi + if [ -n "$delete_min_age" ]; then + policy_clause="${policy_clause} (current delete.min_age ${delete_min_age}d)" + fi + + recommendation_lines+=(" - ${BOLD}${index}${NC}: ~${growth_24h_gb} GB growth in last 24h, retention ~${retention_days_display} days (created ${creation_date_display:-unknown})${policy_clause}. 
${action_phrase}") + record=$(jq -nc \ + --arg index "$index" \ + --arg growth "$growth_24h_gb" \ + --arg retention "${retention_days:-}" \ + --arg created "${creation_date_display:-}" \ + --arg policy "$policy" \ + --arg delete_age "${delete_min_age:-}" \ + --arg suggested "${recommended_delete_min_age:-}" \ + --arg action "$action_phrase" \ + '{ + index: $index, + growth_gb_last_24h: (if ($growth | length) > 0 then ($growth | tonumber) else null end), + retention_days: (if ($retention | length) > 0 then ($retention | tonumber) else null end), + creation_date: (if ($created | length) > 0 then $created else null end), + ilm_policy: (if ($policy | length) > 0 then $policy else null end), + delete_min_age_days: (if ($delete_age | length) > 0 then ($delete_age | tonumber) else null end), + suggested_delete_min_age_days: (if ($suggested | length) > 0 then ($suggested | tonumber) else null end), + recommendation: (if ($action | length) > 0 then $action else null end) + }') + recommendation_records+=("$record") + done + fi + + if [ ${#recommendation_records[@]} -gt 0 ]; then + recommendations_ready=true + recommendations_json=$(printf '%s\n' "${recommendation_records[@]}" | jq -s '.') + else + if [ -n "$recommendations_reason" ]; then + recommendations_message="$recommendations_reason Unable to retrieve detailed growth data from so-elasticsearch-indices-growth." + else + recommendations_message="Unable to retrieve growth data from so-elasticsearch-indices-growth while near the low watermark threshold." 
+ fi + fi +fi + +if [ "$JSON_OUTPUT" = true ]; then + jq -n \ + --arg indexed_storage_source "$indexed_storage_source" \ + --arg current_gb "$(bytes_to_gb "$cluster_storage_size")" \ + --arg oldest_index_days "$oldest_index_days" \ + --arg estimated_retention_days "$estimated_retention_days" \ + --arg daily_growth_gb "$(bytes_to_gb "$daily_growth_bytes")" \ + --arg daily_ilm_delete_gb "$daily_ilm_delete_gb" \ + --arg net_growth_gb "$(bytes_to_gb "$net_growth_bytes")" \ + --arg ilm_delete_7d_gb "$ilm_delete_7d_gb" \ + --arg ilm_delete_immediate_gb "$ilm_delete_immediate_gb" \ + --arg ilm_delete_scheduled_7d_gb "$ilm_delete_scheduled_7d_gb" \ + --arg ilm_delete_scheduled_30d_gb "$ilm_delete_scheduled_30d_gb" \ + --arg ilm_delete_30d_gb "$ilm_delete_30d_gb" \ + --arg ilm_window_daily_gb "$ilm_window_daily_gb" \ + --arg ilm_impact_pct "$ilm_impact_pct" \ + --arg ilm_rate_variance_pct "$ilm_rate_variance_pct" \ + --arg growth_window "$history_label" \ + --arg cluster_total_gb "$(bytes_to_gb "$cluster_total")" \ + --arg cluster_used_gb "$(bytes_to_gb "$cluster_used")" \ + --arg cluster_remaining_gb "$(bytes_to_gb "$cluster_remaining")" \ + --arg cluster_low_threshold_gb "$(bytes_to_gb "$cluster_low_threshold")" \ + --arg cluster_high_threshold_gb "$cluster_high_threshold_gb" \ + --arg cluster_over_low_gb "$cluster_over_low_gb" \ + --arg cluster_over_high_gb "$cluster_over_high_gb" \ + --arg shard_usage_percent "$shard_usage_percent" \ + --arg low_watermark "$low" \ + --arg high_watermark "$high" \ + --arg flood_watermark "$flood" \ + --arg days_to_low "${days_to_low:-null}" \ + --arg days_to_low_gross "${days_to_low_gross:-null}" \ + --arg estimated_date "${target_date:-null}" \ + --arg recommendation_message "$recommendations_message" \ + --argjson total_shards "$total_shards" \ + --argjson max_shard_capacity "$max_shard_capacity" \ + --argjson data_node_count "$data_node_count" \ + --argjson max_shards_per_node "$max_shards_per_node" \ + --argjson ilm_indices_7d 
"$ilm_indices_7d" \ + --argjson ilm_indices_immediate "$ilm_indices_immediate" \ + --argjson ilm_indices_scheduled_7d "$ilm_indices_scheduled_7d" \ + --argjson ilm_indices_scheduled_30d "$ilm_indices_scheduled_30d" \ + --argjson ilm_indices_30d "$ilm_indices_30d" \ + --argjson ilm_shards_7d "$ilm_shards_7d" \ + --argjson ilm_shards_30d "$ilm_shards_30d" \ + --argjson ilm_shards_immediate "$ilm_shards_immediate" \ + --argjson ilm_shards_scheduled_7d "$ilm_shards_scheduled_7d" \ + --argjson ilm_shards_scheduled_30d "$ilm_shards_scheduled_30d" \ + --arg daily_shard_creation "$daily_shard_creation" \ + --argjson recommendations "$recommendations_json" \ + --argjson recommendations_triggered "$recommendations_triggered_json" \ + ' { + indexed_storage_gb: ($current_gb | tonumber), + indexed_storage_source: $indexed_storage_source, + oldest_index_days: (if ($oldest_index_days | length) > 0 then ($oldest_index_days | tonumber) else null end), + estimated_retention_days: (if ($estimated_retention_days | length) > 0 then ($estimated_retention_days | tonumber) else null end), + growth: { + daily_growth_gb: ($daily_growth_gb | tonumber), + daily_ilm_delete_gb: (if ($daily_ilm_delete_gb | length) > 0 then ($daily_ilm_delete_gb | tonumber) else null end), + net_growth_gb: (if ($net_growth_gb | length) > 0 then ($net_growth_gb | tonumber) else null end), + daily_shard_creation: (if ($daily_shard_creation | length) > 0 then ($daily_shard_creation | tonumber) else null end), + }, + ilm: { + deleting_now: { + indices: $ilm_indices_immediate, + storage_gb: (if ($ilm_delete_immediate_gb | length) > 0 then ($ilm_delete_immediate_gb | tonumber) else null end), + shards: $ilm_shards_immediate + }, + scheduled_7d: { + indices: $ilm_indices_scheduled_7d, + storage_gb: (if ($ilm_delete_scheduled_7d_gb | length) > 0 then ($ilm_delete_scheduled_7d_gb | tonumber) else null end), + shards: $ilm_shards_scheduled_7d + }, + scheduled_30d: { + indices: $ilm_indices_scheduled_30d, + storage_gb: (if 
($ilm_delete_scheduled_30d_gb | length) > 0 then ($ilm_delete_scheduled_30d_gb | tonumber) else null end), + shards: $ilm_shards_scheduled_30d + }, + indices_to_delete_7d: $ilm_indices_7d, + storage_to_delete_7d_gb: (if ($ilm_delete_7d_gb | length) > 0 then ($ilm_delete_7d_gb | tonumber) else null end), + shards_to_delete_7d: $ilm_shards_7d, + total_30d_indices: $ilm_indices_30d, + total_30d_storage_gb: (if ($ilm_delete_30d_gb | length) > 0 then ($ilm_delete_30d_gb | tonumber) else null end), + total_30d_shards: $ilm_shards_30d, + percent_of_current_data: (if ($ilm_impact_pct | length) > 0 then ($ilm_impact_pct | tonumber) else null end), + windowed_daily_avg_gb: (if ($ilm_window_daily_gb | length) > 0 then ($ilm_window_daily_gb | tonumber) else null end), + }, + cluster: { + total_gb: ($cluster_total_gb | tonumber), + used_gb: ($cluster_used_gb | tonumber), + remaining_before_low_watermark_gb: (if ($cluster_remaining_gb | length) > 0 then ($cluster_remaining_gb | tonumber) else null end), + low_watermark_threshold_gb: (if ($cluster_low_threshold_gb | length) > 0 then ($cluster_low_threshold_gb | tonumber) else null end), + high_watermark_threshold_gb: (if ($cluster_high_threshold_gb | length) > 0 then ($cluster_high_threshold_gb | tonumber) else null end), + over_low_watermark_gb: (if ($cluster_over_low_gb | length) > 0 then ($cluster_over_low_gb | tonumber) else null end), + over_high_watermark_gb: (if ($cluster_over_high_gb | length) > 0 then ($cluster_over_high_gb | tonumber) else null end), + low_watermark_setting: $low_watermark, + high_watermark_setting: $high_watermark, + flood_watermark_setting: $flood_watermark, + shards: { + current: $total_shards, + max_capacity: $max_shard_capacity, + usage_percent: (if ($shard_usage_percent | length) > 0 then ($shard_usage_percent | tonumber) else null end), + data_nodes: $data_node_count, + max_shards_per_node: $max_shards_per_node + } + }, + projection: { + days_to_low_watermark_net: (if $days_to_low == "null" or 
$days_to_low == "stable" then $days_to_low else ($days_to_low | tonumber) end), + days_to_low_watermark_gross: (if $days_to_low_gross == "null" then null else ($days_to_low_gross | tonumber) end), + estimated_breach_date: (if $estimated_date == "null" then null else $estimated_date end) + }, + recommendations: { + triggered: $recommendations_triggered, + message: (if ($recommendation_message | length) > 0 then $recommendation_message else null end), + indices: $recommendations + } + }' +else + log_title "LOG" "Storage Overview" + + indexed_gb_display=$(bytes_to_gb "$cluster_storage_size") + echo -e "${BOLD}Indexed data size:${NC} ${indexed_gb_display} GB (Elasticsearch)" + echo -e "${BOLD}Cluster capacity:${NC} $(bytes_to_gb "$cluster_total") GB total" + echo -e "${BOLD}Cluster used:${NC} $(bytes_to_gb "$cluster_used") GB" + echo -e "${BOLD}Low watermark:${NC} $low ($(bytes_to_gb "$cluster_low_threshold") GB threshold)" + if [ "$cluster_over_low_flag" -eq 1 ]; then + if [ "$cluster_over_high_flag" -eq 1 ]; then + echo -e "${BOLD}Remaining space:${NC} ${REDBOLD}${cluster_over_high_gb} GB${NC} OVER the high watermark" + else + echo -e "${BOLD}Remaining space:${NC} ${YELLOWBOLD}${cluster_over_low_gb} GB${NC} OVER the low watermark" + fi + else + echo -e "${BOLD}Remaining space:${NC} $(bytes_to_gb "$cluster_remaining") GB before low watermark" + fi + + # Display shard capacity information + shard_warning_flag=$(awk -v pct="$shard_usage_percent" 'BEGIN { if (pct + 0 >= 80) print 1; else print 0 }') + if [ "$shard_warning_flag" -eq 1 ]; then + echo -e "${BOLD}Cluster shards:${NC} ${YELLOW}${total_shards} / ${max_shard_capacity} (${shard_usage_percent}%)${NC}" + else + echo -e "${BOLD}Cluster shards:${NC} ${total_shards} / ${max_shard_capacity} (${shard_usage_percent}%)" + fi + + # Display data nodes with roles (only data-related roles) + if [ "$data_node_count" -gt 0 ]; then + echo -e "${BOLD}Cluster data nodes:${NC} ${data_node_count}" + for i in 
"${!data_node_names[@]}"; do + node_name="${data_node_names[$i]}" + node_role="${data_node_roles[$i]}" + expanded_roles=$(expand_node_roles "$node_role") + echo -e " ${node_name}: ${expanded_roles}" + done + fi + + log_title "LOG" "ES Growth" + + echo -e "${BOLD}Daily growth rate:${NC} $(bytes_to_gb "$daily_growth_bytes") GB/day" + + if [ "$daily_ilm_delete_bytes" -gt 0 ]; then + echo -e "${BOLD}ILM deletion rate:${NC} ${daily_ilm_delete_gb} GB/day (scheduled)" + echo -e "${BOLD}Net growth rate:${NC} $(bytes_to_gb "$net_growth_bytes") GB/day" + else + echo -e "${BOLD}ILM deletion rate:${NC} 0.00 GB/day (scheduled)" + echo -e "${BOLD}Net growth rate:${NC} $(bytes_to_gb "$net_growth_bytes") GB/day" + fi + + # Display daily shards + if [ -n "$daily_shard_creation" ] && [ "$(awk -v d="$daily_shard_creation" 'BEGIN { if (d > 0) print 1; else print 0 }')" -eq 1 ]; then + daily_shard_creation_rounded=$(awk -v d="$daily_shard_creation" 'BEGIN { printf "%.0f", d }') + echo -e "${BOLD}Daily shard creation:${NC} ~${daily_shard_creation_rounded} shards/day" + fi + + if [ "$ilm_indices_immediate" -gt 0 ]; then + echo -e "${BOLD}Deleting now:${NC} $ilm_indices_immediate indices (~${ilm_delete_immediate_gb} GB, $ilm_shards_immediate shards)" + fi + if [ "$ilm_indices_7d" -gt 0 ]; then + echo -e "${BOLD}Storage to be freed (7d):${NC} $ilm_indices_7d indices (~${ilm_delete_7d_gb} GB, $ilm_shards_7d shards)" + fi + + log_title "LOG" "Retention Projection" + + if [ -n "$oldest_index_days" ]; then + oldest_days_rounded=$(awk -v d="$oldest_index_days" 'BEGIN { printf "%.0f", d }') + if [ -n "$oldest_index_name" ]; then + echo -e "${BOLD}Oldest index:${NC} ~${oldest_days_rounded} days (${oldest_index_name})" + else + echo -e "${BOLD}Oldest index:${NC} ~${oldest_days_rounded} days (.ds-logs-* only)" + fi + + if [ -n "$estimated_retention_days" ]; then + estimated_days_rounded=$(awk -v d="$estimated_retention_days" 'BEGIN { printf "%.0f", d }') + echo -e "${BOLD}Estimated retention:${NC} 
~${estimated_days_rounded} days (until configured low watermark setting)" + fi + echo + fi + + if [ "$days_to_low" = "stable" ]; then + if [ "$net_growth_bytes" -lt 0 ]; then + shrink_rate_gb=$(bytes_to_gb "${net_growth_bytes#-}") + log_title "OK" "Cluster is shrinking - ILM deletions exceed growth" + echo + echo -e "${BOLD}Storage trend:${NC} Decreasing at ~${shrink_rate_gb} GB/day" + echo -e "${BOLD}Note:${NC} Current ILM policies are reclaiming more space than incoming data consumes." + if [ "$cluster_over_low_bytes" -gt 0 ] 2>/dev/null; then + recovery_days=$(awk -v excess="$cluster_over_low_bytes" -v rate="${net_growth_bytes#-}" 'BEGIN { + if (rate <= 0) { print ""; exit } + printf "%.1f", excess / rate + }') + if [ -n "$recovery_days" ]; then + echo -e "${BOLD}Recovery time:${NC} Estimated ${recovery_days} days to fall below the low watermark if trend continues" + fi + fi + else + log_title "OK" "Cluster is in equilibrium - ILM deletions balance growth" + echo + echo -e "${BOLD}Storage trend:${NC} Stable (net growth ~0 GB/day)" + echo -e "${BOLD}Note:${NC} Current ILM policies are keeping storage steady." + fi + elif [ -z "$days_to_low" ]; then + if [ "$net_growth_bytes" -lt 0 ] && [ "$daily_ilm_delete_bytes" -gt 0 ]; then + shrink_rate_gb=$(bytes_to_gb "${net_growth_bytes#-}") + log_title "OK" "Cluster is shrinking - ILM deletions exceed growth" + echo + echo -e "${BOLD}Storage trend:${NC} Decreasing at ~${shrink_rate_gb} GB/day" + echo -e "${BOLD}Note:${NC} Storage is expected to continue decreasing due to ILM policies." + elif [ "$daily_growth_bytes" -le 0 ]; then + log_title "WARN" "Unable to project: Growth rate is zero or negative" + elif [ "$(echo "$cluster_remaining <= 0" | bc -l 2>/dev/null || awk -v r="$cluster_remaining" 'BEGIN { if (r <= 0) print 1; else print 0 }')" -eq 1 ]; then + log_title "ERROR" "Cluster already at low watermark threshold! Review recommendations below and consider updating ILM." 
+ else + log_title "WARN" "Unable to calculate projection" + fi + else + if (( $(echo "$days_to_low < 7" | bc -l 2>/dev/null || awk -v d="$days_to_low" 'BEGIN { if (d < 7) print 1; else print 0 }') )); then + log_title "ERROR" "Low watermark breach estimated in ~$days_to_low days (${target_date:-N/A})" + elif (( $(echo "$days_to_low < 14" | bc -l 2>/dev/null || awk -v d="$days_to_low" 'BEGIN { if (d < 14) print 1; else print 0 }') )); then + log_title "WARN" "Low watermark breach estimated in ~$days_to_low days (${target_date:-N/A})" + else + log_title "OK" "Low watermark breach estimated in ~$days_to_low days (${target_date:-N/A})" + fi + echo + fi + + if [ "$recommendations_triggered" = true ]; then + log_title "LOG" "Recommendations" + if [ "$recommendations_ready" = true ]; then + echo -e "${BOLD}Action:${NC} Reduce retention on the fastest-growing indices to reduce overall storage usage." + for rec_line in "${recommendation_lines[@]}"; do + echo -e "$rec_line" + done + else + if [ -n "$recommendations_message" ]; then + echo -e "${BOLD}Note:${NC} $recommendations_message" + fi + fi + echo + fi + + if [ "$VERBOSE" = true ]; then + log_title "LOG" "Scheduled Deletions (Detailed)" + + if [ ${#immediate_indices_names[@]} -gt 0 ]; then + echo -e "${BOLD}Deleting Now (in delete phase):${NC}" + echo + total_immediate_mb=0 + for i in "${!immediate_indices_names[@]}"; do + index_name="${immediate_indices_names[$i]}" + size_bytes="${immediate_indices_sizes[$i]}" + size_mb=$(awk -v b="$size_bytes" 'BEGIN { printf "%.2f", b / 1024 / 1024 }') + total_immediate_mb=$(awk -v total="$total_immediate_mb" -v size="$size_mb" 'BEGIN { printf "%.2f", total + size }') + printf " %-60s %10s MB\n" "$index_name" "$size_mb" + done + echo -e "${BOLD}Total:${NC} ${total_immediate_mb} MB (${#immediate_indices_names[@]} indices)" + echo + fi + + if [ ${#scheduled_indices_names[@]} -gt 0 ]; then + echo -e "${BOLD}Scheduled for Deletion (≤7 days):${NC}" + echo + total_scheduled_mb=0 + # Sort 
by days_until deletion + sorted_indices=() + for i in "${!scheduled_indices_names[@]}"; do + sorted_indices+=("${scheduled_indices_days[$i]}|${scheduled_indices_names[$i]}|${scheduled_indices_sizes[$i]}") + done + OLD_IFS="$IFS" + IFS=$'\n' sorted_indices=($(sort -t'|' -k1 -n <<<"${sorted_indices[*]}")) + IFS="$OLD_IFS" + + for entry in "${sorted_indices[@]}"; do + IFS='|' read -r days_until index_name size_bytes <<< "$entry" + size_mb=$(awk -v b="$size_bytes" 'BEGIN { printf "%.2f", b / 1024 / 1024 }') + total_scheduled_mb=$(awk -v total="$total_scheduled_mb" -v size="$size_mb" 'BEGIN { printf "%.2f", total + size }') + days_display=$(awk -v d="$days_until" 'BEGIN { printf "%.1f", d }') + printf " %-55s %10s MB (in ~%s days)\n" "$index_name" "$size_mb" "$days_display" + done + echo -e "${BOLD}Total:${NC} ${total_scheduled_mb} MB (${#scheduled_indices_names[@]} indices)" + echo + fi + + if [ ${#immediate_indices_names[@]} -eq 0 ] && [ ${#scheduled_indices_names[@]} -eq 0 ]; then + echo -e "No indices scheduled for deletion within the next 7 days." 
+ echo + fi + fi + echo +fi + +exit 0 \ No newline at end of file diff --git a/salt/hypervisor/map.jinja b/salt/hypervisor/map.jinja index dae3985d4..087fd7bf7 100644 --- a/salt/hypervisor/map.jinja +++ b/salt/hypervisor/map.jinja @@ -13,6 +13,7 @@ {# Import defaults.yaml for model hardware capabilities #} {% import_yaml 'hypervisor/defaults.yaml' as DEFAULTS %} +{% set HYPERVISORMERGED = salt['pillar.get']('hypervisor', default=DEFAULTS.hypervisor, merge=True) %} {# Get hypervisor nodes from pillar #} {% set NODES = salt['pillar.get']('hypervisor:nodes', {}) %} @@ -30,9 +31,10 @@ {% set model = '' %} {% if grains %} {% set minion_id = grains.keys() | first %} - {% set model = grains[minion_id].get('sosmodel', '') %} + {% set model = grains[minion_id].get('sosmodel', grains[minion_id].get('byodmodel', '')) %} {% endif %} - {% set model_config = DEFAULTS.hypervisor.model.get(model, {}) %} + + {% set model_config = HYPERVISORMERGED.model.get(model, {}) %} {# Get VM list from VMs file #} {% set vms = {} %} @@ -56,10 +58,26 @@ {% set role = vm.get('role', '') %} {% do salt.log.debug('salt/hypervisor/map.jinja: Processing VM - hostname: ' ~ hostname ~ ', role: ' ~ role) %} - {# Load VM configuration from config file #} + {# Try to load VM configuration from config file first, then .error file if config doesn't exist #} {% set vm_file = 'hypervisor/hosts/' ~ hypervisor ~ '/' ~ hostname ~ '_' ~ role %} + {% set vm_error_file = vm_file ~ '.error' %} {% do salt.log.debug('salt/hypervisor/map.jinja: VM config file: ' ~ vm_file) %} - {% import_json vm_file as vm_state %} + + {# Check if base config file exists #} + {% set config_exists = salt['file.file_exists']('/opt/so/saltstack/local/salt/' ~ vm_file) %} + {% set error_exists = salt['file.file_exists']('/opt/so/saltstack/local/salt/' ~ vm_error_file) %} + + {% set vm_state = none %} + {% if config_exists %} + {% import_json vm_file as vm_state %} + {% do salt.log.debug('salt/hypervisor/map.jinja: Loaded VM config from base 
file') %} + {% elif error_exists %} + {% import_json vm_error_file as vm_state %} + {% do salt.log.debug('salt/hypervisor/map.jinja: Loaded VM config from .error file') %} + {% else %} + {% do salt.log.warning('salt/hypervisor/map.jinja: No config or error file found for VM ' ~ hostname ~ '_' ~ role) %} + {% endif %} + {% if vm_state %} {% do salt.log.debug('salt/hypervisor/map.jinja: VM config content: ' ~ vm_state | tojson) %} {% set vm_data = {'config': vm_state.config} %} @@ -83,7 +101,7 @@ {% endif %} {% do vms.update({hostname ~ '_' ~ role: vm_data}) %} {% else %} - {% do salt.log.debug('salt/hypervisor/map.jinja: Config file empty: ' ~ vm_file) %} + {% do salt.log.debug('salt/hypervisor/map.jinja: Skipping VM ' ~ hostname ~ '_' ~ role ~ ' - no config available') %} {% endif %} {% endfor %} diff --git a/salt/hypervisor/tools/sbin/so-nvme-raid1.sh b/salt/hypervisor/tools/sbin/so-nvme-raid1.sh index cc9916a4c..fe96c063b 100644 --- a/salt/hypervisor/tools/sbin/so-nvme-raid1.sh +++ b/salt/hypervisor/tools/sbin/so-nvme-raid1.sh @@ -30,7 +30,9 @@ # # WARNING: This script will DESTROY all data on the target drives! # -# USAGE: sudo ./so-nvme-raid1.sh +# USAGE: +# sudo ./so-nvme-raid1.sh # Normal operation +# sudo ./so-nvme-raid1.sh --force-cleanup # Force cleanup of existing RAID # ################################################################# @@ -41,6 +43,19 @@ set -e RAID_ARRAY_NAME="md0" RAID_DEVICE="/dev/${RAID_ARRAY_NAME}" MOUNT_POINT="/nsm" +FORCE_CLEANUP=false + +# Parse command line arguments +for arg in "$@"; do + case $arg in + --force-cleanup) + FORCE_CLEANUP=true + shift + ;; + *) + ;; + esac +done # Function to log messages log() { @@ -55,6 +70,91 @@ check_root() { fi } +# Function to force cleanup all RAID components +force_cleanup_raid() { + log "=== FORCE CLEANUP MODE ===" + log "This will destroy all RAID configurations and data on target drives!" 
+ + # Stop all MD arrays + log "Stopping all MD arrays" + mdadm --stop --scan 2>/dev/null || true + + # Wait for arrays to stop + sleep 2 + + # Remove any running md devices + for md in /dev/md*; do + if [ -b "$md" ]; then + log "Stopping $md" + mdadm --stop "$md" 2>/dev/null || true + fi + done + + # Force cleanup both NVMe drives + for device in "/dev/nvme0n1" "/dev/nvme1n1"; do + log "Force cleaning $device" + + # Kill any processes using the device + fuser -k "${device}"* 2>/dev/null || true + + # Unmount any mounted partitions + for part in "${device}"*; do + if [ -b "$part" ]; then + umount -f "$part" 2>/dev/null || true + fi + done + + # Force zero RAID superblocks on partitions + for part in "${device}"p*; do + if [ -b "$part" ]; then + log "Zeroing RAID superblock on $part" + mdadm --zero-superblock --force "$part" 2>/dev/null || true + fi + done + + # Zero superblock on the device itself + log "Zeroing RAID superblock on $device" + mdadm --zero-superblock --force "$device" 2>/dev/null || true + + # Remove LVM physical volumes + pvremove -ff -y "$device" 2>/dev/null || true + + # Wipe all filesystem and partition signatures + log "Wiping all signatures from $device" + wipefs -af "$device" 2>/dev/null || true + + # Overwrite the beginning of the drive (partition table area) + log "Clearing partition table on $device" + dd if=/dev/zero of="$device" bs=1M count=10 2>/dev/null || true + + # Clear the end of the drive (backup partition table area) + local device_size=$(blockdev --getsz "$device" 2>/dev/null || echo "0") + if [ "$device_size" -gt 0 ]; then + dd if=/dev/zero of="$device" bs=512 seek=$(( device_size - 2048 )) count=2048 2>/dev/null || true + fi + + # Force kernel to re-read partition table + blockdev --rereadpt "$device" 2>/dev/null || true + partprobe -s "$device" 2>/dev/null || true + done + + # Clear mdadm configuration + log "Clearing mdadm configuration" + echo "DEVICE partitions" > /etc/mdadm.conf + + # Remove any fstab entries for the RAID 
device or mount point + log "Cleaning fstab entries" + sed -i "\|${RAID_DEVICE}|d" /etc/fstab + sed -i "\|${MOUNT_POINT}|d" /etc/fstab + + # Wait for system to settle + udevadm settle + sleep 5 + + log "Force cleanup complete!" + log "Proceeding with RAID setup..." +} + # Function to find MD arrays using specific devices find_md_arrays_using_devices() { local target_devices=("$@") @@ -205,10 +305,15 @@ check_existing_raid() { fi log "Error: $device appears to be part of an existing RAID array" - log "To reuse this device, you must first:" - log "1. Unmount any filesystems" - log "2. Stop the RAID array: mdadm --stop $array_name" - log "3. Zero the superblock: mdadm --zero-superblock ${device}p1" + log "Old RAID metadata detected but array is not running." + log "" + log "To fix this, run the script with --force-cleanup:" + log " sudo $0 --force-cleanup" + log "" + log "Or manually clean up with:" + log "1. Stop any arrays: mdadm --stop --scan" + log "2. Zero superblocks: mdadm --zero-superblock --force ${device}p1" + log "3. 
Wipe signatures: wipefs -af $device" exit 1 fi done @@ -238,7 +343,7 @@ ensure_devices_free() { done # Clear MD superblock - mdadm --zero-superblock "${device}"* 2>/dev/null || true + mdadm --zero-superblock --force "${device}"* 2>/dev/null || true # Remove LVM PV if exists pvremove -ff -y "$device" 2>/dev/null || true @@ -263,6 +368,11 @@ main() { # Check if running as root check_root + # If force cleanup flag is set, do aggressive cleanup first + if [ "$FORCE_CLEANUP" = true ]; then + force_cleanup_raid + fi + # Check for existing RAID setup check_existing_raid diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume new file mode 100644 index 000000000..2322c3a94 --- /dev/null +++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume @@ -0,0 +1,586 @@ +#!/usr/bin/python3 + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. +# +# Note: Per the Elastic License 2.0, the second limitation states: +# +# "You may not move, change, disable, or circumvent the license key functionality +# in the software, and you may not remove or obscure any functionality in the +# software that is protected by the license key." + +{% if 'vrt' in salt['pillar.get']('features', []) %} + +""" +Script for creating and attaching virtual volumes to KVM virtual machines for NSM storage. +This script provides functionality to create pre-allocated raw disk images and attach them +to VMs as virtio-blk devices for high-performance network security monitoring data storage. + +The script handles the complete volume lifecycle: +1. Volume Creation: Creates pre-allocated raw disk images using qemu-img +2. Volume Attachment: Attaches volumes to VMs as virtio-blk devices +3. 
VM Management: Stops/starts VMs as needed during the process + +This script is designed to work with Security Onion's virtualization infrastructure and is typically +used during VM provisioning to add dedicated NSM storage volumes. + +**Usage:** + so-kvm-create-volume -v -s [-S] + +**Options:** + -v, --vm Name of the virtual machine to attach the volume to (required). + -s, --size Size of the volume in GB (required, must be a positive integer). + -S, --start Start the VM after volume creation and attachment (optional). + +**Examples:** + +1. **Create and Attach 500GB Volume:** + + ```bash + so-kvm-create-volume -v vm1_sensor -s 500 + ``` + + This command creates and attaches a volume with the following settings: + - VM Name: `vm1_sensor` + - Volume Size: `500` GB + - Volume Path: `/nsm/libvirt/volumes/vm1_sensor-nsm.img` + - Device: `/dev/vdb` (virtio-blk) + - VM remains stopped after attachment + +2. **Create Volume and Start VM:** + + ```bash + so-kvm-create-volume -v vm2_sensor -s 1000 -S + ``` + + This command creates a volume and starts the VM: + - VM Name: `vm2_sensor` + - Volume Size: `1000` GB (1 TB) + - VM is started after volume attachment due to the `-S` flag + +3. **Create Large Volume for Heavy Traffic:** + + ```bash + so-kvm-create-volume -v vm3_sensor -s 2000 -S + ``` + + This command creates a large volume for high-traffic environments: + - VM Name: `vm3_sensor` + - Volume Size: `2000` GB (2 TB) + - VM is started after attachment + +**Notes:** + +- The script automatically stops the VM if it's running before creating and attaching the volume. +- Volumes are created with full pre-allocation for optimal performance. +- Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `-nsm.img`. +- Volumes are attached as `/dev/vdb` using virtio-blk for high performance. +- The script checks available disk space before creating the volume. +- Ownership is set to `qemu:qemu` with permissions `640`. 
+- Without the `-S` flag, the VM remains stopped after volume attachment. + +**Description:** + +The `so-kvm-create-volume` script creates and attaches NSM storage volumes using the following process: + +1. **Pre-flight Checks:** + - Validates input parameters (VM name, size) + - Checks available disk space in `/nsm/libvirt/volumes/` + - Ensures sufficient space for the requested volume size + +2. **VM State Management:** + - Connects to the local libvirt daemon + - Stops the VM if it's currently running + - Retrieves current VM configuration + +3. **Volume Creation:** + - Creates volume directory if it doesn't exist + - Uses `qemu-img create` with full pre-allocation + - Sets proper ownership (qemu:qemu) and permissions (640) + - Validates volume creation success + +4. **Volume Attachment:** + - Modifies VM's libvirt XML configuration + - Adds disk element with virtio-blk driver + - Configures cache='none' and io='native' for performance + - Attaches volume as `/dev/vdb` + +5. **VM Redefinition:** + - Applies the new configuration by redefining the VM + - Optionally starts the VM if requested + - Emits deployment status events for monitoring + +6. 
**Error Handling:** + - Validates all input parameters + - Checks disk space before creation + - Handles volume creation failures + - Handles volume attachment failures + - Provides detailed error messages for troubleshooting + +**Exit Codes:** + +- `0`: Success +- `1`: An error occurred during execution + +**Logging:** + +- Logs are written to `/opt/so/log/hypervisor/so-kvm-create-volume.log` +- Both file and console logging are enabled for real-time monitoring +- Log entries include timestamps and severity levels +- Log prefixes: VOLUME:, VM:, HARDWARE:, SPACE: +- Detailed error messages are logged for troubleshooting +""" + +import argparse +import sys +import os +import libvirt +import logging +import socket +import subprocess +import pwd +import grp +import xml.etree.ElementTree as ET +from io import StringIO +from so_vm_utils import start_vm, stop_vm +from so_logging_utils import setup_logging + +# Get hypervisor name from local hostname +HYPERVISOR = socket.gethostname() + +# Volume storage directory +VOLUME_DIR = '/nsm/libvirt/volumes' + +# Custom exception classes +class InsufficientSpaceError(Exception): + """Raised when there is insufficient disk space for volume creation.""" + pass + +class VolumeCreationError(Exception): + """Raised when volume creation fails.""" + pass + +class VolumeAttachmentError(Exception): + """Raised when volume attachment fails.""" + pass + +# Custom log handler to capture output +class StringIOHandler(logging.Handler): + def __init__(self): + super().__init__() + self.strio = StringIO() + + def emit(self, record): + msg = self.format(record) + self.strio.write(msg + '\n') + + def get_value(self): + return self.strio.getvalue() + +def parse_arguments(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description='Create and attach a virtual volume to a KVM virtual machine for NSM storage.') + parser.add_argument('-v', '--vm', required=True, help='Name of the virtual machine to attach the volume to.') + 
parser.add_argument('-s', '--size', type=int, required=True, help='Size of the volume in GB (must be a positive integer).') + parser.add_argument('-S', '--start', action='store_true', help='Start the VM after volume creation and attachment.') + args = parser.parse_args() + + # Validate size is positive + if args.size <= 0: + parser.error("Volume size must be a positive integer.") + + return args + +def check_disk_space(size_gb, logger): + """ + Check if there is sufficient disk space available for volume creation. + + Args: + size_gb: Size of the volume in GB + logger: Logger instance + + Raises: + InsufficientSpaceError: If there is not enough disk space + """ + try: + stat = os.statvfs(VOLUME_DIR) + # Available space in bytes + available_bytes = stat.f_bavail * stat.f_frsize + # Required space in bytes (add 10% buffer) + required_bytes = size_gb * 1024 * 1024 * 1024 * 1.1 + + available_gb = available_bytes / (1024 * 1024 * 1024) + required_gb = required_bytes / (1024 * 1024 * 1024) + + logger.info(f"SPACE: Available: {available_gb:.2f} GB, Required: {required_gb:.2f} GB") + + if available_bytes < required_bytes: + raise InsufficientSpaceError( + f"Insufficient disk space. Available: {available_gb:.2f} GB, Required: {required_gb:.2f} GB" + ) + + logger.info(f"SPACE: Sufficient disk space available for {size_gb} GB volume") + + except OSError as e: + logger.error(f"SPACE: Failed to check disk space: {e}") + raise + +def create_volume_file(vm_name, size_gb, logger): + """ + Create a pre-allocated raw disk image for the VM. 
+ + Args: + vm_name: Name of the VM + size_gb: Size of the volume in GB + logger: Logger instance + + Returns: + Path to the created volume file + + Raises: + VolumeCreationError: If volume creation fails + """ + # Define volume path (directory already created in main()) + volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm.img") + + # Check if volume already exists + if os.path.exists(volume_path): + logger.error(f"VOLUME: Volume already exists: {volume_path}") + raise VolumeCreationError(f"Volume already exists: {volume_path}") + + logger.info(f"VOLUME: Creating {size_gb} GB volume at {volume_path}") + + # Create volume using qemu-img with full pre-allocation + try: + cmd = [ + 'qemu-img', 'create', + '-f', 'raw', + '-o', 'preallocation=full', + volume_path, + f"{size_gb}G" + ] + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=True + ) + + logger.info(f"VOLUME: Volume created successfully") + if result.stdout: + logger.debug(f"VOLUME: qemu-img output: {result.stdout.strip()}") + + except subprocess.CalledProcessError as e: + logger.error(f"VOLUME: Failed to create volume: {e}") + if e.stderr: + logger.error(f"VOLUME: qemu-img error: {e.stderr.strip()}") + raise VolumeCreationError(f"Failed to create volume: {e}") + + # Set ownership to qemu:qemu + try: + qemu_uid = pwd.getpwnam('qemu').pw_uid + qemu_gid = grp.getgrnam('qemu').gr_gid + os.chown(volume_path, qemu_uid, qemu_gid) + logger.info(f"VOLUME: Set ownership to qemu:qemu") + except (KeyError, OSError) as e: + logger.error(f"VOLUME: Failed to set ownership: {e}") + raise VolumeCreationError(f"Failed to set ownership: {e}") + + # Set permissions to 640 + try: + os.chmod(volume_path, 0o640) + logger.info(f"VOLUME: Set permissions to 640") + except OSError as e: + logger.error(f"VOLUME: Failed to set permissions: {e}") + raise VolumeCreationError(f"Failed to set permissions: {e}") + + # Verify volume was created + if not os.path.exists(volume_path): + logger.error(f"VOLUME: 
Volume file not found after creation: {volume_path}") + raise VolumeCreationError(f"Volume file not found after creation: {volume_path}") + + volume_size = os.path.getsize(volume_path) + logger.info(f"VOLUME: Volume created: {volume_path} ({volume_size} bytes)") + + return volume_path + +def attach_volume_to_vm(conn, vm_name, volume_path, logger): + """ + Attach the volume to the VM's libvirt XML configuration. + + Args: + conn: Libvirt connection + vm_name: Name of the VM + volume_path: Path to the volume file + logger: Logger instance + + Raises: + VolumeAttachmentError: If volume attachment fails + """ + try: + # Get the VM domain + dom = conn.lookupByName(vm_name) + + # Get the XML description of the VM + xml_desc = dom.XMLDesc() + root = ET.fromstring(xml_desc) + + # Find the devices element + devices_elem = root.find('./devices') + if devices_elem is None: + logger.error("VM: Could not find element in XML") + raise VolumeAttachmentError("Could not find element in VM XML") + + # Log ALL devices with PCI addresses to find conflicts + logger.info("DISK_DEBUG: Examining ALL devices with PCI addresses") + for device in devices_elem: + address = device.find('./address') + if address is not None and address.get('type') == 'pci': + bus = address.get('bus', 'unknown') + slot = address.get('slot', 'unknown') + function = address.get('function', 'unknown') + logger.info(f"DISK_DEBUG: Device {device.tag}: bus={bus}, slot={slot}, function={function}") + + # Log existing disk configuration for debugging + logger.info("DISK_DEBUG: Examining existing disk configuration") + existing_disks = devices_elem.findall('./disk') + for idx, disk in enumerate(existing_disks): + target = disk.find('./target') + source = disk.find('./source') + address = disk.find('./address') + + dev_name = target.get('dev') if target is not None else 'unknown' + source_file = source.get('file') if source is not None else 'unknown' + + if address is not None: + slot = address.get('slot', 'unknown') + 
bus = address.get('bus', 'unknown') + logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, slot={slot}, bus={bus}") + else: + logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, no address element") + + # Check if vdb already exists + for disk in devices_elem.findall('./disk'): + target = disk.find('./target') + if target is not None and target.get('dev') == 'vdb': + logger.error("VM: Device vdb already exists in VM configuration") + raise VolumeAttachmentError("Device vdb already exists in VM configuration") + + logger.info(f"VM: Attaching volume to {vm_name} as /dev/vdb") + + # Create disk element + disk_elem = ET.SubElement(devices_elem, 'disk', attrib={ + 'type': 'file', + 'device': 'disk' + }) + + # Add driver element + ET.SubElement(disk_elem, 'driver', attrib={ + 'name': 'qemu', + 'type': 'raw', + 'cache': 'none', + 'io': 'native' + }) + + # Add source element + ET.SubElement(disk_elem, 'source', attrib={ + 'file': volume_path + }) + + # Add target element + ET.SubElement(disk_elem, 'target', attrib={ + 'dev': 'vdb', + 'bus': 'virtio' + }) + + # Add address element + # Use bus 0x07 with slot 0x00 to ensure NSM volume appears after OS disk (which is on bus 0x04) + # Bus 0x05 is used by memballoon, bus 0x06 is used by rng device + # Libvirt requires slot <= 0 for non-zero buses + # This ensures vda = OS disk, vdb = NSM volume + ET.SubElement(disk_elem, 'address', attrib={ + 'type': 'pci', + 'domain': '0x0000', + 'bus': '0x07', + 'slot': '0x00', + 'function': '0x0' + }) + + logger.info(f"HARDWARE: Added disk configuration for vdb") + + # Log disk ordering after adding new disk + logger.info("DISK_DEBUG: Disk configuration after adding NSM volume") + all_disks = devices_elem.findall('./disk') + for idx, disk in enumerate(all_disks): + target = disk.find('./target') + source = disk.find('./source') + address = disk.find('./address') + + dev_name = target.get('dev') if target is not None else 'unknown' + source_file = 
source.get('file') if source is not None else 'unknown' + + if address is not None: + slot = address.get('slot', 'unknown') + bus = address.get('bus', 'unknown') + logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, slot={slot}, bus={bus}") + else: + logger.info(f"DISK_DEBUG: Disk {idx}: dev={dev_name}, source={source_file}, no address element") + + # Convert XML back to string + new_xml_desc = ET.tostring(root, encoding='unicode') + + # Redefine the VM with the new XML + conn.defineXML(new_xml_desc) + logger.info(f"VM: VM redefined with volume attached") + + except libvirt.libvirtError as e: + logger.error(f"VM: Failed to attach volume: {e}") + raise VolumeAttachmentError(f"Failed to attach volume: {e}") + except Exception as e: + logger.error(f"VM: Failed to attach volume: {e}") + raise VolumeAttachmentError(f"Failed to attach volume: {e}") + +def emit_status_event(vm_name, status): + """ + Emit a deployment status event. + + Args: + vm_name: Name of the VM + status: Status message + """ + try: + subprocess.run([ + 'so-salt-emit-vm-deployment-status-event', + '-v', vm_name, + '-H', HYPERVISOR, + '-s', status + ], check=True) + except subprocess.CalledProcessError as e: + # Don't fail the entire operation if status event fails + pass + +def main(): + """Main function to orchestrate volume creation and attachment.""" + # Set up logging using the so_logging_utils library + string_handler = StringIOHandler() + string_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logger = setup_logging( + logger_name='so-kvm-create-volume', + log_file_path='/opt/so/log/hypervisor/so-kvm-create-volume.log', + log_level=logging.INFO, + format_str='%(asctime)s - %(levelname)s - %(message)s' + ) + logger.addHandler(string_handler) + + vm_name = None + + try: + # Parse arguments + args = parse_arguments() + + vm_name = args.vm + size_gb = args.size + start_vm_flag = args.start + + logger.info(f"VOLUME: Starting volume 
creation for VM '{vm_name}' with size {size_gb} GB") + + # Emit start status event + emit_status_event(vm_name, 'Volume Creation') + + # Ensure volume directory exists before checking disk space + try: + os.makedirs(VOLUME_DIR, mode=0o754, exist_ok=True) + qemu_uid = pwd.getpwnam('qemu').pw_uid + qemu_gid = grp.getgrnam('qemu').gr_gid + os.chown(VOLUME_DIR, qemu_uid, qemu_gid) + logger.debug(f"VOLUME: Ensured volume directory exists: {VOLUME_DIR}") + except Exception as e: + logger.error(f"VOLUME: Failed to create volume directory: {e}") + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + # Check disk space + check_disk_space(size_gb, logger) + + # Connect to libvirt + try: + conn = libvirt.open(None) + logger.info("VM: Connected to libvirt") + except libvirt.libvirtError as e: + logger.error(f"VM: Failed to open connection to libvirt: {e}") + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + # Stop VM if running + dom = stop_vm(conn, vm_name, logger) + + # Create volume file + volume_path = create_volume_file(vm_name, size_gb, logger) + + # Attach volume to VM + attach_volume_to_vm(conn, vm_name, volume_path, logger) + + # Start VM if -S or --start argument is provided + if start_vm_flag: + dom = conn.lookupByName(vm_name) + start_vm(dom, logger) + logger.info(f"VM: VM '{vm_name}' started successfully") + else: + logger.info("VM: Start flag not provided; VM will remain stopped") + + # Close connection + conn.close() + + # Emit success status event + emit_status_event(vm_name, 'Volume Configuration') + + logger.info(f"VOLUME: Volume creation and attachment completed successfully for VM '{vm_name}'") + + except KeyboardInterrupt: + error_msg = "Operation cancelled by user" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except InsufficientSpaceError as e: + error_msg = f"SPACE: {str(e)}" + logger.error(error_msg) + if vm_name: + 
emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except VolumeCreationError as e: + error_msg = f"VOLUME: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except VolumeAttachmentError as e: + error_msg = f"VM: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + + except Exception as e: + error_msg = f"An error occurred: {str(e)}" + logger.error(error_msg) + if vm_name: + emit_status_event(vm_name, 'Volume Configuration Failed') + sys.exit(1) + +if __name__ == '__main__': + main() + +{%- else -%} + +echo "Hypervisor nodes are a feature supported only for customers with a valid license. \ + Contact Security Onion Solutions, LLC via our website at https://securityonionsolutions.com \ + for more information about purchasing a license to enable this feature." + +{% endif -%} diff --git a/salt/influxdb/templates/dashboard-security_onion_performance.json b/salt/influxdb/templates/dashboard-security_onion_performance.json index 835aedb03..1e66b2b40 100644 --- a/salt/influxdb/templates/dashboard-security_onion_performance.json +++ b/salt/influxdb/templates/dashboard-security_onion_performance.json @@ -1 +1 @@ -[{"apiVersion":"influxdata.com/v2alpha1","kind":"Dashboard","metadata":{"name":"vivid-wilson-002001"},"spec":{"charts":[{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Uptime","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> map(fn: (r) => ({r with _value: r._value / (24 * 60 * 
60)}))\n |> group(columns: [\"host\"])\n |> last()\n |> lowestMin(n:1)"}],"staticLegend":{},"suffix":" days","width":1},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"ruby","type":"text","hex":"#BF3D5E","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Critical Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"crit\")\n |> count()"}],"staticLegend":{},"suffix":" ","width":1,"yPos":2},{"colors":[{"id":"base","name":"rainforest","type":"text","hex":"#4ED8A0"},{"id":"QCTYWuGuHkikYFsZSKMzQ","name":"rainforest","type":"text","hex":"#4ED8A0"},{"id":"QdpMyTRBb0LJ56-P5wfAW","name":"laser","type":"text","hex":"#00C9FF","value":1},{"id":"VQGwCoMrxZyP8asiOW5Cq","name":"tiger","type":"text","hex":"#F48D38","value":2},{"id":"zSO9QkesSIxrU_ntCBx2i","name":"ruby","type":"text","hex":"#BF3D5E","value":3}],"fieldOptions":[{"fieldName":"_time","visible":true},{"displayName":"Alarm","fieldName":"_check_name","visible":true},{"displayName":"Severity","fieldName":"_value","visible":true},{"displayName":"Status","fieldName":"_level","visible":true}],"height":6,"kind":"Table","name":"Alarm Status","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> drop(columns: [\"_value\"])\n |> duplicate(column: \"_level\", as: \"_value\")\n |> map(fn: (r) => ({ r with _value: if r._value == \"ok\" then 0 else if r._value == \"info\" then 1 else if r._value == \"warn\" then 2 else 3 }))\n |> group(columns: [\"_check_id\"])\n |> 
sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> keep(columns: [\"_check_name\",\"_level\",\"_value\"])"}],"staticLegend":{},"tableOptions":{"sortBy":"_check_name","verticalTimeAxis":true},"timeFormat":"YYYY-MM-DD HH:mm:ss","width":3,"yPos":4},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"B"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elasticsearch Storage Size","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"store_size_in_bytes\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"store_size_in_bytes\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> 
aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"B"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"InfluxDB Size","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"influxsize\")\n |> filter(fn: (r) => r[\"_field\"] == \"kbytes\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 1000.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"influxsize\")\n |> filter(fn: (r) => r[\"_field\"] == \"kbytes\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 1000.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":14},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System Uptime","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60)}))\n |> yield(name: \"last\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n 
|> roleFilter()\n |> group(columns: [\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60)}))\n |> yield(name: \"Trend\")"}],"shade":true,"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"lQ75rvTyd2Lq5pZjzy6LB","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"KLfpRZtiEnU2GxjPtrrzQ","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"1kLynwKxvJ3B5IeJnrBqp","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka EPS","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"controllerHosts = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> filter(fn: (r) => r[\"_value\"] == 1)\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n |> map(fn: (r) => ({r with _value: r.host}))\n |> keep(columns: [\"_value\"])\n\ncontrollerHostNames = controllerHosts |> findColumn(fn: (key) => true, column: \"_value\")\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_topics\")\n |> filter(fn: (r) => r[\"_field\"] == \"MessagesInPerSec.Count\")\n |> filter(fn: (r) => not contains(value: r.host, set: controllerHostNames))\n |> derivative(unit: 1s, nonNegative: true)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: 
[\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"controllerHosts = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> filter(fn: (r) => r[\"_value\"] == 1)\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n |> map(fn: (r) => ({r with _value: r.host}))\n |> keep(columns: [\"_value\"])\n\ncontrollerHostNames = controllerHosts |> findColumn(fn: (key) => true, column: \"_value\")\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_topics\")\n |> filter(fn: (r) => r[\"_field\"] == \"MessagesInPerSec.Count\")\n |> filter(fn: (r) => not contains(value: r.host, set: controllerHostNames))\n |> derivative(unit: 1s, nonNegative: true)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System CPU Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != 
\"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\",\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System Memory Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> 
aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Monitor Interface Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> 
aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_recv\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: 
\"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Management Interface Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: 
[\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_recv\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: 
\"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":6,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":38},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Stenographer Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> 
hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":42},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Disk Usage /","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: 
\"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"5m Load Average","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load5\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":1},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"tiger","type":"text","hex":"#F48D38","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Warning Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"warn\")\n |> count()"}],"staticLegend":{},"suffix":" 
","width":1,"xPos":1,"yPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"IO Wait","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"laser","type":"text","hex":"#00C9FF","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Informative Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"info\")\n |> count()"}],"staticLegend":{},"suffix":" ","width":1,"xPos":2,"yPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Estimated EPS In","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> hostFilter()\n |> 
derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":3},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":70},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":80},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"CPU Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":3,"yPos":2},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"kOQLOg2H4FVEE-E1_L8Kq","name":"laser","type":"threshold","hex":"#00C9FF","value":85},{"id":"5IArg2lDb8KvnphywgUXa","name":"tiger","type":"threshold","hex":"#F48D38","value":90},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"ruby","type":"threshold","hex":"#BF3D5E","value":95},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Root Disk Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n 
tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":3,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Suricata Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == 
v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":3,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Redis Queue","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":4},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elasticsearch Document 
Count","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"docs_count\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"docs_count\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric 
Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Redis Queue","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\", \"_field\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\", \"_field\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":14},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Uptime","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" 
then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime_ns\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> group(columns: [\"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60 * 1000000000)}))\n |> yield(name: \"last\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime_ns\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> group(columns: [\"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24.0 * 60.0 * 60.0 * 1000000000.0)}))\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Active Controllers","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"current\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: 
\"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Active Brokers","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveBrokerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"trend\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveBrokerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"current\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":24},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"yT5vTIlaaFChSrQvKLfqf","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"mzzUVSu3ibTph1JmQmDAQ","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"mOcnDo7l8ii6qNLFIB5rs","name":"Nineteen Eighty 
Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container CPU Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> 
hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Memory Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> 
sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b"}],"colorizeRows":true,"colors":[{"id":"0ynR6Zs0wuQ3WY0Lz-_KC","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"YiArehCNBwFm9mn8DSXSG","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"DxByY_EQW9Xs2jD5ktkG5","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n 
else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_net\")\n |> filter(fn: (r) => r[\"_field\"] == \"rx_bytes\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_net\")\n |> filter(fn: (r) => r[\"_field\"] == \"rx_bytes\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with _value: r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Disk Usage /nsm","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == 
\"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xPos":4,"yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Inbound Traffic","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\") \n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0 / (1000.0 * 1000.0)}))\n |> group(columns: [\"host\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" Mb/s","width":1,"xPos":5},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Inbound Drops","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\") \n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0 / (1000.0 * 1000.0)}))\n |> group(columns: [\"host\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" 
Mb/s","width":1,"xPos":6},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":70},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":80},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Memory Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":6,"yPos":2},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"laser","type":"threshold","hex":"#00C9FF","value":85},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"tiger","type":"threshold","hex":"#F48D38","value":90},{"id":"H7uprvKmMEh39en6X-ms_","name":"ruby","type":"threshold","hex":"#BF3D5E","value":95},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"NSM Disk Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n 
|> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":6,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Management Interface Traffic - Outbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_sent\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n 
\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_sent\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_sent\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n \n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: 
\"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":6,"widthRatio":1,"xCol":"_time","xPos":6,"yCol":"_value","yPos":38},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Zeek Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> 
filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":6,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Capture Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":7},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Zeek Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> 
highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":8},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"s"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elastic Ingest Time Spent","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_community_id_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"community.id_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == 
\"ingest_processor_stats_conditional_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"conditional_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_date_index_name_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"date.index.name_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_date_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"date_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == 
\"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_dissect_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"dissect_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_dot_expander_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"dot.expander_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_geoip_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"geoip_time\")"},{"query":"from(bucket: 
\"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_grok_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"grok_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_json_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"json_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_kv_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> 
group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"kv_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_lowercase_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"lowercase_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_rename_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"rename_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n 
|> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_script_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"script_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_user_agent_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"user.agent_time\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"1m Load Average","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: 
(r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load1\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load1\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\",\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":14,"yTickStep":1},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":" e/s"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric 
Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Logstash EPS","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"out\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: -r._value}))\n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == 
\"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"out\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: -r._value}))\n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":4,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Under Replicated Partitions","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_partition\")\n |> filter(fn: (r) => r[\"_field\"] == \"UnderReplicatedPartitions\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"partition\",\"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_partition\")\n |> filter(fn: (r) => r[\"_field\"] == \"UnderReplicatedPartitions\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"partition\",\"host\", \"role\"])\n |> yield(name: 
\"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"UAehjIsi65P8u92M_3sQY","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"_SCP8Npp4NVMx2N4mfuzX","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"BoMPg4R1KDp_UsRORdV3_","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"IO Wait","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) 
=> r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Swap Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n 
else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Monitor Interface Drops - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => 
r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"drop_in\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: 
\"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Stenographer PCAP Retention","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])"},{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Suricata Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":9},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":50},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":70},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Swap Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: 
\"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":9,"yPos":2},{"colors":[{"id":"base","name":"white","type":"text","hex":"#ffffff"}],"fieldOptions":[{"displayName":"Host","fieldName":"host","visible":true},{"displayName":"Name","fieldName":"container_name","visible":true},{"displayName":"Status","fieldName":"container_status","visible":true},{"displayName":"OOM Killed","fieldName":"_value","visible":true},{"displayName":"_start","fieldName":"_start","visible":true},{"displayName":"_stop","fieldName":"_stop","visible":true},{"displayName":"_time","fieldName":"_time","visible":true},{"displayName":"_field","fieldName":"_field","visible":true},{"displayName":"_measurement","fieldName":"_measurement","visible":true},{"displayName":"engine_host","fieldName":"engine_host","visible":true},{"displayName":"role","fieldName":"role","visible":true},{"displayName":"server_version","fieldName":"server_version","visible":true},{"displayName":"container_image","fieldName":"container_image","visible":true},{"displayName":"container_version","fieldName":"container_version","visible":true},{"displayName":"description","fieldName":"description","visible":true},{"displayName":"maintainer","fieldName":"maintainer","visible":true},{"displayName":"io.k8s.description","fieldName":"io.k8s.description","visible":true},{"displayName":"io.k8s.display-name","fieldName":"io.k8s.display-name","visible":true},{"displayName":"license","fieldName":"license","visible":true},{"displayName":"name","fieldName":"name","visible":true},{"displayName":"org.label-schema.build-date","fieldName":"org.label-schema.build-date","visible":true},{"displayName":"org.label-schema.license","fieldName":"org.label-schema.license","visible":true},{"displayName":"org.label-schema.name","fieldName":"org.label-schema.name","visible":true},{"displayName":"org.label-schema.schema-version","fieldName":"org.label-schema.schema-version","visible":true},{"displayName":"org.label-schema.url","fieldName"
:"org.label-schema.url","visible":true},{"displayName":"org.label-schema.vcs-ref","fieldName":"org.label-schema.vcs-ref","visible":true},{"displayName":"org.label-schema.vcs-url","fieldName":"org.label-schema.vcs-url","visible":true},{"displayName":"org.label-schema.vendor","fieldName":"org.label-schema.vendor","visible":true},{"displayName":"org.label-schema.version","fieldName":"org.label-schema.version","visible":true},{"displayName":"org.opencontainers.image.created","fieldName":"org.opencontainers.image.created","visible":true},{"displayName":"org.opencontainers.image.licenses","fieldName":"org.opencontainers.image.licenses","visible":true},{"displayName":"org.opencontainers.image.title","fieldName":"org.opencontainers.image.title","visible":true},{"displayName":"org.opencontainers.image.vendor","fieldName":"org.opencontainers.image.vendor","visible":true},{"displayName":"release","fieldName":"release","visible":true},{"displayName":"summary","fieldName":"summary","visible":true},{"displayName":"url","fieldName":"url","visible":true},{"displayName":"vendor","fieldName":"vendor","visible":true},{"displayName":"version","fieldName":"version","visible":true},{"displayName":"org.label-schema.usage","fieldName":"org.label-schema.usage","visible":true},{"displayName":"org.opencontainers.image.documentation","fieldName":"org.opencontainers.image.documentation","visible":true},{"displayName":"org.opencontainers.image.revision","fieldName":"org.opencontainers.image.revision","visible":true},{"displayName":"org.opencontainers.image.source","fieldName":"org.opencontainers.image.source","visible":true},{"displayName":"org.opencontainers.image.url","fieldName":"org.opencontainers.image.url","visible":true},{"displayName":"org.opencontainers.image.version","fieldName":"org.opencontainers.image.version","visible":true},{"displayName":"org.opencontainers.image.description","fieldName":"org.opencontainers.image.description","visible":true}],"height":4,"kind":"Table","name":"Mos
t Recent Container Events","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"oomkilled\")\n |> filter(fn: (r) => r[\"container_status\"] != \"running\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"container_name\", \"host\"])\n |> last()\n |> group()\n |> keep(columns: [\"_value\", \"container_name\", \"host\", \"container_status\"])"}],"staticLegend":{},"tableOptions":{"sortBy":"container_name","verticalTimeAxis":true},"timeFormat":"YYYY-MM-DD HH:mm:ss","width":3,"xPos":9,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Zeek Capture Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == 
\"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":9,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Stenographer Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> 
highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":10},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"PCAP Retention","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> hostFilter()\n |> map(fn: (r) => ({r with _value: r._value / (24.0 * 60.0 * 60.0)}))\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" days","width":1,"xPos":11}],"description":"Visualize the Security Onion grid performance metrics and alarm statuses.","name":"Security Onion Performance"}}] \ No newline at end of file +[{"apiVersion":"influxdata.com/v2alpha1","kind":"Dashboard","metadata":{"name":"vivid-wilson-002001"},"spec":{"charts":[{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Uptime","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> map(fn: (r) => ({r with _value: r._value / (24 * 60 * 60)}))\n |> group(columns: [\"host\"])\n |> last()\n |> lowestMin(n:1)"}],"staticLegend":{},"suffix":" days","width":1},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"ruby","type":"text","hex":"#BF3D5E","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Critical 
Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"crit\")\n |> count()"}],"staticLegend":{},"suffix":" ","width":1,"yPos":2},{"colors":[{"id":"base","name":"rainforest","type":"text","hex":"#4ED8A0"},{"id":"QCTYWuGuHkikYFsZSKMzQ","name":"rainforest","type":"text","hex":"#4ED8A0"},{"id":"QdpMyTRBb0LJ56-P5wfAW","name":"laser","type":"text","hex":"#00C9FF","value":1},{"id":"VQGwCoMrxZyP8asiOW5Cq","name":"tiger","type":"text","hex":"#F48D38","value":2},{"id":"zSO9QkesSIxrU_ntCBx2i","name":"ruby","type":"text","hex":"#BF3D5E","value":3}],"fieldOptions":[{"fieldName":"_time","visible":true},{"displayName":"Alarm","fieldName":"_check_name","visible":true},{"displayName":"Severity","fieldName":"_value","visible":true},{"displayName":"Status","fieldName":"_level","visible":true}],"height":6,"kind":"Table","name":"Alarm Status","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> drop(columns: [\"_value\"])\n |> duplicate(column: \"_level\", as: \"_value\")\n |> map(fn: (r) => ({ r with _value: if r._value == \"ok\" then 0 else if r._value == \"info\" then 1 else if r._value == \"warn\" then 2 else 3 }))\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> keep(columns: [\"_check_name\",\"_level\",\"_value\"])"}],"staticLegend":{},"tableOptions":{"sortBy":"_check_name","verticalTimeAxis":true},"timeFormat":"YYYY-MM-DD 
HH:mm:ss","width":3,"yPos":4},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"B"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elasticsearch Storage Size","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"store_size_in_bytes\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"store_size_in_bytes\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"B"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"InfluxDB Size","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"influxsize\")\n |> filter(fn: (r) => r[\"_field\"] == \"kbytes\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 1000.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"influxsize\")\n |> filter(fn: (r) => r[\"_field\"] == \"kbytes\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 1000.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":14},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" 
days"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System Uptime","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60)}))\n |> yield(name: \"last\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60)}))\n |> yield(name: 
\"Trend\")"}],"shade":true,"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"lQ75rvTyd2Lq5pZjzy6LB","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"KLfpRZtiEnU2GxjPtrrzQ","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"1kLynwKxvJ3B5IeJnrBqp","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka EPS","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"controllerHosts = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> filter(fn: (r) => r[\"_value\"] == 1)\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n |> map(fn: (r) => ({r with _value: r.host}))\n |> keep(columns: [\"_value\"])\n\ncontrollerHostNames = controllerHosts |> findColumn(fn: (key) => true, column: \"_value\")\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_topics\")\n |> filter(fn: (r) => r[\"_field\"] == \"MessagesInPerSec.Count\")\n |> filter(fn: (r) => not contains(value: r.host, set: controllerHostNames))\n |> derivative(unit: 1s, nonNegative: true)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"controllerHosts = from(bucket: \"telegraf/so_short_term\")\n |> range(start: 
v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> filter(fn: (r) => r[\"_value\"] == 1)\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n |> map(fn: (r) => ({r with _value: r.host}))\n |> keep(columns: [\"_value\"])\n\ncontrollerHostNames = controllerHosts |> findColumn(fn: (key) => true, column: \"_value\")\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_topics\")\n |> filter(fn: (r) => r[\"_field\"] == \"MessagesInPerSec.Count\")\n |> filter(fn: (r) => not contains(value: r.host, set: controllerHostNames))\n |> derivative(unit: 1s, nonNegative: true)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System CPU Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables 
|> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\",\"host\", \"role\"])\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"System Memory Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> 
aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Monitor Interface Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> 
aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_recv\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: 
\"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Management Interface Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: 
[\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_recv\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: 
\"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":6,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":38},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Stenographer Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> 
hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":42},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Disk Usage /","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: 
\"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","yCol":"_value","yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"5m Load Average","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load5\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":1},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"tiger","type":"text","hex":"#F48D38","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Warning Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"warn\")\n |> count()"}],"staticLegend":{},"suffix":" 
","width":1,"xPos":1,"yPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"IO Wait","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"},{"id":"z83MTSufTrlrCoEPiBXda","name":"laser","type":"text","hex":"#00C9FF","value":1}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Informative Alarms","queries":[{"query":"from(bucket: \"_monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"statuses\")\n |> filter(fn: (r) => r[\"_field\"] == \"_message\")\n |> group(columns: [\"_check_id\"])\n |> sort(columns: [\"_time\"])\n |> last()\n |> group()\n |> filter(fn: (r) => r[\"_level\"] == \"info\")\n |> count()"}],"staticLegend":{},"suffix":" ","width":1,"xPos":2,"yPos":2},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Estimated EPS In","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> hostFilter()\n |> 
derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":3},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":70},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":80},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"CPU Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_idle\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> map(fn: (r) => ({r with _value: r._value * -1.0 + 100.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":3,"yPos":2},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"kOQLOg2H4FVEE-E1_L8Kq","name":"laser","type":"threshold","hex":"#00C9FF","value":85},{"id":"5IArg2lDb8KvnphywgUXa","name":"tiger","type":"threshold","hex":"#F48D38","value":90},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"ruby","type":"threshold","hex":"#BF3D5E","value":95},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Root Disk Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n 
tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":3,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Suricata Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == 
v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":3,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"kind":"Single_Stat","name":"Redis Queue","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"width":1,"xPos":4},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elasticsearch Document 
Count","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"docs_count\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"mean\")"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_indices\")\n |> filter(fn: (r) => r[\"_field\"] == \"docs_count\")\n |> filter(fn: (r) => r[\"host\"] == r[\"node_name\"])\n |> hostFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric 
Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Redis Queue","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\", \"_field\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"redisqueue\")\n |> filter(fn: (r) => r[\"_field\"] == \"unparsed\")\n |> group(columns: [\"host\", \"_field\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":14},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Uptime","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" 
then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime_ns\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> group(columns: [\"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24 * 60 * 60 * 1000000000)}))\n |> yield(name: \"last\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime_ns\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> group(columns: [\"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> map(fn: (r) => ({r with _value: float(v: r._value) / float(v: 24.0 * 60.0 * 60.0 * 1000000000.0)}))\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Active Controllers","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"current\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveControllerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: 
\"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":2,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Active Brokers","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveBrokerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"trend\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_controller\")\n |> filter(fn: (r) => r[\"_field\"] == \"ActiveBrokerCount.Value\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"current\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":24},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"yT5vTIlaaFChSrQvKLfqf","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"mzzUVSu3ibTph1JmQmDAQ","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"mOcnDo7l8ii6qNLFIB5rs","name":"Nineteen Eighty 
Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container CPU Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> 
hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Memory Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> 
sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_percent\")\n |> filter(fn: (r) => r[\"container_status\"] == \"running\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b"}],"colorizeRows":true,"colors":[{"id":"0ynR6Zs0wuQ3WY0Lz-_KC","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"YiArehCNBwFm9mn8DSXSG","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"DxByY_EQW9Xs2jD5ktkG5","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Container Traffic - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n 
else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_net\")\n |> filter(fn: (r) => r[\"_field\"] == \"rx_bytes\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: \"mean\")"},{"query":"containerFilter = (tables=<-) =>\n if v.Container != \"(All)\" then\n tables |> filter(fn: (r) => r[\"container_name\"] == v.Container)\n else\n tables\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_net\")\n |> filter(fn: (r) => r[\"_field\"] == \"rx_bytes\")\n |> hostFilter()\n |> roleFilter()\n |> containerFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with _value: r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\", \"container_name\"])\n |> sort(columns: [\"_time\"])\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":4,"yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Disk Usage /nsm","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == 
\"used_percent\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xPos":4,"yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Inbound Traffic","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_recv\") \n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0 / (1000.0 * 1000.0)}))\n |> group(columns: [\"host\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" Mb/s","width":1,"xPos":5},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Inbound Drops","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\") \n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: r._value * 8.0 / (1000.0 * 1000.0)}))\n |> group(columns: [\"host\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" 
Mb/s","width":1,"xPos":6},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":70},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":80},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Memory Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":6,"yPos":2},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"laser","type":"threshold","hex":"#00C9FF","value":85},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"tiger","type":"threshold","hex":"#F48D38","value":90},{"id":"H7uprvKmMEh39en6X-ms_","name":"ruby","type":"threshold","hex":"#BF3D5E","value":95},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"NSM Disk Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"path\"] == \"/nsm\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n 
|> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":6,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Management Interface Traffic - Outbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_sent\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n 
\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"bytes_sent\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"manint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"bytes_sent\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n \n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: 
\"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":6,"widthRatio":1,"xCol":"_time","xPos":6,"yCol":"_value","yPos":38},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Zeek Packet Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> 
filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> hostFilter()\n |> roleFilter()\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":6,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Capture Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":7},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Zeek Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekdrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> 
highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":8},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"s"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Elastic Ingest Time Spent","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_community_id_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"community.id_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == 
\"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_conditional_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"conditional_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_date_index_name_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"date.index.name_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_date_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"date_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> 
filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_dissect_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"dissect_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_dot_expander_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"dot.expander_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_geoip_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: 
\"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"geoip_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_grok_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"grok_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_json_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"json_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == 
\"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_kv_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"kv_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_lowercase_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"lowercase_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_rename_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"rename_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: 
v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_script_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"script_time\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"elasticsearch_clusterstats_nodes\")\n |> filter(fn: (r) => r.role == \"standalone\" or r.role == \"eval\" or r.role == \"import\" or r.role == \"manager\" or r.role == \"managersearch\" or r.role == \"search\" or r.role == \"node\" or r.role == \"heavynode\")\n |> filter(fn: (r) => r[\"_field\"] == \"ingest_processor_stats_user_agent_time_in_millis\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"user.agent_time\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":10},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"sW2GqpGAsGB5Adx16jKjp","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"TsdXuXwdI5Npi9S8L4f-i","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"OGL29-SUbJ6FyQb0JzbaD","name":"Nineteen Eighty 
Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"1m Load Average","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load1\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load1\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\",\"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":14,"yTickStep":1},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":" e/s"}],"colorizeRows":true,"colors":[{"id":"xflqbsX-j3iq4ry5QOntK","name":"Do 
Androids Dream of Electric Sheep?","type":"scale","hex":"#8F8AF4"},{"id":"5H28HcITm6QVfQsXon0vq","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#A51414"},{"id":"25MrINwurNBkQqeKCkMPg","name":"Do Androids Dream of Electric Sheep?","type":"scale","hex":"#F4CF31"}],"geom":"line","height":4,"heightRatio":0.301556420233463,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Logstash EPS","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"out\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: -r._value}))\n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"in\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: 
v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"logstash_events\")\n |> filter(fn: (r) => r[\"_field\"] == \"out\")\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\") \n |> map(fn: (r) => ({r with _value: -r._value}))\n |> group(columns: [\"_field\", \"host\", \"pipeline\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.301556420233463,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":18},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear"}],"colorizeRows":true,"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":0,"height":4,"hoverDimension":"auto","kind":"Single_Stat_Plus_Line","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Kafka Under Replicated Partitions","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_partition\")\n |> filter(fn: (r) => r[\"_field\"] == \"UnderReplicatedPartitions\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"partition\",\"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"kafka_partition\")\n |> filter(fn: (r) => r[\"_field\"] == \"UnderReplicatedPartitions\")\n |> aggregateWindow(every: 
v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"partition\",\"host\", \"role\"])\n |> yield(name: \"trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":22},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"UAehjIsi65P8u92M_3sQY","name":"Nineteen Eighty Four","type":"scale","hex":"#31C0F6"},{"id":"_SCP8Npp4NVMx2N4mfuzX","name":"Nineteen Eighty Four","type":"scale","hex":"#A500A5"},{"id":"BoMPg4R1KDp_UsRORdV3_","name":"Nineteen Eighty Four","type":"scale","hex":"#FF7E27"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"IO Wait","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n 
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"cpu\"] == \"cpu-total\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_iowait\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":26},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"QDwChKZWuQV0BaJcEeSam","name":"Atlantis","type":"scale","hex":"#74D495"},{"id":"ThD0WTqKHltQEVlq9mo6K","name":"Atlantis","type":"scale","hex":"#3F3FBA"},{"id":"FBHYZiwDLKyQK3eRfUD-0","name":"Atlantis","type":"scale","hex":"#FF4D9E"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Swap Usage","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n 
else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> roleFilter()\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":30},{"axes":[{"base":"10","name":"x","scale":"linear"},{"base":"10","name":"y","scale":"linear","suffix":"b/s"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"heightRatio":0.18482490272373542,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Monitor Interface Drops - Inbound","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = 
from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: \"drop_in\"}))"},{"query":"import \"join\"\n\nhostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nmanints = from(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"node_config\")\n |> hostFilter()\n |> filter(fn: (r) => r[\"_field\"] == \"monint\")\n |> distinct()\n |> group(columns: [\"host\"])\n\ntraffic = from(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"net\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop_in\")\n |> hostFilter()\n |> roleFilter()\n |> derivative(unit: 1s, nonNegative: true, columns: [\"_value\"], timeColumn: \"_time\")\n |> map(fn: (r) => ({r with \"_value\": r._value * 8.0}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"host\"])\n\njoin.inner(left: traffic, right: manints,\n on: (l,r) => l.interface == r._value,\n as: (l, r) => ({l with _value: l._value, result: 
\"Trend\"}))"}],"staticLegend":{"colorizeRows":true,"heightRatio":0.18482490272373542,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":34},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":" days"}],"colorizeRows":true,"colors":[{"id":"3PVw3hQuZUzyar7Js3mMH","name":"Ectoplasm","type":"scale","hex":"#DA6FF1"},{"id":"O34ux-D8Xq_1-eeWRyYYH","name":"Ectoplasm","type":"scale","hex":"#00717A"},{"id":"P04RoKOHBdLdvfrfFbn0F","name":"Ectoplasm","type":"scale","hex":"#ACFF76"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Stenographer PCAP Retention","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])"},{"query":"import \"join\"\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> map(fn: (r) => ({ r with _value: r._value / (24.0 * 3600.0)}))\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> group(columns: [\"_field\",\"host\"])\n |> yield(name: 
\"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":4,"widthRatio":1,"xCol":"_time","xPos":8,"yCol":"_value","yPos":46},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Suricata Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"suridrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":9},{"colors":[{"id":"0","name":"viridian","type":"min","hex":"#32B08C"},{"id":"5IArg2lDb8KvnphywgUXa","name":"pineapple","type":"threshold","hex":"#FFB94A","value":50},{"id":"yFhH3mtavjuAZh6cEt5lx","name":"fire","type":"threshold","hex":"#DC4E58","value":70},{"id":"1","name":"ruby","type":"max","hex":"#BF3D5E","value":100}],"decimalPlaces":0,"height":4,"kind":"Gauge","name":"Swap Usage","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"swap\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n: 1)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: 
\"mean\")"}],"staticLegend":{},"suffix":"%","tickSuffix":"%","width":3,"xPos":9,"yPos":2},{"colors":[{"id":"base","name":"white","type":"text","hex":"#ffffff"}],"fieldOptions":[{"displayName":"Host","fieldName":"host","visible":true},{"displayName":"Name","fieldName":"container_name","visible":true},{"displayName":"Status","fieldName":"container_status","visible":true},{"displayName":"OOM Killed","fieldName":"_value","visible":true},{"displayName":"_start","fieldName":"_start","visible":true},{"displayName":"_stop","fieldName":"_stop","visible":true},{"displayName":"_time","fieldName":"_time","visible":true},{"displayName":"_field","fieldName":"_field","visible":true},{"displayName":"_measurement","fieldName":"_measurement","visible":true},{"displayName":"engine_host","fieldName":"engine_host","visible":true},{"displayName":"role","fieldName":"role","visible":true},{"displayName":"server_version","fieldName":"server_version","visible":true},{"displayName":"container_image","fieldName":"container_image","visible":true},{"displayName":"container_version","fieldName":"container_version","visible":true},{"displayName":"description","fieldName":"description","visible":true},{"displayName":"maintainer","fieldName":"maintainer","visible":true},{"displayName":"io.k8s.description","fieldName":"io.k8s.description","visible":true},{"displayName":"io.k8s.display-name","fieldName":"io.k8s.display-name","visible":true},{"displayName":"license","fieldName":"license","visible":true},{"displayName":"name","fieldName":"name","visible":true},{"displayName":"org.label-schema.build-date","fieldName":"org.label-schema.build-date","visible":true},{"displayName":"org.label-schema.license","fieldName":"org.label-schema.license","visible":true},{"displayName":"org.label-schema.name","fieldName":"org.label-schema.name","visible":true},{"displayName":"org.label-schema.schema-version","fieldName":"org.label-schema.schema-version","visible":true},{"displayName":"org.label-schema.url","fieldName"
:"org.label-schema.url","visible":true},{"displayName":"org.label-schema.vcs-ref","fieldName":"org.label-schema.vcs-ref","visible":true},{"displayName":"org.label-schema.vcs-url","fieldName":"org.label-schema.vcs-url","visible":true},{"displayName":"org.label-schema.vendor","fieldName":"org.label-schema.vendor","visible":true},{"displayName":"org.label-schema.version","fieldName":"org.label-schema.version","visible":true},{"displayName":"org.opencontainers.image.created","fieldName":"org.opencontainers.image.created","visible":true},{"displayName":"org.opencontainers.image.licenses","fieldName":"org.opencontainers.image.licenses","visible":true},{"displayName":"org.opencontainers.image.title","fieldName":"org.opencontainers.image.title","visible":true},{"displayName":"org.opencontainers.image.vendor","fieldName":"org.opencontainers.image.vendor","visible":true},{"displayName":"release","fieldName":"release","visible":true},{"displayName":"summary","fieldName":"summary","visible":true},{"displayName":"url","fieldName":"url","visible":true},{"displayName":"vendor","fieldName":"vendor","visible":true},{"displayName":"version","fieldName":"version","visible":true},{"displayName":"org.label-schema.usage","fieldName":"org.label-schema.usage","visible":true},{"displayName":"org.opencontainers.image.documentation","fieldName":"org.opencontainers.image.documentation","visible":true},{"displayName":"org.opencontainers.image.revision","fieldName":"org.opencontainers.image.revision","visible":true},{"displayName":"org.opencontainers.image.source","fieldName":"org.opencontainers.image.source","visible":true},{"displayName":"org.opencontainers.image.url","fieldName":"org.opencontainers.image.url","visible":true},{"displayName":"org.opencontainers.image.version","fieldName":"org.opencontainers.image.version","visible":true},{"displayName":"org.opencontainers.image.description","fieldName":"org.opencontainers.image.description","visible":true}],"height":4,"kind":"Table","name":"Mos
t Recent Container Events","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"docker_container_status\")\n |> filter(fn: (r) => r[\"_field\"] == \"oomkilled\")\n |> filter(fn: (r) => r[\"container_status\"] != \"running\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"container_name\", \"host\"])\n |> last()\n |> group()\n |> keep(columns: [\"_value\", \"container_name\", \"host\", \"container_status\"])"}],"staticLegend":{},"tableOptions":{"sortBy":"container_name","verticalTimeAxis":true},"timeFormat":"YYYY-MM-DD HH:mm:ss","width":3,"xPos":9,"yPos":6},{"axes":[{"base":"10","name":"x","scale":"linear"},{"name":"y","scale":"linear","suffix":"%"}],"colorizeRows":true,"colors":[{"id":"TtgHQAXNep94KBgtu48C_","name":"Cthulhu","type":"scale","hex":"#FDC44F"},{"id":"_IuzkORho_8QXTE6vMllv","name":"Cthulhu","type":"scale","hex":"#007C76"},{"id":"bUszW_YI_9oColDbLNQ-d","name":"Cthulhu","type":"scale","hex":"#8983FF"}],"geom":"line","height":4,"hoverDimension":"auto","kind":"Xy","legendColorizeRows":true,"legendOpacity":1,"legendOrientationThreshold":100000000,"name":"Zeek Capture Loss","opacity":1,"orientationThreshold":100000000,"position":"overlaid","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == 
\"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")"},{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nroleFilter = (tables=<-) =>\n if v.Role != \"(All)\" then\n tables |> filter(fn: (r) => r[\"role\"] == v.Role)\n else\n tables\n\nfrom(bucket: \"telegraf/so_long_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"zeekcaptureloss\")\n |> filter(fn: (r) => r[\"_field\"] == \"loss\")\n |> hostFilter()\n |> roleFilter()\n |> group(columns: [\"_field\", \"host\", \"role\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"Trend\")"}],"staticLegend":{"colorizeRows":true,"opacity":1,"orientationThreshold":100000000,"widthRatio":1},"width":3,"widthRatio":1,"xCol":"_time","xPos":9,"yCol":"_value","yPos":42},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"Stenographer Loss","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n\nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"stenodrop\")\n |> filter(fn: (r) => r[\"_field\"] == \"drop\")\n |> map(fn: (r) => ({r with _value: r._value * 100.0}))\n |> hostFilter()\n |> group(columns: [\"host\"])\n |> last()\n |> aggregateWindow(every: v.windowPeriod, fn: mean)\n |> 
highestMax(n:1)"}],"staticLegend":{},"suffix":"%","width":1,"xPos":10},{"colors":[{"id":"base","name":"laser","type":"text","hex":"#00C9FF"}],"decimalPlaces":1,"height":2,"kind":"Single_Stat","name":"PCAP Retention","queries":[{"query":"hostFilter = (tables=<-) =>\n if v.Host != \"(All)\" then\n tables |> filter(fn: (r) => r[\"host\"] == v.Host)\n else\n tables\n \nfrom(bucket: \"telegraf/so_short_term\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"pcapage\")\n |> filter(fn: (r) => r[\"_field\"] == \"seconds\")\n |> hostFilter()\n |> map(fn: (r) => ({r with _value: r._value / (24.0 * 60.0 * 60.0)}))\n |> group(columns: [\"host\"])\n |> last()\n |> highestMax(n:1)"}],"staticLegend":{},"suffix":" days","width":1,"xPos":11}],"description":"Visualize the Security Onion grid performance metrics and alarm statuses.","name":"Security Onion Performance"}}] \ No newline at end of file diff --git a/salt/kibana/defaults.yaml b/salt/kibana/defaults.yaml index 645821b6c..078f826a0 100644 --- a/salt/kibana/defaults.yaml +++ b/salt/kibana/defaults.yaml @@ -22,7 +22,7 @@ kibana: - default - file migrations: - discardCorruptObjects: "8.18.6" + discardCorruptObjects: "8.18.8" telemetry: enabled: False security: diff --git a/salt/kratos/enabled.sls b/salt/kratos/enabled.sls index 31097ccf4..f0345edec 100644 --- a/salt/kratos/enabled.sls +++ b/salt/kratos/enabled.sls @@ -54,6 +54,9 @@ so-kratos: - file: kratosconfig - file: kratoslogdir - file: kratosdir + - retry: + attempts: 10 + interval: 10 delete_so-kratos_so-status.disabled: file.uncomment: diff --git a/salt/libvirt/bridge.sls b/salt/libvirt/bridge.sls index b8f720993..cc28bd8b7 100644 --- a/salt/libvirt/bridge.sls +++ b/salt/libvirt/bridge.sls @@ -4,6 +4,9 @@ # Elastic License 2.0. 
# We do not import GLOBALS in this state because it is called during setup +include: + - salt.minion.service_file + - salt.mine_functions down_original_mgmt_interface: cmd.run: @@ -28,29 +31,14 @@ wait_for_br0_ip: - timeout: 95 - onchanges: - cmd: down_original_mgmt_interface - -{% if grains.role == 'so-hypervisor' %} - -update_mine_functions: - file.managed: - - name: /etc/salt/minion.d/mine_functions.conf - - contents: | - mine_interval: 25 - mine_functions: - network.ip_addrs: - - interface: br0 - {%- if role in ['so-eval','so-import','so-manager','so-managerhype','so-managersearch','so-standalone'] %} - x509.get_pem_entries: - - glob_path: '/etc/pki/ca.crt' - {% endif %} - - onchanges: - - cmd: wait_for_br0_ip + - onchanges_in: + - file: salt_minion_service_unit_file + - file: mine_functions restart_salt_minion_service: service.running: - name: salt-minion - enable: True - listen: - - file: update_mine_functions - -{% endif %} + - file: salt_minion_service_unit_file + - file: mine_functions diff --git a/salt/libvirt/init.sls b/salt/libvirt/init.sls index e25a3bcc6..096e0f55c 100644 --- a/salt/libvirt/init.sls +++ b/salt/libvirt/init.sls @@ -31,6 +31,19 @@ libvirt_conf_dir: - group: 939 - makedirs: True +libvirt_volumes: + file.directory: + - name: /nsm/libvirt/volumes + - user: qemu + - group: qemu + - dir_mode: 755 + - file_mode: 640 + - recurse: + - user + - group + - mode + - makedirs: True + libvirt_config: file.managed: - name: /opt/so/conf/libvirt/libvirtd.conf diff --git a/salt/logstash/tools/sbin/so-logstash-flow-stats b/salt/logstash/tools/sbin/so-logstash-flow-stats new file mode 100644 index 000000000..70f9852e9 --- /dev/null +++ b/salt/logstash/tools/sbin/so-logstash-flow-stats @@ -0,0 +1,3 @@ +#!/bin/bash + +curl -s -L http://localhost:9600/_node/stats/flow | jq \ No newline at end of file diff --git a/salt/logstash/tools/sbin/so-logstash-health b/salt/logstash/tools/sbin/so-logstash-health new file mode 100644 index 000000000..9520ca507 --- 
/dev/null +++ b/salt/logstash/tools/sbin/so-logstash-health @@ -0,0 +1,3 @@ +#!/bin/bash + +curl -s -L http://localhost:9600/_health_report | jq \ No newline at end of file diff --git a/salt/logstash/tools/sbin/so-logstash-jvm-stats b/salt/logstash/tools/sbin/so-logstash-jvm-stats new file mode 100644 index 000000000..5c0e4f59f --- /dev/null +++ b/salt/logstash/tools/sbin/so-logstash-jvm-stats @@ -0,0 +1,3 @@ +#!/bin/bash + +curl -s -L http://localhost:9600/_node/stats/jvm | jq \ No newline at end of file diff --git a/salt/manager/tools/sbin/so-saltstack-update b/salt/manager/tools/sbin/so-saltstack-update index 4be8f095c..2f385ab89 100755 --- a/salt/manager/tools/sbin/so-saltstack-update +++ b/salt/manager/tools/sbin/so-saltstack-update @@ -5,10 +5,12 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. - default_salt_dir=/opt/so/saltstack/default -clone_to_tmp() { +VERBOSE=0 +VERY_VERBOSE=0 +TEST_MODE=0 +clone_to_tmp() { # TODO Need to add a air gap option # Make a temp location for the files mkdir /tmp/sogh @@ -16,19 +18,110 @@ clone_to_tmp() { #git clone -b dev https://github.com/Security-Onion-Solutions/securityonion.git git clone https://github.com/Security-Onion-Solutions/securityonion.git cd /tmp +} +show_file_changes() { + local source_dir="$1" + local dest_dir="$2" + local dir_type="$3" # "salt" or "pillar" + + if [ $VERBOSE -eq 0 ]; then + return + fi + + echo "=== Changes for $dir_type directory ===" + + # Find all files in source directory + if [ -d "$source_dir" ]; then + find "$source_dir" -type f | while read -r source_file; do + # Get relative path + rel_path="${source_file#$source_dir/}" + dest_file="$dest_dir/$rel_path" + + if [ ! -f "$dest_file" ]; then + echo "ADDED: $dest_file" + if [ $VERY_VERBOSE -eq 1 ]; then + echo " (New file - showing first 20 lines)" + head -n 20 "$source_file" | sed 's/^/ + /' + echo "" + fi + elif ! 
cmp -s "$source_file" "$dest_file"; then + echo "MODIFIED: $dest_file" + if [ $VERY_VERBOSE -eq 1 ]; then + echo " (Changes:)" + diff -u "$dest_file" "$source_file" | sed 's/^/ /' + echo "" + fi + fi + done + fi + + # Find deleted files (exist in dest but not in source) + if [ -d "$dest_dir" ]; then + find "$dest_dir" -type f | while read -r dest_file; do + # Get relative path + rel_path="${dest_file#$dest_dir/}" + source_file="$source_dir/$rel_path" + + if [ ! -f "$source_file" ]; then + echo "DELETED: $dest_file" + if [ $VERY_VERBOSE -eq 1 ]; then + echo " (File was deleted)" + echo "" + fi + fi + done + fi + + echo "" } copy_new_files() { - # Copy new files over to the salt dir cd /tmp/sogh/securityonion git checkout $BRANCH VERSION=$(cat VERSION) + + if [ $TEST_MODE -eq 1 ]; then + echo "=== TEST MODE: Showing what would change without making changes ===" + echo "Branch: $BRANCH" + echo "Version: $VERSION" + echo "" + fi + + # Show changes before copying if verbose mode is enabled OR if in test mode + if [ $VERBOSE -eq 1 ] || [ $TEST_MODE -eq 1 ]; then + if [ $TEST_MODE -eq 1 ]; then + # In test mode, force at least basic verbose output + local old_verbose=$VERBOSE + if [ $VERBOSE -eq 0 ]; then + VERBOSE=1 + fi + fi + + echo "Analyzing file changes..." + show_file_changes "$(pwd)/salt" "$default_salt_dir/salt" "salt" + show_file_changes "$(pwd)/pillar" "$default_salt_dir/pillar" "pillar" + + if [ $TEST_MODE -eq 1 ] && [ $old_verbose -eq 0 ]; then + # Restore original verbose setting + VERBOSE=$old_verbose + fi + fi + + # If in test mode, don't copy files + if [ $TEST_MODE -eq 1 ]; then + echo "=== TEST MODE: No files were modified ===" + echo "To apply these changes, run without --test option" + rm -rf /tmp/sogh + return + fi + # We need to overwrite if there is a repo file if [ -d /opt/so/repo ]; then tar -czf /opt/so/repo/"$VERSION".tar.gz -C "$(pwd)/.." . 
fi + rsync -a salt $default_salt_dir/ rsync -a pillar $default_salt_dir/ chown -R socore:socore $default_salt_dir/salt @@ -45,11 +138,64 @@ got_root(){ fi } -got_root -if [ $# -ne 1 ] ; then +show_usage() { + echo "Usage: $0 [-v] [-vv] [--test] [branch]" + echo " -v Show verbose output (files changed/added/deleted)" + echo " -vv Show very verbose output (includes file diffs)" + echo " --test Test mode - show what would change without making changes" + echo " branch Git branch to checkout (default: 2.4/main)" + echo "" + echo "Examples:" + echo " $0 # Normal operation" + echo " $0 -v # Show which files change" + echo " $0 -vv # Show files and their diffs" + echo " $0 --test # See what would change (dry run)" + echo " $0 --test -vv # Test mode with detailed diffs" + echo " $0 -v dev-branch # Use specific branch with verbose output" + exit 1 +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -v) + VERBOSE=1 + shift + ;; + -vv) + VERBOSE=1 + VERY_VERBOSE=1 + shift + ;; + --test) + TEST_MODE=1 + shift + ;; + -h|--help) + show_usage + ;; + -*) + echo "Unknown option $1" + show_usage + ;; + *) + # This should be the branch name + if [ -z "$BRANCH" ]; then + BRANCH="$1" + else + echo "Too many arguments" + show_usage + fi + shift + ;; + esac +done + +# Set default branch if not provided +if [ -z "$BRANCH" ]; then BRANCH=2.4/main -else - BRANCH=$1 fi + +got_root clone_to_tmp copy_new_files diff --git a/salt/manager/tools/sbin/so-user b/salt/manager/tools/sbin/so-user index 92b3ba385..060dcf3a4 100755 --- a/salt/manager/tools/sbin/so-user +++ b/salt/manager/tools/sbin/so-user @@ -387,7 +387,7 @@ function syncElastic() { if [[ -z "$SKIP_STATE_APPLY" ]]; then echo "Elastic state will be re-applied to affected minions. This will run in the background and may take several minutes to complete." 
echo "Applying elastic state to elastic minions at $(date)" >> /opt/so/log/soc/sync.log 2>&1 - salt --async -C 'G@role:so-standalone or G@role:so-eval or G@role:so-import or G@role:so-manager or G@role:so-managersearch or G@role:so-searchnode or G@role:so-heavynode' state.apply elasticsearch queue=True >> /opt/so/log/soc/sync.log 2>&1 + salt --async -C 'I@elasticsearch:enabled:true' state.apply elasticsearch queue=True >> /opt/so/log/soc/sync.log 2>&1 fi else echo "Newly generated users/roles files are incomplete; aborting." diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 86595c162..f32b6edf8 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -21,6 +21,9 @@ whiptail_title='Security Onion UPdater' NOTIFYCUSTOMELASTICCONFIG=false TOPFILE=/opt/so/saltstack/default/salt/top.sls BACKUPTOPFILE=/opt/so/saltstack/default/salt/top.sls.backup +SALTUPGRADED=false +SALT_CLOUD_INSTALLED=false +SALT_CLOUD_CONFIGURED=false # used to display messages to the user at the end of soup declare -a FINAL_MESSAGE_QUEUE=() @@ -169,6 +172,8 @@ airgap_update_dockers() { tar xf "$AGDOCKER/registry.tar" -C /nsm/docker-registry/docker echo "Add Registry back" docker load -i "$AGDOCKER/registry_image.tar" + echo "Restart registry container" + salt-call state.apply registry queue=True fi fi } @@ -420,6 +425,7 @@ preupgrade_changes() { [[ "$INSTALLEDVERSION" == 2.4.150 ]] && up_to_2.4.160 [[ "$INSTALLEDVERSION" == 2.4.160 ]] && up_to_2.4.170 [[ "$INSTALLEDVERSION" == 2.4.170 ]] && up_to_2.4.180 + [[ "$INSTALLEDVERSION" == 2.4.180 ]] && up_to_2.4.190 true } @@ -450,6 +456,7 @@ postupgrade_changes() { [[ "$POSTVERSION" == 2.4.150 ]] && post_to_2.4.160 [[ "$POSTVERSION" == 2.4.160 ]] && post_to_2.4.170 [[ "$POSTVERSION" == 2.4.170 ]] && post_to_2.4.180 + [[ "$POSTVERSION" == 2.4.180 ]] && post_to_2.4.190 true } @@ -599,15 +606,36 @@ post_to_2.4.170() { } post_to_2.4.180() { - echo "Regenerating Elastic Agent Installers" - 
/sbin/so-elastic-agent-gen-installers - # Force update to Kafka output policy /usr/sbin/so-kafka-fleet-output-policy --force POSTVERSION=2.4.180 } +post_to_2.4.190() { + echo "Regenerating Elastic Agent Installers" + /sbin/so-elastic-agent-gen-installers + + # Only need to update import / eval nodes + if [[ "$MINION_ROLE" == "import" ]] || [[ "$MINION_ROLE" == "eval" ]]; then + update_import_fleet_output + fi + + # Check if expected default policy is logstash (global.pipeline is REDIS or "") + pipeline=$(lookup_pillar "pipeline" "global") + if [[ -z "$pipeline" ]] || [[ "$pipeline" == "REDIS" ]]; then + # Check if this grid is currently affected by corrupt fleet output policy + if elastic-agent status | grep "config: key file not configured" > /dev/null 2>&1; then + echo "Elastic Agent shows an ssl error connecting to logstash output. Updating output policy..." + update_default_logstash_output + fi + fi + # Apply new elasticsearch.server index template + rollover_index "logs-elasticsearch.server-default" + + POSTVERSION=2.4.190 +} + repo_sync() { echo "Sync the local repo." su socore -c '/usr/sbin/so-repo-sync' || fail "Unable to complete so-repo-sync." 
@@ -864,10 +892,15 @@ up_to_2.4.170() { } up_to_2.4.180() { + echo "Nothing to do for 2.4.180" + INSTALLEDVERSION=2.4.180 +} + +up_to_2.4.190() { # Elastic Update for this release, so download Elastic Agent files determine_elastic_agent_upgrade - INSTALLEDVERSION=2.4.180 + INSTALLEDVERSION=2.4.190 } add_hydra_pillars() { @@ -1143,6 +1176,44 @@ update_elasticsearch_index_settings() { done } +update_import_fleet_output() { + if output=$(curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/outputs/so-manager_elasticsearch" --retry 3 --fail 2>/dev/null); then + # Update the current config of so-manager_elasticsearch output policy in place (leaving any customizations like having changed the preset value from 'balanced' to 'performance') + CAFINGERPRINT=$(openssl x509 -in /etc/pki/tls/certs/intca.crt -outform DER | sha256sum | cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') + updated_policy=$(jq --arg CAFINGERPRINT "$CAFINGERPRINT" '.item | (del(.id) | .ca_trusted_fingerprint = $CAFINGERPRINT)' <<< "$output") + if curl -sK /opt/so/conf/elasticsearch/curl.config -L "localhost:5601/api/fleet/outputs/so-manager_elasticsearch" -XPUT -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$updated_policy" --retry 3 --fail 2>/dev/null; then + echo "Successfully updated so-manager_elasticsearch fleet output policy" + else + fail "Failed to update so-manager_elasticsearch fleet output policy" + fi + fi +} + +update_default_logstash_output() { + echo "Updating fleet logstash output policy grid-logstash" + if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then + # Keep already configured hosts for this update, subsequent host updates come from so-elastic-fleet-outputs-update + HOSTS=$(echo "$logstash_policy" | jq -r '.item.hosts') + DEFAULT_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default') + 
DEFAULT_MONITORING_ENABLED=$(echo "$logstash_policy" | jq -r '.item.is_default_monitoring') + LOGSTASHKEY=$(openssl rsa -in /etc/pki/elasticfleet-logstash.key) + LOGSTASHCRT=$(openssl x509 -in /etc/pki/elasticfleet-logstash.crt) + LOGSTASHCA=$(openssl x509 -in /etc/pki/tls/certs/intca.crt) + JSON_STRING=$(jq -n \ + --argjson HOSTS "$HOSTS" \ + --arg DEFAULT_ENABLED "$DEFAULT_ENABLED" \ + --arg DEFAULT_MONITORING_ENABLED "$DEFAULT_MONITORING_ENABLED" \ + --arg LOGSTASHKEY "$LOGSTASHKEY" \ + --arg LOGSTASHCRT "$LOGSTASHCRT" \ + --arg LOGSTASHCA "$LOGSTASHCA" \ + '{"name":"grid-logstash","type":"logstash","hosts": $HOSTS,"is_default": $DEFAULT_ENABLED,"is_default_monitoring": $DEFAULT_MONITORING_ENABLED,"config_yaml":"","ssl":{"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets":{"ssl":{"key": $LOGSTASHKEY }}}') + fi + + if curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_logstash" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" --retry 3 --retry-delay 10 --fail; then + echo "Successfully updated grid-logstash fleet output policy" + fi +} + update_salt_mine() { echo "Populating the mine with mine_functions for each host." set +e @@ -1192,24 +1263,43 @@ upgrade_check_salt() { } upgrade_salt() { - SALTUPGRADED=True echo "Performing upgrade of Salt from $INSTALLEDSALTVERSION to $NEWSALTVERSION." echo "" # If rhel family if [[ $is_rpm ]]; then + # Check if salt-cloud is installed + if rpm -q salt-cloud &>/dev/null; then + SALT_CLOUD_INSTALLED=true + fi + # Check if salt-cloud is configured + if [[ -f /etc/salt/cloud.profiles.d/socloud.conf ]]; then + SALT_CLOUD_CONFIGURED=true + fi + echo "Removing yum versionlock for Salt." 
echo "" yum versionlock delete "salt" yum versionlock delete "salt-minion" yum versionlock delete "salt-master" + # Remove salt-cloud versionlock if installed + if [[ $SALT_CLOUD_INSTALLED == true ]]; then + yum versionlock delete "salt-cloud" + fi echo "Updating Salt packages." echo "" set +e # if oracle run with -r to ignore repos set by bootstrap if [[ $OS == 'oracle' ]]; then - run_check_net_err \ - "sh $UPDATE_DIR/salt/salt/scripts/bootstrap-salt.sh -X -r -F -M stable \"$NEWSALTVERSION\"" \ - "Could not update salt, please check $SOUP_LOG for details." + # Add -L flag only if salt-cloud is already installed + if [[ $SALT_CLOUD_INSTALLED == true ]]; then + run_check_net_err \ + "sh $UPDATE_DIR/salt/salt/scripts/bootstrap-salt.sh -X -r -L -F -M stable \"$NEWSALTVERSION\"" \ + "Could not update salt, please check $SOUP_LOG for details." + else + run_check_net_err \ + "sh $UPDATE_DIR/salt/salt/scripts/bootstrap-salt.sh -X -r -F -M stable \"$NEWSALTVERSION\"" \ + "Could not update salt, please check $SOUP_LOG for details." + fi # if another rhel family variant we want to run without -r to allow the bootstrap script to manage repos else run_check_net_err \ @@ -1222,6 +1312,10 @@ upgrade_salt() { yum versionlock add "salt-0:$NEWSALTVERSION-0.*" yum versionlock add "salt-minion-0:$NEWSALTVERSION-0.*" yum versionlock add "salt-master-0:$NEWSALTVERSION-0.*" + # Add salt-cloud versionlock if installed + if [[ $SALT_CLOUD_INSTALLED == true ]]; then + yum versionlock add "salt-cloud-0:$NEWSALTVERSION-0.*" + fi # Else do Ubuntu things elif [[ $is_deb ]]; then echo "Removing apt hold for Salt." @@ -1254,6 +1348,7 @@ upgrade_salt() { echo "" exit 1 else + SALTUPGRADED=true echo "Salt upgrade success." echo "" fi @@ -1359,6 +1454,7 @@ main() { fi set_minionid + MINION_ROLE=$(lookup_role) echo "Found that Security Onion $INSTALLEDVERSION is currently installed." 
echo "" if [[ $is_airgap -eq 0 ]]; then @@ -1401,7 +1497,7 @@ main() { if [ "$is_hotfix" == "true" ]; then echo "Applying $HOTFIXVERSION hotfix" # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars - if [[ ! "$MINIONID" =~ "_import" ]]; then + if [[ ! "$MINION_ROLE" == "import" ]]; then backup_old_states_pillars fi copy_new_files @@ -1464,7 +1560,7 @@ main() { fi # since we don't run the backup.config_backup state on import we wont snapshot previous version states and pillars - if [[ ! "$MINIONID" =~ "_import" ]]; then + if [[ ! "$MINION_ROLE" == "import" ]]; then echo "" echo "Creating snapshots of default and local Salt states and pillars and saving to /nsm/backup/" backup_old_states_pillars @@ -1496,6 +1592,11 @@ main() { # ensure the mine is updated and populated before highstates run, following the salt-master restart update_salt_mine + if [[ $SALT_CLOUD_CONFIGURED == true && $SALTUPGRADED == true ]]; then + echo "Updating salt-cloud config to use the new Salt version" + salt-call state.apply salt.cloud.config concurrent=True + fi + enable_highstate echo "" diff --git a/salt/manager/tools/sbin_jinja/so-salt-cloud b/salt/manager/tools/sbin_jinja/so-salt-cloud index 0c98750f4..c8177e1bc 100644 --- a/salt/manager/tools/sbin_jinja/so-salt-cloud +++ b/salt/manager/tools/sbin_jinja/so-salt-cloud @@ -211,7 +211,7 @@ Exit Codes: Logging: -- Logs are written to /opt/so/log/salt/so-salt-cloud.log. +- Logs are written to /opt/so/log/salt/so-salt-cloud. - Both file and console logging are enabled for real-time monitoring. 
""" @@ -233,7 +233,7 @@ local = salt.client.LocalClient() logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -file_handler = logging.FileHandler('/opt/so/log/salt/so-salt-cloud.log') +file_handler = logging.FileHandler('/opt/so/log/salt/so-salt-cloud') console_handler = logging.StreamHandler() formatter = logging.Formatter('%(asctime)s %(message)s') @@ -516,23 +516,85 @@ def run_qcow2_modify_hardware_config(profile, vm_name, cpu=None, memory=None, pc target = hv_name + "_*" try: - args_list = [ - 'vm_name=' + vm_name, - 'cpu=' + str(cpu) if cpu else '', - 'memory=' + str(memory) if memory else '', - 'start=' + str(start) - ] - + args_list = ['vm_name=' + vm_name] + + # Only add parameters that are actually specified + if cpu is not None: + args_list.append('cpu=' + str(cpu)) + if memory is not None: + args_list.append('memory=' + str(memory)) + # Add PCI devices if provided if pci_list: # Pass all PCI devices as a comma-separated list args_list.append('pci=' + ','.join(pci_list)) + + # Always add start parameter + args_list.append('start=' + str(start)) result = local.cmd(target, 'qcow2.modify_hardware_config', args_list) format_qcow2_output('Hardware configuration', result) except Exception as e: logger.error(f"An error occurred while running qcow2.modify_hardware_config: {e}") +def run_qcow2_create_volume_config(profile, vm_name, size_gb, cpu=None, memory=None, start=False): + """Create a volume for the VM and optionally configure CPU/memory. 
+ + Args: + profile (str): The cloud profile name + vm_name (str): The name of the VM + size_gb (int): Size of the volume in GB + cpu (int, optional): Number of CPUs to assign + memory (int, optional): Amount of memory in MiB + start (bool): Whether to start the VM after configuration + """ + hv_name = profile.split('_')[1] + target = hv_name + "_*" + + try: + # Step 1: Create the volume + logger.info(f"Creating {size_gb}GB volume for VM {vm_name}") + volume_result = local.cmd( + target, + 'qcow2.create_volume_config', + kwarg={ + 'vm_name': vm_name, + 'size_gb': size_gb, + 'start': False # Don't start yet if we need to configure CPU/memory + } + ) + format_qcow2_output('Volume creation', volume_result) + + # Step 2: Configure CPU and memory if specified + if cpu or memory: + logger.info(f"Configuring hardware for VM {vm_name}: CPU={cpu}, Memory={memory}MiB") + hw_result = local.cmd( + target, + 'qcow2.modify_hardware_config', + kwarg={ + 'vm_name': vm_name, + 'cpu': cpu, + 'memory': memory, + 'start': start + } + ) + format_qcow2_output('Hardware configuration', hw_result) + elif start: + # If no CPU/memory config needed but we need to start the VM + logger.info(f"Starting VM {vm_name}") + start_result = local.cmd( + target, + 'qcow2.modify_hardware_config', + kwarg={ + 'vm_name': vm_name, + 'start': True + } + ) + format_qcow2_output('VM startup', start_result) + + except Exception as e: + logger.error(f"An error occurred while creating volume and configuring hardware: {e}") + def run_qcow2_modify_network_config(profile, vm_name, mode, ip=None, gateway=None, dns=None, search_domain=None): hv_name = profile.split('_')[1] target = hv_name + "_*" @@ -586,6 +648,7 @@ def parse_arguments(): network_group.add_argument('-c', '--cpu', type=int, help='Number of virtual CPUs to assign.') network_group.add_argument('-m', '--memory', type=int, help='Amount of memory to assign in MiB.') network_group.add_argument('-P', '--pci', action='append', help='PCI hardware ID(s) to 
passthrough to the VM (e.g., 0000:c7:00.0). Can be specified multiple times.') + network_group.add_argument('--nsm-size', type=int, help='Size in GB for NSM volume creation. Can be used with copper/sfp NICs (--pci). Only disk passthrough (without --nsm-size) prevents volume creation.') args = parser.parse_args() @@ -621,6 +684,8 @@ def main(): hw_config.append(f"{args.memory}MB RAM") if args.pci: hw_config.append(f"PCI devices: {', '.join(args.pci)}") + if args.nsm_size: + hw_config.append(f"NSM volume: {args.nsm_size}GB") hw_string = f" and hardware config: {', '.join(hw_config)}" if hw_config else "" logger.info(f"Received request to create VM '{args.vm_name}' using profile '{args.profile}' {network_config}{hw_string}") @@ -643,8 +708,58 @@ def main(): # Step 2: Provision the VM (without starting it) call_salt_cloud(args.profile, args.vm_name) - # Step 3: Modify hardware configuration - run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) + # Step 3: Determine storage configuration approach + # Priority: disk passthrough > volume creation (but volume can coexist with copper/sfp NICs) + # Note: virtual_node_manager.py already filters out --nsm-size when disk is present, + # so if both --pci and --nsm-size are present here, the PCI devices are copper/sfp NICs + use_passthrough = False + use_volume_creation = False + has_nic_passthrough = False + + if args.nsm_size: + # Validate nsm_size + if args.nsm_size <= 0: + logger.error(f"Invalid nsm_size value: {args.nsm_size}. 
Must be a positive integer.") + sys.exit(1) + use_volume_creation = True + logger.info(f"Using volume creation with size {args.nsm_size}GB (--nsm-size parameter specified)") + + if args.pci: + # If both nsm_size and PCI are present, PCI devices are copper/sfp NICs + # (virtual_node_manager.py filters out nsm_size when disk is present) + has_nic_passthrough = True + logger.info(f"PCI devices (copper/sfp NICs) will be passed through along with volume: {', '.join(args.pci)}") + elif args.pci: + # Only PCI devices, no nsm_size - could be disk or NICs + # this script is called by virtual_node_manager and that strips any possibility that nsm_size and the disk pci slot is sent to this script + # we might have not specified a disk passthrough or nsm_size, but pass another pci slot and we end up here + use_passthrough = True + logger.info(f"Configuring PCI device passthrough.(--pci parameter specified without --nsm-size)") + + # Step 4: Configure hardware based on storage approach + if use_volume_creation: + # Create volume first + run_qcow2_create_volume_config(args.profile, args.vm_name, size_gb=args.nsm_size, cpu=args.cpu, memory=args.memory, start=False) + + # Then configure NICs if present + if has_nic_passthrough: + logger.info(f"Configuring NIC passthrough for VM {args.vm_name}") + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=args.pci, start=True) + else: + # No NICs, just start the VM + logger.info(f"Starting VM {args.vm_name}") + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True) + elif use_passthrough: + # Use existing passthrough logic via modify_hardware_config + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=args.cpu, memory=args.memory, pci_list=args.pci, start=True) + else: + # No storage configuration, just configure CPU/memory if specified + if args.cpu or args.memory: + run_qcow2_modify_hardware_config(args.profile, args.vm_name, 
cpu=args.cpu, memory=args.memory, pci_list=None, start=True) + else: + # No hardware configuration needed, just start the VM + logger.info(f"No hardware configuration specified, starting VM {args.vm_name}") + run_qcow2_modify_hardware_config(args.profile, args.vm_name, cpu=None, memory=None, pci_list=None, start=True) except KeyboardInterrupt: logger.error("so-salt-cloud: Operation cancelled by user.") diff --git a/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja b/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja index 025e23d89..23fd15983 100644 --- a/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja +++ b/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja @@ -14,7 +14,7 @@ sool9_{{host}}: private_key: /etc/ssh/auth_keys/soqemussh/id_ecdsa sudo: True deploy_command: sh /tmp/.saltcloud-*/deploy.sh - script_args: -r -F -x python3 stable 3006.9 + script_args: -r -F -x python3 stable {{ SALTVERSION }} minion: master: {{ grains.host }} master_port: 4506 diff --git a/salt/salt/cloud/config.sls b/salt/salt/cloud/config.sls index dfbfda56b..dce0e873a 100644 --- a/salt/salt/cloud/config.sls +++ b/salt/salt/cloud/config.sls @@ -13,6 +13,7 @@ {% if '.'.join(sls.split('.')[:2]) in allowed_states %} {% if 'vrt' in salt['pillar.get']('features', []) %} {% set HYPERVISORS = salt['pillar.get']('hypervisor:nodes', {} ) %} +{% from 'salt/map.jinja' import SALTVERSION %} {% if HYPERVISORS %} cloud_providers: @@ -20,7 +21,7 @@ cloud_providers: - name: /etc/salt/cloud.providers.d/libvirt.conf - source: salt://salt/cloud/cloud.providers.d/libvirt.conf.jinja - defaults: - HYPERVISORS: {{HYPERVISORS}} + HYPERVISORS: {{ HYPERVISORS }} - template: jinja - makedirs: True @@ -29,11 +30,17 @@ cloud_profiles: - name: /etc/salt/cloud.profiles.d/socloud.conf - source: salt://salt/cloud/cloud.profiles.d/socloud.conf.jinja - defaults: - HYPERVISORS: {{HYPERVISORS}} + HYPERVISORS: {{ HYPERVISORS }} MANAGERHOSTNAME: {{ grains.host }} MANAGERIP: {{ pillar.host.mainip }} + SALTVERSION: {{ 
SALTVERSION }} - template: jinja - makedirs: True +{% else %} +no_hypervisors_configured: + test.succeed_without_changes: + - name: no_hypervisors_configured + - comment: No hypervisors are configured {% endif %} {% else %} diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index 88ccede9c..6d88bd688 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -117,7 +117,7 @@ Exit Codes: 4: VM provisioning failure (so-salt-cloud execution failed) Logging: - Log files are written to /opt/so/log/salt/engines/virtual_node_manager.log + Log files are written to /opt/so/log/salt/engines/virtual_node_manager Comprehensive logging includes: - Hardware validation details - PCI ID conversion process @@ -138,29 +138,56 @@ import pwd import grp import salt.config import salt.runner +import salt.client from typing import Dict, List, Optional, Tuple, Any from datetime import datetime, timedelta from threading import Lock -# Get socore uid/gid -SOCORE_UID = pwd.getpwnam('socore').pw_uid -SOCORE_GID = grp.getgrnam('socore').gr_gid - -# Initialize Salt runner once +# Initialize Salt runner and local client once opts = salt.config.master_config('/etc/salt/master') opts['output'] = 'json' runner = salt.runner.RunnerClient(opts) +local = salt.client.LocalClient() + +# Get socore uid/gid for file ownership +SOCORE_UID = pwd.getpwnam('socore').pw_uid +SOCORE_GID = grp.getgrnam('socore').gr_gid # Configure logging log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) +# Prevent propagation to parent loggers to avoid duplicate log entries +log.propagate = False + +# Add file handler for dedicated log file +log_dir = '/opt/so/log/salt' +log_file = os.path.join(log_dir, 'virtual_node_manager') + +# Create log directory if it doesn't exist +os.makedirs(log_dir, exist_ok=True) + +# Create file handler +file_handler = logging.FileHandler(log_file) 
+file_handler.setLevel(logging.DEBUG) + +# Create formatter +formatter = logging.Formatter( + '%(asctime)s [%(name)s:%(lineno)d][%(levelname)-8s][%(process)d] %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) +file_handler.setFormatter(formatter) + +# Add handler to logger +log.addHandler(file_handler) + # Constants DEFAULT_INTERVAL = 30 DEFAULT_BASE_PATH = '/opt/so/saltstack/local/salt/hypervisor/hosts' VALID_ROLES = ['sensor', 'searchnode', 'idh', 'receiver', 'heavynode', 'fleet'] LICENSE_PATH = '/opt/so/saltstack/local/pillar/soc/license.sls' DEFAULTS_PATH = '/opt/so/saltstack/default/salt/hypervisor/defaults.yaml' +HYPERVISOR_PILLAR_PATH = '/opt/so/saltstack/local/pillar/hypervisor/soc_hypervisor.sls' # Define the retention period for destroyed VMs (in hours) DESTROYED_VM_RETENTION_HOURS = 48 @@ -202,6 +229,39 @@ def write_json_file(file_path: str, data: Any) -> None: except Exception as e: log.error("Failed to write JSON file %s: %s", file_path, str(e)) raise +def remove_vm_from_vms_file(vms_file_path: str, vm_hostname: str, vm_role: str) -> bool: + """ + Remove a VM entry from the hypervisorVMs file. 
+ + Args: + vms_file_path: Path to the hypervisorVMs file + vm_hostname: Hostname of the VM to remove (without role suffix) + vm_role: Role of the VM + + Returns: + bool: True if VM was removed, False otherwise + """ + try: + # Read current VMs + vms = read_json_file(vms_file_path) + + # Find and remove the VM entry + original_count = len(vms) + vms = [vm for vm in vms if not (vm.get('hostname') == vm_hostname and vm.get('role') == vm_role)] + + if len(vms) < original_count: + # VM was found and removed, write back to file + write_json_file(vms_file_path, vms) + log.info("Removed VM %s_%s from %s", vm_hostname, vm_role, vms_file_path) + return True + else: + log.warning("VM %s_%s not found in %s", vm_hostname, vm_role, vms_file_path) + return False + + except Exception as e: + log.error("Failed to remove VM %s_%s from %s: %s", vm_hostname, vm_role, vms_file_path, str(e)) + return False + def read_yaml_file(file_path: str) -> dict: """Read and parse a YAML file.""" @@ -271,7 +331,7 @@ def parse_hardware_indices(hw_value: Any) -> List[int]: return indices def get_hypervisor_model(hypervisor: str) -> str: - """Get sosmodel from hypervisor grains.""" + """Get sosmodel or byodmodel from hypervisor grains.""" try: # Get cached grains using Salt runner grains = runner.cmd( @@ -283,9 +343,9 @@ def get_hypervisor_model(hypervisor: str) -> str: # Get the first minion ID that matches our hypervisor minion_id = next(iter(grains.keys())) - model = grains[minion_id].get('sosmodel') + model = grains[minion_id].get('sosmodel', grains[minion_id].get('byodmodel', '')) if not model: - raise ValueError(f"No sosmodel grain found for hypervisor {hypervisor}") + raise ValueError(f"No sosmodel or byodmodel grain found for hypervisor {hypervisor}") log.debug("Found model %s for hypervisor %s", model, hypervisor) return model @@ -295,16 +355,48 @@ def get_hypervisor_model(hypervisor: str) -> str: raise def load_hardware_defaults(model: str) -> dict: - """Load hardware configuration from 
defaults.yaml.""" + """Load hardware configuration from defaults.yaml and optionally override with pillar configuration.""" + config = None + config_source = None + try: + # First, try to load from defaults.yaml + log.debug("Checking for model %s in %s", model, DEFAULTS_PATH) defaults = read_yaml_file(DEFAULTS_PATH) if not defaults or 'hypervisor' not in defaults: raise ValueError("Invalid defaults.yaml structure") if 'model' not in defaults['hypervisor']: raise ValueError("No model configurations found in defaults.yaml") - if model not in defaults['hypervisor']['model']: - raise ValueError(f"Model {model} not found in defaults.yaml") - return defaults['hypervisor']['model'][model] + + # Check if model exists in defaults + if model in defaults['hypervisor']['model']: + config = defaults['hypervisor']['model'][model] + config_source = DEFAULTS_PATH + log.debug("Found model %s in %s", model, DEFAULTS_PATH) + + # Then, try to load from pillar file (if it exists) + try: + log.debug("Checking for model %s in %s", model, HYPERVISOR_PILLAR_PATH) + pillar_config = read_yaml_file(HYPERVISOR_PILLAR_PATH) + if pillar_config and 'hypervisor' in pillar_config: + if 'model' in pillar_config['hypervisor']: + if model in pillar_config['hypervisor']['model']: + # Override with pillar configuration + config = pillar_config['hypervisor']['model'][model] + config_source = HYPERVISOR_PILLAR_PATH + log.debug("Found model %s in %s (overriding defaults)", model, HYPERVISOR_PILLAR_PATH) + except FileNotFoundError: + log.debug("Pillar file %s not found, using defaults only", HYPERVISOR_PILLAR_PATH) + except Exception as e: + log.warning("Failed to read pillar file %s: %s (using defaults)", HYPERVISOR_PILLAR_PATH, str(e)) + + # If model was not found in either file, raise an error + if config is None: + raise ValueError(f"Model {model} not found in {DEFAULTS_PATH} or {HYPERVISOR_PILLAR_PATH}") + + log.debug("Using hardware configuration for model %s from %s", model, config_source) + return 
config + except Exception as e: log.error("Failed to load hardware defaults: %s", str(e)) raise @@ -525,6 +617,13 @@ def mark_vm_failed(vm_file: str, error_code: int, message: str) -> None: # Remove the original file since we'll create an error file os.remove(vm_file) + # Clear hardware resource claims so failed VMs don't consume resources + # Keep nsm_size for reference but clear cpu, memory, sfp, copper + config.pop('cpu', None) + config.pop('memory', None) + config.pop('sfp', None) + config.pop('copper', None) + # Create error file error_file = f"{vm_file}.error" data = { @@ -553,8 +652,16 @@ def mark_invalid_hardware(hypervisor_path: str, vm_name: str, config: dict, erro # Join all messages with proper sentence structure full_message = "Hardware validation failure: " + " ".join(error_messages) + # Clear hardware resource claims so failed VMs don't consume resources + # Keep nsm_size for reference but clear cpu, memory, sfp, copper + config_copy = config.copy() + config_copy.pop('cpu', None) + config_copy.pop('memory', None) + config_copy.pop('sfp', None) + config_copy.pop('copper', None) + data = { - 'config': config, + 'config': config_copy, 'status': 'error', 'timestamp': datetime.now().isoformat(), 'error_details': { @@ -601,6 +708,61 @@ def validate_vrt_license() -> bool: log.error("Error reading license file: %s", str(e)) return False +def check_hypervisor_disk_space(hypervisor: str, size_gb: int) -> Tuple[bool, Optional[str]]: + """ + Check if hypervisor has sufficient disk space for volume creation. 
+ + Args: + hypervisor: Hypervisor hostname + size_gb: Required size in GB + + Returns: + Tuple of (has_space, error_message) + """ + try: + # Get hypervisor minion ID + hypervisor_minion = f"{hypervisor}_hypervisor" + + # Check disk space on /nsm/libvirt/volumes using LocalClient + result = local.cmd( + hypervisor_minion, + 'cmd.run', + ["df -BG /nsm/libvirt/volumes | tail -1 | awk '{print $4}' | sed 's/G//'"] + ) + + if not result or hypervisor_minion not in result: + log.error("Failed to check disk space on hypervisor %s", hypervisor) + return False, "Failed to check disk space on hypervisor" + + available_gb_str = result[hypervisor_minion].strip() + if not available_gb_str: + log.error("Empty disk space response from hypervisor %s", hypervisor) + return False, "Failed to get disk space information" + + try: + available_gb = float(available_gb_str) + except ValueError: + log.error("Invalid disk space value from hypervisor %s: %s", hypervisor, available_gb_str) + return False, f"Invalid disk space value: {available_gb_str}" + + # Add 10% buffer for filesystem overhead + required_gb = size_gb * 1.1 + + log.debug("Hypervisor %s disk space check: Available=%.2fGB, Required=%.2fGB", + hypervisor, available_gb, required_gb) + + if available_gb < required_gb: + error_msg = f"Insufficient disk space on hypervisor {hypervisor}. Available: {available_gb:.2f}GB, Required: {required_gb:.2f}GB (including 10% overhead)" + log.error(error_msg) + return False, error_msg + + log.info("Hypervisor %s has sufficient disk space for %dGB volume", hypervisor, size_gb) + return True, None + + except Exception as e: + log.error("Error checking disk space on hypervisor %s: %s", hypervisor, str(e)) + return False, f"Error checking disk space: {str(e)}" + def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: """ Process a single VM creation request. 
@@ -633,6 +795,62 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: except subprocess.CalledProcessError as e: logger.error(f"Failed to emit success status event: {e}") + # Validate nsm_size if present + if 'nsm_size' in vm_config: + try: + size = int(vm_config['nsm_size']) + if size <= 0: + log.error("VM: %s - nsm_size must be a positive integer, got: %d", vm_name, size) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': 'Invalid nsm_size: must be positive integer'}) + return + if size > 10000: # 10TB reasonable maximum + log.error("VM: %s - nsm_size %dGB exceeds reasonable maximum (10000GB)", vm_name, size) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': f'Invalid nsm_size: {size}GB exceeds maximum (10000GB)'}) + return + log.debug("VM: %s - nsm_size validated: %dGB", vm_name, size) + except (ValueError, TypeError) as e: + log.error("VM: %s - nsm_size must be a valid integer, got: %s", vm_name, vm_config.get('nsm_size')) + mark_invalid_hardware(hypervisor_path, vm_name, vm_config, + {'nsm_size': 'Invalid nsm_size: must be valid integer'}) + return + + # Check for conflicting storage configurations + has_disk = 'disk' in vm_config and vm_config['disk'] + has_nsm_size = 'nsm_size' in vm_config and vm_config['nsm_size'] + + if has_disk and has_nsm_size: + log.warning("VM: %s - Both disk and nsm_size specified. 
disk takes precedence, nsm_size will be ignored.", + vm_name) + + # Check disk space BEFORE creating VM if nsm_size is specified + if has_nsm_size and not has_disk: + size_gb = int(vm_config['nsm_size']) + has_space, space_error = check_hypervisor_disk_space(hypervisor, size_gb) + if not has_space: + log.error("VM: %s - %s", vm_name, space_error) + + # Send Hypervisor NSM Disk Full status event + try: + subprocess.run([ + 'so-salt-emit-vm-deployment-status-event', + '-v', vm_name, + '-H', hypervisor, + '-s', 'Hypervisor NSM Disk Full' + ], check=True) + except subprocess.CalledProcessError as e: + log.error("Failed to emit volume create failed event for %s: %s", vm_name, str(e)) + + mark_invalid_hardware( + hypervisor_path, + vm_name, + vm_config, + {'disk_space': f"Insufficient disk space for {size_gb}GB volume: {space_error}"} + ) + return + log.debug("VM: %s - Hypervisor has sufficient space for %dGB volume", vm_name, size_gb) + # Initial hardware validation against model is_valid, errors = validate_hardware_request(model_config, vm_config) if not is_valid: @@ -668,6 +886,11 @@ def process_vm_creation(hypervisor_path: str, vm_config: dict) -> None: if 'memory' in vm_config: memory_mib = int(vm_config['memory']) * 1024 cmd.extend(['-m', str(memory_mib)]) + + # Add nsm_size if specified and disk is not specified + if 'nsm_size' in vm_config and vm_config['nsm_size'] and not ('disk' in vm_config and vm_config['disk']): + cmd.extend(['--nsm-size', str(vm_config['nsm_size'])]) + log.debug("VM: %s - Adding nsm_size parameter: %s", vm_name, vm_config['nsm_size']) # Add PCI devices for hw_type in ['disk', 'copper', 'sfp']: @@ -900,12 +1123,21 @@ def process_hypervisor(hypervisor_path: str) -> None: if not nodes_config: log.debug("Empty VMs configuration in %s", vms_file) - # Get existing VMs + # Get existing VMs and track failed VMs separately existing_vms = set() + failed_vms = set() # VMs with .error files for file_path in glob.glob(os.path.join(hypervisor_path, 
'*_*')): basename = os.path.basename(file_path) - # Skip error and status files - if not basename.endswith('.error') and not basename.endswith('.status'): + # Skip status files + if basename.endswith('.status'): + continue + # Track VMs with .error files separately + if basename.endswith('.error'): + vm_name = basename[:-6] # Remove '.error' suffix + failed_vms.add(vm_name) + existing_vms.add(vm_name) # Also add to existing to prevent recreation + log.debug(f"Found failed VM with .error file: {vm_name}") + else: existing_vms.add(basename) # Process new VMs @@ -922,12 +1154,37 @@ def process_hypervisor(hypervisor_path: str) -> None: # process_vm_creation handles its own locking process_vm_creation(hypervisor_path, vm_config) - # Process VM deletions + # Process VM deletions (but skip failed VMs that only have .error files) vms_to_delete = existing_vms - configured_vms log.debug(f"Existing VMs: {existing_vms}") log.debug(f"Configured VMs: {configured_vms}") + log.debug(f"Failed VMs: {failed_vms}") log.debug(f"VMs to delete: {vms_to_delete}") for vm_name in vms_to_delete: + # Skip deletion if VM only has .error file (no actual VM to delete) + if vm_name in failed_vms: + error_file = os.path.join(hypervisor_path, f"{vm_name}.error") + base_file = os.path.join(hypervisor_path, vm_name) + # Only skip if there's no base file (VM never successfully created) + if not os.path.exists(base_file): + log.info(f"Skipping deletion of failed VM {vm_name} (VM never successfully created)") + # Clean up the .error and .status files since VM is no longer configured + if os.path.exists(error_file): + os.remove(error_file) + log.info(f"Removed .error file for unconfigured VM: {vm_name}") + status_file = os.path.join(hypervisor_path, f"{vm_name}.status") + if os.path.exists(status_file): + os.remove(status_file) + log.info(f"Removed .status file for unconfigured VM: {vm_name}") + + # Trigger hypervisor annotation update to reflect the removal + try: + log.info(f"Triggering hypervisor 
annotation update after removing failed VM: {vm_name}") + runner.cmd('state.orch', ['orch.dyanno_hypervisor']) + except Exception as e: + log.error(f"Failed to trigger hypervisor annotation update for {vm_name}: {str(e)}") + + continue log.info(f"Initiating deletion process for VM: {vm_name}") process_vm_deletion(hypervisor_path, vm_name) diff --git a/salt/salt/map.jinja b/salt/salt/map.jinja index 1e3b200f4..62b7f1b18 100644 --- a/salt/salt/map.jinja +++ b/salt/salt/map.jinja @@ -4,7 +4,10 @@ Elastic License 2.0. #} {% set role = salt['grains.get']('role', '') %} -{% if role in ['so-hypervisor','so-managerhype'] and salt['network.ip_addrs']('br0')|length > 0 %} +{# We are using usebr0 mostly for setup of the so-managerhype node and controlling when we use br0 vs the physical interface #} +{% set usebr0 = salt['pillar.get']('usebr0', True) %} + +{% if role in ['so-hypervisor','so-managerhype'] and usebr0 %} {% set interface = 'br0' %} {% else %} {% set interface = pillar.host.mainint %} diff --git a/salt/salt/master.defaults.yaml b/salt/salt/master.defaults.yaml index 8e1a618fd..9dfe8587f 100644 --- a/salt/salt/master.defaults.yaml +++ b/salt/salt/master.defaults.yaml @@ -1,4 +1,4 @@ # version cannot be used elsewhere in this pillar as soup is grepping for it to determine if Salt needs to be patched salt: master: - version: '3006.9' + version: '3006.16' diff --git a/salt/salt/mine_functions.sls b/salt/salt/mine_functions.sls index ed786e997..ae3df1ce9 100644 --- a/salt/salt/mine_functions.sls +++ b/salt/salt/mine_functions.sls @@ -3,7 +3,7 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. 
-# this state was seperated from salt.minion state since it is called during setup +# this state was separated from salt.minion state since it is called during setup # GLOBALS are imported in the salt.minion state and that is not available at that point in setup # this state is included in the salt.minion state diff --git a/salt/salt/minion.defaults.yaml b/salt/salt/minion.defaults.yaml index 7ec839950..e897313d2 100644 --- a/salt/salt/minion.defaults.yaml +++ b/salt/salt/minion.defaults.yaml @@ -1,5 +1,5 @@ # version cannot be used elsewhere in this pillar as soup is grepping for it to determine if Salt needs to be patched salt: minion: - version: '3006.9' + version: '3006.16' check_threshold: 3600 # in seconds, threshold used for so-salt-minion-check. any value less than 600 seconds may cause a lot of salt-minion restarts since the job to touch the file occurs every 5-8 minutes by default diff --git a/salt/salt/minion.sls b/salt/salt/minion/init.sls similarity index 86% rename from salt/salt/minion.sls rename to salt/salt/minion/init.sls index b85fad1c0..374e6954c 100644 --- a/salt/salt/minion.sls +++ b/salt/salt/minion/init.sls @@ -1,18 +1,22 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ {% from 'vars/globals.map.jinja' import GLOBALS %} {% from 'salt/map.jinja' import UPGRADECOMMAND with context %} {% from 'salt/map.jinja' import SALTVERSION %} {% from 'salt/map.jinja' import INSTALLEDSALTVERSION %} {% from 'salt/map.jinja' import SALTPACKAGES %} -{% from 'salt/map.jinja' import SYSTEMD_UNIT_FILE %} {% import_yaml 'salt/minion.defaults.yaml' as SALTMINION %} include: - salt.python_modules - salt.patch.x509_v2 - salt - - systemd.reload - repo.client - salt.mine_functions + - salt.minion.service_file {% if GLOBALS.role in GLOBALS.manager_roles %} - ca {% endif %} @@ -94,17 +98,6 @@ enable_startup_states: - regex: '^startup_states: highstate$' - unless: pgrep so-setup -# prior to 2.4.30 this managed file would restart the salt-minion service when updated -# since this file is currently only adding a delay service start -# it is not required to restart the service -salt_minion_service_unit_file: - file.managed: - - name: {{ SYSTEMD_UNIT_FILE }} - - source: salt://salt/service/salt-minion.service.jinja - - template: jinja - - onchanges_in: - - module: systemd_reload - {% endif %} # this has to be outside the if statement above since there are _in calls to this state diff --git a/salt/salt/minion/service_file.sls b/salt/salt/minion/service_file.sls new file mode 100644 index 000000000..8aded2d60 --- /dev/null +++ b/salt/salt/minion/service_file.sls @@ -0,0 +1,26 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +{% from 'salt/map.jinja' import SALTVERSION %} +{% from 'salt/map.jinja' import INSTALLEDSALTVERSION %} +{% from 'salt/map.jinja' import SYSTEMD_UNIT_FILE %} + +include: + - systemd.reload + +{% if INSTALLEDSALTVERSION|string == SALTVERSION|string %} + +# prior to 2.4.30 this managed file would restart the salt-minion service when updated +# since this file is currently only adding a delay service start +# it is not required to restart the service +salt_minion_service_unit_file: + file.managed: + - name: {{ SYSTEMD_UNIT_FILE }} + - source: salt://salt/service/salt-minion.service.jinja + - template: jinja + - onchanges_in: + - module: systemd_reload + +{% endif %} diff --git a/salt/sensoroni/defaults.yaml b/salt/sensoroni/defaults.yaml index bd74da7ec..acfae6766 100644 --- a/salt/sensoroni/defaults.yaml +++ b/salt/sensoroni/defaults.yaml @@ -5,6 +5,12 @@ sensoroni: enabled: False timeout_ms: 900000 parallel_limit: 5 + export: + timeout_ms: 1200000 + cache_refresh_interval_ms: 10000 + export_metric_limit: 10000 + export_event_limit: 10000 + csv_separator: ',' node_checkin_interval_ms: 10000 sensoronikey: soc_host: diff --git a/salt/sensoroni/files/sensoroni.json b/salt/sensoroni/files/sensoroni.json index c7079c08c..a0f512fa2 100644 --- a/salt/sensoroni/files/sensoroni.json +++ b/salt/sensoroni/files/sensoroni.json @@ -21,7 +21,13 @@ }, {%- endif %} "importer": {}, - "export": {}, + "export": { + "timeoutMs": {{ SENSORONIMERGED.config.export.timeout_ms }}, + "cacheRefreshIntervalMs": {{ SENSORONIMERGED.config.export.cache_refresh_interval_ms }}, + "exportMetricLimit": {{ SENSORONIMERGED.config.export.export_metric_limit }}, + "exportEventLimit": {{ SENSORONIMERGED.config.export.export_event_limit }}, + "csvSeparator": "{{ SENSORONIMERGED.config.export.csv_separator }}" + }, "statickeyauth": { "apiKey": "{{ GLOBALS.sensoroni_key }}" {% if GLOBALS.is_sensor %} diff --git a/salt/sensoroni/soc_sensoroni.yaml b/salt/sensoroni/soc_sensoroni.yaml index 
2344655f6..cf69ec52a 100644 --- a/salt/sensoroni/soc_sensoroni.yaml +++ b/salt/sensoroni/soc_sensoroni.yaml @@ -17,6 +17,27 @@ sensoroni: description: Parallel limit for the analyzer. advanced: True helpLink: cases.html + export: + timeout_ms: + description: Timeout period for the exporter to finish export-related tasks. + advanced: True + helpLink: reports.html + cache_refresh_interval_ms: + description: Refresh interval for cache updates. Longer intervals result in less compute usage but risks stale data included in reports. + advanced: True + helpLink: reports.html + export_metric_limit: + description: Maximum number of metric values to include in each metric aggregation group. + advanced: True + helpLink: reports.html + export_event_limit: + description: Maximum number of events to include per event list. + advanced: True + helpLink: reports.html + csv_separator: + description: Separator character to use for CSV exports. + advanced: False + helpLink: reports.html node_checkin_interval_ms: description: Interval in ms to checkin to the soc_host. 
advanced: True diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index b0bf2fdac..adf0a2e6b 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -1493,6 +1493,9 @@ soc: folder: securityonion-normalized assistant: apiUrl: https://onionai.securityonion.net + healthTimeoutSeconds: 3 + systemPromptAddendum: "" + systemPromptAddendumMaxLength: 50000 salt: queueDir: /opt/sensoroni/queue timeoutMs: 45000 @@ -1695,6 +1698,9 @@ soc: - name: socExcludeToggle filter: 'NOT event.module:"soc"' enabled: true + - name: onionaiExcludeToggle + filter: 'NOT _index:"*:so-assistant-*"' + enabled: true queries: - name: Default Query description: Show all events grouped by the observer host @@ -2605,10 +2611,23 @@ soc: level: 'high' # info | low | medium | high | critical assistant: enabled: false - investigationPrompt: Investigate Alert ID {socid} - contextLimitSmall: 200000 - contextLimitLarge: 1000000 + investigationPrompt: Investigate Alert ID {socId} thresholdColorRatioLow: 0.5 thresholdColorRatioMed: 0.75 thresholdColorRatioMax: 1 - lowBalanceColorAlert: 500000 \ No newline at end of file + availableModels: + - id: sonnet-4 + displayName: Claude Sonnet 4 + contextLimitSmall: 200000 + contextLimitLarge: 1000000 + lowBalanceColorAlert: 500000 + - id: sonnet-4.5 + displayName: Claude Sonnet 4.5 + contextLimitSmall: 200000 + contextLimitLarge: 1000000 + lowBalanceColorAlert: 500000 + - id: gptoss-120b + displayName: GPT-OSS 120B + contextLimitSmall: 128000 + contextLimitLarge: 128000 + lowBalanceColorAlert: 500000 \ No newline at end of file diff --git a/salt/soc/dyanno/hypervisor/hypervisor.yaml b/salt/soc/dyanno/hypervisor/hypervisor.yaml index d13c928ec..143a2f5cb 100644 --- a/salt/soc/dyanno/hypervisor/hypervisor.yaml +++ b/salt/soc/dyanno/hypervisor/hypervisor.yaml @@ -63,18 +63,22 @@ hypervisor: required: true readonly: true forcedType: int + - field: nsm_size + label: "Size of virtual disk to create and use for /nsm, in GB. 
Only applicable if no pass-through disk." + forcedType: int + readonly: true - field: disk - label: "Disk(s) for passthrough. Free: FREE | Total: TOTAL" + label: "Disk(s) to pass through for /nsm. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: copper - label: "Copper port(s) for passthrough. Free: FREE | Total: TOTAL" + label: "Copper port(s) to pass through. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' - field: sfp - label: "SFP port(s) for passthrough. Free: FREE | Total: TOTAL" + label: "SFP port(s) to pass through. Free: FREE | Total: TOTAL" readonly: true options: [] forcedType: '[]int' diff --git a/salt/soc/dyanno/hypervisor/map.jinja b/salt/soc/dyanno/hypervisor/map.jinja index 4a5107371..cb0810959 100644 --- a/salt/soc/dyanno/hypervisor/map.jinja +++ b/salt/soc/dyanno/hypervisor/map.jinja @@ -3,11 +3,14 @@ {# Define the list of process steps in order (case-sensitive) #} {% set PROCESS_STEPS = [ 'Processing', + 'Hypervisor NSM Disk Full', 'IP Configuration', 'Starting Create', 'Executing Deploy Script', 'Initialize Minion Pillars', 'Created Instance', + 'Volume Creation', + 'Volume Configuration', 'Hardware Configuration', 'Highstate Initiated', 'Destroyed Instance' diff --git a/salt/soc/dyanno/hypervisor/remove_failed_vm.sls b/salt/soc/dyanno/hypervisor/remove_failed_vm.sls new file mode 100644 index 000000000..a47eff595 --- /dev/null +++ b/salt/soc/dyanno/hypervisor/remove_failed_vm.sls @@ -0,0 +1,51 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+# +# Note: Per the Elastic License 2.0, the second limitation states: +# +# "You may not move, change, disable, or circumvent the license key functionality +# in the software, and you may not remove or obscure any functionality in the +# software that is protected by the license key." + +{% if 'vrt' in salt['pillar.get']('features', []) %} + +{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Running') %} +{% set vm_name = pillar.get('vm_name') %} +{% set hypervisor = pillar.get('hypervisor') %} + +{% if vm_name and hypervisor %} +{% set vm_parts = vm_name.split('_') %} +{% if vm_parts | length >= 2 %} +{% set vm_role = vm_parts[-1] %} +{% set vm_hostname = '_'.join(vm_parts[:-1]) %} +{% set vms_file = '/opt/so/saltstack/local/salt/hypervisor/hosts/' ~ hypervisor ~ 'VMs' %} + +{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Removing VM ' ~ vm_name ~ ' from ' ~ vms_file) %} + +remove_vm_{{ vm_name }}_from_vms_file: + module.run: + - name: hypervisor.remove_vm_from_vms_file + - vms_file_path: {{ vms_file }} + - vm_hostname: {{ vm_hostname }} + - vm_role: {{ vm_role }} + +{% else %} +{% do salt.log.error('soc/dyanno/hypervisor/remove_failed_vm: Invalid vm_name format: ' ~ vm_name) %} +{% endif %} +{% else %} +{% do salt.log.error('soc/dyanno/hypervisor/remove_failed_vm: Missing required pillar data (vm_name or hypervisor)') %} +{% endif %} + +{% do salt.log.info('soc/dyanno/hypervisor/remove_failed_vm: Completed') %} + +{% else %} + +{% do salt.log.error( + 'Hypervisor nodes are a feature supported only for customers with a valid license. ' + 'Contact Security Onion Solutions, LLC via our website at https://securityonionsolutions.com ' + 'for more information about purchasing a license to enable this feature.' 
+) %} + +{% endif %} diff --git a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja index 926263b9d..ac2fd6fea 100644 --- a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja +++ b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja @@ -13,7 +13,6 @@ {%- import_yaml 'soc/dyanno/hypervisor/hypervisor.yaml' as ANNOTATION -%} {%- from 'hypervisor/map.jinja' import HYPERVISORS -%} -{%- from 'soc/dyanno/hypervisor/map.jinja' import PROCESS_STEPS -%} {%- set TEMPLATE = ANNOTATION.hypervisor.hosts.pop('defaultHost') -%} @@ -27,7 +26,6 @@ {%- if baseDomainStatus == 'Initialized' %} {%- if vm_list %} #### Virtual Machines -Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. "Last Updated" shows when status changed. After "Highstate Initiated", only "Destroyed Instance" updates the timestamp. | Name | Status | CPU Cores | Memory (GB)| Disk | Copper | SFP | Last Updated | |--------------------|--------------------|-----------|------------|------|--------|------|---------------------| @@ -42,7 +40,6 @@ Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {%- endfor %} {%- else %} #### Virtual Machines -Status values: {% for step in PROCESS_STEPS %}{{ step }}{% if not loop.last %}, {% endif %}{% endfor %}. "Last Updated" shows when status changed. After "Highstate Initiated", only "Destroyed Instance" updates the timestamp. No Virtual Machines Found {%- endif %} @@ -96,9 +93,21 @@ Base domain has not been initialized. 
{%- endif -%} {%- endfor -%} -{# Calculate available resources #} -{%- set cpu_free = hw_config.cpu - ns.used_cpu -%} -{%- set mem_free = hw_config.memory - ns.used_memory -%} +{# Determine host OS overhead based on role #} +{%- if role == 'hypervisor' -%} +{%- set host_os_cpu = 8 -%} +{%- set host_os_memory = 16 -%} +{%- elif role == 'managerhype' -%} +{%- set host_os_cpu = 16 -%} +{%- set host_os_memory = 32 -%} +{%- else -%} +{%- set host_os_cpu = 0 -%} +{%- set host_os_memory = 0 -%} +{%- endif -%} + +{# Calculate available resources (subtract both VM usage and host OS overhead) #} +{%- set cpu_free = hw_config.cpu - ns.used_cpu - host_os_cpu -%} +{%- set mem_free = hw_config.memory - ns.used_memory - host_os_memory -%} {# Get used PCI indices #} {%- set used_disk = [] -%} diff --git a/salt/soc/files/bin/salt-relay.sh b/salt/soc/files/bin/salt-relay.sh index 16c387f86..4fc7d8d3d 100755 --- a/salt/soc/files/bin/salt-relay.sh +++ b/salt/soc/files/bin/salt-relay.sh @@ -237,10 +237,22 @@ function manage_salt() { case "$op" in state) - log "Performing '$op' for '$state' on minion '$minion'" state=$(echo "$request" | jq -r .state) - response=$(salt --async "$minion" state.apply "$state" queue=2) + async=$(echo "$request" | jq -r .async) + if [[ $async == "true" ]]; then + log "Performing async '$op' on minion $minion with state '$state'" + response=$(salt --async "$minion" state.apply "$state" queue=2) + else + log "Performing '$op' on minion $minion with state '$state'" + response=$(salt "$minion" state.apply "$state") + fi + exit_code=$? 
+ if [[ $exit_code -ne 0 && "$response" =~ "is running as PID" ]]; then + log "Salt already running: $response ($exit_code)" + respond "$id" "ERROR_SALT_ALREADY_RUNNING" + return + fi ;; highstate) log "Performing '$op' on minion $minion" @@ -259,7 +271,7 @@ function manage_salt() { ;; esac - if [[ exit_code -eq 0 ]]; then + if [[ $exit_code -eq 0 ]]; then log "Successful command execution: $response" respond "$id" "true" else diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index 031b3faef..bc67a5295 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -631,6 +631,19 @@ soc: description: The URL of the AI gateway. advanced: True global: True + healthTimeoutSeconds: + description: Timeout in seconds for the Onion AI health check. + global: True + advanced: True + systemPromptAddendum: + description: Additional context to provide to the AI assistant about this SOC deployment. This can include information about your environment, policies, or any other relevant details that can help the AI provide more accurate and tailored assistance. Long prompts may be shortened. + global: True + advanced: False + multiline: True + systemPromptAddendumMaxLength: + description: Maximum length of the system prompt addendum. Longer prompts will be truncated. + global: True + advanced: True client: assistant: enabled: @@ -639,14 +652,6 @@ soc: investigationPrompt: description: Prompt given to Onion AI when beginning an investigation. global: True - contextLimitSmall: - description: Smaller context limit for Onion AI. - global: True - advanced: True - contextLimitLarge: - description: Larger context limit for Onion AI. - global: True - advanced: True thresholdColorRatioLow: description: Lower visual context color change threshold. global: True @@ -661,7 +666,34 @@ soc: advanced: True lowBalanceColorAlert: description: Onion AI credit amount at which balance turns red. 
+ global: True advanced: True + availableModels: + description: List of AI models available for use in SOC as well as model specific warning thresholds. + global: True + advanced: True + forcedType: "[]{}" + helpLink: assistant.html + syntax: json + uiElements: + - field: id + label: Model ID + required: True + - field: displayName + label: Display Name + required: True + - field: contextLimitSmall + label: Context Limit (Small) + forcedType: int + required: True + - field: contextLimitLarge + label: Context Limit (Large) + forcedType: int + required: True + - field: lowBalanceColorAlert + label: Low Balance Color Alert + forcedType: int + required: True apiTimeoutMs: description: Duration (in milliseconds) to wait for a response from the SOC server API before giving up and showing an error on the SOC UI. global: True diff --git a/salt/storage/init.sls b/salt/storage/init.sls index 533366fd0..ab5926bf5 100644 --- a/salt/storage/init.sls +++ b/salt/storage/init.sls @@ -4,10 +4,17 @@ # Elastic License 2.0. 
-{% set nvme_devices = salt['cmd.shell']("find /dev -name 'nvme*n1' 2>/dev/null") %} +{% set nvme_devices = salt['cmd.shell']("ls /dev/nvme*n1 2>/dev/null || echo ''") %} +{% set virtio_devices = salt['cmd.shell']("test -b /dev/vdb && echo '/dev/vdb' || echo ''") %} + {% if nvme_devices %} include: - - storage.nsm_mount + - storage.nsm_mount_nvme + +{% elif virtio_devices %} + +include: + - storage.nsm_mount_virtio {% endif %} diff --git a/salt/storage/nsm_mount.sls b/salt/storage/nsm_mount_nvme.sls similarity index 87% rename from salt/storage/nsm_mount.sls rename to salt/storage/nsm_mount_nvme.sls index ed9e97c33..a0d317014 100644 --- a/salt/storage/nsm_mount.sls +++ b/salt/storage/nsm_mount_nvme.sls @@ -22,8 +22,8 @@ storage_nsm_mount_logdir: # Install the NSM mount script storage_nsm_mount_script: file.managed: - - name: /usr/sbin/so-nsm-mount - - source: salt://storage/tools/sbin/so-nsm-mount + - name: /usr/sbin/so-nsm-mount-nvme + - source: salt://storage/tools/sbin/so-nsm-mount-nvme - mode: 755 - user: root - group: root @@ -34,7 +34,7 @@ storage_nsm_mount_script: # Execute the mount script if not already mounted storage_nsm_mount_execute: cmd.run: - - name: /usr/sbin/so-nsm-mount + - name: /usr/sbin/so-nsm-mount-nvme - unless: mountpoint -q /nsm - require: - file: storage_nsm_mount_script diff --git a/salt/storage/nsm_mount_virtio.sls b/salt/storage/nsm_mount_virtio.sls new file mode 100644 index 000000000..34ca8a883 --- /dev/null +++ b/salt/storage/nsm_mount_virtio.sls @@ -0,0 +1,39 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +# Install required packages +storage_nsm_mount_virtio_packages: + pkg.installed: + - pkgs: + - xfsprogs + +# Ensure log directory exists +storage_nsm_mount_virtio_logdir: + file.directory: + - name: /opt/so/log + - makedirs: True + - user: root + - group: root + - mode: 755 + +# Install the NSM mount script +storage_nsm_mount_virtio_script: + file.managed: + - name: /usr/sbin/so-nsm-mount-virtio + - source: salt://storage/tools/sbin/so-nsm-mount-virtio + - mode: 755 + - user: root + - group: root + - require: + - pkg: storage_nsm_mount_virtio_packages + - file: storage_nsm_mount_virtio_logdir + +# Execute the mount script if not already mounted +storage_nsm_mount_virtio_execute: + cmd.run: + - name: /usr/sbin/so-nsm-mount-virtio + - unless: mountpoint -q /nsm + - require: + - file: storage_nsm_mount_virtio_script diff --git a/salt/storage/tools/sbin/so-nsm-mount b/salt/storage/tools/sbin/so-nsm-mount-nvme similarity index 99% rename from salt/storage/tools/sbin/so-nsm-mount rename to salt/storage/tools/sbin/so-nsm-mount-nvme index 24125fc40..fdde0c2e9 100644 --- a/salt/storage/tools/sbin/so-nsm-mount +++ b/salt/storage/tools/sbin/so-nsm-mount-nvme @@ -81,7 +81,7 @@ set -e -LOG_FILE="/opt/so/log/so-nsm-mount.log" +LOG_FILE="/opt/so/log/so-nsm-mount-nvme" VG_NAME="" LV_NAME="nsm" MOUNT_POINT="/nsm" diff --git a/salt/storage/tools/sbin/so-nsm-mount-virtio b/salt/storage/tools/sbin/so-nsm-mount-virtio new file mode 100644 index 000000000..03476e378 --- /dev/null +++ b/salt/storage/tools/sbin/so-nsm-mount-virtio @@ -0,0 +1,171 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +# Usage: +# so-nsm-mount-virtio +# +# Options: +# None - script automatically configures /dev/vdb +# +# Examples: +# 1. Configure and mount virtio-blk device: +# ```bash +# sudo so-nsm-mount-virtio +# ``` +# +# Notes: +# - Requires root privileges +# - Mounts /dev/vdb as /nsm +# - Creates XFS filesystem if needed +# - Configures persistent mount via /etc/fstab +# - Safe to run multiple times +# +# Description: +# This script automates the configuration and mounting of virtio-blk devices +# as /nsm in Security Onion virtual machines. It performs these steps: +# +# Dependencies: +# - xfsprogs: Required for XFS filesystem operations +# +# 1. Safety Checks: +# - Verifies root privileges +# - Checks if /nsm is already mounted +# - Verifies /dev/vdb exists +# +# 2. Filesystem Creation: +# - Creates XFS filesystem on /dev/vdb if not already formatted +# +# 3. Mount Configuration: +# - Creates /nsm directory if needed +# - Adds entry to /etc/fstab for persistence +# - Mounts the filesystem as /nsm +# +# Exit Codes: +# 0: Success conditions: +# - Device configured and mounted +# - Already properly mounted +# 1: Error conditions: +# - Must be run as root +# - Device /dev/vdb not found +# - Filesystem creation failed +# - Mount operation failed +# +# Logging: +# - All operations logged to /opt/so/log/so-nsm-mount-virtio + +set -e + +LOG_FILE="/opt/so/log/so-nsm-mount-virtio" +DEVICE="/dev/vdb" +MOUNT_POINT="/nsm" + +# Function to log messages +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') $1" | tee -a "$LOG_FILE" +} + +# Function to log errors +log_error() { + echo "$(date '+%Y-%m-%d %H:%M:%S') ERROR: $1" | tee -a "$LOG_FILE" >&2 +} + +# Function to check if running as root +check_root() { + if [ "$EUID" -ne 0 ]; then + log_error "Must be run as root" + exit 1 + fi +} + +# Main execution +main() { + log "==========================================" + log "Starting virtio-blk NSM mount process" + log "==========================================" + + # Check root privileges + 
check_root + + # Check if already mounted + if mountpoint -q "$MOUNT_POINT"; then + log "$MOUNT_POINT is already mounted" + log "==========================================" + exit 0 + fi + + # Check if device exists + if [ ! -b "$DEVICE" ]; then + log_error "Device $DEVICE not found" + log "==========================================" + exit 1 + fi + + log "Found device: $DEVICE" + + # Get device size + local size=$(lsblk -dbn -o SIZE "$DEVICE" 2>/dev/null | numfmt --to=iec) + log "Device size: $size" + + # Check if device has filesystem + if ! blkid "$DEVICE" | grep -q 'TYPE="xfs"'; then + log "Creating XFS filesystem on $DEVICE" + if ! mkfs.xfs -f "$DEVICE" 2>&1 | tee -a "$LOG_FILE"; then + log_error "Failed to create filesystem" + log "==========================================" + exit 1 + fi + log "Filesystem created successfully" + else + log "Device already has XFS filesystem" + fi + + # Create mount point + if [ ! -d "$MOUNT_POINT" ]; then + log "Creating mount point $MOUNT_POINT" + mkdir -p "$MOUNT_POINT" + fi + + # Add to fstab if not present + if ! 
grep -q "$DEVICE.*$MOUNT_POINT" /etc/fstab; then + log "Adding entry to /etc/fstab" + echo "$DEVICE $MOUNT_POINT xfs defaults 0 0" >> /etc/fstab + log "Entry added to /etc/fstab" + else + log "Entry already exists in /etc/fstab" + fi + + # Mount the filesystem + log "Mounting $DEVICE to $MOUNT_POINT" + if mount "$MOUNT_POINT" 2>&1 | tee -a "$LOG_FILE"; then + log "Successfully mounted $DEVICE to $MOUNT_POINT" + + # Verify mount + if mountpoint -q "$MOUNT_POINT"; then + log "Mount verified successfully" + + # Display mount information + log "Mount details:" + df -h "$MOUNT_POINT" | tail -n 1 | tee -a "$LOG_FILE" + else + log_error "Mount verification failed" + log "==========================================" + exit 1 + fi + else + log_error "Failed to mount $DEVICE" + log "==========================================" + exit 1 + fi + + log "==========================================" + log "Virtio-blk NSM mount process completed successfully" + log "==========================================" + exit 0 +} + +# Run main function +main diff --git a/salt/strelka/filestream/enabled.sls b/salt/strelka/filestream/enabled.sls index c90b1e83f..ef5d593ba 100644 --- a/salt/strelka/filestream/enabled.sls +++ b/salt/strelka/filestream/enabled.sls @@ -14,7 +14,7 @@ include: strelka_filestream: docker_container.running: - - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-filestream:{{ GLOBALS.so_version }} + - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-manager:{{ GLOBALS.so_version }} - binds: - /opt/so/conf/strelka/filestream/:/etc/strelka/:ro - /nsm/strelka:/nsm/strelka diff --git a/salt/strelka/frontend/enabled.sls b/salt/strelka/frontend/enabled.sls index f95a31a7e..709b3e71c 100644 --- a/salt/strelka/frontend/enabled.sls +++ b/salt/strelka/frontend/enabled.sls @@ -14,7 +14,7 @@ include: strelka_frontend: docker_container.running: - - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo 
}}/so-strelka-frontend:{{ GLOBALS.so_version }} + - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-strelka-manager:{{ GLOBALS.so_version }} - binds: - /opt/so/conf/strelka/frontend/:/etc/strelka/:ro - /nsm/strelka/log/:/var/log/strelka/:rw diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index b358c178f..d2cb87057 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -337,4 +337,5 @@ ] data_format = "influx" interval = "1h" + timeout = "120s" {%- endif %} diff --git a/salt/zeek/policy/custom/filters/dns b/salt/zeek/policy/custom/filters/dns new file mode 100644 index 000000000..e79032c19 --- /dev/null +++ b/salt/zeek/policy/custom/filters/dns @@ -0,0 +1,30 @@ +hook DNS::log_policy(rec: DNS::Info, id: Log::ID, filter: Log::Filter) + { + # Only put a single name per line otherwise there will be memory issues! + # If the query comes back blank don't log + if (!rec?$query) + break; + + # If the query comes back with one of these don't log + if (rec?$query && /google.com$/ in rec$query) + break; + + # If the query comes back with one of these don't log + if (rec?$query && /.apple.com$/ in rec$query) + break; + + # Don't log reverse lookups + if (rec?$query && /.in-addr.arpa/ in to_lower(rec$query)) + break; + + # Don't log netbios lookups. 
This generates a crazy amount of logs + if (rec?$qtype_name && /NB/ in rec$qtype_name) + break; + } + +event zeek_init() +{ + Log::remove_default_filter(DNS::LOG); + local filter: Log::Filter = [$name="dns-filter"]; + Log::add_filter(DNS::LOG, filter); +} \ No newline at end of file diff --git a/salt/zeek/policy/custom/filters/files b/salt/zeek/policy/custom/filters/files new file mode 100644 index 000000000..311f37cc2 --- /dev/null +++ b/salt/zeek/policy/custom/filters/files @@ -0,0 +1,13 @@ +hook Files::log_policy(rec: Files::Info, id: Log::ID, filter: Log::Filter) + { + # Turn off a specific mimetype + if (rec?$mime_type && ( /soap+xml/ | /json/ | /xml/ | /x509/ )in rec$mime_type) + break; + } + +event zeek_init() +{ + Log::remove_default_filter(Files::LOG); + local filter: Log::Filter = [$name="files-filter"]; + Log::add_filter(Files::LOG, filter); +} diff --git a/salt/zeek/policy/custom/filters/httphost b/salt/zeek/policy/custom/filters/httphost new file mode 100644 index 000000000..29c682d33 --- /dev/null +++ b/salt/zeek/policy/custom/filters/httphost @@ -0,0 +1,20 @@ +### HTTP filter by host entries by string ##### + +module Filterhttp; + +export { + global remove_host_entries: set[string] = {"www.genevalab.com", "www.google.com"}; + } + +hook HTTP::log_policy(rec: HTTP::Info, id: Log::ID, filter: Log::Filter) + { + # Remove HTTP host entries + if ( !
rec?$host || rec$host in remove_host_entries ) + break; + } +event zeek_init() +{ + Log::remove_default_filter(HTTP::LOG); + local filter: Log::Filter = [$name="http-filter"]; + Log::add_filter(HTTP::LOG, filter); +} \ No newline at end of file diff --git a/salt/zeek/policy/custom/filters/httpuri b/salt/zeek/policy/custom/filters/httpuri new file mode 100644 index 000000000..9a57cc5ff --- /dev/null +++ b/salt/zeek/policy/custom/filters/httpuri @@ -0,0 +1,14 @@ +### HTTP filter by uri using pattern #### + +hook HTTP::log_policy(rec: HTTP::Info, id: Log::ID, filter: Log::Filter) + { + # Remove HTTP uri entries by regex + if ( rec?$uri && /^\/kratos\// in rec$uri ) + break; + } +event zeek_init() +{ + Log::remove_default_filter(HTTP::LOG); + local filter: Log::Filter = [$name="http-filter"]; + Log::add_filter(HTTP::LOG, filter); +} \ No newline at end of file diff --git a/salt/zeek/policy/custom/filters/ssl b/salt/zeek/policy/custom/filters/ssl new file mode 100644 index 000000000..e7be0f768 --- /dev/null +++ b/salt/zeek/policy/custom/filters/ssl @@ -0,0 +1,29 @@ +### Log filter by JA3S md5 hash: +hook SSL::log_policy(rec: SSL::Info, id: Log::ID, filter: Log::Filter) + { + # SSL log filter Ja3s by md5 + if (rec?$ja3s_cipher && ( /623de93db17d313345d7ea481e7443cf/ )in rec$ja3s_cipher) + break; + } + +event zeek_init() +{ + Log::remove_default_filter(SSL::LOG); + local filter: Log::Filter = [$name="ssl-filter"]; + Log::add_filter(SSL::LOG, filter); +} + +### Log filter by server name: +hook SSL::log_policy(rec: SSL::Info, id: Log::ID, filter: Log::Filter) + { + # SSL log filter by server name + if (rec?$server_name && ( /api.github.com$/ ) in rec$server_name) + break; + } + +event zeek_init() +{ + Log::remove_default_filter(SSL::LOG); + local filter: Log::Filter = [$name="ssl-filter"]; + Log::add_filter(SSL::LOG, filter); +} \ No newline at end of file diff --git a/salt/zeek/policy/custom/filters/tunnel b/salt/zeek/policy/custom/filters/tunnel new file mode 
100644 index 000000000..dd58caa4d --- /dev/null +++ b/salt/zeek/policy/custom/filters/tunnel @@ -0,0 +1,17 @@ +global tunnel_subnet: set[subnet]={ + + 10.19.0.0/24 + +}; + +hook Tunnel::log_policy(rec: Tunnel::Info, id: Log::ID, Filter: Log::Filter) + { + if (rec$id$orig_h in tunnel_subnet || rec$id$resp_h in tunnel_subnet) + break; + } +event zeek_init() +{ + Log::remove_default_filter(Tunnel::LOG); + local filter: Log::Filter = [$name="tunnel-filter"]; + Log::add_filter(Tunnel::LOG, filter); +} \ No newline at end of file diff --git a/salt/zeek/soc_zeek.yaml b/salt/zeek/soc_zeek.yaml index b3b655083..929b9debd 100644 --- a/salt/zeek/soc_zeek.yaml +++ b/salt/zeek/soc_zeek.yaml @@ -61,6 +61,48 @@ zeek: global: True advanced: True duplicates: True + dns: + description: DNS Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + files: + description: Files Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + httphost: + description: HTTP Hosts Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + httpuri: + description: HTTP URI Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + ssl: + description: SSL Filter for Zeek. This is an advanced setting and will take further action to enable. + helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True + tunnel: + description: Tunnel Filter for Zeek. This is an advanced setting and will take further action to enable. 
+ helpLink: zeek.html + file: True + global: True + advanced: True + duplicates: True file_extraction: description: Contains a list of file or MIME types Zeek will extract from the network streams. Values must adhere to the following format - {"MIME_TYPE":"FILE_EXTENSION"} forcedType: "[]{}" diff --git a/setup/so-functions b/setup/so-functions index 0e5d78ce8..7db039f00 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -541,8 +541,15 @@ configure_minion() { "log_file: /opt/so/log/salt/minion"\ "#startup_states: highstate" >> "$minion_config" - info "Running: salt-call state.apply salt.mine_functions --local --file-root=../salt/ -l info pillar='{"host": {"mainint": "$MNIC"}}'" - salt-call state.apply salt.mine_functions --local --file-root=../salt/ -l info pillar="{'host': {'mainint': $MNIC}}" + # At the time the so-managerhype node does not yet have the bridge configured. + # The so-hypervisor node doesn't either, but it doesn't cause issues here. + local usebr0=false + if [ "$minion_type" == 'hypervisor' ]; then + usebr0=true + fi + local pillar_json="{\"host\": {\"mainint\": \"$MNIC\"}, \"usebr0\": $usebr0}" + info "Running: salt-call state.apply salt.mine_functions --local --file-root=../salt/ -l info pillar='$pillar_json'" + salt-call state.apply salt.mine_functions --local --file-root=../salt/ -l info pillar="$pillar_json" { logCmd "systemctl enable salt-minion"; @@ -1193,10 +1200,7 @@ hypervisor_local_states() { info "Running libvirt states for hypervisor" logCmd "salt-call state.apply libvirt.64962 --local --file-root=../salt/ -l info queue=True" info "Setting up bridge for $MNIC" - salt-call state.apply libvirt.bridge --local --file-root=../salt/ -l info pillar='{"host": {"mainint": "'$MNIC'"}}' queue=True - if [ $is_managerhype ]; then - logCmd "salt-call state.apply salt.minion queue=True" - fi + salt-call state.apply libvirt.bridge --local --file-root=../salt/ -l info pillar='{"host": {"mainint": "'$MNIC'"}}' queue=True fi } @@ -1636,6 +1640,12 
@@ reserve_ports() { fi } +clear_previous_setup_results() { + # Disregard previous setup outcomes. + rm -f /root/failure + rm -f /root/success +} + reinstall_init() { info "Putting system in state to run setup again" @@ -1647,10 +1657,6 @@ reinstall_init() { local service_retry_count=20 - # Disregard previous install outcomes - rm -f /root/failure - rm -f /root/success - { # remove all of root's cronjobs logCmd "crontab -r -u root" @@ -2295,7 +2301,7 @@ set_redirect() { set_timezone() { - logCmd "timedatectl set-timezone Etc/UTC" + timedatectl set-timezone Etc/UTC } diff --git a/setup/so-setup b/setup/so-setup index 347a7165c..bdb1c38e2 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -132,6 +132,10 @@ if [[ -f /root/accept_changes ]]; then reset_proxy fi +# Previous setup attempts, even if setup doesn't actually start the installation, +# can leave behind results that may interfere with the current setup attempt. +clear_previous_setup_results + title "Parsing Username for Install" parse_install_username @@ -762,6 +766,7 @@ if ! [[ -f $install_opt_file ]]; then fi logCmd "salt-call state.apply common.packages" logCmd "salt-call state.apply common" + hypervisor_local_states # this will apply the salt.minion state first since salt.master includes salt.minion logCmd "salt-call state.apply salt.master" # wait here until we get a response from the salt-master since it may have just restarted @@ -826,7 +831,6 @@ if ! 
[[ -f $install_opt_file ]]; then checkin_at_boot set_initial_firewall_access logCmd "salt-call schedule.enable -linfo --local" - hypervisor_local_states verify_setup else touch /root/accept_changes diff --git a/setup/so-verify b/setup/so-verify index d22b80fc2..f99c6e418 100755 --- a/setup/so-verify +++ b/setup/so-verify @@ -68,6 +68,7 @@ log_has_errors() { grep -vE "Command failed with exit code" | \ grep -vE "Running scope as unit" | \ grep -vE "securityonion-resources/sigma/stable" | \ + grep -vE "remove_failed_vm.sls" | \ grep -vE "log-.*-pipeline_failed_attempts" &> "$error_log" if [[ $? -eq 0 ]]; then diff --git a/sigs/securityonion-2.4.180-20250916.iso.sig b/sigs/securityonion-2.4.180-20250916.iso.sig new file mode 100644 index 000000000..4ba61d389 Binary files /dev/null and b/sigs/securityonion-2.4.180-20250916.iso.sig differ diff --git a/sigs/securityonion-2.4.190-20251024.iso.sig b/sigs/securityonion-2.4.190-20251024.iso.sig new file mode 100644 index 000000000..430c09e47 Binary files /dev/null and b/sigs/securityonion-2.4.190-20251024.iso.sig differ