From de4424fab0b666ab4299fa3ba259a1b84ba60a78 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 14 Nov 2025 19:15:51 -0600 Subject: [PATCH 01/17] remove typos --- salt/elasticsearch/defaults.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/salt/elasticsearch/defaults.yaml b/salt/elasticsearch/defaults.yaml index 592f47a2b..bbfaf3244 100644 --- a/salt/elasticsearch/defaults.yaml +++ b/salt/elasticsearch/defaults.yaml @@ -245,7 +245,6 @@ elasticsearch: set_priority: priority: 50 min_age: 30d - warm: 7 so-detection: index_sorting: false index_template: @@ -584,7 +583,6 @@ elasticsearch: set_priority: priority: 50 min_age: 30d - warm: 7 so-import: index_sorting: false index_template: @@ -932,7 +930,6 @@ elasticsearch: set_priority: priority: 50 min_age: 30d - warm: 7 so-hydra: close: 30 delete: 365 @@ -1043,7 +1040,6 @@ elasticsearch: set_priority: priority: 50 min_age: 30d - warm: 7 so-lists: index_sorting: false index_template: @@ -3123,7 +3119,6 @@ elasticsearch: set_priority: priority: 50 min_age: 30d - warm: 7 so-logs-system_x_application: index_sorting: false index_template: From a155f450362854ca65f037ce713386aea931103d Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 19 Nov 2025 13:24:29 -0600 Subject: [PATCH 02/17] always update annotation / defaults for managed integrations --- salt/manager/managed_soc_annotations.sls | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/salt/manager/managed_soc_annotations.sls b/salt/manager/managed_soc_annotations.sls index d8f175df6..64f45d40d 100644 --- a/salt/manager/managed_soc_annotations.sls +++ b/salt/manager/managed_soc_annotations.sls @@ -25,9 +25,7 @@ {% set index_settings = es.get('index_settings', {}) %} {% set input = index_settings.get('so-logs', {}) %} {% for k in matched_integration_names %} - {% if k not in index_settings %} - {% set _ = index_settings.update({k: input}) %} - {% endif %} + {% set _ = index_settings.update({k: input}) %} {% endfor %} {% for k in addon_integration_keys %} {% if k not in matched_integration_names and k in index_settings %} @@ -45,10 +43,8 @@ {% set es = data.get('elasticsearch', {}) %} {% set index_settings = es.get('index_settings', {}) %} {% for k in matched_integration_names %} - {% if k not in index_settings %} - {% set input = ADDON_INTEGRATION_DEFAULTS[k] %} - {% set _ = index_settings.update({k: input})%} - {% endif %} + {% set input = ADDON_INTEGRATION_DEFAULTS[k] %} + {% set _ = index_settings.update({k: input})%} {% endfor %} {% for k in addon_integration_keys %} {% if k not in matched_integration_names and k in index_settings %} From b52dd53e2906e92af253c09cb5b2475caa47c77e Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 19 Nov 2025 13:24:55 -0600 Subject: [PATCH 03/17] advanced ilm actions --- .../integration-defaults.map.jinja | 22 +++ salt/elasticsearch/defaults.yaml | 32 +++ salt/elasticsearch/soc_elasticsearch.yaml | 186 ++++++++++++++++++ salt/elasticsearch/template.map.jinja | 87 ++++++++ 4 files changed, 327 insertions(+) diff --git a/salt/elasticfleet/integration-defaults.map.jinja b/salt/elasticfleet/integration-defaults.map.jinja index 500a9e63c..69ce7f3af 100644 --- a/salt/elasticfleet/integration-defaults.map.jinja +++ b/salt/elasticfleet/integration-defaults.map.jinja @@ -121,6 +121,9 @@ "phases": { "cold": { "actions": { + "allocate":{ + "number_of_replicas": "" + }, "set_priority": {"priority": 0} }, "min_age": "60d" @@ -137,12 +140,31 @@ "max_age": "30d", "max_primary_shard_size": "50gb" }, + "forcemerge":{ + "max_num_segments": "" + }, + "shrink":{ + "max_primary_shard_size": "", + "method": "COUNT", + "number_of_shards": "" + }, "set_priority": {"priority": 100} }, "min_age": "0ms" }, "warm": { "actions": { + "allocate": { + "number_of_replicas": "" + }, + "forcemerge": { + "max_num_segments": "" + }, + "shrink":{ + "max_primary_shard_size": "", + "method": "COUNT", + "number_of_shards": "" + }, "set_priority": {"priority": 50} }, "min_age": "30d" diff --git a/salt/elasticsearch/defaults.yaml b/salt/elasticsearch/defaults.yaml index bbfaf3244..5cfb9a0e0 100644 --- a/salt/elasticsearch/defaults.yaml +++ b/salt/elasticsearch/defaults.yaml @@ -72,6 +72,8 @@ elasticsearch: actions: set_priority: priority: 0 + allocate: + number_of_replicas: "" min_age: 60d delete: actions: @@ -84,11 +86,25 @@ elasticsearch: max_primary_shard_size: 50gb set_priority: priority: 100 + forcemerge: + max_num_segments: "" + shrink: + max_primary_shard_size: "" + method: COUNT + number_of_shards: "" min_age: 0ms warm: actions: set_priority: priority: 50 + forcemerge: + max_num_segments: "" + shrink: + max_primary_shard_size: "" + method: COUNT + number_of_shards: "" + allocate: + number_of_replicas: "" min_age: 30d so-case: index_sorting: false @@ -1123,6 +1139,8 @@ elasticsearch: actions: set_priority: priority: 0 + allocate: + number_of_replicas: "" min_age: 60d delete: actions: @@ -1135,11 +1153,25 @@ elasticsearch: max_primary_shard_size: 50gb set_priority: priority: 100 + forcemerge: + max_num_segments: "" + shrink: + max_primary_shard_size: "" + method: COUNT + number_of_shards: "" min_age: 0ms warm: actions: set_priority: priority: 50 + allocate: + number_of_replicas: "" + forcemerge: + max_num_segments: "" + shrink: + max_primary_shard_size: "" + method: COUNT + number_of_shards: "" min_age: 30d so-logs-detections_x_alerts: index_sorting: false diff --git a/salt/elasticsearch/soc_elasticsearch.yaml b/salt/elasticsearch/soc_elasticsearch.yaml index 097a53296..27d5654b5 100644 --- a/salt/elasticsearch/soc_elasticsearch.yaml +++ b/salt/elasticsearch/soc_elasticsearch.yaml @@ -131,6 +131,47 @@ elasticsearch: description: Maximum primary shard size. Once an index reaches this limit, it will be rolled over into a new index. global: True helpLink: elasticsearch.html + shrink: + method: + description: Shrink the index to a new index with fewer primary shards. Shrink operation is by count or size. + options: + - COUNT + - SIZE + global: True + advanced: True + forcedType: string + number_of_shards: + title: shard count + description: Desired shard count. Note that this value is only used when the shrink method selected is 'COUNT'. + global: True + forcedType: int + advanced: True + max_primary_shard_size: + title: max shard size + description: Desired shard size in gb/tb/pb eg. 100gb. Note that this value is only used when the shrink method selected is 'SIZE'. + regex: ^(?:[0-9]+(?:gb|tb|pb)|)$ + global: True + forcedType: string + advanced: True + allow_write_after_shrink: + description: Allow writes after shrink. + global: True + forcedType: bool + default: False + advanced: True + forcemerge: + max_num_segments: + description: Reduce the number of segments in each index shard and clean up deleted documents. + global: True + forcedType: int + advanced: True + index_codec: + title: compression + description: Use higher compression for stored fields at the cost of slower performance. + forcedType: bool + global: True + default: False + advanced: True cold: min_age: description: Minimum age of index. ex. 60d - This determines when the index should be moved to the cold tier. While still searchable, this tier is typically optimized for lower storage costs rather than search speed. It’s important to note that this is calculated relative to the rollover date (NOT the original creation date of the index). For example, if you have an index that is set to rollover after 30 days and cold min_age set to 60 then there will be 30 days from index creation to rollover and then an additional 60 days before moving to cold tier. @@ -144,6 +185,12 @@ elasticsearch: description: Used for index recovery after a node restart. Indices with higher priorities are recovered before indices with lower priorities. global: True helpLink: elasticsearch.html + allocate: + number_of_replicas: + description: Set the number of replicas. Remains the same as the previous phase by default. + forcedType: int + global: True + advanced: True warm: min_age: description: Minimum age of index. ex. 30d - This determines when the index should be moved to the warm tier. Nodes in the warm tier generally don’t need to be as fast as those in the hot tier. It’s important to note that this is calculated relative to the rollover date (NOT the original creation date of the index). For example, if you have an index that is set to rollover after 30 days and warm min_age set to 30 then there will be 30 days from index creation to rollover and then an additional 30 days before moving to warm tier. @@ -158,6 +205,52 @@ elasticsearch: forcedType: int global: True helpLink: elasticsearch.html + shrink: + method: + description: Shrink the index to a new index with fewer primary shards. Shrink operation is by count or size. + options: + - COUNT + - SIZE + global: True + advanced: True + number_of_shards: + title: shard count + description: Desired shard count. Note that this value is only used when the shrink method selected is 'COUNT'. + global: True + forcedType: int + advanced: True + max_primary_shard_size: + title: max shard size + description: Desired shard size in gb/tb/pb eg. 100gb. Note that this value is only used when the shrink method selected is 'SIZE'. + regex: ^(?:[0-9]+(?:gb|tb|pb)|)$ + global: True + forcedType: string + advanced: True + allow_write_after_shrink: + description: Allow writes after shrink. + global: True + forcedType: bool + default: False + advanced: True + forcemerge: + max_num_segments: + description: Reduce the number of segments in each index shard and clean up deleted documents. + global: True + forcedType: int + advanced: True + index_codec: + title: compression + description: Use higher compression for stored fields at the cost of slower performance. + forcedType: bool + global: True + default: False + advanced: True + allocate: + number_of_replicas: + description: Set the number of replicas. Remains the same as the previous phase by default. + forcedType: int + global: True + advanced: True delete: min_age: description: Minimum age of index. ex. 90d - This determines when the index should be deleted. It’s important to note that this is calculated relative to the rollover date (NOT the original creation date of the index). For example, if you have an index that is set to rollover after 30 days and delete min_age set to 90 then there will be 30 days from index creation to rollover and then an additional 90 days before deletion. @@ -287,6 +380,47 @@ elasticsearch: global: True advanced: True helpLink: elasticsearch.html + shrink: + method: + description: Shrink the index to a new index with fewer primary shards. Shrink operation is by count or size. + options: + - COUNT + - SIZE + global: True + advanced: True + forcedType: string + number_of_shards: + title: shard count + description: Desired shard count. Note that this value is only used when the shrink method selected is 'COUNT'. + global: True + forcedType: int + advanced: True + max_primary_shard_size: + title: max shard size + description: Desired shard size in gb/tb/pb eg. 100gb. Note that this value is only used when the shrink method selected is 'SIZE'. + regex: ^(?:[0-9]+(?:gb|tb|pb)|)$ + global: True + forcedType: string + advanced: True + allow_write_after_shrink: + description: Allow writes after shrink. + global: True + forcedType: bool + default: False + advanced: True + forcemerge: + max_num_segments: + description: Reduce the number of segments in each index shard and clean up deleted documents. + global: True + forcedType: int + advanced: True + index_codec: + title: compression + description: Use higher compression for stored fields at the cost of slower performance. + forcedType: bool + global: True + default: False + advanced: True warm: min_age: description: Minimum age of index. ex. 30d - This determines when the index should be moved to the warm tier. Nodes in the warm tier generally don’t need to be as fast as those in the hot tier. It’s important to note that this is calculated relative to the rollover date (NOT the original creation date of the index). For example, if you have an index that is set to rollover after 30 days and warm min_age set to 30 then there will be 30 days from index creation to rollover and then an additional 30 days before moving to warm tier. @@ -314,6 +448,52 @@ elasticsearch: global: True advanced: True helpLink: elasticsearch.html + shrink: + method: + description: Shrink the index to a new index with fewer primary shards. Shrink operation is by count or size. + options: + - COUNT + - SIZE + global: True + advanced: True + number_of_shards: + title: shard count + description: Desired shard count. Note that this value is only used when the shrink method selected is 'COUNT'. + global: True + forcedType: int + advanced: True + max_primary_shard_size: + title: max shard size + description: Desired shard size in gb/tb/pb eg. 100gb. Note that this value is only used when the shrink method selected is 'SIZE'. + regex: ^(?:[0-9]+(?:gb|tb|pb)|)$ + global: True + forcedType: string + advanced: True + allow_write_after_shrink: + description: Allow writes after shrink. + global: True + forcedType: bool + default: False + advanced: True + forcemerge: + max_num_segments: + description: Reduce the number of segments in each index shard and clean up deleted documents. + global: True + forcedType: int + advanced: True + index_codec: + title: compression + description: Use higher compression for stored fields at the cost of slower performance. + forcedType: bool + global: True + default: False + advanced: True + allocate: + number_of_replicas: + description: Set the number of replicas. Remains the same as the previous phase by default. + forcedType: int + global: True + advanced: True cold: min_age: description: Minimum age of index. ex. 60d - This determines when the index should be moved to the cold tier. While still searchable, this tier is typically optimized for lower storage costs rather than search speed. It’s important to note that this is calculated relative to the rollover date (NOT the original creation date of the index). For example, if you have an index that is set to rollover after 30 days and cold min_age set to 60 then there will be 30 days from index creation to rollover and then an additional 60 days before moving to cold tier. @@ -330,6 +510,12 @@ elasticsearch: global: True advanced: True helpLink: elasticsearch.html + allocate: + number_of_replicas: + description: Set the number of replicas. Remains the same as the previous phase by default. + forcedType: int + global: True + advanced: True delete: min_age: description: Minimum age of index. ex. 90d - This determines when the index should be deleted. It’s important to note that this is calculated relative to the rollover date (NOT the original creation date of the index). For example, if you have an index that is set to rollover after 30 days and delete min_age set to 90 then there will be 30 days from index creation to rollover and then an additional 90 days before deletion. diff --git a/salt/elasticsearch/template.map.jinja b/salt/elasticsearch/template.map.jinja index 414d8a6b4..904ab862c 100644 --- a/salt/elasticsearch/template.map.jinja +++ b/salt/elasticsearch/template.map.jinja @@ -61,5 +61,92 @@ {% do settings.index_template.template.settings.index.pop('sort') %} {% endif %} {% endif %} + +{# advanced ilm actions #} +{% if settings.policy is defined and settings.policy.phases is defined %} +{# start HOT actions #} +{# only run if hot action is defined for this index #} +{% if settings.policy.phases.hot is defined and settings.policy.phases.hot.actions is defined %} +{% set HA = settings.policy.phases.hot.actions %} +{% if HA.shrink is defined %} +{% if HA.shrink.method is defined %} +{% if HA.shrink.method == 'COUNT' and HA.shrink.number_of_shards is defined and HA.shrink.number_of_shards %} +{# remove max_primary_shard_size value when doing shrink operation by count vs size #} +{% do HA.shrink.pop('max_primary_shard_size', none) %} +{% elif HA.shrink.method == 'SIZE' and HA.shrink.max_primary_shard_size is defined and HA.shrink.max_primary_shard_size %} +{# remove number_of_shards value when doing shrink operation by size vs count #} +{% do HA.shrink.pop('number_of_shards', none) %} +{% else %} +{# method isn't defined or missing a required config number_of_shards/max_primary_shard_size #} +{% do HA.pop('shrink', none) %} +{% endif %} +{% endif %} +{% endif %} +{# always remove method since its only used for SOC config, not in the actual ilm policy #} +{% if HA.shrink is defined %} +{% do HA.shrink.pop('method', none) %} +{% endif %} +{# end shrink action #} +{# start force merge #} +{% if HA.forcemerge is defined %} +{% if HA.forcemerge.index_codec is defined and HA.forcemerge.index_codec %} +{% do HA.forcemerge.update({'index_codec': 'best_compression'}) %} +{% else %} +{% do HA.forcemerge.pop('index_codec', none) %} +{% endif %} +{% if HA.forcemerge.max_num_segments is defined and not HA.forcemerge.max_num_segments %} +{# max_num_segments is empty, drop it #} +{% do HA.pop('forcemerge', none) %} +{% endif %} +{% endif %} +{# end force merge #} +{% endif %} +{# end HOT actions #} +{# Start WARM actions #} +{# only run if warm action is defined for this index #} +{% if settings.policy.phases.warm is defined and settings.policy.phases.warm.actions is defined %} +{% set WA = settings.policy.phases.warm.actions %} +{# start warm shrink action #} +{% if WA.shrink is defined %} +{% if WA.shrink.method is defined %} +{% if WA.shrink.method == 'COUNT' and WA.shrink.number_of_shards is defined and WA.shrink.number_of_shards %} +{# remove max_primary_shard_size value when doing shrink operation by count vs size #} +{% do WA.shrink.pop('max_primary_shard_size', none) %} +{% elif WA.shrink.method == 'SIZE' and WA.shrink.max_primary_shard_size is defined and WA.shrink.max_primary_shard_size %} +{# remove number_of_shards value when doing shrink operation by size vs count #} +{% do WA.shrink.pop('number_of_shards', none) %} +{% else %} +{# method isn't defined or missing a required config number_of_shards/max_primary_shard_size #} +{% do WA.pop('shrink', none) %} +{% endif %} +{% endif %} +{% endif %} +{# always remove method since its only used for SOC config, not in the actual ilm policy #} +{% if WA.shrink is defined %} +{% do WA.shrink.pop('method', none) %} +{% endif %} +{# end shrink action #} +{# start force merge #} +{% if WA.forcemerge is defined %} +{% if WA.forcemerge.index_codec is defined and WA.forcemerge.index_codec %} +{% do WA.forcemerge.update({'index_codec': 'best_compression'}) %} +{% else %} +{% do WA.forcemerge.pop('index_codec', none) %} +{% endif %} +{% if WA.forcemerge.max_num_segments is defined and not WA.forcemerge.max_num_segments %} +{# max_num_segments is empty, drop it #} +{% do WA.pop('forcemerge', none) %} +{% endif %} +{% endif %} +{# end force merge #} +{% if WA.allocate is defined %} +{% if WA.allocate.number_of_replicas is defined and not WA.allocate.number_of_replicas %} +{% do WA.pop('allocate', none) %} +{% endif %} +{% endif %} +{% endif %} +{# end WARM actions #} +{% endif %} + {% do ES_INDEX_SETTINGS.update({index | replace("_x_", "."): ES_INDEX_SETTINGS_GLOBAL_OVERRIDES[index]}) %} {% endfor %} From bce7a20d8b61146d8c600253adcb8febec66dc55 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 21 Nov 2025 14:19:51 -0600 Subject: [PATCH 04/17] soc configurable EA logstash output adv settings --- salt/elasticfleet/defaults.yaml | 8 +++++ salt/elasticfleet/soc_elasticfleet.yaml | 40 +++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/salt/elasticfleet/defaults.yaml b/salt/elasticfleet/defaults.yaml index 0f013e320..a3132d3f4 100644 --- a/salt/elasticfleet/defaults.yaml +++ b/salt/elasticfleet/defaults.yaml @@ -10,6 +10,14 @@ elasticfleet: grid_enrollment: '' defend_filters: enable_auto_configuration: False + outputs: + logstash: + bulk_max_size: '' + worker: '' + queue_mem_events: '' + timeout: '' + loadbalance: False + compression_level: '' subscription_integrations: False auto_upgrade_integrations: False logging: diff --git a/salt/elasticfleet/soc_elasticfleet.yaml b/salt/elasticfleet/soc_elasticfleet.yaml index 450e044e6..d7c324855 100644 --- a/salt/elasticfleet/soc_elasticfleet.yaml +++ b/salt/elasticfleet/soc_elasticfleet.yaml @@ -50,6 +50,46 @@ elasticfleet: global: True forcedType: bool helpLink: elastic-fleet.html + outputs: + logstash: + bulk_max_size: + description: The maximum number of events to bulk in a single Logstash request. + global: True + forcedType: int + advanced: True + helpLink: elastic-fleet.html + worker: + description: The number of workers per configured host publishing events. + global: True + forcedType: int + advanced: true + helpLink: elastic-fleet.html + queue_mem_events: + title: queued events + description: The number of events the queue can store. This value should be evenly divisible by the smaller of 'bulk_max_size' to avoid sending partial batches to the output. + global: True + forcedType: int + advanced: True + helpLink: elastic-fleet.html + timeout: + description: The number of seconds to wait for responses from the Logstash server before timing out. Eg 30s + regex: ^[0-9]+s$ + advanced: True + global: True + helpLink: elastic-fleet.html + loadbalance: + description: If true and multiple Logstash hosts are configured, the output plugin load balances published events onto all Logstash hosts. If false, the output plugin sends all events to one host (determined at random) and switches to another host if the selected one becomes unresponsive. + forcedType: bool + advanced: True + global: True + helpLink: elastic-fleet.html + compression: + description: The gzip compression level. The compression level must be in the range of 1 (best speed) to 9 (best compression). + regex: ^[1-9]$ + forcedType: int + advanced: True + global: True + helpLink: elastic-fleet.html server: custom_fqdn: description: Custom FQDN for Agents to connect to. One per line. From 4490ea763594b320315740d1435120cf993693f0 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 21 Nov 2025 14:21:17 -0600 Subject: [PATCH 05/17] format EA logstash output adv config items --- salt/elasticfleet/config.map.jinja | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 salt/elasticfleet/config.map.jinja diff --git a/salt/elasticfleet/config.map.jinja b/salt/elasticfleet/config.map.jinja new file mode 100644 index 000000000..b95a3e895 --- /dev/null +++ b/salt/elasticfleet/config.map.jinja @@ -0,0 +1,34 @@ +{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one + or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at + https://securityonion.net/license; you may not use this file except in compliance with the + Elastic License 2.0. #} + +{% from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %} + +{# advanced config_yaml options for elasticfleet logstash output #} +{% set ADV_OUTPUT_LOGSTASH_RAW = ELASTICFLEETMERGED.config.outputs.logstash %} +{% set ADV_OUTPUT_LOGSTASH = {} %} +{% for k, v in ADV_OUTPUT_LOGSTASH_RAW.items() %} +{% if v != "" and v is not none %} +{% if k == 'queue_mem_events' %} +{# rename queue_mem_events queue.mem.events #} +{% do ADV_OUTPUT_LOGSTASH.update({'queue.mem.events':v}) %} +{% elif k == 'loadbalance' %} +{% if v %} +{# only include loadbalance config when its True #} +{% do ADV_OUTPUT_LOGSTASH.update({k:v}) %} +{% endif %} +{% else %} +{% do ADV_OUTPUT_LOGSTASH.update({k:v}) %} +{% endif %} +{% endif %} +{% endfor %} + +{% set LOGSTASH_CONFIG_YAML_RAW = [] %} +{% if ADV_OUTPUT_LOGSTASH %} +{% for k, v in ADV_OUTPUT_LOGSTASH.items() %} +{% do LOGSTASH_CONFIG_YAML_RAW.append(k ~ ': ' ~ v) %} +{% endfor %} +{% endif %} + +{% set LOGSTASH_CONFIG_YAML = LOGSTASH_CONFIG_YAML_RAW | join('\\n') if LOGSTASH_CONFIG_YAML_RAW else '' %} From 1fb00c8eb696d91eb8c0e16ce96976c3ecdb298d Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 21 Nov 2025 14:22:42 -0600 Subject: [PATCH 06/17] update so-elastic-fleet-outputs-update to use advanced output options when set, else empty "". Also trigger update_logstash_outputs() when hash of config_yaml has changed --- .../so-elastic-fleet-outputs-update | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update index 9efe8a19d..de9b5f93f 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update @@ -3,11 +3,13 @@ # Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one # or more contributor license agreements. Licensed under the Elastic License 2.0; you may not use # this file except in compliance with the Elastic License 2.0. -{% from 'vars/globals.map.jinja' import GLOBALS %} -{% from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %} +{%- from 'vars/globals.map.jinja' import GLOBALS %} +{%- from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %} +{%- from 'elasticfleet/config.map.jinja' import LOGSTASH_CONFIG_YAML %} . /usr/sbin/so-common +FORCE_UPDATE=false # Only run on Managers if ! is_manager_node; then printf "Not a Manager Node... Exiting" @@ -22,7 +24,7 @@ function update_logstash_outputs() { --arg UPDATEDLIST "$NEW_LIST_JSON" \ --argjson SECRETS "$SECRETS" \ --argjson SSL_CONFIG "$SSL_CONFIG" \ - '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG,"secrets": $SECRETS}') + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": $SSL_CONFIG,"secrets": $SECRETS}') else JSON_STRING=$(jq -n \ --arg UPDATEDLIST "$NEW_LIST_JSON" \ @@ -97,9 +99,18 @@ function update_kafka_outputs() { exit 1 fi + CURRENT_LOGSTASH_ADV_CONFIG=$(jq -r '.item.config_yaml // ""' <<< "$RAW_JSON") + CURRENT_LOGSTASH_ADV_CONFIG_HASH=$(sha256sum <<< "$CURRENT_LOGSTASH_ADV_CONFIG" | awk '{print $1}') + NEW_LOGSTASH_ADV_CONFIG=$'{{ LOGSTASH_CONFIG_YAML }}' + NEW_LOGSTASH_ADV_CONFIG_HASH=$(sha256sum <<< "$NEW_LOGSTASH_ADV_CONFIG" | awk '{print $1}') + + if [ "$CURRENT_LOGSTASH_ADV_CONFIG_HASH" != "$NEW_LOGSTASH_ADV_CONFIG_HASH" ]; then + FORCE_UPDATE=true + fi + # Get the current list of Logstash outputs & hash them CURRENT_LIST=$(jq -c -r '.item.hosts' <<< "$RAW_JSON") - CURRENT_HASH=$(sha1sum <<< "$CURRENT_LIST" | awk '{print $1}') + CURRENT_HASH=$(sha256sum <<< "$CURRENT_LIST" | awk '{print $1}') declare -a NEW_LIST=() @@ -148,10 +159,10 @@ function update_kafka_outputs() { # Sort & hash the new list of Logstash Outputs NEW_LIST_JSON=$(jq --compact-output --null-input '$ARGS.positional' --args -- "${NEW_LIST[@]}") -NEW_HASH=$(sha1sum <<< "$NEW_LIST_JSON" | awk '{print $1}') +NEW_HASH=$(sha256sum <<< "$NEW_LIST_JSON" | awk '{print $1}') # Compare the current & new list of outputs - if different, update the Logstash outputs -if [ "$NEW_HASH" = "$CURRENT_HASH" ]; then +if [[ "$NEW_HASH" = "$CURRENT_HASH" ]] && [[ "$FORCE_UPDATE" != "true" ]]; then printf "\nHashes match - no update needed.\n" printf "Current List: $CURRENT_LIST\nNew List: $NEW_LIST_JSON\n" From 99cb51482fb1b03c32af7375ab6c1b57517b2687 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 21 Nov 2025 14:32:58 -0600 Subject: [PATCH 07/17] unneeded 'set' --- salt/manager/managed_soc_annotations.sls | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/manager/managed_soc_annotations.sls b/salt/manager/managed_soc_annotations.sls index 64f45d40d..4357b53a2 100644 --- a/salt/manager/managed_soc_annotations.sls +++ b/salt/manager/managed_soc_annotations.sls @@ -25,11 +25,11 @@ {% set index_settings = es.get('index_settings', {}) %} {% set input = index_settings.get('so-logs', {}) %} {% for k in matched_integration_names %} - {% set _ = index_settings.update({k: input}) %} + {% do index_settings.update({k: input}) %} {% endfor %} {% for k in addon_integration_keys %} {% if k not in matched_integration_names and k in index_settings %} - {% set _ = index_settings.pop(k) %} + {% do index_settings.pop(k) %} {% endif %} {% endfor %} {{ data }} @@ -44,11 +44,11 @@ {% set index_settings = es.get('index_settings', {}) %} {% for k in matched_integration_names %} {% set input = ADDON_INTEGRATION_DEFAULTS[k] %} - {% set _ = index_settings.update({k: input})%} + {% do index_settings.update({k: input})%} {% endfor %} {% for k in addon_integration_keys %} {% if k not in matched_integration_names and k in index_settings %} - {% set _ = index_settings.pop(k) %} + {% do index_settings.pop(k) %} {% endif %} {% endfor %} {{ data }} From b80ec95fa8e16345b356d40726bd1ce41d1aeef9 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 21 Nov 2025 14:41:03 -0600 Subject: [PATCH 08/17] update regex, revert to default will allow setting value back to '' | None --- salt/elasticsearch/soc_elasticsearch.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/elasticsearch/soc_elasticsearch.yaml b/salt/elasticsearch/soc_elasticsearch.yaml index 27d5654b5..7fd4f8329 100644 --- a/salt/elasticsearch/soc_elasticsearch.yaml +++ b/salt/elasticsearch/soc_elasticsearch.yaml @@ -149,7 +149,7 @@ elasticsearch: max_primary_shard_size: title: max shard size description: Desired shard size in gb/tb/pb eg. 100gb. Note that this value is only used when the shrink method selected is 'SIZE'. - regex: ^(?:[0-9]+(?:gb|tb|pb)|)$ + regex: ^[0-9]+(?:gb|tb|pb)$ global: True forcedType: string advanced: True @@ -222,7 +222,7 @@ elasticsearch: max_primary_shard_size: title: max shard size description: Desired shard size in gb/tb/pb eg. 100gb. Note that this value is only used when the shrink method selected is 'SIZE'. - regex: ^(?:[0-9]+(?:gb|tb|pb)|)$ + regex: ^[0-9]+(?:gb|tb|pb)$ global: True forcedType: string advanced: True @@ -398,7 +398,7 @@ elasticsearch: max_primary_shard_size: title: max shard size description: Desired shard size in gb/tb/pb eg. 100gb. Note that this value is only used when the shrink method selected is 'SIZE'. - regex: ^(?:[0-9]+(?:gb|tb|pb)|)$ + regex: ^[0-9]+(?:gb|tb|pb)$ global: True forcedType: string advanced: True @@ -465,7 +465,7 @@ elasticsearch: max_primary_shard_size: title: max shard size description: Desired shard size in gb/tb/pb eg. 100gb. Note that this value is only used when the shrink method selected is 'SIZE'. - regex: ^(?:[0-9]+(?:gb|tb|pb)|)$ + regex: ^[0-9]+(?:gb|tb|pb)$ global: True forcedType: string advanced: True From 415ea07a4ff43e73242c5f121797758921fb976b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 21 Nov 2025 16:04:26 -0600 Subject: [PATCH 09/17] clean up --- salt/elasticsearch/template.map.jinja | 113 +++++++++----------------- 1 file changed, 38 insertions(+), 75 deletions(-) diff --git a/salt/elasticsearch/template.map.jinja b/salt/elasticsearch/template.map.jinja index 904ab862c..659823df8 100644 --- a/salt/elasticsearch/template.map.jinja +++ b/salt/elasticsearch/template.map.jinja @@ -64,88 +64,51 @@ {# advanced ilm actions #} {% if settings.policy is defined and settings.policy.phases is defined %} -{# start HOT actions #} -{# only run if hot action is defined for this index #} -{% if settings.policy.phases.hot is defined and settings.policy.phases.hot.actions is defined %} -{% set HA = settings.policy.phases.hot.actions %} -{% if HA.shrink is defined %} -{% if HA.shrink.method is defined %} -{% if HA.shrink.method == 'COUNT' and HA.shrink.number_of_shards is defined and HA.shrink.number_of_shards %} -{# remove max_primary_shard_size value when doing shrink operation by count vs size #} -{% do HA.shrink.pop('max_primary_shard_size', none) %} -{% elif HA.shrink.method == 'SIZE' and HA.shrink.max_primary_shard_size is defined and HA.shrink.max_primary_shard_size %} -{# remove number_of_shards value when doing shrink operation by size vs count #} -{% do HA.shrink.pop('number_of_shards', none) %} -{% else %} -{# method isn't defined or missing a required config number_of_shards/max_primary_shard_size #} -{% do HA.pop('shrink', none) %} +{% set PHASE_NAMES = ["hot", "warm", "cold"] %} +{% for P in PHASE_NAMES %} +{% if settings.policy.phases[P] is defined and settings.policy.phases[P].actions is defined %} +{% set PHASE = settings.policy.phases[P].actions %} +{# remove allocate action if number_of_replicas isn't configured #} +{% if PHASE.allocate is defined %} +{% if PHASE.allocate.number_of_replicas is defined and not PHASE.allocate.number_of_replicas %} +{% do PHASE.pop('allocate', none) %} {% endif %} {% endif %} -{% endif %} -{# always remove method since its only used for SOC config, not in the actual ilm policy #} -{% if HA.shrink is defined %} -{% do HA.shrink.pop('method', none) %} -{% endif %} -{# end shrink action #} -{# start force merge #} -{% if HA.forcemerge is defined %} -{% if HA.forcemerge.index_codec is defined and HA.forcemerge.index_codec %} -{% do HA.forcemerge.update({'index_codec': 'best_compression'}) %} -{% else %} -{% do HA.forcemerge.pop('index_codec', none) %} -{% endif %} -{% if HA.forcemerge.max_num_segments is defined and not HA.forcemerge.max_num_segments %} -{# max_num_segments is empty, drop it #} -{% do HA.pop('forcemerge', none) %} -{% endif %} -{% endif %} -{# end force merge #} -{% endif %} -{# end HOT actions #} -{# Start WARM actions #} -{# only run if warm action is defined for this index #} -{% if settings.policy.phases.warm is defined and settings.policy.phases.warm.actions is defined %} -{% set WA = settings.policy.phases.warm.actions %} -{# start warm shrink action #} -{% if WA.shrink is defined %} -{% if WA.shrink.method is defined %} -{% if WA.shrink.method == 'COUNT' and WA.shrink.number_of_shards is defined and WA.shrink.number_of_shards %} -{# remove max_primary_shard_size value when doing shrink operation by count vs size #} -{% do WA.shrink.pop('max_primary_shard_size', none) %} -{% elif WA.shrink.method == 'SIZE' and WA.shrink.max_primary_shard_size is defined and WA.shrink.max_primary_shard_size %} -{# remove number_of_shards value when doing shrink operation by size vs count #} -{% do WA.shrink.pop('number_of_shards', none) %} -{% else %} -{# method isn't defined or missing a required config number_of_shards/max_primary_shard_size #} -{% do WA.pop('shrink', none) %} +{# start shrink action #} +{% if PHASE.shrink is defined %} +{% if PHASE.shrink.method is defined %} +{% if PHASE.shrink.method == 'COUNT' and PHASE.shrink.number_of_shards is defined and PHASE.shrink.number_of_shards %} +{# remove max_primary_shard_size value when doing shrink operation by count vs size #} +{% do PHASE.shrink.pop('max_primary_shard_size', none) %} +{% elif PHASE.shrink.method == 'SIZE' and PHASE.shrink.max_primary_shard_size is defined and PHASE.shrink.max_primary_shard_size %} +{# remove number_of_shards value when doing shrink operation by size vs count #} +{% do PHASE.shrink.pop('number_of_shards', none) %} +{% else %} +{# method isn't defined or missing a required config number_of_shards/max_primary_shard_size #} +{% do PHASE.pop('shrink', none) %} +{% endif %} {% endif %} {% endif %} -{% endif %} -{# always remove method since its only used for SOC config, not in the actual ilm policy #} -{% if WA.shrink is defined %} -{% do WA.shrink.pop('method', none) %} -{% endif %} -{# end shrink action #} -{# start force merge #} -{% if WA.forcemerge is defined %} -{% if WA.forcemerge.index_codec is defined and WA.forcemerge.index_codec %} -{% do WA.forcemerge.update({'index_codec': 'best_compression'}) %} -{% else %} -{% do WA.forcemerge.pop('index_codec', none) %} +{# always remove shrink method since its only used for SOC config, not in the actual ilm policy #} +{% if PHASE.shrink is defined %} +{% do PHASE.shrink.pop('method', none) %} {% endif %} -{% if WA.forcemerge.max_num_segments is defined and not WA.forcemerge.max_num_segments %} -{# max_num_segments is empty, drop it #} -{% do WA.pop('forcemerge', none) %} +{# end shrink action #} +{# start force merge #} +{% if PHASE.forcemerge is defined %} +{% if PHASE.forcemerge.index_codec is defined and PHASE.forcemerge.index_codec %} +{% do PHASE.forcemerge.update({'index_codec': 'best_compression'}) %} +{% else %} +{% do PHASE.forcemerge.pop('index_codec', none) %} +{% endif %} +{% if PHASE.forcemerge.max_num_segments is defined and not PHASE.forcemerge.max_num_segments %} +{# max_num_segments is empty, drop it #} +{% do PHASE.pop('forcemerge', none) %} +{% endif %} {% endif %} +{# end force merge #} {% endif %} -{# end force merge #} -{% if WA.allocate is defined %} -{% if WA.allocate.number_of_replicas is defined and not WA.allocate.number_of_replicas %} -{% do WA.pop('allocate', none) %} -{% endif %} -{% endif %} -{% endif %} -{# end WARM actions #} +{% endfor %} {% endif %} {% do ES_INDEX_SETTINGS.update({index | replace("_x_", "."): ES_INDEX_SETTINGS_GLOBAL_OVERRIDES[index]}) %} From 3339b50dafbb08ba503bd842f41339c99ce484f8 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 21 Nov 2025 16:39:45 -0600 Subject: [PATCH 10/17] drop forcemerge when max_num_segements doesn't exist or empty --- salt/elasticsearch/template.map.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticsearch/template.map.jinja b/salt/elasticsearch/template.map.jinja index 659823df8..b726445ed 100644 --- a/salt/elasticsearch/template.map.jinja +++ b/salt/elasticsearch/template.map.jinja @@ -101,7 +101,7 @@ {% else %} {% do PHASE.forcemerge.pop('index_codec', none) %} {% endif %} -{% if PHASE.forcemerge.max_num_segments is defined and not PHASE.forcemerge.max_num_segments %} +{% if PHASE.forcemerge.max_num_segments is not defined or not PHASE.forcemerge.max_num_segments %} {# max_num_segments is empty, drop it #} {% do PHASE.pop('forcemerge', none) %} {% endif %} From cc8fb96047544a62f73b357fd86b70fb0e8fe5d2 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 24 Nov 2025 11:12:09 -0600 Subject: [PATCH 11/17] valid config for number_of_replicas in allocate action includes 0 --- salt/elasticsearch/template.map.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticsearch/template.map.jinja b/salt/elasticsearch/template.map.jinja index b726445ed..2563f8e23 100644 --- a/salt/elasticsearch/template.map.jinja +++ b/salt/elasticsearch/template.map.jinja @@ -70,7 +70,7 @@ {% set PHASE = settings.policy.phases[P].actions %} {# remove allocate action if number_of_replicas isn't configured #} {% if PHASE.allocate is defined %} -{% if PHASE.allocate.number_of_replicas is defined and not PHASE.allocate.number_of_replicas %} +{% if PHASE.allocate.number_of_replicas is not defined or PHASE.allocate.number_of_replicas == "" %} {% do PHASE.pop('allocate', none) %} {% endif %} {% endif %} From 45a8c0acd1d0ac6d68aafc23d7298748746f6770 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 2 Dec 2025 11:16:08 -0600 Subject: [PATCH 12/17] merge 2.4/dev --- salt/_runners/setup_hypervisor.py | 175 ++++++++++++----- salt/elasticfleet/enabled.sls | 10 + .../so-elastic-fleet-outputs-update | 63 +++++- .../files/ingest/suricata.common | 183 +++++++++++++++--- .../templates/component/ecs/suricata.json | 4 + .../tools/sbin_jinja/so-kvm-create-volume | 15 +- .../engines/master/virtual_node_manager.py | 3 +- .../hypervisor/soc_hypervisor.yaml.jinja | 20 +- salt/zeek/files/config.zeek.ja4 | 2 + 9 files changed, 381 insertions(+), 94 deletions(-) diff --git a/salt/_runners/setup_hypervisor.py b/salt/_runners/setup_hypervisor.py index 929801783..182a9b2c8 100644 --- a/salt/_runners/setup_hypervisor.py +++ b/salt/_runners/setup_hypervisor.py @@ -172,7 +172,15 @@ MANAGER_HOSTNAME = socket.gethostname() def _download_image(): """ - Download and validate the Oracle Linux KVM image. + Download and validate the Oracle Linux KVM image with retry logic and progress monitoring. + + Features: + - Detects stalled downloads (no progress for 30 seconds) + - Retries up to 3 times on failure + - Connection timeout of 30 seconds + - Read timeout of 60 seconds + - Cleans up partial downloads on failure + Returns: bool: True if successful or file exists with valid checksum, False on error """ @@ -185,45 +193,107 @@ def _download_image(): os.unlink(IMAGE_PATH) log.info("Starting image download process") + + # Retry configuration + max_attempts = 3 + retry_delay = 5 # seconds to wait between retry attempts + stall_timeout = 30 # seconds without progress before considering download stalled + connection_timeout = 30 # seconds to establish connection + read_timeout = 60 # seconds to wait for data chunks + + for attempt in range(1, max_attempts + 1): + log.info("Download attempt %d of %d", attempt, max_attempts) + + try: + # Download file with timeouts + log.info("Downloading Oracle Linux KVM image from %s to %s", IMAGE_URL, IMAGE_PATH) + response = requests.get( + IMAGE_URL, + stream=True, + timeout=(connection_timeout, read_timeout) + ) + response.raise_for_status() - try: - # Download file - log.info("Downloading Oracle Linux KVM image from %s to %s", IMAGE_URL, IMAGE_PATH) - response = requests.get(IMAGE_URL, stream=True) - response.raise_for_status() + # Get total file size for progress tracking + total_size = int(response.headers.get('content-length', 0)) + downloaded_size = 0 + last_log_time = 0 + last_progress_time = time.time() + last_downloaded_size = 0 - # Get total file size for progress tracking - total_size = int(response.headers.get('content-length', 0)) - downloaded_size = 0 - last_log_time = 0 + # Save file with progress logging and stall detection + with salt.utils.files.fopen(IMAGE_PATH, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + if chunk: # filter out keep-alive new chunks + f.write(chunk) + downloaded_size += len(chunk) + current_time = time.time() + + # Check for stalled download + if downloaded_size > last_downloaded_size: + # Progress made, reset stall timer + last_progress_time = current_time + last_downloaded_size = downloaded_size + elif current_time - last_progress_time > stall_timeout: + # No progress for stall_timeout seconds + raise Exception( + f"Download stalled: no progress for {stall_timeout} seconds " + f"at {downloaded_size}/{total_size} bytes" + ) + + # Log progress every second + if current_time - last_log_time >= 1: + progress = (downloaded_size / total_size) * 100 if total_size > 0 else 0 + log.info("Progress - %.1f%% (%d/%d bytes)", + progress, downloaded_size, total_size) + last_log_time = current_time - # Save file with progress logging - with salt.utils.files.fopen(IMAGE_PATH, 'wb') as f: - for chunk in response.iter_content(chunk_size=8192): - f.write(chunk) - downloaded_size += len(chunk) + # Validate downloaded file + log.info("Download complete, validating checksum...") + if not _validate_image_checksum(IMAGE_PATH, IMAGE_SHA256): + log.error("Checksum validation failed on attempt %d", attempt) + os.unlink(IMAGE_PATH) + if attempt < max_attempts: + log.info("Will retry download...") + continue + else: + log.error("All download attempts failed due to checksum mismatch") + return False + + log.info("Successfully downloaded and validated Oracle Linux KVM image") + return True + + except requests.exceptions.Timeout as e: + log.error("Download attempt %d failed: Timeout - %s", attempt, str(e)) + if os.path.exists(IMAGE_PATH): + os.unlink(IMAGE_PATH) + if attempt < max_attempts: + log.info("Will retry download in %d seconds...", retry_delay) + time.sleep(retry_delay) + else: + log.error("All download attempts failed due to timeout") - # Log progress every second - current_time = time.time() - if current_time - last_log_time >= 1: - progress = (downloaded_size / total_size) * 100 if total_size > 0 else 0 - log.info("Progress - %.1f%% (%d/%d bytes)", - progress, downloaded_size, total_size) - last_log_time = current_time - - # Validate downloaded file - if not _validate_image_checksum(IMAGE_PATH, IMAGE_SHA256): - os.unlink(IMAGE_PATH) - return False - - log.info("Successfully downloaded and validated Oracle Linux KVM image") - return True - - except Exception as e: - log.error("Error downloading hypervisor image: %s", str(e)) - if os.path.exists(IMAGE_PATH): - os.unlink(IMAGE_PATH) - return False + except requests.exceptions.RequestException as e: + log.error("Download attempt %d failed: Network error - %s", attempt, str(e)) + if os.path.exists(IMAGE_PATH): + os.unlink(IMAGE_PATH) + if attempt < max_attempts: + log.info("Will retry download in %d seconds...", retry_delay) + time.sleep(retry_delay) + else: + log.error("All download attempts failed due to network errors") + + except Exception as e: + log.error("Download attempt %d failed: %s", attempt, str(e)) + if os.path.exists(IMAGE_PATH): + os.unlink(IMAGE_PATH) + if attempt < max_attempts: + log.info("Will retry download in %d seconds...", retry_delay) + time.sleep(retry_delay) + else: + log.error("All download attempts failed") + + return False def _check_ssh_keys_exist(): """ @@ -419,25 +489,28 @@ def _ensure_hypervisor_host_dir(minion_id: str = None): log.error(f"Error creating hypervisor host directory: {str(e)}") return False -def _apply_dyanno_hypervisor_state(): +def _apply_dyanno_hypervisor_state(status): """ Apply the soc.dyanno.hypervisor state on the salt master. This function applies the soc.dyanno.hypervisor state on the salt master to update the hypervisor annotation and ensure all hypervisor host directories exist. + Args: + status: Status passed to the hypervisor annotation state + Returns: bool: True if state was applied successfully, False otherwise """ try: - log.info("Applying soc.dyanno.hypervisor state on salt master") + log.info(f"Applying soc.dyanno.hypervisor state on salt master with status: {status}") # Initialize the LocalClient local = salt.client.LocalClient() # Target the salt master to apply the soc.dyanno.hypervisor state target = MANAGER_HOSTNAME + '_*' - state_result = local.cmd(target, 'state.apply', ['soc.dyanno.hypervisor', "pillar={'baseDomain': {'status': 'PreInit'}}", 'concurrent=True'], tgt_type='glob') + state_result = local.cmd(target, 'state.apply', ['soc.dyanno.hypervisor', f"pillar={{'baseDomain': {{'status': '{status}'}}}}", 'concurrent=True'], tgt_type='glob') log.debug(f"state_result: {state_result}") # Check if state was applied successfully if state_result: @@ -454,17 +527,17 @@ def _apply_dyanno_hypervisor_state(): success = False if success: - log.info("Successfully applied soc.dyanno.hypervisor state") + log.info(f"Successfully applied soc.dyanno.hypervisor state with status: {status}") return True else: - log.error("Failed to apply soc.dyanno.hypervisor state") + log.error(f"Failed to apply soc.dyanno.hypervisor state with status: {status}") return False else: - log.error("No response from salt master when applying soc.dyanno.hypervisor state") + log.error(f"No response from salt master when applying soc.dyanno.hypervisor state with status: {status}") return False except Exception as e: - log.error(f"Error applying soc.dyanno.hypervisor state: {str(e)}") + log.error(f"Error applying soc.dyanno.hypervisor state with status: {status}: {str(e)}") return False def _apply_cloud_config_state(): @@ -598,11 +671,6 @@ def setup_environment(vm_name: str = 'sool9', disk_size: str = '220G', minion_id log.warning("Failed to apply salt.cloud.config state, continuing with setup") # We don't return an error here as we want to continue with the setup process - # Apply the soc.dyanno.hypervisor state on the salt master - if not _apply_dyanno_hypervisor_state(): - log.warning("Failed to apply soc.dyanno.hypervisor state, continuing with setup") - # We don't return an error here as we want to continue with the setup process - log.info("Starting setup_environment in setup_hypervisor runner") # Check if environment is already set up @@ -616,9 +684,12 @@ def setup_environment(vm_name: str = 'sool9', disk_size: str = '220G', minion_id # Handle image setup if needed if not image_valid: + _apply_dyanno_hypervisor_state('ImageDownloadStart') log.info("Starting image download/validation process") if not _download_image(): log.error("Image download failed") + # Update hypervisor annotation with failure status + _apply_dyanno_hypervisor_state('ImageDownloadFailed') return { 'success': False, 'error': 'Image download failed', @@ -631,6 +702,8 @@ def setup_environment(vm_name: str = 'sool9', disk_size: str = '220G', minion_id log.info("Setting up SSH keys") if not _setup_ssh_keys(): log.error("SSH key setup failed") + # Update hypervisor annotation with failure status + _apply_dyanno_hypervisor_state('SSHKeySetupFailed') return { 'success': False, 'error': 'SSH key setup failed', @@ -655,6 +728,12 @@ def setup_environment(vm_name: str = 'sool9', disk_size: str = '220G', minion_id success = vm_result.get('success', False) log.info("Setup environment completed with status: %s", "SUCCESS" if success else "FAILED") + # Update hypervisor annotation with success status + if success: + _apply_dyanno_hypervisor_state('PreInit') + else: + _apply_dyanno_hypervisor_state('SetupFailed') + # If setup was successful and we have a minion_id, run highstate if success and minion_id: log.info("Running highstate on hypervisor %s", minion_id) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index cef47168f..ec8c8337e 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -32,6 +32,16 @@ so-elastic-fleet-auto-configure-logstash-outputs: - retry: attempts: 4 interval: 30 + +{# Separate from above in order to catch elasticfleet-logstash.crt changes and force update to fleet output policy #} +so-elastic-fleet-auto-configure-logstash-outputs-force: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-outputs-update --force --certs + - retry: + attempts: 4 + interval: 30 + - onchanges: + - x509: etc_elasticfleet_logstash_crt {% endif %} # If enabled, automatically update Fleet Server URLs & ES Connection diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update index de9b5f93f..c64d022a4 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update @@ -10,6 +10,26 @@ . /usr/sbin/so-common FORCE_UPDATE=false +UPDATE_CERTS=false + +while [[ $# -gt 0 ]]; do + case $1 in + -f|--force) + FORCE_UPDATE=true + shift + ;; + -c| --certs) + UPDATE_CERTS=true + shift + ;; + *) + echo "Unknown option $1" + echo "Usage: $0 [-f|--force] [-c|--certs]" + exit 1 + ;; + esac +done + # Only run on Managers if ! is_manager_node; then printf "Not a Manager Node... Exiting" @@ -19,17 +39,42 @@ fi function update_logstash_outputs() { if logstash_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_logstash" --retry 3 --retry-delay 10 --fail 2>/dev/null); then SSL_CONFIG=$(echo "$logstash_policy" | jq -r '.item.ssl') + LOGSTASHKEY=$(openssl rsa -in /etc/pki/elasticfleet-logstash.key) + LOGSTASHCRT=$(openssl x509 -in /etc/pki/elasticfleet-logstash.crt) + LOGSTASHCA=$(openssl x509 -in /etc/pki/tls/certs/intca.crt) if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then - JSON_STRING=$(jq -n \ - --arg UPDATEDLIST "$NEW_LIST_JSON" \ - --argjson SECRETS "$SECRETS" \ - --argjson SSL_CONFIG "$SSL_CONFIG" \ - '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": $SSL_CONFIG,"secrets": $SECRETS}') + if [[ "$UPDATE_CERTS" != "true" ]]; then + # Reuse existing secret + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --argjson SECRETS "$SECRETS" \ + --argjson SSL_CONFIG "$SSL_CONFIG" \ + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": $SSL_CONFIG,"secrets": $SECRETS}') + else + # Update certs, creating new secret + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --arg LOGSTASHKEY "$LOGSTASHKEY" \ + --arg LOGSTASHCRT "$LOGSTASHCRT" \ + --arg LOGSTASHCA "$LOGSTASHCA" \ + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": {"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets": {"ssl":{"key": $LOGSTASHKEY }}}') + fi else - JSON_STRING=$(jq -n \ - --arg UPDATEDLIST "$NEW_LIST_JSON" \ - --argjson SSL_CONFIG "$SSL_CONFIG" \ - '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"","ssl": $SSL_CONFIG}') + if [[ "$UPDATE_CERTS" != "true" ]]; then + # Reuse existing ssl config + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --argjson SSL_CONFIG "$SSL_CONFIG" \ + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": $SSL_CONFIG}') + else + # Update ssl config + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --arg LOGSTASHKEY "$LOGSTASHKEY" \ + --arg LOGSTASHCRT "$LOGSTASHCRT" \ + --arg LOGSTASHCA "$LOGSTASHCA" \ + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": {"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]}}') + fi fi fi diff --git a/salt/elasticsearch/files/ingest/suricata.common b/salt/elasticsearch/files/ingest/suricata.common index 102b5dac8..7b2dc7eeb 100644 --- a/salt/elasticsearch/files/ingest/suricata.common +++ b/salt/elasticsearch/files/ingest/suricata.common @@ -1,30 +1,155 @@ { - "description" : "suricata.common", - "processors" : [ - { "json": { "field": "message", "target_field": "message2", "ignore_failure": true } }, - { "rename": { "field": "message2.pkt_src", "target_field": "network.packet_source","ignore_failure": true } }, - { "rename": { "field": "message2.proto", "target_field": "network.transport", "ignore_failure": true } }, - { "rename": { "field": "message2.in_iface", "target_field": "observer.ingress.interface.name", "ignore_failure": true } }, - { "rename": { "field": "message2.flow_id", "target_field": "log.id.uid", "ignore_failure": true } }, - { "rename": { "field": "message2.src_ip", "target_field": "source.ip", "ignore_failure": true } }, - { "rename": { "field": "message2.src_port", "target_field": "source.port", "ignore_failure": true } }, - { "rename": { "field": "message2.dest_ip", "target_field": "destination.ip", "ignore_failure": true } }, - { "rename": { "field": "message2.dest_port", "target_field": "destination.port", "ignore_failure": true } }, - { "rename": { "field": "message2.vlan", "target_field": "network.vlan.id", "ignore_failure": true } }, - { "rename": { "field": "message2.community_id", "target_field": "network.community_id", "ignore_missing": true } }, - { "rename": { "field": "message2.xff", "target_field": "xff.ip", "ignore_missing": true } }, - { "set": { "field": "event.dataset", "value": "{{ message2.event_type }}" } }, - { "set": { "field": "observer.name", "value": "{{agent.name}}" } }, - { "set": { "field": "event.ingested", "value": "{{@timestamp}}" } }, - { "date": { "field": "message2.timestamp", "target_field": "@timestamp", "formats": ["ISO8601", "UNIX"], "timezone": "UTC", "ignore_failure": true } }, - { "remove":{ "field": "agent", "ignore_failure": true } }, - {"append":{"field":"related.ip","value":["{{source.ip}}","{{destination.ip}}"],"allow_duplicates":false,"ignore_failure":true}}, - { - "script": { - "source": "boolean isPrivate(def ip) { if (ip == null) return false; int dot1 = ip.indexOf('.'); if (dot1 == -1) return false; int dot2 = ip.indexOf('.', dot1 + 1); if (dot2 == -1) return false; int first = Integer.parseInt(ip.substring(0, dot1)); if (first == 10) return true; if (first == 192 && ip.startsWith('168.', dot1 + 1)) return true; if (first == 172) { int second = Integer.parseInt(ip.substring(dot1 + 1, dot2)); return second >= 16 && second <= 31; } return false; } String[] fields = new String[] {\"source\", \"destination\"}; for (int i = 0; i < fields.length; i++) { def field = fields[i]; def ip = ctx[field]?.ip; if (ip != null) { if (ctx.network == null) ctx.network = new HashMap(); if (isPrivate(ip)) { if (ctx.network.private_ip == null) ctx.network.private_ip = new ArrayList(); if (!ctx.network.private_ip.contains(ip)) ctx.network.private_ip.add(ip); } else { if (ctx.network.public_ip == null) ctx.network.public_ip = new ArrayList(); if (!ctx.network.public_ip.contains(ip)) ctx.network.public_ip.add(ip); } } }", - "ignore_failure": false - } - }, - { "pipeline": { "if": "ctx?.event?.dataset != null", "name": "suricata.{{event.dataset}}" } } - ] -} + "description": "suricata.common", + "processors": [ + { + "json": { + "field": "message", + "target_field": "message2", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.pkt_src", + "target_field": "network.packet_source", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.proto", + "target_field": "network.transport", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.in_iface", + "target_field": "observer.ingress.interface.name", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.flow_id", + "target_field": "log.id.uid", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.src_ip", + "target_field": "source.ip", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.src_port", + "target_field": "source.port", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.dest_ip", + "target_field": "destination.ip", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.dest_port", + "target_field": "destination.port", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.vlan", + "target_field": "network.vlan.id", + "ignore_failure": true + } + }, + { + "rename": { + "field": "message2.community_id", + "target_field": "network.community_id", + "ignore_missing": true + } + }, + { + "rename": { + "field": "message2.xff", + "target_field": "xff.ip", + "ignore_missing": true + } + }, + { + "set": { + "field": "event.dataset", + "value": "{{ message2.event_type }}" + } + }, + { + "set": { + "field": "observer.name", + "value": "{{agent.name}}" + } + }, + { + "set": { + "field": "event.ingested", + "value": "{{@timestamp}}" + } + }, + { + "date": { + "field": "message2.timestamp", + "target_field": "@timestamp", + "formats": [ + "ISO8601", + "UNIX" + ], + "timezone": "UTC", + "ignore_failure": true + } + }, + { + "remove": { + "field": "agent", + "ignore_failure": true + } + }, + { + "append": { + "field": "related.ip", + "value": [ + "{{source.ip}}", + "{{destination.ip}}" + ], + "allow_duplicates": false, + "ignore_failure": true + } + }, + { + "script": { + "source": "boolean isPrivate(def ip) { if (ip == null) return false; int dot1 = ip.indexOf('.'); if (dot1 == -1) return false; int dot2 = ip.indexOf('.', dot1 + 1); if (dot2 == -1) return false; int first = Integer.parseInt(ip.substring(0, dot1)); if (first == 10) return true; if (first == 192 && ip.startsWith('168.', dot1 + 1)) return true; if (first == 172) { int second = Integer.parseInt(ip.substring(dot1 + 1, dot2)); return second >= 16 && second <= 31; } return false; } String[] fields = new String[] {\"source\", \"destination\"}; for (int i = 0; i < fields.length; i++) { def field = fields[i]; def ip = ctx[field]?.ip; if (ip != null) { if (ctx.network == null) ctx.network = new HashMap(); if (isPrivate(ip)) { if (ctx.network.private_ip == null) ctx.network.private_ip = new ArrayList(); if (!ctx.network.private_ip.contains(ip)) ctx.network.private_ip.add(ip); } else { if (ctx.network.public_ip == null) ctx.network.public_ip = new ArrayList(); if (!ctx.network.public_ip.contains(ip)) ctx.network.public_ip.add(ip); } } }", + "ignore_failure": false + } + }, + { + "rename": { + "field": "message2.capture_file", + "target_field": "suricata.capture_file", + "ignore_missing": true + } + }, + { + "pipeline": { + "if": "ctx?.event?.dataset != null", + "name": "suricata.{{event.dataset}}" + } + } + ] +} \ No newline at end of file diff --git a/salt/elasticsearch/templates/component/ecs/suricata.json b/salt/elasticsearch/templates/component/ecs/suricata.json index 1eb06d266..3f393ff6a 100644 --- a/salt/elasticsearch/templates/component/ecs/suricata.json +++ b/salt/elasticsearch/templates/component/ecs/suricata.json @@ -841,6 +841,10 @@ "type": "long" } } + }, + "capture_file": { + "type": "keyword", + "ignore_above": 1024 } } } diff --git a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume index 2322c3a94..601de643f 100644 --- a/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume +++ b/salt/hypervisor/tools/sbin_jinja/so-kvm-create-volume @@ -45,7 +45,7 @@ used during VM provisioning to add dedicated NSM storage volumes. This command creates and attaches a volume with the following settings: - VM Name: `vm1_sensor` - Volume Size: `500` GB - - Volume Path: `/nsm/libvirt/volumes/vm1_sensor-nsm.img` + - Volume Path: `/nsm/libvirt/volumes/vm1_sensor-nsm-.img` - Device: `/dev/vdb` (virtio-blk) - VM remains stopped after attachment @@ -75,7 +75,8 @@ used during VM provisioning to add dedicated NSM storage volumes. - The script automatically stops the VM if it's running before creating and attaching the volume. - Volumes are created with full pre-allocation for optimal performance. -- Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `-nsm.img`. +- Volume files are stored in `/nsm/libvirt/volumes/` with naming pattern `-nsm-.img`. +- The epoch timestamp ensures unique volume names and prevents conflicts. - Volumes are attached as `/dev/vdb` using virtio-blk for high performance. - The script checks available disk space before creating the volume. - Ownership is set to `qemu:qemu` with permissions `640`. @@ -142,6 +143,7 @@ import socket import subprocess import pwd import grp +import time import xml.etree.ElementTree as ET from io import StringIO from so_vm_utils import start_vm, stop_vm @@ -242,10 +244,13 @@ def create_volume_file(vm_name, size_gb, logger): Raises: VolumeCreationError: If volume creation fails """ - # Define volume path (directory already created in main()) - volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm.img") + # Generate epoch timestamp for unique volume naming + epoch_timestamp = int(time.time()) - # Check if volume already exists + # Define volume path with epoch timestamp for uniqueness + volume_path = os.path.join(VOLUME_DIR, f"{vm_name}-nsm-{epoch_timestamp}.img") + + # Check if volume already exists (shouldn't be possible with timestamp) if os.path.exists(volume_path): logger.error(f"VOLUME: Volume already exists: {volume_path}") raise VolumeCreationError(f"Volume already exists: {volume_path}") diff --git a/salt/salt/engines/master/virtual_node_manager.py b/salt/salt/engines/master/virtual_node_manager.py index 6d88bd688..ccc063d64 100644 --- a/salt/salt/engines/master/virtual_node_manager.py +++ b/salt/salt/engines/master/virtual_node_manager.py @@ -727,7 +727,8 @@ def check_hypervisor_disk_space(hypervisor: str, size_gb: int) -> Tuple[bool, Op result = local.cmd( hypervisor_minion, 'cmd.run', - ["df -BG /nsm/libvirt/volumes | tail -1 | awk '{print $4}' | sed 's/G//'"] + ["df -BG /nsm/libvirt/volumes | tail -1 | awk '{print $4}' | sed 's/G//'"], + kwarg={'python_shell': True} ) if not result or hypervisor_minion not in result: diff --git a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja index ac2fd6fea..f23fdb5d9 100644 --- a/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja +++ b/salt/soc/dyanno/hypervisor/soc_hypervisor.yaml.jinja @@ -43,10 +43,26 @@ No Virtual Machines Found {%- endif %} -{%- else %} +{%- elif baseDomainStatus == 'ImageDownloadStart' %} +#### INFO + +Base domain image download started. +{%- elif baseDomainStatus == 'ImageDownloadFailed' %} +#### ERROR + +Base domain image download failed. Please check the salt-master log for details and verify network connectivity. +{%- elif baseDomainStatus == 'SSHKeySetupFailed' %} +#### ERROR + +SSH key setup failed. Please check the salt-master log for details. +{%- elif baseDomainStatus == 'SetupFailed' %} #### WARNING -Base domain has not been initialized. +Setup failed. Please check the salt-master log for details. +{%- elif baseDomainStatus == 'PreInit' %} +#### WARNING + +Base domain has not been initialized. Waiting for hypervisor to highstate. {%- endif %} {%- endmacro -%} diff --git a/salt/zeek/files/config.zeek.ja4 b/salt/zeek/files/config.zeek.ja4 index e3dd08a48..3d0035481 100644 --- a/salt/zeek/files/config.zeek.ja4 +++ b/salt/zeek/files/config.zeek.ja4 @@ -11,6 +11,8 @@ export { option JA4S_enabled: bool = F; option JA4S_raw: bool = F; + option JA4D_enabled: bool = F; + option JA4H_enabled: bool = F; option JA4H_raw: bool = F; From 18accae47ea56c4d9e58fdf19cd5ae07f9e4ee4e Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:10:29 -0600 Subject: [PATCH 13/17] annotation typo --- salt/elasticfleet/soc_elasticfleet.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticfleet/soc_elasticfleet.yaml b/salt/elasticfleet/soc_elasticfleet.yaml index d7c324855..d78189f96 100644 --- a/salt/elasticfleet/soc_elasticfleet.yaml +++ b/salt/elasticfleet/soc_elasticfleet.yaml @@ -83,7 +83,7 @@ elasticfleet: advanced: True global: True helpLink: elastic-fleet.html - compression: + compression_level: description: The gzip compression level. The compression level must be in the range of 1 (best speed) to 9 (best compression). regex: ^[1-9]$ forcedType: int From b0d9426f1be143ef8b7768f2707746a563a99244 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:11:00 -0600 Subject: [PATCH 14/17] automated cert update for kafka fleet output policy --- salt/elasticfleet/enabled.sls | 1 + .../so-elastic-fleet-outputs-update | 47 ++++++++++++++----- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index ec8c8337e..25fca759d 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -42,6 +42,7 @@ so-elastic-fleet-auto-configure-logstash-outputs-force: interval: 30 - onchanges: - x509: etc_elasticfleet_logstash_crt + - x509: elasticfleet_kafka_crt {% endif %} # If enabled, automatically update Fleet Server URLs & ES Connection diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update index c64d022a4..715d53a3b 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update @@ -85,19 +85,42 @@ function update_kafka_outputs() { # Make sure SSL configuration is included in policy updates for Kafka output. SSL is configured in so-elastic-fleet-setup if kafka_policy=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/outputs/so-manager_kafka" --fail 2>/dev/null); then SSL_CONFIG=$(echo "$kafka_policy" | jq -r '.item.ssl') + KAFKAKEY=$(openssl rsa -in /etc/pki/elasticfleet-kafka.key) + KAFKACRT=$(openssl x509 -in /etc/pki/elasticfleet-kafka.crt) + KAFKACA=$(openssl x509 -in /etc/pki/tls/certs/intca.crt) if SECRETS=$(echo "$kafka_policy" | jq -er '.item.secrets' 2>/dev/null); then - # Update policy when fleet has secrets enabled - JSON_STRING=$(jq -n \ - --arg UPDATEDLIST "$NEW_LIST_JSON" \ - --argjson SSL_CONFIG "$SSL_CONFIG" \ - --argjson SECRETS "$SECRETS" \ - '{"name": "grid-kafka","type": "kafka","hosts": $UPDATEDLIST,"is_default": true,"is_default_monitoring": true,"config_yaml": "","ssl": $SSL_CONFIG,"secrets": $SECRETS}') + if [[ "$UPDATE_CERTS" != "true" ]]; then + # Update policy when fleet has secrets enabled + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --argjson SSL_CONFIG "$SSL_CONFIG" \ + --argjson SECRETS "$SECRETS" \ + '{"name": "grid-kafka","type": "kafka","hosts": $UPDATEDLIST,"is_default": true,"is_default_monitoring": true,"config_yaml": "","ssl": $SSL_CONFIG,"secrets": $SECRETS}') + else + # Update certs, creating new secret + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --arg KAFKAKEY "$KAFKAKEY" \ + --arg KAFKACRT "$KAFKACRT" \ + --arg KAFKACA "$KAFKACA" \ + '{"name": "grid-kafka","type": "kafka","hosts": $UPDATEDLIST,"is_default": true,"is_default_monitoring": true,"config_yaml": "","ssl": {"certificate_authorities":[ $KAFKACA ],"certificate": $KAFKACRT ,"key":"","verification_mode":"full"},"secrets": {"ssl":{"key": $KAFKAKEY }}}') + fi else - # Update policy when fleet has secrets disabled or policy hasn't been force updated - JSON_STRING=$(jq -n \ - --arg UPDATEDLIST "$NEW_LIST_JSON" \ - --argjson SSL_CONFIG "$SSL_CONFIG" \ - '{"name": "grid-kafka","type": "kafka","hosts": $UPDATEDLIST,"is_default": true,"is_default_monitoring": true,"config_yaml": "","ssl": $SSL_CONFIG}') + if [[ "$UPDATE_CERTS" != "true" ]]; then + # Update policy when fleet has secrets disabled or policy hasn't been force updated + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --argjson SSL_CONFIG "$SSL_CONFIG" \ + '{"name": "grid-kafka","type": "kafka","hosts": $UPDATEDLIST,"is_default": true,"is_default_monitoring": true,"config_yaml": "","ssl": $SSL_CONFIG}') + else + # Update ssl config + JSON_STRING=$(jq -n \ + --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --arg KAFKAKEY "$KAFKAKEY" \ + --arg KAFKACRT "$KAFKACRT" \ + --arg KAFKACA "$KAFKACA" \ + '{"name": "grid-kafka","type": "kafka","hosts": $UPDATEDLIST,"is_default": true,"is_default_monitoring": true,"config_yaml": "","ssl": { "certificate_authorities": [ $KAFKACA ], "certificate": $KAFKACRT, "key": $KAFKAKEY, "verification_mode": "full" }}') + fi fi # Update Kafka outputs curl -K /opt/so/conf/elasticsearch/curl.config -L -X PUT "localhost:5601/api/fleet/outputs/so-manager_kafka" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" | jq @@ -120,7 +143,7 @@ function update_kafka_outputs() { # Get the current list of kafka outputs & hash them CURRENT_LIST=$(jq -c -r '.item.hosts' <<< "$RAW_JSON") - CURRENT_HASH=$(sha1sum <<< "$CURRENT_LIST" | awk '{print $1}') + CURRENT_HASH=$(sha256sum <<< "$CURRENT_LIST" | awk '{print $1}') declare -a NEW_LIST=() From 877444ac29c50093634b323ff1fd3f1ac14f6b0a Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:16:59 -0600 Subject: [PATCH 15/17] cert update is a forced update --- salt/elasticfleet/enabled.sls | 2 +- .../tools/sbin_jinja/so-elastic-fleet-outputs-update | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index 25fca759d..db10a7182 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -36,7 +36,7 @@ so-elastic-fleet-auto-configure-logstash-outputs: {# Separate from above in order to catch elasticfleet-logstash.crt changes and force update to fleet output policy #} so-elastic-fleet-auto-configure-logstash-outputs-force: cmd.run: - - name: /usr/sbin/so-elastic-fleet-outputs-update --force --certs + - name: /usr/sbin/so-elastic-fleet-outputs-update --certs - retry: attempts: 4 interval: 30 diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update index 715d53a3b..281e05c59 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update @@ -20,6 +20,7 @@ while [[ $# -gt 0 ]]; do ;; -c| --certs) UPDATE_CERTS=true + FORCE_UPDATE=true shift ;; *) From 847742091144e2832e345c8073dee94f1741b276 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 3 Dec 2025 20:10:06 -0600 Subject: [PATCH 16/17] logstash adv config state file --- .../so-elastic-fleet-outputs-update | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update index 281e05c59..58baadca5 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-outputs-update @@ -11,6 +11,8 @@ FORCE_UPDATE=false UPDATE_CERTS=false +LOGSTASH_PILLAR_CONFIG_YAML="{{ LOGSTASH_CONFIG_YAML }}" +LOGSTASH_PILLAR_STATE_FILE="/opt/so/state/esfleet_logstash_config_pillar" while [[ $# -gt 0 ]]; do case $1 in @@ -43,38 +45,45 @@ function update_logstash_outputs() { LOGSTASHKEY=$(openssl rsa -in /etc/pki/elasticfleet-logstash.key) LOGSTASHCRT=$(openssl x509 -in /etc/pki/elasticfleet-logstash.crt) LOGSTASHCA=$(openssl x509 -in /etc/pki/tls/certs/intca.crt) + # Revert escaped \\n to \n for jq + LOGSTASH_PILLAR_CONFIG_YAML=$(printf '%b' "$LOGSTASH_PILLAR_CONFIG_YAML") + if SECRETS=$(echo "$logstash_policy" | jq -er '.item.secrets' 2>/dev/null); then if [[ "$UPDATE_CERTS" != "true" ]]; then # Reuse existing secret JSON_STRING=$(jq -n \ --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --arg CONFIG_YAML "$LOGSTASH_PILLAR_CONFIG_YAML" \ --argjson SECRETS "$SECRETS" \ --argjson SSL_CONFIG "$SSL_CONFIG" \ - '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": $SSL_CONFIG,"secrets": $SECRETS}') + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":$CONFIG_YAML,"ssl": $SSL_CONFIG,"secrets": $SECRETS}') else # Update certs, creating new secret JSON_STRING=$(jq -n \ --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --arg CONFIG_YAML "$LOGSTASH_PILLAR_CONFIG_YAML" \ --arg LOGSTASHKEY "$LOGSTASHKEY" \ --arg LOGSTASHCRT "$LOGSTASHCRT" \ --arg LOGSTASHCA "$LOGSTASHCA" \ - '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": {"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets": {"ssl":{"key": $LOGSTASHKEY }}}') + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":$CONFIG_YAML,"ssl": {"certificate": $LOGSTASHCRT,"certificate_authorities":[ $LOGSTASHCA ]},"secrets": {"ssl":{"key": $LOGSTASHKEY }}}') fi else if [[ "$UPDATE_CERTS" != "true" ]]; then # Reuse existing ssl config JSON_STRING=$(jq -n \ --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --arg CONFIG_YAML "$LOGSTASH_PILLAR_CONFIG_YAML" \ --argjson SSL_CONFIG "$SSL_CONFIG" \ - '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": $SSL_CONFIG}') + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":$CONFIG_YAML,"ssl": $SSL_CONFIG}') else # Update ssl config JSON_STRING=$(jq -n \ --arg UPDATEDLIST "$NEW_LIST_JSON" \ + --arg CONFIG_YAML "$LOGSTASH_PILLAR_CONFIG_YAML" \ --arg LOGSTASHKEY "$LOGSTASHKEY" \ --arg LOGSTASHCRT "$LOGSTASHCRT" \ --arg LOGSTASHCA "$LOGSTASHCA" \ - '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":"{{ LOGSTASH_CONFIG_YAML }}","ssl": {"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]}}') + '{"name":"grid-logstash","type":"logstash","hosts": $UPDATEDLIST,"is_default":true,"is_default_monitoring":true,"config_yaml":$CONFIG_YAML,"ssl": {"certificate": $LOGSTASHCRT,"key": $LOGSTASHKEY,"certificate_authorities":[ $LOGSTASHCA ]}}') fi fi fi @@ -167,14 +176,14 @@ function update_kafka_outputs() { printf "Failed to query for current Logstash Outputs..." exit 1 fi - - CURRENT_LOGSTASH_ADV_CONFIG=$(jq -r '.item.config_yaml // ""' <<< "$RAW_JSON") - CURRENT_LOGSTASH_ADV_CONFIG_HASH=$(sha256sum <<< "$CURRENT_LOGSTASH_ADV_CONFIG" | awk '{print $1}') - NEW_LOGSTASH_ADV_CONFIG=$'{{ LOGSTASH_CONFIG_YAML }}' - NEW_LOGSTASH_ADV_CONFIG_HASH=$(sha256sum <<< "$NEW_LOGSTASH_ADV_CONFIG" | awk '{print $1}') - - if [ "$CURRENT_LOGSTASH_ADV_CONFIG_HASH" != "$NEW_LOGSTASH_ADV_CONFIG_HASH" ]; then - FORCE_UPDATE=true + # logstash adv config - compare pillar to last state file value + if [[ -f "$LOGSTASH_PILLAR_STATE_FILE" ]]; then + PREVIOUS_LOGSTASH_PILLAR_CONFIG_YAML=$(cat "$LOGSTASH_PILLAR_STATE_FILE") + if [[ "$LOGSTASH_PILLAR_CONFIG_YAML" != "$PREVIOUS_LOGSTASH_PILLAR_CONFIG_YAML" ]]; then + echo "Logstash pillar config has changed - forcing update" + FORCE_UPDATE=true + fi + echo "$LOGSTASH_PILLAR_CONFIG_YAML" > "$LOGSTASH_PILLAR_STATE_FILE" fi # Get the current list of Logstash outputs & hash them From 0b127582cbea904b88f17bf9b4023aeb887dc560 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 3 Dec 2025 20:49:25 -0600 Subject: [PATCH 17/17] 2.4.200 soup changes --- salt/manager/tools/sbin/soup | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 32553b5c3..885f9b521 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -426,6 +426,7 @@ preupgrade_changes() { [[ "$INSTALLEDVERSION" == 2.4.160 ]] && up_to_2.4.170 [[ "$INSTALLEDVERSION" == 2.4.170 ]] && up_to_2.4.180 [[ "$INSTALLEDVERSION" == 2.4.180 ]] && up_to_2.4.190 + [[ "$INSTALLEDVERSION" == 2.4.190 ]] && up_to_2.4.200 true } @@ -457,6 +458,7 @@ postupgrade_changes() { [[ "$POSTVERSION" == 2.4.160 ]] && post_to_2.4.170 [[ "$POSTVERSION" == 2.4.170 ]] && post_to_2.4.180 [[ "$POSTVERSION" == 2.4.180 ]] && post_to_2.4.190 + [[ "$POSTVERSION" == 2.4.190 ]] && post_to_2.4.200 true } @@ -636,6 +638,11 @@ post_to_2.4.190() { POSTVERSION=2.4.190 } +post_to_2.4.200() { + echo "Nothing to apply" + POSTVERSION=2.4.200 +} + repo_sync() { echo "Sync the local repo." su socore -c '/usr/sbin/so-repo-sync' || fail "Unable to complete so-repo-sync." @@ -903,6 +910,12 @@ up_to_2.4.190() { INSTALLEDVERSION=2.4.190 } +up_to_2.4.200() { + touch /opt/so/state/esfleet_logstash_config_pillar + + INSTALLEDVERSION=2.4.200 +} + add_hydra_pillars() { mkdir -p /opt/so/saltstack/local/pillar/hydra touch /opt/so/saltstack/local/pillar/hydra/soc_hydra.sls