From 664f3fd18ab02ae2ececebc25f1cc456c402c5d1 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 1 Apr 2026 14:47:05 -0400 Subject: [PATCH 001/110] Fix soup --- salt/manager/tools/sbin/soup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index d25153863..29d495ecc 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -924,7 +924,7 @@ run_network_intermediate_upgrade() { if [[ -n "$BRANCH" ]]; then local originally_requested_so_branch="$BRANCH" else - local originally_requested_so_branch="2.4/main" + local originally_requested_so_branch="3/main" fi echo "Starting automated intermediate upgrade to $next_step_so_version." From 8101bc4941c230418aeb3b8a675739392b3a0139 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 6 Apr 2026 15:08:30 -0500 Subject: [PATCH 002/110] ES 9.3.2 --- .../elastic-defend-endpoints.json | 2 +- .../grid-nodes_general/import-evtx-logs.json | 2 +- salt/elasticsearch/defaults.yaml | 2 +- ...nse.log-1.23.1 => logs-pfsense.log-1.25.1} | 83 ++++++++++++++++--- ...icata => logs-pfsense.log-1.25.1-suricata} | 0 .../tools/sbin_jinja/so-kibana-space-defaults | 2 +- 6 files changed, 76 insertions(+), 15 deletions(-) rename salt/elasticsearch/files/ingest/{logs-pfsense.log-1.23.1 => logs-pfsense.log-1.25.1} (74%) rename salt/elasticsearch/files/ingest/{logs-pfsense.log-1.23.1-suricata => logs-pfsense.log-1.25.1-suricata} (100%) diff --git a/salt/elasticfleet/files/integrations/elastic-defend/elastic-defend-endpoints.json b/salt/elasticfleet/files/integrations/elastic-defend/elastic-defend-endpoints.json index debfc73a3..c27da26f7 100644 --- a/salt/elasticfleet/files/integrations/elastic-defend/elastic-defend-endpoints.json +++ b/salt/elasticfleet/files/integrations/elastic-defend/elastic-defend-endpoints.json @@ -5,7 +5,7 @@ "package": { "name": "endpoint", "title": "Elastic Defend", - "version": "9.0.2", + "version": "9.3.0", "requires_root": true }, "enabled": true, diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json index 50ffd5dc7..3066303d9 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json @@ -23,7 +23,7 @@ "\\.gz$" ], "include_files": [], - "processors": "- dissect:\n tokenizer: \"/nsm/import/%{import.id}/evtx/%{import.file}\"\n field: \"log.file.path\"\n target_prefix: \"\"\n- decode_json_fields:\n fields: [\"message\"]\n target: \"\"\n- drop_fields:\n fields: [\"host\"]\n ignore_missing: true\n- add_fields:\n target: data_stream\n fields:\n type: logs\n dataset: system.security\n- add_fields:\n target: event\n fields:\n dataset: system.security\n module: system\n imported: true\n- add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.security-2.6.1\n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-Sysmon/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.sysmon_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.sysmon_operational\n module: windows\n imported: true\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.sysmon_operational-3.1.2\n- if:\n equals:\n winlog.channel: 'Application'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.application\n - 
add_fields:\n target: event\n fields:\n dataset: system.application\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.application-2.6.1\n- if:\n equals:\n winlog.channel: 'System'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.system\n - add_fields:\n target: event\n fields:\n dataset: system.system\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.system-2.6.1\n \n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-PowerShell/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.powershell_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.powershell_operational\n module: windows\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.powershell_operational-3.1.2\n- add_fields:\n target: data_stream\n fields:\n dataset: import", + "processors": "- dissect:\n tokenizer: \"/nsm/import/%{import.id}/evtx/%{import.file}\"\n field: \"log.file.path\"\n target_prefix: \"\"\n- decode_json_fields:\n fields: [\"message\"]\n target: \"\"\n- drop_fields:\n fields: [\"host\"]\n ignore_missing: true\n- add_fields:\n target: data_stream\n fields:\n type: logs\n dataset: system.security\n- add_fields:\n target: event\n fields:\n dataset: system.security\n module: system\n imported: true\n- add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.security-2.13.0\n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-Sysmon/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.sysmon_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.sysmon_operational\n module: windows\n imported: true\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.sysmon_operational-3.6.0\n- if:\n equals:\n winlog.channel: 'Application'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.application\n - add_fields:\n target: event\n fields:\n dataset: system.application\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.application-2.13.0\n- if:\n equals:\n winlog.channel: 'System'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.system\n - add_fields:\n target: event\n fields:\n dataset: system.system\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.system-2.13.0\n \n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-PowerShell/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.powershell_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.powershell_operational\n module: windows\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.powershell_operational-3.6.0\n- add_fields:\n target: data_stream\n fields:\n dataset: import", "tags": [ "import" ], diff --git a/salt/elasticsearch/defaults.yaml b/salt/elasticsearch/defaults.yaml index d0ab0f959..f355601dc 100644 --- a/salt/elasticsearch/defaults.yaml +++ b/salt/elasticsearch/defaults.yaml @@ -1,6 +1,6 @@ elasticsearch: enabled: false - version: 9.0.8 + version: 9.3.2 index_clean: true vm: max_map_count: 1048576 diff --git a/salt/elasticsearch/files/ingest/logs-pfsense.log-1.23.1 b/salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1 similarity index 74% rename from salt/elasticsearch/files/ingest/logs-pfsense.log-1.23.1 rename to salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1 index d3354f363..3037ce77a 100644 --- 
a/salt/elasticsearch/files/ingest/logs-pfsense.log-1.23.1 +++ b/salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1 @@ -10,24 +10,28 @@ "processors": [ { "set": { + "tag": "set_ecs_version_f5923549", "field": "ecs.version", "value": "8.17.0" } }, { "set": { + "tag": "set_observer_vendor_ad9d35cc", "field": "observer.vendor", "value": "netgate" } }, { "set": { + "tag": "set_observer_type_5dddf3ba", "field": "observer.type", "value": "firewall" } }, { "rename": { + "tag": "rename_message_to_event_original_56a77271", "field": "message", "target_field": "event.original", "ignore_missing": true, @@ -36,12 +40,14 @@ }, { "set": { + "tag": "set_event_kind_de80643c", "field": "event.kind", "value": "event" } }, { "set": { + "tag": "set_event_timezone_4ca44cac", "field": "event.timezone", "value": "{{{_tmp.tz_offset}}}", "if": "ctx._tmp?.tz_offset != null && ctx._tmp?.tz_offset != 'local'" @@ -49,6 +55,7 @@ }, { "grok": { + "tag": "grok_event_original_27d9c8c7", "description": "Parse syslog header", "field": "event.original", "patterns": [ @@ -72,6 +79,7 @@ }, { "date": { + "tag": "date__tmp_timestamp8601_to_timestamp_6ac9d3ce", "if": "ctx._tmp.timestamp8601 != null", "field": "_tmp.timestamp8601", "target_field": "@timestamp", @@ -82,6 +90,7 @@ }, { "date": { + "tag": "date__tmp_timestamp_to_timestamp_f21e536e", "if": "ctx.event?.timezone != null && ctx._tmp?.timestamp != null", "field": "_tmp.timestamp", "target_field": "@timestamp", @@ -95,6 +104,7 @@ }, { "grok": { + "tag": "grok_process_name_cef3d489", "description": "Set Event Provider", "field": "process.name", "patterns": [ @@ -107,71 +117,83 @@ }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-firewall", + "tag": "pipeline_e16851a7", + "name": "logs-pfsense.log-1.25.1-firewall", "if": "ctx.event.provider == 'filterlog'" } }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-openvpn", + "tag": "pipeline_828590b5", + "name": "logs-pfsense.log-1.25.1-openvpn", "if": "ctx.event.provider == 'openvpn'" } }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-ipsec", + "tag": "pipeline_9d37039c", + "name": "logs-pfsense.log-1.25.1-ipsec", "if": "ctx.event.provider == 'charon'" } }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-dhcp", + "tag": "pipeline_ad56bbca", + "name": "logs-pfsense.log-1.25.1-dhcp", "if": "[\"dhcpd\", \"dhclient\", \"dhcp6c\"].contains(ctx.event.provider)" } }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-unbound", + "tag": "pipeline_dd85553d", + "name": "logs-pfsense.log-1.25.1-unbound", "if": "ctx.event.provider == 'unbound'" } }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-haproxy", + "tag": "pipeline_720ed255", + "name": "logs-pfsense.log-1.25.1-haproxy", "if": "ctx.event.provider == 'haproxy'" } }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-php-fpm", + "tag": "pipeline_456beba5", + "name": "logs-pfsense.log-1.25.1-php-fpm", "if": "ctx.event.provider == 'php-fpm'" } }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-squid", + "tag": "pipeline_a0d89375", + "name": "logs-pfsense.log-1.25.1-squid", "if": "ctx.event.provider == 'squid'" } }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-snort", + "tag": "pipeline_c2f1ed55", + "name": "logs-pfsense.log-1.25.1-snort", "if": "ctx.event.provider == 'snort'" } }, { "pipeline": { - "name": "logs-pfsense.log-1.23.1-suricata", + "tag":"pipeline_33db1c9e", + "name": "logs-pfsense.log-1.25.1-suricata", "if": "ctx.event.provider == 'suricata'" } }, { "drop": { + "tag": "drop_9d7c46f8", "if": "![\"filterlog\", \"openvpn\", \"charon\", \"dhcpd\", 
\"dhclient\", \"dhcp6c\", \"unbound\", \"haproxy\", \"php-fpm\", \"squid\", \"snort\", \"suricata\"].contains(ctx.event?.provider)" } }, { "append": { + "tag": "append_event_category_4780a983", "field": "event.category", "value": "network", "if": "ctx.network != null" @@ -179,6 +201,7 @@ }, { "convert": { + "tag": "convert_source_address_to_source_ip_f5632a20", "field": "source.address", "target_field": "source.ip", "type": "ip", @@ -188,6 +211,7 @@ }, { "convert": { + "tag": "convert_destination_address_to_destination_ip_f1388f0c", "field": "destination.address", "target_field": "destination.ip", "type": "ip", @@ -197,6 +221,7 @@ }, { "set": { + "tag": "set_network_type_1f1d940a", "field": "network.type", "value": "ipv6", "if": "ctx.source?.ip != null && ctx.source.ip.contains(\":\")" @@ -204,6 +229,7 @@ }, { "set": { + "tag": "set_network_type_69deca38", "field": "network.type", "value": "ipv4", "if": "ctx.source?.ip != null && ctx.source.ip.contains(\".\")" @@ -211,6 +237,7 @@ }, { "geoip": { + "tag": "geoip_source_ip_to_source_geo_da2e41b2", "field": "source.ip", "target_field": "source.geo", "ignore_missing": true @@ -218,6 +245,7 @@ }, { "geoip": { + "tag": "geoip_destination_ip_to_destination_geo_ab5e2968", "field": "destination.ip", "target_field": "destination.geo", "ignore_missing": true @@ -225,6 +253,7 @@ }, { "geoip": { + "tag": "geoip_source_ip_to_source_as_28d69883", "ignore_missing": true, "database_file": "GeoLite2-ASN.mmdb", "field": "source.ip", @@ -237,6 +266,7 @@ }, { "geoip": { + "tag": "geoip_destination_ip_to_destination_as_8a007787", "database_file": "GeoLite2-ASN.mmdb", "field": "destination.ip", "target_field": "destination.as", @@ -249,6 +279,7 @@ }, { "rename": { + "tag": "rename_source_as_asn_to_source_as_number_a917047d", "field": "source.as.asn", "target_field": "source.as.number", "ignore_missing": true @@ -256,6 +287,7 @@ }, { "rename": { + "tag": "rename_source_as_organization_name_to_source_as_organization_name_f1362d0b", "field": "source.as.organization_name", "target_field": "source.as.organization.name", "ignore_missing": true @@ -263,6 +295,7 @@ }, { "rename": { + "tag": "rename_destination_as_asn_to_destination_as_number_3b459fcd", "field": "destination.as.asn", "target_field": "destination.as.number", "ignore_missing": true @@ -270,6 +303,7 @@ }, { "rename": { + "tag": "rename_destination_as_organization_name_to_destination_as_organization_name_814bd459", "field": "destination.as.organization_name", "target_field": "destination.as.organization.name", "ignore_missing": true @@ -277,12 +311,14 @@ }, { "community_id": { + "tag": "community_id_d2308e7a", "target_field": "network.community_id", "ignore_failure": true } }, { "grok": { + "tag": "grok_observer_ingress_interface_name_968018d3", "field": "observer.ingress.interface.name", "patterns": [ "%{DATA}.%{NONNEGINT:observer.ingress.vlan.id}" @@ -293,6 +329,7 @@ }, { "set": { + "tag": "set_network_vlan_id_efd4d96a", "field": "network.vlan.id", "copy_from": "observer.ingress.vlan.id", "ignore_empty_value": true @@ -300,6 +337,7 @@ }, { "append": { + "tag": "append_related_ip_c1a6356b", "field": "related.ip", "value": "{{{destination.ip}}}", "allow_duplicates": false, @@ -308,6 +346,7 @@ }, { "append": { + "tag": "append_related_ip_8121c591", "field": "related.ip", "value": "{{{source.ip}}}", "allow_duplicates": false, @@ -316,6 +355,7 @@ }, { "append": { + "tag": "append_related_ip_53b62ed8", "field": "related.ip", "value": "{{{source.nat.ip}}}", "allow_duplicates": false, @@ -324,6 +364,7 @@ }, { 
"append": { + "tag": "append_related_hosts_6f162628", "field": "related.hosts", "value": "{{{destination.domain}}}", "if": "ctx.destination?.domain != null" @@ -331,6 +372,7 @@ }, { "append": { + "tag": "append_related_user_c036eec2", "field": "related.user", "value": "{{{user.name}}}", "if": "ctx.user?.name != null" @@ -338,6 +380,7 @@ }, { "set": { + "tag": "set_network_direction_cb1e3125", "field": "network.direction", "value": "{{{network.direction}}}bound", "if": "ctx.network?.direction != null && ctx.network?.direction =~ /^(in|out)$/" @@ -345,6 +388,7 @@ }, { "remove": { + "tag": "remove_a82e20f2", "field": [ "_tmp" ], @@ -353,11 +397,21 @@ }, { "script": { + "tag": "script_a7f2c062", "lang": "painless", "description": "This script processor iterates over the whole document to remove fields with null values.", "source": "void handleMap(Map map) {\n for (def x : map.values()) {\n if (x instanceof Map) {\n handleMap(x);\n } else if (x instanceof List) {\n handleList(x);\n }\n }\n map.values().removeIf(v -> v == null || (v instanceof String && v == \"-\"));\n}\nvoid handleList(List list) {\n for (def x : list) {\n if (x instanceof Map) {\n handleMap(x);\n } else if (x instanceof List) {\n handleList(x);\n }\n }\n}\nhandleMap(ctx);\n" } }, + { + "append": { + "tag": "append_preserve_original_event_on_error", + "field": "tags", + "value": "preserve_original_event", + "allow_duplicates": false, + "if": "ctx.error?.message != null" + } + }, { "pipeline": { "name": "global@custom", @@ -405,7 +459,14 @@ { "append": { "field": "error.message", - "value": "{{{ _ingest.on_failure_message }}}" + "value": "Processor '{{{ _ingest.on_failure_processor_type }}}' {{#_ingest.on_failure_processor_tag}}with tag '{{{ _ingest.on_failure_processor_tag }}}' {{/_ingest.on_failure_processor_tag}}in pipeline '{{{ _ingest.pipeline }}}' failed with message '{{{ _ingest.on_failure_message }}}'" + } + }, + { + "append": { + "field": "tags", + "value": "preserve_original_event", + "allow_duplicates": false } } ] diff --git a/salt/elasticsearch/files/ingest/logs-pfsense.log-1.23.1-suricata b/salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1-suricata similarity index 100% rename from salt/elasticsearch/files/ingest/logs-pfsense.log-1.23.1-suricata rename to salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1-suricata diff --git a/salt/kibana/tools/sbin_jinja/so-kibana-space-defaults b/salt/kibana/tools/sbin_jinja/so-kibana-space-defaults index fcb80e606..d0447f514 100755 --- a/salt/kibana/tools/sbin_jinja/so-kibana-space-defaults +++ b/salt/kibana/tools/sbin_jinja/so-kibana-space-defaults @@ -9,5 +9,5 @@ SESSIONCOOKIE=$(curl -K /opt/so/conf/elasticsearch/curl.config -c - -X GET http: # Disable certain Features from showing up in the Kibana UI echo echo "Setting up default Kibana Space:" -curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X PUT "localhost:5601/api/spaces/space/default" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d' {"id":"default","name":"Default","disabledFeatures":["ml","enterpriseSearch","logs","infrastructure","apm","uptime","monitoring","stackAlerts","actions","securitySolutionCasesV3","inventory","dataQuality","searchSynonyms","enterpriseSearchApplications","enterpriseSearchAnalytics","securitySolutionTimeline","securitySolutionNotes","entityManager"]} ' >> /opt/so/log/kibana/misc.log +curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -L -X PUT "localhost:5601/api/spaces/space/default" -H 'kbn-xsrf: true' -H 'Content-Type: 
application/json' -d' {"id":"default","name":"Default","disabledFeatures":["ml","enterpriseSearch","logs","infrastructure","apm","uptime","monitoring","stackAlerts","actions","securitySolutionCasesV3","inventory","dataQuality","searchSynonyms","searchQueryRules","enterpriseSearchApplications","enterpriseSearchAnalytics","securitySolutionTimeline","securitySolutionNotes","securitySolutionRulesV1","entityManager","streams","cloudConnect","slo"]} ' >> /opt/so/log/kibana/misc.log echo From 51a3c04c3d945cd1ffb02bf38017ca194a62c6cd Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 6 Apr 2026 17:35:08 -0500 Subject: [PATCH 003/110] foxtrot version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index fd2a01863..0e0d1ae9a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0 +3.0.0-foxtrot \ No newline at end of file From dd56e7f1aca5b4528c5050e090b1f73599d83ae7 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 7 Apr 2026 11:08:10 -0500 Subject: [PATCH 004/110] filestream integration policy updates --- .../elastic-agent-monitor.json | 17 ++++++++++------- .../grid-nodes_general/hydra-logs.json | 14 ++++++++++---- .../grid-nodes_general/idh-logs.json | 14 ++++++++++---- .../grid-nodes_general/import-evtx-logs.json | 14 ++++++++++---- .../import-suricata-logs.json | 14 ++++++++++---- .../grid-nodes_general/rita-logs.json | 14 ++++++++++---- .../grid-nodes_general/so-ip-mappings.json | 14 ++++++++++---- .../grid-nodes_general/soc-auth-sync-logs.json | 14 ++++++++++---- .../grid-nodes_general/soc-detections-logs.json | 14 ++++++++++---- .../grid-nodes_general/soc-salt-relay-logs.json | 14 ++++++++++---- .../grid-nodes_general/soc-sensoroni-logs.json | 14 ++++++++++---- .../grid-nodes_general/soc-server-logs.json | 14 ++++++++++---- .../grid-nodes_general/strelka-logs.json | 14 ++++++++++---- .../grid-nodes_general/suricata-logs.json | 14 ++++++++++---- 14 files changed, 140 insertions(+), 59 deletions(-) diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/elastic-agent-monitor.json b/salt/elasticfleet/files/integrations/grid-nodes_general/elastic-agent-monitor.json index 0be40a3d3..3eec63d26 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/elastic-agent-monitor.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/elastic-agent-monitor.json @@ -6,21 +6,23 @@ "name": "agent-monitor", "namespace": "", "description": "", + "policy_id": "so-grid-nodes_general", "policy_ids": [ "so-grid-nodes_general" ], - "output_id": null, "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/opt/so/log/agents/agent-monitor.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "agentmonitor", "pipeline": "elasticagent.monitor", "parsers": "", @@ -34,15 +36,16 @@ "ignore_older": "72h", "clean_inactive": -1, "harvester_limit": 0, - "fingerprint": true, + "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": 64, - "file_identity_native": false, + "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } } - } + }, + "force": true } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/hydra-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/hydra-logs.json index 
a4f944ba5..5dcd3012d 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/hydra-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/hydra-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "hydra-logs", + "namespace": "so", "description": "Hydra logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/opt/so/log/hydra/hydra.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "hydra", "pipeline": "hydra", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -34,10 +40,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/idh-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/idh-logs.json index fef9c57fb..afaf77f0c 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/idh-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/idh-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "idh-logs", + "namespace": "so", "description": "IDH integration", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/nsm/idh/opencanary.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "idh", "pipeline": "common", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -31,10 +37,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json index 3066303d9..0e42a0dfb 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "import-evtx-logs", + "namespace": "so", "description": "Import Windows EVTX logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/nsm/import/*/evtx/*.json" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "import", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", "exclude_files": [ @@ -33,10 +39,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": 
[], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/import-suricata-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/import-suricata-logs.json index b8f3b0b29..3148b38e8 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/import-suricata-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/import-suricata-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "import-suricata-logs", + "namespace": "so", "description": "Import Suricata logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/nsm/import/*/suricata/eve*.json" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "import", "pipeline": "suricata.common", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -32,10 +38,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/rita-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/rita-logs.json index 70259c3cf..f807c3b70 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/rita-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/rita-logs.json @@ -4,14 +4,18 @@ "version": "" }, "name": "rita-logs", + "namespace": "so", "description": "RITA Logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ @@ -19,6 +23,8 @@ "/nsm/rita/exploded-dns.csv", "/nsm/rita/long-connections.csv" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "rita", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", "exclude_files": [ @@ -33,10 +39,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/so-ip-mappings.json b/salt/elasticfleet/files/integrations/grid-nodes_general/so-ip-mappings.json index a14e63559..24ed188f2 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/so-ip-mappings.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/so-ip-mappings.json @@ -4,19 +4,25 @@ "version": "" }, "name": "so-ip-mappings", + "namespace": "so", "description": "IP Description mappings", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/nsm/custom-mappings/ip-descriptions.csv" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "hostnamemappings", 
"parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", "exclude_files": [ @@ -32,10 +38,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-auth-sync-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-auth-sync-logs.json index f4fd38e9d..c04b738d3 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-auth-sync-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-auth-sync-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "soc-auth-sync-logs", + "namespace": "so", "description": "Security Onion - Elastic Auth Sync - Logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/opt/so/log/soc/sync.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "soc", "pipeline": "common", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -31,10 +37,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-detections-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-detections-logs.json index f1bdbc922..9d7812e42 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-detections-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-detections-logs.json @@ -4,20 +4,26 @@ "version": "" }, "name": "soc-detections-logs", + "namespace": "so", "description": "Security Onion Console - Detections Logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/opt/so/log/soc/detections_runtime-status_sigma.log", "/opt/so/log/soc/detections_runtime-status_yara.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "soc", "pipeline": "common", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -35,10 +41,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-salt-relay-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-salt-relay-logs.json index cb08d5b12..d1fa8b630 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-salt-relay-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-salt-relay-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "soc-salt-relay-logs", + "namespace": "so", 
"description": "Security Onion - Salt Relay - Logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/opt/so/log/soc/salt-relay.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "soc", "pipeline": "common", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -33,10 +39,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-sensoroni-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-sensoroni-logs.json index 11e686c3d..467544c9d 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-sensoroni-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-sensoroni-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "soc-sensoroni-logs", + "namespace": "so", "description": "Security Onion - Sensoroni - Logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/opt/so/log/sensoroni/sensoroni.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "soc", "pipeline": "common", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -31,10 +37,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-server-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-server-logs.json index decb6b22a..37eb02ab1 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/soc-server-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/soc-server-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "soc-server-logs", + "namespace": "so", "description": "Security Onion Console Logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/opt/so/log/soc/sensoroni-server.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "soc", "pipeline": "common", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -33,10 +39,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/strelka-logs.json 
b/salt/elasticfleet/files/integrations/grid-nodes_general/strelka-logs.json index 1f0203a91..3091baf44 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/strelka-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/strelka-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "strelka-logs", + "namespace": "so", "description": "Strelka Logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/nsm/strelka/log/strelka.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "strelka", "pipeline": "strelka.file", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -31,10 +37,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/suricata-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/suricata-logs.json index 26dae5225..bb5cfd2c3 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/suricata-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/suricata-logs.json @@ -4,19 +4,25 @@ "version": "" }, "name": "suricata-logs", + "namespace": "so", "description": "Suricata integration", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/nsm/suricata/eve*.json" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "suricata", "pipeline": "suricata.common", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -31,10 +37,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } From 868cd1187431b0f99a74e0f35fd6526f6c70eeee Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 8 Apr 2026 10:58:52 -0400 Subject: [PATCH 005/110] Add so-postgres Salt states and integration wiring Phase 1 of the PostgreSQL central data platform: - Salt states: init, enabled, disabled, config, ssl, auth, sostatus - TLS via SO CA-signed certs with postgresql.conf template - Two-tier auth: postgres superuser + so_postgres application user - Firewall restricts port 5432 to manager-only (HA-ready) - Wired into top.sls, pillar/top.sls, allowed_states, firewall containers map, docker defaults, CA signing policies, and setup scripts for all manager-type roles --- pillar/top.sls | 20 ++++++ salt/allowed_states.map.jinja | 1 + salt/ca/files/signing_policies.conf | 14 ++++ salt/docker/defaults.yaml | 8 +++ salt/firewall/containers.map.jinja | 3 + salt/firewall/defaults.yaml | 9 +++ salt/postgres/auth.sls | 35 +++++++++ salt/postgres/config.sls | 63 ++++++++++++++++ salt/postgres/defaults.yaml | 14 ++++ salt/postgres/disabled.sls | 27 +++++++ salt/postgres/enabled.sls | 88 
+++++++++++++++++++++++ salt/postgres/files/init-users.sh | 15 ++++ salt/postgres/files/postgresql.conf.jinja | 8 +++ salt/postgres/init.sls | 13 ++++ salt/postgres/map.jinja | 7 ++ salt/postgres/sostatus.sls | 21 ++++++ salt/postgres/ssl.sls | 54 ++++++++++++++ salt/top.sls | 5 ++ setup/so-functions | 13 +++- setup/so-variables | 6 ++ 20 files changed, 422 insertions(+), 2 deletions(-) create mode 100644 salt/postgres/auth.sls create mode 100644 salt/postgres/config.sls create mode 100644 salt/postgres/defaults.yaml create mode 100644 salt/postgres/disabled.sls create mode 100644 salt/postgres/enabled.sls create mode 100644 salt/postgres/files/init-users.sh create mode 100644 salt/postgres/files/postgresql.conf.jinja create mode 100644 salt/postgres/init.sls create mode 100644 salt/postgres/map.jinja create mode 100644 salt/postgres/sostatus.sls create mode 100644 salt/postgres/ssl.sls diff --git a/pillar/top.sls b/pillar/top.sls index 6cdc4808a..af18bee09 100644 --- a/pillar/top.sls +++ b/pillar/top.sls @@ -38,6 +38,9 @@ base: {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %} - elasticsearch.auth {% endif %} + {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %} + - postgres.auth + {% endif %} {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/kibana/secrets.sls') %} - kibana.secrets {% endif %} @@ -60,6 +63,8 @@ base: - redis.adv_redis - influxdb.soc_influxdb - influxdb.adv_influxdb + - postgres.soc_postgres + - postgres.adv_postgres - elasticsearch.nodes - elasticsearch.soc_elasticsearch - elasticsearch.adv_elasticsearch @@ -101,6 +106,9 @@ base: {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %} - elasticsearch.auth {% endif %} + {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %} + - postgres.auth + {% endif %} {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/kibana/secrets.sls') %} - kibana.secrets {% endif %} @@ -126,6 +134,8 @@ base: - redis.adv_redis - influxdb.soc_influxdb - influxdb.adv_influxdb + - postgres.soc_postgres + - postgres.adv_postgres - backup.soc_backup - backup.adv_backup - zeek.soc_zeek @@ -146,6 +156,9 @@ base: {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %} - elasticsearch.auth {% endif %} + {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %} + - postgres.auth + {% endif %} {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/kibana/secrets.sls') %} - kibana.secrets {% endif %} @@ -160,6 +173,8 @@ base: - redis.adv_redis - influxdb.soc_influxdb - influxdb.adv_influxdb + - postgres.soc_postgres + - postgres.adv_postgres - elasticsearch.nodes - elasticsearch.soc_elasticsearch - elasticsearch.adv_elasticsearch @@ -260,6 +275,9 @@ base: {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %} - elasticsearch.auth {% endif %} + {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %} + - postgres.auth + {% endif %} {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/kibana/secrets.sls') %} - kibana.secrets {% endif %} @@ -285,6 +303,8 @@ base: - redis.adv_redis - influxdb.soc_influxdb - influxdb.adv_influxdb + - postgres.soc_postgres + - postgres.adv_postgres - zeek.soc_zeek - zeek.adv_zeek - bpf.soc_bpf diff --git a/salt/allowed_states.map.jinja b/salt/allowed_states.map.jinja index 1fac0f0e3..2fb61a664 100644 --- 
a/salt/allowed_states.map.jinja +++ b/salt/allowed_states.map.jinja @@ -29,6 +29,7 @@ 'manager', 'nginx', 'influxdb', + 'postgres', 'soc', 'kratos', 'hydra', diff --git a/salt/ca/files/signing_policies.conf b/salt/ca/files/signing_policies.conf index 4fc04aacc..5424d7b37 100644 --- a/salt/ca/files/signing_policies.conf +++ b/salt/ca/files/signing_policies.conf @@ -54,6 +54,20 @@ x509_signing_policies: - extendedKeyUsage: serverAuth - days_valid: 820 - copypath: /etc/pki/issued_certs/ + postgres: + - minions: '*' + - signing_private_key: /etc/pki/ca.key + - signing_cert: /etc/pki/ca.crt + - C: US + - ST: Utah + - L: Salt Lake City + - basicConstraints: "critical CA:false" + - keyUsage: "critical keyEncipherment" + - subjectKeyIdentifier: hash + - authorityKeyIdentifier: keyid,issuer:always + - extendedKeyUsage: serverAuth + - days_valid: 820 + - copypath: /etc/pki/issued_certs/ elasticfleet: - minions: '*' - signing_private_key: /etc/pki/ca.key diff --git a/salt/docker/defaults.yaml b/salt/docker/defaults.yaml index 044ec98b0..900d2cf53 100644 --- a/salt/docker/defaults.yaml +++ b/salt/docker/defaults.yaml @@ -237,3 +237,11 @@ docker: extra_hosts: [] extra_env: [] ulimits: [] + 'so-postgres': + final_octet: 89 + port_bindings: + - 0.0.0.0:5432:5432 + custom_bind_mounts: [] + extra_hosts: [] + extra_env: [] + ulimits: [] diff --git a/salt/firewall/containers.map.jinja b/salt/firewall/containers.map.jinja index 2d1135e5f..b39ba2b31 100644 --- a/salt/firewall/containers.map.jinja +++ b/salt/firewall/containers.map.jinja @@ -11,6 +11,7 @@ 'so-kratos', 'so-hydra', 'so-nginx', + 'so-postgres', 'so-redis', 'so-soc', 'so-strelka-coordinator', @@ -34,6 +35,7 @@ 'so-hydra', 'so-logstash', 'so-nginx', + 'so-postgres', 'so-redis', 'so-soc', 'so-strelka-coordinator', @@ -77,6 +79,7 @@ 'so-kratos', 'so-hydra', 'so-nginx', + 'so-postgres', 'so-soc' ] %} diff --git a/salt/firewall/defaults.yaml b/salt/firewall/defaults.yaml index a11492e88..e9c82401d 100644 --- a/salt/firewall/defaults.yaml +++ b/salt/firewall/defaults.yaml @@ -98,6 +98,10 @@ firewall: tcp: - 8086 udp: [] + postgres: + tcp: + - 5432 + udp: [] kafka_controller: tcp: - 9093 @@ -193,6 +197,7 @@ firewall: - kibana - redis - influxdb + - postgres - elasticsearch_rest - elasticsearch_node - localrules @@ -379,6 +384,7 @@ firewall: - kibana - redis - influxdb + - postgres - elasticsearch_rest - elasticsearch_node - docker_registry @@ -590,6 +596,7 @@ firewall: - kibana - redis - influxdb + - postgres - elasticsearch_rest - elasticsearch_node - docker_registry @@ -799,6 +806,7 @@ firewall: - kibana - redis - influxdb + - postgres - elasticsearch_rest - elasticsearch_node - docker_registry @@ -1011,6 +1019,7 @@ firewall: - kibana - redis - influxdb + - postgres - elasticsearch_rest - elasticsearch_node - docker_registry diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls new file mode 100644 index 000000000..a19b2341a --- /dev/null +++ b/salt/postgres/auth.sls @@ -0,0 +1,35 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +{% from 'allowed_states.map.jinja' import allowed_states %} +{% if sls in allowed_states %} + + {% set DIGITS = "1234567890" %} + {% set LOWERCASE = "qwertyuiopasdfghjklzxcvbnm" %} + {% set UPPERCASE = "QWERTYUIOPASDFGHJKLZXCVBNM" %} + {% set SYMBOLS = "~!@#^&*()-_=+[]|;:,.<>?" %} + {% set CHARS = DIGITS~LOWERCASE~UPPERCASE~SYMBOLS %} + {% set so_postgres_user_pass = salt['pillar.get']('postgres:auth:users:so_postgres_user:pass', salt['random.get_str'](72, chars=CHARS)) %} + +postgres_auth_pillar: + file.managed: + - name: /opt/so/saltstack/local/pillar/postgres/auth.sls + - mode: 640 + - reload_pillar: True + - contents: | + postgres: + auth: + users: + so_postgres_user: + user: so_postgres + pass: "{{ so_postgres_user_pass }}" + - show_changes: False +{% else %} + +{{sls}}_state_not_allowed: + test.fail_without_changes: + - name: {{sls}}_state_not_allowed + +{% endif %} diff --git a/salt/postgres/config.sls b/salt/postgres/config.sls new file mode 100644 index 000000000..3502b6409 --- /dev/null +++ b/salt/postgres/config.sls @@ -0,0 +1,63 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +{% from 'allowed_states.map.jinja' import allowed_states %} +{% if sls.split('.')[0] in allowed_states %} +{% from 'postgres/map.jinja' import PGMERGED %} + +# Postgres Setup +postgresconfdir: + file.directory: + - name: /opt/so/conf/postgres + - user: 939 + - group: 939 + - makedirs: True + +postgresdatadir: + file.directory: + - name: /nsm/postgres + - user: 939 + - group: 939 + - makedirs: True + +postgreslogdir: + file.directory: + - name: /opt/so/log/postgres + - user: 939 + - group: 939 + - makedirs: True + +postgresinitdir: + file.directory: + - name: /opt/so/conf/postgres/init + - user: 939 + - group: 939 + - makedirs: True + +postgresinitusers: + file.managed: + - name: /opt/so/conf/postgres/init/init-users.sh + - source: salt://postgres/files/init-users.sh + - user: 939 + - group: 939 + - mode: 755 + +postgresconf: + file.managed: + - name: /opt/so/conf/postgres/postgresql.conf + - source: salt://postgres/files/postgresql.conf.jinja + - user: 939 + - group: 939 + - template: jinja + - defaults: + PGMERGED: {{ PGMERGED }} + +{% else %} + +{{sls}}_state_not_allowed: + test.fail_without_changes: + - name: {{sls}}_state_not_allowed + +{% endif %} diff --git a/salt/postgres/defaults.yaml b/salt/postgres/defaults.yaml new file mode 100644 index 000000000..9757f08f3 --- /dev/null +++ b/salt/postgres/defaults.yaml @@ -0,0 +1,14 @@ +postgres: + enabled: False + config: + listen_addresses: '*' + port: 5432 + max_connections: 100 + shared_buffers: 256MB + ssl: 'on' + ssl_cert_file: '/conf/postgres.crt' + ssl_key_file: '/conf/postgres.key' + ssl_ca_file: '/conf/ca.crt' + log_destination: 'stderr' + logging_collector: 'off' + log_min_messages: 'warning' diff --git a/salt/postgres/disabled.sls b/salt/postgres/disabled.sls new file mode 100644 index 000000000..56dc451b7 --- /dev/null +++ b/salt/postgres/disabled.sls @@ -0,0 +1,27 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +{% from 'allowed_states.map.jinja' import allowed_states %} +{% if sls.split('.')[0] in allowed_states %} + +include: + - postgres.sostatus + +so-postgres: + docker_container.absent: + - force: True + +so-postgres_so-status.disabled: + file.comment: + - name: /opt/so/conf/so-status/so-status.conf + - regex: ^so-postgres$ + +{% else %} + +{{sls}}_state_not_allowed: + test.fail_without_changes: + - name: {{sls}}_state_not_allowed + +{% endif %} diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls new file mode 100644 index 000000000..c103245ea --- /dev/null +++ b/salt/postgres/enabled.sls @@ -0,0 +1,88 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +{% from 'allowed_states.map.jinja' import allowed_states %} +{% if sls.split('.')[0] in allowed_states %} +{% from 'vars/globals.map.jinja' import GLOBALS %} +{% from 'docker/docker.map.jinja' import DOCKERMERGED %} +{% set PASSWORD = salt['pillar.get']('secrets:postgres_pass') %} +{% set SO_POSTGRES_USER = salt['pillar.get']('postgres:auth:users:so_postgres_user:user', 'so_postgres') %} +{% set SO_POSTGRES_PASS = salt['pillar.get']('postgres:auth:users:so_postgres_user:pass', '') %} + +include: + - postgres.auth + - postgres.ssl + - postgres.config + - postgres.sostatus + +so-postgres: + docker_container.running: + - image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-postgres:{{ GLOBALS.so_version }} + - hostname: so-postgres + - networks: + - sobridge: + - ipv4_address: {{ DOCKERMERGED.containers['so-postgres'].ip }} + - port_bindings: + {% for BINDING in DOCKERMERGED.containers['so-postgres'].port_bindings %} + - {{ BINDING }} + {% endfor %} + - environment: + - POSTGRES_DB=securityonion + - POSTGRES_PASSWORD={{ PASSWORD }} + - SO_POSTGRES_USER={{ SO_POSTGRES_USER }} + - SO_POSTGRES_PASS={{ SO_POSTGRES_PASS }} + {% if DOCKERMERGED.containers['so-postgres'].extra_env %} + {% for XTRAENV in DOCKERMERGED.containers['so-postgres'].extra_env %} + - {{ XTRAENV }} + {% endfor %} + {% endif %} + - binds: + - /opt/so/log/postgres/:/log:rw + - /nsm/postgres:/var/lib/postgresql/data:rw + - /opt/so/conf/postgres/postgresql.conf:/conf/postgresql.conf:ro + - /opt/so/conf/postgres/init/init-users.sh:/docker-entrypoint-initdb.d/init-users.sh:ro + - /etc/pki/postgres.crt:/conf/postgres.crt:ro + - /etc/pki/postgres.key:/conf/postgres.key:ro + - /etc/pki/tls/certs/intca.crt:/conf/ca.crt:ro + {% if DOCKERMERGED.containers['so-postgres'].custom_bind_mounts %} + {% for BIND in DOCKERMERGED.containers['so-postgres'].custom_bind_mounts %} + - {{ BIND }} + {% endfor %} + {% endif %} + {% if DOCKERMERGED.containers['so-postgres'].extra_hosts %} + - extra_hosts: + {% for XTRAHOST in DOCKERMERGED.containers['so-postgres'].extra_hosts %} + - {{ XTRAHOST }} + {% endfor %} + {% endif %} + {% if DOCKERMERGED.containers['so-postgres'].ulimits %} + - ulimits: + {% for ULIMIT in DOCKERMERGED.containers['so-postgres'].ulimits %} + - {{ ULIMIT.name }}={{ ULIMIT.soft }}:{{ ULIMIT.hard }} + {% endfor %} + {% endif %} + - watch: + - file: postgresconf + - file: postgresinitusers + - x509: postgres_crt + - x509: postgres_key + - require: + - file: postgresconf + - file: postgresinitusers + - x509: postgres_crt + - x509: postgres_key + +delete_so-postgres_so-status.disabled: + 
file.uncomment: + - name: /opt/so/conf/so-status/so-status.conf + - regex: ^so-postgres$ + +{% else %} + +{{sls}}_state_not_allowed: + test.fail_without_changes: + - name: {{sls}}_state_not_allowed + +{% endif %} diff --git a/salt/postgres/files/init-users.sh b/salt/postgres/files/init-users.sh new file mode 100644 index 000000000..6fa7e43ac --- /dev/null +++ b/salt/postgres/files/init-users.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +# Create application user for SOC platform access +# This script runs on first database initialization only +psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL + DO \$\$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '$SO_POSTGRES_USER') THEN + CREATE ROLE "$SO_POSTGRES_USER" WITH LOGIN PASSWORD '$SO_POSTGRES_PASS'; + END IF; + END + \$\$; + GRANT ALL PRIVILEGES ON DATABASE "$POSTGRES_DB" TO "$SO_POSTGRES_USER"; +EOSQL diff --git a/salt/postgres/files/postgresql.conf.jinja b/salt/postgres/files/postgresql.conf.jinja new file mode 100644 index 000000000..6833b3dbc --- /dev/null +++ b/salt/postgres/files/postgresql.conf.jinja @@ -0,0 +1,8 @@ +{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one + or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at + https://securityonion.net/license; you may not use this file except in compliance with the + Elastic License 2.0. #} + +{% for key, value in PGMERGED.config.items() %} +{{ key }} = '{{ value }}' +{% endfor %} diff --git a/salt/postgres/init.sls b/salt/postgres/init.sls new file mode 100644 index 000000000..2e3c9ffb7 --- /dev/null +++ b/salt/postgres/init.sls @@ -0,0 +1,13 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +{% from 'postgres/map.jinja' import PGMERGED %} + +include: +{% if PGMERGED.enabled %} + - postgres.enabled +{% else %} + - postgres.disabled +{% endif %} diff --git a/salt/postgres/map.jinja b/salt/postgres/map.jinja new file mode 100644 index 000000000..5250ca8fd --- /dev/null +++ b/salt/postgres/map.jinja @@ -0,0 +1,7 @@ +{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one + or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at + https://securityonion.net/license; you may not use this file except in compliance with the + Elastic License 2.0. #} + +{% import_yaml 'postgres/defaults.yaml' as PGDEFAULTS %} +{% set PGMERGED = salt['pillar.get']('postgres', PGDEFAULTS.postgres, merge=True) %} diff --git a/salt/postgres/sostatus.sls b/salt/postgres/sostatus.sls new file mode 100644 index 000000000..4a61af3d1 --- /dev/null +++ b/salt/postgres/sostatus.sls @@ -0,0 +1,21 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +{% from 'allowed_states.map.jinja' import allowed_states %} +{% if sls.split('.')[0] in allowed_states %} + +append_so-postgres_so-status.conf: + file.append: + - name: /opt/so/conf/so-status/so-status.conf + - text: so-postgres + - unless: grep -q so-postgres /opt/so/conf/so-status/so-status.conf + +{% else %} + +{{sls}}_state_not_allowed: + test.fail_without_changes: + - name: {{sls}}_state_not_allowed + +{% endif %} diff --git a/salt/postgres/ssl.sls b/salt/postgres/ssl.sls new file mode 100644 index 000000000..ebd3ccbc9 --- /dev/null +++ b/salt/postgres/ssl.sls @@ -0,0 +1,54 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +{% from 'allowed_states.map.jinja' import allowed_states %} +{% if sls.split('.')[0] in allowed_states %} +{% from 'vars/globals.map.jinja' import GLOBALS %} +{% from 'ca/map.jinja' import CA %} + +postgres_key: + x509.private_key_managed: + - name: /etc/pki/postgres.key + - keysize: 4096 + - backup: True + - new: True + {% if salt['file.file_exists']('/etc/pki/postgres.key') -%} + - prereq: + - x509: /etc/pki/postgres.crt + {%- endif %} + - retry: + attempts: 5 + interval: 30 + +postgres_crt: + x509.certificate_managed: + - name: /etc/pki/postgres.crt + - ca_server: {{ CA.server }} + - subjectAltName: DNS:{{ GLOBALS.hostname }}, IP:{{ GLOBALS.node_ip }} + - signing_policy: postgres + - private_key: /etc/pki/postgres.key + - CN: {{ GLOBALS.hostname }} + - days_remaining: 7 + - days_valid: 820 + - backup: True + - timeout: 30 + - retry: + attempts: 5 + interval: 30 + +postgresKeyperms: + file.managed: + - replace: False + - name: /etc/pki/postgres.key + - mode: 640 + - group: 939 + +{% else %} + +{{sls}}_state_not_allowed: + test.fail_without_changes: + - name: {{sls}}_state_not_allowed + +{% endif %} diff --git a/salt/top.sls b/salt/top.sls index c7c6aa65d..ff789e89d 100644 --- a/salt/top.sls +++ b/salt/top.sls @@ -68,6 +68,7 @@ base: - backup.config_backup - nginx - influxdb + - postgres - soc - kratos - hydra @@ -95,6 +96,7 @@ base: - backup.config_backup - nginx - influxdb + - postgres - soc - kratos - hydra @@ -123,6 +125,7 @@ base: - registry - nginx - influxdb + - postgres - strelka.manager - soc - kratos @@ -153,6 +156,7 @@ base: - registry - nginx - influxdb + - postgres - strelka.manager - soc - kratos @@ -181,6 +185,7 @@ base: - manager - nginx - influxdb + - postgres - strelka.manager - soc - kratos diff --git a/setup/so-functions b/setup/so-functions index bf95ea9d8..8be08a27c 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -821,6 +821,7 @@ create_manager_pillars() { soc_pillar idh_pillar influxdb_pillar + postgres_pillar logrotate_pillar patch_pillar nginx_pillar @@ -1053,6 +1054,7 @@ generate_passwords(){ HYDRAKEY=$(get_random_value) HYDRASALT=$(get_random_value) REDISPASS=$(get_random_value) + POSTGRESPASS=$(get_random_value) SOCSRVKEY=$(get_random_value 64) IMPORTPASS=$(get_random_value) } @@ -1355,6 +1357,12 @@ influxdb_pillar() { " token: $INFLUXTOKEN" > $local_salt_dir/pillar/influxdb/token.sls } +postgres_pillar() { + title "Create the postgres pillar file" + touch $adv_postgres_pillar_file + touch $postgres_pillar_file +} + make_some_dirs() { mkdir -p /nsm mkdir -p "$default_salt_dir" @@ -1364,7 +1372,7 @@ make_some_dirs() { mkdir -p 
$local_salt_dir/salt/firewall/portgroups mkdir -p $local_salt_dir/salt/firewall/ports - for THEDIR in bpf elasticsearch ntp firewall redis backup influxdb strelka sensoroni soc docker zeek suricata nginx telegraf logstash soc manager kratos hydra idh elastalert stig global kafka versionlock hypervisor vm; do + for THEDIR in bpf elasticsearch ntp firewall redis backup influxdb postgres strelka sensoroni soc docker zeek suricata nginx telegraf logstash soc manager kratos hydra idh elastalert stig global kafka versionlock hypervisor vm; do mkdir -p $local_salt_dir/pillar/$THEDIR touch $local_salt_dir/pillar/$THEDIR/adv_$THEDIR.sls touch $local_salt_dir/pillar/$THEDIR/soc_$THEDIR.sls @@ -1832,7 +1840,8 @@ secrets_pillar(){ printf '%s\n'\ "secrets:"\ " import_pass: $IMPORTPASS"\ - " influx_pass: $INFLUXPASS" > $local_salt_dir/pillar/secrets.sls + " influx_pass: $INFLUXPASS"\ + " postgres_pass: $POSTGRESPASS" > $local_salt_dir/pillar/secrets.sls fi } diff --git a/setup/so-variables b/setup/so-variables index a0d7aadc1..975debf20 100644 --- a/setup/so-variables +++ b/setup/so-variables @@ -202,6 +202,12 @@ export influxdb_pillar_file adv_influxdb_pillar_file="$local_salt_dir/pillar/influxdb/adv_influxdb.sls" export adv_influxdb_pillar_file +postgres_pillar_file="$local_salt_dir/pillar/postgres/soc_postgres.sls" +export postgres_pillar_file + +adv_postgres_pillar_file="$local_salt_dir/pillar/postgres/adv_postgres.sls" +export adv_postgres_pillar_file + logrotate_pillar_file="$local_salt_dir/pillar/logrotate/soc_logrotate.sls" export logrotate_pillar_file From 762e73faf59c3e55beaf1d85a32582286256c172 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 09:55:42 -0400 Subject: [PATCH 006/110] Add so-postgres host management scripts - so-postgres-manage: wraps docker exec for psql operations (sql, sqlfile, shell, dblist, userlist) - so-postgres-start/stop/restart: standard container lifecycle - Scripts installed to /usr/sbin via file.recurse in config.sls --- salt/postgres/config.sls | 8 ++ salt/postgres/tools/sbin/so-postgres-manage | 80 ++++++++++++++++++++ salt/postgres/tools/sbin/so-postgres-restart | 10 +++ salt/postgres/tools/sbin/so-postgres-start | 10 +++ salt/postgres/tools/sbin/so-postgres-stop | 10 +++ 5 files changed, 118 insertions(+) create mode 100644 salt/postgres/tools/sbin/so-postgres-manage create mode 100644 salt/postgres/tools/sbin/so-postgres-restart create mode 100644 salt/postgres/tools/sbin/so-postgres-start create mode 100644 salt/postgres/tools/sbin/so-postgres-stop diff --git a/salt/postgres/config.sls b/salt/postgres/config.sls index 3502b6409..25bcf6ad3 100644 --- a/salt/postgres/config.sls +++ b/salt/postgres/config.sls @@ -54,6 +54,14 @@ postgresconf: - defaults: PGMERGED: {{ PGMERGED }} +postgres_sbin: + file.recurse: + - name: /usr/sbin + - source: salt://postgres/tools/sbin + - user: 939 + - group: 939 + - file_mode: 755 + {% else %} {{sls}}_state_not_allowed: diff --git a/salt/postgres/tools/sbin/so-postgres-manage b/salt/postgres/tools/sbin/so-postgres-manage new file mode 100644 index 000000000..3729d5c0d --- /dev/null +++ b/salt/postgres/tools/sbin/so-postgres-manage @@ -0,0 +1,80 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +. 
/usr/sbin/so-common + +usage() { + echo "Usage: $0 [args]" + echo "" + echo "Supported Operations:" + echo " sql Execute a SQL command, requires: " + echo " sqlfile Execute a SQL file, requires: " + echo " shell Open an interactive psql shell" + echo " dblist List databases" + echo " userlist List database roles" + echo "" + exit 1 +} + +if [ $# -lt 1 ]; then + usage +fi + +# Check for prerequisites +if [ "$(id -u)" -ne 0 ]; then + echo "This script must be run using sudo!" + exit 1 +fi + +COMMAND=$(basename $0) +OP=$1 +shift + +set -eo pipefail + +log() { + echo -e "$(date) | $COMMAND | $@" >&2 +} + +so_psql() { + docker exec so-postgres psql -U postgres -d securityonion "$@" +} + +case "$OP" in + + sql) + [ $# -lt 1 ] && usage + so_psql -c "$1" + ;; + + sqlfile) + [ $# -ne 1 ] && usage + if [ ! -f "$1" ]; then + log "File not found: $1" + exit 1 + fi + docker cp "$1" so-postgres:/tmp/sqlfile.sql + docker exec so-postgres psql -U postgres -d securityonion -f /tmp/sqlfile.sql + docker exec so-postgres rm -f /tmp/sqlfile.sql + ;; + + shell) + docker exec -it so-postgres psql -U postgres -d securityonion + ;; + + dblist) + so_psql -c "\l" + ;; + + userlist) + so_psql -c "\du" + ;; + + *) + usage + ;; +esac diff --git a/salt/postgres/tools/sbin/so-postgres-restart b/salt/postgres/tools/sbin/so-postgres-restart new file mode 100644 index 000000000..8e3e516dd --- /dev/null +++ b/salt/postgres/tools/sbin/so-postgres-restart @@ -0,0 +1,10 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +. /usr/sbin/so-common + +/usr/sbin/so-restart postgres $1 diff --git a/salt/postgres/tools/sbin/so-postgres-start b/salt/postgres/tools/sbin/so-postgres-start new file mode 100644 index 000000000..0893eaa2d --- /dev/null +++ b/salt/postgres/tools/sbin/so-postgres-start @@ -0,0 +1,10 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +. /usr/sbin/so-common + +/usr/sbin/so-start postgres $1 diff --git a/salt/postgres/tools/sbin/so-postgres-stop b/salt/postgres/tools/sbin/so-postgres-stop new file mode 100644 index 000000000..6fd0d9165 --- /dev/null +++ b/salt/postgres/tools/sbin/so-postgres-stop @@ -0,0 +1,10 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +. /usr/sbin/so-common + +/usr/sbin/so-stop postgres $1 From 358a2e6d3faf1e9120092a932730d442945a1b09 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 10:02:41 -0400 Subject: [PATCH 007/110] Add so-postgres to container image pull list Add to both the import and default manager container lists so the image gets downloaded during installation. 
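
A quick post-pull sanity check (illustrative only; assumes the standard
so-* image naming used throughout so-image-common):

    # Confirm the postgres image came down with the rest of the stack
    docker images --format '{{.Repository}}:{{.Tag}}' | grep so-postgres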
--- salt/common/tools/sbin/so-image-common | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/common/tools/sbin/so-image-common b/salt/common/tools/sbin/so-image-common index 5ce2da241..d797ae3cb 100755 --- a/salt/common/tools/sbin/so-image-common +++ b/salt/common/tools/sbin/so-image-common @@ -31,6 +31,7 @@ container_list() { "so-hydra" "so-nginx" "so-pcaptools" + "so-postgres" "so-soc" "so-suricata" "so-telegraf" @@ -55,6 +56,7 @@ container_list() { "so-logstash" "so-nginx" "so-pcaptools" + "so-postgres" "so-redis" "so-soc" "so-strelka-backend" From 61bdfb1a4b9b4dd96ed718046f65d2e7bb54cc4b Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 10:29:10 -0400 Subject: [PATCH 008/110] Add daily PostgreSQL database backup - pg_dumpall piped through gzip, stored in /nsm/backup/ - Runs daily at 00:05 (4 minutes after config backup) - 7-day retention matching existing config backup policy - Skips gracefully if container isn't running --- salt/backup/config_backup.sls | 20 +++++++++++++ salt/backup/tools/sbin/so-postgres-backup | 36 +++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 salt/backup/tools/sbin/so-postgres-backup diff --git a/salt/backup/config_backup.sls b/salt/backup/config_backup.sls index a09c67b1b..c8e342463 100644 --- a/salt/backup/config_backup.sls +++ b/salt/backup/config_backup.sls @@ -32,3 +32,23 @@ so_config_backup: - daymonth: '*' - month: '*' - dayweek: '*' + +postgres_backup_script: + file.managed: + - name: /usr/sbin/so-postgres-backup + - user: root + - group: root + - mode: 755 + - source: salt://backup/tools/sbin/so-postgres-backup + +# Add postgres database backup +so_postgres_backup: + cron.present: + - name: /usr/sbin/so-postgres-backup > /dev/null 2>&1 + - identifier: so_postgres_backup + - user: root + - minute: '5' + - hour: '0' + - daymonth: '*' + - month: '*' + - dayweek: '*' diff --git a/salt/backup/tools/sbin/so-postgres-backup b/salt/backup/tools/sbin/so-postgres-backup new file mode 100644 index 000000000..c577f7b59 --- /dev/null +++ b/salt/backup/tools/sbin/so-postgres-backup @@ -0,0 +1,36 @@ +#!/bin/bash +# +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +. /usr/sbin/so-common + +TODAY=$(date '+%Y_%m_%d') +BACKUPDIR=/nsm/backup +BACKUPFILE="$BACKUPDIR/so-postgres-backup-$TODAY.sql.gz" +MAXBACKUPS=7 + +mkdir -p $BACKUPDIR + +# Skip if already backed up today +if [ -f "$BACKUPFILE" ]; then + exit 0 +fi + +# Skip if container isn't running +if ! 
docker ps --format '{{.Names}}' | grep -q '^so-postgres$'; then + exit 0 +fi + +# Dump all databases and roles, compress +docker exec so-postgres pg_dumpall -U postgres | gzip > "$BACKUPFILE" + +# Retention cleanup +NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l) +while [ "$NUMBACKUPS" -gt "$MAXBACKUPS" ]; do + OLDEST=$(find $BACKUPDIR -type f -name "so-postgres-backup*" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}') + rm -f "$OLDEST" + NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l) +done From 46e38d39bb7376d41d9c40c4acb3ebb837b5be1f Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 12:23:47 -0400 Subject: [PATCH 009/110] Enable postgres by default Safe because postgres states are only applied to manager-type nodes via top.sls and allowed_states.map.jinja. --- salt/postgres/defaults.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/postgres/defaults.yaml b/salt/postgres/defaults.yaml index 9757f08f3..c24a07f56 100644 --- a/salt/postgres/defaults.yaml +++ b/salt/postgres/defaults.yaml @@ -1,5 +1,5 @@ postgres: - enabled: False + enabled: True config: listen_addresses: '*' port: 5432 From b87af8ea3d938f9119dadd8dd34d5bd7d94294bd Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 12:39:46 -0400 Subject: [PATCH 010/110] Add postgres.auth to allowed_states Matches the elasticsearch.auth pattern where auth states use the full sls path check and are explicitly listed. --- salt/allowed_states.map.jinja | 1 + 1 file changed, 1 insertion(+) diff --git a/salt/allowed_states.map.jinja b/salt/allowed_states.map.jinja index 2fb61a664..a1bd2784a 100644 --- a/salt/allowed_states.map.jinja +++ b/salt/allowed_states.map.jinja @@ -30,6 +30,7 @@ 'nginx', 'influxdb', 'postgres', + 'postgres.auth', 'soc', 'kratos', 'hydra', From f0b67a415accd492e1e75b233c0dfe7a9f8c2fbe Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 9 Apr 2026 12:40:55 -0500 Subject: [PATCH 011/110] more filestream integration policy updates --- .../grid-nodes_general/import-zeek-logs.json | 11 +++++++++-- .../grid-nodes_general/kratos-logs.json | 14 ++++++++++---- .../grid-nodes_general/zeek-logs.json | 12 +++++++++--- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/import-zeek-logs.json b/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/import-zeek-logs.json index ac03f3c1d..c1fd7f147 100644 --- a/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/import-zeek-logs.json +++ b/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/import-zeek-logs.json @@ -9,16 +9,22 @@ "namespace": "so", "description": "Zeek Import logs", "policy_id": "so-grid-nodes_general", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/nsm/import/*/zeek/logs/*.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "import", "pipeline": "", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -34,7 +40,8 @@ "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git 
a/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/kratos-logs.json b/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/kratos-logs.json index 545588521..83d153439 100644 --- a/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/kratos-logs.json +++ b/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/kratos-logs.json @@ -15,19 +15,25 @@ "version": "" }, "name": "kratos-logs", + "namespace": "so", "description": "Kratos logs", "policy_id": "so-grid-nodes_general", - "namespace": "so", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/opt/so/log/kratos/kratos.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "kratos", "pipeline": "kratos", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", @@ -48,10 +54,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } diff --git a/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/zeek-logs.json b/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/zeek-logs.json index 4af2b2921..9797b9e75 100644 --- a/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/zeek-logs.json +++ b/salt/elasticfleet/files/integrations-dynamic/grid-nodes_general/zeek-logs.json @@ -9,16 +9,22 @@ "namespace": "so", "description": "Zeek logs", "policy_id": "so-grid-nodes_general", + "policy_ids": [ + "so-grid-nodes_general" + ], + "vars": {}, "inputs": { "filestream-filestream": { "enabled": true, "streams": { - "filestream.generic": { + "filestream.filestream": { "enabled": true, "vars": { "paths": [ "/nsm/zeek/logs/current/*.log" ], + "compression_gzip": false, + "use_logs_stream": false, "data_stream.dataset": "zeek", "parsers": "#- ndjson:\n# target: \"\"\n# message_key: msg\n#- multiline:\n# type: count\n# count_lines: 3\n", "exclude_files": ["({%- endraw -%}{{ ELASTICFLEETMERGED.logging.zeek.excluded | join('|') }}{%- raw -%})(\\..+)?\\.log$"], @@ -30,10 +36,10 @@ "harvester_limit": 0, "fingerprint": false, "fingerprint_offset": 0, - "fingerprint_length": "64", "file_identity_native": true, "exclude_lines": [], - "include_lines": [] + "include_lines": [], + "delete_enabled": false } } } From 2dfa83dd7da1ec2a0cb87aef8a9cabdc42fc0161 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 14:09:32 -0400 Subject: [PATCH 012/110] Wire postgres credentials into SOC module config - Create vars/postgres.map.jinja for postgres auth globals - Add POSTGRES_GLOBALS to all manager-type role vars (manager, eval, standalone, managersearch, import) - Add postgres module config to soc/defaults.yaml - Inject so_postgres credentials from auth pillar into soc/defaults.map.jinja (conditional on auth pillar existing) --- salt/soc/defaults.map.jinja | 4 ++++ salt/soc/defaults.yaml | 8 ++++++++ salt/vars/eval.map.jinja | 2 ++ salt/vars/import.map.jinja | 2 ++ salt/vars/manager.map.jinja | 4 +++- salt/vars/managersearch.map.jinja | 4 +++- salt/vars/postgres.map.jinja | 16 ++++++++++++++++ salt/vars/standalone.map.jinja | 2 ++ 8 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 salt/vars/postgres.map.jinja diff --git 
a/salt/soc/defaults.map.jinja b/salt/soc/defaults.map.jinja index 2821bb8e5..e823477b0 100644 --- a/salt/soc/defaults.map.jinja +++ b/salt/soc/defaults.map.jinja @@ -24,6 +24,10 @@ {% do SOCDEFAULTS.soc.config.server.modules.elastic.update({'username': GLOBALS.elasticsearch.auth.users.so_elastic_user.user, 'password': GLOBALS.elasticsearch.auth.users.so_elastic_user.pass}) %} +{% if GLOBALS.postgres is defined and GLOBALS.postgres.auth is defined %} +{% do SOCDEFAULTS.soc.config.server.modules.postgres.update({'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass}) %} +{% endif %} + {% do SOCDEFAULTS.soc.config.server.modules.influxdb.update({'hostUrl': 'https://' ~ GLOBALS.influxdb_host ~ ':8086'}) %} {% do SOCDEFAULTS.soc.config.server.modules.influxdb.update({'token': INFLUXDB_TOKEN}) %} {% for tool in SOCDEFAULTS.soc.config.server.client.tools %} diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index 0bde8f20e..2bdbbae37 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -1491,6 +1491,14 @@ soc: org: Security Onion bucket: telegraf/so_short_term verifyCert: false + postgres: + hostUrl: so-postgres + port: 5432 + username: + password: + dbname: securityonion + sslMode: require + assistantEnabled: true playbook: autoUpdateEnabled: true playbookImportFrequencySeconds: 86400 diff --git a/salt/vars/eval.map.jinja b/salt/vars/eval.map.jinja index 3c2e66a97..3cba33797 100644 --- a/salt/vars/eval.map.jinja +++ b/salt/vars/eval.map.jinja @@ -1,4 +1,5 @@ {% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %} +{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %} {% from 'vars/sensor.map.jinja' import SENSOR_GLOBALS %} {% set ROLE_GLOBALS = {} %} @@ -6,6 +7,7 @@ {% set EVAL_GLOBALS = [ ELASTICSEARCH_GLOBALS, + POSTGRES_GLOBALS, SENSOR_GLOBALS ] %} diff --git a/salt/vars/import.map.jinja b/salt/vars/import.map.jinja index f9dfa0c25..8dea3ad7d 100644 --- a/salt/vars/import.map.jinja +++ b/salt/vars/import.map.jinja @@ -1,4 +1,5 @@ {% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %} +{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %} {% from 'vars/sensor.map.jinja' import SENSOR_GLOBALS %} {% set ROLE_GLOBALS = {} %} @@ -6,6 +7,7 @@ {% set IMPORT_GLOBALS = [ ELASTICSEARCH_GLOBALS, + POSTGRES_GLOBALS, SENSOR_GLOBALS ] %} diff --git a/salt/vars/manager.map.jinja b/salt/vars/manager.map.jinja index c6b348341..009dd5607 100644 --- a/salt/vars/manager.map.jinja +++ b/salt/vars/manager.map.jinja @@ -1,12 +1,14 @@ {% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %} {% from 'vars/logstash.map.jinja' import LOGSTASH_GLOBALS %} +{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %} {% set ROLE_GLOBALS = {} %} {% set MANAGER_GLOBALS = [ ELASTICSEARCH_GLOBALS, - LOGSTASH_GLOBALS + LOGSTASH_GLOBALS, + POSTGRES_GLOBALS ] %} diff --git a/salt/vars/managersearch.map.jinja b/salt/vars/managersearch.map.jinja index c2a3d9628..369efe5a4 100644 --- a/salt/vars/managersearch.map.jinja +++ b/salt/vars/managersearch.map.jinja @@ -1,12 +1,14 @@ {% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %} {% from 'vars/logstash.map.jinja' import LOGSTASH_GLOBALS %} +{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %} {% set ROLE_GLOBALS = {} %} {% set MANAGERSEARCH_GLOBALS = [ ELASTICSEARCH_GLOBALS, - LOGSTASH_GLOBALS + LOGSTASH_GLOBALS, + POSTGRES_GLOBALS ] %} diff --git a/salt/vars/postgres.map.jinja 
b/salt/vars/postgres.map.jinja new file mode 100644 index 000000000..ce65d2d1f --- /dev/null +++ b/salt/vars/postgres.map.jinja @@ -0,0 +1,16 @@ +{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one + or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at + https://securityonion.net/license; you may not use this file except in compliance with the + Elastic License 2.0. #} + +{% import 'vars/init.map.jinja' as INIT %} + +{% + set POSTGRES_GLOBALS = { + 'postgres': {} + } +%} + +{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %} +{% do POSTGRES_GLOBALS.postgres.update({'auth': INIT.PILLAR.postgres.auth}) %} +{% endif %} diff --git a/salt/vars/standalone.map.jinja b/salt/vars/standalone.map.jinja index 0e49a327d..6488eb998 100644 --- a/salt/vars/standalone.map.jinja +++ b/salt/vars/standalone.map.jinja @@ -1,5 +1,6 @@ {% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %} {% from 'vars/logstash.map.jinja' import LOGSTASH_GLOBALS %} +{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %} {% from 'vars/sensor.map.jinja' import SENSOR_GLOBALS %} {% set ROLE_GLOBALS = {} %} @@ -8,6 +9,7 @@ [ ELASTICSEARCH_GLOBALS, LOGSTASH_GLOBALS, + POSTGRES_GLOBALS, SENSOR_GLOBALS ] %} From 89e49d0bf35dcff8202c27462b3c966601737af7 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:44:51 -0500 Subject: [PATCH 013/110] rework elasticsearch index template generation --- salt/elasticfleet/content-defaults.map.jinja | 123 ++++++++++++++++++ salt/elasticfleet/input-defaults.map.jinja | 123 ++++++++++++++++++ .../integration-defaults.map.jinja | 27 +--- .../tools/sbin/so-elastic-fleet-common | 28 +++- ...o-elastic-fleet-optional-integrations-load | 15 ++- salt/elasticsearch/config.sls | 7 + salt/elasticsearch/enabled.sls | 58 +++++---- salt/elasticsearch/template.map.jinja | 80 +++++++++--- 8 files changed, 392 insertions(+), 69 deletions(-) create mode 100644 salt/elasticfleet/content-defaults.map.jinja create mode 100644 salt/elasticfleet/input-defaults.map.jinja diff --git a/salt/elasticfleet/content-defaults.map.jinja b/salt/elasticfleet/content-defaults.map.jinja new file mode 100644 index 000000000..f4237d6d1 --- /dev/null +++ b/salt/elasticfleet/content-defaults.map.jinja @@ -0,0 +1,123 @@ +{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one + or more contributor license agreements. Licensed under the Elastic License 2.0; you may not use + this file except in compliance with the Elastic License 2.0. 
#} + + +{% import_json '/opt/so/state/esfleet_content_package_components.json' as ADDON_CONTENT_PACKAGE_COMPONENTS %} +{% import_json '/opt/so/state/esfleet_component_templates.json' as INSTALLED_COMPONENT_TEMPLATES %} +{% import_yaml 'elasticfleet/defaults.yaml' as ELASTICFLEETDEFAULTS %} + +{% set CORE_ESFLEET_PACKAGES = ELASTICFLEETDEFAULTS.get('elasticfleet', {}).get('packages', {}) %} +{% set ADDON_CONTENT_INTEGRATION_DEFAULTS = {} %} +{% set DEBUG_STUFF = {} %} + +{% for pkg in ADDON_CONTENT_PACKAGE_COMPONENTS %} +{% if pkg.name in CORE_ESFLEET_PACKAGES %} +{# skip core content packages #} +{% elif pkg.name not in CORE_ESFLEET_PACKAGES %} +{# generate defaults for each content package #} +{% if pkg.dataStreams is defined and pkg.dataStreams is not none and pkg.dataStreams | length > 0%} +{% for pattern in pkg.dataStreams %} +{# in ES 9.3.2 'input' type integrations no longer create default component templates and instead they wait for user input during 'integration' setup (fleet ui config) + title: generic is an artifact of that and is not in use #} +{% if pattern.title == "generic" %} +{% continue %} +{% endif %} +{% if "metrics-" in pattern.name %} +{% set integration_type = "metrics-" %} +{% elif "logs-" in pattern.name %} +{% set integration_type = "logs-" %} +{% else %} +{% set integration_type = "" %} +{% endif %} +{# on content integrations the component name is user defined at the time it is added to an agent policy #} +{% set component_name = pattern.title %} +{% set index_pattern = pattern.name %} +{# component_name_x maintains the functionality of merging local pillar changes with generated 'defaults' via SOC UI #} +{% set component_name_x = component_name.replace(".","_x_") %} +{# pillar overrides/merge expects the key names to follow the naming in elasticsearch/defaults.yaml eg. so-logs-1password_x_item_usages . 
The _x_ is replaced later on in elasticsearch/template.map.jinja #} +{% set integration_key = "so-" ~ integration_type ~ pkg.name + '_x_' ~ component_name_x %} +{# Default integration settings #} +{% set integration_defaults = { + "index_sorting": false, + "index_template": { + "composed_of": [integration_type ~ component_name ~ "@package", integration_type ~ component_name ~ "@custom", "so-fleet_integrations.ip_mappings-1", "so-fleet_globals-1", "so-fleet_agent_id_verification-1"], + "data_stream": { + "allow_custom_routing": false, + "hidden": false + }, + "ignore_missing_component_templates": [integration_type ~ component_name ~ "@custom"], + "index_patterns": [index_pattern], + "priority": 501, + "template": { + "settings": { + "index": { + "lifecycle": {"name": "so-" ~ integration_type ~ component_name ~ "-logs"}, + "number_of_replicas": 0 + } + } + } + }, + "policy": { + "phases": { + "cold": { + "actions": { + "allocate":{ + "number_of_replicas": "" + }, + "set_priority": {"priority": 0} + }, + "min_age": "60d" + }, + "delete": { + "actions": { + "delete": {} + }, + "min_age": "365d" + }, + "hot": { + "actions": { + "rollover": { + "max_age": "30d", + "max_primary_shard_size": "50gb" + }, + "forcemerge":{ + "max_num_segments": "" + }, + "shrink":{ + "max_primary_shard_size": "", + "method": "COUNT", + "number_of_shards": "" + }, + "set_priority": {"priority": 100} + }, + "min_age": "0ms" + }, + "warm": { + "actions": { + "allocate": { + "number_of_replicas": "" + }, + "forcemerge": { + "max_num_segments": "" + }, + "shrink":{ + "max_primary_shard_size": "", + "method": "COUNT", + "number_of_shards": "" + }, + "set_priority": {"priority": 50} + }, + "min_age": "30d" + } + } + } + } %} + + +{% do ADDON_CONTENT_INTEGRATION_DEFAULTS.update({integration_key: integration_defaults}) %} +{% endfor %} +{% else %} +{% endif %} +{% endif %} +{% endfor %} diff --git a/salt/elasticfleet/input-defaults.map.jinja b/salt/elasticfleet/input-defaults.map.jinja new file mode 100644 index 000000000..a02844330 --- /dev/null +++ b/salt/elasticfleet/input-defaults.map.jinja @@ -0,0 +1,123 @@ +{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one + or more contributor license agreements. Licensed under the Elastic License 2.0; you may not use + this file except in compliance with the Elastic License 2.0. 
#} + + +{% import_json '/opt/so/state/esfleet_input_package_components.json' as ADDON_INPUT_PACKAGE_COMPONENTS %} +{% import_json '/opt/so/state/esfleet_component_templates.json' as INSTALLED_COMPONENT_TEMPLATES %} +{% import_yaml 'elasticfleet/defaults.yaml' as ELASTICFLEETDEFAULTS %} + +{% set CORE_ESFLEET_PACKAGES = ELASTICFLEETDEFAULTS.get('elasticfleet', {}).get('packages', {}) %} +{% set ADDON_INPUT_INTEGRATION_DEFAULTS = {} %} +{% set DEBUG_STUFF = {} %} + +{% for pkg in ADDON_INPUT_PACKAGE_COMPONENTS %} +{% if pkg.name in CORE_ESFLEET_PACKAGES %} +{# skip core input packages #} +{% elif pkg.name not in CORE_ESFLEET_PACKAGES %} +{# generate defaults for each input package #} +{% if pkg.dataStreams is defined and pkg.dataStreams is not none and pkg.dataStreams | length > 0 %} +{% for pattern in pkg.dataStreams %} +{# in ES 9.3.2 'input' type integrations no longer create default component templates and instead they wait for user input during 'integration' setup (fleet ui config) + title: generic is an artifact of that and is not in use #} +{% if pattern.title == "generic" %} +{% continue %} +{% endif %} +{% if "metrics-" in pattern.name %} +{% set integration_type = "metrics-" %} +{% elif "logs-" in pattern.name %} +{% set integration_type = "logs-" %} +{% else %} +{% set integration_type = "" %} +{% endif %} +{# on input integrations the component name is user defined at the time it is added to an agent policy #} +{% set component_name = pattern.title %} +{% set index_pattern = pattern.name %} +{# component_name_x maintains the functionality of merging local pillar changes with generated 'defaults' via SOC UI #} +{% set component_name_x = component_name.replace(".","_x_") %} +{# pillar overrides/merge expects the key names to follow the naming in elasticsearch/defaults.yaml eg. so-logs-1password_x_item_usages . 
The _x_ is replaced later on in elasticsearch/template.map.jinja #} +{% set integration_key = "so-" ~ integration_type ~ pkg.name + '_x_' ~ component_name_x %} +{# Default integration settings #} +{% set integration_defaults = { + "index_sorting": false, + "index_template": { + "composed_of": [integration_type ~ component_name ~ "@package", integration_type ~ component_name ~ "@custom", "so-fleet_integrations.ip_mappings-1", "so-fleet_globals-1", "so-fleet_agent_id_verification-1"], + "data_stream": { + "allow_custom_routing": false, + "hidden": false + }, + "ignore_missing_component_templates": [integration_type ~ component_name ~ "@custom"], + "index_patterns": [index_pattern], + "priority": 501, + "template": { + "settings": { + "index": { + "lifecycle": {"name": "so-" ~ integration_type ~ component_name ~ "-logs"}, + "number_of_replicas": 0 + } + } + } + }, + "policy": { + "phases": { + "cold": { + "actions": { + "allocate":{ + "number_of_replicas": "" + }, + "set_priority": {"priority": 0} + }, + "min_age": "60d" + }, + "delete": { + "actions": { + "delete": {} + }, + "min_age": "365d" + }, + "hot": { + "actions": { + "rollover": { + "max_age": "30d", + "max_primary_shard_size": "50gb" + }, + "forcemerge":{ + "max_num_segments": "" + }, + "shrink":{ + "max_primary_shard_size": "", + "method": "COUNT", + "number_of_shards": "" + }, + "set_priority": {"priority": 100} + }, + "min_age": "0ms" + }, + "warm": { + "actions": { + "allocate": { + "number_of_replicas": "" + }, + "forcemerge": { + "max_num_segments": "" + }, + "shrink":{ + "max_primary_shard_size": "", + "method": "COUNT", + "number_of_shards": "" + }, + "set_priority": {"priority": 50} + }, + "min_age": "30d" + } + } + } + } %} + + +{% do ADDON_INPUT_INTEGRATION_DEFAULTS.update({integration_key: integration_defaults}) %} +{% do DEBUG_STUFF.update({integration_key: "Generating defaults for "+ pkg.name })%} +{% endfor %} +{% endif %} +{% endif %} +{% endfor %} diff --git a/salt/elasticfleet/integration-defaults.map.jinja b/salt/elasticfleet/integration-defaults.map.jinja index f85a95ec9..eeb85123a 100644 --- a/salt/elasticfleet/integration-defaults.map.jinja +++ b/salt/elasticfleet/integration-defaults.map.jinja @@ -59,8 +59,8 @@ {# skip core integrations #} {% elif pkg.name not in CORE_ESFLEET_PACKAGES %} {# generate defaults for each integration #} -{% if pkg.es_index_patterns is defined and pkg.es_index_patterns is not none %} -{% for pattern in pkg.es_index_patterns %} +{% if pkg.dataStreams is defined and pkg.dataStreams is not none and pkg.dataStreams | length > 0 %} +{% for pattern in pkg.dataStreams %} {% if "metrics-" in pattern.name %} {% set integration_type = "metrics-" %} {% elif "logs-" in pattern.name %} @@ -75,44 +75,27 @@ {% if component_name in WEIRD_INTEGRATIONS %} {% set component_name = WEIRD_INTEGRATIONS[component_name] %} {% endif %} - -{# create duplicate of component_name, so we can split generics from @custom component templates in the index template below and overwrite the default @package when needed - eg. 
having to replace unifiedlogs.generic@package with filestream.generic@package, but keep the ability to customize unifiedlogs.generic@custom and its ILM policy #} -{% set custom_component_name = component_name %} - -{# duplicate integration_type to assist with sometimes needing to overwrite component templates with 'logs-filestream.generic@package' (there is no metrics-filestream.generic@package) #} -{% set generic_integration_type = integration_type %} - {# component_name_x maintains the functionality of merging local pillar changes with generated 'defaults' via SOC UI #} {% set component_name_x = component_name.replace(".","_x_") %} {# pillar overrides/merge expects the key names to follow the naming in elasticsearch/defaults.yaml eg. so-logs-1password_x_item_usages . The _x_ is replaced later on in elasticsearch/template.map.jinja #} {% set integration_key = "so-" ~ integration_type ~ component_name_x %} -{# if its a .generic template make sure that a .generic@package for the integration exists. Else default to logs-filestream.generic@package #} -{% if ".generic" in component_name and integration_type ~ component_name ~ "@package" not in INSTALLED_COMPONENT_TEMPLATES %} -{# these generic templates by default are directed to index_pattern of 'logs-generic-*', overwrite that here to point to eg gcp_pubsub.generic-* #} -{% set index_pattern = integration_type ~ component_name ~ "-*" %} -{# includes use of .generic component template, but it doesn't exist in installed component templates. Redirect it to filestream.generic@package #} -{% set component_name = "filestream.generic" %} -{% set generic_integration_type = "logs-" %} -{% endif %} - {# Default integration settings #} {% set integration_defaults = { "index_sorting": false, "index_template": { - "composed_of": [generic_integration_type ~ component_name ~ "@package", integration_type ~ custom_component_name ~ "@custom", "so-fleet_integrations.ip_mappings-1", "so-fleet_globals-1", "so-fleet_agent_id_verification-1"], + "composed_of": [integration_type ~ component_name ~ "@package", integration_type ~ component_name ~ "@custom", "so-fleet_integrations.ip_mappings-1", "so-fleet_globals-1", "so-fleet_agent_id_verification-1"], "data_stream": { "allow_custom_routing": false, "hidden": false }, - "ignore_missing_component_templates": [integration_type ~ custom_component_name ~ "@custom"], + "ignore_missing_component_templates": [integration_type ~ component_name ~ "@custom"], "index_patterns": [index_pattern], "priority": 501, "template": { "settings": { "index": { - "lifecycle": {"name": "so-" ~ integration_type ~ custom_component_name ~ "-logs"}, + "lifecycle": {"name": "so-" ~ integration_type ~ component_name ~ "-logs"}, "number_of_replicas": 0 } } diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 1a597b1db..92532082a 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -135,9 +135,33 @@ elastic_fleet_bulk_package_install() { fi } -elastic_fleet_installed_packages() { - if ! fleet_api "epm/packages/installed?perPage=500"; then +elastic_fleet_get_package_list_by_type() { + if ! 
output=$(fleet_api "epm/packages"); then return 1 + else + is_integration=$(jq '[.items[] | select(.type=="integration") | .name ]' <<< "$output") + is_input=$(jq '[.items[] | select(.type=="input") | .name ]' <<< "$output") + is_content=$(jq '[.items[] | select(.type=="content") | .name ]' <<< "$output") + jq -n --argjson is_integration "${is_integration:-[]}" \ + --argjson is_input "${is_input:-[]}" \ + --argjson is_content "${is_content:-[]}" \ + '{"integration": $is_integration,"input": $is_input, "content": $is_content}' + fi +} +elastic_fleet_installed_packages_components() { + package_type=${1,,} + if [[ "$package_type" != "integration" && "$package_type" != "input" && "$package_type" != "content" ]]; then + echo "Error: Invalid package type ${package_type}. Valid types are 'integration', 'input', or 'content'." + return 1 + fi + + packages_by_type=$(elastic_fleet_get_package_list_by_type) + packages=$(jq --arg package_type "$package_type" '.[$package_type]' <<< "$packages_by_type") + + if ! output=$(fleet_api "epm/packages/installed?perPage=500"); then + return 1 + else + jq -c --argjson packages "$packages" '[.items[] | select(.name | IN($packages[])) | {name: .name, dataStreams: .dataStreams}]' <<< "$output" fi } diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load index 8c0f627ef..ab38b7065 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-fleet-optional-integrations-load @@ -18,7 +18,9 @@ INSTALLED_PACKAGE_LIST=/tmp/esfleet_installed_packages.json BULK_INSTALL_PACKAGE_LIST=/tmp/esfleet_bulk_install.json BULK_INSTALL_PACKAGE_TMP=/tmp/esfleet_bulk_install_tmp.json BULK_INSTALL_OUTPUT=/opt/so/state/esfleet_bulk_install_results.json -PACKAGE_COMPONENTS=/opt/so/state/esfleet_package_components.json +INTEGRATION_PACKAGE_COMPONENTS=/opt/so/state/esfleet_package_components.json +INPUT_PACKAGE_COMPONENTS=/opt/so/state/esfleet_input_package_components.json +CONTENT_PACKAGE_COMPONENTS=/opt/so/state/esfleet_content_package_components.json COMPONENT_TEMPLATES=/opt/so/state/esfleet_component_templates.json PENDING_UPDATE=false @@ -179,10 +181,13 @@ if [[ -f $STATE_FILE_SUCCESS ]]; then else echo "Elastic integrations don't appear to need installation/updating..." 
fi - # Write out file for generating index/component/ilm templates - if latest_installed_package_list=$(elastic_fleet_installed_packages); then - echo $latest_installed_package_list | jq '[.items[] | {name: .name, es_index_patterns: .dataStreams}]' > $PACKAGE_COMPONENTS - fi + # Write out file for generating index/component/ilm templates, keeping each package type separate + for package_type in "INTEGRATION" "INPUT" "CONTENT"; do + if latest_installed_package_list=$(elastic_fleet_installed_packages_components "$package_type"); then + outfile="${package_type}_PACKAGE_COMPONENTS" + echo $latest_installed_package_list > "${!outfile}" + fi + done if retry 3 1 "so-elasticsearch-query / --fail --output /dev/null"; then # Refresh installed component template list latest_component_templates_list=$(so-elasticsearch-query _component_template | jq '.component_templates[] | .name' | jq -s '.') diff --git a/salt/elasticsearch/config.sls b/salt/elasticsearch/config.sls index 41ef02164..ac9fa8f72 100644 --- a/salt/elasticsearch/config.sls +++ b/salt/elasticsearch/config.sls @@ -91,6 +91,13 @@ estemplatedir: - group: 939 - makedirs: True +esaddontemplatedir: + file.directory: + - name: /opt/so/conf/elasticsearch/templates/addon-index + - user: 930 + - group: 939 + - makedirs: True + esrolesdir: file.directory: - name: /opt/so/conf/elasticsearch/roles diff --git a/salt/elasticsearch/enabled.sls b/salt/elasticsearch/enabled.sls index 29ab80329..61b9bad01 100644 --- a/salt/elasticsearch/enabled.sls +++ b/salt/elasticsearch/enabled.sls @@ -10,8 +10,7 @@ {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCH_NODES %} {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCH_SEED_HOSTS %} {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCHMERGED %} -{% set TEMPLATES = salt['pillar.get']('elasticsearch:templates', {}) %} -{% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS %} +{% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS, ALL_ADDON_SETTINGS, SO_MANAGED_INDICES %} include: - ca @@ -117,16 +116,29 @@ escomponenttemplates: - onchanges_in: - file: so-elasticsearch-templates-reload - show_changes: False - -# Auto-generate templates from defaults file + +# Clean up legacy and non-SO managed templates from the elasticsearch/templates/index/ directory +so_index_template_dir: + file.directory: + - name: /opt/so/conf/elasticsearch/templates/index + - clean: True + {%- if SO_MANAGED_INDICES %} + - require: + {%- for index in SO_MANAGED_INDICES %} + - file: so_index_template_{{index}} + {%- endfor %} + {%- endif %} + +# Auto-generate index templates for SO managed indices (directly defined in elasticsearch/defaults.yaml) +# These index templates are for the core SO datasets and are always required {% for index, settings in ES_INDEX_SETTINGS.items() %} - {% if settings.index_template is defined %} -es_index_template_{{index}}: +{% if settings.index_template is defined %} +so_index_template_{{index}}: file.managed: - name: /opt/so/conf/elasticsearch/templates/index/{{ index }}-template.json - source: salt://elasticsearch/base-template.json.jinja - defaults: - TEMPLATE_CONFIG: {{ settings.index_template }} + TEMPLATE_CONFIG: {{ settings.index_template }} - template: jinja - show_changes: False - onchanges_in: @@ -134,25 +146,23 @@ es_index_template_{{index}}: {% endif %} {% endfor %} -{% if TEMPLATES %} -# Sync custom templates to /opt/so/conf/elasticsearch/templates -{% for TEMPLATE in TEMPLATES %} -es_template_{{TEMPLATE.split('.')[0] | replace("/","_") }}: +# 
Auto-generate optional index templates for integration | input | content packages +# These index templates are not used by default (until user adds package to an agent policy). +# Pre-configured with standard defaults, and incorporated into SOC configuration for user customization. +{% for index,settings in ALL_ADDON_SETTINGS.items() %} +{% if settings.index_template is defined %} +addon_index_template_{{index}}: file.managed: - - source: salt://elasticsearch/templates/index/{{TEMPLATE}} -{% if 'jinja' in TEMPLATE.split('.')[-1] %} - - name: /opt/so/conf/elasticsearch/templates/index/{{TEMPLATE.split('/')[1] | replace(".jinja", "")}} + - name: /opt/so/conf/elasticsearch/templates/addon-index/{{ index }}-template.json + - source: salt://elasticsearch/base-template.json.jinja + - defaults: + TEMPLATE_CONFIG: {{ settings.index_template }} - template: jinja -{% else %} - - name: /opt/so/conf/elasticsearch/templates/index/{{TEMPLATE.split('/')[1]}} -{% endif %} - - user: 930 - - group: 939 - show_changes: False - onchanges_in: - - file: so-elasticsearch-templates-reload -{% endfor %} -{% endif %} + - file: addon-elasticsearch-templates-reload +{% endif %} +{% endfor %} {% if GLOBALS.role in GLOBALS.manager_roles %} so-es-cluster-settings: @@ -179,6 +189,10 @@ so-elasticsearch-templates-reload: file.absent: - name: /opt/so/state/estemplates.txt +addon-elasticsearch-templates-reload: + file.absent: + - name: /opt/so/state/addon_estemplates.txt + so-elasticsearch-templates: cmd.run: - name: /usr/sbin/so-elasticsearch-templates-load diff --git a/salt/elasticsearch/template.map.jinja b/salt/elasticsearch/template.map.jinja index 2563f8e23..2690fa56f 100644 --- a/salt/elasticsearch/template.map.jinja +++ b/salt/elasticsearch/template.map.jinja @@ -15,14 +15,40 @@ {% set ES_INDEX_SETTINGS_ORIG = ELASTICSEARCHDEFAULTS.elasticsearch.index_settings %} {# start generation of integration default index_settings #} -{% if salt['file.file_exists']('/opt/so/state/esfleet_package_components.json') and salt['file.file_exists']('/opt/so/state/esfleet_component_templates.json') %} -{% set check_package_components = salt['file.stats']('/opt/so/state/esfleet_package_components.json') %} -{% if check_package_components.size > 1 %} -{% from 'elasticfleet/integration-defaults.map.jinja' import ADDON_INTEGRATION_DEFAULTS %} -{% for index, settings in ADDON_INTEGRATION_DEFAULTS.items() %} -{% do ES_INDEX_SETTINGS_ORIG.update({index: settings}) %} -{% endfor %} -{% endif%} +{% if salt['file.file_exists']('/opt/so/state/esfleet_component_templates.json') %} +{% set ALL_ADDON_INTEGRATION_DEFAULTS = {} %} +{% set ALL_ADDON_SETTINGS_ORIG = {} %} +{% set ALL_ADDON_SETTINGS_GLOBAL_OVERRIDES = {} %} +{# import integration type defaults #} +{% if salt['file.file_exists']('/opt/so/state/esfleet_integration_package_components.json') %} +{% set check_integration_package_components = salt['file.stats']('/opt/so/state/esfleet_integration_package_components.json') %} +{% if check_integration_package_components.size > 1 %} +{% from 'elasticfleet/integration-defaults.map.jinja' import ADDON_INTEGRATION_DEFAULTS %} +{% do ALL_ADDON_INTEGRATION_DEFAULTS.update(ADDON_INTEGRATION_DEFAULTS) %} +{% endif %} +{% endif %} + +{# import input type defaults #} +{% if salt['file.file_exists']('/opt/so/state/esfleet_input_package_components.json') %} +{% set check_input_package_components = salt['file.stats']('/opt/so/state/esfleet_input_package_components.json') %} +{% if check_input_package_components.size > 1 %} +{% from 
'elasticfleet/input-defaults.map.jinja' import ADDON_INPUT_INTEGRATION_DEFAULTS %} +{% do ALL_ADDON_INTEGRATION_DEFAULTS.update(ADDON_INPUT_INTEGRATION_DEFAULTS) %} +{% endif %} +{% endif %} + +{# import content type defaults #} +{% if salt['file.file_exists']('/opt/so/state/esfleet_content_package_components.json') %} +{% set check_content_package_components = salt['file.stats']('/opt/so/state/esfleet_content_package_components.json') %} +{% if check_content_package_components.size > 1 %} +{% from 'elasticfleet/content-defaults.map.jinja' import ADDON_CONTENT_INTEGRATION_DEFAULTS %} +{% do ALL_ADDON_INTEGRATION_DEFAULTS.update(ADDON_CONTENT_INTEGRATION_DEFAULTS) %} +{% endif %} +{% endif %} + +{% for index, settings in ALL_ADDON_INTEGRATION_DEFAULTS.items() %} +{% do ALL_ADDON_SETTINGS_ORIG.update({index: settings}) %} +{% endfor %} {% endif %} {# end generation of integration default index_settings #} @@ -31,25 +57,34 @@ {% do ES_INDEX_SETTINGS_GLOBAL_OVERRIDES.update({index: salt['defaults.merge'](ELASTICSEARCHDEFAULTS.elasticsearch.index_settings[index], PILLAR_GLOBAL_OVERRIDES, in_place=False)}) %} {% endfor %} +{% if ALL_ADDON_SETTINGS_ORIG.keys() | length > 0 %} +{% for index in ALL_ADDON_SETTINGS_ORIG.keys() %} +{% do ALL_ADDON_SETTINGS_GLOBAL_OVERRIDES.update({index: salt['defaults.merge'](ALL_ADDON_SETTINGS_ORIG[index], PILLAR_GLOBAL_OVERRIDES, in_place=False)}) %} +{% endfor %} +{% endif %} + {% set ES_INDEX_SETTINGS = {} %} -{% do ES_INDEX_SETTINGS_GLOBAL_OVERRIDES.update(salt['defaults.merge'](ES_INDEX_SETTINGS_GLOBAL_OVERRIDES, ES_INDEX_PILLAR, in_place=False)) %} -{% for index, settings in ES_INDEX_SETTINGS_GLOBAL_OVERRIDES.items() %} +{% set ALL_ADDON_SETTINGS = {}%} +{% macro create_final_index_template(DEFINED_SETTINGS, GLOBAL_OVERRIDES, FINAL_INDEX_SETTINGS) %} + +{% do GLOBAL_OVERRIDES.update(salt['defaults.merge'](GLOBAL_OVERRIDES, ES_INDEX_PILLAR, in_place=False)) %} +{% for index, settings in GLOBAL_OVERRIDES.items() %} {# prevent this action from being performed on custom defined indices. #} {# the custom defined index is not present in either of the dictionaries and fails to reder. #} -{% if index in ES_INDEX_SETTINGS_ORIG and index in ES_INDEX_SETTINGS_GLOBAL_OVERRIDES %} +{% if index in DEFINED_SETTINGS and index in GLOBAL_OVERRIDES %} {# dont merge policy from the global_overrides if policy isn't defined in the original index settingss #} {# this will prevent so-elasticsearch-ilm-policy-load from trying to load policy on non ILM manged indices #} -{% if not ES_INDEX_SETTINGS_ORIG[index].policy is defined and ES_INDEX_SETTINGS_GLOBAL_OVERRIDES[index].policy is defined %} -{% do ES_INDEX_SETTINGS_GLOBAL_OVERRIDES[index].pop('policy') %} +{% if not DEFINED_SETTINGS[index].policy is defined and GLOBAL_OVERRIDES[index].policy is defined %} +{% do GLOBAL_OVERRIDES[index].pop('policy') %} {% endif %} {# this prevents and index from inderiting a policy phase from global overrides if it wasnt defined in the defaults. 
#} -{% if ES_INDEX_SETTINGS_GLOBAL_OVERRIDES[index].policy is defined %} -{% for phase in ES_INDEX_SETTINGS_GLOBAL_OVERRIDES[index].policy.phases.copy() %} -{% if ES_INDEX_SETTINGS_ORIG[index].policy.phases[phase] is not defined %} -{% do ES_INDEX_SETTINGS_GLOBAL_OVERRIDES[index].policy.phases.pop(phase) %} +{% if GLOBAL_OVERRIDES[index].policy is defined %} +{% for phase in GLOBAL_OVERRIDES[index].policy.phases.copy() %} +{% if DEFINED_SETTINGS[index].policy.phases[phase] is not defined %} +{% do GLOBAL_OVERRIDES[index].policy.phases.pop(phase) %} {% endif %} {% endfor %} {% endif %} @@ -111,5 +146,14 @@ {% endfor %} {% endif %} -{% do ES_INDEX_SETTINGS.update({index | replace("_x_", "."): ES_INDEX_SETTINGS_GLOBAL_OVERRIDES[index]}) %} +{% do FINAL_INDEX_SETTINGS.update({index | replace("_x_", "."): GLOBAL_OVERRIDES[index]}) %} {% endfor %} +{% endmacro %} + +{{ create_final_index_template(ES_INDEX_SETTINGS_ORIG, ES_INDEX_SETTINGS_GLOBAL_OVERRIDES, ES_INDEX_SETTINGS) }} +{{ create_final_index_template(ALL_ADDON_SETTINGS_ORIG, ALL_ADDON_SETTINGS_GLOBAL_OVERRIDES, ALL_ADDON_SETTINGS) }} + +{% set SO_MANAGED_INDICES = [] %} +{% for index, settings in ES_INDEX_SETTINGS.items() %} +{% do SO_MANAGED_INDICES.append(index) %} +{% endfor %} \ No newline at end of file From 6b8a6267daab3875d3288d2eeadbaf80138c5467 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:45:26 -0500 Subject: [PATCH 014/110] remove unused elasticsearch:index_template pillar references --- pillar/elasticsearch/index_templates.sls | 2 -- pillar/top.sls | 3 --- 2 files changed, 5 deletions(-) delete mode 100644 pillar/elasticsearch/index_templates.sls diff --git a/pillar/elasticsearch/index_templates.sls b/pillar/elasticsearch/index_templates.sls deleted file mode 100644 index a02a1818c..000000000 --- a/pillar/elasticsearch/index_templates.sls +++ /dev/null @@ -1,2 +0,0 @@ -elasticsearch: - index_settings: diff --git a/pillar/top.sls b/pillar/top.sls index 6cdc4808a..d3b24677c 100644 --- a/pillar/top.sls +++ b/pillar/top.sls @@ -97,7 +97,6 @@ base: - node_data.ips - secrets - healthcheck.eval - - elasticsearch.index_templates {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %} - elasticsearch.auth {% endif %} @@ -142,7 +141,6 @@ base: - logstash.nodes - logstash.soc_logstash - logstash.adv_logstash - - elasticsearch.index_templates {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %} - elasticsearch.auth {% endif %} @@ -256,7 +254,6 @@ base: '*_import': - node_data.ips - secrets - - elasticsearch.index_templates {% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %} - elasticsearch.auth {% endif %} From c1b1452bd933136e69c7a7e4b728ff3d520d5cb0 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 19:34:14 -0400 Subject: [PATCH 015/110] Use manager IP for postgres hostUrl instead of container hostname SOC connects to postgres via the host network, not the Docker bridge network, so it needs the manager's IP address rather than the container hostname. 
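
Illustrative connectivity check under this change (placeholders, not real
values; the psql client is available inside the so-postgres container, as
already used by so-postgres-manage):

    # From the manager host, over the host network rather than the bridge
    docker exec -it so-postgres \
      psql "host=<manager-ip> port=5432 dbname=securityonion sslmode=require user=<so_postgres_user>" -c 'SELECT 1;'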
--- salt/soc/defaults.map.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/soc/defaults.map.jinja b/salt/soc/defaults.map.jinja index e823477b0..20d64603d 100644 --- a/salt/soc/defaults.map.jinja +++ b/salt/soc/defaults.map.jinja @@ -25,7 +25,7 @@ {% do SOCDEFAULTS.soc.config.server.modules.elastic.update({'username': GLOBALS.elasticsearch.auth.users.so_elastic_user.user, 'password': GLOBALS.elasticsearch.auth.users.so_elastic_user.pass}) %} {% if GLOBALS.postgres is defined and GLOBALS.postgres.auth is defined %} -{% do SOCDEFAULTS.soc.config.server.modules.postgres.update({'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass}) %} +{% do SOCDEFAULTS.soc.config.server.modules.postgres.update({'hostUrl': GLOBALS.manager_ip, 'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass}) %} {% endif %} {% do SOCDEFAULTS.soc.config.server.modules.influxdb.update({'hostUrl': 'https://' ~ GLOBALS.influxdb_host ~ ':8086'}) %} From 378d1ec81b68d99e0596e07657b1c335a1124dcb Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 9 Apr 2026 18:41:40 -0500 Subject: [PATCH 016/110] initialize vars --- salt/elasticsearch/template.map.jinja | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/salt/elasticsearch/template.map.jinja b/salt/elasticsearch/template.map.jinja index 2690fa56f..fc510324a 100644 --- a/salt/elasticsearch/template.map.jinja +++ b/salt/elasticsearch/template.map.jinja @@ -14,11 +14,12 @@ {% set ES_INDEX_SETTINGS_ORIG = ELASTICSEARCHDEFAULTS.elasticsearch.index_settings %} +{% set ALL_ADDON_INTEGRATION_DEFAULTS = {} %} +{% set ALL_ADDON_SETTINGS_ORIG = {} %} +{% set ALL_ADDON_SETTINGS_GLOBAL_OVERRIDES = {} %} +{% set ALL_ADDON_SETTINGS = {} %} {# start generation of integration default index_settings #} {% if salt['file.file_exists']('/opt/so/state/esfleet_component_templates.json') %} -{% set ALL_ADDON_INTEGRATION_DEFAULTS = {} %} -{% set ALL_ADDON_SETTINGS_ORIG = {} %} -{% set ALL_ADDON_SETTINGS_GLOBAL_OVERRIDES = {} %} {# import integration type defaults #} {% if salt['file.file_exists']('/opt/so/state/esfleet_integration_package_components.json') %} {% set check_integration_package_components = salt['file.stats']('/opt/so/state/esfleet_integration_package_components.json') %} @@ -64,7 +65,6 @@ {% endif %} {% set ES_INDEX_SETTINGS = {} %} -{% set ALL_ADDON_SETTINGS = {}%} {% macro create_final_index_template(DEFINED_SETTINGS, GLOBAL_OVERRIDES, FINAL_INDEX_SETTINGS) %} {% do GLOBAL_OVERRIDES.update(salt['defaults.merge'](GLOBAL_OVERRIDES, ES_INDEX_PILLAR, in_place=False)) %} From 55be1f1119fe6bab273911de372e0e302527e97a Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 21:09:43 -0400 Subject: [PATCH 017/110] Only add postgres module config on manager nodes Removed postgres from soc/defaults.yaml (shared by all nodes) and moved it entirely into defaults.map.jinja, which only injects the config when postgres auth pillar exists (manager-type nodes). Sensors and other non-manager nodes will not have a postgres module section in their sensoroni.json, so sensoroni won't try to connect. 
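
A hedged way to verify the split (the config path and key nesting below are
assumptions for illustration, based on the sensoroni.json reference above):

    # Manager nodes should report true; sensors should report false
    jq '.modules | has("postgres")' /opt/so/conf/soc/sensoroni.json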
--- salt/soc/defaults.map.jinja | 2 +- salt/soc/defaults.yaml | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/salt/soc/defaults.map.jinja b/salt/soc/defaults.map.jinja index 20d64603d..99e85e364 100644 --- a/salt/soc/defaults.map.jinja +++ b/salt/soc/defaults.map.jinja @@ -25,7 +25,7 @@ {% do SOCDEFAULTS.soc.config.server.modules.elastic.update({'username': GLOBALS.elasticsearch.auth.users.so_elastic_user.user, 'password': GLOBALS.elasticsearch.auth.users.so_elastic_user.pass}) %} {% if GLOBALS.postgres is defined and GLOBALS.postgres.auth is defined %} -{% do SOCDEFAULTS.soc.config.server.modules.postgres.update({'hostUrl': GLOBALS.manager_ip, 'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass}) %} +{% do SOCDEFAULTS.soc.config.server.modules.update({'postgres': {'hostUrl': GLOBALS.manager_ip, 'port': 5432, 'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass, 'dbname': 'securityonion', 'sslMode': 'require', 'assistantEnabled': true}}) %} {% endif %} {% do SOCDEFAULTS.soc.config.server.modules.influxdb.update({'hostUrl': 'https://' ~ GLOBALS.influxdb_host ~ ':8086'}) %} diff --git a/salt/soc/defaults.yaml b/salt/soc/defaults.yaml index 2bdbbae37..0bde8f20e 100644 --- a/salt/soc/defaults.yaml +++ b/salt/soc/defaults.yaml @@ -1491,14 +1491,6 @@ soc: org: Security Onion bucket: telegraf/so_short_term verifyCert: false - postgres: - hostUrl: so-postgres - port: 5432 - username: - password: - dbname: securityonion - sslMode: require - assistantEnabled: true playbook: autoUpdateEnabled: true playbookImportFrequencySeconds: 86400 From da1045e052bf069b958ecb90da5ef92dbbeec295 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 21:52:20 -0400 Subject: [PATCH 018/110] Fix init-users.sh password escaping for special characters Use format() with %L for SQL literal escaping instead of raw string interpolation. Also ALTER ROLE if user already exists to keep password in sync with pillar. 
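
For example, running the following through the manage wrapper added earlier
in this series shows the escaping (%I quotes the identifier, %L quotes the
literal and doubles any embedded quotes):

    so-postgres-manage sql "SELECT format('CREATE ROLE %I WITH LOGIN PASSWORD %L', 'so_user', 'p''ass');"
    # -> CREATE ROLE so_user WITH LOGIN PASSWORD 'p''ass'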
--- salt/postgres/files/init-users.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/salt/postgres/files/init-users.sh b/salt/postgres/files/init-users.sh index 6fa7e43ac..7451e0bf8 100644 --- a/salt/postgres/files/init-users.sh +++ b/salt/postgres/files/init-users.sh @@ -1,13 +1,16 @@ #!/bin/bash set -e -# Create application user for SOC platform access -# This script runs on first database initialization only +# Create or update application user for SOC platform access +# This script runs on first database initialization via docker-entrypoint-initdb.d +# The password is properly escaped to handle special characters psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL DO \$\$ BEGIN - IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '$SO_POSTGRES_USER') THEN - CREATE ROLE "$SO_POSTGRES_USER" WITH LOGIN PASSWORD '$SO_POSTGRES_PASS'; + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '${SO_POSTGRES_USER}') THEN + EXECUTE format('CREATE ROLE %I WITH LOGIN PASSWORD %L', '${SO_POSTGRES_USER}', '${SO_POSTGRES_PASS}'); + ELSE + EXECUTE format('ALTER ROLE %I WITH PASSWORD %L', '${SO_POSTGRES_USER}', '${SO_POSTGRES_PASS}'); END IF; END \$\$; From 1ffdcab3bee688783eff845be2f2e6b787cabb8f Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 9 Apr 2026 22:21:35 -0400 Subject: [PATCH 019/110] Add postgres adminPassword to SOC module config Injects the postgres superuser password from secrets pillar so SOC can run schema migrations as admin before switching to the app user for normal operations. --- salt/soc/defaults.map.jinja | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/salt/soc/defaults.map.jinja b/salt/soc/defaults.map.jinja index 99e85e364..d99cf57f7 100644 --- a/salt/soc/defaults.map.jinja +++ b/salt/soc/defaults.map.jinja @@ -25,7 +25,8 @@ {% do SOCDEFAULTS.soc.config.server.modules.elastic.update({'username': GLOBALS.elasticsearch.auth.users.so_elastic_user.user, 'password': GLOBALS.elasticsearch.auth.users.so_elastic_user.pass}) %} {% if GLOBALS.postgres is defined and GLOBALS.postgres.auth is defined %} -{% do SOCDEFAULTS.soc.config.server.modules.update({'postgres': {'hostUrl': GLOBALS.manager_ip, 'port': 5432, 'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass, 'dbname': 'securityonion', 'sslMode': 'require', 'assistantEnabled': true}}) %} +{% set PG_ADMIN_PASS = salt['pillar.get']('secrets:postgres_pass', '') %} +{% do SOCDEFAULTS.soc.config.server.modules.update({'postgres': {'hostUrl': GLOBALS.manager_ip, 'port': 5432, 'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass, 'adminUser': 'postgres', 'adminPassword': PG_ADMIN_PASS, 'dbname': 'securityonion', 'sslMode': 'require', 'assistantEnabled': true}}) %} {% endif %} {% do SOCDEFAULTS.soc.config.server.modules.influxdb.update({'hostUrl': 'https://' ~ GLOBALS.influxdb_host ~ ':8086'}) %} From 9ccd0acb4f74bf724534d01a57f0afcc36d4dc44 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 10 Apr 2026 11:41:33 -0400 Subject: [PATCH 020/110] Add ES credentials to postgres module config for migration Postgres module now queries Elasticsearch directly via HTTP for the chat migration (bypasses RBAC that needs user context). Pass esHostUrl, esUsername, esPassword alongside postgres creds. 
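
A minimal sketch to confirm the injected endpoint and credentials resolve
(placeholders only; standard curl flags):

    curl --fail -sk -u '<so_elastic_user>:<password>' "https://<manager-ip>:9200/"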
--- salt/soc/defaults.map.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/soc/defaults.map.jinja b/salt/soc/defaults.map.jinja index d99cf57f7..46ae7e8fd 100644 --- a/salt/soc/defaults.map.jinja +++ b/salt/soc/defaults.map.jinja @@ -26,7 +26,7 @@ {% if GLOBALS.postgres is defined and GLOBALS.postgres.auth is defined %} {% set PG_ADMIN_PASS = salt['pillar.get']('secrets:postgres_pass', '') %} -{% do SOCDEFAULTS.soc.config.server.modules.update({'postgres': {'hostUrl': GLOBALS.manager_ip, 'port': 5432, 'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass, 'adminUser': 'postgres', 'adminPassword': PG_ADMIN_PASS, 'dbname': 'securityonion', 'sslMode': 'require', 'assistantEnabled': true}}) %} +{% do SOCDEFAULTS.soc.config.server.modules.update({'postgres': {'hostUrl': GLOBALS.manager_ip, 'port': 5432, 'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass, 'adminUser': 'postgres', 'adminPassword': PG_ADMIN_PASS, 'dbname': 'securityonion', 'sslMode': 'require', 'assistantEnabled': true, 'esHostUrl': 'https://' ~ GLOBALS.manager_ip ~ ':9200', 'esUsername': GLOBALS.elasticsearch.auth.users.so_elastic_user.user, 'esPassword': GLOBALS.elasticsearch.auth.users.so_elastic_user.pass}}) %} {% endif %} {% do SOCDEFAULTS.soc.config.server.modules.influxdb.update({'hostUrl': 'https://' ~ GLOBALS.influxdb_host ~ ':8086'}) %} From 6298397534c1c76fa54ed5b54f7853d797066a26 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Sat, 11 Apr 2026 04:40:47 -0500 Subject: [PATCH 021/110] rework elasticsearch template load script -- for core templates --- salt/elasticsearch/enabled.sls | 20 +- .../so-elasticsearch-component-templates-list | 19 +- .../sbin/so-elasticsearch-templates-load | 190 ++++++++++++++++++ .../so-elasticsearch-templates-load | 165 --------------- 4 files changed, 219 insertions(+), 175 deletions(-) create mode 100755 salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load delete mode 100755 salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load diff --git a/salt/elasticsearch/enabled.sls b/salt/elasticsearch/enabled.sls index 61b9bad01..d95ec2f98 100644 --- a/salt/elasticsearch/enabled.sls +++ b/salt/elasticsearch/enabled.sls @@ -10,7 +10,10 @@ {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCH_NODES %} {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCH_SEED_HOSTS %} {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCHMERGED %} -{% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS, ALL_ADDON_SETTINGS, SO_MANAGED_INDICES %} +{% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS, SO_MANAGED_INDICES %} +{% if GLOBALS.role != 'so-heavynode' %} +{% from 'elasticsearch/template.map.jinja' import ALL_ADDON_SETTINGS %} +{% endif %} include: - ca @@ -140,17 +143,17 @@ so_index_template_{{index}}: - defaults: TEMPLATE_CONFIG: {{ settings.index_template }} - template: jinja - - show_changes: False - onchanges_in: - file: so-elasticsearch-templates-reload {% endif %} {% endfor %} +{% if GLOBALS.role != "so-heavynode" %} # Auto-generate optional index templates for integration | input | content packages # These index templates are not used by default (until user adds package to an agent policy). # Pre-configured with standard defaults, and incorporated into SOC configuration for user customization. 
-{% for index,settings in ALL_ADDON_SETTINGS.items() %} -{% if settings.index_template is defined %} +{% for index,settings in ALL_ADDON_SETTINGS.items() %} +{% if settings.index_template is defined %} addon_index_template_{{index}}: file.managed: - name: /opt/so/conf/elasticsearch/templates/addon-index/{{ index }}-template.json @@ -161,8 +164,9 @@ addon_index_template_{{index}}: - show_changes: False - onchanges_in: - file: addon-elasticsearch-templates-reload -{% endif %} -{% endfor %} +{% endif %} +{% endfor %} +{% endif %} {% if GLOBALS.role in GLOBALS.manager_roles %} so-es-cluster-settings: @@ -195,7 +199,11 @@ addon-elasticsearch-templates-reload: so-elasticsearch-templates: cmd.run: +{%- if GLOBALS.role == "so-heavynode" %} + - name: /usr/sbin/so-elasticsearch-templates-load --heavynode +{%- else %} - name: /usr/sbin/so-elasticsearch-templates-load +{%- endif %} - cwd: /opt/so - template: jinja - require: diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-component-templates-list b/salt/elasticsearch/tools/sbin/so-elasticsearch-component-templates-list index 2fccce9cb..6946e30da 100755 --- a/salt/elasticsearch/tools/sbin/so-elasticsearch-component-templates-list +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-component-templates-list @@ -6,8 +6,19 @@ # Elastic License 2.0. . /usr/sbin/so-common -if [ "$1" == "" ]; then - curl -K /opt/so/conf/elasticsearch/curl.config -s -k -L https://localhost:9200/_component_template | jq '.component_templates[] |.name'| sort + +if [[ -z "$1" ]]; then + if output=$(so-elasticsearch-query "_component_template" --retry 3 --retry-delay 1 --fail); then + jq '[.component_templates[] | .name] | sort' <<< "$output" + else + echo "Failed to retrieve component templates from Elasticsearch." + exit 1 + fi else - curl -K /opt/so/conf/elasticsearch/curl.config -s -k -L https://localhost:9200/_component_template/$1 | jq -fi + if output=$(so-elasticsearch-query "_component_template/$1" --retry 3 --retry-delay 1 --fail); then + jq <<< "$output" + else + echo "Failed to retrieve component template '$1' from Elasticsearch." + exit 1 + fi +fi \ No newline at end of file diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load new file mode 100755 index 000000000..f44225ac6 --- /dev/null +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load @@ -0,0 +1,190 @@ +#!/bin/bash +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +. 
/usr/sbin/so-common + +SO_STATE_FILE_SUCCESS=/opt/so/state/estemplates.txt +ADDON_STATE_FILE_SUCCESS=/opt/so/state/addon-estemplates.txt +ELASTICSEARCH_TEMPLATES_DIR="/opt/so/conf/elasticsearch/templates" +SO_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/index" +ADDON_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/addon-index" +LOAD_FAILURES=0 +LOAD_FAILURES_NAMES=() +IS_HEAVYNODE="false" +FORCE="false" +VERBOSE="false" + +# If soup is running, ignore errors +pgrep soup >/dev/null && should_exit_on_failure=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + --heavynode) + IS_HEAVYNODE="true" + ;; + --force) + FORCE="true" + ;; + --verbose) + VERBOSE="true" + ;; + *) + echo "Usage: $0 [options]" + echo "Options:" + echo " --heavynode Only loads index templates specific to heavynodes" + echo " --force Force reload all templates regardless of state file (default: false)" + echo " --verbose Enable verbose output" + exit 1 + ;; + esac + shift +done + +load_template() { + local uri="$1" + local file="$2" + + echo "Loading template file $file" + if ! output=$(retry 3 3 "so-elasticsearch-query $uri -d@$file -XPUT" "{\"acknowledged\":true}"); then + echo "$output" + + return 1 + + elif [[ "$VERBOSE" == "true" ]]; then + echo "$output" + fi + +} + +check_required_component_template_exists() { + local required + local missing + local file=$1 + + required=$(jq '[((.composed_of //[]) - (.ignore_missing_component_templates // []))[]]' "$file") + missing=$(jq -n --argjson required "$required" --argjson component_templates "$component_templates" '(($required) - ($component_templates))') + + if [[ $(jq length <<<"$missing") -gt 0 ]]; then + + return 1 + fi +} + +check_heavynode_compatiable_index_template() { + # The only templates that are relevant to heavynodes are from datasets defined in elasticagent/files/elastic-agent.yml.jinja. + # Heavynodes do not have fleet server packages installed and do not support elastic agents reporting directly to them. + local -A heavynode_index_templates=( + ["so-import"]=1 + ["so-syslog"]=1 + ["so-logs-soc"]=1 + ["so-suricata"]=1 + ["so-suricata.alerts"]=1 + ["so-zeek"]=1 + ["so-strelka"]=1 + ) + + local template_name="$1" + + if [[ ! -v heavynode_index_templates["$template_name"] ]]; then + + return 1 + fi + +} + +load_component_templates() { + local printed_name="$1" + local pattern="${ELASTICSEARCH_TEMPLATES_DIR}/component/$2" + + # current state of nullglob shell option + shopt -q nullglob && nullglob_set=1 || nullglob_set=0 + + shopt -s nullglob + echo -e "\nLoading $printed_name component templates...\n" + for component in "$pattern"/*.json; do + tmpl_name=$(basename "${component%.json}") + if ! load_template "_component_template/${tmpl_name}-mappings" "$component"; then + LOAD_FAILURES=$((LOAD_FAILURES + 1)) + LOAD_FAILURES_NAMES+=("$component") + fi + done + + # restore nullglob shell option if needed + if [[ $nullglob_set -eq 1 ]]; then + shopt -u nullglob + fi +} + +if [[ "$FORCE" == "true" || ! -f "$SO_STATE_FILE_SUCCESS" ]]; then + # Cannot load templates if Elasticsearch is not responding. + # NOTE: Slightly faster exit w/ failure than previous "retry 240 1" if there is a problem with Elasticsearch the + # script should exit sooner rather than hang at the 'so-elasticsearch-templates' salt state. + retry 3 15 "so-elasticsearch-query / --output /dev/null --fail" || + fail "Elasticsearch is not responding. Please review Elasticsearch logs /opt/so/log/elasticsearch/securityonion.log for more details. Additionally, consider running so-elasticsearch-troubleshoot." 
+ + if [[ "$IS_HEAVYNODE" == "false" ]]; then + # TODO: Better way to check if fleet server is installed vs checking for Elastic Defend component template. + fleet_check="logs-endpoint.alerts@package" + if ! so-elasticsearch-query "_component_template/$fleet_check" --output /dev/null --retry 5 --retry-delay 3 --fail; then + echo -e "\nPackage $fleet_check not yet installed. Fleet Server may not be fully configured yet." + # Fleet Server is required because some SO index templates depend on components installed via + # specific integrations eg Elastic Defend. These are components that we do not manually create / manage + # via /opt/so/saltstack/salt/elasticsearch/templates/component/ + + exit 0 + fi + fi + + load_component_templates "ECS" "ecs" + load_component_templates "Elastic Agent" "elastic-agent" + load_component_templates "Security Onion" "securityonion" + + component_templates=$(so-elasticsearch-component-templates-list) + echo -e "Loading Security Onion index templates...\n" + for so_idx_tmpl in "${SO_TEMPLATES_DIR}"/*.json; do + tmpl_name=$(basename "${so_idx_tmpl%-template.json}") + + if [[ "$IS_HEAVYNODE" == "true" ]]; then + # TODO: Better way to load only heavynode specific templates + if ! check_heavynode_compatiable_index_template "$tmpl_name"; then + if [[ "$VERBOSE" == "true" ]]; then + echo "Skipping over $so_idx_tmpl, template is not a heavynode specific index template." + fi + + continue + fi + fi + + if check_required_component_template_exists "$so_idx_tmpl"; then + if ! load_template "_index_template/$tmpl_name" "$so_idx_tmpl"; then + LOAD_FAILURES=$((LOAD_FAILURES + 1)) + LOAD_FAILURES_NAMES+=("$so_idx_tmpl") + fi + else + echo "Skipping over $so_idx_tmpl due to missing required component template(s)." + LOAD_FAILURES=$((LOAD_FAILURES + 1)) + LOAD_FAILURES_NAMES+=("$so_idx_tmpl") + + continue + fi + done + + if [[ $LOAD_FAILURES -eq 0 ]]; then + echo "All templates loaded successfully." + + touch "$SO_STATE_FILE_SUCCESS" + else + echo "Encountered $LOAD_FAILURES failure(s) loading templates:" + for failed_template in "${LOAD_FAILURES_NAMES[@]}"; do + echo " - $failed_template" + done + fi + +else + + echo "Templates already loaded" + +fi \ No newline at end of file diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load deleted file mode 100755 index ad3fe1344..000000000 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load +++ /dev/null @@ -1,165 +0,0 @@ -#!/bin/bash -# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one -# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at -# https://securityonion.net/license; you may not use this file except in compliance with the -# Elastic License 2.0. -{%- import_yaml 'elasticfleet/defaults.yaml' as ELASTICFLEETDEFAULTS %} -{% from 'vars/globals.map.jinja' import GLOBALS %} - -STATE_FILE_INITIAL=/opt/so/state/estemplates_initial_load_attempt.txt -STATE_FILE_SUCCESS=/opt/so/state/estemplates.txt - -if [[ -f $STATE_FILE_INITIAL ]]; then - # The initial template load has already run. As this is a subsequent load, all dependencies should - # already be satisified. Therefore, immediately exit/abort this script upon any template load failure - # since this is an unrecoverable failure. - should_exit_on_failure=1 -else - # This is the initial template load, and there likely are some components not yet setup in Elasticsearch. 
- # Therefore load as many templates as possible at this time and if an error occurs proceed to the next - # template. But if at least one template fails to load do not mark the templates as having been loaded. - # This will allow the next load to resume the load of the templates that failed to load initially. - should_exit_on_failure=0 - echo "This is the initial template load" -fi - -# If soup is running, ignore errors -pgrep soup > /dev/null && should_exit_on_failure=0 - -load_failures=0 - -load_template() { - uri=$1 - file=$2 - - echo "Loading template file $i" - if ! retry 3 1 "so-elasticsearch-query $uri -d@$file -XPUT" "{\"acknowledged\":true}"; then - if [[ $should_exit_on_failure -eq 1 ]]; then - fail "Could not load template file: $file" - else - load_failures=$((load_failures+1)) - echo "Incremented load failure counter: $load_failures" - fi - fi -} - -if [ ! -f $STATE_FILE_SUCCESS ]; then - echo "State file $STATE_FILE_SUCCESS not found. Running so-elasticsearch-templates-load." - - . /usr/sbin/so-common - - {% if GLOBALS.role != 'so-heavynode' %} - if [ -f /usr/sbin/so-elastic-fleet-common ]; then - . /usr/sbin/so-elastic-fleet-common - fi - {% endif %} - - default_conf_dir=/opt/so/conf - - # Define a default directory to load pipelines from - ELASTICSEARCH_TEMPLATES="$default_conf_dir/elasticsearch/templates/" - - {% if GLOBALS.role == 'so-heavynode' %} - file="/opt/so/conf/elasticsearch/templates/index/so-common-template.json" - {% else %} - file="/usr/sbin/so-elastic-fleet-common" - {% endif %} - - if [ -f "$file" ]; then - # Wait for ElasticSearch to initialize - echo -n "Waiting for ElasticSearch..." - retry 240 1 "so-elasticsearch-query / -k --output /dev/null --silent --head --fail" || fail "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'" - {% if GLOBALS.role != 'so-heavynode' %} - TEMPLATE="logs-endpoint.alerts@package" - INSTALLED=$(so-elasticsearch-query _component_template/$TEMPLATE | jq -r .component_templates[0].name) - if [ "$INSTALLED" != "$TEMPLATE" ]; then - echo - echo "Packages not yet installed." - echo - exit 0 - fi - {% endif %} - - touch $STATE_FILE_INITIAL - - cd ${ELASTICSEARCH_TEMPLATES}/component/ecs - - echo "Loading ECS component templates..." - for i in *; do - TEMPLATE=$(echo $i | cut -d '.' -f1) - load_template "_component_template/${TEMPLATE}-mappings" "$i" - done - echo - - cd ${ELASTICSEARCH_TEMPLATES}/component/elastic-agent - - echo "Loading Elastic Agent component templates..." - {% if GLOBALS.role == 'so-heavynode' %} - component_pattern="so-*" - {% else %} - component_pattern="*" - {% endif %} - for i in $component_pattern; do - TEMPLATE=${i::-5} - load_template "_component_template/$TEMPLATE" "$i" - done - echo - - # Load SO-specific component templates - cd ${ELASTICSEARCH_TEMPLATES}/component/so - - echo "Loading Security Onion component templates..." - for i in *; do - TEMPLATE=$(echo $i | cut -d '.' -f1); - load_template "_component_template/$TEMPLATE" "$i" - done - echo - - # Load SO index templates - cd ${ELASTICSEARCH_TEMPLATES}/index - - echo "Loading Security Onion index templates..." 
- shopt -s extglob - {% if GLOBALS.role == 'so-heavynode' %} - pattern="!(*1password*|*aws*|*azure*|*cloudflare*|*elastic_agent*|*fim*|*github*|*google*|*osquery*|*system*|*windows*|*endpoint*|*elasticsearch*|*generic*|*fleet_server*|*soc*)" - {% else %} - pattern="*" - {% endif %} - # Index templates will be skipped if the following conditions are met: - # 1. The template is part of the "so-logs-" template group - # 2. The template name does not correlate to at least one existing component template - # In this situation, the script will treat the skipped template as a temporary failure - # and allow the templates to be loaded again on the next run or highstate, whichever - # comes first. - COMPONENT_LIST=$(so-elasticsearch-component-templates-list) - for i in $pattern; do - TEMPLATE=${i::-14} - COMPONENT_PATTERN=${TEMPLATE:3} - MATCH=$(echo "$TEMPLATE" | grep -E "^so-logs-|^so-metrics" | grep -vE "detections|osquery") - if [[ -n "$MATCH" && ! "$COMPONENT_LIST" =~ "$COMPONENT_PATTERN" && ! "$COMPONENT_PATTERN" =~ \.generic|logs-winlog\.winlog ]]; then - load_failures=$((load_failures+1)) - echo "Component template does not exist for $COMPONENT_PATTERN. The index template will not be loaded. Load failures: $load_failures" - else - load_template "_index_template/$TEMPLATE" "$i" - fi - done - else - {% if GLOBALS.role == 'so-heavynode' %} - echo "Common template does not exist. Exiting..." - {% else %} - echo "Elastic Fleet not configured. Exiting..." - {% endif %} - exit 0 - fi - - cd - >/dev/null - - if [[ $load_failures -eq 0 ]]; then - echo "All templates loaded successfully" - touch $STATE_FILE_SUCCESS - else - echo "Encountered $load_failures templates that were unable to load, likely due to missing dependencies that will be available later; will retry on next highstate" - fi -else - echo "Templates already loaded" -fi From b0584a4dc5358b75ce7c50564328a7a280fe2059 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Sat, 11 Apr 2026 15:22:50 -0500 Subject: [PATCH 022/110] only append "-mappings" to component template names as needed --- .../tools/sbin/so-elasticsearch-templates-load | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load index f44225ac6..3b5aa3707 100755 --- a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load @@ -98,6 +98,7 @@ check_heavynode_compatiable_index_template() { load_component_templates() { local printed_name="$1" local pattern="${ELASTICSEARCH_TEMPLATES_DIR}/component/$2" + local append_mappings="${3:-"false"}" # current state of nullglob shell option shopt -q nullglob && nullglob_set=1 || nullglob_set=0 @@ -106,7 +107,13 @@ load_component_templates() { echo -e "\nLoading $printed_name component templates...\n" for component in "$pattern"/*.json; do tmpl_name=$(basename "${component%.json}") - if ! load_template "_component_template/${tmpl_name}-mappings" "$component"; then + + if [[ "$append_mappings" == "true" ]]; then + # avoid duplicating "-mappings" if it already exists in the component template filename + tmpl_name="${tmpl_name%-mappings}-mappings" + fi + + if ! load_template "_component_template/${tmpl_name}" "$component"; then LOAD_FAILURES=$((LOAD_FAILURES + 1)) LOAD_FAILURES_NAMES+=("$component") fi @@ -138,9 +145,9 @@ if [[ "$FORCE" == "true" || ! 
-f "$SO_STATE_FILE_SUCCESS" ]]; then fi fi - load_component_templates "ECS" "ecs" + load_component_templates "ECS" "ecs" "true" load_component_templates "Elastic Agent" "elastic-agent" - load_component_templates "Security Onion" "securityonion" + load_component_templates "Security Onion" "so" component_templates=$(so-elasticsearch-component-templates-list) echo -e "Loading Security Onion index templates...\n" @@ -187,4 +194,4 @@ else echo "Templates already loaded" -fi \ No newline at end of file +fi From a43947cca5649f77ef0b3436efae33bd92a0da3b Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Sun, 12 Apr 2026 00:23:26 -0500 Subject: [PATCH 023/110] elasticsearch template load script -- for addon index templates --- salt/elasticsearch/enabled.sls | 1 + salt/elasticsearch/template.map.jinja | 4 +- .../sbin/so-elasticsearch-templates-load | 91 +++++++++++++++---- 3 files changed, 74 insertions(+), 22 deletions(-) diff --git a/salt/elasticsearch/enabled.sls b/salt/elasticsearch/enabled.sls index d95ec2f98..dc404c509 100644 --- a/salt/elasticsearch/enabled.sls +++ b/salt/elasticsearch/enabled.sls @@ -197,6 +197,7 @@ addon-elasticsearch-templates-reload: file.absent: - name: /opt/so/state/addon_estemplates.txt +# so-elasticsearch-templates-load will have its first successful run during the 'so-elastic-fleet-setup' script so-elasticsearch-templates: cmd.run: {%- if GLOBALS.role == "so-heavynode" %} diff --git a/salt/elasticsearch/template.map.jinja b/salt/elasticsearch/template.map.jinja index fc510324a..e66057775 100644 --- a/salt/elasticsearch/template.map.jinja +++ b/salt/elasticsearch/template.map.jinja @@ -21,8 +21,8 @@ {# start generation of integration default index_settings #} {% if salt['file.file_exists']('/opt/so/state/esfleet_component_templates.json') %} {# import integration type defaults #} -{% if salt['file.file_exists']('/opt/so/state/esfleet_integration_package_components.json') %} -{% set check_integration_package_components = salt['file.stats']('/opt/so/state/esfleet_integration_package_components.json') %} +{% if salt['file.file_exists']('/opt/so/state/esfleet_package_components.json') %} +{% set check_integration_package_components = salt['file.stats']('/opt/so/state/esfleet_package_components.json') %} {% if check_integration_package_components.size > 1 %} {% from 'elasticfleet/integration-defaults.map.jinja' import ADDON_INTEGRATION_DEFAULTS %} {% do ALL_ADDON_INTEGRATION_DEFAULTS.update(ADDON_INTEGRATION_DEFAULTS) %} diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load index 3b5aa3707..3a0361a9f 100755 --- a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load @@ -6,19 +6,22 @@ . 
/usr/sbin/so-common -SO_STATE_FILE_SUCCESS=/opt/so/state/estemplates.txt -ADDON_STATE_FILE_SUCCESS=/opt/so/state/addon-estemplates.txt +SO_STATEFILE_SUCCESS=/opt/so/state/estemplates.txt +ADDON_STATEFILE_SUCCESS=/opt/so/state/addon-estemplates.txt ELASTICSEARCH_TEMPLATES_DIR="/opt/so/conf/elasticsearch/templates" SO_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/index" ADDON_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/addon-index" -LOAD_FAILURES=0 -LOAD_FAILURES_NAMES=() +SO_LOAD_FAILURES=0 +ADDON_LOAD_FAILURES=0 +SO_LOAD_FAILURES_NAMES=() +ADDON_LOAD_FAILURES_NAMES=() IS_HEAVYNODE="false" FORCE="false" VERBOSE="false" +SHOULD_EXIT_ON_FAILURE="true" # If soup is running, ignore errors -pgrep soup >/dev/null && should_exit_on_failure=0 +pgrep soup >/dev/null && SHOULD_EXIT_ON_FAILURE="false" while [[ $# -gt 0 ]]; do case "$1" in @@ -35,7 +38,7 @@ while [[ $# -gt 0 ]]; do echo "Usage: $0 [options]" echo "Options:" echo " --heavynode Only loads index templates specific to heavynodes" - echo " --force Force reload all templates regardless of state file (default: false)" + echo " --force Force reload all templates regardless of statefiles (default: false)" echo " --verbose Enable verbose output" exit 1 ;; @@ -114,8 +117,8 @@ load_component_templates() { fi if ! load_template "_component_template/${tmpl_name}" "$component"; then - LOAD_FAILURES=$((LOAD_FAILURES + 1)) - LOAD_FAILURES_NAMES+=("$component") + SO_LOAD_FAILURES=$((SO_LOAD_FAILURES + 1)) + SO_LOAD_FAILURES_NAMES+=("$component") fi done @@ -125,17 +128,22 @@ load_component_templates() { fi } -if [[ "$FORCE" == "true" || ! -f "$SO_STATE_FILE_SUCCESS" ]]; then +check_elasticsearch_responsive() { # Cannot load templates if Elasticsearch is not responding. # NOTE: Slightly faster exit w/ failure than previous "retry 240 1" if there is a problem with Elasticsearch the # script should exit sooner rather than hang at the 'so-elasticsearch-templates' salt state. retry 3 15 "so-elasticsearch-query / --output /dev/null --fail" || fail "Elasticsearch is not responding. Please review Elasticsearch logs /opt/so/log/elasticsearch/securityonion.log for more details. Additionally, consider running so-elasticsearch-troubleshoot." +} + +if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]]; then + check_elasticsearch_responsive if [[ "$IS_HEAVYNODE" == "false" ]]; then # TODO: Better way to check if fleet server is installed vs checking for Elastic Defend component template. fleet_check="logs-endpoint.alerts@package" if ! so-elasticsearch-query "_component_template/$fleet_check" --output /dev/null --retry 5 --retry-delay 3 --fail; then + # This check prevents so-elasticsearch-templates-load from running before so-elastic-fleet-setup has run. echo -e "\nPackage $fleet_check not yet installed. Fleet Server may not be fully configured yet." # Fleet Server is required because some SO index templates depend on components installed via # specific integrations eg Elastic Defend. These are components that we do not manually create / manage @@ -145,6 +153,7 @@ if [[ "$FORCE" == "true" || ! -f "$SO_STATE_FILE_SUCCESS" ]]; then fi fi + # load_component_templates "Name" "directory" "append '-mappings'?" load_component_templates "ECS" "ecs" "true" load_component_templates "Elastic Agent" "elastic-agent" load_component_templates "Security Onion" "so" @@ -167,31 +176,73 @@ if [[ "$FORCE" == "true" || ! -f "$SO_STATE_FILE_SUCCESS" ]]; then if check_required_component_template_exists "$so_idx_tmpl"; then if ! 
load_template "_index_template/$tmpl_name" "$so_idx_tmpl"; then - LOAD_FAILURES=$((LOAD_FAILURES + 1)) - LOAD_FAILURES_NAMES+=("$so_idx_tmpl") + SO_LOAD_FAILURES=$((SO_LOAD_FAILURES + 1)) + SO_LOAD_FAILURES_NAMES+=("$so_idx_tmpl") fi else echo "Skipping over $so_idx_tmpl due to missing required component template(s)." - LOAD_FAILURES=$((LOAD_FAILURES + 1)) - LOAD_FAILURES_NAMES+=("$so_idx_tmpl") + SO_LOAD_FAILURES=$((SO_LOAD_FAILURES + 1)) + SO_LOAD_FAILURES_NAMES+=("$so_idx_tmpl") continue fi done - if [[ $LOAD_FAILURES -eq 0 ]]; then - echo "All templates loaded successfully." + if [[ $SO_LOAD_FAILURES -eq 0 ]]; then + echo "All Security Onion core templates loaded successfully." - touch "$SO_STATE_FILE_SUCCESS" + touch "$SO_STATEFILE_SUCCESS" else - echo "Encountered $LOAD_FAILURES failure(s) loading templates:" - for failed_template in "${LOAD_FAILURES_NAMES[@]}"; do + echo "Encountered $SO_LOAD_FAILURES failure(s) loading templates:" + for failed_template in "${SO_LOAD_FAILURES_NAMES[@]}"; do echo " - $failed_template" done + if [[ "$SHOULD_EXIT_ON_FAILURE" == "true" ]]; then + fail "Failed to load all Security Onion core templates successfully." + fi fi - else - echo "Templates already loaded" + echo "Security Onion core templates already loaded" +fi + +# Start loading addon templates +if [[ (-f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && ! -f "$ADDON_STATEFILE_SUCCESS") || "$FORCE" == "true" ]]; then + + check_elasticsearch_responsive + + echo -e "\nLoading addon integration index templates...\n" + component_templates=$(so-elasticsearch-component-templates-list) + + for addon_idx_tmpl in "${ADDON_TEMPLATES_DIR}"/*.json; do + tmpl_name=$(basename "${addon_idx_tmpl%-template.json}") + + if check_required_component_template_exists "$addon_idx_tmpl"; then + if ! load_template "_index_template/${tmpl_name}" "$addon_idx_tmpl"; then + ADDON_LOAD_FAILURES=$((ADDON_LOAD_FAILURES + 1)) + ADDON_LOAD_FAILURES_NAMES+=("$addon_idx_tmpl") + fi + else + echo "Skipping over $addon_idx_tmpl due to missing required component template(s)." + ADDON_LOAD_FAILURES=$((ADDON_LOAD_FAILURES + 1)) + ADDON_LOAD_FAILURES_NAMES+=("$addon_idx_tmpl") + + continue + fi + done + + if [[ $ADDON_LOAD_FAILURES -eq 0 ]]; then + echo "All addon integration templates loaded successfully." + + touch "$ADDON_STATEFILE_SUCCESS" + else + echo "Encountered $ADDON_LOAD_FAILURES failure(s) loading addon integration templates:" + for failed_template in "${ADDON_LOAD_FAILURES_NAMES[@]}"; do + echo " - $failed_template" + done + if [[ "$SHOULD_EXIT_ON_FAILURE" == "true" ]]; then + fail "Failed to load all addon integration templates successfully." + fi + fi fi From abcad9fde04aedf0080b943c298bcacfb00e64d6 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Sun, 12 Apr 2026 00:36:30 -0500 Subject: [PATCH 024/110] addon statefile --- salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load index 3a0361a9f..e9b382cba 100755 --- a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load @@ -7,7 +7,7 @@ . 
/usr/sbin/so-common
 
 SO_STATEFILE_SUCCESS=/opt/so/state/estemplates.txt
-ADDON_STATEFILE_SUCCESS=/opt/so/state/addon-estemplates.txt
+ADDON_STATEFILE_SUCCESS=/opt/so/state/addon_estemplates.txt
 ELASTICSEARCH_TEMPLATES_DIR="/opt/so/conf/elasticsearch/templates"
 SO_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/index"
 ADDON_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/addon-index"

From 29e13b2c0b282418ab3d53761c949f1df0c8e727 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Mon, 13 Apr 2026 10:00:17 -0500
Subject: [PATCH 025/110] elasticsearch ilm policy load script

---
 salt/elasticsearch/config.sls                   |  2 ++
 salt/elasticsearch/enabled.sls                  |  1 +
 .../sbin_jinja/so-elasticsearch-ilm-policy-load | 13 +++++++++++++
 3 files changed, 16 insertions(+)

diff --git a/salt/elasticsearch/config.sls b/salt/elasticsearch/config.sls
index ac9fa8f72..8a4674c71 100644
--- a/salt/elasticsearch/config.sls
+++ b/salt/elasticsearch/config.sls
@@ -66,6 +66,8 @@ so-elasticsearch-ilm-policy-load-script:
     - group: 939
     - mode: 754
     - template: jinja
+    - defaults:
+        GLOBALS: {{ GLOBALS }}
     - show_changes: False
 
 so-elasticsearch-pipelines-script:
diff --git a/salt/elasticsearch/enabled.sls b/salt/elasticsearch/enabled.sls
index dc404c509..f4031ee5d 100644
--- a/salt/elasticsearch/enabled.sls
+++ b/salt/elasticsearch/enabled.sls
@@ -179,6 +179,7 @@ so-es-cluster-settings:
         - file: elasticsearch_sbin_jinja
 {% endif %}
 
+# heavynodes will only load ILM policies for SO managed indices. (Indices defined in elasticsearch/defaults.yaml)
 so-elasticsearch-ilm-policy-load:
   cmd.run:
     - name: /usr/sbin/so-elasticsearch-ilm-policy-load
diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load
index 04a7a8ab0..7988c1905 100755
--- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load
+++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-ilm-policy-load
@@ -7,6 +7,9 @@
 . /usr/sbin/so-common
 
 {%- from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS %}
+{%- if GLOBALS.role != "so-heavynode" %}
+{%- from 'elasticsearch/template.map.jinja' import ALL_ADDON_SETTINGS %}
+{%- endif %}
 
 {%- for index, settings in ES_INDEX_SETTINGS.items() %}
 {%- if settings.policy is defined %}
@@ -33,3 +36,13 @@
 {%- endif %}
 {%- endfor %}
 echo
+{%- if GLOBALS.role != "so-heavynode" %}
+{%- for index, settings in ALL_ADDON_SETTINGS.items() %}
+{%- if settings.policy is defined %}
+  echo
+  echo "Setting up {{ index }}-logs policy..."
+ curl -K /opt/so/conf/elasticsearch/curl.config -b "sid=$SESSIONCOOKIE" -s -k -L -X PUT "https://localhost:9200/_ilm/policy/{{ index }}-logs" -H 'Content-Type: application/json' -d'{ "policy": {{ settings.policy | tojson(true) }} }' + echo +{%- endif %} +{%- endfor %} +{%- endif %} From dd40e44530faca3e4849cbd08062d7960df4227d Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 13 Apr 2026 12:36:42 -0500 Subject: [PATCH 026/110] show when addon integrations are already loaded --- .../tools/sbin/so-elasticsearch-templates-load | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load index e9b382cba..9ad0f418a 100755 --- a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load @@ -207,7 +207,7 @@ else fi # Start loading addon templates -if [[ (-f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && ! -f "$ADDON_STATEFILE_SUCCESS") || "$FORCE" == "true" ]]; then +if [[ (-f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && ! -f "$ADDON_STATEFILE_SUCCESS") || ("$IS_HEAVYNODE" == "false" && "$FORCE" == "true") ]]; then check_elasticsearch_responsive @@ -245,4 +245,9 @@ if [[ (-f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && ! -f "$ADDON_ fi fi +elif [[ ! -f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" ]]; then + echo "Skipping loading addon integration templates until Security Onion core templates have been loaded." + +elif [[ -f "$ADDON_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && "$FORCE" == "false" ]]; then + echo "Addon integration templates already loaded" fi From a232cd89cc1fbdc5473fb685d1e1bc541c1dd1e1 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 13 Apr 2026 13:36:51 -0500 Subject: [PATCH 027/110] ES 9.3.3 --- .../grid-nodes_general/import-evtx-logs.json | 2 +- salt/elasticsearch/defaults.yaml | 2 +- ...nse.log-1.25.1 => logs-pfsense.log-1.25.2} | 24 +++++++++---------- ...icata => logs-pfsense.log-1.25.2-suricata} | 0 4 files changed, 14 insertions(+), 14 deletions(-) rename salt/elasticsearch/files/ingest/{logs-pfsense.log-1.25.1 => logs-pfsense.log-1.25.2} (94%) rename salt/elasticsearch/files/ingest/{logs-pfsense.log-1.25.1-suricata => logs-pfsense.log-1.25.2-suricata} (100%) diff --git a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json index 0e42a0dfb..32d210172 100644 --- a/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json +++ b/salt/elasticfleet/files/integrations/grid-nodes_general/import-evtx-logs.json @@ -29,7 +29,7 @@ "\\.gz$" ], "include_files": [], - "processors": "- dissect:\n tokenizer: \"/nsm/import/%{import.id}/evtx/%{import.file}\"\n field: \"log.file.path\"\n target_prefix: \"\"\n- decode_json_fields:\n fields: [\"message\"]\n target: \"\"\n- drop_fields:\n fields: [\"host\"]\n ignore_missing: true\n- add_fields:\n target: data_stream\n fields:\n type: logs\n dataset: system.security\n- add_fields:\n target: event\n fields:\n dataset: system.security\n module: system\n imported: true\n- add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.security-2.13.0\n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-Sysmon/Operational'\n then: \n - add_fields:\n target: data_stream\n 
fields:\n dataset: windows.sysmon_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.sysmon_operational\n module: windows\n imported: true\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.sysmon_operational-3.6.0\n- if:\n equals:\n winlog.channel: 'Application'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.application\n - add_fields:\n target: event\n fields:\n dataset: system.application\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.application-2.13.0\n- if:\n equals:\n winlog.channel: 'System'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.system\n - add_fields:\n target: event\n fields:\n dataset: system.system\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.system-2.13.0\n \n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-PowerShell/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.powershell_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.powershell_operational\n module: windows\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.powershell_operational-3.6.0\n- add_fields:\n target: data_stream\n fields:\n dataset: import", + "processors": "- dissect:\n tokenizer: \"/nsm/import/%{import.id}/evtx/%{import.file}\"\n field: \"log.file.path\"\n target_prefix: \"\"\n- decode_json_fields:\n fields: [\"message\"]\n target: \"\"\n- drop_fields:\n fields: [\"host\"]\n ignore_missing: true\n- add_fields:\n target: data_stream\n fields:\n type: logs\n dataset: system.security\n- add_fields:\n target: event\n fields:\n dataset: system.security\n module: system\n imported: true\n- add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.security-2.15.0\n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-Sysmon/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.sysmon_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.sysmon_operational\n module: windows\n imported: true\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.sysmon_operational-3.8.0\n- if:\n equals:\n winlog.channel: 'Application'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.application\n - add_fields:\n target: event\n fields:\n dataset: system.application\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.application-2.15.0\n- if:\n equals:\n winlog.channel: 'System'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: system.system\n - add_fields:\n target: event\n fields:\n dataset: system.system\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-system.system-2.15.0\n \n- if:\n equals:\n winlog.channel: 'Microsoft-Windows-PowerShell/Operational'\n then: \n - add_fields:\n target: data_stream\n fields:\n dataset: windows.powershell_operational\n - add_fields:\n target: event\n fields:\n dataset: windows.powershell_operational\n module: windows\n - add_fields:\n target: \"@metadata\"\n fields:\n pipeline: logs-windows.powershell_operational-3.8.0\n- add_fields:\n target: data_stream\n fields:\n dataset: import", "tags": [ "import" ], diff --git a/salt/elasticsearch/defaults.yaml b/salt/elasticsearch/defaults.yaml index f355601dc..6fb795bce 100644 --- a/salt/elasticsearch/defaults.yaml +++ b/salt/elasticsearch/defaults.yaml @@ -1,6 +1,6 @@ elasticsearch: enabled: false - version: 9.3.2 + version: 9.3.3 
index_clean: true vm: max_map_count: 1048576 diff --git a/salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1 b/salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.2 similarity index 94% rename from salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1 rename to salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.2 index 3037ce77a..1ea828514 100644 --- a/salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1 +++ b/salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.2 @@ -118,77 +118,77 @@ { "pipeline": { "tag": "pipeline_e16851a7", - "name": "logs-pfsense.log-1.25.1-firewall", + "name": "logs-pfsense.log-1.25.2-firewall", "if": "ctx.event.provider == 'filterlog'" } }, { "pipeline": { "tag": "pipeline_828590b5", - "name": "logs-pfsense.log-1.25.1-openvpn", + "name": "logs-pfsense.log-1.25.2-openvpn", "if": "ctx.event.provider == 'openvpn'" } }, { "pipeline": { "tag": "pipeline_9d37039c", - "name": "logs-pfsense.log-1.25.1-ipsec", + "name": "logs-pfsense.log-1.25.2-ipsec", "if": "ctx.event.provider == 'charon'" } }, { "pipeline": { "tag": "pipeline_ad56bbca", - "name": "logs-pfsense.log-1.25.1-dhcp", - "if": "[\"dhcpd\", \"dhclient\", \"dhcp6c\"].contains(ctx.event.provider)" + "name": "logs-pfsense.log-1.25.2-dhcp", + "if": "[\"dhcpd\", \"dhclient\", \"dhcp6c\", \"dnsmasq-dhcp\"].contains(ctx.event.provider)" } }, { "pipeline": { "tag": "pipeline_dd85553d", - "name": "logs-pfsense.log-1.25.1-unbound", + "name": "logs-pfsense.log-1.25.2-unbound", "if": "ctx.event.provider == 'unbound'" } }, { "pipeline": { "tag": "pipeline_720ed255", - "name": "logs-pfsense.log-1.25.1-haproxy", + "name": "logs-pfsense.log-1.25.2-haproxy", "if": "ctx.event.provider == 'haproxy'" } }, { "pipeline": { "tag": "pipeline_456beba5", - "name": "logs-pfsense.log-1.25.1-php-fpm", + "name": "logs-pfsense.log-1.25.2-php-fpm", "if": "ctx.event.provider == 'php-fpm'" } }, { "pipeline": { "tag": "pipeline_a0d89375", - "name": "logs-pfsense.log-1.25.1-squid", + "name": "logs-pfsense.log-1.25.2-squid", "if": "ctx.event.provider == 'squid'" } }, { "pipeline": { "tag": "pipeline_c2f1ed55", - "name": "logs-pfsense.log-1.25.1-snort", + "name": "logs-pfsense.log-1.25.2-snort", "if": "ctx.event.provider == 'snort'" } }, { "pipeline": { "tag":"pipeline_33db1c9e", - "name": "logs-pfsense.log-1.25.1-suricata", + "name": "logs-pfsense.log-1.25.2-suricata", "if": "ctx.event.provider == 'suricata'" } }, { "drop": { "tag": "drop_9d7c46f8", - "if": "![\"filterlog\", \"openvpn\", \"charon\", \"dhcpd\", \"dhclient\", \"dhcp6c\", \"unbound\", \"haproxy\", \"php-fpm\", \"squid\", \"snort\", \"suricata\"].contains(ctx.event?.provider)" + "if": "![\"filterlog\", \"openvpn\", \"charon\", \"dhcpd\", \"dnsmasq-dhcp\", \"dhclient\", \"dhcp6c\", \"unbound\", \"haproxy\", \"php-fpm\", \"squid\", \"snort\", \"suricata\"].contains(ctx.event?.provider)" } }, { diff --git a/salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1-suricata b/salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.2-suricata similarity index 100% rename from salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.1-suricata rename to salt/elasticsearch/files/ingest/logs-pfsense.log-1.25.2-suricata From 5634aed6797d0e34460ade9c4aa099eddc78df1c Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Mon, 13 Apr 2026 15:19:39 -0400 Subject: [PATCH 028/110] support minion node descriptions containing spaces --- salt/manager/tools/sbin/so-minion | 4 ++-- setup/so-setup | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/salt/manager/tools/sbin/so-minion 
b/salt/manager/tools/sbin/so-minion index 2d5ef448e..76b067817 100755 --- a/salt/manager/tools/sbin/so-minion +++ b/salt/manager/tools/sbin/so-minion @@ -132,8 +132,8 @@ function getinstallinfo() { log "ERROR" "Failed to get install info from $MINION_ID" return 1 fi - - export $(echo "$INSTALLVARS" | xargs) + + while read -r var; do export "$var"; done <<< "$INSTALLVARS" if [ $? -ne 0 ]; then log "ERROR" "Failed to source install variables" return 1 diff --git a/setup/so-setup b/setup/so-setup index 823a379df..1ef88a342 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -219,6 +219,7 @@ if [ -n "$test_profile" ]; then WEBUSER=onionuser@somewhere.invalid WEBPASSWD1=0n10nus3r WEBPASSWD2=0n10nus3r + NODE_DESCRIPTION="${HOSTNAME} - ${install_type}" update_sudoers_for_testing fi From da7c2995b0560689bd8dcd2a2fbc2ad88d5ee138 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Mon, 13 Apr 2026 17:09:10 -0400 Subject: [PATCH 029/110] include trailing numbers as an additional test --- setup/so-setup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/so-setup b/setup/so-setup index 1ef88a342..46b11fc11 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -219,7 +219,7 @@ if [ -n "$test_profile" ]; then WEBUSER=onionuser@somewhere.invalid WEBPASSWD1=0n10nus3r WEBPASSWD2=0n10nus3r - NODE_DESCRIPTION="${HOSTNAME} - ${install_type}" + NODE_DESCRIPTION="${HOSTNAME} - ${install_type} - ${MAINIP}" update_sudoers_for_testing fi From 0405a66c72ec46bb58acc24492f8c142a1c72445 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 13 Apr 2026 16:27:28 -0500 Subject: [PATCH 030/110] enable elastic agent patch release for 9.3.3 --- salt/elasticfleet/defaults.yaml | 1 + .../tools/sbin_jinja/so-elastic-agent-grid-upgrade | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/salt/elasticfleet/defaults.yaml b/salt/elasticfleet/defaults.yaml index a3132d3f4..022600083 100644 --- a/salt/elasticfleet/defaults.yaml +++ b/salt/elasticfleet/defaults.yaml @@ -1,5 +1,6 @@ elasticfleet: enabled: False + patch_version: 9.3.3+build202604082258 # Elastic Agent specific patch release. enable_manager_output: True config: server: diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-agent-grid-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-agent-grid-upgrade index 0729531d3..aafc9c368 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-agent-grid-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-agent-grid-upgrade @@ -6,6 +6,11 @@ . /usr/sbin/so-common {%- import_yaml 'elasticsearch/defaults.yaml' as ELASTICSEARCHDEFAULTS %} +{%- import_yaml 'elasticfleet/defaults.yaml' as ELASTICFLEETDEFAULTS %} +{# Optionally override Elasticsearch version for Elastic Agent patch releases #} +{%- if ELASTICFLEETDEFAULTS.elasticfleet.patch_version is defined %} +{%- do ELASTICSEARCHDEFAULTS.update({'elasticsearch': {'version': ELASTICFLEETDEFAULTS.elasticfleet.patch_version}}) %} +{%- endif %} # Only run on Managers if ! 
is_manager_node; then From d598e20fbbc27fe5155a22d6c838ef20aa692ecc Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:55:33 -0500 Subject: [PATCH 031/110] soup 3.1.0 --- salt/manager/tools/sbin/soup | 94 ++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 37 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index d25153863..a3b5daa23 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -363,6 +363,7 @@ preupgrade_changes() { echo "Checking to see if changes are needed." [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 + [[ "$INSTALLEDVERSION" == "3.0.0" ]] && up_to_3.1.0 true } @@ -371,6 +372,7 @@ postupgrade_changes() { echo "Running post upgrade processes." [[ "$POSTVERSION" =~ ^2\.4\.21[0-9]+$ ]] && post_to_3.0.0 + [[ "$POSTVERSION" == "3.0.0" ]] && post_to_3.1.0 true } @@ -445,7 +447,6 @@ migrate_pcap_to_suricata() { } up_to_3.0.0() { - determine_elastic_agent_upgrade migrate_pcap_to_suricata INSTALLEDVERSION=3.0.0 @@ -469,6 +470,32 @@ post_to_3.0.0() { ### 3.0.0 End ### +### 3.1.0 Scripts ### + +elasticsearch_backup_index_templates() { + echo "Backing up current elasticsearch index templates in /opt/so/conf/elasticsearch/templates/index/ to /nsm/backup/3.0.0_elasticsearch_index_templates.tar.gz" + tar -czf /nsm/backup/3.0.0_elasticsearch_index_templates.tar.gz -C /opt/so/conf/elasticsearch/templates/index/ . +} + +up_to_3.1.0() { + determine_elastic_agent_upgrade + elasticsearch_backup_index_templates + # Clear existing component template state file. + rm -f /opt/so/state/esfleet_component_templates.json + + + INSTALLEDVERSION=3.1.0 +} + +post_to_3.1.0() { + /usr/sbin/so-kibana-space-defaults + + POSTVERSION=3.1.0 +} + +### 3.1.0 End ### + + repo_sync() { echo "Sync the local repo." su socore -c '/usr/sbin/so-repo-sync' || fail "Unable to complete so-repo-sync." @@ -728,12 +755,12 @@ verify_es_version_compatibility() { local is_active_intermediate_upgrade=1 # supported upgrade paths for SO-ES versions declare -A es_upgrade_map=( - ["8.18.8"]="9.0.8" + ["9.0.8"]="9.3.3" ) # Elasticsearch MUST upgrade through these versions declare -A es_to_so_version=( - ["8.18.8"]="2.4.190-20251024" + ["9.0.8"]="3.0.0-20260331" ) # Get current Elasticsearch version @@ -745,26 +772,17 @@ verify_es_version_compatibility() { exit 160 fi - if ! target_es_version_raw=$(so-yaml.py get $UPDATE_DIR/salt/elasticsearch/defaults.yaml elasticsearch.version); then - # so-yaml.py failed to get the ES version from upgrade versions elasticsearch/defaults.yaml file. Likely they are upgrading to an SO version older than 2.4.110 prior to the ES version pinning and should be OKAY to continue with the upgrade. + if ! target_es_version=$(so-yaml.py get -r $UPDATE_DIR/salt/elasticsearch/defaults.yaml elasticsearch.version); then + echo "Couldn't determine the target Elasticsearch version (post soup version) to ensure compatibility with current Elasticsearch version. Exiting" - # if so-yaml.py failed to get the ES version AND the version we are upgrading to is newer than 2.4.110 then we should bail - if [[ $(cat $UPDATE_DIR/VERSION | cut -d'.' -f3) > 110 ]]; then - echo "Couldn't determine the target Elasticsearch version (post soup version) to ensure compatibility with current Elasticsearch version. 
Exiting" - - exit 160 - fi - - # allow upgrade to version < 2.4.110 without checking ES version compatibility - return 0 - else - target_es_version=$(sed -n '1p' <<< "$target_es_version_raw") + exit 160 fi for statefile in "${es_required_version_statefile_base}"-*; do [[ -f $statefile ]] || continue - local es_required_version_statefile_value=$(cat "$statefile") + local es_required_version_statefile_value + es_required_version_statefile_value=$(cat "$statefile") if [[ "$es_required_version_statefile_value" == "$target_es_version" ]]; then echo "Intermediate upgrade to ES $target_es_version is in progress. Skipping Elasticsearch version compatibility check." @@ -773,7 +791,7 @@ verify_es_version_compatibility() { fi # use sort to check if es_required_statefile_value is < the current es_version. - if [[ "$(printf '%s\n' $es_required_version_statefile_value $es_version | sort -V | head -n1)" == "$es_required_version_statefile_value" ]]; then + if [[ "$(printf '%s\n' "$es_required_version_statefile_value" "$es_version" | sort -V | head -n1)" == "$es_required_version_statefile_value" ]]; then rm -f "$statefile" continue fi @@ -784,8 +802,7 @@ verify_es_version_compatibility() { echo -e "\n##############################################################################################################################\n" echo "A previously required intermediate Elasticsearch upgrade was detected. Verifying that all Searchnodes/Heavynodes have successfully upgraded Elasticsearch to $es_required_version_statefile_value before proceeding with soup to avoid potential data loss! This command can take up to an hour to complete." - timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile" - if [[ $? -ne 0 ]]; then + if ! timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile"; then echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" echo "A previous required intermediate Elasticsearch upgrade to $es_required_version_statefile_value has yet to successfully complete across the grid. Please allow time for all Searchnodes/Heavynodes to have upgraded Elasticsearch to $es_required_version_statefile_value before running soup again to avoid potential data loss!" @@ -802,6 +819,7 @@ verify_es_version_compatibility() { return 0 fi + # shellcheck disable=SC2076 # Do not want a regex here eg usage " 8.18.8 9.0.8 " =~ " 9.0.8 " if [[ " ${es_upgrade_map[$es_version]} " =~ " $target_es_version " || "$es_version" == "$target_es_version" ]]; then # supported upgrade return 0 @@ -810,7 +828,7 @@ verify_es_version_compatibility() { if [[ -z "$compatible_versions" ]]; then # If current ES version is not explicitly defined in the upgrade map, we know they have an intermediate upgrade to do. # We default to the lowest ES version defined in es_to_so_version as $first_es_required_version - local first_es_required_version=$(printf '%s\n' "${!es_to_so_version[@]}" | sort -V | head -n1) + first_es_required_version=$(printf '%s\n' "${!es_to_so_version[@]}" | sort -V | head -n1) next_step_so_version=${es_to_so_version[$first_es_required_version]} required_es_upgrade_version="$first_es_required_version" else @@ -829,7 +847,7 @@ verify_es_version_compatibility() { if [[ $is_airgap -eq 0 ]]; then run_airgap_intermediate_upgrade else - if [[ ! 
-z $ISOLOC ]]; then
+    if [[ -n $ISOLOC ]]; then
         originally_requested_iso_location="$ISOLOC"
     fi
     # Make sure ISOLOC is not set. Network installs that used soup -f would have ISOLOC set.
@@ -861,7 +879,8 @@ wait_for_salt_minion_with_restart() {
 }
 
 run_airgap_intermediate_upgrade() {
-    local originally_requested_so_version=$(cat $UPDATE_DIR/VERSION)
+    local originally_requested_so_version
+    originally_requested_so_version=$(cat "$UPDATE_DIR/VERSION")
 
     # preserve ISOLOC value, so we can try to use it post intermediate upgrade
     local originally_requested_iso_location="$ISOLOC"
@@ -873,7 +892,8 @@
     while [[ -z "$next_iso_location" ]] || [[ ! -f "$next_iso_location" && ! -b "$next_iso_location" ]]; do
         # List removable devices if any are present
-        local removable_devices=$(lsblk -no PATH,SIZE,TYPE,MOUNTPOINTS,RM | awk '$NF==1')
+        local removable_devices
+        removable_devices=$(lsblk -no PATH,SIZE,TYPE,MOUNTPOINTS,RM | awk '$NF==1')
         if [[ -n "$removable_devices" ]]; then
             echo "PATH SIZE TYPE MOUNTPOINTS RM"
             echo "$removable_devices"
@@ -894,21 +914,21 @@
     echo "Using $next_iso_location for required intermediary upgrade."
     exec bash < Date: Tue, 14 Apr 2026 19:26:37 -0500
Subject: [PATCH 032/110] check for addon-index templates dir before attempting to load addon index templates

---
 salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load
index 9ad0f418a..840639a32 100755
--- a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load
+++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load
@@ -207,7 +207,7 @@ else
 fi
 
 # Start loading addon templates
-if [[ (-f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && ! -f "$ADDON_STATEFILE_SUCCESS") || ("$IS_HEAVYNODE" == "false" && "$FORCE" == "true") ]]; then
+if [[ (-d "$ADDON_TEMPLATES_DIR" && -f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && ! -f "$ADDON_STATEFILE_SUCCESS") || (-d "$ADDON_TEMPLATES_DIR" && "$IS_HEAVYNODE" == "false" && "$FORCE" == "true") ]]; then
 
     check_elasticsearch_responsive
 

From cefbe01333ee9a1894aa623fdc7b5aa928b507ef Mon Sep 17 00:00:00 2001
From: Mike Reeves
Date: Wed, 15 Apr 2026 14:32:10 -0400
Subject: [PATCH 033/110] Add telegraf_output selector for InfluxDB/Postgres dual-write

Introduces global.telegraf_output (INFLUXDB|POSTGRES|BOTH, default BOTH) so
Telegraf can write metrics to Postgres alongside or instead of InfluxDB. Each
minion authenticates with its own so_telegraf_<minion> role and writes to a
matching schema inside a shared so_telegraf database, so each credential's
blast radius is limited to that minion's own data.
- Per-minion credentials auto-generated and persisted in postgres/auth.sls - postgres/telegraf_users.sls reconciles roles/schemas on every apply - Firewall opens 5432 only to minion hostgroups when Postgres output is active - Reactor on salt/auth + orch/telegraf_postgres_sync.sls provision new minions automatically on key accept - soup post_to_3.1.0 backfills users for existing minions on upgrade - so-show-stats prints latest CPU/mem/disk/load per minion for sanity checks - so-telegraf-trim + nightly cron prune rows older than postgres.telegraf.retention_days (default 14) --- salt/firewall/map.jinja | 12 +++ salt/global/defaults.yaml | 3 +- salt/global/soc_global.yaml | 11 +++ salt/manager/tools/sbin/soup | 25 ++++- salt/orch/telegraf_postgres_sync.sls | 26 +++++ salt/postgres/auth.sls | 23 +++++ salt/postgres/defaults.yaml | 2 + salt/postgres/enabled.sls | 16 ++++ salt/postgres/files/init-users.sh | 8 ++ salt/postgres/soc_postgres.yaml | 7 ++ salt/postgres/telegraf_users.sls | 49 ++++++++++ salt/postgres/tools/sbin/so-show-stats | 110 ++++++++++++++++++++++ salt/postgres/tools/sbin/so-telegraf-trim | 103 ++++++++++++++++++++ salt/reactor/telegraf_user_sync.sls | 18 ++++ salt/salt/master.sls | 13 +++ salt/telegraf/etc/telegraf.conf | 15 +++ salt/vars/globals.map.jinja | 1 + 17 files changed, 440 insertions(+), 2 deletions(-) create mode 100644 salt/orch/telegraf_postgres_sync.sls create mode 100644 salt/postgres/soc_postgres.yaml create mode 100644 salt/postgres/telegraf_users.sls create mode 100644 salt/postgres/tools/sbin/so-show-stats create mode 100644 salt/postgres/tools/sbin/so-telegraf-trim create mode 100644 salt/reactor/telegraf_user_sync.sls diff --git a/salt/firewall/map.jinja b/salt/firewall/map.jinja index 58d8c189d..2821f62b4 100644 --- a/salt/firewall/map.jinja +++ b/salt/firewall/map.jinja @@ -55,4 +55,16 @@ {% endif %} +{# Open Postgres (5432) to minion hostgroups when Telegraf is configured to write to Postgres #} +{% set TG_OUT = (GLOBALS.telegraf_output | default('INFLUXDB')) | upper %} +{% if TG_OUT in ['POSTGRES', 'BOTH'] %} +{% if role.startswith('manager') or role == 'standalone' or role == 'eval' %} +{% for r in ['sensor', 'searchnode', 'heavynode', 'receiver', 'fleet', 'idh', 'desktop', 'import'] %} +{% if FIREWALL_DEFAULT.firewall.role[role].chain["DOCKER-USER"].hostgroups[r] is defined %} +{% do FIREWALL_DEFAULT.firewall.role[role].chain["DOCKER-USER"].hostgroups[r].portgroups.append('postgres') %} +{% endif %} +{% endfor %} +{% endif %} +{% endif %} + {% set FIREWALL_MERGED = salt['pillar.get']('firewall', FIREWALL_DEFAULT.firewall, merge=True) %} diff --git a/salt/global/defaults.yaml b/salt/global/defaults.yaml index 92b9c1c1a..d041306a7 100644 --- a/salt/global/defaults.yaml +++ b/salt/global/defaults.yaml @@ -1,3 +1,4 @@ global: pcapengine: SURICATA - pipeline: REDIS \ No newline at end of file + pipeline: REDIS + telegraf_output: BOTH \ No newline at end of file diff --git a/salt/global/soc_global.yaml b/salt/global/soc_global.yaml index 33abbf690..a01d33cb8 100644 --- a/salt/global/soc_global.yaml +++ b/salt/global/soc_global.yaml @@ -65,4 +65,15 @@ global: description: Allows use of Endgame with Security Onion. This feature requires a license from Endgame. global: True advanced: True + telegraf_output: + description: Selects the backend(s) Telegraf writes metrics to. INFLUXDB keeps the current behavior; POSTGRES writes to the grid's Postgres instance; BOTH dual-writes for migration validation. 
+ regex: ^(INFLUXDB|POSTGRES|BOTH)$ + options: + - INFLUXDB + - POSTGRES + - BOTH + regexFailureMessage: You must enter INFLUXDB, POSTGRES, or BOTH. + global: True + advanced: True + helpLink: influxdb diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index d25153863..d5ade0fab 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -362,7 +362,8 @@ preupgrade_changes() { # This function is to add any new pillar items if needed. echo "Checking to see if changes are needed." - [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 + [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 + [[ "$INSTALLEDVERSION" =~ ^3\.0\.[0-9]+$ ]] && up_to_3.1.0 true } @@ -371,6 +372,7 @@ postupgrade_changes() { echo "Running post upgrade processes." [[ "$POSTVERSION" =~ ^2\.4\.21[0-9]+$ ]] && post_to_3.0.0 + [[ "$POSTVERSION" =~ ^3\.0\.[0-9]+$ ]] && post_to_3.1.0 true } @@ -469,6 +471,27 @@ post_to_3.0.0() { ### 3.0.0 End ### +### 3.1.0 Start ### + +up_to_3.1.0() { + INSTALLEDVERSION=3.1.0 +} + +post_to_3.1.0() { + # Provision per-minion Telegraf Postgres users for every minion known to the + # manager. postgres.auth iterates manage.up to generate any missing passwords; + # postgres.telegraf_users reconciles the roles and schemas inside the so-postgres + # container. Then push a telegraf state to every minion so their telegraf.conf + # picks up the new credentials on the first apply after soup. + echo "Provisioning Telegraf Postgres users for existing minions." + salt-call --local state.apply postgres.auth postgres.telegraf_users || true + salt '*' state.sls telegraf || true + + POSTVERSION=3.1.0 +} + +### 3.1.0 End ### + repo_sync() { echo "Sync the local repo." su socore -c '/usr/sbin/so-repo-sync' || fail "Unable to complete so-repo-sync." diff --git a/salt/orch/telegraf_postgres_sync.sls b/salt/orch/telegraf_postgres_sync.sls new file mode 100644 index 000000000..90c42fc07 --- /dev/null +++ b/salt/orch/telegraf_postgres_sync.sls @@ -0,0 +1,26 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +{% set MINION = salt['pillar.get']('minion_id') %} +{% set MANAGER = salt['pillar.get']('setup:manager') or salt['grains.get']('master') %} + +manager_sync_telegraf_pg_users: + salt.state: + - tgt: {{ MANAGER }} + - sls: + - postgres.auth + - postgres.telegraf_users + - queue: True + +{% if MINION and MINION != MANAGER %} +{{ MINION }}_apply_telegraf: + salt.state: + - tgt: {{ MINION }} + - sls: + - telegraf + - queue: True + - require: + - salt: manager_sync_telegraf_pg_users +{% endif %} diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls index a19b2341a..3da1bcde0 100644 --- a/salt/postgres/auth.sls +++ b/salt/postgres/auth.sls @@ -13,6 +13,24 @@ {% set CHARS = DIGITS~LOWERCASE~UPPERCASE~SYMBOLS %} {% set so_postgres_user_pass = salt['pillar.get']('postgres:auth:users:so_postgres_user:pass', salt['random.get_str'](72, chars=CHARS)) %} + {# Per-minion Telegraf Postgres credentials. Merge currently-up minions with any #} + {# previously-known entries in pillar so existing passwords persist across runs. 
#} + {% set existing = salt['pillar.get']('postgres:auth:users', {}) %} + {% set up_minions = salt['saltutil.runner']('manage.up') or [] %} + {% set telegraf_users = {} %} + {% for key, entry in existing.items() %} + {%- if key.startswith('telegraf_') and entry.get('user') and entry.get('pass') %} + {%- do telegraf_users.update({key: entry}) %} + {%- endif %} + {% endfor %} + {% for mid in up_minions %} + {%- set safe = mid | replace('.','_') | replace('-','_') | lower %} + {%- set key = 'telegraf_' ~ safe %} + {%- if key not in telegraf_users %} + {%- do telegraf_users.update({key: {'user': 'so_telegraf_' ~ safe, 'pass': salt['random.get_str'](72, chars=CHARS)}}) %} + {%- endif %} + {% endfor %} + postgres_auth_pillar: file.managed: - name: /opt/so/saltstack/local/pillar/postgres/auth.sls @@ -25,6 +43,11 @@ postgres_auth_pillar: so_postgres_user: user: so_postgres pass: "{{ so_postgres_user_pass }}" + {% for key, entry in telegraf_users.items() %} + {{ key }}: + user: {{ entry.user }} + pass: "{{ entry.pass }}" + {% endfor %} - show_changes: False {% else %} diff --git a/salt/postgres/defaults.yaml b/salt/postgres/defaults.yaml index c24a07f56..dd7994044 100644 --- a/salt/postgres/defaults.yaml +++ b/salt/postgres/defaults.yaml @@ -1,5 +1,7 @@ postgres: enabled: True + telegraf: + retention_days: 14 config: listen_addresses: '*' port: 5432 diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls index c103245ea..24e348365 100644 --- a/salt/postgres/enabled.sls +++ b/salt/postgres/enabled.sls @@ -16,6 +16,7 @@ include: - postgres.ssl - postgres.config - postgres.sostatus + - postgres.telegraf_users so-postgres: docker_container.running: @@ -79,6 +80,21 @@ delete_so-postgres_so-status.disabled: - name: /opt/so/conf/so-status/so-status.conf - regex: ^so-postgres$ +so_telegraf_trim: +{% if GLOBALS.telegraf_output in ['POSTGRES', 'BOTH'] %} + cron.present: +{% else %} + cron.absent: +{% endif %} + - name: /usr/sbin/so-telegraf-trim >> /opt/so/log/postgres/telegraf-trim.log 2>&1 + - identifier: so_telegraf_trim + - user: root + - minute: '17' + - hour: '3' + - daymonth: '*' + - month: '*' + - dayweek: '*' + {% else %} {{sls}}_state_not_allowed: diff --git a/salt/postgres/files/init-users.sh b/salt/postgres/files/init-users.sh index 7451e0bf8..b07dfcdb0 100644 --- a/salt/postgres/files/init-users.sh +++ b/salt/postgres/files/init-users.sh @@ -16,3 +16,11 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E \$\$; GRANT ALL PRIVILEGES ON DATABASE "$POSTGRES_DB" TO "$SO_POSTGRES_USER"; EOSQL + +# Bootstrap the Telegraf metrics database. Per-minion roles + schemas are +# reconciled on every state.apply by postgres/telegraf_users.sls; this block +# only ensures the shared database exists on first initialization. +psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL + SELECT 'CREATE DATABASE so_telegraf' + WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'so_telegraf')\gexec +EOSQL diff --git a/salt/postgres/soc_postgres.yaml b/salt/postgres/soc_postgres.yaml new file mode 100644 index 000000000..167772e3f --- /dev/null +++ b/salt/postgres/soc_postgres.yaml @@ -0,0 +1,7 @@ +postgres: + telegraf: + retention_days: + description: Number of days of Telegraf metrics to keep in the so_telegraf database. Older rows are deleted nightly by so-telegraf-trim. 
+ forcedType: int + advanced: True + helpLink: influxdb diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls new file mode 100644 index 000000000..d510af9e5 --- /dev/null +++ b/salt/postgres/telegraf_users.sls @@ -0,0 +1,49 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +{% from 'allowed_states.map.jinja' import allowed_states %} +{% if sls.split('.')[0] in allowed_states %} +{% from 'vars/globals.map.jinja' import GLOBALS %} + +{% set TG_OUT = (GLOBALS.telegraf_output | default('INFLUXDB')) | upper %} +{% if TG_OUT in ['POSTGRES', 'BOTH'] %} + +{% set users = salt['pillar.get']('postgres:auth:users', {}) %} +{% for key, entry in users.items() %} +{% if key.startswith('telegraf_') and entry.get('user') and entry.get('pass') %} +{% set u = entry.user %} +{% set p = entry.pass | replace("'", "''") %} + +postgres_telegraf_role_{{ u }}: + cmd.run: + - name: | + docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL' + DO $$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{{ u }}') THEN + EXECUTE format('CREATE ROLE %I WITH LOGIN PASSWORD %L', '{{ u }}', '{{ p }}'); + ELSE + EXECUTE format('ALTER ROLE %I WITH PASSWORD %L', '{{ u }}', '{{ p }}'); + END IF; + END + $$; + GRANT CONNECT ON DATABASE so_telegraf TO "{{ u }}"; + CREATE SCHEMA IF NOT EXISTS "{{ u }}" AUTHORIZATION "{{ u }}"; + EOSQL + - require: + - docker_container: so-postgres + +{% endif %} +{% endfor %} + +{% endif %} + +{% else %} + +{{sls}}_state_not_allowed: + test.fail_without_changes: + - name: {{sls}}_state_not_allowed + +{% endif %} diff --git a/salt/postgres/tools/sbin/so-show-stats b/salt/postgres/tools/sbin/so-show-stats new file mode 100644 index 000000000..a512ffb0c --- /dev/null +++ b/salt/postgres/tools/sbin/so-show-stats @@ -0,0 +1,110 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Point-in-time host metrics from the Telegraf Postgres backend. +# Sanity-check tool for verifying metrics are landing before the grid +# dashboards consume them. + +. /usr/sbin/so-common + +usage() { + cat </dev/null | cut -d\| -f1 | grep -qw so_telegraf; then + echo "Database so_telegraf not found. Is global.telegraf_output set to POSTGRES or BOTH?" + exit 2 +fi + +# List telegraf schemas (role-per-minion naming convention: so_telegraf_) +SCHEMAS=$(so_psql -c "SELECT schema_name FROM information_schema.schemata WHERE schema_name LIKE 'so_telegraf_%' ORDER BY schema_name;") + +if [ -z "$SCHEMAS" ]; then + echo "No minion schemas found in so_telegraf." 
+ exit 0 +fi + +print_metric() { + local schema="$1" table="$2" query="$3" + # Confirm table exists in this schema before querying + local exists + exists=$(so_psql -c "SELECT 1 FROM information_schema.tables WHERE table_schema='${schema}' AND table_name='${table}' LIMIT 1;") + [ -z "$exists" ] && return 0 + so_psql -c "$query" +} + +for schema in $SCHEMAS; do + minion="${schema#so_telegraf_}" + if [ -n "$FILTER_MINION" ]; then + # Compare against the sanitized form used in schema names + want=$(echo "$FILTER_MINION" | tr '.-' '_' | tr '[:upper:]' '[:lower:]') + [ "$minion" != "$want" ] && continue + fi + + echo "====================================================================" + echo " Minion: $minion" + echo "====================================================================" + + print_metric "$schema" "cpu" " + SELECT 'cpu ' AS metric, + to_char(time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + round((100 - usage_idle)::numeric, 1) || '% used' + FROM \"${schema}\".cpu + WHERE cpu = 'cpu-total' + ORDER BY time DESC LIMIT 1;" + + print_metric "$schema" "mem" " + SELECT 'memory ' AS metric, + to_char(time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + round(used_percent::numeric, 1) || '% used (' || + pg_size_pretty(used) || ' of ' || pg_size_pretty(total) || ')' + FROM \"${schema}\".mem + ORDER BY time DESC LIMIT 1;" + + print_metric "$schema" "disk" " + SELECT 'disk ' || rpad(path, 8) AS metric, + to_char(time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + round(used_percent::numeric, 1) || '% used (' || + pg_size_pretty(used) || ' of ' || pg_size_pretty(total) || ')' + FROM \"${schema}\".disk + WHERE time = (SELECT max(time) FROM \"${schema}\".disk) + ORDER BY path;" + + print_metric "$schema" "system" " + SELECT 'load ' AS metric, + to_char(time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + load1 || ' / ' || load5 || ' / ' || load15 || ' (1/5/15m)' + FROM \"${schema}\".system + ORDER BY time DESC LIMIT 1;" + + echo "" +done diff --git a/salt/postgres/tools/sbin/so-telegraf-trim b/salt/postgres/tools/sbin/so-telegraf-trim new file mode 100644 index 000000000..0bf53c1d8 --- /dev/null +++ b/salt/postgres/tools/sbin/so-telegraf-trim @@ -0,0 +1,103 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Deletes Telegraf metric rows older than the configured retention window from +# every minion schema in the so_telegraf database. Intended to run daily from +# cron. Retention comes from pillar (postgres.telegraf.retention_days), +# defaulting to 14 days. An explicit --days argument overrides the pillar. + +. /usr/sbin/so-common + +usage() { + cat </dev/null) +fi +if ! [[ "$DAYS" =~ ^[0-9]+$ ]] || [ "$DAYS" -lt 1 ]; then + DAYS=14 +fi + +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') so-telegraf-trim: $*" +} + +so_psql() { + docker exec so-postgres psql -U postgres -d so_telegraf -At -F $'\t' "$@" +} + +if ! docker exec so-postgres psql -U postgres -lqt 2>/dev/null | cut -d\| -f1 | grep -qw so_telegraf; then + log "Database so_telegraf not present; nothing to trim." + exit 0 +fi + +log "Trimming rows older than ${DAYS} days (dry_run=${DRY_RUN})." + +TOTAL_DELETED=0 + +# One row per (schema, table) we might want to trim. +# Column name is 'time' for all telegraf output plugin tables; skip metadata +# tables (tag_* used for tags_as_foreign_keys). 
+ROWS=$(so_psql -c " + SELECT table_schema || '.' || table_name + FROM information_schema.columns + WHERE column_name = 'time' + AND data_type IN ('timestamp with time zone', 'timestamp without time zone') + AND table_schema LIKE 'so_telegraf_%' + ORDER BY 1;") + +if [ -z "$ROWS" ]; then + log "No telegraf metric tables found." + exit 0 +fi + +for qualified in $ROWS; do + if [ "$DRY_RUN" -eq 1 ]; then + count=$(so_psql -c "SELECT count(*) FROM \"${qualified%.*}\".\"${qualified#*.}\" WHERE time < now() - interval '${DAYS} days';") + log "would delete ${count:-0} rows from ${qualified}" + else + # RETURNING count via a CTE so we can log how much was trimmed per table + deleted=$(so_psql -c " + WITH d AS ( + DELETE FROM \"${qualified%.*}\".\"${qualified#*.}\" + WHERE time < now() - interval '${DAYS} days' + RETURNING 1 + ) + SELECT count(*) FROM d;") + deleted=${deleted:-0} + TOTAL_DELETED=$((TOTAL_DELETED + deleted)) + [ "$deleted" -gt 0 ] && log "deleted ${deleted} rows from ${qualified}" + fi +done + +if [ "$DRY_RUN" -eq 0 ]; then + log "Trim complete. Total rows deleted: ${TOTAL_DELETED}." +fi diff --git a/salt/reactor/telegraf_user_sync.sls b/salt/reactor/telegraf_user_sync.sls new file mode 100644 index 000000000..abf35d3b2 --- /dev/null +++ b/salt/reactor/telegraf_user_sync.sls @@ -0,0 +1,18 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +{# Fires on salt/auth. Only act on accepted keys — ignore pending/reject. #} +{% if data.get('act') == 'accept' and data.get('id') %} + +{{ data['id'] }}_telegraf_pg_sync: + runner.state.orchestrate: + - args: + - mods: orch.telegraf_postgres_sync + - pillar: + minion_id: {{ data['id'] }} + +{% do salt.log.info('telegraf_user_sync reactor: syncing telegraf PG user for minion %s' % data['id']) %} + +{% endif %} diff --git a/salt/salt/master.sls b/salt/salt/master.sls index 895150cd7..7e3e48074 100644 --- a/salt/salt/master.sls +++ b/salt/salt/master.sls @@ -62,6 +62,19 @@ engines_config: - name: /etc/salt/master.d/engines.conf - source: salt://salt/files/engines.conf +reactor_config_telegraf: + file.managed: + - name: /etc/salt/master.d/reactor_telegraf.conf + - contents: | + reactor: + - 'salt/auth': + - /opt/so/saltstack/default/salt/reactor/telegraf_user_sync.sls + - user: root + - group: root + - mode: 644 + - watch_in: + - service: salt_master_service + # update the bootstrap script when used for salt-cloud salt_bootstrap_cloud: file.managed: diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index aafcf6d77..4cdd81f20 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -8,6 +8,11 @@ {%- set ZEEK_ENABLED = salt['pillar.get']('zeek:enabled', True) %} {%- set MDENGINE = GLOBALS.md_engine %} {%- set LOGSTASH_ENABLED = LOGSTASH_MERGED.enabled %} +{%- set TG_OUT = GLOBALS.telegraf_output | upper %} +{%- set PG_HOST = GLOBALS.manager_ip %} +{%- set PG_SAFE = GLOBALS.minion_id | replace('.','_') | replace('-','_') | lower %} +{%- set PG_USER = 'so_telegraf_' ~ PG_SAFE %} +{%- set PG_PASS = salt['pillar.get']('postgres:auth:users:telegraf_' ~ PG_SAFE ~ ':pass', '') %} # Global tags can be specified here in key="value" format. 
[global_tags] role = "{{ GLOBALS.role.split('-') | last }}" @@ -72,6 +77,7 @@ # OUTPUT PLUGINS # ############################################################################### +{%- if TG_OUT in ['INFLUXDB', 'BOTH'] %} # Configuration for sending metrics to InfluxDB [[outputs.influxdb_v2]] urls = ["https://{{ INFLUXDBHOST }}:8086"] @@ -85,6 +91,15 @@ tls_key = "/etc/telegraf/telegraf.key" ## Use TLS but skip chain & host verification # insecure_skip_verify = false +{%- endif %} + +{%- if TG_OUT in ['POSTGRES', 'BOTH'] %} +# Configuration for sending metrics to PostgreSQL +[[outputs.postgresql]] + connection = "host={{ PG_HOST }} port=5432 user={{ PG_USER }} password={{ PG_PASS }} dbname=so_telegraf sslmode=verify-full sslrootcert=/etc/telegraf/ca.crt" + schema = "{{ PG_USER }}" + tags_as_foreign_keys = true +{%- endif %} ############################################################################### # PROCESSOR PLUGINS # diff --git a/salt/vars/globals.map.jinja b/salt/vars/globals.map.jinja index 385db02ae..787691b13 100644 --- a/salt/vars/globals.map.jinja +++ b/salt/vars/globals.map.jinja @@ -24,6 +24,7 @@ 'md_engine': INIT.PILLAR.global.mdengine, 'pcap_engine': GLOBALMERGED.pcapengine, 'pipeline': GLOBALMERGED.pipeline, + 'telegraf_output': GLOBALMERGED.telegraf_output, 'so_version': INIT.PILLAR.global.soversion, 'so_docker_gateway': DOCKERMERGED.gateway, 'so_docker_range': DOCKERMERGED.range, From 88582c94e8943abe301408a4bf503f070747e1f0 Mon Sep 17 00:00:00 2001 From: Jorge Reyes <94730068+reyesj2@users.noreply.github.com> Date: Wed, 15 Apr 2026 15:04:20 -0500 Subject: [PATCH 034/110] remove foxtrot version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 0e0d1ae9a..fd2a01863 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.0-foxtrot \ No newline at end of file +3.1.0 From d24808ff9854376f5e842312e6d69c27c572d39f Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 15 Apr 2026 19:28:10 -0400 Subject: [PATCH 035/110] Fix so-show-stats tag column resolution Telegraf's postgresql output stores tag values either as individual columns on _tag or as a single JSONB 'tags' column, depending on plugin version. Introspect information_schema.columns and build the right accessor per tag instead of assuming one layout. --- salt/postgres/tools/sbin/so-show-stats | 83 ++++++++++++++++++-------- 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/salt/postgres/tools/sbin/so-show-stats b/salt/postgres/tools/sbin/so-show-stats index a512ffb0c..68fd52d00 100644 --- a/salt/postgres/tools/sbin/so-show-stats +++ b/salt/postgres/tools/sbin/so-show-stats @@ -62,6 +62,33 @@ print_metric() { so_psql -c "$query" } +# Telegraf's postgresql output stores tag values either as individual columns +# on the _tag table or as a single JSONB "tags" column, depending on +# plugin version. Returns a SQL expression that extracts the named tag +# regardless of layout. Empty string if the tag table doesn't exist. 
+tag_expr() { + local schema="$1" table="$2" tag="$3" alias="$4" + local has_col + has_col=$(so_psql -c " + SELECT 1 FROM information_schema.columns + WHERE table_schema='${schema}' AND table_name='${table}_tag' AND column_name='${tag}' + LIMIT 1;") + if [ -n "$has_col" ]; then + echo "${alias}.${tag}" + return + fi + local has_tags + has_tags=$(so_psql -c " + SELECT 1 FROM information_schema.columns + WHERE table_schema='${schema}' AND table_name='${table}_tag' AND column_name='tags' + LIMIT 1;") + if [ -n "$has_tags" ]; then + echo "(${alias}.tags->>'${tag}')" + return + fi + echo "" +} + for schema in $SCHEMAS; do minion="${schema#so_telegraf_}" if [ -n "$FILTER_MINION" ]; then @@ -74,37 +101,45 @@ for schema in $SCHEMAS; do echo " Minion: $minion" echo "====================================================================" - print_metric "$schema" "cpu" " - SELECT 'cpu ' AS metric, - to_char(time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - round((100 - usage_idle)::numeric, 1) || '% used' - FROM \"${schema}\".cpu - WHERE cpu = 'cpu-total' - ORDER BY time DESC LIMIT 1;" + cpu_tag=$(tag_expr "$schema" "cpu" "cpu" "t") + if [ -n "$cpu_tag" ]; then + print_metric "$schema" "cpu" " + SELECT 'cpu ' AS metric, + to_char(c.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + round((100 - c.usage_idle)::numeric, 1) || '% used' + FROM \"${schema}\".cpu c + JOIN \"${schema}\".cpu_tag t USING (tag_id) + WHERE ${cpu_tag} = 'cpu-total' + ORDER BY c.time DESC LIMIT 1;" + fi print_metric "$schema" "mem" " SELECT 'memory ' AS metric, - to_char(time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - round(used_percent::numeric, 1) || '% used (' || - pg_size_pretty(used) || ' of ' || pg_size_pretty(total) || ')' - FROM \"${schema}\".mem - ORDER BY time DESC LIMIT 1;" + to_char(m.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + round(m.used_percent::numeric, 1) || '% used (' || + pg_size_pretty(m.used) || ' of ' || pg_size_pretty(m.total) || ')' + FROM \"${schema}\".mem m + ORDER BY m.time DESC LIMIT 1;" - print_metric "$schema" "disk" " - SELECT 'disk ' || rpad(path, 8) AS metric, - to_char(time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - round(used_percent::numeric, 1) || '% used (' || - pg_size_pretty(used) || ' of ' || pg_size_pretty(total) || ')' - FROM \"${schema}\".disk - WHERE time = (SELECT max(time) FROM \"${schema}\".disk) - ORDER BY path;" + disk_path=$(tag_expr "$schema" "disk" "path" "t") + if [ -n "$disk_path" ]; then + print_metric "$schema" "disk" " + SELECT 'disk ' || rpad(${disk_path}, 12) AS metric, + to_char(d.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + round(d.used_percent::numeric, 1) || '% used (' || + pg_size_pretty(d.used) || ' of ' || pg_size_pretty(d.total) || ')' + FROM \"${schema}\".disk d + JOIN \"${schema}\".disk_tag t USING (tag_id) + WHERE d.time = (SELECT max(time) FROM \"${schema}\".disk) + ORDER BY ${disk_path};" + fi print_metric "$schema" "system" " SELECT 'load ' AS metric, - to_char(time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - load1 || ' / ' || load5 || ' / ' || load15 || ' (1/5/15m)' - FROM \"${schema}\".system - ORDER BY time DESC LIMIT 1;" + to_char(s.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + s.load1 || ' / ' || s.load5 || ' / ' || s.load15 || ' (1/5/15m)' + FROM \"${schema}\".system s + ORDER BY s.time DESC LIMIT 1;" echo "" done From c12418698975007d3f63c4bc4002d2d876a9a629 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 15 Apr 2026 19:45:42 -0400 Subject: [PATCH 036/110] so-log-check: exclude psql ON_ERROR_STOP flag The psql invocation flag '-v ON_ERROR_STOP=1' used by the so-postgres init script gets flagged by 
so-log-check because the token 'ERROR' matches its error regex. Add to the exclusion list. --- salt/common/tools/sbin/so-log-check | 1 + 1 file changed, 1 insertion(+) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index 8c8bbf35c..d8446d6fe 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -229,6 +229,7 @@ if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|tcp 127.0.0.1:6791: bind: address already in use" # so-elastic-fleet agent restarting. Seen starting w/ 8.18.8 https://github.com/elastic/kibana/issues/201459 EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint).*user so_kibana lacks the required permissions \[logs-\1" # Known issue with 3 integrations using kibana_system role vs creating unique api creds with proper permissions. EXCLUDED_ERRORS="$EXCLUDED_ERRORS|manifest unknown" # appears in so-dockerregistry log for so-tcpreplay following docker upgrade to 29.2.1-1 + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|-v ON_ERROR_STOP=1" # psql invocation flag from so-postgres init script, not an actual error fi RESULT=0 From 470b3bd4da726e7cce13cbc82aab9b897da4c342 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 16 Apr 2026 15:40:54 -0400 Subject: [PATCH 037/110] Comingle Telegraf metrics into shared schema Per-minion schemas cause table count to explode (N minions * M metrics) and the per-minion revocation story isn't worth it when retention is short. Move all minions to a shared 'telegraf' schema while keeping per-minion login credentials for audit. - New so_telegraf NOLOGIN group role owns the telegraf schema; each per-minion role is a member and inherits insert/select via role inheritance - Telegraf connection string uses options='-c role=so_telegraf' so tables auto-created on first write belong to the group role - so-telegraf-trim walks the flat telegraf.* table set instead of per-minion schemas - so-stats-show filters by host tag; CLI arg is now the hostname as tagged by Telegraf rather than a sanitized schema suffix - Also renames so-show-stats -> so-stats-show --- salt/postgres/telegraf_users.sls | 26 +++- salt/postgres/tools/sbin/so-show-stats | 145 ------------------ salt/postgres/tools/sbin/so-stats-show | 170 ++++++++++++++++++++++ salt/postgres/tools/sbin/so-telegraf-trim | 8 +- salt/telegraf/etc/telegraf.conf | 9 +- 5 files changed, 204 insertions(+), 154 deletions(-) delete mode 100644 salt/postgres/tools/sbin/so-show-stats create mode 100644 salt/postgres/tools/sbin/so-stats-show diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index d510af9e5..7d62ee7f0 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -10,6 +10,28 @@ {% set TG_OUT = (GLOBALS.telegraf_output | default('INFLUXDB')) | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} +# Provision the shared group role and schema once. Every per-minion role is a +# member of so_telegraf, and each Telegraf connection does SET ROLE so_telegraf +# (via options='-c role=so_telegraf' in the connection string) so tables created +# on first write are owned by the group role and every member can INSERT/SELECT. 
+postgres_telegraf_group_role: + cmd.run: + - name: | + docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL' + DO $$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'so_telegraf') THEN + CREATE ROLE so_telegraf NOLOGIN; + END IF; + END + $$; + GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf; + CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf; + GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf; + EOSQL + - require: + - docker_container: so-postgres + {% set users = salt['pillar.get']('postgres:auth:users', {}) %} {% for key, entry in users.items() %} {% if key.startswith('telegraf_') and entry.get('user') and entry.get('pass') %} @@ -30,10 +52,10 @@ postgres_telegraf_role_{{ u }}: END $$; GRANT CONNECT ON DATABASE so_telegraf TO "{{ u }}"; - CREATE SCHEMA IF NOT EXISTS "{{ u }}" AUTHORIZATION "{{ u }}"; + GRANT so_telegraf TO "{{ u }}"; EOSQL - require: - - docker_container: so-postgres + - cmd: postgres_telegraf_group_role {% endif %} {% endfor %} diff --git a/salt/postgres/tools/sbin/so-show-stats b/salt/postgres/tools/sbin/so-show-stats deleted file mode 100644 index 68fd52d00..000000000 --- a/salt/postgres/tools/sbin/so-show-stats +++ /dev/null @@ -1,145 +0,0 @@ -#!/bin/bash - -# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one -# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at -# https://securityonion.net/license; you may not use this file except in compliance with the -# Elastic License 2.0. - -# Point-in-time host metrics from the Telegraf Postgres backend. -# Sanity-check tool for verifying metrics are landing before the grid -# dashboards consume them. - -. /usr/sbin/so-common - -usage() { - cat </dev/null | cut -d\| -f1 | grep -qw so_telegraf; then - echo "Database so_telegraf not found. Is global.telegraf_output set to POSTGRES or BOTH?" - exit 2 -fi - -# List telegraf schemas (role-per-minion naming convention: so_telegraf_) -SCHEMAS=$(so_psql -c "SELECT schema_name FROM information_schema.schemata WHERE schema_name LIKE 'so_telegraf_%' ORDER BY schema_name;") - -if [ -z "$SCHEMAS" ]; then - echo "No minion schemas found in so_telegraf." - exit 0 -fi - -print_metric() { - local schema="$1" table="$2" query="$3" - # Confirm table exists in this schema before querying - local exists - exists=$(so_psql -c "SELECT 1 FROM information_schema.tables WHERE table_schema='${schema}' AND table_name='${table}' LIMIT 1;") - [ -z "$exists" ] && return 0 - so_psql -c "$query" -} - -# Telegraf's postgresql output stores tag values either as individual columns -# on the _tag table or as a single JSONB "tags" column, depending on -# plugin version. Returns a SQL expression that extracts the named tag -# regardless of layout. Empty string if the tag table doesn't exist. 
-tag_expr() { - local schema="$1" table="$2" tag="$3" alias="$4" - local has_col - has_col=$(so_psql -c " - SELECT 1 FROM information_schema.columns - WHERE table_schema='${schema}' AND table_name='${table}_tag' AND column_name='${tag}' - LIMIT 1;") - if [ -n "$has_col" ]; then - echo "${alias}.${tag}" - return - fi - local has_tags - has_tags=$(so_psql -c " - SELECT 1 FROM information_schema.columns - WHERE table_schema='${schema}' AND table_name='${table}_tag' AND column_name='tags' - LIMIT 1;") - if [ -n "$has_tags" ]; then - echo "(${alias}.tags->>'${tag}')" - return - fi - echo "" -} - -for schema in $SCHEMAS; do - minion="${schema#so_telegraf_}" - if [ -n "$FILTER_MINION" ]; then - # Compare against the sanitized form used in schema names - want=$(echo "$FILTER_MINION" | tr '.-' '_' | tr '[:upper:]' '[:lower:]') - [ "$minion" != "$want" ] && continue - fi - - echo "====================================================================" - echo " Minion: $minion" - echo "====================================================================" - - cpu_tag=$(tag_expr "$schema" "cpu" "cpu" "t") - if [ -n "$cpu_tag" ]; then - print_metric "$schema" "cpu" " - SELECT 'cpu ' AS metric, - to_char(c.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - round((100 - c.usage_idle)::numeric, 1) || '% used' - FROM \"${schema}\".cpu c - JOIN \"${schema}\".cpu_tag t USING (tag_id) - WHERE ${cpu_tag} = 'cpu-total' - ORDER BY c.time DESC LIMIT 1;" - fi - - print_metric "$schema" "mem" " - SELECT 'memory ' AS metric, - to_char(m.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - round(m.used_percent::numeric, 1) || '% used (' || - pg_size_pretty(m.used) || ' of ' || pg_size_pretty(m.total) || ')' - FROM \"${schema}\".mem m - ORDER BY m.time DESC LIMIT 1;" - - disk_path=$(tag_expr "$schema" "disk" "path" "t") - if [ -n "$disk_path" ]; then - print_metric "$schema" "disk" " - SELECT 'disk ' || rpad(${disk_path}, 12) AS metric, - to_char(d.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - round(d.used_percent::numeric, 1) || '% used (' || - pg_size_pretty(d.used) || ' of ' || pg_size_pretty(d.total) || ')' - FROM \"${schema}\".disk d - JOIN \"${schema}\".disk_tag t USING (tag_id) - WHERE d.time = (SELECT max(time) FROM \"${schema}\".disk) - ORDER BY ${disk_path};" - fi - - print_metric "$schema" "system" " - SELECT 'load ' AS metric, - to_char(s.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - s.load1 || ' / ' || s.load5 || ' / ' || s.load15 || ' (1/5/15m)' - FROM \"${schema}\".system s - ORDER BY s.time DESC LIMIT 1;" - - echo "" -done diff --git a/salt/postgres/tools/sbin/so-stats-show b/salt/postgres/tools/sbin/so-stats-show new file mode 100644 index 000000000..fd8dff39f --- /dev/null +++ b/salt/postgres/tools/sbin/so-stats-show @@ -0,0 +1,170 @@ +#!/bin/bash + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Point-in-time host metrics from the Telegraf Postgres backend. +# Sanity-check tool for verifying metrics are landing before the grid +# dashboards consume them. + +. /usr/sbin/so-common + +usage() { + cat </dev/null | cut -d\| -f1 | grep -qw so_telegraf; then + echo "Database so_telegraf not found. Is global.telegraf_output set to POSTGRES or BOTH?" 
+ exit 2 +fi + +# Telegraf's postgresql output stores tag values either as individual columns +# on the _tag table or as a single JSONB "tags" column, depending on +# plugin version. Returns a SQL expression that extracts the named tag +# regardless of layout. Empty string if the tag table doesn't exist. +tag_expr() { + local table="$1" tag="$2" alias="$3" + local has_col + has_col=$(so_psql -c " + SELECT 1 FROM information_schema.columns + WHERE table_schema='${SCHEMA}' AND table_name='${table}_tag' AND column_name='${tag}' + LIMIT 1;") + if [ -n "$has_col" ]; then + echo "${alias}.${tag}" + return + fi + local has_tags + has_tags=$(so_psql -c " + SELECT 1 FROM information_schema.columns + WHERE table_schema='${SCHEMA}' AND table_name='${table}_tag' AND column_name='tags' + LIMIT 1;") + if [ -n "$has_tags" ]; then + echo "(${alias}.tags->>'${tag}')" + return + fi + echo "" +} + +table_exists() { + local table="$1" + [ -n "$(so_psql -c "SELECT 1 FROM information_schema.tables WHERE table_schema='${SCHEMA}' AND table_name='${table}' LIMIT 1;")" ] +} + +# Discover hosts from cpu_tag (every minion reports cpu). +host_expr=$(tag_expr "cpu" "host" "t") +if [ -z "$host_expr" ]; then + echo "Unable to determine host tag column on ${SCHEMA}.cpu_tag. Has Telegraf written any rows yet?" + exit 0 +fi + +HOSTS=$(so_psql -c " + SELECT DISTINCT ${host_expr} + FROM \"${SCHEMA}\".cpu_tag t + WHERE ${host_expr} IS NOT NULL + ORDER BY 1;") + +if [ -z "$HOSTS" ]; then + echo "No hosts found in ${SCHEMA}. Is Telegraf configured to write to Postgres?" + exit 0 +fi + +print_metric() { + local query="$1" + so_psql -c "$query" +} + +for host in $HOSTS; do + if [ -n "$FILTER_HOST" ] && [ "$host" != "$FILTER_HOST" ]; then + continue + fi + + echo "====================================================================" + echo " Host: $host" + echo "====================================================================" + + cpu_host=$(tag_expr "cpu" "host" "t") + cpu_tag=$(tag_expr "cpu" "cpu" "t") + if [ -n "$cpu_host" ] && [ -n "$cpu_tag" ]; then + print_metric " + SELECT 'cpu ' AS metric, + to_char(c.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + round((100 - c.usage_idle)::numeric, 1) || '% used' + FROM \"${SCHEMA}\".cpu c + JOIN \"${SCHEMA}\".cpu_tag t USING (tag_id) + WHERE ${cpu_host} = '${host}' AND ${cpu_tag} = 'cpu-total' + ORDER BY c.time DESC LIMIT 1;" + fi + + mem_host=$(tag_expr "mem" "host" "t") + if [ -n "$mem_host" ] && table_exists "mem"; then + print_metric " + SELECT 'memory ' AS metric, + to_char(m.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + round(m.used_percent::numeric, 1) || '% used (' || + pg_size_pretty(m.used) || ' of ' || pg_size_pretty(m.total) || ')' + FROM \"${SCHEMA}\".mem m + JOIN \"${SCHEMA}\".mem_tag t USING (tag_id) + WHERE ${mem_host} = '${host}' + ORDER BY m.time DESC LIMIT 1;" + fi + + disk_host=$(tag_expr "disk" "host" "t") + disk_path=$(tag_expr "disk" "path" "t") + if [ -n "$disk_host" ] && [ -n "$disk_path" ] && table_exists "disk"; then + print_metric " + SELECT 'disk ' || rpad(${disk_path}, 12) AS metric, + to_char(d.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + round(d.used_percent::numeric, 1) || '% used (' || + pg_size_pretty(d.used) || ' of ' || pg_size_pretty(d.total) || ')' + FROM \"${SCHEMA}\".disk d + JOIN \"${SCHEMA}\".disk_tag t USING (tag_id) + WHERE ${disk_host} = '${host}' + AND d.time = (SELECT max(d2.time) + FROM \"${SCHEMA}\".disk d2 + JOIN \"${SCHEMA}\".disk_tag t2 USING (tag_id) + WHERE ${disk_host/t./t2.} = '${host}') + ORDER BY ${disk_path};" + fi + + sys_host=$(tag_expr 
"system" "host" "t") + if [ -n "$sys_host" ] && table_exists "system"; then + print_metric " + SELECT 'load ' AS metric, + to_char(s.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, + s.load1 || ' / ' || s.load5 || ' / ' || s.load15 || ' (1/5/15m)' + FROM \"${SCHEMA}\".system s + JOIN \"${SCHEMA}\".system_tag t USING (tag_id) + WHERE ${sys_host} = '${host}' + ORDER BY s.time DESC LIMIT 1;" + fi + + echo "" +done diff --git a/salt/postgres/tools/sbin/so-telegraf-trim b/salt/postgres/tools/sbin/so-telegraf-trim index 0bf53c1d8..664469d0c 100644 --- a/salt/postgres/tools/sbin/so-telegraf-trim +++ b/salt/postgres/tools/sbin/so-telegraf-trim @@ -63,15 +63,15 @@ log "Trimming rows older than ${DAYS} days (dry_run=${DRY_RUN})." TOTAL_DELETED=0 -# One row per (schema, table) we might want to trim. -# Column name is 'time' for all telegraf output plugin tables; skip metadata -# tables (tag_* used for tags_as_foreign_keys). +# Every metric table in the shared telegraf schema has a 'time' column. +# Tag tables (_tag) don't, so filtering on the column presence is +# enough to scope the trim to metric tables only. ROWS=$(so_psql -c " SELECT table_schema || '.' || table_name FROM information_schema.columns WHERE column_name = 'time' AND data_type IN ('timestamp with time zone', 'timestamp without time zone') - AND table_schema LIKE 'so_telegraf_%' + AND table_schema = 'telegraf' ORDER BY 1;") if [ -z "$ROWS" ]; then diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 4cdd81f20..4f0c279cc 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -94,10 +94,13 @@ {%- endif %} {%- if TG_OUT in ['POSTGRES', 'BOTH'] %} -# Configuration for sending metrics to PostgreSQL +# Configuration for sending metrics to PostgreSQL. +# options='-c role=so_telegraf' makes every connection SET ROLE to the shared +# group role so tables created on first write are owned by so_telegraf, and +# all per-minion members can INSERT/SELECT them via role inheritance. 
[[outputs.postgresql]] - connection = "host={{ PG_HOST }} port=5432 user={{ PG_USER }} password={{ PG_PASS }} dbname=so_telegraf sslmode=verify-full sslrootcert=/etc/telegraf/ca.crt" - schema = "{{ PG_USER }}" + connection = "host={{ PG_HOST }} port=5432 user={{ PG_USER }} password={{ PG_PASS }} dbname=so_telegraf sslmode=verify-full sslrootcert=/etc/telegraf/ca.crt options='-c role=so_telegraf'" + schema = "telegraf" tags_as_foreign_keys = true {%- endif %} From ba00ae8a7b9b27cea2372092c3903e0391858805 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 16 Apr 2026 14:41:25 -0500 Subject: [PATCH 038/110] supress noisy warning from ES 9.3.3 --- salt/elasticsearch/files/log4j2.properties | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/salt/elasticsearch/files/log4j2.properties b/salt/elasticsearch/files/log4j2.properties index b29378d6a..050071581 100644 --- a/salt/elasticsearch/files/log4j2.properties +++ b/salt/elasticsearch/files/log4j2.properties @@ -45,3 +45,7 @@ appender.rolling_json.strategy.action.condition.nested_condition.age = 1D rootLogger.level = info rootLogger.appenderRef.rolling.ref = rolling rootLogger.appenderRef.rolling_json.ref = rolling_json + +# Suppress NotEntitledException WARNs (ES 9.3.3 bug) +logger.entitlement_security.name = org.elasticsearch.entitlement.runtime.policy.PolicyManager.x-pack-security.org.elasticsearch.security.org.elasticsearch.xpack.security +logger.entitlement_security.level = error \ No newline at end of file From a2ffb92b8ddbd273d6bf31fdaf3c828217037f5b Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 16 Apr 2026 16:19:53 -0400 Subject: [PATCH 039/110] Fix soup --- salt/manager/tools/sbin/soup | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index d25153863..22d3764ef 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -362,7 +362,9 @@ preupgrade_changes() { # This function is to add any new pillar items if needed. echo "Checking to see if changes are needed." - [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 + [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 + [[ "$INSTALLEDVERSION" == 3.0.0 ]] && up_to_3.1.0 + true } @@ -371,6 +373,7 @@ postupgrade_changes() { echo "Running post upgrade processes." [[ "$POSTVERSION" =~ ^2\.4\.21[0-9]+$ ]] && post_to_3.0.0 + [[ "$POSTVERSION" =~ 3.0.0 ]] && post_to_3.1.0 true } @@ -381,7 +384,7 @@ check_minimum_version() { fi } -### 3.0.0 Scripts ### +### 3.0.0 Start ### convert_suricata_yes_no() { echo "Starting suricata yes/no values to true/false conversion." @@ -452,23 +455,27 @@ up_to_3.0.0() { } post_to_3.0.0() { - for idx in "logs-idh-so" "logs-redis.log-default"; do - rollover_index "$idx" - done - - # Remove ILM for so-case and so-detection indices - for idx in "so-case" "so-casehistory" "so-detection" "so-detectionhistory"; do - so-elasticsearch-query $idx/_ilm/remove -XPOST - done - - # convert yes/no in suricata pillars to true/false - convert_suricata_yes_no + echo "Nothing to do" POSTVERSION=3.0.0 } ### 3.0.0 End ### +### 3.1.0 Start ### +up_to_3.1.0() { + echo "Nothing to do" + INSTALLEDVERSION=3.1.0 +} + +post_to_3.1.0() { + echo "Nothing to do" + + POSTVERSION=3.1.0 +} +### 3.1.1 End ### + + repo_sync() { echo "Sync the local repo." su socore -c '/usr/sbin/so-repo-sync' || fail "Unable to complete so-repo-sync." 
From f11d315fea88252533cfd2ac1da3a4590b8d8087 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 16 Apr 2026 16:35:24 -0400 Subject: [PATCH 040/110] Fix soup --- salt/manager/tools/sbin/soup | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index f1c0a3d50..5ed66134f 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -363,12 +363,8 @@ preupgrade_changes() { echo "Checking to see if changes are needed." [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 -<<<<<<< HEAD [[ "$INSTALLEDVERSION" == 3.0.0 ]] && up_to_3.1.0 -======= - [[ "$INSTALLEDVERSION" =~ ^3\.0\.[0-9]+$ ]] && up_to_3.1.0 ->>>>>>> 470b3bd4da726e7cce13cbc82aab9b897da4c342 true } @@ -377,11 +373,8 @@ postupgrade_changes() { echo "Running post upgrade processes." [[ "$POSTVERSION" =~ ^2\.4\.21[0-9]+$ ]] && post_to_3.0.0 -<<<<<<< HEAD [[ "$POSTVERSION" =~ 3.0.0 ]] && post_to_3.1.0 -======= - [[ "$POSTVERSION" =~ ^3\.0\.[0-9]+$ ]] && post_to_3.1.0 ->>>>>>> 470b3bd4da726e7cce13cbc82aab9b897da4c342 + true } @@ -463,7 +456,17 @@ up_to_3.0.0() { } post_to_3.0.0() { - echo "Nothing to do" + for idx in "logs-idh-so" "logs-redis.log-default"; do + rollover_index "$idx" + done + + # Remove ILM for so-case and so-detection indices + for idx in "so-case" "so-casehistory" "so-detection" "so-detectionhistory"; do + so-elasticsearch-query $idx/_ilm/remove -XPOST + done + + # convert yes/no in suricata pillars to true/false + convert_suricata_yes_no POSTVERSION=3.0.0 } @@ -471,25 +474,12 @@ post_to_3.0.0() { ### 3.0.0 End ### ### 3.1.0 Start ### -<<<<<<< HEAD up_to_3.1.0() { echo "Nothing to do" -======= - -up_to_3.1.0() { ->>>>>>> 470b3bd4da726e7cce13cbc82aab9b897da4c342 INSTALLEDVERSION=3.1.0 } post_to_3.1.0() { -<<<<<<< HEAD - echo "Nothing to do" - - POSTVERSION=3.1.0 -} -### 3.1.1 End ### - -======= # Provision per-minion Telegraf Postgres users for every minion known to the # manager. postgres.auth iterates manage.up to generate any missing passwords; # postgres.telegraf_users reconciles the roles and schemas inside the so-postgres @@ -497,13 +487,11 @@ post_to_3.1.0() { # picks up the new credentials on the first apply after soup. echo "Provisioning Telegraf Postgres users for existing minions." salt-call --local state.apply postgres.auth postgres.telegraf_users || true - salt '*' state.sls telegraf || true POSTVERSION=3.1.0 } ### 3.1.0 End ### ->>>>>>> 470b3bd4da726e7cce13cbc82aab9b897da4c342 repo_sync() { echo "Sync the local repo." From 9fe53d9cccc2242ca056d927df66852f33d9be6b Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 16 Apr 2026 17:02:21 -0400 Subject: [PATCH 041/110] Use JSONB for Telegraf fields/tags to avoid 1600-column limit High-cardinality inputs (docker, procstat, kafka) trigger ALTER TABLE ADD COLUMN on every new field name, and with all minions writing into a shared 'telegraf' schema the metric tables hit Postgres's 1600-column per-table ceiling quickly. Setting fields_as_jsonb and tags_as_jsonb on the postgresql output keeps metric tables fixed at (time, tag_id, fields jsonb) and tag tables at (tag_id, tags jsonb). - so-stats-show rewritten to use JSONB accessors ((fields->>'x')::numeric, tags->>'host', etc.) 
and cast memory/disk sizes to bigint so pg_size_pretty works - Drop regex/regexFailureMessage from telegraf_output SOC UI entry to match the convention upstream used when removing them from mdengine/pcapengine/pipeline; options: list drives validation --- salt/global/soc_global.yaml | 2 - salt/postgres/tools/sbin/so-stats-show | 96 ++++++++++---------------- salt/telegraf/etc/telegraf.conf | 5 ++ 3 files changed, 40 insertions(+), 63 deletions(-) diff --git a/salt/global/soc_global.yaml b/salt/global/soc_global.yaml index 3430ef777..61646168f 100644 --- a/salt/global/soc_global.yaml +++ b/salt/global/soc_global.yaml @@ -61,12 +61,10 @@ global: advanced: True telegraf_output: description: Selects the backend(s) Telegraf writes metrics to. INFLUXDB keeps the current behavior; POSTGRES writes to the grid's Postgres instance; BOTH dual-writes for migration validation. - regex: ^(INFLUXDB|POSTGRES|BOTH)$ options: - INFLUXDB - POSTGRES - BOTH - regexFailureMessage: You must enter INFLUXDB, POSTGRES, or BOTH. global: True advanced: True helpLink: influxdb diff --git a/salt/postgres/tools/sbin/so-stats-show b/salt/postgres/tools/sbin/so-stats-show index fd8dff39f..bfc81887a 100644 --- a/salt/postgres/tools/sbin/so-stats-show +++ b/salt/postgres/tools/sbin/so-stats-show @@ -8,16 +8,21 @@ # Point-in-time host metrics from the Telegraf Postgres backend. # Sanity-check tool for verifying metrics are landing before the grid # dashboards consume them. +# +# Assumes Telegraf's postgresql output is configured with +# tags_as_foreign_keys = true, tags_as_jsonb = true, fields_as_jsonb = true, +# so metric tables are (time, tag_id, fields jsonb) and tag tables are +# (tag_id, tags jsonb). . /usr/sbin/so-common usage() { cat </dev/null | cut -d\| -f1 | exit 2 fi -# Telegraf's postgresql output stores tag values either as individual columns -# on the _tag table or as a single JSONB "tags" column, depending on -# plugin version. Returns a SQL expression that extracts the named tag -# regardless of layout. Empty string if the tag table doesn't exist. -tag_expr() { - local table="$1" tag="$2" alias="$3" - local has_col - has_col=$(so_psql -c " - SELECT 1 FROM information_schema.columns - WHERE table_schema='${SCHEMA}' AND table_name='${table}_tag' AND column_name='${tag}' - LIMIT 1;") - if [ -n "$has_col" ]; then - echo "${alias}.${tag}" - return - fi - local has_tags - has_tags=$(so_psql -c " - SELECT 1 FROM information_schema.columns - WHERE table_schema='${SCHEMA}' AND table_name='${table}_tag' AND column_name='tags' - LIMIT 1;") - if [ -n "$has_tags" ]; then - echo "(${alias}.tags->>'${tag}')" - return - fi - echo "" -} - table_exists() { local table="$1" [ -n "$(so_psql -c "SELECT 1 FROM information_schema.tables WHERE table_schema='${SCHEMA}' AND table_name='${table}' LIMIT 1;")" ] } # Discover hosts from cpu_tag (every minion reports cpu). -host_expr=$(tag_expr "cpu" "host" "t") -if [ -z "$host_expr" ]; then - echo "Unable to determine host tag column on ${SCHEMA}.cpu_tag. Has Telegraf written any rows yet?" +if ! table_exists "cpu_tag"; then + echo "${SCHEMA}.cpu_tag not found. Has Telegraf written any rows yet?" exit 0 fi HOSTS=$(so_psql -c " - SELECT DISTINCT ${host_expr} - FROM \"${SCHEMA}\".cpu_tag t - WHERE ${host_expr} IS NOT NULL + SELECT DISTINCT tags->>'host' + FROM \"${SCHEMA}\".cpu_tag + WHERE tags ? 
'host' ORDER BY 1;") if [ -z "$HOSTS" ]; then @@ -97,8 +74,7 @@ if [ -z "$HOSTS" ]; then fi print_metric() { - local query="$1" - so_psql -c "$query" + so_psql -c "$1" } for host in $HOSTS; do @@ -110,59 +86,57 @@ for host in $HOSTS; do echo " Host: $host" echo "====================================================================" - cpu_host=$(tag_expr "cpu" "host" "t") - cpu_tag=$(tag_expr "cpu" "cpu" "t") - if [ -n "$cpu_host" ] && [ -n "$cpu_tag" ]; then + if table_exists "cpu"; then print_metric " SELECT 'cpu ' AS metric, to_char(c.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - round((100 - c.usage_idle)::numeric, 1) || '% used' + round((100 - (c.fields->>'usage_idle')::numeric), 1) || '% used' FROM \"${SCHEMA}\".cpu c JOIN \"${SCHEMA}\".cpu_tag t USING (tag_id) - WHERE ${cpu_host} = '${host}' AND ${cpu_tag} = 'cpu-total' + WHERE t.tags->>'host' = '${host}' AND t.tags->>'cpu' = 'cpu-total' ORDER BY c.time DESC LIMIT 1;" fi - mem_host=$(tag_expr "mem" "host" "t") - if [ -n "$mem_host" ] && table_exists "mem"; then + if table_exists "mem"; then print_metric " SELECT 'memory ' AS metric, to_char(m.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - round(m.used_percent::numeric, 1) || '% used (' || - pg_size_pretty(m.used) || ' of ' || pg_size_pretty(m.total) || ')' + round((m.fields->>'used_percent')::numeric, 1) || '% used (' || + pg_size_pretty((m.fields->>'used')::bigint) || ' of ' || + pg_size_pretty((m.fields->>'total')::bigint) || ')' FROM \"${SCHEMA}\".mem m JOIN \"${SCHEMA}\".mem_tag t USING (tag_id) - WHERE ${mem_host} = '${host}' + WHERE t.tags->>'host' = '${host}' ORDER BY m.time DESC LIMIT 1;" fi - disk_host=$(tag_expr "disk" "host" "t") - disk_path=$(tag_expr "disk" "path" "t") - if [ -n "$disk_host" ] && [ -n "$disk_path" ] && table_exists "disk"; then + if table_exists "disk"; then print_metric " - SELECT 'disk ' || rpad(${disk_path}, 12) AS metric, + SELECT 'disk ' || rpad(t.tags->>'path', 12) AS metric, to_char(d.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - round(d.used_percent::numeric, 1) || '% used (' || - pg_size_pretty(d.used) || ' of ' || pg_size_pretty(d.total) || ')' + round((d.fields->>'used_percent')::numeric, 1) || '% used (' || + pg_size_pretty((d.fields->>'used')::bigint) || ' of ' || + pg_size_pretty((d.fields->>'total')::bigint) || ')' FROM \"${SCHEMA}\".disk d JOIN \"${SCHEMA}\".disk_tag t USING (tag_id) - WHERE ${disk_host} = '${host}' + WHERE t.tags->>'host' = '${host}' AND d.time = (SELECT max(d2.time) FROM \"${SCHEMA}\".disk d2 JOIN \"${SCHEMA}\".disk_tag t2 USING (tag_id) - WHERE ${disk_host/t./t2.} = '${host}') - ORDER BY ${disk_path};" + WHERE t2.tags->>'host' = '${host}') + ORDER BY t.tags->>'path';" fi - sys_host=$(tag_expr "system" "host" "t") - if [ -n "$sys_host" ] && table_exists "system"; then + if table_exists "system"; then print_metric " SELECT 'load ' AS metric, to_char(s.time, 'YYYY-MM-DD HH24:MI:SS') AS ts, - s.load1 || ' / ' || s.load5 || ' / ' || s.load15 || ' (1/5/15m)' + (s.fields->>'load1') || ' / ' || + (s.fields->>'load5') || ' / ' || + (s.fields->>'load15') || ' (1/5/15m)' FROM \"${SCHEMA}\".system s JOIN \"${SCHEMA}\".system_tag t USING (tag_id) - WHERE ${sys_host} = '${host}' + WHERE t.tags->>'host' = '${host}' ORDER BY s.time DESC LIMIT 1;" fi diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 4f0c279cc..aa5f2a007 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -98,10 +98,15 @@ # options='-c role=so_telegraf' makes every connection SET ROLE to the shared # group role so tables 
created on first write are owned by so_telegraf, and # all per-minion members can INSERT/SELECT them via role inheritance. +# fields_as_jsonb/tags_as_jsonb keep metric tables at a fixed column count so +# high-cardinality inputs (docker, procstat, kafka) don't blow past the +# Postgres 1600-column-per-table limit. [[outputs.postgresql]] connection = "host={{ PG_HOST }} port=5432 user={{ PG_USER }} password={{ PG_PASS }} dbname=so_telegraf sslmode=verify-full sslrootcert=/etc/telegraf/ca.crt options='-c role=so_telegraf'" schema = "telegraf" tags_as_foreign_keys = true + tags_as_jsonb = true + fields_as_jsonb = true {%- endif %} ############################################################################### From d9a9029ce5a11443de87e4c5e7a9f8769e5fd6b4 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 16 Apr 2026 17:27:15 -0400 Subject: [PATCH 042/110] Adopt pg_partman + pg_cron for Telegraf metric tables Every telegraf.* metric table is now a daily time-range partitioned parent managed by pg_partman. Retention drops old partitions instead of the row-by-row DELETE that so-telegraf-trim used to run nightly, and dashboards will benefit from partition pruning at query time. - Load pg_cron at server start via shared_preload_libraries and point cron.database_name at so_telegraf so job metadata lives alongside the metrics - Telegraf create_templates override makes every new metric table a PARTITION BY RANGE (time) parent registered with partman.create_parent in one transaction (1 day interval, 3 premade) - postgres_telegraf_group_role now also creates pg_partman and pg_cron extensions and schedules hourly partman.run_maintenance_proc - New retention reconcile state updates partman.part_config.retention from postgres.telegraf.retention_days on every apply - so_telegraf_trim cron is now unconditionally absent; script stays on disk as a manual fallback --- salt/postgres/defaults.yaml | 2 ++ salt/postgres/enabled.sls | 12 +++--------- salt/postgres/telegraf_users.sls | 30 ++++++++++++++++++++++++++++++ salt/telegraf/etc/telegraf.conf | 6 ++++++ 4 files changed, 41 insertions(+), 9 deletions(-) diff --git a/salt/postgres/defaults.yaml b/salt/postgres/defaults.yaml index dd7994044..30523cda9 100644 --- a/salt/postgres/defaults.yaml +++ b/salt/postgres/defaults.yaml @@ -14,3 +14,5 @@ postgres: log_destination: 'stderr' logging_collector: 'off' log_min_messages: 'warning' + shared_preload_libraries: pg_cron + cron.database_name: so_telegraf diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls index 24e348365..b6a51580f 100644 --- a/salt/postgres/enabled.sls +++ b/salt/postgres/enabled.sls @@ -80,20 +80,14 @@ delete_so-postgres_so-status.disabled: - name: /opt/so/conf/so-status/so-status.conf - regex: ^so-postgres$ +# Retention is now handled by pg_partman (hourly maintenance via pg_cron +# scheduled from postgres/telegraf_users.sls). The so-telegraf-trim script +# stays on disk for manual/emergency use but is no longer scheduled. 
so_telegraf_trim: -{% if GLOBALS.telegraf_output in ['POSTGRES', 'BOTH'] %} - cron.present: -{% else %} cron.absent: -{% endif %} - name: /usr/sbin/so-telegraf-trim >> /opt/so/log/postgres/telegraf-trim.log 2>&1 - identifier: so_telegraf_trim - user: root - - minute: '17' - - hour: '3' - - daymonth: '*' - - month: '*' - - dayweek: '*' {% else %} diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 7d62ee7f0..5a3ea73e9 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -28,6 +28,14 @@ postgres_telegraf_group_role: GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf; CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf; GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf; + CREATE EXTENSION IF NOT EXISTS pg_partman; + CREATE EXTENSION IF NOT EXISTS pg_cron; + -- Hourly partman maintenance. cron.schedule is idempotent by jobname. + SELECT cron.schedule( + 'telegraf-partman-maintenance', + '17 * * * *', + 'CALL partman.run_maintenance_proc()' + ); EOSQL - require: - docker_container: so-postgres @@ -60,6 +68,28 @@ postgres_telegraf_role_{{ u }}: {% endif %} {% endfor %} +# Reconcile partman retention from pillar. Runs after role/schema setup so +# any partitioned parents Telegraf has already created get their retention +# refreshed whenever postgres.telegraf.retention_days changes. +{% set retention = salt['pillar.get']('postgres:telegraf:retention_days', 14) %} +postgres_telegraf_retention_reconcile: + cmd.run: + - name: | + docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL' + DO $$ + BEGIN + IF EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') THEN + UPDATE partman.part_config + SET retention = '{{ retention }} days', + retention_keep_table = false + WHERE parent_table LIKE 'telegraf.%'; + END IF; + END + $$; + EOSQL + - require: + - cmd: postgres_telegraf_group_role + {% endif %} {% else %} diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index aa5f2a007..ea3e11c51 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -107,6 +107,12 @@ tags_as_foreign_keys = true tags_as_jsonb = true fields_as_jsonb = true + # Every metric table is a daily time-range partitioned parent managed by + # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. + create_templates = [ + '''CREATE TABLE {TABLE} ({COLUMNS}) PARTITION BY RANGE ("time")''', + '''SELECT partman.create_parent(p_parent_table := {TABLELITERAL}, p_control := 'time', p_type := 'native', p_interval := '1 day', p_premake := 3)''' + ] {%- endif %} ############################################################################### From 7d07f3c8fe1f1d7317aa4ac4bb7d4d345ccd158d Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 10:51:08 -0400 Subject: [PATCH 043/110] Create so_telegraf DB from Salt and pin pg_partman schema init-users.sh only runs on a fresh data dir, so upgrades onto an existing /nsm/postgres volume never got so_telegraf. Pinning partman's schema also makes partman.part_config reliably resolvable. 
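A quick way to spot-check the result after a state.apply (a sketch only; the container name and database are the ones used throughout this series, and the psql flags are standard):

    # Database present even on an upgraded, non-fresh /nsm/postgres volume
    docker exec so-postgres psql -U postgres -At \
      -c "SELECT datname FROM pg_database WHERE datname = 'so_telegraf'"

    # pg_partman installed into the pinned schema...
    docker exec so-postgres psql -U postgres -d so_telegraf -At \
      -c "SELECT extname, extnamespace::regnamespace FROM pg_extension WHERE extname = 'pg_partman'"

    # ...and partman.part_config resolvable, listing any parents Telegraf created
    docker exec so-postgres psql -U postgres -d so_telegraf -At \
      -c "SELECT parent_table, partition_interval, retention FROM partman.part_config"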
--- salt/postgres/telegraf_users.sls | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 5a3ea73e9..8c62a8961 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -10,6 +10,19 @@ {% set TG_OUT = (GLOBALS.telegraf_output | default('INFLUXDB')) | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} +# Ensure the shared Telegraf database exists. init-users.sh only runs on a +# fresh data dir, so hosts upgraded onto an existing /nsm/postgres volume +# would otherwise never get so_telegraf. +postgres_create_telegraf_db: + cmd.run: + - name: | + docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d postgres <<'EOSQL' + SELECT 'CREATE DATABASE so_telegraf' + WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'so_telegraf')\gexec + EOSQL + - require: + - docker_container: so-postgres + # Provision the shared group role and schema once. Every per-minion role is a # member of so_telegraf, and each Telegraf connection does SET ROLE so_telegraf # (via options='-c role=so_telegraf' in the connection string) so tables created @@ -28,7 +41,8 @@ postgres_telegraf_group_role: GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf; CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf; GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf; - CREATE EXTENSION IF NOT EXISTS pg_partman; + CREATE SCHEMA IF NOT EXISTS partman; + CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman; CREATE EXTENSION IF NOT EXISTS pg_cron; -- Hourly partman maintenance. cron.schedule is idempotent by jobname. SELECT cron.schedule( @@ -38,7 +52,7 @@ postgres_telegraf_group_role: ); EOSQL - require: - - docker_container: so-postgres + - cmd: postgres_create_telegraf_db {% set users = salt['pillar.get']('postgres:auth:users', {}) %} {% for key, entry in users.items() %} From 5228668be01a324a95c0fe3b2fa81d9f68885ed8 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 13:00:12 -0400 Subject: [PATCH 044/110] =?UTF-8?q?Fix=20Telegraf=E2=86=92Postgres=20table?= =?UTF-8?q?=20creation=20and=20state.apply=20race?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Telegraf's partman template passed p_type:='native', which pg_partman 5.x (the version shipped by postgresql-17-partman on Debian) rejects. Switched to 'range' so partman.create_parent() actually creates partitions and Telegraf's INSERTs succeed. - Added a postgres_wait_ready gate in telegraf_users.sls so psql execs don't race the init-time restart that docker-entrypoint.sh performs. - so-verify now ignores the literal "-v ON_ERROR_STOP=1" token in the setup log. Dropped the matching entry from so-log-check, which scans container stdout where that token never appears. --- salt/common/tools/sbin/so-log-check | 1 - salt/postgres/telegraf_users.sls | 20 +++++++++++++++++++- salt/telegraf/etc/telegraf.conf | 2 +- setup/so-verify | 3 ++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index d8446d6fe..8c8bbf35c 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -229,7 +229,6 @@ if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|tcp 127.0.0.1:6791: bind: address already in use" # so-elastic-fleet agent restarting. 
Seen starting w/ 8.18.8 https://github.com/elastic/kibana/issues/201459 EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint).*user so_kibana lacks the required permissions \[logs-\1" # Known issue with 3 integrations using kibana_system role vs creating unique api creds with proper permissions. EXCLUDED_ERRORS="$EXCLUDED_ERRORS|manifest unknown" # appears in so-dockerregistry log for so-tcpreplay following docker upgrade to 29.2.1-1 - EXCLUDED_ERRORS="$EXCLUDED_ERRORS|-v ON_ERROR_STOP=1" # psql invocation flag from so-postgres init script, not an actual error fi RESULT=0 diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 8c62a8961..920367fab 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -10,6 +10,24 @@ {% set TG_OUT = (GLOBALS.telegraf_output | default('INFLUXDB')) | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} +# docker_container.running returns as soon as the container starts, but on +# first-init docker-entrypoint.sh runs init scripts and then restarts +# postgres, so the next docker exec can hit "the database system is shutting +# down". Wait for pg_isready before any psql work. +postgres_wait_ready: + cmd.run: + - name: | + for i in $(seq 1 60); do + if docker exec so-postgres pg_isready -U postgres -q 2>/dev/null; then + exit 0 + fi + sleep 2 + done + echo "so-postgres did not become ready within 120s" >&2 + exit 1 + - require: + - docker_container: so-postgres + # Ensure the shared Telegraf database exists. init-users.sh only runs on a # fresh data dir, so hosts upgraded onto an existing /nsm/postgres volume # would otherwise never get so_telegraf. @@ -21,7 +39,7 @@ postgres_create_telegraf_db: WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'so_telegraf')\gexec EOSQL - require: - - docker_container: so-postgres + - cmd: postgres_wait_ready # Provision the shared group role and schema once. Every per-minion role is a # member of so_telegraf, and each Telegraf connection does SET ROLE so_telegraf diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index ea3e11c51..334b62888 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -111,7 +111,7 @@ # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. create_templates = [ '''CREATE TABLE {TABLE} ({COLUMNS}) PARTITION BY RANGE ("time")''', - '''SELECT partman.create_parent(p_parent_table := {TABLELITERAL}, p_control := 'time', p_type := 'native', p_interval := '1 day', p_premake := 3)''' + '''SELECT partman.create_parent(p_parent_table := {TABLELITERAL}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' ] {%- endif %} diff --git a/setup/so-verify b/setup/so-verify index 8d23275ea..672ed70cc 100755 --- a/setup/so-verify +++ b/setup/so-verify @@ -71,7 +71,8 @@ log_has_errors() { grep -vE "remove_failed_vm.sls" | \ grep -vE "failed to copy: httpReadSeeker" | \ grep -vE "Error response from daemon: failed to resolve reference" | \ - grep -vE "log-.*-pipeline_failed_attempts" &> "$error_log" + grep -vE "log-.*-pipeline_failed_attempts" | \ + grep -vE " -v ON_ERROR_STOP=1" &> "$error_log" if [[ $? 
-eq 0 ]]; then # This function succeeds (returns 0) if errors are detected From b3fbd5c7a46f99e02081aca011b48a63dd097431 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 14:55:13 -0400 Subject: [PATCH 045/110] Use Go-template placeholders and shell-guarded CREATE DATABASE - Telegraf's outputs.postgresql plugin uses Go text/template syntax, not uppercase tokens. The {TABLE}/{COLUMNS}/{TABLELITERAL} strings were passed through to Postgres literally, producing syntax errors on every metric's first write. Switch to {{ .table }}, {{ .columns }}, and {{ .table|quoteLiteral }} so partitioned parents and the partman create_parent() call succeed. - Replace the \gexec "CREATE DATABASE ... WHERE NOT EXISTS" idiom in both init-users.sh and telegraf_users.sls with an explicit shell conditional. The prior idiom occasionally fired CREATE DATABASE even when so_telegraf already existed, producing duplicate-key failures. --- salt/postgres/files/init-users.sh | 7 +++---- salt/postgres/telegraf_users.sls | 7 +++---- salt/telegraf/etc/telegraf.conf | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/salt/postgres/files/init-users.sh b/salt/postgres/files/init-users.sh index b07dfcdb0..e1be5df19 100644 --- a/salt/postgres/files/init-users.sh +++ b/salt/postgres/files/init-users.sh @@ -20,7 +20,6 @@ EOSQL # Bootstrap the Telegraf metrics database. Per-minion roles + schemas are # reconciled on every state.apply by postgres/telegraf_users.sls; this block # only ensures the shared database exists on first initialization. -psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL - SELECT 'CREATE DATABASE so_telegraf' - WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'so_telegraf')\gexec -EOSQL +if ! psql -U "$POSTGRES_USER" -tAc "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then + psql -v ON_ERROR_STOP=1 -U "$POSTGRES_USER" -c "CREATE DATABASE so_telegraf" +fi diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 920367fab..8972ce510 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -34,10 +34,9 @@ postgres_wait_ready: postgres_create_telegraf_db: cmd.run: - name: | - docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d postgres <<'EOSQL' - SELECT 'CREATE DATABASE so_telegraf' - WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'so_telegraf')\gexec - EOSQL + if ! docker exec so-postgres psql -U postgres -tAc "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then + docker exec so-postgres psql -v ON_ERROR_STOP=1 -U postgres -c "CREATE DATABASE so_telegraf" + fi - require: - cmd: postgres_wait_ready diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 334b62888..fd614a68b 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -110,8 +110,8 @@ # Every metric table is a daily time-range partitioned parent managed by # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. 
create_templates = [ - '''CREATE TABLE {TABLE} ({COLUMNS}) PARTITION BY RANGE ("time")''', - '''SELECT partman.create_parent(p_parent_table := {TABLELITERAL}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' + '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', + '''SELECT partman.create_parent(p_parent_table := {{ .table|quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' ] {%- endif %} From af9330a9dddee39db104c467cefe1c7fd6d842ef Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:04:37 -0400 Subject: [PATCH 046/110] Escape Go-template placeholders from Jinja in telegraf.conf --- salt/telegraf/etc/telegraf.conf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index fd614a68b..e18205ad1 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -109,10 +109,12 @@ fields_as_jsonb = true # Every metric table is a daily time-range partitioned parent managed by # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. + {% raw %} create_templates = [ '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', '''SELECT partman.create_parent(p_parent_table := {{ .table|quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' ] + {% endraw %} {%- endif %} ############################################################################### From 927eba566cfecfd1a94f3b44fc470ffc9e038bc0 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:13:08 -0400 Subject: [PATCH 047/110] Grant so_telegraf access to partman schema Telegraf calls partman.create_parent() on first write of each metric, which needs USAGE on the partman schema, EXECUTE on its functions and procedures, and DML on partman.part_config. --- salt/postgres/telegraf_users.sls | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 8972ce510..cbbd60249 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -61,6 +61,14 @@ postgres_telegraf_group_role: CREATE SCHEMA IF NOT EXISTS partman; CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman; CREATE EXTENSION IF NOT EXISTS pg_cron; + -- Telegraf (running as so_telegraf) calls partman.create_parent() + -- on first write of each metric, which needs USAGE on the partman + -- schema, EXECUTE on its functions/procedures, and write access to + -- partman.part_config so it can register new partitioned parents. + GRANT USAGE ON SCHEMA partman TO so_telegraf; + GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA partman TO so_telegraf; + GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA partman TO so_telegraf; + GRANT EXECUTE ON ALL PROCEDURES IN SCHEMA partman TO so_telegraf; -- Hourly partman maintenance. cron.schedule is idempotent by jobname. SELECT cron.schedule( 'telegraf-partman-maintenance', From 0fddcd8fe70a6032089fbe31c3afc4a9116c0db6 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:22:57 -0400 Subject: [PATCH 048/110] Pass unquoted schema.name to partman.create_parent pg_partman 5.x splits p_parent_table on '.' and looks up the parts as raw identifiers, so the literal must be 'schema.name' rather than the double-quoted form quoteLiteral emits for .table. 
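To make the distinction concrete, here is a sketch against a hypothetical telegraf.cpu parent; only the form of the p_parent_table literal differs:

    # accepted by pg_partman 5.x: plain schema.name literal
    docker exec so-postgres psql -U postgres -d so_telegraf -c \
      "SELECT partman.create_parent(p_parent_table := 'telegraf.cpu', p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3);"
    # rejected: the double-quoted form quoteLiteral emits for .table,
    #   p_parent_table := '"telegraf"."cpu"'
    # fails with "Unable to find given parent table in system catalogs"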
--- salt/telegraf/etc/telegraf.conf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index e18205ad1..8a48186af 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -110,9 +110,14 @@ # Every metric table is a daily time-range partitioned parent managed by # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. {% raw %} + # pg_partman.create_parent() splits p_parent_table on '.' and looks the + # parts up as raw identifiers, so the string literal must be + # 'schema.name', NOT '"schema"."name"'. {{ .table|quoteLiteral }} would + # emit the double-quoted form and partman fails with "Unable to find + # given parent table in system catalogs." create_templates = [ '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', - '''SELECT partman.create_parent(p_parent_table := {{ .table|quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' + '''SELECT partman.create_parent(p_parent_table := {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' ] {% endraw %} {%- endif %} From f11e9da83a574a0038fd028417506bbc339b176b Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:27:06 -0400 Subject: [PATCH 049/110] Mark time column NOT NULL before partman.create_parent pg_partman 5.x requires the control column to be NOT NULL; Telegraf's generated columns are nullable by default. --- salt/telegraf/etc/telegraf.conf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 8a48186af..6d46095f8 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -110,13 +110,13 @@ # Every metric table is a daily time-range partitioned parent managed by # pg_partman. Retention drops old partitions instead of row-by-row DELETEs. {% raw %} - # pg_partman.create_parent() splits p_parent_table on '.' and looks the - # parts up as raw identifiers, so the string literal must be - # 'schema.name', NOT '"schema"."name"'. {{ .table|quoteLiteral }} would - # emit the double-quoted form and partman fails with "Unable to find - # given parent table in system catalogs." + # pg_partman 5.x requires the control column (time) to be NOT NULL, so + # ALTER it before create_parent(). And create_parent() splits + # p_parent_table on '.' to look up raw identifiers, so the literal must + # be 'schema.name' (not '"schema"."name"' as .table|quoteLiteral emits). create_templates = [ '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', + '''ALTER TABLE {{ .table }} ALTER COLUMN "time" SET NOT NULL''', '''SELECT partman.create_parent(p_parent_table := {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' ] {% endraw %} From 21076af01ed9aa0f396b3c51ffbea7c9045757b0 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:34:19 -0400 Subject: [PATCH 050/110] Grant so_telegraf CREATE on partman schema pg_partman 5.x's create_partition() creates a per-parent template table inside the partman schema at runtime, which requires CREATE on that schema. Also extend ALTER DEFAULT PRIVILEGES so the runtime- created template tables are accessible to so_telegraf. 
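A sketch of a manual check that the grants landed; has_schema_privilege and has_table_privilege are stock Postgres functions, and the role/schema names are the ones this change configures:

    docker exec so-postgres psql -U postgres -d so_telegraf -tAc \
      "SELECT has_schema_privilege('so_telegraf', 'partman', 'CREATE');"              # expect: t
    docker exec so-postgres psql -U postgres -d so_telegraf -tAc \
      "SELECT has_table_privilege('so_telegraf', 'partman.part_config', 'INSERT');"   # expect: t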
--- salt/postgres/telegraf_users.sls | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index cbbd60249..804065bae 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -65,10 +65,16 @@ postgres_telegraf_group_role: -- on first write of each metric, which needs USAGE on the partman -- schema, EXECUTE on its functions/procedures, and write access to -- partman.part_config so it can register new partitioned parents. - GRANT USAGE ON SCHEMA partman TO so_telegraf; + GRANT USAGE, CREATE ON SCHEMA partman TO so_telegraf; GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA partman TO so_telegraf; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA partman TO so_telegraf; GRANT EXECUTE ON ALL PROCEDURES IN SCHEMA partman TO so_telegraf; + -- partman creates per-parent template tables (partman.template_*) at + -- runtime; default privileges extend DML/sequence access to them. + ALTER DEFAULT PRIVILEGES IN SCHEMA partman + GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO so_telegraf; + ALTER DEFAULT PRIVILEGES IN SCHEMA partman + GRANT USAGE, SELECT, UPDATE ON SEQUENCES TO so_telegraf; -- Hourly partman maintenance. cron.schedule is idempotent by jobname. SELECT cron.schedule( 'telegraf-partman-maintenance', From ebb93b4fa7b78cd0d21162e268d621f4e4af2e44 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 17 Apr 2026 14:43:07 -0500 Subject: [PATCH 051/110] add wait_for_so-elasticsearch state and split elasticsearch cluster configuration out of enabled.sls --- salt/elasticsearch/cluster.sls | 164 +++++++++++++++++++++++++++++++++ salt/elasticsearch/enabled.sls | 160 +++----------------------------- 2 files changed, 178 insertions(+), 146 deletions(-) create mode 100644 salt/elasticsearch/cluster.sls diff --git a/salt/elasticsearch/cluster.sls b/salt/elasticsearch/cluster.sls new file mode 100644 index 000000000..7a8a6675c --- /dev/null +++ b/salt/elasticsearch/cluster.sls @@ -0,0 +1,164 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +{% from 'allowed_states.map.jinja' import allowed_states %} +{% if sls.split('.')[0] in allowed_states %} +{% from 'vars/globals.map.jinja' import GLOBALS %} +{% from 'elasticsearch/config.map.jinja' import ELASTICSEARCHMERGED %} +{% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS, SO_MANAGED_INDICES %} +{% if GLOBALS.role != 'so-heavynode' %} +{% from 'elasticsearch/template.map.jinja' import ALL_ADDON_SETTINGS %} +{% endif %} + +escomponenttemplates: + file.recurse: + - name: /opt/so/conf/elasticsearch/templates/component + - source: salt://elasticsearch/templates/component + - user: 930 + - group: 939 + - clean: True + - onchanges_in: + - file: so-elasticsearch-templates-reload + - show_changes: False + +# Clean up legacy and non-SO managed templates from the elasticsearch/templates/index/ directory +so_index_template_dir: + file.directory: + - name: /opt/so/conf/elasticsearch/templates/index + - clean: True + {%- if SO_MANAGED_INDICES %} + - require: + {%- for index in SO_MANAGED_INDICES %} + - file: so_index_template_{{index}} + {%- endfor %} + {%- endif %} + +# Auto-generate index templates for SO managed indices (directly defined in elasticsearch/defaults.yaml) +# These index templates are for the core SO datasets and are always required +{% for index, settings in ES_INDEX_SETTINGS.items() %} +{% if settings.index_template is defined %} +so_index_template_{{index}}: + file.managed: + - name: /opt/so/conf/elasticsearch/templates/index/{{ index }}-template.json + - source: salt://elasticsearch/base-template.json.jinja + - defaults: + TEMPLATE_CONFIG: {{ settings.index_template }} + - template: jinja + - onchanges_in: + - file: so-elasticsearch-templates-reload +{% endif %} +{% endfor %} + +{% if GLOBALS.role != "so-heavynode" %} +# Auto-generate optional index templates for integration | input | content packages +# These index templates are not used by default (until user adds package to an agent policy). +# Pre-configured with standard defaults, and incorporated into SOC configuration for user customization. +{% for index,settings in ALL_ADDON_SETTINGS.items() %} +{% if settings.index_template is defined %} +addon_index_template_{{index}}: + file.managed: + - name: /opt/so/conf/elasticsearch/templates/addon-index/{{ index }}-template.json + - source: salt://elasticsearch/base-template.json.jinja + - defaults: + TEMPLATE_CONFIG: {{ settings.index_template }} + - template: jinja + - show_changes: False + - onchanges_in: + - file: addon-elasticsearch-templates-reload +{% endif %} +{% endfor %} +{% endif %} + +{% if GLOBALS.role in GLOBALS.manager_roles %} +so-es-cluster-settings: + cmd.run: + - name: /usr/sbin/so-elasticsearch-cluster-settings + - cwd: /opt/so + - template: jinja + - require: + - docker_container: so-elasticsearch + - file: elasticsearch_sbin_jinja + - http: wait_for_so-elasticsearch +{% endif %} + +# heavynodes will only load ILM policies for SO managed indices. 
(Indices defined in elasticsearch/defaults.yaml) +so-elasticsearch-ilm-policy-load: + cmd.run: + - name: /usr/sbin/so-elasticsearch-ilm-policy-load + - cwd: /opt/so + - require: + - docker_container: so-elasticsearch + - file: so-elasticsearch-ilm-policy-load-script + - onchanges: + - file: so-elasticsearch-ilm-policy-load-script + +so-elasticsearch-templates-reload: + file.absent: + - name: /opt/so/state/estemplates.txt + +addon-elasticsearch-templates-reload: + file.absent: + - name: /opt/so/state/addon_estemplates.txt + +# so-elasticsearch-templates-load will have its first successful run during the 'so-elastic-fleet-setup' script +so-elasticsearch-templates: + cmd.run: +{%- if GLOBALS.role == "so-heavynode" %} + - name: /usr/sbin/so-elasticsearch-templates-load --heavynode +{%- else %} + - name: /usr/sbin/so-elasticsearch-templates-load +{%- endif %} + - cwd: /opt/so + - template: jinja + - require: + - docker_container: so-elasticsearch + - file: elasticsearch_sbin_jinja + +so-elasticsearch-pipelines: + cmd.run: + - name: /usr/sbin/so-elasticsearch-pipelines {{ GLOBALS.hostname }} + - require: + - docker_container: so-elasticsearch + - file: so-elasticsearch-pipelines-script + +so-elasticsearch-roles-load: + cmd.run: + - name: /usr/sbin/so-elasticsearch-roles-load + - cwd: /opt/so + - template: jinja + - require: + - docker_container: so-elasticsearch + - file: elasticsearch_sbin_jinja + +{% if grains.role in ['so-managersearch', 'so-manager', 'so-managerhype'] %} +{% set ap = "absent" %} +{% endif %} +{% if grains.role in ['so-eval', 'so-standalone', 'so-heavynode'] %} +{% if ELASTICSEARCHMERGED.index_clean %} +{% set ap = "present" %} +{% else %} +{% set ap = "absent" %} +{% endif %} +{% endif %} +{% if grains.role in ['so-eval', 'so-standalone', 'so-managersearch', 'so-heavynode', 'so-manager'] %} +so-elasticsearch-indices-delete: + cron.{{ap}}: + - name: /usr/sbin/so-elasticsearch-indices-delete > /opt/so/log/elasticsearch/cron-elasticsearch-indices-delete.log 2>&1 + - identifier: so-elasticsearch-indices-delete + - user: root + - minute: '*/5' + - hour: '*' + - daymonth: '*' + - month: '*' + - dayweek: '*' +{% endif %} + +{% else %} + +{{sls}}_state_not_allowed: + test.fail_without_changes: + - name: {{sls}}_state_not_allowed + +{% endif %} diff --git a/salt/elasticsearch/enabled.sls b/salt/elasticsearch/enabled.sls index f4031ee5d..ab12b875e 100644 --- a/salt/elasticsearch/enabled.sls +++ b/salt/elasticsearch/enabled.sls @@ -10,10 +10,6 @@ {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCH_NODES %} {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCH_SEED_HOSTS %} {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCHMERGED %} -{% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS, SO_MANAGED_INDICES %} -{% if GLOBALS.role != 'so-heavynode' %} -{% from 'elasticsearch/template.map.jinja' import ALL_ADDON_SETTINGS %} -{% endif %} include: - ca @@ -21,6 +17,9 @@ include: - elasticsearch.ssl - elasticsearch.config - elasticsearch.sostatus +{%- if GLOBALS.role != 'so-searchnode' %} + - elasticsearch.cluster +{%- endif %} so-elasticsearch: docker_container.running: @@ -108,150 +107,19 @@ delete_so-elasticsearch_so-status.disabled: - name: /opt/so/conf/so-status/so-status.conf - regex: ^so-elasticsearch$ -{% if GLOBALS.role != "so-searchnode" %} -escomponenttemplates: - file.recurse: - - name: /opt/so/conf/elasticsearch/templates/component - - source: salt://elasticsearch/templates/component - - user: 930 - - group: 939 - - clean: True - - 
onchanges_in: - - file: so-elasticsearch-templates-reload - - show_changes: False - -# Clean up legacy and non-SO managed templates from the elasticsearch/templates/index/ directory -so_index_template_dir: - file.directory: - - name: /opt/so/conf/elasticsearch/templates/index - - clean: True - {%- if SO_MANAGED_INDICES %} - - require: - {%- for index in SO_MANAGED_INDICES %} - - file: so_index_template_{{index}} - {%- endfor %} - {%- endif %} - -# Auto-generate index templates for SO managed indices (directly defined in elasticsearch/defaults.yaml) -# These index templates are for the core SO datasets and are always required -{% for index, settings in ES_INDEX_SETTINGS.items() %} -{% if settings.index_template is defined %} -so_index_template_{{index}}: - file.managed: - - name: /opt/so/conf/elasticsearch/templates/index/{{ index }}-template.json - - source: salt://elasticsearch/base-template.json.jinja - - defaults: - TEMPLATE_CONFIG: {{ settings.index_template }} - - template: jinja - - onchanges_in: - - file: so-elasticsearch-templates-reload -{% endif %} -{% endfor %} - -{% if GLOBALS.role != "so-heavynode" %} -# Auto-generate optional index templates for integration | input | content packages -# These index templates are not used by default (until user adds package to an agent policy). -# Pre-configured with standard defaults, and incorporated into SOC configuration for user customization. -{% for index,settings in ALL_ADDON_SETTINGS.items() %} -{% if settings.index_template is defined %} -addon_index_template_{{index}}: - file.managed: - - name: /opt/so/conf/elasticsearch/templates/addon-index/{{ index }}-template.json - - source: salt://elasticsearch/base-template.json.jinja - - defaults: - TEMPLATE_CONFIG: {{ settings.index_template }} - - template: jinja - - show_changes: False - - onchanges_in: - - file: addon-elasticsearch-templates-reload -{% endif %} -{% endfor %} -{% endif %} - -{% if GLOBALS.role in GLOBALS.manager_roles %} -so-es-cluster-settings: - cmd.run: - - name: /usr/sbin/so-elasticsearch-cluster-settings - - cwd: /opt/so - - template: jinja +wait_for_so-elasticsearch: + http.wait_for_successful_query: + - name: "https://localhost:9200/" + - username: 'so_elastic' + - password: '{{ ELASTICSEARCHMERGED.auth.users.so_elastic_user.pass }}' + - ssl: True + - verify_ssl: False + - status: 200 + - wait_for: 300 + - request_interval: 15 + - backend: requests - require: - docker_container: so-elasticsearch - - file: elasticsearch_sbin_jinja -{% endif %} - -# heavynodes will only load ILM policies for SO managed indices. 
(Indicies defined in elasticsearch/defaults.yaml) -so-elasticsearch-ilm-policy-load: - cmd.run: - - name: /usr/sbin/so-elasticsearch-ilm-policy-load - - cwd: /opt/so - - require: - - docker_container: so-elasticsearch - - file: so-elasticsearch-ilm-policy-load-script - - onchanges: - - file: so-elasticsearch-ilm-policy-load-script - -so-elasticsearch-templates-reload: - file.absent: - - name: /opt/so/state/estemplates.txt - -addon-elasticsearch-templates-reload: - file.absent: - - name: /opt/so/state/addon_estemplates.txt - -# so-elasticsearch-templates-load will have its first successful run during the 'so-elastic-fleet-setup' script -so-elasticsearch-templates: - cmd.run: -{%- if GLOBALS.role == "so-heavynode" %} - - name: /usr/sbin/so-elasticsearch-templates-load --heavynode -{%- else %} - - name: /usr/sbin/so-elasticsearch-templates-load -{%- endif %} - - cwd: /opt/so - - template: jinja - - require: - - docker_container: so-elasticsearch - - file: elasticsearch_sbin_jinja - -so-elasticsearch-pipelines: - cmd.run: - - name: /usr/sbin/so-elasticsearch-pipelines {{ GLOBALS.hostname }} - - require: - - docker_container: so-elasticsearch - - file: so-elasticsearch-pipelines-script - -so-elasticsearch-roles-load: - cmd.run: - - name: /usr/sbin/so-elasticsearch-roles-load - - cwd: /opt/so - - template: jinja - - require: - - docker_container: so-elasticsearch - - file: elasticsearch_sbin_jinja - -{% if grains.role in ['so-managersearch', 'so-manager', 'so-managerhype'] %} -{% set ap = "absent" %} -{% endif %} -{% if grains.role in ['so-eval', 'so-standalone', 'so-heavynode'] %} -{% if ELASTICSEARCHMERGED.index_clean %} -{% set ap = "present" %} -{% else %} -{% set ap = "absent" %} -{% endif %} -{% endif %} -{% if grains.role in ['so-eval', 'so-standalone', 'so-managersearch', 'so-heavynode', 'so-manager'] %} -so-elasticsearch-indices-delete: - cron.{{ap}}: - - name: /usr/sbin/so-elasticsearch-indices-delete > /opt/so/log/elasticsearch/cron-elasticsearch-indices-delete.log 2>&1 - - identifier: so-elasticsearch-indices-delete - - user: root - - minute: '*/5' - - hour: '*' - - daymonth: '*' - - month: '*' - - dayweek: '*' -{% endif %} - -{% endif %} {% else %} From 31383bd9d0fff009f7fbdd5d10b8bfeba4f3f7a4 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 15:43:50 -0400 Subject: [PATCH 052/110] Make Telegraf Postgres templates idempotent Use CREATE TABLE IF NOT EXISTS and a WHERE-guarded create_parent() so a Telegraf restart can re-run the templates safely after manual DB surgery. Add an explicit tag_table_create_templates mirroring the plugin default with IF NOT EXISTS for the same reason. --- salt/telegraf/etc/telegraf.conf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 6d46095f8..d28dc7f96 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -114,10 +114,15 @@ # ALTER it before create_parent(). And create_parent() splits # p_parent_table on '.' to look up raw identifiers, so the literal must # be 'schema.name' (not '"schema"."name"' as .table|quoteLiteral emits). + # IF NOT EXISTS keeps the three templates idempotent so a Telegraf + # restart after any DB-side surgery re-runs them safely. 
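+  # The WHERE NOT EXISTS guard below skips create_parent() when the parent
+  # is already registered in partman.part_config, so re-runs are no-ops.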
create_templates = [ - '''CREATE TABLE {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', + '''CREATE TABLE IF NOT EXISTS {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''', '''ALTER TABLE {{ .table }} ALTER COLUMN "time" SET NOT NULL''', - '''SELECT partman.create_parent(p_parent_table := {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3)''' + '''SELECT partman.create_parent(p_parent_table := {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3) WHERE NOT EXISTS (SELECT 1 FROM partman.part_config WHERE parent_table = {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }})''' + ] + tag_table_create_templates = [ + '''CREATE TABLE IF NOT EXISTS {{ .table }} ({{ .columns }}, PRIMARY KEY (tag_id))''' ] {% endraw %} {%- endif %} From 759880a800ae2c2d4fc25a5a4efa5ad4a6dd4f87 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 16:43:41 -0400 Subject: [PATCH 053/110] Wait for TCP-ready postgres, not the init-phase Unix socket docker-entrypoint.sh runs the init-scripts phase with listen_addresses='' (Unix socket only). The old pg_isready check passed there and then raced the docker_temp_server_stop shutdown before the final postgres started. pg_isready -h 127.0.0.1 only returns success once the real CMD binds TCP, so downstream psql execs never land during the shutdown window. --- salt/postgres/telegraf_users.sls | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 804065bae..4719d363a 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -11,19 +11,22 @@ {% if TG_OUT in ['POSTGRES', 'BOTH'] %} # docker_container.running returns as soon as the container starts, but on -# first-init docker-entrypoint.sh runs init scripts and then restarts -# postgres, so the next docker exec can hit "the database system is shutting -# down". Wait for pg_isready before any psql work. +# first-init docker-entrypoint.sh starts a temporary postgres with +# `listen_addresses=''` to run /docker-entrypoint-initdb.d scripts, then +# shuts it down before exec'ing the real CMD. A default pg_isready check +# (Unix socket) passes during that ephemeral phase and races the shutdown +# with "the database system is shutting down". Checking TCP readiness on +# 127.0.0.1 only succeeds after the final postgres binds the port. postgres_wait_ready: cmd.run: - name: | for i in $(seq 1 60); do - if docker exec so-postgres pg_isready -U postgres -q 2>/dev/null; then + if docker exec so-postgres pg_isready -h 127.0.0.1 -U postgres -q 2>/dev/null; then exit 0 fi sleep 2 done - echo "so-postgres did not become ready within 120s" >&2 + echo "so-postgres did not accept TCP connections within 120s" >&2 exit 1 - require: - docker_container: so-postgres From dd39db4584b38889c67c1f416097dd2397dcf5de Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 18:59:39 -0400 Subject: [PATCH 054/110] Drop so_telegraf_trim cron.absent tombstone feature/postgres never shipped the original cron.present, so this cleanup state is a no-op on every fresh install. The script itself stays on disk for emergency use. 
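An illustrative way to confirm nothing stayed scheduled on a fresh install (so_telegraf_trim is the identifier the removed state used):

    crontab -l -u root 2>/dev/null | grep so_telegraf_trim || echo "no so_telegraf_trim entry"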
--- salt/postgres/enabled.sls | 9 --------- 1 file changed, 9 deletions(-) diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls index b6a51580f..52b6440e8 100644 --- a/salt/postgres/enabled.sls +++ b/salt/postgres/enabled.sls @@ -80,15 +80,6 @@ delete_so-postgres_so-status.disabled: - name: /opt/so/conf/so-status/so-status.conf - regex: ^so-postgres$ -# Retention is now handled by pg_partman (hourly maintenance via pg_cron -# scheduled from postgres/telegraf_users.sls). The so-telegraf-trim script -# stays on disk for manual/emergency use but is no longer scheduled. -so_telegraf_trim: - cron.absent: - - name: /usr/sbin/so-telegraf-trim >> /opt/so/log/postgres/telegraf-trim.log 2>&1 - - identifier: so_telegraf_trim - - user: root - {% else %} {{sls}}_state_not_allowed: From f3181b204a1c02a42ebc76e74d0db9fb6ed75f8c Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 19:06:16 -0400 Subject: [PATCH 055/110] Remove so-telegraf-trim and update retention description pg_partman drops old partitions hourly; row-DELETE retention is obsolete and a confusing emergency fallback on partitioned tables. --- salt/postgres/soc_postgres.yaml | 2 +- salt/postgres/tools/sbin/so-telegraf-trim | 103 ---------------------- 2 files changed, 1 insertion(+), 104 deletions(-) delete mode 100644 salt/postgres/tools/sbin/so-telegraf-trim diff --git a/salt/postgres/soc_postgres.yaml b/salt/postgres/soc_postgres.yaml index 167772e3f..8b4e22921 100644 --- a/salt/postgres/soc_postgres.yaml +++ b/salt/postgres/soc_postgres.yaml @@ -1,7 +1,7 @@ postgres: telegraf: retention_days: - description: Number of days of Telegraf metrics to keep in the so_telegraf database. Older rows are deleted nightly by so-telegraf-trim. + description: Number of days of Telegraf metrics to keep in the so_telegraf database. Older partitions are dropped hourly by pg_partman. forcedType: int advanced: True helpLink: influxdb diff --git a/salt/postgres/tools/sbin/so-telegraf-trim b/salt/postgres/tools/sbin/so-telegraf-trim deleted file mode 100644 index 664469d0c..000000000 --- a/salt/postgres/tools/sbin/so-telegraf-trim +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/bash - -# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one -# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at -# https://securityonion.net/license; you may not use this file except in compliance with the -# Elastic License 2.0. - -# Deletes Telegraf metric rows older than the configured retention window from -# every minion schema in the so_telegraf database. Intended to run daily from -# cron. Retention comes from pillar (postgres.telegraf.retention_days), -# defaulting to 14 days. An explicit --days argument overrides the pillar. - -. /usr/sbin/so-common - -usage() { - cat </dev/null) -fi -if ! [[ "$DAYS" =~ ^[0-9]+$ ]] || [ "$DAYS" -lt 1 ]; then - DAYS=14 -fi - -log() { - echo "$(date '+%Y-%m-%d %H:%M:%S') so-telegraf-trim: $*" -} - -so_psql() { - docker exec so-postgres psql -U postgres -d so_telegraf -At -F $'\t' "$@" -} - -if ! docker exec so-postgres psql -U postgres -lqt 2>/dev/null | cut -d\| -f1 | grep -qw so_telegraf; then - log "Database so_telegraf not present; nothing to trim." - exit 0 -fi - -log "Trimming rows older than ${DAYS} days (dry_run=${DRY_RUN})." - -TOTAL_DELETED=0 - -# Every metric table in the shared telegraf schema has a 'time' column. 
-# Tag tables (_tag) don't, so filtering on the column presence is -# enough to scope the trim to metric tables only. -ROWS=$(so_psql -c " - SELECT table_schema || '.' || table_name - FROM information_schema.columns - WHERE column_name = 'time' - AND data_type IN ('timestamp with time zone', 'timestamp without time zone') - AND table_schema = 'telegraf' - ORDER BY 1;") - -if [ -z "$ROWS" ]; then - log "No telegraf metric tables found." - exit 0 -fi - -for qualified in $ROWS; do - if [ "$DRY_RUN" -eq 1 ]; then - count=$(so_psql -c "SELECT count(*) FROM \"${qualified%.*}\".\"${qualified#*.}\" WHERE time < now() - interval '${DAYS} days';") - log "would delete ${count:-0} rows from ${qualified}" - else - # RETURNING count via a CTE so we can log how much was trimmed per table - deleted=$(so_psql -c " - WITH d AS ( - DELETE FROM \"${qualified%.*}\".\"${qualified#*.}\" - WHERE time < now() - interval '${DAYS} days' - RETURNING 1 - ) - SELECT count(*) FROM d;") - deleted=${deleted:-0} - TOTAL_DELETED=$((TOTAL_DELETED + deleted)) - [ "$deleted" -gt 0 ] && log "deleted ${deleted} rows from ${qualified}" - fi -done - -if [ "$DRY_RUN" -eq 0 ]; then - log "Trim complete. Total rows deleted: ${TOTAL_DELETED}." -fi From 3f46caaf0285625f3ddd1f66964cb67da45b412b Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Fri, 17 Apr 2026 19:10:07 -0400 Subject: [PATCH 056/110] Revoke PUBLIC CONNECT on securityonion database Per-minion telegraf roles inherit CONNECT via PUBLIC by default and could open sessions to the SOC database (though they have no readable grants inside). Close the soft edge by revoking PUBLIC's CONNECT and re-granting it to so_postgres only. --- salt/postgres/files/init-users.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/salt/postgres/files/init-users.sh b/salt/postgres/files/init-users.sh index e1be5df19..79387adaa 100644 --- a/salt/postgres/files/init-users.sh +++ b/salt/postgres/files/init-users.sh @@ -15,6 +15,12 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E END \$\$; GRANT ALL PRIVILEGES ON DATABASE "$POSTGRES_DB" TO "$SO_POSTGRES_USER"; + -- Lock the SOC database down at the connect layer; PUBLIC gets CONNECT + -- by default, which would let per-minion telegraf roles open sessions + -- here. They have no schema/table grants inside so reads fail, but + -- revoking CONNECT closes the soft edge entirely. + REVOKE CONNECT ON DATABASE "$POSTGRES_DB" FROM PUBLIC; + GRANT CONNECT ON DATABASE "$POSTGRES_DB" TO "$SO_POSTGRES_USER"; EOSQL # Bootstrap the Telegraf metrics database. Per-minion roles + schemas are From ee437265fc62c9d66acc5d1ad8efc3cfcb6e558d Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 20 Apr 2026 12:00:02 -0400 Subject: [PATCH 057/110] monitor raid for vms --- salt/common/tools/sbin_jinja/so-raid-status | 11 +++++++++-- salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/salt/common/tools/sbin_jinja/so-raid-status b/salt/common/tools/sbin_jinja/so-raid-status index 3fe238c23..ca3c34608 100755 --- a/salt/common/tools/sbin_jinja/so-raid-status +++ b/salt/common/tools/sbin_jinja/so-raid-status @@ -9,7 +9,7 @@ . 
/usr/sbin/so-common -software_raid=("SOSMN" "SOSMN-DE02" "SOSSNNV" "SOSSNNV-DE02" "SOS10k-DE02" "SOS10KNV" "SOS10KNV-DE02" "SOS10KNV-DE02" "SOS2000-DE02" "SOS-GOFAST-LT-DE02" "SOS-GOFAST-MD-DE02" "SOS-GOFAST-HV-DE02") +software_raid=("SOSMN" "SOSMN-DE02" "SOSSNNV" "SOSSNNV-DE02" "SOS10k-DE02" "SOS10KNV" "SOS10KNV-DE02" "SOS10KNV-DE02" "SOS2000-DE02" "SOS-GOFAST-LT-DE02" "SOS-GOFAST-MD-DE02" "SOS-GOFAST-HV-DE02" "HVGUEST") hardware_raid=("SOS1000" "SOS1000F" "SOSSN7200" "SOS5000" "SOS4000") {%- if salt['grains.get']('sosmodel', '') %} @@ -87,6 +87,11 @@ check_boss_raid() { } check_software_raid() { + if [[ ! -f /proc/mdstat ]]; then + SWRAID=0 + return + fi + SWRC=$(grep "_" /proc/mdstat) if [[ -n $SWRC ]]; then # RAID is failed in some way @@ -107,7 +112,9 @@ if [[ "$is_hwraid" == "true" ]]; then fi if [[ "$is_softwareraid" == "true" ]]; then check_software_raid - check_boss_raid + if [ "$model" != "HVGUEST" ]; then + check_boss_raid + fi fi sum=$(($SWRAID + $BOSSRAID + $HWRAID)) diff --git a/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja b/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja index f8e5d5555..44bef4108 100644 --- a/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja +++ b/salt/salt/cloud/cloud.profiles.d/socloud.conf.jinja @@ -27,6 +27,7 @@ sool9_{{host}}: log_file: /opt/so/log/salt/minion grains: hypervisor_host: {{host ~ "_" ~ role}} + sosmodel: HVGUEST preflight_cmds: - | {%- set hostnames = [MANAGERHOSTNAME] %} From 8225d41661b00f878b5a77c73f34b5a921879214 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Mon, 20 Apr 2026 12:10:05 -0400 Subject: [PATCH 058/110] Harden postgres secrets, TLS enforcement, and admin tooling - Deliver postgres super and app passwords via mounted 0600 secret files (POSTGRES_PASSWORD_FILE, SO_POSTGRES_PASS_FILE) instead of plaintext env vars visible in docker inspect output - Mount a managed pg_hba.conf that only allows local trust and hostssl scram-sha-256 so TCP clients cannot negotiate cleartext sessions - Restrict postgres.key to 0400 and ensure owner/group 939 - Set umask 0077 on so-postgres-backup output - Validate host values in so-stats-show against [A-Za-z0-9._-] before SQL interpolation so a compromised minion cannot inject SQL via a tag value - Coerce postgres:telegraf:retention_days to int before rendering into SQL - Escape single quotes when rendering pillar values into postgresql.conf - Own postgres tooling in /usr/sbin as root:root so a container escape cannot rewrite admin scripts - Gate ES migration TLS verification on esVerifyCert (default false, matching the elastic module's existing pattern) --- salt/backup/tools/sbin/so-postgres-backup | 3 ++ salt/postgres/config.sls | 43 +++++++++++++++++++++-- salt/postgres/defaults.yaml | 1 + salt/postgres/enabled.sls | 17 ++++++--- salt/postgres/files/init-users.sh | 3 ++ salt/postgres/files/pg_hba.conf.jinja | 15 ++++++++ salt/postgres/files/postgresql.conf.jinja | 2 +- salt/postgres/ssl.sls | 3 +- salt/postgres/telegraf_users.sls | 2 +- salt/postgres/tools/sbin/so-stats-show | 13 +++++++ salt/soc/defaults.map.jinja | 2 +- 11 files changed, 94 insertions(+), 10 deletions(-) create mode 100644 salt/postgres/files/pg_hba.conf.jinja diff --git a/salt/backup/tools/sbin/so-postgres-backup b/salt/backup/tools/sbin/so-postgres-backup index c577f7b59..9db522336 100644 --- a/salt/backup/tools/sbin/so-postgres-backup +++ b/salt/backup/tools/sbin/so-postgres-backup @@ -7,6 +7,9 @@ . /usr/sbin/so-common +# Backups contain role password hashes and full chat data; keep them 0600. 
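+# (umask 0077 means files are created 0600 and directories 0700)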
+umask 0077 + TODAY=$(date '+%Y_%m_%d') BACKUPDIR=/nsm/backup BACKUPFILE="$BACKUPDIR/so-postgres-backup-$TODAY.sql.gz" diff --git a/salt/postgres/config.sls b/salt/postgres/config.sls index 25bcf6ad3..76a926d59 100644 --- a/salt/postgres/config.sls +++ b/salt/postgres/config.sls @@ -15,6 +15,14 @@ postgresconfdir: - group: 939 - makedirs: True +postgressecretsdir: + file.directory: + - name: /opt/so/conf/postgres/secrets + - user: 939 + - group: 939 + - mode: 700 + - makedirs: True + postgresdatadir: file.directory: - name: /nsm/postgres @@ -54,12 +62,43 @@ postgresconf: - defaults: PGMERGED: {{ PGMERGED }} +postgreshba: + file.managed: + - name: /opt/so/conf/postgres/pg_hba.conf + - source: salt://postgres/files/pg_hba.conf.jinja + - user: 939 + - group: 939 + - mode: 640 + - template: jinja + +postgres_super_secret: + file.managed: + - name: /opt/so/conf/postgres/secrets/postgres_password + - user: 939 + - group: 939 + - mode: 600 + - contents_pillar: 'secrets:postgres_pass' + - show_changes: False + - require: + - file: postgressecretsdir + +postgres_app_secret: + file.managed: + - name: /opt/so/conf/postgres/secrets/so_postgres_pass + - user: 939 + - group: 939 + - mode: 600 + - contents_pillar: 'postgres:auth:users:so_postgres_user:pass' + - show_changes: False + - require: + - file: postgressecretsdir + postgres_sbin: file.recurse: - name: /usr/sbin - source: salt://postgres/tools/sbin - - user: 939 - - group: 939 + - user: root + - group: root - file_mode: 755 {% else %} diff --git a/salt/postgres/defaults.yaml b/salt/postgres/defaults.yaml index 30523cda9..7ad82f453 100644 --- a/salt/postgres/defaults.yaml +++ b/salt/postgres/defaults.yaml @@ -11,6 +11,7 @@ postgres: ssl_cert_file: '/conf/postgres.crt' ssl_key_file: '/conf/postgres.key' ssl_ca_file: '/conf/ca.crt' + hba_file: '/conf/pg_hba.conf' log_destination: 'stderr' logging_collector: 'off' log_min_messages: 'warning' diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls index 52b6440e8..4c5838466 100644 --- a/salt/postgres/enabled.sls +++ b/salt/postgres/enabled.sls @@ -7,9 +7,7 @@ {% if sls.split('.')[0] in allowed_states %} {% from 'vars/globals.map.jinja' import GLOBALS %} {% from 'docker/docker.map.jinja' import DOCKERMERGED %} -{% set PASSWORD = salt['pillar.get']('secrets:postgres_pass') %} {% set SO_POSTGRES_USER = salt['pillar.get']('postgres:auth:users:so_postgres_user:user', 'so_postgres') %} -{% set SO_POSTGRES_PASS = salt['pillar.get']('postgres:auth:users:so_postgres_user:pass', '') %} include: - postgres.auth @@ -31,9 +29,12 @@ so-postgres: {% endfor %} - environment: - POSTGRES_DB=securityonion - - POSTGRES_PASSWORD={{ PASSWORD }} + # Passwords are delivered via mounted 0600 secret files, not plaintext env vars. + # The upstream postgres image resolves POSTGRES_PASSWORD_FILE; entrypoint.sh and + # init-users.sh resolve SO_POSTGRES_PASS_FILE the same way. 
+ - POSTGRES_PASSWORD_FILE=/run/secrets/postgres_password - SO_POSTGRES_USER={{ SO_POSTGRES_USER }} - - SO_POSTGRES_PASS={{ SO_POSTGRES_PASS }} + - SO_POSTGRES_PASS_FILE=/run/secrets/so_postgres_pass {% if DOCKERMERGED.containers['so-postgres'].extra_env %} {% for XTRAENV in DOCKERMERGED.containers['so-postgres'].extra_env %} - {{ XTRAENV }} @@ -43,6 +44,8 @@ so-postgres: - /opt/so/log/postgres/:/log:rw - /nsm/postgres:/var/lib/postgresql/data:rw - /opt/so/conf/postgres/postgresql.conf:/conf/postgresql.conf:ro + - /opt/so/conf/postgres/pg_hba.conf:/conf/pg_hba.conf:ro + - /opt/so/conf/postgres/secrets:/run/secrets:ro - /opt/so/conf/postgres/init/init-users.sh:/docker-entrypoint-initdb.d/init-users.sh:ro - /etc/pki/postgres.crt:/conf/postgres.crt:ro - /etc/pki/postgres.key:/conf/postgres.key:ro @@ -66,12 +69,18 @@ so-postgres: {% endif %} - watch: - file: postgresconf + - file: postgreshba - file: postgresinitusers + - file: postgres_super_secret + - file: postgres_app_secret - x509: postgres_crt - x509: postgres_key - require: - file: postgresconf + - file: postgreshba - file: postgresinitusers + - file: postgres_super_secret + - file: postgres_app_secret - x509: postgres_crt - x509: postgres_key diff --git a/salt/postgres/files/init-users.sh b/salt/postgres/files/init-users.sh index 79387adaa..e28b11f0f 100644 --- a/salt/postgres/files/init-users.sh +++ b/salt/postgres/files/init-users.sh @@ -4,6 +4,9 @@ set -e # Create or update application user for SOC platform access # This script runs on first database initialization via docker-entrypoint-initdb.d # The password is properly escaped to handle special characters +if [ -z "${SO_POSTGRES_PASS:-}" ] && [ -n "${SO_POSTGRES_PASS_FILE:-}" ] && [ -r "$SO_POSTGRES_PASS_FILE" ]; then + SO_POSTGRES_PASS="$(< "$SO_POSTGRES_PASS_FILE")" +fi psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL DO \$\$ BEGIN diff --git a/salt/postgres/files/pg_hba.conf.jinja b/salt/postgres/files/pg_hba.conf.jinja new file mode 100644 index 000000000..1d6a22a04 --- /dev/null +++ b/salt/postgres/files/pg_hba.conf.jinja @@ -0,0 +1,15 @@ +{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one + or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at + https://securityonion.net/license; you may not use this file except in compliance with the + Elastic License 2.0. #} +# Managed by Salt — do not edit by hand. +# Client authentication config: only local (Unix socket) connections and TLS-wrapped TCP +# connections are accepted. Plain-text `host ...` lines are intentionally omitted so a +# misconfigured client with sslmode=disable cannot negotiate a cleartext session. + +# Local connections (Unix socket, container-internal) use peer/trust. +local all all trust + +# TCP connections MUST use TLS (hostssl) and authenticate with SCRAM. +hostssl all all 0.0.0.0/0 scram-sha-256 +hostssl all all ::/0 scram-sha-256 diff --git a/salt/postgres/files/postgresql.conf.jinja b/salt/postgres/files/postgresql.conf.jinja index 6833b3dbc..2ddc52a51 100644 --- a/salt/postgres/files/postgresql.conf.jinja +++ b/salt/postgres/files/postgresql.conf.jinja @@ -4,5 +4,5 @@ Elastic License 2.0. 
#} {% for key, value in PGMERGED.config.items() %} -{{ key }} = '{{ value }}' +{{ key }} = '{{ value | string | replace("'", "''") }}' {% endfor %} diff --git a/salt/postgres/ssl.sls b/salt/postgres/ssl.sls index ebd3ccbc9..4223ead34 100644 --- a/salt/postgres/ssl.sls +++ b/salt/postgres/ssl.sls @@ -42,7 +42,8 @@ postgresKeyperms: file.managed: - replace: False - name: /etc/pki/postgres.key - - mode: 640 + - mode: 400 + - user: 939 - group: 939 {% else %} diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 4719d363a..cab65d8a8 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -119,7 +119,7 @@ postgres_telegraf_role_{{ u }}: # Reconcile partman retention from pillar. Runs after role/schema setup so # any partitioned parents Telegraf has already created get their retention # refreshed whenever postgres.telegraf.retention_days changes. -{% set retention = salt['pillar.get']('postgres:telegraf:retention_days', 14) %} +{% set retention = salt['pillar.get']('postgres:telegraf:retention_days', 14) | int %} postgres_telegraf_retention_reconcile: cmd.run: - name: | diff --git a/salt/postgres/tools/sbin/so-stats-show b/salt/postgres/tools/sbin/so-stats-show index bfc81887a..102b51ccd 100644 --- a/salt/postgres/tools/sbin/so-stats-show +++ b/salt/postgres/tools/sbin/so-stats-show @@ -42,6 +42,15 @@ esac FILTER_HOST="${1:-}" SCHEMA="telegraf" +# Host values are interpolated into SQL below. Hostnames are [A-Za-z0-9._-]; +# any other character in a tag value or CLI arg is rejected to prevent a +# stored-tag (or CLI) → SQL injection via a compromised Telegraf writer. +HOST_RE='^[A-Za-z0-9._-]+$' +if [ -n "$FILTER_HOST" ] && ! [[ "$FILTER_HOST" =~ $HOST_RE ]]; then + echo "Invalid host filter: $FILTER_HOST" >&2 + exit 1 +fi + so_psql() { docker exec so-postgres psql -U postgres -d so_telegraf -At -F $'\t' "$@" } @@ -78,6 +87,10 @@ print_metric() { } for host in $HOSTS; do + if ! 
[[ "$host" =~ $HOST_RE ]]; then + echo "Skipping host with invalid characters in tag value: $host" >&2 + continue + fi if [ -n "$FILTER_HOST" ] && [ "$host" != "$FILTER_HOST" ]; then continue fi diff --git a/salt/soc/defaults.map.jinja b/salt/soc/defaults.map.jinja index 46ae7e8fd..00a9604f6 100644 --- a/salt/soc/defaults.map.jinja +++ b/salt/soc/defaults.map.jinja @@ -26,7 +26,7 @@ {% if GLOBALS.postgres is defined and GLOBALS.postgres.auth is defined %} {% set PG_ADMIN_PASS = salt['pillar.get']('secrets:postgres_pass', '') %} -{% do SOCDEFAULTS.soc.config.server.modules.update({'postgres': {'hostUrl': GLOBALS.manager_ip, 'port': 5432, 'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass, 'adminUser': 'postgres', 'adminPassword': PG_ADMIN_PASS, 'dbname': 'securityonion', 'sslMode': 'require', 'assistantEnabled': true, 'esHostUrl': 'https://' ~ GLOBALS.manager_ip ~ ':9200', 'esUsername': GLOBALS.elasticsearch.auth.users.so_elastic_user.user, 'esPassword': GLOBALS.elasticsearch.auth.users.so_elastic_user.pass}}) %} +{% do SOCDEFAULTS.soc.config.server.modules.update({'postgres': {'hostUrl': GLOBALS.manager_ip, 'port': 5432, 'username': GLOBALS.postgres.auth.users.so_postgres_user.user, 'password': GLOBALS.postgres.auth.users.so_postgres_user.pass, 'adminUser': 'postgres', 'adminPassword': PG_ADMIN_PASS, 'dbname': 'securityonion', 'sslMode': 'require', 'assistantEnabled': true, 'esHostUrl': 'https://' ~ GLOBALS.manager_ip ~ ':9200', 'esUsername': GLOBALS.elasticsearch.auth.users.so_elastic_user.user, 'esPassword': GLOBALS.elasticsearch.auth.users.so_elastic_user.pass, 'esVerifyCert': false}}) %} {% endif %} {% do SOCDEFAULTS.soc.config.server.modules.influxdb.update({'hostUrl': 'https://' ~ GLOBALS.influxdb_host ~ ':8086'}) %} From 1cb34b089cc6e678504e514a161c128d22dfbce3 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Mon, 20 Apr 2026 14:38:55 -0400 Subject: [PATCH 059/110] Restore 3/dev soup and add postgres users to post_to_3.1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit feature/postgres had rewritten the 3.1.0 upgrade block, dropping the elastic upgrade work 3/dev landed for 9.0.8→9.3.3: elasticsearch_backup_index_templates, the component template state cleanup, and the /usr/sbin/so-kibana-space-defaults post-upgrade call. It also carried an older ES upgrade mapping (8.18.8→9.0.8) that was superseded on 3/dev (9.0.8→9.3.3 for 3.0.0-20260331), and a handful of latent shell-quoting regressions in verify_es_version_compatibility and the intermediate-upgrade helpers. Adopt the 3/dev soup verbatim and only add the new Telegraf Postgres provisioning to post_to_3.1.0 on top of so-kibana-space-defaults. --- salt/manager/tools/sbin/soup | 94 +++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 5ed66134f..c25358418 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -362,9 +362,8 @@ preupgrade_changes() { # This function is to add any new pillar items if needed. echo "Checking to see if changes are needed." - [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 - [[ "$INSTALLEDVERSION" == 3.0.0 ]] && up_to_3.1.0 - + [[ "$INSTALLEDVERSION" =~ ^2\.4\.21[0-9]+$ ]] && up_to_3.0.0 + [[ "$INSTALLEDVERSION" == "3.0.0" ]] && up_to_3.1.0 true } @@ -373,8 +372,7 @@ postupgrade_changes() { echo "Running post upgrade processes." 
[[ "$POSTVERSION" =~ ^2\.4\.21[0-9]+$ ]] && post_to_3.0.0 - [[ "$POSTVERSION" =~ 3.0.0 ]] && post_to_3.1.0 - + [[ "$POSTVERSION" == "3.0.0" ]] && post_to_3.1.0 true } @@ -385,7 +383,7 @@ check_minimum_version() { fi } -### 3.0.0 Start ### +### 3.0.0 Scripts ### convert_suricata_yes_no() { echo "Starting suricata yes/no values to true/false conversion." @@ -449,7 +447,6 @@ migrate_pcap_to_suricata() { } up_to_3.0.0() { - determine_elastic_agent_upgrade migrate_pcap_to_suricata INSTALLEDVERSION=3.0.0 @@ -473,13 +470,26 @@ post_to_3.0.0() { ### 3.0.0 End ### -### 3.1.0 Start ### +### 3.1.0 Scripts ### + +elasticsearch_backup_index_templates() { + echo "Backing up current elasticsearch index templates in /opt/so/conf/elasticsearch/templates/index/ to /nsm/backup/3.0.0_elasticsearch_index_templates.tar.gz" + tar -czf /nsm/backup/3.0.0_elasticsearch_index_templates.tar.gz -C /opt/so/conf/elasticsearch/templates/index/ . +} + up_to_3.1.0() { - echo "Nothing to do" + determine_elastic_agent_upgrade + elasticsearch_backup_index_templates + # Clear existing component template state file. + rm -f /opt/so/state/esfleet_component_templates.json + + INSTALLEDVERSION=3.1.0 } post_to_3.1.0() { + /usr/sbin/so-kibana-space-defaults + # Provision per-minion Telegraf Postgres users for every minion known to the # manager. postgres.auth iterates manage.up to generate any missing passwords; # postgres.telegraf_users reconciles the roles and schemas inside the so-postgres @@ -493,6 +503,7 @@ post_to_3.1.0() { ### 3.1.0 End ### + repo_sync() { echo "Sync the local repo." su socore -c '/usr/sbin/so-repo-sync' || fail "Unable to complete so-repo-sync." @@ -752,12 +763,12 @@ verify_es_version_compatibility() { local is_active_intermediate_upgrade=1 # supported upgrade paths for SO-ES versions declare -A es_upgrade_map=( - ["8.18.8"]="9.0.8" + ["9.0.8"]="9.3.3" ) # Elasticsearch MUST upgrade through these versions declare -A es_to_so_version=( - ["8.18.8"]="2.4.190-20251024" + ["9.0.8"]="3.0.0-20260331" ) # Get current Elasticsearch version @@ -769,26 +780,17 @@ verify_es_version_compatibility() { exit 160 fi - if ! target_es_version_raw=$(so-yaml.py get $UPDATE_DIR/salt/elasticsearch/defaults.yaml elasticsearch.version); then - # so-yaml.py failed to get the ES version from upgrade versions elasticsearch/defaults.yaml file. Likely they are upgrading to an SO version older than 2.4.110 prior to the ES version pinning and should be OKAY to continue with the upgrade. + if ! target_es_version=$(so-yaml.py get -r $UPDATE_DIR/salt/elasticsearch/defaults.yaml elasticsearch.version); then + echo "Couldn't determine the target Elasticsearch version (post soup version) to ensure compatibility with current Elasticsearch version. Exiting" - # if so-yaml.py failed to get the ES version AND the version we are upgrading to is newer than 2.4.110 then we should bail - if [[ $(cat $UPDATE_DIR/VERSION | cut -d'.' -f3) > 110 ]]; then - echo "Couldn't determine the target Elasticsearch version (post soup version) to ensure compatibility with current Elasticsearch version. 
Exiting" - - exit 160 - fi - - # allow upgrade to version < 2.4.110 without checking ES version compatibility - return 0 - else - target_es_version=$(sed -n '1p' <<< "$target_es_version_raw") + exit 160 fi for statefile in "${es_required_version_statefile_base}"-*; do [[ -f $statefile ]] || continue - local es_required_version_statefile_value=$(cat "$statefile") + local es_required_version_statefile_value + es_required_version_statefile_value=$(cat "$statefile") if [[ "$es_required_version_statefile_value" == "$target_es_version" ]]; then echo "Intermediate upgrade to ES $target_es_version is in progress. Skipping Elasticsearch version compatibility check." @@ -797,7 +799,7 @@ verify_es_version_compatibility() { fi # use sort to check if es_required_statefile_value is < the current es_version. - if [[ "$(printf '%s\n' $es_required_version_statefile_value $es_version | sort -V | head -n1)" == "$es_required_version_statefile_value" ]]; then + if [[ "$(printf '%s\n' "$es_required_version_statefile_value" "$es_version" | sort -V | head -n1)" == "$es_required_version_statefile_value" ]]; then rm -f "$statefile" continue fi @@ -808,8 +810,7 @@ verify_es_version_compatibility() { echo -e "\n##############################################################################################################################\n" echo "A previously required intermediate Elasticsearch upgrade was detected. Verifying that all Searchnodes/Heavynodes have successfully upgraded Elasticsearch to $es_required_version_statefile_value before proceeding with soup to avoid potential data loss! This command can take up to an hour to complete." - timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile" - if [[ $? -ne 0 ]]; then + if ! timeout --foreground 4000 bash "$es_verification_script" "$es_required_version_statefile_value" "$statefile"; then echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" echo "A previous required intermediate Elasticsearch upgrade to $es_required_version_statefile_value has yet to successfully complete across the grid. Please allow time for all Searchnodes/Heavynodes to have upgraded Elasticsearch to $es_required_version_statefile_value before running soup again to avoid potential data loss!" @@ -826,6 +827,7 @@ verify_es_version_compatibility() { return 0 fi + # shellcheck disable=SC2076 # Do not want a regex here eg usage " 8.18.8 9.0.8 " =~ " 9.0.8 " if [[ " ${es_upgrade_map[$es_version]} " =~ " $target_es_version " || "$es_version" == "$target_es_version" ]]; then # supported upgrade return 0 @@ -834,7 +836,7 @@ verify_es_version_compatibility() { if [[ -z "$compatible_versions" ]]; then # If current ES version is not explicitly defined in the upgrade map, we know they have an intermediate upgrade to do. # We default to the lowest ES version defined in es_to_so_version as $first_es_required_version - local first_es_required_version=$(printf '%s\n' "${!es_to_so_version[@]}" | sort -V | head -n1) + first_es_required_version=$(printf '%s\n' "${!es_to_so_version[@]}" | sort -V | head -n1) next_step_so_version=${es_to_so_version[$first_es_required_version]} required_es_upgrade_version="$first_es_required_version" else @@ -853,7 +855,7 @@ verify_es_version_compatibility() { if [[ $is_airgap -eq 0 ]]; then run_airgap_intermediate_upgrade else - if [[ ! 
-z $ISOLOC ]]; then + if [[ -n $ISOLOC ]]; then originally_requested_iso_location="$ISOLOC" fi # Make sure ISOLOC is not set. Network installs that used soup -f would have ISOLOC set. @@ -885,7 +887,8 @@ wait_for_salt_minion_with_restart() { } run_airgap_intermediate_upgrade() { - local originally_requested_so_version=$(cat $UPDATE_DIR/VERSION) + local originally_requested_so_version + originally_requested_so_version=$(cat "$UPDATE_DIR/VERSION") # preserve ISOLOC value, so we can try to use it post intermediate upgrade local originally_requested_iso_location="$ISOLOC" @@ -897,7 +900,8 @@ run_airgap_intermediate_upgrade() { while [[ -z "$next_iso_location" ]] || [[ ! -f "$next_iso_location" && ! -b "$next_iso_location" ]]; do # List removable devices if any are present - local removable_devices=$(lsblk -no PATH,SIZE,TYPE,MOUNTPOINTS,RM | awk '$NF==1') + local removable_devices + removable_devices=$(lsblk -no PATH,SIZE,TYPE,MOUNTPOINTS,RM | awk '$NF==1') if [[ -n "$removable_devices" ]]; then echo "PATH SIZE TYPE MOUNTPOINTS RM" echo "$removable_devices" @@ -918,21 +922,21 @@ run_airgap_intermediate_upgrade() { echo "Using $next_iso_location for required intermediary upgrade." exec bash < Date: Mon, 20 Apr 2026 14:40:32 -0400 Subject: [PATCH 060/110] Fix soup state.apply args for postgres provisioning state.apply takes a single mods argument; space-separated names are not a list, so `state.apply postgres.auth postgres.telegraf_users` was only applying postgres.auth and silently dropping the telegraf_users state. Use comma-separated mods and add queue=True to match the rest of soup. --- salt/manager/tools/sbin/soup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index c25358418..0adffef86 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -496,7 +496,7 @@ post_to_3.1.0() { # container. Then push a telegraf state to every minion so their telegraf.conf # picks up the new credentials on the first apply after soup. echo "Provisioning Telegraf Postgres users for existing minions." - salt-call --local state.apply postgres.auth postgres.telegraf_users || true + salt-call --local state.apply postgres.auth,postgres.telegraf_users queue=True || true POSTVERSION=3.1.0 } From 3ecd19d085fb56f97b56cc7602e36ed56d49f2e4 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Mon, 20 Apr 2026 16:03:02 -0400 Subject: [PATCH 061/110] Move telegraf_output from global pillar to telegraf pillar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Telegraf backend selector lived at global.telegraf_output but it is a Telegraf-scoped setting, not a cross-cutting grid global. Move both the value and the UI annotation under the telegraf pillar so it shows up alongside the other Telegraf tuning knobs in the Configuration UI. - salt/telegraf/defaults.yaml: add telegraf.output: BOTH - salt/telegraf/soc_telegraf.yaml: add telegraf.output annotation - salt/global/defaults.yaml: remove global.telegraf_output - salt/global/soc_global.yaml: remove global.telegraf_output annotation - salt/vars/globals.map.jinja: drop telegraf_output from GLOBALS - salt/firewall/map.jinja: read via pillar.get('telegraf:output') - salt/postgres/telegraf_users.sls: read via pillar.get('telegraf:output') - salt/telegraf/etc/telegraf.conf: read via TELEGRAFMERGED.output - salt/postgres/tools/sbin/so-stats-show: update user-facing docs No behavioral change — default stays BOTH. 
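A quick way to confirm where the selector now resolves from, after a
highstate on the manager. Illustrative commands only; BOTH is the same
fallback the consuming states pass to pillar.get, and the Configuration UI
writes any override into the telegraf pillar:

    # New home: read with the same fallback the states use
    salt-call pillar.get telegraf:output BOTH
    # Old home: should now come back empty
    salt-call pillar.get global:telegraf_output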
--- salt/firewall/map.jinja | 2 +- salt/global/defaults.yaml | 3 +-- salt/global/soc_global.yaml | 8 -------- salt/postgres/telegraf_users.sls | 2 +- salt/postgres/tools/sbin/so-stats-show | 4 ++-- salt/telegraf/defaults.yaml | 1 + salt/telegraf/etc/telegraf.conf | 2 +- salt/telegraf/soc_telegraf.yaml | 9 +++++++++ salt/vars/globals.map.jinja | 1 - 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/salt/firewall/map.jinja b/salt/firewall/map.jinja index 2821f62b4..b0c96de72 100644 --- a/salt/firewall/map.jinja +++ b/salt/firewall/map.jinja @@ -56,7 +56,7 @@ {% endif %} {# Open Postgres (5432) to minion hostgroups when Telegraf is configured to write to Postgres #} -{% set TG_OUT = (GLOBALS.telegraf_output | default('INFLUXDB')) | upper %} +{% set TG_OUT = salt['pillar.get']('telegraf:output', 'BOTH') | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} {% if role.startswith('manager') or role == 'standalone' or role == 'eval' %} {% for r in ['sensor', 'searchnode', 'heavynode', 'receiver', 'fleet', 'idh', 'desktop', 'import'] %} diff --git a/salt/global/defaults.yaml b/salt/global/defaults.yaml index d041306a7..92b9c1c1a 100644 --- a/salt/global/defaults.yaml +++ b/salt/global/defaults.yaml @@ -1,4 +1,3 @@ global: pcapengine: SURICATA - pipeline: REDIS - telegraf_output: BOTH \ No newline at end of file + pipeline: REDIS \ No newline at end of file diff --git a/salt/global/soc_global.yaml b/salt/global/soc_global.yaml index 61646168f..31d9f8d3b 100644 --- a/salt/global/soc_global.yaml +++ b/salt/global/soc_global.yaml @@ -59,13 +59,5 @@ global: description: Allows use of Endgame with Security Onion. This feature requires a license from Endgame. global: True advanced: True - telegraf_output: - description: Selects the backend(s) Telegraf writes metrics to. INFLUXDB keeps the current behavior; POSTGRES writes to the grid's Postgres instance; BOTH dual-writes for migration validation. - options: - - INFLUXDB - - POSTGRES - - BOTH - global: True - advanced: True helpLink: influxdb diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index cab65d8a8..6bcf0900c 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -7,7 +7,7 @@ {% if sls.split('.')[0] in allowed_states %} {% from 'vars/globals.map.jinja' import GLOBALS %} -{% set TG_OUT = (GLOBALS.telegraf_output | default('INFLUXDB')) | upper %} +{% set TG_OUT = salt['pillar.get']('telegraf:output', 'BOTH') | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} # docker_container.running returns as soon as the container starts, but on diff --git a/salt/postgres/tools/sbin/so-stats-show b/salt/postgres/tools/sbin/so-stats-show index 102b51ccd..3cf7a05d8 100644 --- a/salt/postgres/tools/sbin/so-stats-show +++ b/salt/postgres/tools/sbin/so-stats-show @@ -24,7 +24,7 @@ Shows the most recent CPU, memory, disk, and load metrics for each host from the so_telegraf Postgres database. Without an argument, reports on every host that has data. With a host, limits output to that one. -Requires: sudo, so-postgres running, global.telegraf_output set to +Requires: sudo, so-postgres running, telegraf.output set to POSTGRES or BOTH. EOF exit 1 @@ -56,7 +56,7 @@ so_psql() { } if ! docker exec so-postgres psql -U postgres -lqt 2>/dev/null | cut -d\| -f1 | grep -qw so_telegraf; then - echo "Database so_telegraf not found. Is global.telegraf_output set to POSTGRES or BOTH?" + echo "Database so_telegraf not found. Is telegraf.output set to POSTGRES or BOTH?" 
exit 2 fi diff --git a/salt/telegraf/defaults.yaml b/salt/telegraf/defaults.yaml index ef6c2bc77..ead122b0a 100644 --- a/salt/telegraf/defaults.yaml +++ b/salt/telegraf/defaults.yaml @@ -1,5 +1,6 @@ telegraf: enabled: False + output: BOTH config: interval: '30s' metric_batch_size: 1000 diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index d28dc7f96..ee13e33d0 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -8,7 +8,7 @@ {%- set ZEEK_ENABLED = salt['pillar.get']('zeek:enabled', True) %} {%- set MDENGINE = GLOBALS.md_engine %} {%- set LOGSTASH_ENABLED = LOGSTASH_MERGED.enabled %} -{%- set TG_OUT = GLOBALS.telegraf_output | upper %} +{%- set TG_OUT = TELEGRAFMERGED.output | upper %} {%- set PG_HOST = GLOBALS.manager_ip %} {%- set PG_SAFE = GLOBALS.minion_id | replace('.','_') | replace('-','_') | lower %} {%- set PG_USER = 'so_telegraf_' ~ PG_SAFE %} diff --git a/salt/telegraf/soc_telegraf.yaml b/salt/telegraf/soc_telegraf.yaml index 40ae7fed8..4b9a2e3d1 100644 --- a/salt/telegraf/soc_telegraf.yaml +++ b/salt/telegraf/soc_telegraf.yaml @@ -4,6 +4,15 @@ telegraf: forcedType: bool advanced: True helpLink: influxdb + output: + description: Selects the backend(s) Telegraf writes metrics to. INFLUXDB keeps the current behavior; POSTGRES writes to the grid's Postgres instance; BOTH dual-writes for migration validation. + options: + - INFLUXDB + - POSTGRES + - BOTH + global: True + advanced: True + helpLink: influxdb config: interval: description: Data collection interval. diff --git a/salt/vars/globals.map.jinja b/salt/vars/globals.map.jinja index 787691b13..385db02ae 100644 --- a/salt/vars/globals.map.jinja +++ b/salt/vars/globals.map.jinja @@ -24,7 +24,6 @@ 'md_engine': INIT.PILLAR.global.mdengine, 'pcap_engine': GLOBALMERGED.pcapengine, 'pipeline': GLOBALMERGED.pipeline, - 'telegraf_output': GLOBALMERGED.telegraf_output, 'so_version': INIT.PILLAR.global.soversion, 'so_docker_gateway': DOCKERMERGED.gateway, 'so_docker_range': DOCKERMERGED.range, From b69e50542acd23e28d856136b5b0d632e718d863 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Mon, 20 Apr 2026 16:06:01 -0400 Subject: [PATCH 062/110] Use TELEGRAFMERGED for telegraf.output and de-jinja pg_hba.conf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - firewall/map.jinja and postgres/telegraf_users.sls now pull the telegraf output selector through TELEGRAFMERGED so the defaults.yaml value (BOTH) is the source of truth and pillar overrides merge in cleanly. pillar.get with a hardcoded fallback was brittle and would disagree with defaults.yaml if the two ever diverged. - Rename salt/postgres/files/pg_hba.conf.jinja to pg_hba.conf and drop template: jinja from config.sls — the file has no jinja besides the comment header. 
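A cheap sanity check for the de-jinja step, assuming the default salt tree
path /opt/so/saltstack/default used elsewhere in this series: with
template: jinja gone, the deployed file must be byte-identical to its
source.

    diff /opt/so/saltstack/default/salt/postgres/files/pg_hba.conf \
         /opt/so/conf/postgres/pg_hba.conf \
      && echo "pg_hba.conf deploys verbatim; no rendering needed"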
--- salt/firewall/map.jinja | 3 ++- salt/postgres/config.sls | 3 +-- salt/postgres/files/{pg_hba.conf.jinja => pg_hba.conf} | 9 +++++---- salt/postgres/telegraf_users.sls | 3 ++- 4 files changed, 10 insertions(+), 8 deletions(-) rename salt/postgres/files/{pg_hba.conf.jinja => pg_hba.conf} (67%) diff --git a/salt/firewall/map.jinja b/salt/firewall/map.jinja index b0c96de72..61f8215b8 100644 --- a/salt/firewall/map.jinja +++ b/salt/firewall/map.jinja @@ -1,5 +1,6 @@ {% from 'vars/globals.map.jinja' import GLOBALS %} {% from 'docker/docker.map.jinja' import DOCKERMERGED %} +{% from 'telegraf/map.jinja' import TELEGRAFMERGED %} {% import_yaml 'firewall/defaults.yaml' as FIREWALL_DEFAULT %} {# add our ip to self #} @@ -56,7 +57,7 @@ {% endif %} {# Open Postgres (5432) to minion hostgroups when Telegraf is configured to write to Postgres #} -{% set TG_OUT = salt['pillar.get']('telegraf:output', 'BOTH') | upper %} +{% set TG_OUT = TELEGRAFMERGED.output | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} {% if role.startswith('manager') or role == 'standalone' or role == 'eval' %} {% for r in ['sensor', 'searchnode', 'heavynode', 'receiver', 'fleet', 'idh', 'desktop', 'import'] %} diff --git a/salt/postgres/config.sls b/salt/postgres/config.sls index 76a926d59..efa9dba93 100644 --- a/salt/postgres/config.sls +++ b/salt/postgres/config.sls @@ -65,11 +65,10 @@ postgresconf: postgreshba: file.managed: - name: /opt/so/conf/postgres/pg_hba.conf - - source: salt://postgres/files/pg_hba.conf.jinja + - source: salt://postgres/files/pg_hba.conf - user: 939 - group: 939 - mode: 640 - - template: jinja postgres_super_secret: file.managed: diff --git a/salt/postgres/files/pg_hba.conf.jinja b/salt/postgres/files/pg_hba.conf similarity index 67% rename from salt/postgres/files/pg_hba.conf.jinja rename to salt/postgres/files/pg_hba.conf index 1d6a22a04..e7d31c05f 100644 --- a/salt/postgres/files/pg_hba.conf.jinja +++ b/salt/postgres/files/pg_hba.conf @@ -1,7 +1,8 @@ -{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one - or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at - https://securityonion.net/license; you may not use this file except in compliance with the - Elastic License 2.0. #} +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. +# # Managed by Salt — do not edit by hand. # Client authentication config: only local (Unix socket) connections and TLS-wrapped TCP # connections are accepted. 
Plain-text `host ...` lines are intentionally omitted so a diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 6bcf0900c..8d718519c 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -6,8 +6,9 @@ {% from 'allowed_states.map.jinja' import allowed_states %} {% if sls.split('.')[0] in allowed_states %} {% from 'vars/globals.map.jinja' import GLOBALS %} +{% from 'telegraf/map.jinja' import TELEGRAFMERGED %} -{% set TG_OUT = salt['pillar.get']('telegraf:output', 'BOTH') | upper %} +{% set TG_OUT = TELEGRAFMERGED.output | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} # docker_container.running returns as soon as the container starts, but on From 80bf07ffd807d74316bd10aefa0064cb121b4cc5 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Mon, 20 Apr 2026 16:36:37 -0400 Subject: [PATCH 063/110] Flesh out soc_postgres.yaml annotations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Configuration-UI annotations for every postgres pillar key defined in defaults.yaml, not just telegraf.retention_days: - postgres.enabled — readonly; admin-visible but toggled via state - postgres.telegraf.retention_days — drop advanced so user-tunable knobs surface in the default view - postgres.config.max_connections, shared_buffers, log_min_messages — user-tunable performance/verbosity knobs, not advanced - postgres.config.listen_addresses, port, ssl, ssl_cert_file, ssl_key_file, ssl_ca_file, hba_file, log_destination, logging_collector, shared_preload_libraries, cron.database_name — infra/Salt-managed, marked advanced so they're visible but out of the way No defaults.yaml change; value-side stays the same. --- salt/postgres/soc_postgres.yaml | 82 +++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/salt/postgres/soc_postgres.yaml b/salt/postgres/soc_postgres.yaml index 8b4e22921..d111e6923 100644 --- a/salt/postgres/soc_postgres.yaml +++ b/salt/postgres/soc_postgres.yaml @@ -1,7 +1,89 @@ postgres: + enabled: + description: Whether the PostgreSQL database container is enabled on this grid. Backs the assistant store and the Telegraf metrics database. + forcedType: bool + readonly: True + helpLink: influxdb telegraf: retention_days: description: Number of days of Telegraf metrics to keep in the so_telegraf database. Older partitions are dropped hourly by pg_partman. forcedType: int + helpLink: influxdb + config: + max_connections: + description: Maximum number of concurrent PostgreSQL connections. + forcedType: int + global: True + helpLink: influxdb + shared_buffers: + description: Amount of memory PostgreSQL uses for shared buffers (e.g. 256MB, 1GB). Raising this improves read cache hit rate at the cost of system RAM. + global: True + helpLink: influxdb + log_min_messages: + description: Minimum severity of server messages written to the PostgreSQL log. + options: + - debug1 + - info + - notice + - warning + - error + - log + - fatal + global: True + helpLink: influxdb + listen_addresses: + description: Interfaces PostgreSQL listens on. Must remain '*' so clients on the docker bridge network can connect. + global: True + advanced: True + helpLink: influxdb + port: + description: TCP port PostgreSQL listens on inside the container. Firewall rules and container port mapping assume 5432. + forcedType: int + global: True + advanced: True + helpLink: influxdb + ssl: + description: Whether PostgreSQL accepts TLS connections. Must remain 'on' — pg_hba.conf requires hostssl for TCP. 
+ global: True + advanced: True + helpLink: influxdb + ssl_cert_file: + description: Path (inside the container) to the TLS server certificate. Salt-managed. + global: True + advanced: True + helpLink: influxdb + ssl_key_file: + description: Path (inside the container) to the TLS server private key. Salt-managed. + global: True + advanced: True + helpLink: influxdb + ssl_ca_file: + description: Path (inside the container) to the CA bundle PostgreSQL uses to verify client certificates. Salt-managed. + global: True + advanced: True + helpLink: influxdb + hba_file: + description: Path (inside the container) to the pg_hba.conf authentication file. Salt-managed — edit salt/postgres/files/pg_hba.conf. + global: True + advanced: True + helpLink: influxdb + log_destination: + description: Where PostgreSQL writes its server log. 'stderr' routes to the container log stream. + global: True + advanced: True + helpLink: influxdb + logging_collector: + description: Whether to run a separate logging collector process. Disabled because the docker log stream already captures stderr. + global: True + advanced: True + helpLink: influxdb + shared_preload_libraries: + description: Comma-separated list of extensions loaded at server start. Required for pg_cron which drives pg_partman maintenance — do not remove. + global: True + advanced: True + helpLink: influxdb + cron.database_name: + description: Database pg_cron schedules jobs in. Must be so_telegraf so partman maintenance runs in the right database context. + global: True advanced: True helpLink: influxdb From 71da27dc8ef8335dff34db21a4c6de496e0e55f2 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Mon, 20 Apr 2026 17:02:25 -0400 Subject: [PATCH 064/110] fix template annotation --- salt/soc/soc_soc.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index d4e908637..bd3ed9095 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -890,12 +890,16 @@ soc: suricata: description: The template used when creating a new Suricata detection. [publicId] will be replaced with an unused Public Id. multiline: True + forcedType: string strelka: description: The template used when creating a new Strelka detection. multiline: True + forcedType: string elastalert: description: The template used when creating a new ElastAlert detection. [publicId] will be replaced with an unused Public Id. multiline: True + forcedType: string + grid: maxUploadSize: description: The maximum number of bytes for an uploaded PCAP import file. From ee89b78751b2cc4117ec9507ed82c5da14b0f0cd Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Mon, 20 Apr 2026 19:54:06 -0400 Subject: [PATCH 065/110] Fire telegraf user sync on salt/key accept, not salt/auth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit salt/auth fires on every minion authentication — including every minion restart and every master restart — so the reactor was re-running the postgres.auth + postgres.telegraf_users + telegraf orchestration for every already-accepted minion on every reconnect. The underlying states are idempotent, so this was wasted work and log noise, not a correctness issue. Switch the subscription to salt/key, which fires only when the master actually changes a key's state (accept / reject / delete). Match the pattern used by salt/reactor/check_hypervisor.sls (registered in salt/salt/cloud/reactor_config_hypervisor.sls) and add the result==True guard so half-failed key operations don't trigger the orchestration. 
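To see the difference on a live grid, watch the event bus while accepting a
key versus bouncing an existing minion. Illustrative only; new-sensor stands
in for a real minion id:

    # Shell 1: tail the master event bus
    salt-run state.event pretty=True
    # Shell 2: accept a pending key
    salt-key -y -a new-sensor
    # Expect one salt/key event with act: accept and result: True. A minion
    # restart emits only salt/auth events, which the reactor now ignores.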
--- salt/reactor/telegraf_user_sync.sls | 4 ++-- salt/salt/master.sls | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/salt/reactor/telegraf_user_sync.sls b/salt/reactor/telegraf_user_sync.sls index abf35d3b2..ec0aec336 100644 --- a/salt/reactor/telegraf_user_sync.sls +++ b/salt/reactor/telegraf_user_sync.sls @@ -3,8 +3,8 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. -{# Fires on salt/auth. Only act on accepted keys — ignore pending/reject. #} -{% if data.get('act') == 'accept' and data.get('id') %} +{# Fires on salt/key. Only act on successful key acceptance — not reauth. #} +{% if data.get('act') == 'accept' and data.get('result') == True and data.get('id') %} {{ data['id'] }}_telegraf_pg_sync: runner.state.orchestrate: diff --git a/salt/salt/master.sls b/salt/salt/master.sls index 7e3e48074..e61b09d21 100644 --- a/salt/salt/master.sls +++ b/salt/salt/master.sls @@ -67,7 +67,7 @@ reactor_config_telegraf: - name: /etc/salt/master.d/reactor_telegraf.conf - contents: | reactor: - - 'salt/auth': + - 'salt/key': - /opt/so/saltstack/default/salt/reactor/telegraf_user_sync.sls - user: root - group: root From 72105f1f2f29d25039a2f157b4685d73402ce201 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 09:31:45 -0400 Subject: [PATCH 066/110] Drop telegraf push from new-minion orch; highstate covers it New minions run highstate as part of onboarding, which already applies the telegraf state with the fresh pillar entry we just wrote. Pushing telegraf a second time from the reactor is redundant. - Remove the MINION-scoped salt.state block from the orch; keep only the manager-side postgres.auth + postgres.telegraf_users provisioning. - Stop passing minion_id as pillar in the reactor; the orch doesn't reference it anymore. --- salt/orch/telegraf_postgres_sync.sls | 17 +++++------------ salt/reactor/telegraf_user_sync.sls | 2 -- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/salt/orch/telegraf_postgres_sync.sls b/salt/orch/telegraf_postgres_sync.sls index 90c42fc07..f2a3d950f 100644 --- a/salt/orch/telegraf_postgres_sync.sls +++ b/salt/orch/telegraf_postgres_sync.sls @@ -3,9 +3,13 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. -{% set MINION = salt['pillar.get']('minion_id') %} {% set MANAGER = salt['pillar.get']('setup:manager') or salt['grains.get']('master') %} +# Fired by salt/reactor/telegraf_user_sync.sls when salt-key accepts a new +# minion. Only provisions the per-minion pillar entry and DB role on the +# manager; the minion itself will pick up its telegraf config on its first +# highstate during onboarding, so there's no need to push the telegraf state +# from here. 
manager_sync_telegraf_pg_users: salt.state: - tgt: {{ MANAGER }} @@ -13,14 +17,3 @@ manager_sync_telegraf_pg_users: - postgres.auth - postgres.telegraf_users - queue: True - -{% if MINION and MINION != MANAGER %} -{{ MINION }}_apply_telegraf: - salt.state: - - tgt: {{ MINION }} - - sls: - - telegraf - - queue: True - - require: - - salt: manager_sync_telegraf_pg_users -{% endif %} diff --git a/salt/reactor/telegraf_user_sync.sls b/salt/reactor/telegraf_user_sync.sls index ec0aec336..4830dbc53 100644 --- a/salt/reactor/telegraf_user_sync.sls +++ b/salt/reactor/telegraf_user_sync.sls @@ -10,8 +10,6 @@ runner.state.orchestrate: - args: - mods: orch.telegraf_postgres_sync - - pillar: - minion_id: {{ data['id'] }} {% do salt.log.info('telegraf_user_sync reactor: syncing telegraf PG user for minion %s' % data['id']) %} From 37e925769884563364f19b3eada30bfa13a23b92 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 09:33:47 -0400 Subject: [PATCH 067/110] Change so-postgres final_octet to 47 --- salt/docker/defaults.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/docker/defaults.yaml b/salt/docker/defaults.yaml index 900d2cf53..81ff07190 100644 --- a/salt/docker/defaults.yaml +++ b/salt/docker/defaults.yaml @@ -238,7 +238,7 @@ docker: extra_env: [] ulimits: [] 'so-postgres': - final_octet: 89 + final_octet: 47 port_bindings: - 0.0.0.0:5432:5432 custom_bind_mounts: [] From f72c30abd0797c266be63ae86c857ecae92ab203 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 09:35:59 -0400 Subject: [PATCH 068/110] Have postgres.telegraf_users include postgres.enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit postgres_wait_ready requires docker_container: so-postgres, which is declared in postgres.enabled. Running postgres.telegraf_users on its own — as the reactor orch and the soup post-upgrade step both do — errored because Salt couldn't resolve the require. Include postgres.enabled from postgres.telegraf_users so the container state is always in the render. postgres.enabled already includes telegraf_users; Salt de-duplicates the circular include and the included states are all idempotent, so repeated application is a no-op. --- salt/postgres/telegraf_users.sls | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index 8d718519c..dbbc0f03e 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -8,6 +8,13 @@ {% from 'vars/globals.map.jinja' import GLOBALS %} {% from 'telegraf/map.jinja' import TELEGRAFMERGED %} +{# postgres_wait_ready below requires `docker_container: so-postgres`, which is + declared in postgres.enabled. Include it here so state.apply postgres.telegraf_users + on its own (from the reactor orch or from soup) still has that ID in scope. Salt + de-duplicates the circular include. #} +include: + - postgres.enabled + {% set TG_OUT = TELEGRAFMERGED.output | upper %} {% if TG_OUT in ['POSTGRES', 'BOTH'] %} From a902f667ba6c8ea3a4db477793c42535fa2cd357 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 09:37:35 -0400 Subject: [PATCH 069/110] Target manager by role grain in telegraf_postgres_sync orch The previous MANAGER resolution used pillar.get('setup:manager') with a fallback to grains.get('master'). 
Neither works from the reactor: setup:manager is only populated by the setup workflow (not by reactor runs), and grains.master returns the minion's master-hostname setting, not a targetable minion id. Match the pattern used by orch/delete_hypervisor.sls: compound-target whichever minion is the manager via role grain. --- salt/orch/telegraf_postgres_sync.sls | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/salt/orch/telegraf_postgres_sync.sls b/salt/orch/telegraf_postgres_sync.sls index f2a3d950f..94be77137 100644 --- a/salt/orch/telegraf_postgres_sync.sls +++ b/salt/orch/telegraf_postgres_sync.sls @@ -3,16 +3,19 @@ # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. -{% set MANAGER = salt['pillar.get']('setup:manager') or salt['grains.get']('master') %} - # Fired by salt/reactor/telegraf_user_sync.sls when salt-key accepts a new # minion. Only provisions the per-minion pillar entry and DB role on the # manager; the minion itself will pick up its telegraf config on its first # highstate during onboarding, so there's no need to push the telegraf state # from here. +# +# Target the manager via role grains — same pattern as orch/delete_hypervisor.sls. +# The reactor doesn't know the manager's minion id, and grains.master on the +# runner is a hostname, not a targetable id. manager_sync_telegraf_pg_users: salt.state: - - tgt: {{ MANAGER }} + - tgt: 'G@role:so-manager or G@role:so-managerhype or G@role:so-managersearch or G@role:so-standalone or G@role:so-eval' + - tgt_type: compound - sls: - postgres.auth - postgres.telegraf_users From 89a6e7c0dd4745ad9baf8c83ae35a619d42097c4 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 09:39:58 -0400 Subject: [PATCH 070/110] Tidy config.sls makedirs and postgres helpLinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - config.sls: postgresconfdir creates /opt/so/conf/postgres, so the two subdirectories under it (postgressecretsdir, postgresinitdir) don't need their own makedirs — require the parent instead. - soc_postgres.yaml: helpLink for every annotated key now points to 'postgres' instead of the carried-over 'influxdb' slug. --- salt/postgres/config.sls | 6 ++++-- salt/postgres/soc_postgres.yaml | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/salt/postgres/config.sls b/salt/postgres/config.sls index efa9dba93..11ca52649 100644 --- a/salt/postgres/config.sls +++ b/salt/postgres/config.sls @@ -21,7 +21,8 @@ postgressecretsdir: - user: 939 - group: 939 - mode: 700 - - makedirs: True + - require: + - file: postgresconfdir postgresdatadir: file.directory: @@ -42,7 +43,8 @@ postgresinitdir: - name: /opt/so/conf/postgres/init - user: 939 - group: 939 - - makedirs: True + - require: + - file: postgresconfdir postgresinitusers: file.managed: diff --git a/salt/postgres/soc_postgres.yaml b/salt/postgres/soc_postgres.yaml index d111e6923..4b25cd4f5 100644 --- a/salt/postgres/soc_postgres.yaml +++ b/salt/postgres/soc_postgres.yaml @@ -8,17 +8,17 @@ postgres: retention_days: description: Number of days of Telegraf metrics to keep in the so_telegraf database. Older partitions are dropped hourly by pg_partman. forcedType: int - helpLink: influxdb + helpLink: postgres config: max_connections: description: Maximum number of concurrent PostgreSQL connections. 
forcedType: int global: True - helpLink: influxdb + helpLink: postgres shared_buffers: description: Amount of memory PostgreSQL uses for shared buffers (e.g. 256MB, 1GB). Raising this improves read cache hit rate at the cost of system RAM. global: True - helpLink: influxdb + helpLink: postgres log_min_messages: description: Minimum severity of server messages written to the PostgreSQL log. options: @@ -30,60 +30,60 @@ postgres: - log - fatal global: True - helpLink: influxdb + helpLink: postgres listen_addresses: description: Interfaces PostgreSQL listens on. Must remain '*' so clients on the docker bridge network can connect. global: True advanced: True - helpLink: influxdb + helpLink: postgres port: description: TCP port PostgreSQL listens on inside the container. Firewall rules and container port mapping assume 5432. forcedType: int global: True advanced: True - helpLink: influxdb + helpLink: postgres ssl: description: Whether PostgreSQL accepts TLS connections. Must remain 'on' — pg_hba.conf requires hostssl for TCP. global: True advanced: True - helpLink: influxdb + helpLink: postgres ssl_cert_file: description: Path (inside the container) to the TLS server certificate. Salt-managed. global: True advanced: True - helpLink: influxdb + helpLink: postgres ssl_key_file: description: Path (inside the container) to the TLS server private key. Salt-managed. global: True advanced: True - helpLink: influxdb + helpLink: postgres ssl_ca_file: description: Path (inside the container) to the CA bundle PostgreSQL uses to verify client certificates. Salt-managed. global: True advanced: True - helpLink: influxdb + helpLink: postgres hba_file: description: Path (inside the container) to the pg_hba.conf authentication file. Salt-managed — edit salt/postgres/files/pg_hba.conf. global: True advanced: True - helpLink: influxdb + helpLink: postgres log_destination: description: Where PostgreSQL writes its server log. 'stderr' routes to the container log stream. global: True advanced: True - helpLink: influxdb + helpLink: postgres logging_collector: description: Whether to run a separate logging collector process. Disabled because the docker log stream already captures stderr. global: True advanced: True - helpLink: influxdb + helpLink: postgres shared_preload_libraries: description: Comma-separated list of extensions loaded at server start. Required for pg_cron which drives pg_partman maintenance — do not remove. global: True advanced: True - helpLink: influxdb + helpLink: postgres cron.database_name: description: Database pg_cron schedules jobs in. Must be so_telegraf so partman maintenance runs in the right database context. global: True advanced: True - helpLink: influxdb + helpLink: postgres From 84197fb33bba5d128391f344545209c4c3c3ae71 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 09:42:41 -0400 Subject: [PATCH 071/110] Move postgres backup script and cron to the postgres states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The so-postgres-backup script and its cron were living under salt/backup/config_backup.sls, which meant the backup script and cron were deployed independently of whether postgres was enabled/disabled. - Relocate salt/backup/tools/sbin/so-postgres-backup to salt/postgres/tools/sbin/so-postgres-backup so the existing postgres_sbin file.recurse in postgres/config.sls picks it up with everything else — no separate file.managed needed. 
- Remove postgres_backup_script and so_postgres_backup from
  salt/backup/config_backup.sls.
- Add cron.present for so_postgres_backup to salt/postgres/enabled.sls and
  the matching cron.absent to salt/postgres/disabled.sls so the cron follows
  the container's lifecycle.
---
 salt/backup/config_backup.sls          | 19 -------------------
 salt/postgres/disabled.sls             |  6 ++++++
 salt/postgres/enabled.sls              | 11 +++++++++++
 .../tools/sbin/so-postgres-backup      |  0
 4 files changed, 17 insertions(+), 19 deletions(-)
 rename salt/{backup => postgres}/tools/sbin/so-postgres-backup (100%)

diff --git a/salt/backup/config_backup.sls b/salt/backup/config_backup.sls
index c8e342463..a4297444b 100644
--- a/salt/backup/config_backup.sls
+++ b/salt/backup/config_backup.sls
@@ -33,22 +33,3 @@ so_config_backup:
     - month: '*'
     - dayweek: '*'

-postgres_backup_script:
-  file.managed:
-    - name: /usr/sbin/so-postgres-backup
-    - user: root
-    - group: root
-    - mode: 755
-    - source: salt://backup/tools/sbin/so-postgres-backup
-
-# Add postgres database backup
-so_postgres_backup:
-  cron.present:
-    - name: /usr/sbin/so-postgres-backup > /dev/null 2>&1
-    - identifier: so_postgres_backup
-    - user: root
-    - minute: '5'
-    - hour: '0'
-    - daymonth: '*'
-    - month: '*'
-    - dayweek: '*'
diff --git a/salt/postgres/disabled.sls b/salt/postgres/disabled.sls
index 56dc451b7..4b5b62328 100644
--- a/salt/postgres/disabled.sls
+++ b/salt/postgres/disabled.sls
@@ -18,6 +18,12 @@ so-postgres_so-status.disabled:
     - name: /opt/so/conf/so-status/so-status.conf
     - regex: ^so-postgres$

+so_postgres_backup:
+  cron.absent:
+    - name: /usr/sbin/so-postgres-backup > /dev/null 2>&1
+    - identifier: so_postgres_backup
+    - user: root
+
 {% else %}

 {{sls}}_state_not_allowed:
diff --git a/salt/postgres/enabled.sls b/salt/postgres/enabled.sls
index 4c5838466..b3abb621e 100644
--- a/salt/postgres/enabled.sls
+++ b/salt/postgres/enabled.sls
@@ -89,6 +89,17 @@ delete_so-postgres_so-status.disabled:
     - name: /opt/so/conf/so-status/so-status.conf
     - regex: ^so-postgres$

+so_postgres_backup:
+  cron.present:
+    - name: /usr/sbin/so-postgres-backup > /dev/null 2>&1
+    - identifier: so_postgres_backup
+    - user: root
+    - minute: '5'
+    - hour: '0'
+    - daymonth: '*'
+    - month: '*'
+    - dayweek: '*'
+
 {% else %}

 {{sls}}_state_not_allowed:
diff --git a/salt/backup/tools/sbin/so-postgres-backup b/salt/postgres/tools/sbin/so-postgres-backup
similarity index 100%
rename from salt/backup/tools/sbin/so-postgres-backup
rename to salt/postgres/tools/sbin/so-postgres-backup

From bb71e44614f25b299041dbdb46eb908dd908ac4c Mon Sep 17 00:00:00 2001
From: Mike Reeves
Date: Tue, 21 Apr 2026 09:57:35 -0400
Subject: [PATCH 072/110] Write per-minion telegraf creds to each minion's own
 pillar file

pillar/top.sls only distributes postgres.auth to manager-class roles, so
sensors / heavynodes / searchnodes / receivers / fleet / idh / hypervisor /
desktop minions never received the postgres telegraf password they need to
write metrics. Broadcasting the aggregate postgres.auth pillar to every role
would leak the so_postgres admin password and every other minion's cred.

Fan out per-minion credentials into each minion's own pillar file at
/opt/so/saltstack/local/pillar/minions/<minion_id>.sls. That file is already
distributed by pillar/top.sls exclusively to the matching minion via
`- minions.{{ grains.id }}`, so each minion sees only its own
postgres.telegraf.{user,pass} and nothing else.
- salt/postgres/auth.sls: after writing the manager-scoped aggregate
  pillar, fan the per-minion creds out via so-yaml.py replace for every
  up-minion. Creates the minion pillar file if missing. Requires
  postgres_auth_pillar so the manager pillar lands first.
- salt/telegraf/etc/telegraf.conf: consume postgres:telegraf:user and
  postgres:telegraf:pass directly from the minion's own pillar instead of
  walking postgres:auth:users which isn't visible off the manager.
---
 salt/postgres/auth.sls          | 29 +++++++++++++++++++++++++++++
 salt/telegraf/etc/telegraf.conf |  9 ++++++---
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls
index 3da1bcde0..0b94ece99 100644
--- a/salt/postgres/auth.sls
+++ b/salt/postgres/auth.sls
@@ -49,6 +49,35 @@ postgres_auth_pillar:
             pass: "{{ entry.pass }}"
 {% endfor %}
     - show_changes: False
+
+  {# Fan each minion's telegraf cred out to its own pillar file. The minions/
+     <minion_id>.sls file is only served to that specific minion via pillar/top.sls
+     (`- minions.{{ grains.id }}`), so sensors, heavynodes, etc. see their own
+     credential without the admin password or anyone else's. Run per up-minion
+     so we have the original minion id (not just the safe-normalized version). #}
+  {% for mid in up_minions %}
+  {%- set safe = mid | replace('.','_') | replace('-','_') | lower %}
+  {%- set key = 'telegraf_' ~ safe %}
+  {%- set entry = telegraf_users.get(key) %}
+  {%- if entry %}
+
+postgres_telegraf_minion_pillar_{{ safe }}:
+  cmd.run:
+    - name: |
+        set -e
+        PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ mid }}.sls
+        if [ ! -f "$PILLAR_FILE" ]; then
+          echo '{}' > "$PILLAR_FILE"
+          chown socore:socore "$PILLAR_FILE" 2>/dev/null || true
+          chmod 640 "$PILLAR_FILE"
+        fi
+        /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.user '{{ entry.user }}'
+        /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.pass '{{ entry.pass }}'
+    - require:
+      - file: postgres_auth_pillar
+
+  {%- endif %}
+  {% endfor %}

 {% else %}

 {{sls}}_state_not_allowed:
diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf
index ee13e33d0..001a61d93 100644
--- a/salt/telegraf/etc/telegraf.conf
+++ b/salt/telegraf/etc/telegraf.conf
@@ -10,9 +10,12 @@
 {%- set LOGSTASH_ENABLED = LOGSTASH_MERGED.enabled %}
 {%- set TG_OUT = TELEGRAFMERGED.output | upper %}
 {%- set PG_HOST = GLOBALS.manager_ip %}
-{%- set PG_SAFE = GLOBALS.minion_id | replace('.','_') | replace('-','_') | lower %}
-{%- set PG_USER = 'so_telegraf_' ~ PG_SAFE %}
-{%- set PG_PASS = salt['pillar.get']('postgres:auth:users:telegraf_' ~ PG_SAFE ~ ':pass', '') %}
+{#- Per-minion telegraf creds are written into the minion's own pillar file
+    (/opt/so/saltstack/local/pillar/minions/<minion_id>.sls) by postgres.auth on the
+    manager. Each minion only sees its own password — the aggregate map in
+    postgres:auth:users is manager-scoped. #}
+{%- set PG_USER = salt['pillar.get']('postgres:telegraf:user', '') %}
+{%- set PG_PASS = salt['pillar.get']('postgres:telegraf:pass', '') %}
 # Global tags can be specified here in key="value" format.
 [global_tags]
   role = "{{ GLOBALS.role.split('-') | last }}"

From a149ea7e8f39a5da7ca1db2c569ab4072bfad527 Mon Sep 17 00:00:00 2001
From: Mike Reeves
Date: Tue, 21 Apr 2026 09:59:46 -0400
Subject: [PATCH 073/110] Skip per-minion pillar fan-out when cred is already
 in place

Every postgres.auth run was rewriting every minion pillar file via two
so-yaml.py replace calls, even when nothing had changed.
Passwords are only generated on first encounter (see the `if key not in telegraf_users` guard) and never rotate, so re-writing the same values on every apply is wasted work and noisy state output. Add an `unless:` check that compares the already-written postgres.telegraf.user to the one we'd set. If they match, skip the fan-out entirely. On first apply for a new minion the key isn't there, so the replace runs; on subsequent applies it's a no-op. --- salt/postgres/auth.sls | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls index 0b94ece99..44c89c581 100644 --- a/salt/postgres/auth.sls +++ b/salt/postgres/auth.sls @@ -73,6 +73,13 @@ postgres_telegraf_minion_pillar_{{ safe }}: fi /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.user '{{ entry.user }}' /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.pass '{{ entry.pass }}' + {#- Skip if this minion's pillar file already carries a matching user. + Passwords are generated once per minion (see the `if key not in telegraf_users` + guard above) and never rotate, so once a cred is fanned out the file + doesn't need to be rewritten on subsequent auth runs. If we ever add + rotation, we'd need to delete postgres.telegraf to force a re-fan. #} + - unless: | + [ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ] - require: - file: postgres_auth_pillar From 05f6503d61b9faae6d9f8587e1c11725f3407ca4 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 10:05:08 -0400 Subject: [PATCH 074/110] Gate postgres telegraf fan-out on reactor-provided minion id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit postgres.auth was running an `unless` shell check per up-minion on every manager highstate, even when nothing had changed — N fork+python starts of so-yaml.py add up on large grids. The work is only needed when a specific minion's key is accepted. - salt/postgres/auth.sls: fan out only when postgres_fanout_minion pillar is set (targets that single minion). Manager highstates with no pillar take a zero-N code path. - salt/reactor/telegraf_user_sync.sls: re-pass the accepted minion id as postgres_fanout_minion to the orch. - salt/orch/telegraf_postgres_sync.sls: forward the pillar to the salt.state invocation so the state render sees it. - salt/manager/tools/sbin/soup: for the one-time 3.1.0 backfill, drop the per-minion state.apply and do an in-shell loop over the minion pillar files using so-yaml.py directly. Skips minions that already have postgres.telegraf.user set. --- salt/manager/tools/sbin/soup | 28 +++++++++++++++++++++++----- salt/orch/telegraf_postgres_sync.sls | 6 ++++++ salt/postgres/auth.sls | 26 +++++++++++--------------- salt/reactor/telegraf_user_sync.sls | 2 ++ 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 0adffef86..c19fe487e 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -490,14 +490,32 @@ up_to_3.1.0() { post_to_3.1.0() { /usr/sbin/so-kibana-space-defaults - # Provision per-minion Telegraf Postgres users for every minion known to the - # manager. postgres.auth iterates manage.up to generate any missing passwords; - # postgres.telegraf_users reconciles the roles and schemas inside the so-postgres - # container. 
Then push a telegraf state to every minion so their telegraf.conf - # picks up the new credentials on the first apply after soup. + # One-time backfill for minions that existed before the postgres Telegraf + # feature shipped. Generate the aggregate pillar on the manager and create + # the per-minion DB roles, then fan each minion's cred into its own pillar + # file. Going forward the reactor handles each new salt-key accept with a + # targeted fan-out, so a manager highstate no longer needs to iterate. echo "Provisioning Telegraf Postgres users for existing minions." salt-call --local state.apply postgres.auth,postgres.telegraf_users queue=True || true + AGGREGATE_PILLAR=/opt/so/saltstack/local/pillar/postgres/auth.sls + MINIONS_DIR=/opt/so/saltstack/local/pillar/minions + if [[ -f "$AGGREGATE_PILLAR" && -d "$MINIONS_DIR" ]]; then + for pillar_file in "$MINIONS_DIR"/*.sls; do + [[ -f "$pillar_file" ]] || continue + mid=$(basename "$pillar_file" .sls) + [[ "$mid" == adv_* ]] && continue + safe=$(echo "$mid" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') + existing_user=$(so-yaml.py get -r "$pillar_file" postgres.telegraf.user 2>/dev/null || true) + [[ "$existing_user" == "so_telegraf_${safe}" ]] && continue + user=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.user" 2>/dev/null || true) + pass=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.pass" 2>/dev/null || true) + [[ -z "$user" || -z "$pass" ]] && continue + so-yaml.py replace "$pillar_file" postgres.telegraf.user "$user" >/dev/null + so-yaml.py replace "$pillar_file" postgres.telegraf.pass "$pass" >/dev/null + done + fi + POSTVERSION=3.1.0 } diff --git a/salt/orch/telegraf_postgres_sync.sls b/salt/orch/telegraf_postgres_sync.sls index 94be77137..5b11d1619 100644 --- a/salt/orch/telegraf_postgres_sync.sls +++ b/salt/orch/telegraf_postgres_sync.sls @@ -12,6 +12,8 @@ # Target the manager via role grains — same pattern as orch/delete_hypervisor.sls. # The reactor doesn't know the manager's minion id, and grains.master on the # runner is a hostname, not a targetable id. +{% set FANOUT_MINION = salt['pillar.get']('postgres_fanout_minion', '') %} + manager_sync_telegraf_pg_users: salt.state: - tgt: 'G@role:so-manager or G@role:so-managerhype or G@role:so-managersearch or G@role:so-standalone or G@role:so-eval' @@ -20,3 +22,7 @@ manager_sync_telegraf_pg_users: - postgres.auth - postgres.telegraf_users - queue: True + {% if FANOUT_MINION %} + - pillar: + postgres_fanout_minion: {{ FANOUT_MINION }} + {% endif %} diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls index 44c89c581..e0397beba 100644 --- a/salt/postgres/auth.sls +++ b/salt/postgres/auth.sls @@ -50,13 +50,14 @@ postgres_auth_pillar: {% endfor %} - show_changes: False - {# Fan each minion's telegraf cred out to its own pillar file. The minions/ - .sls file is only served to that specific minion via pillar/top.sls - (`- minions.{{ grains.id }}`), so sensors, heavynodes, etc. see their own - credential without the admin password or anyone else's. Run per up-minion - so we have the original minion id (not just the safe-normalized version). #} - {% for mid in up_minions %} - {%- set safe = mid | replace('.','_') | replace('-','_') | lower %} + {# Fan a specific minion's telegraf cred out to its own pillar file. Only + runs when postgres_fanout_minion pillar is provided — otherwise this state + is a no-op. That keeps manager highstates from doing N so-yaml.py forks + when nothing changed. 
The reactor passes postgres_fanout_minion through + the orch on salt-key accept; soup handles bulk backfill separately. #} + {% set fanout_mid = salt['pillar.get']('postgres_fanout_minion') %} + {% if fanout_mid %} + {%- set safe = fanout_mid | replace('.','_') | replace('-','_') | lower %} {%- set key = 'telegraf_' ~ safe %} {%- set entry = telegraf_users.get(key) %} {%- if entry %} @@ -65,7 +66,7 @@ postgres_telegraf_minion_pillar_{{ safe }}: cmd.run: - name: | set -e - PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ mid }}.sls + PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ fanout_mid }}.sls if [ ! -f "$PILLAR_FILE" ]; then echo '{}' > "$PILLAR_FILE" chown socore:socore "$PILLAR_FILE" 2>/dev/null || true @@ -73,18 +74,13 @@ postgres_telegraf_minion_pillar_{{ safe }}: fi /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.user '{{ entry.user }}' /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.pass '{{ entry.pass }}' - {#- Skip if this minion's pillar file already carries a matching user. - Passwords are generated once per minion (see the `if key not in telegraf_users` - guard above) and never rotate, so once a cred is fanned out the file - doesn't need to be rewritten on subsequent auth runs. If we ever add - rotation, we'd need to delete postgres.telegraf to force a re-fan. #} - unless: | - [ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ] + [ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ fanout_mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ] - require: - file: postgres_auth_pillar {%- endif %} - {% endfor %} + {% endif %} {% else %} {{sls}}_state_not_allowed: diff --git a/salt/reactor/telegraf_user_sync.sls b/salt/reactor/telegraf_user_sync.sls index 4830dbc53..075dbf62e 100644 --- a/salt/reactor/telegraf_user_sync.sls +++ b/salt/reactor/telegraf_user_sync.sls @@ -10,6 +10,8 @@ runner.state.orchestrate: - args: - mods: orch.telegraf_postgres_sync + - pillar: + postgres_fanout_minion: {{ data['id'] }} {% do salt.log.info('telegraf_user_sync reactor: syncing telegraf PG user for minion %s' % data['id']) %} From 247091766c516b41e4039d07de52366986f50948 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Tue, 21 Apr 2026 10:18:05 -0400 Subject: [PATCH 075/110] more error handling during image updates --- salt/common/tools/sbin/so-image-common | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/salt/common/tools/sbin/so-image-common b/salt/common/tools/sbin/so-image-common index 5ce2da241..f15f90e73 100755 --- a/salt/common/tools/sbin/so-image-common +++ b/salt/common/tools/sbin/so-image-common @@ -186,8 +186,14 @@ update_docker_containers() { if [ -z "$HOSTNAME" ]; then HOSTNAME=$(hostname) fi - docker tag $CONTAINER_REGISTRY/$IMAGEREPO/$image $HOSTNAME:5000/$IMAGEREPO/$image >> "$LOG_FILE" 2>&1 - docker push $HOSTNAME:5000/$IMAGEREPO/$image >> "$LOG_FILE" 2>&1 + docker tag $CONTAINER_REGISTRY/$IMAGEREPO/$image $HOSTNAME:5000/$IMAGEREPO/$image >> "$LOG_FILE" 2>&1 || { + echo "Unable to tag $image" >> "$LOG_FILE" 2>&1 + exit 1 + } + docker push $HOSTNAME:5000/$IMAGEREPO/$image >> "$LOG_FILE" 2>&1 || { + echo "Unable to push $image" >> "$LOG_FILE" 2>&1 + exit 1 + } fi else echo "There is a problem downloading the $image image. 
Details: " >> "$LOG_FILE" 2>&1 From d5dc28e52689d08983530ef8da04ded2f8c7166f Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 14:40:19 -0400 Subject: [PATCH 076/110] Fan postgres telegraf cred for manager on every auth run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The empty-pillar case produced a telegraf.conf with `user= password=` which libpq misparses ("password=" gets consumed as the user value), yielding `password authentication failed for user "password="` on every manager without a prior fan-out (fresh install, not the salt-key path the reactor handles). Two fixes: - salt/postgres/auth.sls: always fan for grains.id in addition to any postgres_fanout_minion from the reactor, so the manager's own pillar is populated on every postgres.auth run. The existing `unless` guard keeps re-runs idempotent. - salt/telegraf/etc/telegraf.conf: gate the [[outputs.postgresql]] block on PG_USER and PG_PASS being non-empty. If a minion hasn't received its pillar yet the output block simply isn't rendered — the next highstate picks up the creds once the fan-out completes, and in the meantime telegraf keeps running the other outputs instead of erroring with a malformed connection string. --- salt/postgres/auth.sls | 33 +++++++++++++++++++++++---------- salt/telegraf/etc/telegraf.conf | 2 +- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls index e0397beba..ec6f3ec7e 100644 --- a/salt/postgres/auth.sls +++ b/salt/postgres/auth.sls @@ -50,14 +50,27 @@ postgres_auth_pillar: {% endfor %} - show_changes: False - {# Fan a specific minion's telegraf cred out to its own pillar file. Only - runs when postgres_fanout_minion pillar is provided — otherwise this state - is a no-op. That keeps manager highstates from doing N so-yaml.py forks - when nothing changed. The reactor passes postgres_fanout_minion through - the orch on salt-key accept; soup handles bulk backfill separately. #} + {# Fan a specific minion's telegraf cred out to its own pillar file. + Two triggers populate the target list: + - grains.id (always) so the manager's own pillar is populated on every + postgres.auth run — otherwise the manager's telegraf has no cred on + a fresh install and can't write to its own postgres. + - pillar postgres_fanout_minion (when the reactor fires on a new + minion's salt-key accept). + The `unless` guard keeps re-runs idempotent, so this is one so-yaml.py + check per target, not per minion in the grid. Bulk backfill for + already-accepted minions lives in soup. #} + {% set fanout_targets = [] %} + {% if grains.id %} + {%- do fanout_targets.append(grains.id) %} + {% endif %} {% set fanout_mid = salt['pillar.get']('postgres_fanout_minion') %} - {% if fanout_mid %} - {%- set safe = fanout_mid | replace('.','_') | replace('-','_') | lower %} + {% if fanout_mid and fanout_mid not in fanout_targets %} + {%- do fanout_targets.append(fanout_mid) %} + {% endif %} + + {% for mid in fanout_targets %} + {%- set safe = mid | replace('.','_') | replace('-','_') | lower %} {%- set key = 'telegraf_' ~ safe %} {%- set entry = telegraf_users.get(key) %} {%- if entry %} @@ -66,7 +79,7 @@ postgres_telegraf_minion_pillar_{{ safe }}: cmd.run: - name: | set -e - PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ fanout_mid }}.sls + PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ mid }}.sls if [ ! 
-f "$PILLAR_FILE" ]; then echo '{}' > "$PILLAR_FILE" chown socore:socore "$PILLAR_FILE" 2>/dev/null || true @@ -75,12 +88,12 @@ postgres_telegraf_minion_pillar_{{ safe }}: /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.user '{{ entry.user }}' /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.pass '{{ entry.pass }}' - unless: | - [ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ fanout_mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ] + [ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ] - require: - file: postgres_auth_pillar {%- endif %} - {% endif %} + {% endfor %} {% else %} {{sls}}_state_not_allowed: diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 001a61d93..53b96e4ab 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -96,7 +96,7 @@ # insecure_skip_verify = false {%- endif %} -{%- if TG_OUT in ['POSTGRES', 'BOTH'] %} +{%- if TG_OUT in ['POSTGRES', 'BOTH'] and PG_USER and PG_PASS %} # Configuration for sending metrics to PostgreSQL. # options='-c role=so_telegraf' makes every connection SET ROLE to the shared # group role so tables created on first write are owned by so_telegraf, and From 81c0f2b464b83f22be2cc6a6a872f481cef16125 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 14:43:10 -0400 Subject: [PATCH 077/110] so-yaml.py: tolerate missing ancestors in removeKey MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit replace calls removeKey before addKey, so running `so-yaml.py replace` on a new dotted key whose parent doesn't exist — e.g., postgres.auth fanning postgres.telegraf.user into a minion pillar file that has never carried any postgres.* keys — crashed with KeyError: 'postgres' from removeKey recursing into a missing parent dict. Make removeKey a no-op when an intermediate key is absent so that: - `remove` has the natural "remove if exists" semantics, and - `replace` works for brand-new nested keys. --- salt/manager/tools/sbin/so-yaml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/so-yaml.py b/salt/manager/tools/sbin/so-yaml.py index 79dcfcac0..98d2bb8f9 100755 --- a/salt/manager/tools/sbin/so-yaml.py +++ b/salt/manager/tools/sbin/so-yaml.py @@ -285,7 +285,8 @@ def add(args): def removeKey(content, key): pieces = key.split(".", 1) if len(pieces) > 1: - removeKey(content[pieces[0]], pieces[1]) + if pieces[0] in content: + removeKey(content[pieces[0]], pieces[1]) else: content.pop(key, None) From 06a555fafbd405b69fd6a32acbfea51b23543383 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Tue, 21 Apr 2026 14:01:31 -0500 Subject: [PATCH 078/110] urlencode elasticsearch version --- .../sbin_jinja/so-elastic-agent-grid-upgrade | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/salt/elasticfleet/tools/sbin_jinja/so-elastic-agent-grid-upgrade b/salt/elasticfleet/tools/sbin_jinja/so-elastic-agent-grid-upgrade index aafc9c368..01c176adc 100644 --- a/salt/elasticfleet/tools/sbin_jinja/so-elastic-agent-grid-upgrade +++ b/salt/elasticfleet/tools/sbin_jinja/so-elastic-agent-grid-upgrade @@ -5,11 +5,12 @@ # this file except in compliance with the Elastic License 2.0. . /usr/sbin/so-common +. 
/usr/sbin/so-elastic-fleet-common {%- import_yaml 'elasticsearch/defaults.yaml' as ELASTICSEARCHDEFAULTS %} {%- import_yaml 'elasticfleet/defaults.yaml' as ELASTICFLEETDEFAULTS %} {# Optionally override Elasticsearch version for Elastic Agent patch releases #} {%- if ELASTICFLEETDEFAULTS.elasticfleet.patch_version is defined %} -{%- do ELASTICSEARCHDEFAULTS.update({'elasticsearch': {'version': ELASTICFLEETDEFAULTS.elasticfleet.patch_version}}) %} +{%- do ELASTICSEARCHDEFAULTS.elasticsearch.update({'version': ELASTICFLEETDEFAULTS.elasticfleet.patch_version}) %} {%- endif %} # Only run on Managers @@ -19,13 +20,10 @@ if ! is_manager_node; then fi # Get current list of Grid Node Agents that need to be upgraded -RAW_JSON=$(curl -K /opt/so/conf/elasticsearch/curl.config -L "http://localhost:5601/api/fleet/agents?perPage=20&page=1&kuery=NOT%20agent.version%3A%20{{ELASTICSEARCHDEFAULTS.elasticsearch.version}}%20AND%20policy_id%3A%20so-grid-nodes_%2A&showInactive=false&getStatusSummary=true" --retry 3 --retry-delay 30 --fail 2>/dev/null) +if ! RAW_JSON=$(fleet_api "agents?perPage=20&page=1&kuery=NOT%20agent.version%3A%20{{ELASTICSEARCHDEFAULTS.elasticsearch.version | urlencode }}%20AND%20policy_id%3A%20so-grid-nodes_%2A&showInactive=false&getStatusSummary=true" -H 'kbn-xsrf: true' -H 'Content-Type: application/json'); then -# Check to make sure that the server responded with good data - else, bail from script -CHECKSUM=$(jq -r '.page' <<< "$RAW_JSON") -if [ "$CHECKSUM" -ne 1 ]; then - printf "Failed to query for current Grid Agents...\n" - exit 1 + printf "Failed to query for current Grid Agents...\n" + exit 1 fi # Generate list of Node Agents that need updates @@ -36,10 +34,12 @@ if [ "$OUTDATED_LIST" != '[]' ]; then printf "Initiating upgrades for $AGENTNUMBERS Agents to Elastic {{ELASTICSEARCHDEFAULTS.elasticsearch.version}}...\n\n" # Generate updated JSON payload - JSON_STRING=$(jq -n --arg ELASTICVERSION {{ELASTICSEARCHDEFAULTS.elasticsearch.version}} --arg UPDATELIST $OUTDATED_LIST '{"version": $ELASTICVERSION,"agents": $UPDATELIST }') + JSON_STRING=$(jq -n --arg ELASTICVERSION "{{ELASTICSEARCHDEFAULTS.elasticsearch.version}}" --argjson UPDATELIST "$OUTDATED_LIST" '{"version": $ELASTICVERSION,"agents": $UPDATELIST }') # Update Node Agents - curl -K /opt/so/conf/elasticsearch/curl.config -L -X POST "http://localhost:5601/api/fleet/agents/bulk_upgrade" -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING" + if ! fleet_api "agents/bulk_upgrade" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then + printf "Failed to initiate Agent upgrades...\n" + fi else printf "No Agents need updates... Exiting\n\n" exit 0 From 1abfd77351411c4a9477daccafe8448699b3152d Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 15:10:57 -0400 Subject: [PATCH 079/110] Hide telegraf password from console and close so-minion race Two fixes on the postgres telegraf fan-out path: 1. postgres.auth cmd.run leaked the password to the console because Salt always prints the Name: field and `show_changes: False` does not apply to cmd.run. Move the user and password into the `env:` attribute so the shell body still sees them via $PG_USER / $PG_PASS but Salt's state reporter never renders them. 2. so-minion's addMinion -> setupMinionFiles sequence removes the minion pillar file and rewrites it from scratch, which wipes the postgres.telegraf.* entries the reactor may have already written on salt-key accept. 
Add a postgres.auth fan-out step to orch.deploy_newnode (the orch so-minion kicks off after setupMinionFiles) and require it from the new minion's highstate. Idempotent via the existing unless: guard in postgres.auth. --- salt/orch/deploy_newnode.sls | 17 +++++++++++++++++ salt/postgres/auth.sls | 7 +++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/salt/orch/deploy_newnode.sls b/salt/orch/deploy_newnode.sls index c05a812a3..0a2c448ba 100644 --- a/salt/orch/deploy_newnode.sls +++ b/salt/orch/deploy_newnode.sls @@ -12,6 +12,21 @@ attempts: 36 interval: 5 +# so-minion's setupMinionFiles rebuilds the new minion's pillar file from +# scratch, wiping any postgres.telegraf.* entries the reactor may have written +# on salt-key accept. Re-fan the cred here so the highstate below sees it. +# Idempotent via the unless: guard in postgres.auth. +manager_fanout_postgres_telegraf_{{NEWNODE}}: + salt.state: + - tgt: {{ MANAGER }} + - sls: + - postgres.auth + - queue: True + - pillar: + postgres_fanout_minion: {{ NEWNODE }} + - require: + - salt: {{NEWNODE}}_update_mine + # we need to prepare the manager for a new searchnode or heavynode {% if NEWNODE.split('_')|last in ['searchnode', 'heavynode'] %} manager_run_es_soc: @@ -30,3 +45,5 @@ manager_run_es_soc: - tgt: {{ NEWNODE }} - highstate: True - queue: True + - require: + - salt: manager_fanout_postgres_telegraf_{{NEWNODE}} diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls index ec6f3ec7e..beed1f8bd 100644 --- a/salt/postgres/auth.sls +++ b/salt/postgres/auth.sls @@ -85,8 +85,11 @@ postgres_telegraf_minion_pillar_{{ safe }}: chown socore:socore "$PILLAR_FILE" 2>/dev/null || true chmod 640 "$PILLAR_FILE" fi - /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.user '{{ entry.user }}' - /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.pass '{{ entry.pass }}' + /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.user "$PG_USER" + /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.pass "$PG_PASS" + - env: + - PG_USER: '{{ entry.user }}' + - PG_PASS: '{{ entry.pass }}' - unless: | [ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ] - require: From 5f28e9b1916b964b9f67c8209a4dd9e6085fdfab Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 15:34:15 -0400 Subject: [PATCH 080/110] Move per-minion telegraf cred provisioning into so-minion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simpler, race-free replacement for the reactor + orch + fan-out chain. - salt/manager/tools/sbin/so-minion: expand add_telegraf_to_minion to generate a random 72-char password, reuse any existing password from the aggregate pillar, write postgres.telegraf.{user,pass} into the minion's own pillar file, and update the aggregate pillar so postgres.telegraf_users can CREATE ROLE on the next manager apply. Every create function already calls this hook, so add / addVM / setup dispatches are all covered identically and synchronously. - salt/postgres/auth.sls: strip the fanout_targets loop and the postgres_telegraf_minion_pillar_ cmd.run block — it's now redundant. The state still manages the so_postgres admin user and writes the aggregate pillar for postgres.telegraf_users to consume. - salt/reactor/telegraf_user_sync.sls: deleted. - salt/orch/telegraf_postgres_sync.sls: deleted. 
- salt/salt/master.sls: drop the reactor_config_telegraf block that registered the reactor on /etc/salt/master.d/reactor_telegraf.conf. - salt/orch/deploy_newnode.sls: drop the manager_fanout_postgres_telegraf step and the require: it added to the newnode highstate. Back to its original 3/dev shape. No more ephemeral postgres_fanout_minion pillar, no more async salt/key reactor, no more so-minion setupMinionFiles race: the pillar write happens inline inside setupMinionFiles itself. --- salt/manager/tools/sbin/so-minion | 31 ++++++++++++++++++ salt/orch/deploy_newnode.sls | 17 ---------- salt/orch/telegraf_postgres_sync.sls | 28 ---------------- salt/postgres/auth.sls | 48 ---------------------------- salt/reactor/telegraf_user_sync.sls | 18 ----------- salt/salt/master.sls | 13 -------- 6 files changed, 31 insertions(+), 124 deletions(-) delete mode 100644 salt/orch/telegraf_postgres_sync.sls delete mode 100644 salt/reactor/telegraf_user_sync.sls diff --git a/salt/manager/tools/sbin/so-minion b/salt/manager/tools/sbin/so-minion index 76b067817..a770cf21b 100755 --- a/salt/manager/tools/sbin/so-minion +++ b/salt/manager/tools/sbin/so-minion @@ -542,6 +542,37 @@ function add_telegraf_to_minion() { log "ERROR" "Failed to add telegraf configuration to $PILLARFILE" return 1 fi + + # Provision the per-minion postgres Telegraf credential so telegraf.conf + # renders correctly on the minion's first highstate and postgres.telegraf_users + # picks up the matching aggregate entry on the next manager apply. + # + # Writes: + # - postgres.telegraf.{user,pass} into the minion's own pillar file + # (distributed to only this minion via pillar/top.sls). + # - postgres.auth.users.telegraf_.{user,pass} into the aggregate + # pillar so postgres.telegraf_users CREATE ROLE finds it. + # + # An existing password is reused if the aggregate already has one (re-add), + # so rerunning so-minion for the same minion keeps the cred stable. + local MINION_SAFE + MINION_SAFE=$(echo "$MINION_ID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') + local PG_USER="so_telegraf_${MINION_SAFE}" + local AGGREGATE=/opt/so/saltstack/local/pillar/postgres/auth.sls + local PG_PASS="" + if [[ -f "$AGGREGATE" ]]; then + PG_PASS=$(so-yaml.py get -r "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}.pass" 2>/dev/null || true) + fi + if [[ -z "$PG_PASS" ]]; then + PG_PASS=$(tr -dc 'A-Za-z0-9~!@#^&*()_=+[]|;:,.<>?-' < /dev/urandom | head -c 72) + fi + + so-yaml.py replace "$PILLARFILE" postgres.telegraf.user "$PG_USER" >/dev/null + so-yaml.py replace "$PILLARFILE" postgres.telegraf.pass "$PG_PASS" >/dev/null + if [[ -f "$AGGREGATE" ]]; then + so-yaml.py replace "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}.user" "$PG_USER" >/dev/null + so-yaml.py replace "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}.pass" "$PG_PASS" >/dev/null + fi } function add_influxdb_to_minion() { diff --git a/salt/orch/deploy_newnode.sls b/salt/orch/deploy_newnode.sls index 0a2c448ba..c05a812a3 100644 --- a/salt/orch/deploy_newnode.sls +++ b/salt/orch/deploy_newnode.sls @@ -12,21 +12,6 @@ attempts: 36 interval: 5 -# so-minion's setupMinionFiles rebuilds the new minion's pillar file from -# scratch, wiping any postgres.telegraf.* entries the reactor may have written -# on salt-key accept. Re-fan the cred here so the highstate below sees it. -# Idempotent via the unless: guard in postgres.auth. 
-manager_fanout_postgres_telegraf_{{NEWNODE}}: - salt.state: - - tgt: {{ MANAGER }} - - sls: - - postgres.auth - - queue: True - - pillar: - postgres_fanout_minion: {{ NEWNODE }} - - require: - - salt: {{NEWNODE}}_update_mine - # we need to prepare the manager for a new searchnode or heavynode {% if NEWNODE.split('_')|last in ['searchnode', 'heavynode'] %} manager_run_es_soc: @@ -45,5 +30,3 @@ manager_run_es_soc: - tgt: {{ NEWNODE }} - highstate: True - queue: True - - require: - - salt: manager_fanout_postgres_telegraf_{{NEWNODE}} diff --git a/salt/orch/telegraf_postgres_sync.sls b/salt/orch/telegraf_postgres_sync.sls deleted file mode 100644 index 5b11d1619..000000000 --- a/salt/orch/telegraf_postgres_sync.sls +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one -# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at -# https://securityonion.net/license; you may not use this file except in compliance with the -# Elastic License 2.0. - -# Fired by salt/reactor/telegraf_user_sync.sls when salt-key accepts a new -# minion. Only provisions the per-minion pillar entry and DB role on the -# manager; the minion itself will pick up its telegraf config on its first -# highstate during onboarding, so there's no need to push the telegraf state -# from here. -# -# Target the manager via role grains — same pattern as orch/delete_hypervisor.sls. -# The reactor doesn't know the manager's minion id, and grains.master on the -# runner is a hostname, not a targetable id. -{% set FANOUT_MINION = salt['pillar.get']('postgres_fanout_minion', '') %} - -manager_sync_telegraf_pg_users: - salt.state: - - tgt: 'G@role:so-manager or G@role:so-managerhype or G@role:so-managersearch or G@role:so-standalone or G@role:so-eval' - - tgt_type: compound - - sls: - - postgres.auth - - postgres.telegraf_users - - queue: True - {% if FANOUT_MINION %} - - pillar: - postgres_fanout_minion: {{ FANOUT_MINION }} - {% endif %} diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls index beed1f8bd..3da1bcde0 100644 --- a/salt/postgres/auth.sls +++ b/salt/postgres/auth.sls @@ -49,54 +49,6 @@ postgres_auth_pillar: pass: "{{ entry.pass }}" {% endfor %} - show_changes: False - - {# Fan a specific minion's telegraf cred out to its own pillar file. - Two triggers populate the target list: - - grains.id (always) so the manager's own pillar is populated on every - postgres.auth run — otherwise the manager's telegraf has no cred on - a fresh install and can't write to its own postgres. - - pillar postgres_fanout_minion (when the reactor fires on a new - minion's salt-key accept). - The `unless` guard keeps re-runs idempotent, so this is one so-yaml.py - check per target, not per minion in the grid. Bulk backfill for - already-accepted minions lives in soup. #} - {% set fanout_targets = [] %} - {% if grains.id %} - {%- do fanout_targets.append(grains.id) %} - {% endif %} - {% set fanout_mid = salt['pillar.get']('postgres_fanout_minion') %} - {% if fanout_mid and fanout_mid not in fanout_targets %} - {%- do fanout_targets.append(fanout_mid) %} - {% endif %} - - {% for mid in fanout_targets %} - {%- set safe = mid | replace('.','_') | replace('-','_') | lower %} - {%- set key = 'telegraf_' ~ safe %} - {%- set entry = telegraf_users.get(key) %} - {%- if entry %} - -postgres_telegraf_minion_pillar_{{ safe }}: - cmd.run: - - name: | - set -e - PILLAR_FILE=/opt/so/saltstack/local/pillar/minions/{{ mid }}.sls - if [ ! 
-f "$PILLAR_FILE" ]; then - echo '{}' > "$PILLAR_FILE" - chown socore:socore "$PILLAR_FILE" 2>/dev/null || true - chmod 640 "$PILLAR_FILE" - fi - /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.user "$PG_USER" - /usr/sbin/so-yaml.py replace "$PILLAR_FILE" postgres.telegraf.pass "$PG_PASS" - - env: - - PG_USER: '{{ entry.user }}' - - PG_PASS: '{{ entry.pass }}' - - unless: | - [ "$(/usr/sbin/so-yaml.py get -r /opt/so/saltstack/local/pillar/minions/{{ mid }}.sls postgres.telegraf.user 2>/dev/null)" = '{{ entry.user }}' ] - - require: - - file: postgres_auth_pillar - - {%- endif %} - {% endfor %} {% else %} {{sls}}_state_not_allowed: diff --git a/salt/reactor/telegraf_user_sync.sls b/salt/reactor/telegraf_user_sync.sls deleted file mode 100644 index 075dbf62e..000000000 --- a/salt/reactor/telegraf_user_sync.sls +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one -# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at -# https://securityonion.net/license; you may not use this file except in compliance with the -# Elastic License 2.0. - -{# Fires on salt/key. Only act on successful key acceptance — not reauth. #} -{% if data.get('act') == 'accept' and data.get('result') == True and data.get('id') %} - -{{ data['id'] }}_telegraf_pg_sync: - runner.state.orchestrate: - - args: - - mods: orch.telegraf_postgres_sync - - pillar: - postgres_fanout_minion: {{ data['id'] }} - -{% do salt.log.info('telegraf_user_sync reactor: syncing telegraf PG user for minion %s' % data['id']) %} - -{% endif %} diff --git a/salt/salt/master.sls b/salt/salt/master.sls index e61b09d21..895150cd7 100644 --- a/salt/salt/master.sls +++ b/salt/salt/master.sls @@ -62,19 +62,6 @@ engines_config: - name: /etc/salt/master.d/engines.conf - source: salt://salt/files/engines.conf -reactor_config_telegraf: - file.managed: - - name: /etc/salt/master.d/reactor_telegraf.conf - - contents: | - reactor: - - 'salt/key': - - /opt/so/saltstack/default/salt/reactor/telegraf_user_sync.sls - - user: root - - group: root - - mode: 644 - - watch_in: - - service: salt_master_service - # update the bootstrap script when used for salt-cloud salt_bootstrap_cloud: file.managed: From dbf4fb66a4a3c88490e0f033f770322ab7e840e4 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Tue, 21 Apr 2026 15:43:01 -0400 Subject: [PATCH 081/110] Clean up postgres telegraf cred on so-minion delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Paired with the add path in add_telegraf_to_minion: when a minion is removed, drop its entry from the aggregate postgres pillar and drop the matching so_telegraf_ role from the database. Without this, stale entries and DB roles accumulate over time. Makes rotate-password and compromise-recovery both a clean delete+add: so-minion -o=delete -m= so-minion -o=add -m= The first call drops the role and clears the aggregate pillar; the second generates a brand-new password. The cleanup is best-effort — if so-postgres isn't running or the DROP ROLE fails (e.g., the role owns unexpected objects), we log a warning and continue so the minion delete itself never gets blocked by postgres state. Admins can mop up stray roles manually if that happens. 
--- salt/manager/tools/sbin/so-minion | 40 ++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/so-minion b/salt/manager/tools/sbin/so-minion index a770cf21b..4095637c8 100755 --- a/salt/manager/tools/sbin/so-minion +++ b/salt/manager/tools/sbin/so-minion @@ -273,7 +273,7 @@ function deleteMinionFiles () { log "ERROR" "Failed to delete $PILLARFILE" return 1 fi - + rm -f $ADVPILLARFILE if [ $? -ne 0 ]; then log "ERROR" "Failed to delete $ADVPILLARFILE" @@ -281,6 +281,43 @@ function deleteMinionFiles () { fi } +# Remove this minion's postgres Telegraf credential from both the aggregate +# pillar and the postgres database. Paired with add_telegraf_to_minion: +# add/delete cycle both here and in the DB. Always returns 0 so a dead or +# unreachable so-postgres doesn't block minion deletion — in that case we +# log a warning and leave the role behind for manual cleanup. +function remove_postgres_telegraf_from_minion() { + local MINION_SAFE + MINION_SAFE=$(echo "$MINION_ID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') + local PG_USER="so_telegraf_${MINION_SAFE}" + local AGGREGATE=/opt/so/saltstack/local/pillar/postgres/auth.sls + + log "INFO" "Removing postgres telegraf cred for $MINION_ID" + + if [[ -f "$AGGREGATE" ]]; then + so-yaml.py remove "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}" >/dev/null 2>&1 || true + fi + + if docker ps --format '{{.Names}}' 2>/dev/null | grep -q '^so-postgres$'; then + if ! docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf >/dev/null 2>&1 < Date: Tue, 21 Apr 2026 15:45:05 -0400 Subject: [PATCH 082/110] soup: update postgres backfill comment to reflect reactor removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reactor path is gone; so-minion now owns add/delete for new minions. The backfill itself is unchanged — postgres.auth's up_minions fallback fills the aggregate, postgres.telegraf_users creates the roles, and the bash loop fans to per-minion pillar files — so the pre-feature upgrade story still works end-to-end. Just refresh the comment so it isn't misleading. --- salt/manager/tools/sbin/soup | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index c19fe487e..1580e83dd 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -491,10 +491,11 @@ post_to_3.1.0() { /usr/sbin/so-kibana-space-defaults # One-time backfill for minions that existed before the postgres Telegraf - # feature shipped. Generate the aggregate pillar on the manager and create - # the per-minion DB roles, then fan each minion's cred into its own pillar - # file. Going forward the reactor handles each new salt-key accept with a - # targeted fan-out, so a manager highstate no longer needs to iterate. + # feature shipped. postgres.auth's up_minions fallback loop generates any + # missing aggregate pillar entries; postgres.telegraf_users CREATEs the + # matching DB roles; then the bash loop below copies each minion's cred + # into its own pillar file. Going forward, so-minion owns add/delete for + # every new minion, so this backfill is only needed on the upgrade boundary. echo "Provisioning Telegraf Postgres users for existing minions." 
salt-call --local state.apply postgres.auth,postgres.telegraf_users queue=True || true From edd207a9d5d02eb1c4c9be1658d837153369299f Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Wed, 22 Apr 2026 09:20:53 -0400 Subject: [PATCH 083/110] soup update socloud.conf --- salt/manager/tools/sbin/soup | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index a3b5daa23..2d36cf7eb 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -24,6 +24,14 @@ BACKUPTOPFILE=/opt/so/saltstack/default/salt/top.sls.backup SALTUPGRADED=false SALT_CLOUD_INSTALLED=false SALT_CLOUD_CONFIGURED=false +# Check if salt-cloud is installed +if rpm -q salt-cloud &>/dev/null; then + SALT_CLOUD_INSTALLED=true +fi +# Check if salt-cloud is configured +if [[ -f /etc/salt/cloud.profiles.d/socloud.conf ]]; then + SALT_CLOUD_CONFIGURED=true +fi # used to display messages to the user at the end of soup declare -a FINAL_MESSAGE_QUEUE=() @@ -489,6 +497,10 @@ up_to_3.1.0() { post_to_3.1.0() { /usr/sbin/so-kibana-space-defaults + # ensure manager has new version of socloud.conf + if [[ $SALT_CLOUD_CONFIGURED == true ]]; then + salt-call state.apply salt.cloud.config concurrent=True + fi POSTVERSION=3.1.0 } @@ -663,15 +675,6 @@ upgrade_check_salt() { upgrade_salt() { echo "Performing upgrade of Salt from $INSTALLEDSALTVERSION to $NEWSALTVERSION." echo "" - # Check if salt-cloud is installed - if rpm -q salt-cloud &>/dev/null; then - SALT_CLOUD_INSTALLED=true - fi - # Check if salt-cloud is configured - if [[ -f /etc/salt/cloud.profiles.d/socloud.conf ]]; then - SALT_CLOUD_CONFIGURED=true - fi - echo "Removing yum versionlock for Salt." echo "" yum versionlock delete "salt" From 614f32c5e087655c389e1e4ce215ed064b8c7bcc Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 22 Apr 2026 10:55:15 -0400 Subject: [PATCH 084/110] Split postgres auth from per-minion telegraf creds The old flow had two writers for each per-minion Telegraf password (so-minion wrote the minion pillar; postgres.auth regenerated any missing aggregate entries). They drifted on first-boot and there was no trigger to create DB roles when a new minion joined. Split responsibilities: - pillar/postgres/auth.sls (manager-scoped) keeps only the so_postgres admin cred. - pillar/telegraf/creds.sls (grid-wide) holds a {minion_id: {user, pass}} map, shadowed per-install by the local-pillar copy. - salt/manager/tools/sbin/so-telegraf-cred is the single writer: flock, atomic YAML write, PyYAML safe_dump so passwords never round-trip through so-yaml.py's type coercion. Idempotent add, quiet remove. - so-minion's add/remove hooks now shell out to so-telegraf-cred instead of editing pillar files directly. - postgres.telegraf_users iterates the new pillar key and CREATE/ALTERs roles from it; telegraf.conf reads its own entry via grains.id. - orch.deploy_newnode runs postgres.telegraf_users on the manager and refreshes the new minion's pillar before the new node highstates, so the DB role is in place the first time telegraf tries to connect. - soup's post_to_3.1.0 backfills the creds pillar from accepted salt keys (idempotent) and runs postgres.telegraf_users once to reconcile the DB. 
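
For orientation, a populated creds pillar comes out shaped like the
following. The minion ids are hypothetical and the passwords are
shortened here; the real ones are 72 random characters.

  telegraf:
    postgres_creds:
      manager_standalone:
        user: so_telegraf_manager_standalone
        pass: "kX9...~]q"
      sensor01_sensor:
        user: so_telegraf_sensor01_sensor
        pass: "p0)...&7Z"

Each minion's telegraf.conf looks up only its own entry via grains.id
when rendering the [[outputs.postgresql]] block, and the existing
PG_USER/PG_PASS gate means an absent entry still degrades to "skip the
output" rather than a malformed connection string.
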
--- pillar/telegraf/creds.sls | 12 ++ pillar/top.sls | 1 + salt/manager/tools/sbin/so-minion | 52 ++------ salt/manager/tools/sbin/so-telegraf-cred | 159 +++++++++++++++++++++++ salt/manager/tools/sbin/soup | 36 ++--- salt/orch/deploy_newnode.sls | 25 ++++ salt/postgres/auth.sls | 25 +--- salt/postgres/telegraf_users.sls | 8 +- salt/telegraf/etc/telegraf.conf | 12 +- 9 files changed, 233 insertions(+), 97 deletions(-) create mode 100644 pillar/telegraf/creds.sls create mode 100644 salt/manager/tools/sbin/so-telegraf-cred diff --git a/pillar/telegraf/creds.sls b/pillar/telegraf/creds.sls new file mode 100644 index 000000000..8521bfbd9 --- /dev/null +++ b/pillar/telegraf/creds.sls @@ -0,0 +1,12 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +# Per-minion Telegraf Postgres credentials. so-telegraf-cred on the manager is +# the single writer; it mutates /opt/so/saltstack/local/pillar/telegraf/creds.sls +# under flock. Pillar_roots order (local before default) means the populated +# copy shadows this default on any real grid; this file exists so the pillar +# key is always defined on fresh installs and when no minions have creds yet. +telegraf: + postgres_creds: {} diff --git a/pillar/top.sls b/pillar/top.sls index 808182c2b..712629dbf 100644 --- a/pillar/top.sls +++ b/pillar/top.sls @@ -17,6 +17,7 @@ base: - sensoroni.adv_sensoroni - telegraf.soc_telegraf - telegraf.adv_telegraf + - telegraf.creds - versionlock.soc_versionlock - versionlock.adv_versionlock - soc.license diff --git a/salt/manager/tools/sbin/so-minion b/salt/manager/tools/sbin/so-minion index 4095637c8..86bab25e6 100755 --- a/salt/manager/tools/sbin/so-minion +++ b/salt/manager/tools/sbin/so-minion @@ -281,22 +281,18 @@ function deleteMinionFiles () { fi } -# Remove this minion's postgres Telegraf credential from both the aggregate -# pillar and the postgres database. Paired with add_telegraf_to_minion: -# add/delete cycle both here and in the DB. Always returns 0 so a dead or -# unreachable so-postgres doesn't block minion deletion — in that case we +# Remove this minion's postgres Telegraf credential from the shared creds +# pillar and drop the matching role in Postgres. Always returns 0 so a dead +# or unreachable so-postgres doesn't block minion deletion — in that case we # log a warning and leave the role behind for manual cleanup. function remove_postgres_telegraf_from_minion() { local MINION_SAFE MINION_SAFE=$(echo "$MINION_ID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') local PG_USER="so_telegraf_${MINION_SAFE}" - local AGGREGATE=/opt/so/saltstack/local/pillar/postgres/auth.sls log "INFO" "Removing postgres telegraf cred for $MINION_ID" - if [[ -f "$AGGREGATE" ]]; then - so-yaml.py remove "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}" >/dev/null 2>&1 || true - fi + so-telegraf-cred remove "$MINION_ID" >/dev/null 2>&1 || true if docker ps --format '{{.Names}}' 2>/dev/null | grep -q '^so-postgres$'; then if ! docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf >/dev/null 2>&1 <.{user,pass} into the aggregate - # pillar so postgres.telegraf_users CREATE ROLE finds it. - # - # An existing password is reused if the aggregate already has one (re-add), - # so rerunning so-minion for the same minion keeps the cred stable. 
- local MINION_SAFE - MINION_SAFE=$(echo "$MINION_ID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') - local PG_USER="so_telegraf_${MINION_SAFE}" - local AGGREGATE=/opt/so/saltstack/local/pillar/postgres/auth.sls - local PG_PASS="" - if [[ -f "$AGGREGATE" ]]; then - PG_PASS=$(so-yaml.py get -r "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}.pass" 2>/dev/null || true) - fi - if [[ -z "$PG_PASS" ]]; then - PG_PASS=$(tr -dc 'A-Za-z0-9~!@#^&*()_=+[]|;:,.<>?-' < /dev/urandom | head -c 72) - fi - - so-yaml.py replace "$PILLARFILE" postgres.telegraf.user "$PG_USER" >/dev/null - so-yaml.py replace "$PILLARFILE" postgres.telegraf.pass "$PG_PASS" >/dev/null - if [[ -f "$AGGREGATE" ]]; then - so-yaml.py replace "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}.user" "$PG_USER" >/dev/null - so-yaml.py replace "$AGGREGATE" "postgres.auth.users.telegraf_${MINION_SAFE}.pass" "$PG_PASS" >/dev/null + # Provision the per-minion postgres Telegraf credential in the shared + # telegraf/creds.sls pillar. so-telegraf-cred is the only writer; it + # generates a password on first add and is a no-op on re-add so the cred + # is stable across repeated so-minion runs. postgres.telegraf_users on the + # manager creates/updates the DB role from the same pillar. + so-telegraf-cred add "$MINION_ID" + if [ $? -ne 0 ]; then + log "ERROR" "Failed to provision postgres telegraf cred for $MINION_ID" + return 1 fi } diff --git a/salt/manager/tools/sbin/so-telegraf-cred b/salt/manager/tools/sbin/so-telegraf-cred new file mode 100644 index 000000000..35ff7c438 --- /dev/null +++ b/salt/manager/tools/sbin/so-telegraf-cred @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 + +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +""" +Single writer for the Telegraf Postgres credentials pillar. + +Maintains /opt/so/saltstack/local/pillar/telegraf/creds.sls with shape: + + telegraf: + postgres_creds: + : + user: so_telegraf_ + pass: "<72-char random>" + ... + +Called by so-minion on add/delete. PyYAML safe_dump preserves ambiguous +strings as quoted scalars, so passwords never round-trip through type +coercion (unlike so-yaml.py, which would). All mutations are serialized +by an flock on a sibling .creds.lock file. +""" + +import fcntl +import os +import pwd +import secrets +import string +import sys +import tempfile + +import yaml + +CREDS_PATH = "/opt/so/saltstack/local/pillar/telegraf/creds.sls" +LOCK_PATH = "/opt/so/saltstack/local/pillar/telegraf/.creds.lock" +OWNER_USER = "socore" +OWNER_GROUP = "socore" +FILE_MODE = 0o640 +PASSWORD_LEN = 72 +# Matches salt/postgres/auth.sls's DIGITS+LOWERCASE+UPPERCASE+SYMBOLS. +PASSWORD_CHARS = ( + string.digits + + string.ascii_lowercase + + string.ascii_uppercase + + "~!@#^&*()-_=+[]|;:,.<>?" 
+) + + +def safe_minion_id(minion_id): + return minion_id.replace(".", "_").replace("-", "_").lower() + + +def generate_password(): + return "".join(secrets.choice(PASSWORD_CHARS) for _ in range(PASSWORD_LEN)) + + +def load_creds(): + if not os.path.exists(CREDS_PATH): + return {"telegraf": {"postgres_creds": {}}} + with open(CREDS_PATH, "r") as f: + data = yaml.safe_load(f) or {} + if not isinstance(data, dict): + data = {} + data.setdefault("telegraf", {}) + if not isinstance(data["telegraf"], dict): + data["telegraf"] = {} + data["telegraf"].setdefault("postgres_creds", {}) + if not isinstance(data["telegraf"]["postgres_creds"], dict): + data["telegraf"]["postgres_creds"] = {} + return data + + +def atomic_write(data): + os.makedirs(os.path.dirname(CREDS_PATH), exist_ok=True) + fd, tmp_path = tempfile.mkstemp( + prefix=".creds.", suffix=".tmp", dir=os.path.dirname(CREDS_PATH) + ) + try: + with os.fdopen(fd, "w") as f: + yaml.safe_dump(data, f, default_flow_style=False, sort_keys=True) + f.flush() + os.fsync(f.fileno()) + os.chmod(tmp_path, FILE_MODE) + try: + pw = pwd.getpwnam(OWNER_USER) + os.chown(tmp_path, pw.pw_uid, pw.pw_gid) + except KeyError: + pass + os.rename(tmp_path, CREDS_PATH) + except Exception: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + raise + + +def with_lock(fn): + os.makedirs(os.path.dirname(LOCK_PATH), exist_ok=True) + with open(LOCK_PATH, "a+") as lf: + fcntl.flock(lf.fileno(), fcntl.LOCK_EX) + try: + return fn() + finally: + fcntl.flock(lf.fileno(), fcntl.LOCK_UN) + + +def cmd_add(minion_id): + def go(): + data = load_creds() + creds = data["telegraf"]["postgres_creds"] + if minion_id in creds: + return 0 + safe = safe_minion_id(minion_id) + creds[minion_id] = { + "user": "so_telegraf_" + safe, + "pass": generate_password(), + } + atomic_write(data) + return 0 + + return with_lock(go) + + +def cmd_remove(minion_id): + def go(): + data = load_creds() + creds = data["telegraf"]["postgres_creds"] + if minion_id in creds: + del creds[minion_id] + atomic_write(data) + return 0 + + return with_lock(go) + + +def usage(): + print( + "Usage: so-telegraf-cred ", + file=sys.stderr, + ) + return 2 + + +def main(argv): + if len(argv) != 3: + return usage() + op, minion_id = argv[1], argv[2] + if not minion_id: + return usage() + if op == "add": + return cmd_add(minion_id) + if op == "remove": + return cmd_remove(minion_id) + return usage() + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 1580e83dd..2c727c0f7 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -490,32 +490,16 @@ up_to_3.1.0() { post_to_3.1.0() { /usr/sbin/so-kibana-space-defaults - # One-time backfill for minions that existed before the postgres Telegraf - # feature shipped. postgres.auth's up_minions fallback loop generates any - # missing aggregate pillar entries; postgres.telegraf_users CREATEs the - # matching DB roles; then the bash loop below copies each minion's cred - # into its own pillar file. Going forward, so-minion owns add/delete for - # every new minion, so this backfill is only needed on the upgrade boundary. - echo "Provisioning Telegraf Postgres users for existing minions." 
- salt-call --local state.apply postgres.auth,postgres.telegraf_users queue=True || true - - AGGREGATE_PILLAR=/opt/so/saltstack/local/pillar/postgres/auth.sls - MINIONS_DIR=/opt/so/saltstack/local/pillar/minions - if [[ -f "$AGGREGATE_PILLAR" && -d "$MINIONS_DIR" ]]; then - for pillar_file in "$MINIONS_DIR"/*.sls; do - [[ -f "$pillar_file" ]] || continue - mid=$(basename "$pillar_file" .sls) - [[ "$mid" == adv_* ]] && continue - safe=$(echo "$mid" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') - existing_user=$(so-yaml.py get -r "$pillar_file" postgres.telegraf.user 2>/dev/null || true) - [[ "$existing_user" == "so_telegraf_${safe}" ]] && continue - user=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.user" 2>/dev/null || true) - pass=$(so-yaml.py get -r "$AGGREGATE_PILLAR" "postgres.auth.users.telegraf_${safe}.pass" 2>/dev/null || true) - [[ -z "$user" || -z "$pass" ]] && continue - so-yaml.py replace "$pillar_file" postgres.telegraf.user "$user" >/dev/null - so-yaml.py replace "$pillar_file" postgres.telegraf.pass "$pass" >/dev/null - done - fi + # Backfill the Telegraf creds pillar for every accepted minion. so-telegraf-cred + # add is idempotent — it no-ops when an entry already exists — so this is safe + # to run on every soup. The subsequent state.apply creates/updates the matching + # Postgres roles from the reconciled pillar. + echo "Reconciling Telegraf Postgres creds for accepted minions." + for mid in $(salt-key --out=json --list=accepted 2>/dev/null | jq -r '.minions[]?' 2>/dev/null); do + [[ -n "$mid" ]] || continue + /usr/sbin/so-telegraf-cred add "$mid" || echo " warning: so-telegraf-cred add $mid failed" >&2 + done + salt-call --local state.apply postgres.telegraf_users queue=True || true POSTVERSION=3.1.0 } diff --git a/salt/orch/deploy_newnode.sls b/salt/orch/deploy_newnode.sls index c05a812a3..ee241ef33 100644 --- a/salt/orch/deploy_newnode.sls +++ b/salt/orch/deploy_newnode.sls @@ -25,8 +25,33 @@ manager_run_es_soc: - salt: {{NEWNODE}}_update_mine {% endif %} +# so-minion has already added the new minion's entry to telegraf/creds.sls +# via so-telegraf-cred before this orch fires. Reconcile the Postgres role +# on the manager so the new minion can authenticate on its first highstate, +# then refresh the minion's pillar so its telegraf.conf renders with the +# freshly-written cred. +manager_create_postgres_telegraf_role: + salt.state: + - tgt: {{ MANAGER }} + - sls: + - postgres.telegraf_users + - queue: True + - require: + - salt: {{NEWNODE}}_update_mine + +{{NEWNODE}}_refresh_pillar: + salt.function: + - name: saltutil.refresh_pillar + - tgt: {{ NEWNODE }} + - kwarg: + wait: True + - require: + - salt: manager_create_postgres_telegraf_role + {{NEWNODE}}_run_highstate: salt.state: - tgt: {{ NEWNODE }} - highstate: True - queue: True + - require: + - salt: {{NEWNODE}}_refresh_pillar diff --git a/salt/postgres/auth.sls b/salt/postgres/auth.sls index 3da1bcde0..4f486ff02 100644 --- a/salt/postgres/auth.sls +++ b/salt/postgres/auth.sls @@ -13,24 +13,8 @@ {% set CHARS = DIGITS~LOWERCASE~UPPERCASE~SYMBOLS %} {% set so_postgres_user_pass = salt['pillar.get']('postgres:auth:users:so_postgres_user:pass', salt['random.get_str'](72, chars=CHARS)) %} - {# Per-minion Telegraf Postgres credentials. Merge currently-up minions with any #} - {# previously-known entries in pillar so existing passwords persist across runs. 
#} - {% set existing = salt['pillar.get']('postgres:auth:users', {}) %} - {% set up_minions = salt['saltutil.runner']('manage.up') or [] %} - {% set telegraf_users = {} %} - {% for key, entry in existing.items() %} - {%- if key.startswith('telegraf_') and entry.get('user') and entry.get('pass') %} - {%- do telegraf_users.update({key: entry}) %} - {%- endif %} - {% endfor %} - {% for mid in up_minions %} - {%- set safe = mid | replace('.','_') | replace('-','_') | lower %} - {%- set key = 'telegraf_' ~ safe %} - {%- if key not in telegraf_users %} - {%- do telegraf_users.update({key: {'user': 'so_telegraf_' ~ safe, 'pass': salt['random.get_str'](72, chars=CHARS)}}) %} - {%- endif %} - {% endfor %} - +# Admin cred only. Per-minion Telegraf creds live in telegraf/creds.sls, +# managed by /usr/sbin/so-telegraf-cred (called from so-minion). postgres_auth_pillar: file.managed: - name: /opt/so/saltstack/local/pillar/postgres/auth.sls @@ -43,11 +27,6 @@ postgres_auth_pillar: so_postgres_user: user: so_postgres pass: "{{ so_postgres_user_pass }}" - {% for key, entry in telegraf_users.items() %} - {{ key }}: - user: {{ entry.user }} - pass: "{{ entry.pass }}" - {% endfor %} - show_changes: False {% else %} diff --git a/salt/postgres/telegraf_users.sls b/salt/postgres/telegraf_users.sls index dbbc0f03e..62490ea52 100644 --- a/salt/postgres/telegraf_users.sls +++ b/salt/postgres/telegraf_users.sls @@ -10,7 +10,7 @@ {# postgres_wait_ready below requires `docker_container: so-postgres`, which is declared in postgres.enabled. Include it here so state.apply postgres.telegraf_users - on its own (from the reactor orch or from soup) still has that ID in scope. Salt + on its own (e.g. from orch.deploy_newnode) still has that ID in scope. Salt de-duplicates the circular include. #} include: - postgres.enabled @@ -96,9 +96,9 @@ postgres_telegraf_group_role: - require: - cmd: postgres_create_telegraf_db -{% set users = salt['pillar.get']('postgres:auth:users', {}) %} -{% for key, entry in users.items() %} -{% if key.startswith('telegraf_') and entry.get('user') and entry.get('pass') %} +{% set creds = salt['pillar.get']('telegraf:postgres_creds', {}) %} +{% for mid, entry in creds.items() %} +{% if entry.get('user') and entry.get('pass') %} {% set u = entry.user %} {% set p = entry.pass | replace("'", "''") %} diff --git a/salt/telegraf/etc/telegraf.conf b/salt/telegraf/etc/telegraf.conf index 53b96e4ab..02d969ff3 100644 --- a/salt/telegraf/etc/telegraf.conf +++ b/salt/telegraf/etc/telegraf.conf @@ -10,12 +10,12 @@ {%- set LOGSTASH_ENABLED = LOGSTASH_MERGED.enabled %} {%- set TG_OUT = TELEGRAFMERGED.output | upper %} {%- set PG_HOST = GLOBALS.manager_ip %} -{#- Per-minion telegraf creds are written into the minion's own pillar file - (/opt/so/saltstack/local/pillar/minions/.sls) by postgres.auth on the - manager. Each minion only sees its own password — the aggregate map in - postgres:auth:users is manager-scoped. #} -{%- set PG_USER = salt['pillar.get']('postgres:telegraf:user', '') %} -{%- set PG_PASS = salt['pillar.get']('postgres:telegraf:pass', '') %} +{#- Per-minion telegraf creds live in the grid-wide telegraf/creds.sls pillar, + written by /usr/sbin/so-telegraf-cred on the manager. Each minion looks up + its own entry by grains.id. #} +{%- set PG_ENTRY = salt['pillar.get']('telegraf:postgres_creds:' ~ grains.id, {}) %} +{%- set PG_USER = PG_ENTRY.get('user', '') %} +{%- set PG_PASS = PG_ENTRY.get('pass', '') %} # Global tags can be specified here in key="value" format. 
[global_tags] role = "{{ GLOBALS.role.split('-') | last }}" From f240a99e227cb0f000b2a18317fd4a8bcbef2e34 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 22 Apr 2026 11:09:53 -0400 Subject: [PATCH 085/110] so-telegraf-cred: thin bash wrapper around so-yaml.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Swap the ~150-line Python implementation for a 48-line bash script that delegates YAML mutation to so-yaml.py — the same helper so-minion and soup already use. Same semantics: seed the creds pillar on first use, idempotent add, silent remove. SO minion ids are dot-free by construction (setup/so-functions:1884 strips everything after the first '.'), so using the raw id as the so-yaml.py key path is safe. --- salt/manager/tools/sbin/so-telegraf-cred | 191 +++++------------------ 1 file changed, 43 insertions(+), 148 deletions(-) diff --git a/salt/manager/tools/sbin/so-telegraf-cred b/salt/manager/tools/sbin/so-telegraf-cred index 35ff7c438..b2b1ba030 100644 --- a/salt/manager/tools/sbin/so-telegraf-cred +++ b/salt/manager/tools/sbin/so-telegraf-cred @@ -1,159 +1,54 @@ -#!/usr/bin/env python3 +#!/bin/bash # Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one # or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at # https://securityonion.net/license; you may not use this file except in compliance with the # Elastic License 2.0. -""" -Single writer for the Telegraf Postgres credentials pillar. +# Single writer for the Telegraf Postgres credentials pillar. Thin wrapper +# around so-yaml.py that generates a password on first add and no-ops on +# re-add so the cred is stable across repeated so-minion runs. +# +# Note: so-yaml.py splits keys on '.' with no escape. SO minion ids are +# dot-free by construction (setup/so-functions:1884 takes the short_name +# before the first '.'), so using the raw minion id as the key is safe. -Maintains /opt/so/saltstack/local/pillar/telegraf/creds.sls with shape: +CREDS=/opt/so/saltstack/local/pillar/telegraf/creds.sls - telegraf: - postgres_creds: - : - user: so_telegraf_ - pass: "<72-char random>" - ... +usage() { + echo "Usage: $0 " >&2 + exit 2 +} -Called by so-minion on add/delete. PyYAML safe_dump preserves ambiguous -strings as quoted scalars, so passwords never round-trip through type -coercion (unlike so-yaml.py, which would). All mutations are serialized -by an flock on a sibling .creds.lock file. -""" +seed_creds_file() { + mkdir -p "$(dirname "$CREDS")" + if [[ ! -f "$CREDS" ]]; then + (umask 027 && printf 'telegraf:\n postgres_creds: {}\n' > "$CREDS") + chown socore:socore "$CREDS" 2>/dev/null || true + chmod 640 "$CREDS" + fi +} -import fcntl -import os -import pwd -import secrets -import string -import sys -import tempfile +OP=$1 +MID=$2 +[[ -z "$OP" || -z "$MID" ]] && usage -import yaml - -CREDS_PATH = "/opt/so/saltstack/local/pillar/telegraf/creds.sls" -LOCK_PATH = "/opt/so/saltstack/local/pillar/telegraf/.creds.lock" -OWNER_USER = "socore" -OWNER_GROUP = "socore" -FILE_MODE = 0o640 -PASSWORD_LEN = 72 -# Matches salt/postgres/auth.sls's DIGITS+LOWERCASE+UPPERCASE+SYMBOLS. -PASSWORD_CHARS = ( - string.digits - + string.ascii_lowercase - + string.ascii_uppercase - + "~!@#^&*()-_=+[]|;:,.<>?" 
-) - - -def safe_minion_id(minion_id): - return minion_id.replace(".", "_").replace("-", "_").lower() - - -def generate_password(): - return "".join(secrets.choice(PASSWORD_CHARS) for _ in range(PASSWORD_LEN)) - - -def load_creds(): - if not os.path.exists(CREDS_PATH): - return {"telegraf": {"postgres_creds": {}}} - with open(CREDS_PATH, "r") as f: - data = yaml.safe_load(f) or {} - if not isinstance(data, dict): - data = {} - data.setdefault("telegraf", {}) - if not isinstance(data["telegraf"], dict): - data["telegraf"] = {} - data["telegraf"].setdefault("postgres_creds", {}) - if not isinstance(data["telegraf"]["postgres_creds"], dict): - data["telegraf"]["postgres_creds"] = {} - return data - - -def atomic_write(data): - os.makedirs(os.path.dirname(CREDS_PATH), exist_ok=True) - fd, tmp_path = tempfile.mkstemp( - prefix=".creds.", suffix=".tmp", dir=os.path.dirname(CREDS_PATH) - ) - try: - with os.fdopen(fd, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False, sort_keys=True) - f.flush() - os.fsync(f.fileno()) - os.chmod(tmp_path, FILE_MODE) - try: - pw = pwd.getpwnam(OWNER_USER) - os.chown(tmp_path, pw.pw_uid, pw.pw_gid) - except KeyError: - pass - os.rename(tmp_path, CREDS_PATH) - except Exception: - if os.path.exists(tmp_path): - os.unlink(tmp_path) - raise - - -def with_lock(fn): - os.makedirs(os.path.dirname(LOCK_PATH), exist_ok=True) - with open(LOCK_PATH, "a+") as lf: - fcntl.flock(lf.fileno(), fcntl.LOCK_EX) - try: - return fn() - finally: - fcntl.flock(lf.fileno(), fcntl.LOCK_UN) - - -def cmd_add(minion_id): - def go(): - data = load_creds() - creds = data["telegraf"]["postgres_creds"] - if minion_id in creds: - return 0 - safe = safe_minion_id(minion_id) - creds[minion_id] = { - "user": "so_telegraf_" + safe, - "pass": generate_password(), - } - atomic_write(data) - return 0 - - return with_lock(go) - - -def cmd_remove(minion_id): - def go(): - data = load_creds() - creds = data["telegraf"]["postgres_creds"] - if minion_id in creds: - del creds[minion_id] - atomic_write(data) - return 0 - - return with_lock(go) - - -def usage(): - print( - "Usage: so-telegraf-cred ", - file=sys.stderr, - ) - return 2 - - -def main(argv): - if len(argv) != 3: - return usage() - op, minion_id = argv[1], argv[2] - if not minion_id: - return usage() - if op == "add": - return cmd_add(minion_id) - if op == "remove": - return cmd_remove(minion_id) - return usage() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) +case "$OP" in + add) + SAFE=$(echo "$MID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') + seed_creds_file + if so-yaml.py get -r "$CREDS" "telegraf.postgres_creds.${MID}.user" >/dev/null 2>&1; then + exit 0 + fi + PASS=$(tr -dc 'A-Za-z0-9~!@#^&*()_=+[]|;:,.<>?-' < /dev/urandom | head -c 72) + so-yaml.py replace "$CREDS" "telegraf.postgres_creds.${MID}.user" "so_telegraf_${SAFE}" >/dev/null + so-yaml.py replace "$CREDS" "telegraf.postgres_creds.${MID}.pass" "$PASS" >/dev/null + ;; + remove) + [[ -f "$CREDS" ]] || exit 0 + so-yaml.py remove "$CREDS" "telegraf.postgres_creds.${MID}" >/dev/null 2>&1 || true + ;; + *) + usage + ;; +esac From e616b4c1200804df5df98429b05b62b5366eda87 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 22 Apr 2026 14:25:19 -0400 Subject: [PATCH 086/110] so-telegraf-cred: make executable and harden error handling so-telegraf-cred was committed with mode 644, causing `so-telegraf-cred add "$MINION_ID"` in so-minion's add_telegraf_to_minion to fail with "Permission denied" and log "Failed to provision postgres telegraf cred for ". 
Mark it executable. Also bail early in seed_creds_file if mkdir/printf/chmod fail, and in so-yaml.py loadYaml surface a clear stderr message with the filename instead of an unhandled FileNotFoundError traceback. --- salt/manager/tools/sbin/so-telegraf-cred | 8 ++++---- salt/manager/tools/sbin/so-yaml.py | 13 ++++++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) mode change 100644 => 100755 salt/manager/tools/sbin/so-telegraf-cred diff --git a/salt/manager/tools/sbin/so-telegraf-cred b/salt/manager/tools/sbin/so-telegraf-cred old mode 100644 new mode 100755 index b2b1ba030..61d718499 --- a/salt/manager/tools/sbin/so-telegraf-cred +++ b/salt/manager/tools/sbin/so-telegraf-cred @@ -21,11 +21,11 @@ usage() { } seed_creds_file() { - mkdir -p "$(dirname "$CREDS")" + mkdir -p "$(dirname "$CREDS")" || return 1 if [[ ! -f "$CREDS" ]]; then - (umask 027 && printf 'telegraf:\n postgres_creds: {}\n' > "$CREDS") + (umask 027 && printf 'telegraf:\n postgres_creds: {}\n' > "$CREDS") || return 1 chown socore:socore "$CREDS" 2>/dev/null || true - chmod 640 "$CREDS" + chmod 640 "$CREDS" || return 1 fi } @@ -36,7 +36,7 @@ MID=$2 case "$OP" in add) SAFE=$(echo "$MID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]') - seed_creds_file + seed_creds_file || exit 1 if so-yaml.py get -r "$CREDS" "telegraf.postgres_creds.${MID}.user" >/dev/null 2>&1; then exit 0 fi diff --git a/salt/manager/tools/sbin/so-yaml.py b/salt/manager/tools/sbin/so-yaml.py index 98d2bb8f9..d0d5209f9 100755 --- a/salt/manager/tools/sbin/so-yaml.py +++ b/salt/manager/tools/sbin/so-yaml.py @@ -39,9 +39,16 @@ def showUsage(args): def loadYaml(filename): - file = open(filename, "r") - content = file.read() - return yaml.safe_load(content) + try: + with open(filename, "r") as file: + content = file.read() + return yaml.safe_load(content) + except FileNotFoundError: + print(f"File not found: {filename}", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"Error reading file {filename}: {e}", file=sys.stderr) + sys.exit(1) def writeYaml(filename, content): From d5c0ec4404ff9d93c867d2d9c86fc8c8deae8c19 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Wed, 22 Apr 2026 14:30:51 -0400 Subject: [PATCH 087/110] so-yaml_test: cover loadYaml error paths Exercises the FileNotFoundError and generic-exception branches added to loadYaml in the previous commit, restoring 100% coverage required by the build. 
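
The behavior under test is easiest to see from a shell. The path below is
hypothetical; the message text and exit status come from the loadYaml
change in the previous commit.

  $ so-yaml.py get -r /tmp/does-not-exist.yaml postgres.telegraf.user
  File not found: /tmp/does-not-exist.yaml
  $ echo $?
  1

Since the file-reading subcommands all load through loadYaml, replace and
remove should fail with the same one-line stderr message instead of a
traceback.
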
--- salt/manager/tools/sbin/so-yaml_test.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/salt/manager/tools/sbin/so-yaml_test.py b/salt/manager/tools/sbin/so-yaml_test.py index 2da8a0be9..56581f7e3 100644 --- a/salt/manager/tools/sbin/so-yaml_test.py +++ b/salt/manager/tools/sbin/so-yaml_test.py @@ -973,3 +973,21 @@ class TestReplaceListObject(unittest.TestCase): expected = "key1:\n- id: '1'\n status: updated\n- id: '2'\n status: inactive\n" self.assertEqual(actual, expected) + + +class TestLoadYaml(unittest.TestCase): + + def test_load_yaml_missing_file(self): + with patch('sys.exit', new=MagicMock()) as sysmock: + with patch('sys.stderr', new=StringIO()) as mock_stderr: + soyaml.loadYaml("/tmp/so-yaml_test-does-not-exist.yaml") + sysmock.assert_called_with(1) + self.assertIn("File not found:", mock_stderr.getvalue()) + + def test_load_yaml_read_error(self): + with patch('sys.exit', new=MagicMock()) as sysmock: + with patch('sys.stderr', new=StringIO()) as mock_stderr: + with patch('builtins.open', side_effect=PermissionError("denied")): + soyaml.loadYaml("/tmp/so-yaml_test-unreadable.yaml") + sysmock.assert_called_with(1) + self.assertIn("Error reading file", mock_stderr.getvalue()) From 339959d1c0ff1e37930b636199563f70a902424c Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 22 Apr 2026 20:30:40 -0500 Subject: [PATCH 088/110] split up elasticfleet/enabled state --- salt/elasticfleet/enabled.sls | 107 ++------------------------------ salt/elasticfleet/manager.sls | 111 ++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 103 deletions(-) create mode 100644 salt/elasticfleet/manager.sls diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index 89ba1f80a..9173f1d38 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -17,65 +17,17 @@ include: - logstash.ssl - elasticfleet.config - elasticfleet.sostatus +{%- if GLOBALS.role != "so-fleet" %} + - elasticfleet.manager +{%- endif %} -{% if grains.role not in ['so-fleet'] %} +{% if GLOBALS.role not in ['so-fleet'] %} # Wait for Elasticsearch to be ready - no reason to try running Elastic Fleet server if ES is not ready wait_for_elasticsearch_elasticfleet: cmd.run: - name: so-elasticsearch-wait -{% endif %} - -# If enabled, automatically update Fleet Logstash Outputs -{% if ELASTICFLEETMERGED.config.server.enable_auto_configuration and grains.role not in ['so-import', 'so-eval', 'so-fleet'] %} -so-elastic-fleet-auto-configure-logstash-outputs: - cmd.run: - - name: /usr/sbin/so-elastic-fleet-outputs-update - - retry: - attempts: 4 - interval: 30 - -{# Separate from above in order to catch elasticfleet-logstash.crt changes and force update to fleet output policy #} -so-elastic-fleet-auto-configure-logstash-outputs-force: - cmd.run: - - name: /usr/sbin/so-elastic-fleet-outputs-update --certs - - retry: - attempts: 4 - interval: 30 - - onchanges: - - x509: etc_elasticfleet_logstash_crt - - x509: elasticfleet_kafka_crt -{% endif %} - -# If enabled, automatically update Fleet Server URLs & ES Connection -{% if ELASTICFLEETMERGED.config.server.enable_auto_configuration and grains.role not in ['so-fleet'] %} -so-elastic-fleet-auto-configure-server-urls: - cmd.run: - - name: /usr/sbin/so-elastic-fleet-urls-update - - retry: - attempts: 4 - interval: 30 -{% endif %} - -# Automatically update Fleet Server Elasticsearch URLs & Agent Artifact URLs -{% if grains.role not in ['so-fleet'] %} 
-so-elastic-fleet-auto-configure-elasticsearch-urls: - cmd.run: - - name: /usr/sbin/so-elastic-fleet-es-url-update - - retry: - attempts: 4 - interval: 30 - -so-elastic-fleet-auto-configure-artifact-urls: - cmd.run: - - name: /usr/sbin/so-elastic-fleet-artifacts-url-update - - retry: - attempts: 4 - interval: 30 - -{% endif %} # Sync Elastic Agent artifacts to Fleet Node -{% if grains.role in ['so-fleet'] %} elasticagent_syncartifacts: file.recurse: - name: /nsm/elastic-fleet/artifacts/beats @@ -149,57 +101,6 @@ so-elastic-fleet: - x509: etc_elasticfleet_crt {% endif %} -{% if GLOBALS.role != "so-fleet" %} -so-elastic-fleet-package-statefile: - file.managed: - - name: /opt/so/state/elastic_fleet_packages.txt - - contents: {{ELASTICFLEETMERGED.packages}} - -so-elastic-fleet-package-upgrade: - cmd.run: - - name: /usr/sbin/so-elastic-fleet-package-upgrade - - retry: - attempts: 3 - interval: 10 - - onchanges: - - file: /opt/so/state/elastic_fleet_packages.txt - -so-elastic-fleet-integrations: - cmd.run: - - name: /usr/sbin/so-elastic-fleet-integration-policy-load - - retry: - attempts: 3 - interval: 10 - -so-elastic-agent-grid-upgrade: - cmd.run: - - name: /usr/sbin/so-elastic-agent-grid-upgrade - - retry: - attempts: 12 - interval: 5 - -so-elastic-fleet-integration-upgrade: - cmd.run: - - name: /usr/sbin/so-elastic-fleet-integration-upgrade - - retry: - attempts: 3 - interval: 10 - -{# Optional integrations script doesn't need the retries like so-elastic-fleet-integration-upgrade which loads the default integrations #} -so-elastic-fleet-addon-integrations: - cmd.run: - - name: /usr/sbin/so-elastic-fleet-optional-integrations-load - -{% if ELASTICFLEETMERGED.config.defend_filters.enable_auto_configuration %} -so-elastic-defend-manage-filters-file-watch: - cmd.run: - - name: python3 /sbin/so-elastic-defend-manage-filters.py -c /opt/so/conf/elasticsearch/curl.config -d /opt/so/conf/elastic-fleet/defend-exclusions/disabled-filters.yaml -i /nsm/securityonion-resources/event_filters/ -i /opt/so/conf/elastic-fleet/defend-exclusions/rulesets/custom-filters/ &>> /opt/so/log/elasticfleet/elastic-defend-manage-filters.log - - onchanges: - - file: elasticdefendcustom - - file: elasticdefenddisabled -{% endif %} -{% endif %} - delete_so-elastic-fleet_so-status.disabled: file.uncomment: - name: /opt/so/conf/so-status/so-status.conf diff --git a/salt/elasticfleet/manager.sls b/salt/elasticfleet/manager.sls new file mode 100644 index 000000000..7c57c1ece --- /dev/null +++ b/salt/elasticfleet/manager.sls @@ -0,0 +1,111 @@ +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. 
+ +{% from 'allowed_states.map.jinja' import allowed_states %} +{% if sls.split('.')[0] in allowed_states %} +{% from 'vars/globals.map.jinja' import GLOBALS %} +{% from 'docker/docker.map.jinja' import DOCKERMERGED %} +{% from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %} + +# If enabled, automatically update Fleet Logstash Outputs +{% if ELASTICFLEETMERGED.config.server.enable_auto_configuration and grains.role not in ['so-import', 'so-eval'] %} +so-elastic-fleet-auto-configure-logstash-outputs: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-outputs-update + - retry: + attempts: 4 + interval: 30 + +{# Separate from above in order to catch elasticfleet-logstash.crt changes and force update to fleet output policy #} +so-elastic-fleet-auto-configure-logstash-outputs-force: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-outputs-update --certs + - retry: + attempts: 4 + interval: 30 + - onchanges: + - x509: etc_elasticfleet_logstash_crt + - x509: elasticfleet_kafka_crt +{% endif %} + +# If enabled, automatically update Fleet Server URLs & ES Connection +so-elastic-fleet-auto-configure-server-urls: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-urls-update + - retry: + attempts: 4 + interval: 30 + +# Automatically update Fleet Server Elasticsearch URLs & Agent Artifact URLs +so-elastic-fleet-auto-configure-elasticsearch-urls: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-es-url-update + - retry: + attempts: 4 + interval: 30 + +so-elastic-fleet-auto-configure-artifact-urls: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-artifacts-url-update + - retry: + attempts: 4 + interval: 30 + +so-elastic-fleet-package-statefile: + file.managed: + - name: /opt/so/state/elastic_fleet_packages.txt + - contents: {{ELASTICFLEETMERGED.packages}} + +so-elastic-fleet-package-upgrade: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-package-upgrade + - retry: + attempts: 3 + interval: 10 + - onchanges: + - file: /opt/so/state/elastic_fleet_packages.txt + +so-elastic-fleet-integrations: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-integration-policy-load + - retry: + attempts: 3 + interval: 10 + +so-elastic-agent-grid-upgrade: + cmd.run: + - name: /usr/sbin/so-elastic-agent-grid-upgrade + - retry: + attempts: 12 + interval: 5 + +so-elastic-fleet-integration-upgrade: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-integration-upgrade + - retry: + attempts: 3 + interval: 10 + +{# Optional integrations script doesn't need the retries like so-elastic-fleet-integration-upgrade which loads the default integrations #} +so-elastic-fleet-addon-integrations: + cmd.run: + - name: /usr/sbin/so-elastic-fleet-optional-integrations-load + +{% if ELASTICFLEETMERGED.config.defend_filters.enable_auto_configuration %} +so-elastic-defend-manage-filters-file-watch: + cmd.run: + - name: python3 /sbin/so-elastic-defend-manage-filters.py -c /opt/so/conf/elasticsearch/curl.config -d /opt/so/conf/elastic-fleet/defend-exclusions/disabled-filters.yaml -i /nsm/securityonion-resources/event_filters/ -i /opt/so/conf/elastic-fleet/defend-exclusions/rulesets/custom-filters/ &>> /opt/so/log/elasticfleet/elastic-defend-manage-filters.log + - onchanges: + - file: elasticdefendcustom + - file: elasticdefenddisabled +{% endif %} + +{% else %} + +{{sls}}_state_not_allowed: + test.fail_without_changes: + - name: {{sls}}_state_not_allowed + +{% endif %} From 72dbb69a1cd7756d04fb77eb47b98431f0851a35 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 22 Apr 2026 20:37:48 -0500 Subject: [PATCH 
089/110] fix searchnodes running elasticsearch/cluster state --- salt/elasticsearch/enabled.sls | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/salt/elasticsearch/enabled.sls b/salt/elasticsearch/enabled.sls index ab12b875e..77088d649 100644 --- a/salt/elasticsearch/enabled.sls +++ b/salt/elasticsearch/enabled.sls @@ -17,7 +17,7 @@ include: - elasticsearch.ssl - elasticsearch.config - elasticsearch.sostatus -{%- if GLOBALS.role != 'so-searchode' %} +{%- if GLOBALS.role != "so-searchode" %} - elasticsearch.cluster {%- endif%} @@ -102,11 +102,6 @@ so-elasticsearch: - cmd: auth_users_roles_inode - cmd: auth_users_inode -delete_so-elasticsearch_so-status.disabled: - file.uncomment: - - name: /opt/so/conf/so-status/so-status.conf - - regex: ^so-elasticsearch$ - wait_for_so-elasticsearch: http.wait_for_successful_query: - name: "https://localhost:9200/" @@ -117,10 +112,14 @@ wait_for_so-elasticsearch: - status: 200 - wait_for: 300 - request_interval: 15 - - backend: requests - require: - docker_container: so-elasticsearch +delete_so-elasticsearch_so-status.disabled: + file.uncomment: + - name: /opt/so/conf/so-status/so-status.conf + - regex: ^so-elasticsearch$ + {% else %} {{sls}}_state_not_allowed: From 398bc9e4ede37ce5749c12c17a81d3dfee9f43c4 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 22 Apr 2026 20:38:13 -0500 Subject: [PATCH 090/110] update kibana discardCorruptObjects version --- salt/kibana/defaults.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/kibana/defaults.yaml b/salt/kibana/defaults.yaml index 580891973..ecf56756b 100644 --- a/salt/kibana/defaults.yaml +++ b/salt/kibana/defaults.yaml @@ -22,7 +22,7 @@ kibana: - default - file migrations: - discardCorruptObjects: "8.18.8" + discardCorruptObjects: "9.3.3" telemetry: enabled: False xpack: From 22f869734ed8693bcddb10f7b03018770db10cf1 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Wed, 22 Apr 2026 23:11:31 -0500 Subject: [PATCH 091/110] add check for files before attempting to use file pattern to load templates --- .../sbin/so-elasticsearch-templates-load | 49 ++++++++++++++----- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load index 840639a32..a0ebd66e8 100755 --- a/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-templates-load @@ -103,11 +103,13 @@ load_component_templates() { local pattern="${ELASTICSEARCH_TEMPLATES_DIR}/component/$2" local append_mappings="${3:-"false"}" - # current state of nullglob shell option - shopt -q nullglob && nullglob_set=1 || nullglob_set=0 - - shopt -s nullglob echo -e "\nLoading $printed_name component templates...\n" + + if ! compgen -G "${pattern}/*.json" > /dev/null; then + echo "No $printed_name component templates found in ${pattern}, skipping." + return + fi + for component in "$pattern"/*.json; do tmpl_name=$(basename "${component%.json}") @@ -121,11 +123,6 @@ load_component_templates() { SO_LOAD_FAILURES_NAMES+=("$component") fi done - - # restore nullglob shell option if needed - if [[ $nullglob_set -eq 1 ]]; then - shopt -u nullglob - fi } check_elasticsearch_responsive() { @@ -136,7 +133,32 @@ check_elasticsearch_responsive() { fail "Elasticsearch is not responding. 
Please review Elasticsearch logs /opt/so/log/elasticsearch/securityonion.log for more details. Additionally, consider running so-elasticsearch-troubleshoot." } -if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]]; then +index_templates_exist() { + local templates_dir="$1" + + if [[ ! -d "$templates_dir" ]]; then + return 1 + fi + + compgen -G "${templates_dir}/*.json" > /dev/null +} + +should_load_addon_templates() { + if [[ "$IS_HEAVYNODE" == "true" ]]; then + return 1 + fi + + # Skip statefile checks when forcing template load + if [[ "$FORCE" != "true" ]]; then + if [[ ! -f "$SO_STATEFILE_SUCCESS" || -f "$ADDON_STATEFILE_SUCCESS" ]]; then + return 1 + fi + fi + + index_templates_exist "$ADDON_TEMPLATES_DIR" +} + +if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]] && index_templates_exist "$SO_TEMPLATES_DIR"; then check_elasticsearch_responsive if [[ "$IS_HEAVYNODE" == "false" ]]; then @@ -201,13 +223,14 @@ if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]]; then fail "Failed to load all Security Onion core templates successfully." fi fi -else - +elif ! index_templates_exist "$SO_TEMPLATES_DIR"; then + echo "No Security Onion core index templates found in ${SO_TEMPLATES_DIR}, skipping." +elif [[ -f "$SO_STATEFILE_SUCCESS" ]]; then echo "Security Onion core templates already loaded" fi # Start loading addon templates -if [[ (-d "$ADDON_TEMPLATES_DIR" && -f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && ! -f "$ADDON_STATEFILE_SUCCESS") || (-d "$ADDON_TEMPLATES_DIR" && "$IS_HEAVYNODE" == "false" && "$FORCE" == "true") ]]; then +if should_load_addon_templates; then check_elasticsearch_responsive From 22b32a16ddb72c28ca60edea1dd113f258245d81 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 23 Apr 2026 08:30:47 -0500 Subject: [PATCH 092/110] include elasticfleet.config --- salt/elasticfleet/enabled.sls | 2 +- salt/elasticfleet/manager.sls | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/salt/elasticfleet/enabled.sls b/salt/elasticfleet/enabled.sls index 9173f1d38..cb189f9a9 100644 --- a/salt/elasticfleet/enabled.sls +++ b/salt/elasticfleet/enabled.sls @@ -21,7 +21,7 @@ include: - elasticfleet.manager {%- endif %} -{% if GLOBALS.role not in ['so-fleet'] %} +{% if GLOBALS.role != "so-fleet" %} # Wait for Elasticsearch to be ready - no reason to try running Elastic Fleet server if ES is not ready wait_for_elasticsearch_elasticfleet: cmd.run: diff --git a/salt/elasticfleet/manager.sls b/salt/elasticfleet/manager.sls index 7c57c1ece..72f53be79 100644 --- a/salt/elasticfleet/manager.sls +++ b/salt/elasticfleet/manager.sls @@ -9,6 +9,9 @@ {% from 'docker/docker.map.jinja' import DOCKERMERGED %} {% from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %} +include: + - elasticfleet.config + # If enabled, automatically update Fleet Logstash Outputs {% if ELASTICFLEETMERGED.config.server.enable_auto_configuration and grains.role not in ['so-import', 'so-eval'] %} so-elastic-fleet-auto-configure-logstash-outputs: From eadad6c163aaad3312d2d4387ae16b6c86e0da9e Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 23 Apr 2026 10:01:38 -0400 Subject: [PATCH 093/110] soup: bootstrap postgres pillar stubs and secret on 3.0.0 upgrade pillar/top.sls now references postgres.soc_postgres / postgres.adv_postgres unconditionally, but make_some_dirs only runs at install time so managers upgrading from 3.0.0 have no local/pillar/postgres/ and salt-master fails pillar render on the first post-upgrade restart. 
Similarly, secrets_pillar is a no-op on upgrade (secrets.sls already exists), so secrets:postgres_pass never gets seeded and the postgres container's POSTGRES_PASSWORD_FILE and SOC's PG_ADMIN_PASS would land empty after highstate. Add ensure_postgres_local_pillar and ensure_postgres_secret to up_to_3.1.0 so the stubs and secret exist before masterlock/salt-master restart. Both are idempotent and safe to re-run. --- salt/manager/tools/sbin/soup | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index 2c727c0f7..d54af4a13 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -477,7 +477,44 @@ elasticsearch_backup_index_templates() { tar -czf /nsm/backup/3.0.0_elasticsearch_index_templates.tar.gz -C /opt/so/conf/elasticsearch/templates/index/ . } +ensure_postgres_local_pillar() { + # Postgres was added as a service after 3.0.0, so the new pillar/top.sls + # references postgres.soc_postgres / postgres.adv_postgres unconditionally. + # Managers upgrading from 3.0.0 have no /opt/so/saltstack/local/pillar/postgres/ + # (make_some_dirs only runs at install time), so the stubs must be created + # here before salt-master restarts against the new top.sls. + echo "Ensuring postgres local pillar stubs exist." + local dir=/opt/so/saltstack/local/pillar/postgres + mkdir -p "$dir" + [[ -f "$dir/soc_postgres.sls" ]] || touch "$dir/soc_postgres.sls" + [[ -f "$dir/adv_postgres.sls" ]] || touch "$dir/adv_postgres.sls" + chown -R socore:socore "$dir" +} + +ensure_postgres_secret() { + # On a fresh install, generate_passwords + secrets_pillar seed + # secrets:postgres_pass in /opt/so/saltstack/local/pillar/secrets.sls. That + # code path is skipped on upgrade (secrets.sls already exists from 3.0.0 + # with import_pass/influx_pass but no postgres_pass), so the postgres + # container's POSTGRES_PASSWORD_FILE and SOC's PG_ADMIN_PASS would be empty + # after highstate. Generate one now if missing. + local secrets_file=/opt/so/saltstack/local/pillar/secrets.sls + if [[ ! -f "$secrets_file" ]]; then + echo "WARNING: $secrets_file missing; skipping postgres_pass backfill." + return 0 + fi + if so-yaml.py get -r "$secrets_file" secrets.postgres_pass >/dev/null 2>&1; then + echo "secrets.postgres_pass already set; leaving as-is." + return 0 + fi + echo "Seeding secrets.postgres_pass in $secrets_file." + so-yaml.py add "$secrets_file" secrets.postgres_pass "$(get_random_value)" + chown socore:socore "$secrets_file" +} + up_to_3.1.0() { + ensure_postgres_local_pillar + ensure_postgres_secret determine_elastic_agent_upgrade elasticsearch_backup_index_templates # Clear existing component template state file. 
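A quick way to verify both helpers after the upgrade completes; this is an illustrative sketch, not part of the patch. It reuses the same so-yaml.py existence check that ensure_postgres_secret performs, and renders the pillar through the salt master rather than with --local (the manager's minion config has no file_roots, a caveat addressed later in this series):

    # Illustrative post-upgrade spot check; paths and key names come from the patch above.
    ls -l /opt/so/saltstack/local/pillar/postgres/soc_postgres.sls \
          /opt/so/saltstack/local/pillar/postgres/adv_postgres.sls

    # Same existence check ensure_postgres_secret uses internally
    if so-yaml.py get -r /opt/so/saltstack/local/pillar/secrets.sls secrets.postgres_pass >/dev/null 2>&1; then
        echo "secrets.postgres_pass is seeded"
    fi

    # Confirm the key renders end-to-end via the master (avoid printing the value itself)
    salt-call pillar.get secrets:postgres_pass --out=json >/dev/null && echo "pillar renders"
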
From 7e70870a9ef59896bba6a1f699fae0225e2bb7cf Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 23 Apr 2026 09:25:36 -0500 Subject: [PATCH 094/110] remove globals import --- salt/elasticfleet/manager.sls | 1 - 1 file changed, 1 deletion(-) diff --git a/salt/elasticfleet/manager.sls b/salt/elasticfleet/manager.sls index 72f53be79..856a9fdd6 100644 --- a/salt/elasticfleet/manager.sls +++ b/salt/elasticfleet/manager.sls @@ -5,7 +5,6 @@ {% from 'allowed_states.map.jinja' import allowed_states %} {% if sls.split('.')[0] in allowed_states %} -{% from 'vars/globals.map.jinja' import GLOBALS %} {% from 'docker/docker.map.jinja' import DOCKERMERGED %} {% from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %} From b015c8ff146e66fe1b840773cb481a192204c930 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 23 Apr 2026 09:31:30 -0500 Subject: [PATCH 095/110] remove docker import --- salt/elasticfleet/manager.sls | 1 - 1 file changed, 1 deletion(-) diff --git a/salt/elasticfleet/manager.sls b/salt/elasticfleet/manager.sls index 856a9fdd6..9fbbff3bc 100644 --- a/salt/elasticfleet/manager.sls +++ b/salt/elasticfleet/manager.sls @@ -5,7 +5,6 @@ {% from 'allowed_states.map.jinja' import allowed_states %} {% if sls.split('.')[0] in allowed_states %} -{% from 'docker/docker.map.jinja' import DOCKERMERGED %} {% from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %} include: From 5f2ec76ba8a7c163e905a07aaa23b23a9221dbba Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 23 Apr 2026 09:50:45 -0500 Subject: [PATCH 096/110] prevent fleetnode from being able to run elasticfleet.manager state manually --- salt/allowed_states.map.jinja | 1 + salt/elasticfleet/manager.sls | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/salt/allowed_states.map.jinja b/salt/allowed_states.map.jinja index 1fac0f0e3..ad9b28b28 100644 --- a/salt/allowed_states.map.jinja +++ b/salt/allowed_states.map.jinja @@ -33,6 +33,7 @@ 'kratos', 'hydra', 'elasticfleet', + 'elasticfleet.manager', 'elastic-fleet-package-registry', 'utility' ] %} diff --git a/salt/elasticfleet/manager.sls b/salt/elasticfleet/manager.sls index 9fbbff3bc..00fead9cf 100644 --- a/salt/elasticfleet/manager.sls +++ b/salt/elasticfleet/manager.sls @@ -4,7 +4,7 @@ # Elastic License 2.0. {% from 'allowed_states.map.jinja' import allowed_states %} -{% if sls.split('.')[0] in allowed_states %} +{% if sls in allowed_states %} {% from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %} include: From fdfca469cc823ccff1c0a1f9658a12d9a920a202 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Thu, 23 Apr 2026 09:53:07 -0500 Subject: [PATCH 097/110] prevent non-manager nodes from running elasticsearch.cluster state manually --- salt/allowed_states.map.jinja | 1 + salt/elasticsearch/cluster.sls | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/salt/allowed_states.map.jinja b/salt/allowed_states.map.jinja index ad9b28b28..3a6aa2f6a 100644 --- a/salt/allowed_states.map.jinja +++ b/salt/allowed_states.map.jinja @@ -34,6 +34,7 @@ 'hydra', 'elasticfleet', 'elasticfleet.manager', + 'elasticsearch.cluster', 'elastic-fleet-package-registry', 'utility' ] %} diff --git a/salt/elasticsearch/cluster.sls b/salt/elasticsearch/cluster.sls index 7a8a6675c..e25aed36a 100644 --- a/salt/elasticsearch/cluster.sls +++ b/salt/elasticsearch/cluster.sls @@ -4,7 +4,7 @@ # Elastic License 2.0. 
{% from 'allowed_states.map.jinja' import allowed_states %} -{% if sls.split('.')[0] in allowed_states %} +{% if sls in allowed_states %} {% from 'vars/globals.map.jinja' import GLOBALS %} {% from 'elasticsearch/config.map.jinja' import ELASTICSEARCHMERGED %} {% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS, SO_MANAGED_INDICES %} From 0ecc7ae594df84082d574e54ac93a765086d995d Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 23 Apr 2026 11:25:44 -0400 Subject: [PATCH 098/110] soup: drop --local from postgres.telegraf_users reconcile The manager's /etc/salt/minion (written by so-functions:configure_minion) has no file_roots, so salt-call --local falls back to Salt's default /srv/salt and fails with "No matching sls found for 'postgres.telegraf_users' in env 'base'". || true was silently swallowing the error, which meant the DB roles for the pillar entries just populated by the so-telegraf-cred backfill loop never actually got created. Route through salt-master instead; its file_roots already points at the default/local salt trees. --- salt/manager/tools/sbin/soup | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index d54af4a13..440ef47bc 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -536,7 +536,10 @@ post_to_3.1.0() { [[ -n "$mid" ]] || continue /usr/sbin/so-telegraf-cred add "$mid" || echo " warning: so-telegraf-cred add $mid failed" >&2 done - salt-call --local state.apply postgres.telegraf_users queue=True || true + # Run through the master (not --local) so state compilation uses the + # master's configured file_roots; the manager's /etc/salt/minion has no + # file_roots of its own and --local would fail with "No matching sls found". + salt-call state.apply postgres.telegraf_users queue=True || true POSTVERSION=3.1.0 } From a6948e8dcbf988be752882e2f83287913fc2f696 Mon Sep 17 00:00:00 2001 From: Mike Reeves Date: Thu, 23 Apr 2026 13:56:41 -0400 Subject: [PATCH 099/110] Remove helpLink for influxdb in soc_global.yaml Removed helpLink for influxdb from endgamehost configuration. --- salt/global/soc_global.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/salt/global/soc_global.yaml b/salt/global/soc_global.yaml index 31d9f8d3b..c15f3eb98 100644 --- a/salt/global/soc_global.yaml +++ b/salt/global/soc_global.yaml @@ -59,5 +59,4 @@ global: description: Allows use of Endgame with Security Onion. This feature requires a license from Endgame. 
global: True advanced: True - helpLink: influxdb From cdd217283d5f759ff3015bcb8641d491abc1510d Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Fri, 24 Apr 2026 08:13:36 -0400 Subject: [PATCH 100/110] numeric test description --- setup/so-setup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/so-setup b/setup/so-setup index 46b11fc11..7875b9c99 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -219,7 +219,7 @@ if [ -n "$test_profile" ]; then WEBUSER=onionuser@somewhere.invalid WEBPASSWD1=0n10nus3r WEBPASSWD2=0n10nus3r - NODE_DESCRIPTION="${HOSTNAME} - ${install_type} - ${MAINIP}" + NODE_DESCRIPTION="${HOSTNAME} - ${install_type} - ${MSRVIP_OFFSET}" update_sudoers_for_testing fi From b6acf3b52265df31bd6a8f1d2b81cef8996b8963 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:24:58 -0500 Subject: [PATCH 101/110] typo --- salt/elasticsearch/enabled.sls | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/elasticsearch/enabled.sls b/salt/elasticsearch/enabled.sls index 77088d649..66d397b39 100644 --- a/salt/elasticsearch/enabled.sls +++ b/salt/elasticsearch/enabled.sls @@ -17,7 +17,7 @@ include: - elasticsearch.ssl - elasticsearch.config - elasticsearch.sostatus -{%- if GLOBALS.role != "so-searchode" %} +{%- if GLOBALS.role != "so-searchnode" %} - elasticsearch.cluster {%- endif%} From 564815e836e36c31ddbecd48b458d6e85be20b19 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 24 Apr 2026 10:46:29 -0400 Subject: [PATCH 102/110] redo how services are stopped during reinstall --- setup/so-functions | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index bf95ea9d8..a31c6f330 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1547,8 +1547,6 @@ reinstall_init() { local salt_services=( "salt-minion" ) fi - local service_retry_count=20 - { # remove all of root's cronjobs crontab -r -u root @@ -1563,31 +1561,30 @@ reinstall_init() { salt-call state.apply ca.remove -linfo --local --file-root=../salt - # Kill any salt processes (safely) + # Stop salt services, then force-kill any lingering salt processes so dnf remove salt can run cleanly for service in "${salt_services[@]}"; do - # Stop the service in the background so we can exit after a certain amount of time - if check_service_status "$service"; then - systemctl stop "$service" & + if ! check_service_status "$service"; then + continue + fi + local service_pid + service_pid=$(pgrep -f "/usr/bin/${service}" | head -1) + info "Stopping $service (pid=${service_pid:-none})" + systemctl stop "$service" + if [[ -n "$service_pid" ]] && ps -p "$service_pid" > /dev/null 2>&1; then + timeout 30 tail --pid="$service_pid" -f /dev/null || { + info "$service (pid $service_pid) still alive after 30s, force-killing" + pkill -9 -ef "/usr/bin/${service}" + } fi - local pid=$! - - local count=0 - while check_service_status "$service"; do - if [[ $count -gt $service_retry_count ]]; then - echo "Could not stop $service after 1 minute, exiting setup." 
- - # Stop the systemctl process trying to kill the service, show user a message, then exit setup - kill -9 $pid - fail_setup - fi - - sleep 5 - ((count++)) - done done + # Catch any stray salt-call / salt CLI children that weren't parented to the service cgroup + pkill -9 -ef "/usr/bin/salt-call" 2>/dev/null + pkill -9 -ef "/usr/bin/python3 /bin/salt" 2>/dev/null + # Remove all salt configs - rm -rf /etc/salt/engines/* /etc/salt/grains /etc/salt/master /etc/salt/master.d/* /etc/salt/minion /etc/salt/minion.d/* /etc/salt/pki/* /etc/salt/proxy /etc/salt/proxy.d/* /var/cache/salt/ + dnf -y remove salt + rm -rf /etc/salt/ /var/cache/salt/ if command -v docker &> /dev/null; then # Stop and remove all so-* containers so files can be changed with more safety From 0722b681b1a06925db37ddea287f53697717f91d Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 24 Apr 2026 11:04:46 -0400 Subject: [PATCH 103/110] redo service stop on reinstall --- setup/so-functions | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index a31c6f330..61601aee7 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1561,27 +1561,39 @@ reinstall_init() { salt-call state.apply ca.remove -linfo --local --file-root=../salt - # Stop salt services, then force-kill any lingering salt processes so dnf remove salt can run cleanly + # Stop salt services and force-kill any lingering salt processes (including orphans + # from an earlier reinstall attempt where the unit file is gone but processes survive) + # so dnf remove salt can run cleanly for service in "${salt_services[@]}"; do - if ! check_service_status "$service"; then - continue - fi - local service_pid - service_pid=$(pgrep -f "/usr/bin/${service}" | head -1) - info "Stopping $service (pid=${service_pid:-none})" - systemctl stop "$service" - if [[ -n "$service_pid" ]] && ps -p "$service_pid" > /dev/null 2>&1; then - timeout 30 tail --pid="$service_pid" -f /dev/null || { - info "$service (pid $service_pid) still alive after 30s, force-killing" - pkill -9 -ef "/usr/bin/${service}" - } + if check_service_status "$service"; then + info "Stopping $service via systemctl" + systemctl stop "$service" fi done - # Catch any stray salt-call / salt CLI children that weren't parented to the service cgroup - pkill -9 -ef "/usr/bin/salt-call" 2>/dev/null + # Unconditionally force-kill any remaining salt binaries — these may be orphaned + # from a prior aborted reinstall (no unit file, so systemctl can't see them). 
+ for salt_bin in salt-master salt-minion salt-call salt-api salt-syndic; do + if pgrep -f "/usr/bin/${salt_bin}" > /dev/null 2>&1; then + info "Force-killing lingering $salt_bin processes" + pkill -9 -ef "/usr/bin/${salt_bin}" 2>/dev/null + fi + done + # Catch stray `salt` CLI children from saltutil.kill_all_jobs / state.apply invocations pkill -9 -ef "/usr/bin/python3 /bin/salt" 2>/dev/null + # Give the kernel a moment to reap the killed processes before dnf removes the binaries + local kill_wait=0 + while pgrep -f "/usr/bin/salt-" > /dev/null 2>&1; do + if [[ $kill_wait -gt 10 ]]; then + info "Salt processes still present after SIGKILL + 10s wait; proceeding anyway" + pgrep -af "/usr/bin/salt-" | while read -r line; do info " lingering: $line"; done + break + fi + sleep 1 + ((kill_wait++)) + done + # Remove all salt configs dnf -y remove salt rm -rf /etc/salt/ /var/cache/salt/ From 02381fbbe9c28089e590a9457cc3b64ba720b56f Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 24 Apr 2026 11:33:21 -0400 Subject: [PATCH 104/110] stop salt-cloud , belt-and-suspenders against a broken/incomplete salt RPM --- setup/so-functions | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/setup/so-functions b/setup/so-functions index 61601aee7..ca58dbbcb 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1573,7 +1573,7 @@ reinstall_init() { # Unconditionally force-kill any remaining salt binaries — these may be orphaned # from a prior aborted reinstall (no unit file, so systemctl can't see them). - for salt_bin in salt-master salt-minion salt-call salt-api salt-syndic; do + for salt_bin in salt-master salt-minion salt-call salt-cloud; do if pgrep -f "/usr/bin/${salt_bin}" > /dev/null 2>&1; then info "Force-killing lingering $salt_bin processes" pkill -9 -ef "/usr/bin/${salt_bin}" 2>/dev/null @@ -1594,10 +1594,16 @@ reinstall_init() { ((kill_wait++)) done + # Clear the 'failed' state SIGKILL left on the units before removing the package + systemctl reset-failed salt-master.service salt-minion.service 2>/dev/null || true + # Remove all salt configs dnf -y remove salt rm -rf /etc/salt/ /var/cache/salt/ + # Drop systemd's in-memory references to the now-removed units + systemctl daemon-reload + if command -v docker &> /dev/null; then # Stop and remove all so-* containers so files can be changed with more safety if [[ $(docker ps -a -q --filter "name=so-" | wc -l) -gt 0 ]]; then From 90ecbe90d8ae2c4f934f927f0f0009582903ecca Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Fri, 24 Apr 2026 12:56:27 -0500 Subject: [PATCH 105/110] allow heavynodes to run elasticsearch/cluster state --- salt/allowed_states.map.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/allowed_states.map.jinja b/salt/allowed_states.map.jinja index 3a6aa2f6a..6d8b0e2a0 100644 --- a/salt/allowed_states.map.jinja +++ b/salt/allowed_states.map.jinja @@ -79,7 +79,7 @@ ), 'so-heavynode': ( sensor_states + - ['elasticagent', 'elasticsearch', 'logstash', 'redis', 'nginx'] + ['elasticagent', 'elasticsearch', 'elasticsearch.cluster', 'logstash', 'redis', 'nginx'] ), 'so-idh': ( ['idh'] From 070d1504200cde46572cac8cb23c03cb8f4bad96 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 24 Apr 2026 13:56:35 -0400 Subject: [PATCH 106/110] readonly soc and kratos enabled --- salt/kratos/soc_kratos.yaml | 2 +- salt/soc/soc_soc.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/salt/kratos/soc_kratos.yaml 
b/salt/kratos/soc_kratos.yaml index 07359bcab..4cfe2c1c3 100644 --- a/salt/kratos/soc_kratos.yaml +++ b/salt/kratos/soc_kratos.yaml @@ -3,8 +3,8 @@ kratos: description: Enables or disables the Kratos authentication system. WARNING - Disabling this process will cause the grid to malfunction. Re-enabling this setting will require manual effort via SSH. forcedType: bool advanced: True + readonly: True helpLink: kratos - oidc: enabled: description: Set to True to enable OIDC / Single Sign-On (SSO) to SOC. Requires a valid Security Onion license key. diff --git a/salt/soc/soc_soc.yaml b/salt/soc/soc_soc.yaml index bd3ed9095..6a2f79629 100644 --- a/salt/soc/soc_soc.yaml +++ b/salt/soc/soc_soc.yaml @@ -3,6 +3,7 @@ soc: description: Enables or disables SOC. WARNING - Disabling this setting is unsupported and will cause the grid to malfunction. Re-enabling this setting is a manual effort via SSH. forcedType: bool advanced: True + readonly: True telemetryEnabled: title: SOC Telemetry description: When this setting is enabled and the grid is not in airgap mode, SOC will provide feature usage data to the Security Onion development team via Google Analytics. This data helps Security Onion developers determine which product features are being used and can also provide insight into improving the user interface. When changing this setting, wait for the grid to fully synchronize and then perform a hard browser refresh on SOC, to force the browser cache to update and reflect the new setting. From 8eca465ef6c3ba38215010901721d4e2f505d62e Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 24 Apr 2026 14:35:11 -0400 Subject: [PATCH 107/110] uninstall elastic-agent before stopping dockers on reinstall --- setup/so-functions | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index ca58dbbcb..f9b601a93 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -1604,6 +1604,9 @@ reinstall_init() { # Drop systemd's in-memory references to the now-removed units systemctl daemon-reload + # Uninstall local Elastic Agent, if installed + elastic-agent uninstall -f + if command -v docker &> /dev/null; then # Stop and remove all so-* containers so files can be changed with more safety if [[ $(docker ps -a -q --filter "name=so-" | wc -l) -gt 0 ]]; then @@ -1626,9 +1629,6 @@ reinstall_init() { backup_dir /nsm/hydra "$date_string" backup_dir /nsm/influxdb "$date_string" - # Uninstall local Elastic Agent, if installed - elastic-agent uninstall -f - } >> "$setup_log" 2>&1 info "System reinstall init has been completed." From 199c2746f1b397cc21f5c618ade5c1356d0104b9 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Fri, 24 Apr 2026 15:24:11 -0400 Subject: [PATCH 108/110] stop salt-minion and salt-master regardless of install type. display reinstall on console and save to logfile --- setup/so-functions | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/setup/so-functions b/setup/so-functions index f9b601a93..23098cac8 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -202,10 +202,10 @@ check_service_status() { systemctl status $service_name > /dev/null 2>&1 local status=$? 
if [ $status -gt 0 ]; then - info " $service_name is not running" + info "$service_name is not running" return 1; else - info " $service_name is running" + info "$service_name is running" return 0; fi @@ -1541,11 +1541,8 @@ clear_previous_setup_results() { reinstall_init() { info "Putting system in state to run setup again" - if [[ $install_type =~ ^(MANAGER|EVAL|MANAGERSEARCH|MANAGERHYPE|STANDALONE|FLEET|IMPORT)$ ]]; then - local salt_services=( "salt-master" "salt-minion" ) - else - local salt_services=( "salt-minion" ) - fi + # Always include both services. check_service_status skips units that aren't present. + local salt_services=( "salt-master" "salt-minion" ) { # remove all of root's cronjobs @@ -1629,7 +1626,7 @@ reinstall_init() { backup_dir /nsm/hydra "$date_string" backup_dir /nsm/influxdb "$date_string" - } >> "$setup_log" 2>&1 + } 2>&1 | tee -a "$setup_log" info "System reinstall init has been completed." } From 21aeb6818873fef08c2dd3517c820b4cf47f8f58 Mon Sep 17 00:00:00 2001 From: Josh Patterson Date: Mon, 27 Apr 2026 14:30:41 -0400 Subject: [PATCH 109/110] fix sominion_setup reactor --- salt/reactor/sominion_setup.sls | 71 ++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/salt/reactor/sominion_setup.sls b/salt/reactor/sominion_setup.sls index 64b3666f4..a72348cc8 100644 --- a/salt/reactor/sominion_setup.sls +++ b/salt/reactor/sominion_setup.sls @@ -6,39 +6,74 @@ # Elastic License 2.0. import logging -from subprocess import call -import yaml +import os +import re +import shlex +import subprocess log = logging.getLogger(__name__) +SO_MINION = '/usr/sbin/so-minion' + +_NODETYPE_RE = re.compile(r'^[A-Z][A-Z0-9_]{0,31}$') +_MINIONID_RE = re.compile(r'^[A-Za-z0-9._-]{1,253}$') +_HOSTPART_RE = re.compile(r'^[A-Za-z0-9._-]{1,253}$') +_IPV4_RE = re.compile( + r'^(?:(?:25[0-5]|2[0-4]\d|[01]?\d?\d)\.){3}' + r'(?:25[0-5]|2[0-4]\d|[01]?\d?\d)$' +) +_HEAP_RE = re.compile(r'^\d{1,6}[kKmMgG]?$') + + +def _check(name, value, pattern): + s = str(value) + if not pattern.match(s): + raise ValueError("sominion_setup_reactor: refusing unsafe %s=%r" % (name, value)) + return s + + def run(): log.info('sominion_setup_reactor: Running') minionid = data['id'] DATA = data['data'] - hv_name = DATA['HYPERVISOR_HOST'] log.info('sominion_setup_reactor: DATA: %s' % DATA) - # Build the base command - cmd = "NODETYPE=" + DATA['NODETYPE'] + " /usr/sbin/so-minion -o=addVM -m=" + minionid + " -n=" + DATA['MNIC'] + " -i=" + DATA['MAINIP'] + " -c=" + str(DATA['CPUCORES']) + " -d='" + DATA['NODE_DESCRIPTION'] + "'" - - # Add optional arguments only if they exist in DATA + nodetype = _check('NODETYPE', DATA['NODETYPE'], _NODETYPE_RE) + + argv = [ + SO_MINION, + '-o=addVM', + '-m=' + _check('minionid', minionid, _MINIONID_RE), + '-n=' + _check('MNIC', DATA['MNIC'], _HOSTPART_RE), + '-i=' + _check('MAINIP', DATA['MAINIP'], _IPV4_RE), + '-c=' + str(int(DATA['CPUCORES'])), + '-d=' + str(DATA['NODE_DESCRIPTION']), + ] + if 'CORECOUNT' in DATA: - cmd += " -C=" + str(DATA['CORECOUNT']) - + argv.append('-C=' + str(int(DATA['CORECOUNT']))) + if 'INTERFACE' in DATA: - cmd += " -a=" + DATA['INTERFACE'] - + argv.append('-a=' + _check('INTERFACE', DATA['INTERFACE'], _HOSTPART_RE)) + if 'ES_HEAP_SIZE' in DATA: - cmd += " -e=" + DATA['ES_HEAP_SIZE'] - + argv.append('-e=' + _check('ES_HEAP_SIZE', DATA['ES_HEAP_SIZE'], _HEAP_RE)) + if 'LS_HEAP_SIZE' in DATA: - cmd += " -l=" + DATA['LS_HEAP_SIZE'] + argv.append('-l=' + _check('LS_HEAP_SIZE', DATA['LS_HEAP_SIZE'], _HEAP_RE)) if 
'LSHOSTNAME' in DATA: - cmd += " -L=" + DATA['LSHOSTNAME'] - - log.info('sominion_setup_reactor: Command: %s' % cmd) - rc = call(cmd, shell=True) + argv.append('-L=' + _check('LSHOSTNAME', DATA['LSHOSTNAME'], _HOSTPART_RE)) + + env = os.environ.copy() + env['NODETYPE'] = nodetype + + log.info( + 'sominion_setup_reactor: argv: %s (NODETYPE=%s)', + ' '.join(shlex.quote(a) for a in argv), + shlex.quote(nodetype), + ) + rc = subprocess.call(argv, shell=False, env=env) log.info('sominion_setup_reactor: rc: %s' % rc) From 9f2ca7012f858063a53648aa805c8415029b1110 Mon Sep 17 00:00:00 2001 From: reyesj2 <94730068+reyesj2@users.noreply.github.com> Date: Mon, 27 Apr 2026 15:02:13 -0500 Subject: [PATCH 110/110] exclude more transform job errors --- salt/common/tools/sbin/so-log-check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index 8c8bbf35c..f355e1bfe 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -227,7 +227,7 @@ if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|from NIC checksum offloading" # zeek reporter.log EXCLUDED_ERRORS="$EXCLUDED_ERRORS|marked for removal" # docker container getting recycled EXCLUDED_ERRORS="$EXCLUDED_ERRORS|tcp 127.0.0.1:6791: bind: address already in use" # so-elastic-fleet agent restarting. Seen starting w/ 8.18.8 https://github.com/elastic/kibana/issues/201459 - EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint).*user so_kibana lacks the required permissions \[logs-\1" # Known issue with 3 integrations using kibana_system role vs creating unique api creds with proper permissions. + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint|armis|o365_metrics|microsoft_sentinel|snyk).*user so_kibana lacks the required permissions \[(logs|metrics)-\1" # Known issue with integrations starting transform jobs that are explicitly not allowed to start as a system user. (installed as so_elastic / so_kibana) EXCLUDED_ERRORS="$EXCLUDED_ERRORS|manifest unknown" # appears in so-dockerregistry log for so-tcpreplay following docker upgrade to 29.2.1-1 fi
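
The tightened exclusion does two things: the alternation grows from three integrations to seven, and the trailing \[(logs|metrics)-\1 now accepts metrics- datasets while the \1 back-reference still requires the dataset in the permissions brackets to repeat the transform's own dataset name. A short sketch with an invented log line (not real Elasticsearch output), assuming the pattern is applied with GNU grep -E as the alternation syntax suggests (back-references in -E mode are a GNU extension):

    # Synthetic demonstration only: the log line below is invented, not taken from real logs.
    pattern='TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint|armis|o365_metrics|microsoft_sentinel|snyk).*user so_kibana lacks the required permissions \[(logs|metrics)-\1'

    line='[TransformTask] [logs-armis.device] ... user so_kibana lacks the required permissions [metrics-armis.device]'
    echo "$line" | grep -qE "$pattern" && echo "excluded"   # \1 repeats "armis", so this error is filtered

    # A cross-dataset failure is NOT excluded, because \1 must repeat the first capture:
    echo "${line/metrics-armis/metrics-snyk}" | grep -qE "$pattern" || echo "still reported"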