From 39d09471026443258f604d54106509d5c9226c83 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:38:40 -0500
Subject: [PATCH 1/5] update default elastic agent logging level to warning

---
 .../tools/sbin/so-elastic-fleet-common |  2 +-
 salt/manager/tools/sbin/soup           | 51 +++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
index 92532082a..91fa787f2 100644
--- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
+++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common
@@ -240,7 +240,7 @@ elastic_fleet_policy_create() {
         --arg DESC "$DESC" \
         --arg TIMEOUT $TIMEOUT \
         --arg FLEETSERVER "$FLEETSERVER" \
-        '{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER}'
+        '{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER,"advanced_settings":{"agent_logging_level": "warning"}}'
     )
     # Create Fleet Policy
     if ! fleet_api "agent_policies" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then
diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup
index a838e3275..44eda49bb 100755
--- a/salt/manager/tools/sbin/soup
+++ b/salt/manager/tools/sbin/soup
@@ -485,6 +485,54 @@ elasticsearch_backup_index_templates() {
   tar -czf /nsm/backup/3.0.0_elasticsearch_index_templates.tar.gz -C /opt/so/conf/elasticsearch/templates/index/ .
 }
 
+elasticfleet_set_agent_logging_level_warn() {
+  . /usr/sbin/so-elastic-fleet-common
+
+  local current_agent_policies
+  if ! current_agent_policies=$(fleet_api "agent_policies?perPage=1000"); then
+    echo "Warning: unable to retrieve Fleet agent policies"
+    return 0
+  fi
+
+  # Only update policies that are Security Onion defaults and do not already have user-configured advanced_settings.
+  local policies_to_update
+  policies_to_update=$(jq -c '
+    .items[]
+    | select(has("advanced_settings") | not)
+    | select(
+        .id == "so-grid-nodes_general"
+        or .id == "so-grid-nodes_heavy"
+        or .id == "endpoints-initial"
+        or (.id | startswith("FleetServer_"))
+      )
+    ' <<< "$current_agent_policies")
+
+  if [[ -z "$policies_to_update" ]]; then
+    return 0
+  fi
+
+  while IFS= read -r policy; do
+    [[ -z "$policy" ]] && continue
+
+    local policy_id policy_name policy_namespace
+    policy_id=$(jq -r '.id' <<< "$policy")
+    policy_name=$(jq -r '.name' <<< "$policy")
+    policy_namespace=$(jq -r '.namespace' <<< "$policy")
+
+    local update_logging
+    update_logging=$(jq -n \
+      --arg name "$policy_name" \
+      --arg namespace "$policy_namespace" \
+      '{name: $name, namespace: $namespace, advanced_settings: {agent_logging_level: "warning"}}'
+    )
+
+    echo "Setting elastic agent_logging_level to warning on policy '$policy_name' ($policy_id)."
+    if ! fleet_api "agent_policies/$policy_id" -XPUT -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$update_logging" >/dev/null; then
+      echo "  warning: failed to update agent policy '$policy_name' ($policy_id)" >&2
+    fi
+  done <<< "$policies_to_update"
+}
+
 ensure_postgres_local_pillar() {
   # Postgres was added as a service after 3.0.0, so the new pillar/top.sls
   # references postgres.soc_postgres / postgres.adv_postgres unconditionally.
@@ -553,6 +601,9 @@ post_to_3.1.0() {
   # file_roots of its own and --local would fail with "No matching sls found".
   salt-call state.apply postgres.telegraf_users queue=True || true
 
+  # Update default agent policies to use logging level warning.
+  elasticfleet_set_agent_logging_level_warn || true
+
   POSTVERSION=3.1.0
 }
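Note on PATCH 1/5: new policies receive the warning level at creation time via
elastic_fleet_policy_create(), while the soup migration above backfills the
Security Onion default policies on existing grids, skipping any policy that
already carries user-configured advanced_settings. A minimal sketch for
spot-checking the result, assuming the fleet_api helper from
so-elastic-fleet-common is sourced and the default policy id
"so-grid-nodes_general" exists on the grid (the ".item" response shape is an
assumption; verify against the Kibana version in use):

    . /usr/sbin/so-elastic-fleet-common
    fleet_api "agent_policies/so-grid-nodes_general" \
        | jq -r '.item.advanced_settings.agent_logging_level // "not set"'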
From 86966d277889e535f400901175d9200cda318f4f Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Fri, 1 May 2026 12:44:08 -0500
Subject: [PATCH 2/5] reauthorize unhealthy transform jobs using kibana 9.3.3 auth flow

---
 salt/manager/tools/sbin/soup | 79 ++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup
index 44eda49bb..c0f8b61c1 100755
--- a/salt/manager/tools/sbin/soup
+++ b/salt/manager/tools/sbin/soup
@@ -533,6 +533,82 @@ elasticfleet_set_agent_logging_level_warn() {
   done <<< "$policies_to_update"
 }
 
+check_transform_health_and_reauthorize() {
+  . /usr/sbin/so-elastic-fleet-common
+
+  echo "Checking integration transform jobs for unhealthy / unauthorized status..."
+
+  local transforms_doc stats_doc installed_doc
+  if ! transforms_doc=$(so-elasticsearch-query "_transform/_all?size=1000" --fail --retry 3 --retry-delay 5 2>/dev/null); then
+    echo "Unable to query for transform jobs, skipping reauthorization."
+    return 0
+  fi
+  if ! stats_doc=$(so-elasticsearch-query "_transform/_all/_stats?size=1000" --fail --retry 3 --retry-delay 5 2>/dev/null); then
+    echo "Unable to query for transform job stats, skipping reauthorization."
+    return 0
+  fi
+  if ! installed_doc=$(fleet_api "epm/packages/installed?perPage=500"); then
+    echo "Unable to list installed Fleet packages, skipping reauthorization."
+    return 0
+  fi
+
+  # Select all transforms that meet the following criteria:
+  # - unhealthy (any non-green health status)
+  # - metadata has run_as_kibana_system: false (this fix is specific to transforms started prior to Kibana 9.3.3)
+  # - not orphaned (the owning integration is not missing/corrupt/uninstalled)
+  local unhealthy_transforms
+  unhealthy_transforms=$(jq -c -n \
+    --argjson t "$transforms_doc" \
+    --argjson s "$stats_doc" \
+    --argjson i "$installed_doc" '
+    ($i.items | map({key: .name, value: .version}) | from_entries) as $pkg_ver
+    | ($s.transforms | map({key: .id, value: .health.status}) | from_entries) as $health
+    | [ $t.transforms[]
+        | select(._meta.run_as_kibana_system == false)
+        | select(($health[.id] // "unknown") != "green")
+        | {id, pkg: ._meta.package.name, ver: ($pkg_ver[._meta.package.name])}
+      ]
+    | if length == 0 then empty else . end
+    | (map(select(.ver == null)) | map({orphan: .id})[]),
+      (map(select(.ver != null))
+        | group_by(.pkg)
+        | map({pkg: .[0].pkg, ver: .[0].ver, transformIds: map(.id)})[])
+    ')
+
+  if [[ -z "$unhealthy_transforms" ]]; then
+    return 0
+  fi
+
+  local unhealthy_count
+  unhealthy_count=$(jq -s '[.[].transformIds? // empty | .[]] | length' <<< "$unhealthy_transforms")
+  echo "Found $unhealthy_count transform(s) needing reauthorization."
+
+  local total_failures=0
+  while IFS= read -r transform; do
+    [[ -z "$transform" ]] && continue
+    if jq -e 'has("orphan")' <<< "$transform" >/dev/null 2>&1; then
+      echo "Skipping transform not owned by any installed Fleet package: $(jq -r '.orphan' <<< "$transform")"
+      continue
+    fi
+
+    local pkg ver body resp
+    pkg=$(jq -r '.pkg' <<< "$transform")
+    ver=$(jq -r '.ver' <<< "$transform")
+    body=$(jq -c '{transforms: (.transformIds | map({transformId: .}))}' <<< "$transform")
+
+    echo "Reauthorizing transform(s) for ${pkg}-${ver}..."
+    resp=$(fleet_api "epm/packages/${pkg}/${ver}/transforms/authorize" \
+      -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' \
+      -d "$body") || { echo "Could not reauthorize transform(s) for ${pkg}-${ver}"; continue; }
+
+    (( total_failures += $(jq 'map(select(.success != true)) | length' <<< "$resp" 2>/dev/null || echo 0) ))
+  done <<< "$unhealthy_transforms"
+
+  if [[ "$total_failures" -gt 0 ]]; then
+    echo "Some transform(s) failed to reauthorize."
+  fi
+}
+
 ensure_postgres_local_pillar() {
   # Postgres was added as a service after 3.0.0, so the new pillar/top.sls
   # references postgres.soc_postgres / postgres.adv_postgres unconditionally.
@@ -604,6 +680,9 @@ post_to_3.1.0() {
   # Update default agent policies to use logging level warning.
   elasticfleet_set_agent_logging_level_warn || true
 
+  # Check for unhealthy / unauthorized integration transform jobs and attempt reauthorization.
+  check_transform_health_and_reauthorize || true
+
   POSTVERSION=3.1.0
 }
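Note on PATCH 2/5: the jq program emits one compact JSON object per line,
either {"orphan": <id>} for an unhealthy transform whose owning package is no
longer installed (these are skipped), or {pkg, ver, transformIds} grouping the
unhealthy transform ids per installed package. Each group is then POSTed to
the Fleet transforms/authorize endpoint shown above. A hedged manual
equivalent for a single transform (package name, version, and transform id
below are placeholders, not real integrations):

    . /usr/sbin/so-elastic-fleet-common
    fleet_api "epm/packages/example_pkg/1.2.3/transforms/authorize" \
        -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' \
        -d '{"transforms":[{"transformId":"logs-example_pkg.events-default"}]}'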
From 702b3585ccf4fe99e17154d8e8d48f2d368798f2 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Fri, 1 May 2026 12:57:59 -0500
Subject: [PATCH 3/5] exclude additional integration transform job failures

---
 salt/common/tools/sbin/so-log-check | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check
index f355e1bfe..a3d9c51d0 100755
--- a/salt/common/tools/sbin/so-log-check
+++ b/salt/common/tools/sbin/so-log-check
@@ -227,7 +227,7 @@ if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then
   EXCLUDED_ERRORS="$EXCLUDED_ERRORS|from NIC checksum offloading" # zeek reporter.log
   EXCLUDED_ERRORS="$EXCLUDED_ERRORS|marked for removal" # docker container getting recycled
   EXCLUDED_ERRORS="$EXCLUDED_ERRORS|tcp 127.0.0.1:6791: bind: address already in use" # so-elastic-fleet agent restarting. Seen starting w/ 8.18.8 https://github.com/elastic/kibana/issues/201459
-  EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint|armis|o365_metrics|microsoft_sentinel|snyk).*user so_kibana lacks the required permissions \[(logs|metrics)-\1" # Known issue with integrations starting transform jobs that are explicitly not allowed to start as a system user. (installed as so_elastic / so_kibana)
+  EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint|armis|o365_metrics|microsoft_sentinel|snyk|cyera|island_browser).*user so_kibana lacks the required permissions \[(logs|metrics)-\1" # Known issue with integrations starting transform jobs that are explicitly not allowed to start as a system user. This error should not be seen on fresh ES 9.3.3 installs or after SO 3.1.0 with soup's addition of check_transform_health_and_reauthorize()
   EXCLUDED_ERRORS="$EXCLUDED_ERRORS|manifest unknown" # appears in so-dockerregistry log for so-tcpreplay following docker upgrade to 29.2.1-1
 fi
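Note on PATCH 3/5: the \1 back-reference ties the index named in the
permissions error back to the same integration that started the transform, so
permission failures from other integrations still surface. Assuming the
exclusions are applied with GNU grep -E (which honors back-references as an
extension), the pattern can be sanity-checked against a fabricated log line;
the alternation here is shortened to the two newly added integrations:

    sample='[TransformTask] [logs-cyera.x-default] user so_kibana lacks the required permissions [logs-cyera.x]'
    echo "$sample" \
        | grep -E 'TransformTask\] \[logs-(cyera|island_browser).*user so_kibana lacks the required permissions \[(logs|metrics)-\1' \
        && echo "would be excluded"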
From 2203037ce75fdf668340eae7ba41ab99b2af8bc5 Mon Sep 17 00:00:00 2001
From: reyesj2 <94730068+reyesj2@users.noreply.github.com>
Date: Mon, 4 May 2026 10:52:37 -0500
Subject: [PATCH 4/5] fleet package registry health check

---
 salt/elastic-fleet-package-registry/enabled.sls | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/salt/elastic-fleet-package-registry/enabled.sls b/salt/elastic-fleet-package-registry/enabled.sls
index e2833f5be..a5a8b9d22 100644
--- a/salt/elastic-fleet-package-registry/enabled.sls
+++ b/salt/elastic-fleet-package-registry/enabled.sls
@@ -51,6 +51,16 @@ so-elastic-fleet-package-registry:
       - {{ ULIMIT.name }}={{ ULIMIT.soft }}:{{ ULIMIT.hard }}
     {% endfor %}
     {% endif %}
+
+wait_for_so-elastic-fleet-package-registry:
+  http.wait_for_successful_query:
+    - name: "http://localhost:8080/health"
+    - status: 200
+    - wait_for: 300
+    - request_interval: 15
+    - require:
+      - docker_container: so-elastic-fleet-package-registry
+
 delete_so-elastic-fleet-package-registry_so-status.disabled:
   file.uncomment:
     - name: /opt/so/conf/so-status/so-status.conf

From b701664e04cb3d69d395cc6c178c7ef3d16289c9 Mon Sep 17 00:00:00 2001
From: Mike Reeves
Date: Mon, 4 May 2026 12:09:35 -0400
Subject: [PATCH 5/5] Fix unsafe PyYAML load in filecheck

---
 salt/strelka/filecheck/filecheck | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/salt/strelka/filecheck/filecheck b/salt/strelka/filecheck/filecheck
index 758248083..35b47ce71 100644
--- a/salt/strelka/filecheck/filecheck
+++ b/salt/strelka/filecheck/filecheck
@@ -15,7 +15,7 @@ from watchdog.observers import Observer
 from watchdog.events import FileSystemEventHandler
 
 with open("/opt/so/conf/strelka/filecheck.yaml", "r") as ymlfile:
-    cfg = yaml.load(ymlfile, Loader=yaml.Loader)
+    cfg = yaml.safe_load(ymlfile)
 
 extract_path = cfg["filecheck"]["extract_path"]
 historypath = cfg["filecheck"]["historypath"]
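Note on PATCH 4/5: the http.wait_for_successful_query state blocks dependent
states until the package registry answers on its health endpoint, retrying
every 15 seconds for up to 300 seconds. A rough shell equivalent of the wait
it performs (port and path taken from the state; the 20-attempt bound is
derived from wait_for / request_interval):

    # poll the registry health endpoint until it returns 200, ~300s budget
    for _ in $(seq 1 20); do
        curl -sf -o /dev/null "http://localhost:8080/health" && break
        sleep 15
    done

Note on PATCH 5/5: yaml.safe_load only constructs plain YAML types, whereas
yaml.load with Loader=yaml.Loader can instantiate arbitrary Python objects
from tagged input. Behavior is unchanged for the plain mappings that
filecheck.yaml contains; the switch removes a code-execution risk should the
config file ever be attacker-influenced.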