diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check index f355e1bfe..a3d9c51d0 100755 --- a/salt/common/tools/sbin/so-log-check +++ b/salt/common/tools/sbin/so-log-check @@ -227,7 +227,7 @@ if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then EXCLUDED_ERRORS="$EXCLUDED_ERRORS|from NIC checksum offloading" # zeek reporter.log EXCLUDED_ERRORS="$EXCLUDED_ERRORS|marked for removal" # docker container getting recycled EXCLUDED_ERRORS="$EXCLUDED_ERRORS|tcp 127.0.0.1:6791: bind: address already in use" # so-elastic-fleet agent restarting. Seen starting w/ 8.18.8 https://github.com/elastic/kibana/issues/201459 - EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint|armis|o365_metrics|microsoft_sentinel|snyk).*user so_kibana lacks the required permissions \[(logs|metrics)-\1" # Known issue with integrations starting transform jobs that are explicitly not allowed to start as a system user. (installed as so_elastic / so_kibana) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint|armis|o365_metrics|microsoft_sentinel|snyk|cyera|island_browser).*user so_kibana lacks the required permissions \[(logs|metrics)-\1" # Known issue with integrations starting transform jobs that are explicitly not allowed to start as a system user. 
This error should not be seen on fresh ES 9.3.3 installs or after SO 3.1.0 with soup's addition of check_transform_health_and_reauthorize() EXCLUDED_ERRORS="$EXCLUDED_ERRORS|manifest unknown" # appears in so-dockerregistry log for so-tcpreplay following docker upgrade to 29.2.1-1 fi diff --git a/salt/elastic-fleet-package-registry/enabled.sls b/salt/elastic-fleet-package-registry/enabled.sls index 8fb0e0f55..3bf15c984 100644 --- a/salt/elastic-fleet-package-registry/enabled.sls +++ b/salt/elastic-fleet-package-registry/enabled.sls @@ -52,6 +52,16 @@ so-elastic-fleet-package-registry: - {{ ULIMIT.name }}={{ ULIMIT.soft }}:{{ ULIMIT.hard }} {% endfor %} {% endif %} + +wait_for_so-elastic-fleet-package-registry: + http.wait_for_successful_query: + - name: "http://localhost:8080/health" + - status: 200 + - wait_for: 300 + - request_interval: 15 + - require: + - docker_container: so-elastic-fleet-package-registry + delete_so-elastic-fleet-package-registry_so-status.disabled: file.uncomment: - name: /opt/so/conf/so-status/so-status.conf diff --git a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common index 92532082a..91fa787f2 100644 --- a/salt/elasticfleet/tools/sbin/so-elastic-fleet-common +++ b/salt/elasticfleet/tools/sbin/so-elastic-fleet-common @@ -240,7 +240,7 @@ elastic_fleet_policy_create() { --arg DESC "$DESC" \ --arg TIMEOUT $TIMEOUT \ --arg FLEETSERVER "$FLEETSERVER" \ - '{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER}' + '{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER,"advanced_settings":{"agent_logging_level": "warning"}}' ) # Create Fleet Policy if ! 
fleet_api "agent_policies" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then diff --git a/salt/elasticsearch/files/ingest/common b/salt/elasticsearch/files/ingest/common index b7048cf3b..409bf5af2 100644 --- a/salt/elasticsearch/files/ingest/common +++ b/salt/elasticsearch/files/ingest/common @@ -63,7 +63,7 @@ { "set": { "if": "ctx.event?.dataset != null && !ctx.event.dataset.contains('.')", "field": "event.dataset", "value": "{{event.module}}.{{event.dataset}}" } }, { "split": { "if": "ctx.event?.dataset != null && ctx.event.dataset.contains('.')", "field": "event.dataset", "separator": "\\.", "target_field": "dataset_tag_temp" } }, { "append": { "if": "ctx.dataset_tag_temp != null", "field": "tags", "value": "{{dataset_tag_temp.1}}" } }, - { "grok": { "if": "ctx.http?.response?.status_code != null", "field": "http.response.status_code", "patterns": ["%{NUMBER:http.response.status_code:long} %{GREEDYDATA}"]} }, + { "convert": { "if": "ctx.http?.response?.status_code != null", "field": "http.response.status_code", "type":"long", "ignore_missing": true } }, { "set": { "if": "ctx?.metadata?.kafka != null" , "field": "kafka.id", "value": "{{metadata.kafka.partition}}{{metadata.kafka.offset}}{{metadata.kafka.timestamp}}", "ignore_failure": true } }, { "remove": { "field": [ "message2", "type", "fields", "category", "module", "dataset", "dataset_tag_temp", "event.dataset_temp" ], "ignore_missing": true, "ignore_failure": true } }, { "pipeline": { "name": "global@custom", "ignore_missing_pipeline": true, "description": "[Fleet] Global pipeline for all data streams" } } diff --git a/salt/manager/tools/sbin/soup b/salt/manager/tools/sbin/soup index a838e3275..c0f8b61c1 100755 --- a/salt/manager/tools/sbin/soup +++ b/salt/manager/tools/sbin/soup @@ -485,6 +485,130 @@ elasticsearch_backup_index_templates() { tar -czf /nsm/backup/3.0.0_elasticsearch_index_templates.tar.gz -C /opt/so/conf/elasticsearch/templates/index/ . 
} +elasticfleet_set_agent_logging_level_warn() { + . /usr/sbin/so-elastic-fleet-common + + local current_agent_policies + if ! current_agent_policies=$(fleet_api "agent_policies?perPage=1000"); then + echo "Warning: unable to retrieve Fleet agent policies" + return 0 + fi + + # Only updating policies that are within Security Onion defaults and do not already have any user configured advanced_settings. + local policies_to_update + policies_to_update=$(jq -c ' + .items[] + | select(has("advanced_settings") | not) + | select( + .id == "so-grid-nodes_general" + or .id == "so-grid-nodes_heavy" + or .id == "endpoints-initial" + or (.id | startswith("FleetServer_")) + ) + ' <<< "$current_agent_policies") + + if [[ -z "$policies_to_update" ]]; then + return 0 + fi + + while IFS= read -r policy; do + [[ -z "$policy" ]] && continue + + local policy_id policy_name policy_namespace + policy_id=$(jq -r '.id' <<< "$policy") + policy_name=$(jq -r '.name' <<< "$policy") + policy_namespace=$(jq -r '.namespace' <<< "$policy") + + local update_logging + update_logging=$(jq -n \ + --arg name "$policy_name" \ + --arg namespace "$policy_namespace" \ + '{name: $name, namespace: $namespace, advanced_settings: {agent_logging_level: "warning"}}' + ) + + echo "Setting elastic agent_logging_level to warning on policy '$policy_name' ($policy_id)." + if ! fleet_api "agent_policies/$policy_id" -XPUT -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$update_logging" >/dev/null; then + echo " warning: failed to update agent policy '$policy_name' ($policy_id)" >&2 + fi + done <<< "$policies_to_update" +} + +check_transform_health_and_reauthorize() { + . /usr/sbin/so-elastic-fleet-common + + echo "Checking integration transform jobs for unhealthy / unauthorized status..." + + local transforms_doc stats_doc installed_doc + if ! 
transforms_doc=$(so-elasticsearch-query "_transform/_all?size=1000" --fail --retry 3 --retry-delay 5 2>/dev/null); then + echo "Unable to query for transform jobs, skipping reauthorization." + return 0 + fi + if ! stats_doc=$(so-elasticsearch-query "_transform/_all/_stats?size=1000" --fail --retry 3 --retry-delay 5 2>/dev/null); then + echo "Unable to query for transform job stats, skipping reauthorization." + return 0 + fi + if ! installed_doc=$(fleet_api "epm/packages/installed?perPage=500"); then + echo "Unable to list installed Fleet packages, skipping reauthorization." + return 0 + fi + + # Get all transforms that meet the following + # - unhealthy (any non-green health status) + # - metadata has run_as_kibana_system: false (this fix is specific to transforms started prior to Kibana 9.3.3) + # - are not orphaned (integration is not somehow missing/corrupt/uninstalled) + local unhealthy_transforms + unhealthy_transforms=$(jq -c -n \ + --argjson t "$transforms_doc" \ + --argjson s "$stats_doc" \ + --argjson i "$installed_doc" ' + ($i.items | map({key: .name, value: .version}) | from_entries) as $pkg_ver + | ($s.transforms | map({key: .id, value: .health.status}) | from_entries) as $health + | [ $t.transforms[] + | select(._meta.run_as_kibana_system == false) + | select(($health[.id] // "unknown") != "green") + | {id, pkg: ._meta.package.name, ver: ($pkg_ver[._meta.package.name])} + ] + | if length == 0 then empty else . end + | (map(select(.ver == null)) | map({orphan: .id})[]), + (map(select(.ver != null)) + | group_by(.pkg) + | map({pkg: .[0].pkg, ver: .[0].ver, transformIds: map(.id)})[]) + ') + + if [[ -z "$unhealthy_transforms" ]]; then + return 0 + fi + + local unhealthy_count + unhealthy_count=$(jq -s '[.[].transformIds? // empty | .[]] | length' <<< "$unhealthy_transforms") + echo "Found $unhealthy_count transform(s) needing reauthorization." 
+ + local total_failures=0 + while IFS= read -r transform; do + [[ -z "$transform" ]] && continue + if jq -e 'has("orphan")' <<< "$transform" >/dev/null 2>&1; then + echo "Skipping transform not owned by any installed Fleet package: $(jq -r '.orphan' <<< "$transform")" + continue + fi + + local pkg ver body resp + pkg=$(jq -r '.pkg' <<< "$transform") + ver=$(jq -r '.ver' <<< "$transform") + body=$(jq -c '{transforms: (.transformIds | map({transformId: .}))}' <<< "$transform") + + echo "Reauthorizing transform(s) for ${pkg}-${ver}..." + resp=$(fleet_api "epm/packages/${pkg}/${ver}/transforms/authorize" \ + -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' \ + -d "$body") || { echo "Could not reauthorize transform(s) for ${pkg}-${ver}"; continue; } + + (( total_failures += $(jq 'map(select(.success != true)) | length' <<< "$resp" 2>/dev/null) )) + done <<< "$unhealthy_transforms" + + if [[ "$total_failures" -gt 0 ]]; then + echo "Some transform(s) failed to reauthorize." + fi +} + ensure_postgres_local_pillar() { # Postgres was added as a service after 3.0.0, so the new pillar/top.sls # references postgres.soc_postgres / postgres.adv_postgres unconditionally. @@ -553,6 +677,12 @@ post_to_3.1.0() { # file_roots of its own and --local would fail with "No matching sls found". salt-call state.apply postgres.telegraf_users queue=True || true + # Update default agent policies to use logging level warn. 
+ elasticfleet_set_agent_logging_level_warn || true + + # Check for unhealthy / unauthorized integration transform jobs and attempt reauthorizations + check_transform_health_and_reauthorize || true + POSTVERSION=3.1.0 } diff --git a/salt/strelka/filecheck/filecheck b/salt/strelka/filecheck/filecheck index 758248083..35b47ce71 100644 --- a/salt/strelka/filecheck/filecheck +++ b/salt/strelka/filecheck/filecheck @@ -15,7 +15,7 @@ from watchdog.observers import Observer from watchdog.events import FileSystemEventHandler with open("/opt/so/conf/strelka/filecheck.yaml", "r") as ymlfile: - cfg = yaml.load(ymlfile, Loader=yaml.Loader) + cfg = yaml.safe_load(ymlfile) extract_path = cfg["filecheck"]["extract_path"] historypath = cfg["filecheck"]["historypath"] diff --git a/setup/so-functions b/setup/so-functions index 3cd665076..c94b8eee7 100755 --- a/setup/so-functions +++ b/setup/so-functions @@ -745,6 +745,56 @@ configure_network_sensor() { return $err } +configure_management_bond() { + local bond_name="bond1" + local bond_mode=${MBOND_MODE:-active-backup} + + info "Setting up $bond_name management interface with mode $bond_mode" + + if [[ ${#MBNICS[@]} -eq 0 ]]; then + error "[ERROR] No management bond NICs were selected." + fail_setup + fi + + nmcli -t -f NAME con show | grep -Fxq "$bond_name" + local found_int=$? + + if [[ $found_int != 0 ]]; then + nmcli con add type bond ifname "$bond_name" con-name "$bond_name" mode "$bond_mode" -- \ + ipv6.method ignore \ + connection.autoconnect yes >> "$setup_log" 2>&1 + else + nmcli con mod "$bond_name" \ + bond.options "mode=$bond_mode" \ + ipv6.method ignore \ + connection.autoconnect yes >> "$setup_log" 2>&1 + fi + + local err=0 + for MBNIC in "${MBNICS[@]}"; do + local slave_name="$bond_name-slave-$MBNIC" + + nmcli -t -f NAME con show | grep -Fxq "$slave_name" + found_int=$? 
+ + if [[ $found_int != 0 ]]; then + nmcli con add type ethernet ifname "$MBNIC" con-name "$slave_name" master "$bond_name" -- \ + connection.autoconnect yes >> "$setup_log" 2>&1 + else + nmcli con mod "$slave_name" \ + connection.master "$bond_name" \ + connection.slave-type bond \ + connection.autoconnect yes >> "$setup_log" 2>&1 + fi + + nmcli con up "$slave_name" >> "$setup_log" 2>&1 + local ret=$? + [[ $ret -eq 0 ]] || err=$ret + done + + return $err +} + configure_hyper_bridge() { info "Setting up hypervisor bridge" info "Checking $MNIC ipv4.method is auto or manual" @@ -999,6 +1049,11 @@ filter_unused_nics() { grep_string="$grep_string\|$BONDNIC" done fi + if [[ $MBNICS ]]; then + for BONDNIC in "${MBNICS[@]}"; do + grep_string="$grep_string\|$BONDNIC" + done + fi # Finally, set filtered_nics to any NICs we aren't using (and ignore interfaces that aren't of use) filtered_nics=$(ip link | awk -F: '$0 !~ "lo|vir|veth|br|docker|wl|^[^0-9]"{print $2}' | grep -vwe "$grep_string" | sed 's/ //g' | sed -r 's/(.*)(\.[0-9]+)@\1/\1\2/g') @@ -1388,7 +1443,7 @@ network_init() { title "Initializing Network" disable_ipv6 set_hostname - if [[ ( $is_iso || $is_desktop_iso ) ]]; then + if [[ $is_iso || $is_desktop_iso ]]; then set_management_interface fi } @@ -1701,6 +1756,24 @@ remove_package() { fi } +ensure_pyyaml() { + title "Ensuring python3-pyyaml is installed" + if rpm -q python3-pyyaml >/dev/null 2>&1; then + info "python3-pyyaml already installed" + return 0 + fi + info "python3-pyyaml not found, attempting to install" + set -o pipefail + dnf -y install python3-pyyaml 2>&1 | tee -a "$setup_log" + local result=$? + set +o pipefail + if [[ $result -ne 0 ]] || ! 
rpm -q python3-pyyaml >/dev/null 2>&1; then + error "Failed to install python3-pyyaml (exit=$result)" + fail_setup + fi + info "python3-pyyaml installed successfully" +} + # When updating the salt version, also update the version in securityonion-builds/images/iso-task/Dockerfile and salt/salt/master.defaults.yaml and salt/salt/minion.defaults.yaml # CAUTION! SALT VERSION UDDATES - READ BELOW # When updating the salt version, also update the version in: @@ -2084,8 +2157,12 @@ set_initial_firewall_access() { # Set up the management interface on the ISO set_management_interface() { title "Setting up the main interface" + if [[ $MNIC == "bond1" ]]; then + configure_management_bond || fail_setup + fi + if [ "$address_type" = 'DHCP' ]; then - logCmd "nmcli con mod $MNIC connection.autoconnect yes" + logCmd "nmcli con mod $MNIC connection.autoconnect yes ipv4.method auto" logCmd "nmcli con up $MNIC" logCmd "nmcli -p connection show $MNIC" else diff --git a/setup/so-setup b/setup/so-setup index 7875b9c99..6c77e781c 100755 --- a/setup/so-setup +++ b/setup/so-setup @@ -66,6 +66,9 @@ set_timezone # Let's see what OS we are dealing with here detect_os +# Ensure python3-pyyaml is available before any code that may need so-yaml/PyYAML +ensure_pyyaml + # Check to see if this is the setup type of "desktop". is_desktop= diff --git a/setup/so-whiptail b/setup/so-whiptail index 9a1d21150..6188d3d30 100755 --- a/setup/so-whiptail +++ b/setup/so-whiptail @@ -845,18 +845,99 @@ whiptail_management_nic() { [ -n "$TESTING" ] && return filter_unused_nics + local management_nic_options=( "${nic_list_management[@]}" ) + if [[ $is_iso || $is_desktop_iso ]]; then + management_nic_options+=( "BOND" "Configure a bonded management interface" ) + fi - MNIC=$(whiptail --title "$whiptail_title" --menu "Please select the NIC you would like to use for management.\n\nUse the arrow keys to move around and the Enter key to select." 
20 75 12 "${nic_list_management[@]}" 3>&1 1>&2 2>&3 ) + MNIC=$(whiptail --title "$whiptail_title" --menu "Please select the NIC you would like to use for management.\n\nUse the arrow keys to move around and the Enter key to select." 20 75 12 "${management_nic_options[@]}" 3>&1 1>&2 2>&3 ) local exitstatus=$? whiptail_check_exitstatus $exitstatus while [ -z "$MNIC" ] do - MNIC=$(whiptail --title "$whiptail_title" --menu "Please select the NIC you would like to use for management.\n\nUse the arrow keys to move around and the Enter key to select." 22 75 12 "${nic_list_management[@]}" 3>&1 1>&2 2>&3 ) + MNIC=$(whiptail --title "$whiptail_title" --menu "Please select the NIC you would like to use for management.\n\nUse the arrow keys to move around and the Enter key to select." 22 75 12 "${management_nic_options[@]}" 3>&1 1>&2 2>&3 ) local exitstatus=$? whiptail_check_exitstatus $exitstatus done + if [[ $MNIC == "BOND" ]]; then + whiptail_management_bond + fi +} + +whiptail_management_bond() { + + [ -n "$TESTING" ] && return + + MBOND_MODE=$(whiptail --title "$whiptail_title" --menu \ + "Choose the bond mode for the management interface.\n\nThe management bond will be created as bond1." 20 75 7 \ + "active-backup" "One active NIC with failover (recommended)" \ + "balance-rr" "Round-robin transmit policy" \ + "balance-xor" "Transmit based on selected hash policy" \ + "broadcast" "Transmit everything on all slave interfaces" \ + "802.3ad" "Dynamic link aggregation (requires switch support)" \ + "balance-tlb" "Adaptive transmit load balancing" \ + "balance-alb" "Adaptive load balancing" 3>&1 1>&2 2>&3) + local exitstatus=$? + whiptail_check_exitstatus $exitstatus + + while [ -z "$MBOND_MODE" ] + do + MBOND_MODE=$(whiptail --title "$whiptail_title" --menu \ + "Choose the bond mode for the management interface.\n\nThe management bond will be created as bond1." 
20 75 7 \ + "active-backup" "One active NIC with failover (recommended)" \ + "balance-rr" "Round-robin transmit policy" \ + "balance-xor" "Transmit based on selected hash policy" \ + "broadcast" "Transmit everything on all slave interfaces" \ + "802.3ad" "Dynamic link aggregation (requires switch support)" \ + "balance-tlb" "Adaptive transmit load balancing" \ + "balance-alb" "Adaptive load balancing" 3>&1 1>&2 2>&3) + local exitstatus=$? + whiptail_check_exitstatus $exitstatus + done + + whiptail_management_bond_nics + MNIC="bond1" + + export MBOND_MODE MNIC +} + +whiptail_management_bond_nics() { + + [ -n "$TESTING" ] && return + + MBNICS=() + filter_unused_nics + + MBNICS=$(whiptail --title "$whiptail_title" --checklist "Please add NICs to the Management Interface:" 20 75 12 "${nic_list[@]}" 3>&1 1>&2 2>&3) + local exitstatus=$? + whiptail_check_exitstatus $exitstatus + + while [ -z "$MBNICS" ] + do + MBNICS=$(whiptail --title "$whiptail_title" --checklist "Please add NICs to the Management Interface:" 20 75 12 "${nic_list[@]}" 3>&1 1>&2 2>&3) + local exitstatus=$? + whiptail_check_exitstatus $exitstatus + done + + MBNICS=$(echo "$MBNICS" | tr -d '"') + + IFS=' ' read -ra MBNICS <<< "$MBNICS" + + for bond_nic in "${MBNICS[@]}"; do + for dev_status in "${nmcli_dev_status_list[@]}"; do + if [[ $dev_status == "${bond_nic}:unmanaged" ]]; then + whiptail \ + --title "$whiptail_title" \ + --msgbox "$bond_nic is unmanaged by Network Manager. Please remove it from other network management tools then re-run setup." \ + 8 75 + exit + fi + done + done + + export MBNICS } whiptail_net_method() {