#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{%- import_yaml 'elasticfleet/defaults.yaml' as ELASTICFLEETDEFAULTS %}
{%- set SUPPORTED_PACKAGES = salt['pillar.get']('elasticfleet:packages', default=ELASTICFLEETDEFAULTS.elasticfleet.packages, merge=True) %}
{%- set AUTO_UPGRADE_INTEGRATIONS = salt['pillar.get']('elasticfleet:config:auto_upgrade_integrations', default=false) %}

# Pull in the shared Fleet helper library (presumably the source of the fleet_api and
# elastic_fleet_* functions called further down — they are not defined in this file).
source /usr/sbin/so-elastic-fleet-common

# Probe Kibana before doing any work. Only curl's exit status is inspected here; the
# response body and cookie jar (written to stdout via "--cookie-jar -") are just captured.
if ! curl_output=$(curl --silent --config /opt/so/conf/elasticsearch/curl.config --cookie-jar - --request GET http://localhost:5601/); then
    echo "Error: Failed to connect to Kibana."
    exit 1
fi

# Split on newlines only from here on; the unquoted expansion of $agent_policies in the
# policy loop below depends on this setting.
IFS=$'\n'

# Collect the agent policy ids up front and bail out if the lookup fails.
if ! agent_policies=$(elastic_fleet_agent_policy_ids); then
    echo "Error: Failed to retrieve agent policies."
    exit 1
fi

default_packages=({% for pkg in SUPPORTED_PACKAGES %}"{{ pkg }}"{% if not loop.last %} {% endif %}{% endfor %})
# The same list as a JSON array of strings (one raw input line per package name), passed to
# the jq filter below via --argjson.
default_packages_json=$(printf '%s\n' "${default_packages[@]}" | jq -Rn '[inputs]')

# Output lock (serializes concurrent job output) and failure file (one marker line per
# failed integration). Mirrors the pattern used by elastic_fleet_load_integrations_dir.
# Both mktemp calls are checked: with an empty path the background jobs' redirections
# (9>>"$OUTPUT_LOCK", >>"$FAIL_FILE") would fail, silently dropping their output and
# failure markers.
if ! OUTPUT_LOCK=$(mktemp); then
    echo "Error: Failed to create output lock file."
    exit 1
fi
if ! FAIL_FILE=$(mktemp); then
    echo "Error: Failed to create failure marker file."
    # The EXIT trap is not installed yet, so remove the lock file by hand.
    rm -f "$OUTPUT_LOCK"
    exit 1
fi
# Remove both temp files on every exit path.
trap 'rm -f "$OUTPUT_LOCK" "$FAIL_FILE"' EXIT

# Cache of package name -> latest available version, so the same package is only looked up
# once instead of once per (policy, integration).
declare -A LATEST_VERSION_CACHE

# For every agent policy, compare each eligible integration's installed package version with
# the latest available version and dispatch a background dry run + upgrade for any that
# differ. $agent_policies is expanded unquoted on purpose: it is split using the newline-only
# IFS set earlier in this script.
for AGENT_POLICY in $agent_policies; do
    # Fetch the agent policy a single time; package name/version and integration id are all
    # extracted locally below instead of re-fetching the same policy per integration.
    if ! POLICY_JSON=$(fleet_api "agent_policies/$AGENT_POLICY"); then
        echo "Error: Failed to retrieve agent policy '$AGENT_POLICY'."
        # Jobs dispatched for earlier policies may still be running. Let them finish before
        # exiting so the EXIT trap does not delete OUTPUT_LOCK/FAIL_FILE underneath them and
        # no upgrade is left running unattended.
        wait
        # this script upgrades default integration packages, exit 1 and let salt handle retrying
        exit 1
    fi

    # One jq pass emits name/package.name/package.version/id for every eligible integration.
    # The endpoint/fleet_server skips and the default-package gate are applied here in jq.
    # $defaults (not $def, a jq reserved keyword) holds the default package list.
    # Tab is an IFS whitespace character, so read collapses consecutive tabs: this parsing
    # relies on all four fields being non-empty for every emitted row.
    while IFS=$'\t' read -r INTEGRATION PACKAGE_NAME PACKAGE_VERSION INTEGRATION_ID; do
        [ -n "$INTEGRATION" ] || continue

        # Look up the latest available version once per package, then memoize it.
        if [[ -z "${LATEST_VERSION_CACHE[$PACKAGE_NAME]+set}" ]]; then
            if ! AVAILABLE_VERSION=$(elastic_fleet_package_latest_version_check "$PACKAGE_NAME"); then
                echo "Error: Failed getting latest version for $PACKAGE_NAME"
                # Same reasoning as above: drain in-flight jobs before the EXIT trap runs.
                wait
                exit 1
            fi
            LATEST_VERSION_CACHE[$PACKAGE_NAME]=$AVAILABLE_VERSION
        fi
        AVAILABLE_VERSION=${LATEST_VERSION_CACHE[$PACKAGE_NAME]}

        if [[ "$PACKAGE_VERSION" != "$AVAILABLE_VERSION" ]]; then
            # Dry run, then (if clean) the actual upgrade, dispatched as a throttled background
            # job. Each job builds its full log into one block, then flushes it under a single
            # shared lock (OUTPUT_LOCK) so concurrent jobs never interleave on stdout; a failed
            # job also appends a marker line to FAIL_FILE while holding that same lock.
            elastic_fleet_throttle
            {
                block=$'\n'"Current $PACKAGE_NAME package version ($PACKAGE_VERSION) is not the same as the latest available package ($AVAILABLE_VERSION)..."$'\n'
                block+="Upgrading $INTEGRATION..."$'\n'"Starting dry run..."$'\n'
                fail=""
                if ! DRYRUN_OUTPUT=$(elastic_fleet_integration_policy_dryrun_upgrade "$INTEGRATION_ID"); then
                    block+="Error: Failed to complete dry run for '$INTEGRATION_ID'."$'\n'
                    fail="dryrun $INTEGRATION"
                # The jq filter is quoted so its [] can never be treated as a glob pattern.
                elif [[ "$(jq '.[].hasErrors' <<<"$DRYRUN_OUTPUT")" == "false" ]]; then
                    block+="No errors detected. Proceeding with upgrade..."$'\n'
                    # NOTE(review): anything this helper prints goes straight to stdout, outside
                    # the locked block below — confirm it is silent on success.
                    if ! elastic_fleet_integration_policy_upgrade "$INTEGRATION_ID"; then
                        block+="Error: Upgrade failed for $PACKAGE_NAME with integration ID '$INTEGRATION_ID'."$'\n'
                        fail="upgrade $INTEGRATION"
                    fi
                else
                    block+="Errors detected during dry run for $PACKAGE_NAME policy upgrade..."$'\n'
                    fail="dryrun-errors $INTEGRATION"
                fi
                {
                    # fd 9 is opened on OUTPUT_LOCK by the redirection on this group.
                    flock 9
                    printf '%s' "$block"
                    [ -n "$fail" ] && printf '%s\n' "$fail" >>"$FAIL_FILE"
                } 9>>"$OUTPUT_LOCK"
            } &
        fi
    done < <(jq -r --argjson defaults "$default_packages_json" '
        .item.package_policies[]
        | select(.name != "elastic-defend-endpoints")
        | select(.name | startswith("fleet_server-") | not)
        {%- if not AUTO_UPGRADE_INTEGRATIONS %}
        | select(.package.name | IN($defaults[]))
        {%- endif %}
        | [.name, .package.name, .package.version, .id] | @tsv
    ' <<<"$POLICY_JSON")
done

# Block until all background dry-run/upgrade jobs have completed.
wait

# FAIL_FILE holds one marker line per failed integration. Empty means every job succeeded:
# print the closing blank line. Otherwise list the failures and exit non-zero.
if [[ ! -s "$FAIL_FILE" ]]; then
    echo
else
    printf '\nFailed integration upgrades:\n'
    cat "$FAIL_FILE"
    exit 1
fi
