#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.

. /usr/sbin/so-common

# Statefiles: each one is created after its template set loads without
# failures, and its presence is what later runs test to decide whether to skip.
SO_STATEFILE_SUCCESS="/opt/so/state/estemplates.txt"
ADDON_STATEFILE_SUCCESS="/opt/so/state/addon_estemplates.txt"

# Locations of the template JSON files on disk.
ELASTICSEARCH_TEMPLATES_DIR="/opt/so/conf/elasticsearch/templates"
SO_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/index"
ADDON_TEMPLATES_DIR="${ELASTICSEARCH_TEMPLATES_DIR}/addon-index"

# Failure tally for the phase currently being processed.
FAILED_COUNT=0
FAILED_NAMES=()

# Defaults for the command-line switches (--heavynode, --force, --verbose).
IS_HEAVYNODE="false"
FORCE="false"
VERBOSE="false"

# Load failures are fatal unless a process matching "soup" is running, in which
# case errors are ignored.
SHOULD_EXIT_ON_FAILURE="true"
if pgrep soup >/dev/null; then
    SHOULD_EXIT_ON_FAILURE="false"
fi

# Consume the command line one argument at a time. Each recognised flag flips
# its global switch; anything else prints the usage text and exits with status 1.
until (( $# == 0 )); do
    if [[ "$1" == "--heavynode" ]]; then
        IS_HEAVYNODE="true"
    elif [[ "$1" == "--force" ]]; then
        FORCE="true"
    elif [[ "$1" == "--verbose" ]]; then
        VERBOSE="true"
    else
        printf '%s\n' \
            "Usage: $0 [options]" \
            "Options:" \
            "  --heavynode     Only loads index templates specific to heavynodes" \
            "  --force     Force reload all templates regardless of statefiles (default: false)" \
            "  --verbose     Enable verbose output"
        exit 1
    fi
    shift
done

# Max number of concurrent template PUT jobs. Override via env if needed.
# The value must be a positive decimal integer; anything else (0, negative,
# non-numeric, leading zeros) is rejected and replaced by the default of 10.
MAX_TEMPLATE_JOBS=${MAX_TEMPLATE_JOBS:-10}
if [[ ! "$MAX_TEMPLATE_JOBS" =~ ^[1-9][0-9]*$ ]]; then
    echo "Ignoring invalid MAX_TEMPLATE_JOBS value '$MAX_TEMPLATE_JOBS'; using 10." >&2
    MAX_TEMPLATE_JOBS=10
fi

# Block until fewer than MAX_TEMPLATE_JOBS background jobs are running.
# Globals:   MAX_TEMPLATE_JOBS (read)
# Returns:   always 0; the exit status of reaped jobs is deliberately ignored
#            because load failures are tracked through marker files instead.
template_throttle() {
    local limit="$MAX_TEMPLATE_JOBS"
    local running

    # Re-validate in case the variable was changed after startup. A limit of 0
    # (or a word that arithmetic evaluates as 0) keeps "running >= limit" true
    # forever, and 'wait -n' returns immediately when there is nothing left to
    # wait for, so the loop would otherwise spin without ever finishing.
    [[ "$limit" =~ ^[1-9][0-9]*$ ]] || limit=10

    while :; do
        running=$(jobs -rp | wc -l)
        if (( running < limit )); then
            return 0
        fi
        # Reap whichever background job finishes next, then re-count.
        wait -n
    done
}

# Per-job failure markers and an output lock for serializing parallel job output.
# Each failed load drops one file (named after the template) into FAIL_DIR; the
# output of each job is flushed as a single block under flock so concurrent jobs
# never interleave their (chatty) retry output.
#
# Abort early if the scratch directory cannot be created: with an empty
# FAIL_DIR the marker files and the lock file would be written to "/".
if ! FAIL_DIR=$(mktemp -d) || [[ -z "$FAIL_DIR" || ! -d "$FAIL_DIR" ]]; then
    echo "Unable to create a temporary directory for template load bookkeeping." >&2
    exit 1
fi
OUTPUT_LOCK="${FAIL_DIR}/.output.lock"
: > "$OUTPUT_LOCK"
# Single quotes: FAIL_DIR is expanded when the trap fires, not when it is set.
trap 'rm -rf -- "$FAIL_DIR"' EXIT

# Record a failure: $1 = the template name/path to report later.
# The name is escaped so that it becomes a safe single filename that
# collect_failures can decode exactly: every "_" is written as "_0" and every
# "/" as "_1". (A plain "/" -> "__" substitution is not reversible for names
# that already contain "__".)
# Globals:   FAIL_DIR (read)
record_failure() {
    local marker="${1//_/_0}"
    marker="${marker//\//_1}"
    : > "${FAIL_DIR}/fail.${marker}"
}

# Populate FAILED_NAMES and FAILED_COUNT from the current phase's markers.
# Must run in the current shell (not a command substitution) so the array sticks.
# Shell options are left untouched: an unmatched glob is detected with an
# existence test instead of temporarily enabling nullglob.
# Globals:   FAIL_DIR (read), FAILED_NAMES and FAILED_COUNT (written)
collect_failures() {
    FAILED_NAMES=()
    FAILED_COUNT=0
    local f name
    for f in "${FAIL_DIR}"/fail.*; do
        # With no markers present the pattern is passed through literally.
        [[ -e "$f" ]] || continue
        name="${f##*/fail.}"
        # Undo the escaping in reverse order: "_1" -> "/" first, then "_0" -> "_".
        name="${name//_1//}"
        name="${name//_0/_}"
        FAILED_NAMES+=("$name")
    done
    FAILED_COUNT=${#FAILED_NAMES[@]}
}

# Clear markers and names between phases so SO and addon counts stay independent.
# The shell's nullglob setting is left untouched: "rm -f" already ignores a
# pattern that matched nothing, so there is no need to enable the option and
# then unconditionally switch it off for the rest of the script.
# Globals:   FAIL_DIR (read), FAILED_NAMES and FAILED_COUNT (reset)
reset_failures() {
    rm -f -- "${FAIL_DIR}"/fail.*
    FAILED_NAMES=()
    FAILED_COUNT=0
}

# Write $1 followed by a newline to stdout while holding an flock on
# OUTPUT_LOCK (opened on file descriptor 9), so text printed by concurrent
# background jobs does not get interleaved.
locked_echo() {
    local message="$1"

    {
        flock 9
        printf '%s\n' "$message"
    } 9>>"$OUTPUT_LOCK"
}

# Loads one template file via PUT. Intended to be dispatched as a background job.
#   $1 uri          - e.g. _component_template/foo or _index_template/foo
#   $2 file         - path to the template JSON
#   $3 report_name  - name/path to record if this load fails
# Globals:   VERBOSE, OUTPUT_LOCK (read)
# Outputs:   "Loading template file <file>", followed by the captured command
#            output when the load failed or VERBOSE is "true".
# Returns:   0 when the load succeeded, 1 when it failed (a failure marker is
#            also recorded via record_failure).
load_template() {
    local uri="$1"
    local file="$2"
    local report_name="$3"
    local out rc=0 block

    # Capture everything (including retry's diagnostic chatter) into one block so
    # concurrent jobs never interleave; the whole block is flushed under one flock.
    # 'retry' is not defined in this file (presumably provided by so-common); it
    # receives the command as a single string plus the expected response text.
    block="Loading template file $file"$'\n'
    if ! out=$(retry 3 3 "so-elasticsearch-query $uri -d@$file -XPUT" "{\"acknowledged\":true}" 2>&1); then
        block+="$out"$'\n'
        rc=1
    elif [[ "$VERBOSE" == "true" ]]; then
        block+="$out"$'\n'
    fi

    { flock 9; printf '%s' "$block"; } 9>>"$OUTPUT_LOCK"

    # Use an explicit if/return rather than "(( rc != 0 )) && record_failure":
    # that short-circuit form made the function return 1 on success and 0 on
    # failure, i.e. the job's exit status was inverted.
    if (( rc != 0 )); then
        record_failure "$report_name"
    fi
    return "$rc"
}

# Check that the component templates an index template strictly requires are
# all present.
#   $1 file - path to the index template JSON
# Globals:   component_templates (read) - JSON handed to jq via --argjson and
#            subtracted from the required list
# The required list is .composed_of minus .ignore_missing_component_templates
# (each defaulting to an empty list).
# Returns:   1 when at least one required component template is missing,
#            0 otherwise.
check_required_component_template_exists() {
    local file=$1
    local needed absent absent_count

    needed=$(jq '[((.composed_of //[]) - (.ignore_missing_component_templates // []))[]]' "$file")
    absent=$(jq -n --argjson required "$needed" --argjson component_templates "$component_templates" '(($required) - ($component_templates))')
    absent_count=$(jq length <<<"$absent")

    [[ $absent_count -gt 0 ]] && return 1
    return 0
}

# Decide whether an index template should be loaded on a heavynode.
#   $1 template_name - index template name (file name without "-template.json")
# Returns:   0 when the name is one of the heavynode templates, 1 otherwise.
#
# The only templates that are relevant to heavynodes are from datasets defined in elasticagent/files/elastic-agent.yml.jinja.
# Heavynodes do not have fleet server packages installed and do not support elastic agents reporting directly to them.
#
# The name is matched literally with 'case' rather than via
# [[ -v assoc["$name"] ]]: that form re-evaluates the subscript, so names such
# as "@" or "*" may be treated as whole-array subscripts (depending on the bash
# version) and an empty name is an invalid subscript. It also avoids rebuilding
# an associative array on every call.
check_heavynode_compatiable_index_template() {
    local template_name="$1"

    case "$template_name" in
    so-import | so-syslog | so-logs-soc | so-suricata | so-suricata.alerts | so-zeek | so-strelka)
        return 0
        ;;
    *)
        return 1
        ;;
    esac
}

# Dispatch a PUT for every component template JSON file in one component group.
#   $1 printed_name     - human-readable group name used in messages
#   $2 directory        - sub-directory of ${ELASTICSEARCH_TEMPLATES_DIR}/component
#   $3 append_mappings  - "true" to ensure the template name ends in "-mappings"
#                         (default: "false")
# Globals:   ELASTICSEARCH_TEMPLATES_DIR (read)
# Outputs:   a banner, plus a "skipping" notice when the group has no JSON files.
# Returns:   0; the loads themselves run as background jobs and are NOT waited for.
load_component_templates() {
    local printed_name="$1"
    local pattern="${ELASTICSEARCH_TEMPLATES_DIR}/component/$2"
    local append_mappings="${3:-"false"}"
    # Keep the loop variables local so they do not leak into (or overwrite)
    # same-named globals such as tmpl_name used by the index template loops.
    local component tmpl_name

    echo -e "\nLoading $printed_name component templates...\n"

    if ! compgen -G "${pattern}/*.json" > /dev/null; then
        echo "No $printed_name component templates found in ${pattern}, skipping."
        return
    fi

    # Dispatch loads as throttled background jobs. The barrier (wait) happens in
    # the caller after all component groups have been dispatched, since index
    # templates must not load until every component template is in place.
    for component in "$pattern"/*.json; do
        # File name without directory and ".json" suffix (no basename fork needed).
        tmpl_name="${component##*/}"
        tmpl_name="${tmpl_name%.json}"

        if [[ "$append_mappings" == "true" ]]; then
            # avoid duplicating "-mappings" if it already exists in the component template filename
            tmpl_name="${tmpl_name%-mappings}-mappings"
        fi

        template_throttle
        load_template "_component_template/${tmpl_name}" "$component" "$component" &
    done
}

# Succeeds when $1 is an existing directory that contains at least one path
# matching *.json; returns non-zero otherwise. Prints nothing.
index_templates_exist() {
    local templates_dir="$1"

    [[ -d "$templates_dir" ]] && compgen -G "${templates_dir}/*.json" > /dev/null
}

# Decide whether the addon integration index templates should be loaded now.
# Globals:   IS_HEAVYNODE, FORCE, SO_STATEFILE_SUCCESS, ADDON_STATEFILE_SUCCESS,
#            ADDON_TEMPLATES_DIR (all read)
# Returns:   non-zero on heavynodes; non-zero (unless FORCE is "true") when the
#            core statefile is missing or the addon statefile already exists;
#            otherwise the result of index_templates_exist on ADDON_TEMPLATES_DIR.
should_load_addon_templates() {
    [[ "$IS_HEAVYNODE" != "true" ]] || return 1

    # Statefiles only matter when the load is not being forced.
    if [[ "$FORCE" != "true" ]]; then
        [[ -f "$SO_STATEFILE_SUCCESS" ]] || return 1
        [[ ! -f "$ADDON_STATEFILE_SUCCESS" ]] || return 1
    fi

    index_templates_exist "$ADDON_TEMPLATES_DIR"
}

# Phase 1: Security Onion core templates. Runs when --force was given or the
# core statefile is absent, provided at least one index template JSON file
# exists in SO_TEMPLATES_DIR. The two elif branches at the end only report why
# the phase was skipped.
if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]] && index_templates_exist "$SO_TEMPLATES_DIR"; then
    # Not defined in this file; presumably provided by the sourced so-common.
    check_elasticsearch_responsive

    if [[ "$IS_HEAVYNODE" == "false" ]]; then
        # TODO: Better way to check if fleet server is installed vs checking for Elastic Defend component template.
        fleet_check="logs-endpoint.alerts@package"
        if ! so-elasticsearch-query "_component_template/$fleet_check" --output /dev/null --retry 5 --retry-delay 3 --fail; then
            # This check prevents so-elasticsearch-templates-load from running before so-elastic-fleet-setup has run.
            echo -e "\nPackage $fleet_check not yet installed. Fleet Server may not be fully configured yet."
            # Fleet Server is required because some SO index templates depend on components installed via
            #  specific integrations, e.g. Elastic Defend. These are components that we do not manually create / manage
            #  via /opt/so/saltstack/salt/elasticsearch/templates/component/

            # Exit with status 0 and without creating the statefile, so a later run attempts the load again.
            exit 0
        fi
    fi

    # load_component_templates "Name" "directory" "append '-mappings'?"
    load_component_templates "ECS" "ecs" "true"
    load_component_templates "Elastic Agent" "elastic-agent"
    load_component_templates "Security Onion" "so"

    # Barrier: every component template PUT must complete before we snapshot the
    # component template list and start loading index templates that depend on them.
    wait
    # Read as a global by check_required_component_template_exists below.
    component_templates=$(so-elasticsearch-component-templates-list)
    echo -e "Loading Security Onion index templates...\n"
    for so_idx_tmpl in "${SO_TEMPLATES_DIR}"/*.json; do
        # Template name = file name with the directory and "-template.json" suffix removed.
        tmpl_name=$(basename "${so_idx_tmpl%-template.json}")

        if [[ "$IS_HEAVYNODE" == "true" ]]; then
            # TODO: Better way to load only heavynode specific templates
            if ! check_heavynode_compatiable_index_template "$tmpl_name"; then
                if [[ "$VERBOSE" == "true" ]]; then
                    locked_echo "Skipping over $so_idx_tmpl, template is not a heavynode specific index template."
                fi

                # Skipped heavynode-irrelevant templates are not counted as failures.
                continue
            fi
        fi

        if check_required_component_template_exists "$so_idx_tmpl"; then
            # Dispatch the PUT as a throttled background job.
            template_throttle
            load_template "_index_template/$tmpl_name" "$so_idx_tmpl" "$so_idx_tmpl" &
        else
            # A missing required component counts as a failure for this phase.
            locked_echo "Skipping over $so_idx_tmpl due to missing required component template(s)."
            record_failure "$so_idx_tmpl"

            continue
        fi
    done

    # Barrier: all SO index template PUTs must finish before tallying failures.
    wait

    # The tally covers component template failures as well, since their markers
    # have not been cleared since they were dispatched above.
    collect_failures
    if [[ $FAILED_COUNT -eq 0 ]]; then
        echo "All Security Onion core templates loaded successfully."

        # Mark the core set as loaded so later runs skip this phase unless --force is given.
        touch "$SO_STATEFILE_SUCCESS"
    else
        echo "Encountered $FAILED_COUNT failure(s) loading templates:"
        for failed_template in "${FAILED_NAMES[@]}"; do
            echo "  - $failed_template"
        done
        if [[ "$SHOULD_EXIT_ON_FAILURE" == "true" ]]; then
            # 'fail' is not defined in this file; presumably it reports the message and aborts - confirm in so-common.
            fail "Failed to load all Security Onion core templates successfully."
        fi
    fi
    # Clear this phase's markers so the addon phase starts its own count from zero.
    reset_failures
elif ! index_templates_exist "$SO_TEMPLATES_DIR"; then
    echo "No Security Onion core index templates found in ${SO_TEMPLATES_DIR}, skipping."
elif [[ -f "$SO_STATEFILE_SUCCESS" ]]; then
    echo "Security Onion core templates already loaded"
fi

# Phase 2: addon integration index templates. should_load_addon_templates
# decides whether to run (never on heavynodes; otherwise based on --force, the
# two statefiles and the presence of addon template files). The elif branches
# only report why the phase was skipped.
if should_load_addon_templates; then

    # Not defined in this file; presumably provided by the sourced so-common.
    check_elasticsearch_responsive

    echo -e "\nLoading addon integration index templates...\n"
    # Fresh snapshot of the component template list; read as a global by
    # check_required_component_template_exists below.
    component_templates=$(so-elasticsearch-component-templates-list)

    for addon_idx_tmpl in "${ADDON_TEMPLATES_DIR}"/*.json; do
        # Template name = file name with the directory and "-template.json" suffix removed.
        tmpl_name=$(basename "${addon_idx_tmpl%-template.json}")

        if check_required_component_template_exists "$addon_idx_tmpl"; then
            # Dispatch the PUT as a throttled background job.
            template_throttle
            load_template "_index_template/${tmpl_name}" "$addon_idx_tmpl" "$addon_idx_tmpl" &
        else
            # A missing required component counts as a failure for this phase.
            locked_echo "Skipping over $addon_idx_tmpl due to missing required component template(s)."
            record_failure "$addon_idx_tmpl"

            continue
        fi
    done

    # Barrier: all addon index template PUTs must finish before tallying failures.
    wait

    collect_failures
    if [[ $FAILED_COUNT -eq 0 ]]; then
        echo "All addon integration templates loaded successfully."

        # Mark the addon set as loaded so later runs skip this phase unless --force is given.
        touch "$ADDON_STATEFILE_SUCCESS"
    else
        echo "Encountered $FAILED_COUNT failure(s) loading addon integration templates:"
        for failed_template in "${FAILED_NAMES[@]}"; do
            echo "  - $failed_template"
        done
        if [[ "$SHOULD_EXIT_ON_FAILURE" == "true" ]]; then
            # 'fail' is not defined in this file; presumably it reports the message and aborts - confirm in so-common.
            fail "Failed to load all addon integration templates successfully."
        fi
    fi

# Core templates have not been loaded successfully yet (non-heavynode only).
elif [[ ! -f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" ]]; then
    echo "Skipping loading addon integration templates until Security Onion core templates have been loaded."

# Addon statefile already present and no --force requested (non-heavynode only).
elif [[ -f "$ADDON_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && "$FORCE" == "false" ]]; then
    echo "Addon integration templates already loaded"
fi
