diff --git a/salt/common/tools/sbin/so-common b/salt/common/tools/sbin/so-common index 8089db28b..e09d2c8ae 100755 --- a/salt/common/tools/sbin/so-common +++ b/salt/common/tools/sbin/so-common @@ -397,6 +397,10 @@ retry() { echo "" echo "$output" echo "" + if [[ $exitcode -eq 0 ]]; then + echo "Forcing exit code to 1" + exitcode=1 + fi fi elif [ -n "$failedOutput" ]; then if [[ "$output" =~ "$failedOutput" ]]; then @@ -405,7 +409,7 @@ retry() { echo "$output" echo "" if [[ $exitcode -eq 0 ]]; then - echo "The exitcode was 0, but we are setting to 1 since we found $failedOutput in the output." + echo "Forcing exit code to 1" exitcode=1 fi else diff --git a/salt/elasticsearch/enabled.sls b/salt/elasticsearch/enabled.sls index fa0f824b4..f7ab7749f 100644 --- a/salt/elasticsearch/enabled.sls +++ b/salt/elasticsearch/enabled.sls @@ -110,7 +110,7 @@ escomponenttemplates: - group: 939 - clean: True - onchanges_in: - - cmd: so-elasticsearch-templates + - file: so-elasticsearch-templates-reload # Auto-generate templates from defaults file {% for index, settings in ES_INDEX_SETTINGS.items() %} @@ -123,7 +123,7 @@ es_index_template_{{index}}: TEMPLATE_CONFIG: {{ settings.index_template }} - template: jinja - onchanges_in: - - cmd: so-elasticsearch-templates + - file: so-elasticsearch-templates-reload {% endif %} {% endfor %} @@ -142,7 +142,7 @@ es_template_{{TEMPLATE.split('.')[0] | replace("/","_") }}: - user: 930 - group: 939 - onchanges_in: - - cmd: so-elasticsearch-templates + - file: so-elasticsearch-templates-reload {% endfor %} {% endif %} @@ -167,6 +167,10 @@ so-elasticsearch-ilm-policy-load: - onchanges: - file: so-elasticsearch-ilm-policy-load-script +so-elasticsearch-templates-reload: + file.absent: + - name: /opt/so/state/estemplates.txt + so-elasticsearch-templates: cmd.run: - name: /usr/sbin/so-elasticsearch-templates-load diff --git a/salt/elasticsearch/tools/sbin/so-elasticsearch-pipelines b/salt/elasticsearch/tools/sbin/so-elasticsearch-pipelines index 
350ac97c5..71c40c1ca 100755 --- a/salt/elasticsearch/tools/sbin/so-elasticsearch-pipelines +++ b/salt/elasticsearch/tools/sbin/so-elasticsearch-pipelines @@ -6,8 +6,6 @@ . /usr/sbin/so-common - -RETURN_CODE=0 ELASTICSEARCH_HOST=$1 ELASTICSEARCH_PORT=9200 @@ -15,40 +13,20 @@ ELASTICSEARCH_PORT=9200 ELASTICSEARCH_INGEST_PIPELINES="/opt/so/conf/elasticsearch/ingest/" # Wait for ElasticSearch to initialize - if [ ! -f /opt/so/state/espipelines.txt ]; then - + echo "State file /opt/so/state/espipelines.txt not found. Running so-elasticsearch-pipelines." echo -n "Waiting for ElasticSearch..." - COUNT=0 - ELASTICSEARCH_CONNECTED="no" - while [[ "$COUNT" -le 240 ]]; do - curl -K /opt/so/conf/elasticsearch/curl.config -k --output /dev/null --silent --head --fail -L https://"$ELASTICSEARCH_HOST":"$ELASTICSEARCH_PORT" - if [ $? -eq 0 ]; then - ELASTICSEARCH_CONNECTED="yes" - echo "connected!" - break - else - ((COUNT+=1)) - sleep 1 - echo -n "." - fi - done - if [ "$ELASTICSEARCH_CONNECTED" == "no" ]; then - echo - echo -e "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'" - echo - fi + retry 240 1 "so-elasticsearch-query / -k --output /dev/null --silent --head --fail" || fail "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'" cd ${ELASTICSEARCH_INGEST_PIPELINES} - echo "Loading pipelines..." 
- for i in .[a-z]* *; do echo $i; RESPONSE=$(curl -K /opt/so/conf/elasticsearch/curl.config -k -XPUT -L https://${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}/_ingest/pipeline/$i -H 'Content-Type: application/json' -d@$i 2>/dev/null); echo $RESPONSE; if [[ "$RESPONSE" == *"error"* ]]; then RETURN_CODE=1; fi; done + for i in .[a-z]* *; + do + echo $i; + retry 5 5 "so-elasticsearch-query _ingest/pipeline/$i -d@$i -XPUT | grep '{\"acknowledged\":true}'" || fail "Could not load pipeline: $i" + done echo cd - >/dev/null - if [[ "$RETURN_CODE" != "1" ]]; then - touch /opt/so/state/espipelines.txt - fi -else - exit $RETURN_CODE + touch /opt/so/state/espipelines.txt fi diff --git a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load index 857da5434..33caff435 100755 --- a/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load +++ b/salt/elasticsearch/tools/sbin_jinja/so-elasticsearch-templates-load @@ -7,105 +7,143 @@ {% from 'vars/globals.map.jinja' import GLOBALS %} {%- set SUPPORTED_PACKAGES = salt['pillar.get']('elasticfleet:packages', default=ELASTICFLEETDEFAULTS.elasticfleet.packages, merge=True) %} -. /usr/sbin/so-common -{% if GLOBALS.role != 'so-heavynode' %} -if [ -f /usr/sbin/so-elastic-fleet-common ]; then - . /usr/sbin/so-elastic-fleet-common -fi -{% endif %} +STATE_FILE_INITIAL=/opt/so/state/estemplates_initial_load_attempt.txt +STATE_FILE_SUCCESS=/opt/so/state/estemplates.txt -default_conf_dir=/opt/so/conf - -# Define a default directory to load pipelines from -ELASTICSEARCH_TEMPLATES="$default_conf_dir/elasticsearch/templates/" - -{% if GLOBALS.role == 'so-heavynode' %} -file="/opt/so/conf/elasticsearch/templates/index/so-common-template.json" -{% else %} -file="/usr/sbin/so-elastic-fleet-common" -{% endif %} - -if [ -f "$file" ]; then - # Wait for ElasticSearch to initialize - echo -n "Waiting for ElasticSearch..." 
- COUNT=0 - ELASTICSEARCH_CONNECTED="no" - while [[ "$COUNT" -le 240 ]]; do - so-elasticsearch-query / -k --output /dev/null --silent --head --fail - if [ $? -eq 0 ]; then - ELASTICSEARCH_CONNECTED="yes" - echo "connected!" - break - else - ((COUNT+=1)) - sleep 1 - echo -n "." - fi - done - if [ "$ELASTICSEARCH_CONNECTED" == "no" ]; then - echo - echo -e "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'" - echo - exit 1 - fi - - {% if GLOBALS.role != 'so-heavynode' %} - SESSIONCOOKIE=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -c - -X GET http://localhost:5601/ | grep sid | awk '{print $7}') - INSTALLED=$(elastic_fleet_package_is_installed {{ SUPPORTED_PACKAGES[0] }} ) - if [ "$INSTALLED" != "installed" ]; then - echo - echo "Packages not yet installed." - echo - exit 0 - fi - {% endif %} - set -e - - cd ${ELASTICSEARCH_TEMPLATES}/component/ecs - - echo "Loading ECS component templates..." - for i in *; do TEMPLATE=$(echo $i | cut -d '.' -f1); echo "$TEMPLATE-mappings"; so-elasticsearch-query _component_template/$TEMPLATE-mappings -d@$i -XPUT 2>/dev/null; echo; done - - cd ${ELASTICSEARCH_TEMPLATES}/component/elastic-agent - - echo "Loading Elastic Agent component templates..." - {% if GLOBALS.role == 'so-heavynode' %} - component_pattern="so-*" - {% else %} - component_pattern="*" - {% endif %} - for i in $component_pattern; do TEMPLATE=${i::-5}; echo "$TEMPLATE"; so-elasticsearch-query _component_template/$TEMPLATE -d@$i -XPUT 2>/dev/null; echo; done - - # Load SO-specific component templates - cd ${ELASTICSEARCH_TEMPLATES}/component/so - - echo "Loading Security Onion component templates..." - for i in *; do TEMPLATE=$(echo $i | cut -d '.' 
-f1); echo "$TEMPLATE"; so-elasticsearch-query _component_template/$TEMPLATE -d@$i -XPUT 2>/dev/null; echo; done - echo - - # Load SO index templates - cd ${ELASTICSEARCH_TEMPLATES}/index - - echo "Loading Security Onion index templates..." - shopt -s extglob - {% if GLOBALS.role == 'so-heavynode' %} - pattern="!(*1password*|*aws*|*azure*|*cloudflare*|*elastic_agent*|*fim*|*github*|*google*|*osquery*|*system*|*windows*)" - {% else %} - pattern="*" - {% endif %} - for i in $pattern; do - TEMPLATE=${i::-14}; - echo "$TEMPLATE"; - so-elasticsearch-query _index_template/$TEMPLATE -d@$i -XPUT 2>/dev/null; - echo; - done - echo +if [[ -f $STATE_FILE_INITIAL ]]; then + # The initial template load has already run. As this is a subsequent load, all dependencies should + # already be satisfied. Therefore, immediately exit/abort this script upon any template load failure + # since this is an unrecoverable failure. + should_exit_on_failure=1 else - {% if GLOBALS.role == 'so-heavynode' %} - echo "Common template does not exist. Exiting..." - {% else %} - echo "Elastic Fleet not configured. Exiting..." - {% endif %} - exit 0 + # This is the initial template load, and there likely are some components not yet setup in Elasticsearch. + # Therefore load as many templates as possible at this time and if an error occurs proceed to the next + # template. But if at least one template fails to load do not mark the templates as having been loaded. + # This will allow the next load to resume the load of the templates that failed to load initially. + should_exit_on_failure=0 + echo "This is the initial template load" +fi + +load_failures=0 + +load_template() { # usage: load_template URI FILE; PUTs FILE to URI, then aborts or counts the failure per should_exit_on_failure + local uri=$1 # API path the template is PUT to + local file=$2 # template file sent as the request body; local so the script-level $file is not overwritten + + echo "Loading template file $file" + if ! 
retry 3 5 "so-elasticsearch-query $uri -d@$file -XPUT" "{\"acknowledged\":true}"; then + if [[ $should_exit_on_failure -eq 1 ]]; then + fail "Could not load template file: $file" + else + load_failures=$((load_failures+1)) + echo "Incremented load failure counter: $load_failures" + fi + fi +} + +if [ ! -f $STATE_FILE_SUCCESS ]; then + echo "State file $STATE_FILE_SUCCESS not found. Running so-elasticsearch-templates-load." + + . /usr/sbin/so-common + + {% if GLOBALS.role != 'so-heavynode' %} + if [ -f /usr/sbin/so-elastic-fleet-common ]; then + . /usr/sbin/so-elastic-fleet-common + fi + {% endif %} + + default_conf_dir=/opt/so/conf + + # Define a default directory to load pipelines from + ELASTICSEARCH_TEMPLATES="$default_conf_dir/elasticsearch/templates/" + + {% if GLOBALS.role == 'so-heavynode' %} + file="/opt/so/conf/elasticsearch/templates/index/so-common-template.json" + {% else %} + file="/usr/sbin/so-elastic-fleet-common" + {% endif %} + + if [ -f "$file" ]; then + # Wait for ElasticSearch to initialize + echo -n "Waiting for ElasticSearch..." + retry 240 1 "so-elasticsearch-query / -k --output /dev/null --silent --head --fail" || fail "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'" + {% if GLOBALS.role != 'so-heavynode' %} + SESSIONCOOKIE=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -c - -X GET http://localhost:5601/ | grep sid | awk '{print $7}') + INSTALLED=$(elastic_fleet_package_is_installed {{ SUPPORTED_PACKAGES[0] }} ) + if [ "$INSTALLED" != "installed" ]; then + echo + echo "Packages not yet installed." + echo + exit 0 + fi + {% endif %} + + touch $STATE_FILE_INITIAL + + cd ${ELASTICSEARCH_TEMPLATES}/component/ecs + + echo "Loading ECS component templates..." + for i in *; do + TEMPLATE=$(echo $i | cut -d '.' 
-f1) + load_template "_component_template/${TEMPLATE}-mappings" "$i" + done + echo + + cd ${ELASTICSEARCH_TEMPLATES}/component/elastic-agent + + echo "Loading Elastic Agent component templates..." + {% if GLOBALS.role == 'so-heavynode' %} + component_pattern="so-*" + {% else %} + component_pattern="*" + {% endif %} + for i in $component_pattern; do + TEMPLATE=${i::-5} + load_template "_component_template/$TEMPLATE" "$i" + done + echo + + # Load SO-specific component templates + cd ${ELASTICSEARCH_TEMPLATES}/component/so + + echo "Loading Security Onion component templates..." + for i in *; do + TEMPLATE=$(echo $i | cut -d '.' -f1); + load_template "_component_template/$TEMPLATE" "$i" + done + echo + + # Load SO index templates + cd ${ELASTICSEARCH_TEMPLATES}/index + + echo "Loading Security Onion index templates..." + shopt -s extglob + {% if GLOBALS.role == 'so-heavynode' %} + pattern="!(*1password*|*aws*|*azure*|*cloudflare*|*elastic_agent*|*fim*|*github*|*google*|*osquery*|*system*|*windows*)" + {% else %} + pattern="*" + {% endif %} + for i in $pattern; do + TEMPLATE=${i::-14} + load_template "_index_template/$TEMPLATE" "$i" + done + else + {% if GLOBALS.role == 'so-heavynode' %} + echo "Common template does not exist. Exiting..." + {% else %} + echo "Elastic Fleet not configured. Exiting..." 
+ {% endif %} + exit 0 + fi + + cd - >/dev/null + + if [[ $load_failures -eq 0 ]]; then + echo "All template loaded successfully" + touch $STATE_FILE_SUCCESS + else + echo "Encountered $load_failures templates that were unable to load, likely due to missing dependencies that will be available later; will retry on next highstate" + fi +else + echo "Templates already loaded" fi - cd - >/dev/null diff --git a/setup/so-verify b/setup/so-verify index 3f00cc420..8133c0bf1 100755 --- a/setup/so-verify +++ b/setup/so-verify @@ -37,7 +37,7 @@ log_has_errors() { # Failed to restart snapd.mounts-pre.target: Operation refused, unit snapd.mounts-pre.target # may be requested by dependency only (it is configured to refuse manual start/stop). - # Exit code 100 failure is likely apt-get running in the background, we wait for it to unlock. + # Command failed with exit code is output during retry loops. grep -E "FAILED|Failed|failed|ERROR|Result: False|Error is not recoverable" "$setup_log" | \ grep -vE "The Salt Master has cached the public key for this node" | \ @@ -57,7 +57,7 @@ log_has_errors() { grep -vE "Login Failed Details" | \ grep -vE "response from daemon: unauthorized" | \ grep -vE "Reading first line of patchfile" | \ - grep -vE "Command failed with exit code 100; will retry" | \ + grep -vE "Command failed with exit code" | \ grep -vE "Running scope as unit" &> "$error_log" if [[ $? -eq 0 ]]; then