Merge pull request #11734 from Security-Onion-Solutions/fix/elastic_scripts

Improve error handling and add retry logic
This commit is contained in:
Jason Ertel
2023-11-08 12:19:00 -05:00
committed by GitHub
5 changed files with 159 additions and 135 deletions

View File

@@ -397,6 +397,10 @@ retry() {
echo "<Start of output>"
echo "$output"
echo "<End of output>"
if [[ $exitcode -eq 0 ]]; then
echo "Forcing exit code to 1"
exitcode=1
fi
fi
elif [ -n "$failedOutput" ]; then
if [[ "$output" =~ "$failedOutput" ]]; then
@@ -405,7 +409,7 @@ retry() {
echo "$output"
echo "<End of output>"
if [[ $exitcode -eq 0 ]]; then
echo "The exitcode was 0, but we are setting to 1 since we found $failedOutput in the output."
echo "Forcing exit code to 1"
exitcode=1
fi
else

View File

@@ -110,7 +110,7 @@ escomponenttemplates:
- group: 939
- clean: True
- onchanges_in:
- cmd: so-elasticsearch-templates
- file: so-elasticsearch-templates-reload
# Auto-generate templates from defaults file
{% for index, settings in ES_INDEX_SETTINGS.items() %}
@@ -123,7 +123,7 @@ es_index_template_{{index}}:
TEMPLATE_CONFIG: {{ settings.index_template }}
- template: jinja
- onchanges_in:
- cmd: so-elasticsearch-templates
- file: so-elasticsearch-templates-reload
{% endif %}
{% endfor %}
@@ -142,7 +142,7 @@ es_template_{{TEMPLATE.split('.')[0] | replace("/","_") }}:
- user: 930
- group: 939
- onchanges_in:
- cmd: so-elasticsearch-templates
- file: so-elasticsearch-templates-reload
{% endfor %}
{% endif %}
@@ -167,6 +167,10 @@ so-elasticsearch-ilm-policy-load:
- onchanges:
- file: so-elasticsearch-ilm-policy-load-script
so-elasticsearch-templates-reload:
file.absent:
- name: /opt/so/state/estemplates.txt
so-elasticsearch-templates:
cmd.run:
- name: /usr/sbin/so-elasticsearch-templates-load

View File

@@ -6,8 +6,6 @@
. /usr/sbin/so-common
RETURN_CODE=0
ELASTICSEARCH_HOST=$1
ELASTICSEARCH_PORT=9200
@@ -15,40 +13,20 @@ ELASTICSEARCH_PORT=9200
ELASTICSEARCH_INGEST_PIPELINES="/opt/so/conf/elasticsearch/ingest/"
# Wait for ElasticSearch to initialize
if [ ! -f /opt/so/state/espipelines.txt ]; then
echo "State file /opt/so/state/espipelines.txt not found. Running so-elasticsearch-pipelines."
echo -n "Waiting for ElasticSearch..."
COUNT=0
ELASTICSEARCH_CONNECTED="no"
while [[ "$COUNT" -le 240 ]]; do
curl -K /opt/so/conf/elasticsearch/curl.config -k --output /dev/null --silent --head --fail -L https://"$ELASTICSEARCH_HOST":"$ELASTICSEARCH_PORT"
if [ $? -eq 0 ]; then
ELASTICSEARCH_CONNECTED="yes"
echo "connected!"
break
else
((COUNT+=1))
sleep 1
echo -n "."
fi
done
if [ "$ELASTICSEARCH_CONNECTED" == "no" ]; then
echo
echo -e "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'"
echo
fi
retry 240 1 "so-elasticsearch-query / -k --output /dev/null --silent --head --fail" || fail "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'"
cd ${ELASTICSEARCH_INGEST_PIPELINES}
echo "Loading pipelines..."
for i in .[a-z]* *; do echo $i; RESPONSE=$(curl -K /opt/so/conf/elasticsearch/curl.config -k -XPUT -L https://${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}/_ingest/pipeline/$i -H 'Content-Type: application/json' -d@$i 2>/dev/null); echo $RESPONSE; if [[ "$RESPONSE" == *"error"* ]]; then RETURN_CODE=1; fi; done
for i in .[a-z]* *;
do
echo $i;
retry 5 5 "so-elasticsearch-query _ingest/pipeline/$i -d@$i -XPUT | grep '{\"acknowledged\":true}'" || fail "Could not load pipeline: $i"
done
echo
cd - >/dev/null
if [[ "$RETURN_CODE" != "1" ]]; then
touch /opt/so/state/espipelines.txt
fi
else
exit $RETURN_CODE
fi

View File

@@ -7,48 +7,66 @@
{% from 'vars/globals.map.jinja' import GLOBALS %}
{%- set SUPPORTED_PACKAGES = salt['pillar.get']('elasticfleet:packages', default=ELASTICFLEETDEFAULTS.elasticfleet.packages, merge=True) %}
. /usr/sbin/so-common
{% if GLOBALS.role != 'so-heavynode' %}
if [ -f /usr/sbin/so-elastic-fleet-common ]; then
. /usr/sbin/so-elastic-fleet-common
STATE_FILE_INITIAL=/opt/so/state/estemplates_initial_load_attempt.txt
STATE_FILE_SUCCESS=/opt/so/state/estemplates.txt
if [[ -f $STATE_FILE_INITIAL ]]; then
# The initial template load has already run. As this is a subsequent load, all dependencies should
# already be satisified. Therefore, immediately exit/abort this script upon any template load failure
# since this is an unrecoverable failure.
should_exit_on_failure=1
else
# This is the initial template load, and there likely are some components not yet setup in Elasticsearch.
# Therefore load as many templates as possible at this time and if an error occurs proceed to the next
# template. But if at least one template fails to load do not mark the templates as having been loaded.
# This will allow the next load to resume the load of the templates that failed to load initially.
should_exit_on_failure=0
echo "This is the initial template load"
fi
{% endif %}
default_conf_dir=/opt/so/conf
load_failures=0
# Define a default directory to load pipelines from
ELASTICSEARCH_TEMPLATES="$default_conf_dir/elasticsearch/templates/"
load_template() {
uri=$1
file=$2
{% if GLOBALS.role == 'so-heavynode' %}
file="/opt/so/conf/elasticsearch/templates/index/so-common-template.json"
{% else %}
file="/usr/sbin/so-elastic-fleet-common"
{% endif %}
echo "Loading template file $i"
if ! retry 3 5 "so-elasticsearch-query $uri -d@$file -XPUT" "{\"acknowledged\":true}"; then
if [[ $should_exit_on_failure -eq 1 ]]; then
fail "Could not load template file: $file"
else
load_failures=$((load_failures+1))
echo "Incremented load failure counter: $load_failures"
fi
fi
}
if [ -f "$file" ]; then
if [ ! -f $STATE_FILE_SUCCESS ]; then
echo "State file $STATE_FILE_SUCCESS not found. Running so-elasticsearch-templates-load."
. /usr/sbin/so-common
{% if GLOBALS.role != 'so-heavynode' %}
if [ -f /usr/sbin/so-elastic-fleet-common ]; then
. /usr/sbin/so-elastic-fleet-common
fi
{% endif %}
default_conf_dir=/opt/so/conf
# Define a default directory to load pipelines from
ELASTICSEARCH_TEMPLATES="$default_conf_dir/elasticsearch/templates/"
{% if GLOBALS.role == 'so-heavynode' %}
file="/opt/so/conf/elasticsearch/templates/index/so-common-template.json"
{% else %}
file="/usr/sbin/so-elastic-fleet-common"
{% endif %}
if [ -f "$file" ]; then
# Wait for ElasticSearch to initialize
echo -n "Waiting for ElasticSearch..."
COUNT=0
ELASTICSEARCH_CONNECTED="no"
while [[ "$COUNT" -le 240 ]]; do
so-elasticsearch-query / -k --output /dev/null --silent --head --fail
if [ $? -eq 0 ]; then
ELASTICSEARCH_CONNECTED="yes"
echo "connected!"
break
else
((COUNT+=1))
sleep 1
echo -n "."
fi
done
if [ "$ELASTICSEARCH_CONNECTED" == "no" ]; then
echo
echo -e "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'"
echo
exit 1
fi
retry 240 1 "so-elasticsearch-query / -k --output /dev/null --silent --head --fail" || fail "Connection attempt timed out. Unable to connect to ElasticSearch. \nPlease try: \n -checking log(s) in /var/log/elasticsearch/\n -running 'sudo docker ps' \n -running 'sudo so-elastic-restart'"
{% if GLOBALS.role != 'so-heavynode' %}
SESSIONCOOKIE=$(curl -s -K /opt/so/conf/elasticsearch/curl.config -c - -X GET http://localhost:5601/ | grep sid | awk '{print $7}')
INSTALLED=$(elastic_fleet_package_is_installed {{ SUPPORTED_PACKAGES[0] }} )
@@ -59,12 +77,17 @@ if [ -f "$file" ]; then
exit 0
fi
{% endif %}
set -e
touch $STATE_FILE_INITIAL
cd ${ELASTICSEARCH_TEMPLATES}/component/ecs
echo "Loading ECS component templates..."
for i in *; do TEMPLATE=$(echo $i | cut -d '.' -f1); echo "$TEMPLATE-mappings"; so-elasticsearch-query _component_template/$TEMPLATE-mappings -d@$i -XPUT 2>/dev/null; echo; done
for i in *; do
TEMPLATE=$(echo $i | cut -d '.' -f1)
load_template "_component_template/${TEMPLATE}-mappings" "$i"
done
echo
cd ${ELASTICSEARCH_TEMPLATES}/component/elastic-agent
@@ -74,13 +97,20 @@ if [ -f "$file" ]; then
{% else %}
component_pattern="*"
{% endif %}
for i in $component_pattern; do TEMPLATE=${i::-5}; echo "$TEMPLATE"; so-elasticsearch-query _component_template/$TEMPLATE -d@$i -XPUT 2>/dev/null; echo; done
for i in $component_pattern; do
TEMPLATE=${i::-5}
load_template "_component_template/$TEMPLATE" "$i"
done
echo
# Load SO-specific component templates
cd ${ELASTICSEARCH_TEMPLATES}/component/so
echo "Loading Security Onion component templates..."
for i in *; do TEMPLATE=$(echo $i | cut -d '.' -f1); echo "$TEMPLATE"; so-elasticsearch-query _component_template/$TEMPLATE -d@$i -XPUT 2>/dev/null; echo; done
for i in *; do
TEMPLATE=$(echo $i | cut -d '.' -f1);
load_template "_component_template/$TEMPLATE" "$i"
done
echo
# Load SO index templates
@@ -94,18 +124,26 @@ if [ -f "$file" ]; then
pattern="*"
{% endif %}
for i in $pattern; do
TEMPLATE=${i::-14};
echo "$TEMPLATE";
so-elasticsearch-query _index_template/$TEMPLATE -d@$i -XPUT 2>/dev/null;
echo;
TEMPLATE=${i::-14}
load_template "_index_template/$TEMPLATE" "$i"
done
echo
else
else
{% if GLOBALS.role == 'so-heavynode' %}
echo "Common template does not exist. Exiting..."
{% else %}
echo "Elastic Fleet not configured. Exiting..."
{% endif %}
exit 0
fi
fi
cd - >/dev/null
if [[ $load_failures -eq 0 ]]; then
echo "All template loaded successfully"
touch $STATE_FILE_SUCCESS
else
echo "Encountered $load_failures templates that were unable to load, likely due to missing dependencies that will be available later; will retry on next highstate"
fi
else
echo "Templates already loaded"
fi

View File

@@ -37,7 +37,7 @@ log_has_errors() {
# Failed to restart snapd.mounts-pre.target: Operation refused, unit snapd.mounts-pre.target
# may be requested by dependency only (it is configured to refuse manual start/stop).
# Exit code 100 failure is likely apt-get running in the background, we wait for it to unlock.
# Command failed with exit code is output during retry loops.
grep -E "FAILED|Failed|failed|ERROR|Result: False|Error is not recoverable" "$setup_log" | \
grep -vE "The Salt Master has cached the public key for this node" | \
@@ -57,7 +57,7 @@ log_has_errors() {
grep -vE "Login Failed Details" | \
grep -vE "response from daemon: unauthorized" | \
grep -vE "Reading first line of patchfile" | \
grep -vE "Command failed with exit code 100; will retry" | \
grep -vE "Command failed with exit code" | \
grep -vE "Running scope as unit" &> "$error_log"
if [[ $? -eq 0 ]]; then