diff --git a/salt/common/tools/sbin/so-log-check b/salt/common/tools/sbin/so-log-check new file mode 100755 index 000000000..f89995065 --- /dev/null +++ b/salt/common/tools/sbin/so-log-check @@ -0,0 +1,207 @@ +#!/bin/bash +# +# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one +# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at +# https://securityonion.net/license; you may not use this file except in compliance with the +# Elastic License 2.0. + +. /usr/sbin/so-common + +RECENT_LOG_LINES=200 +EXCLUDE_STARTUP_ERRORS=N +EXCLUDE_FALSE_POSITIVE_ERRORS=N +EXCLUDE_KNOWN_ERRORS=N + +while [[ $# -gt 0 ]]; do + case $1 in + --exclude-connection-errors) + EXCLUDE_STARTUP_ERRORS=Y + ;; + --exclude-false-positives) + EXCLUDE_FALSE_POSITIVE_ERRORS=Y + ;; + --exclude-known-errors) + EXCLUDE_KNOWN_ERRORS=Y + ;; + --unknown) + EXCLUDE_STARTUP_ERRORS=Y + EXCLUDE_FALSE_POSITIVE_ERRORS=Y + EXCLUDE_KNOWN_ERRORS=Y + ;; + --recent-log-lines) + shift + RECENT_LOG_LINES=$1 + ;; + *) + echo "Usage: $0 [options]" + echo "" + echo "where options are:" + echo " --recent-log-lines N looks at the most recent N log lines per file or container; defaults to 200" + echo " --exclude-connection-errors exclude errors caused by a recent server or container restart" + echo " --exclude-false-positives exclude logs that are known false positives" + echo " --exclude-known-errors exclude errors that are known and non-critical issues" + echo " --unknown exclude everything mentioned above; only show unknown errors" + echo "" + echo "A non-zero return value indicates errors were found" + exit 1 + ;; + esac + shift +done + +echo "Security Onion Log Check - $(date)" +echo "-------------------------------------------" +echo "" +echo "- RECENT_LOG_LINES: $RECENT_LOG_LINES" +echo "- EXCLUDE_STARTUP_ERRORS: $EXCLUDE_STARTUP_ERRORS" +echo "- EXCLUDE_FALSE_POSITIVE_ERRORS: $EXCLUDE_FALSE_POSITIVE_ERRORS" +echo "- EXCLUDE_KNOWN_ERRORS: $EXCLUDE_KNOWN_ERRORS" +echo "" + +function status() { + header "$1" +} + +function exclude_container() { + name=$1 + + exclude_id=$(docker ps | grep "$name" | awk '{print $1}') + if [[ -n "$exclude_id" ]]; then + CONTAINER_IDS=$(echo $CONTAINER_IDS | sed -e "s/$exclude_id//g") + return $? + fi + return $? +} + +function exclude_log() { + name=$1 + + cat /tmp/log_check_files | grep -v $name > /tmp/log_check_files.new + mv /tmp/log_check_files.new /tmp/log_check_files +} + +function check_for_errors() { + if cat /tmp/log_check | grep -i error | grep -vEi "$EXCLUDED_ERRORS"; then + RESULT=1 + fi +} + +EXCLUDED_ERRORS="__LOG_CHECK_PLACEHOLDER_EXCLUSION__" + +if [[ $EXCLUDE_STARTUP_ERRORS == 'Y' ]]; then + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|database is locked" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|econnreset" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|unreachable" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|no route to host" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|not running" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|unavailable" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|request.py" # server not yet ready (python stack output) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|httperror" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|servfail" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|connect" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|missing shards" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|failed to send metrics" # server not yet ready + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|influxsize kbytes" # server not yet ready (telegraf waiting on influx) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|expected field at" # server not yet ready (telegraf waiting on health data) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|cached the public key" # server not yet ready (salt minion waiting on key acceptance) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|no ingest nodes" # server not yet ready (logstash waiting on elastic) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|failed to poll" # server not yet ready (sensoroni waiting on soc) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|minions returned with non" # server not yet ready (salt waiting on minions) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|so_long_term" # server not yet ready (influxdb not yet setup) +fi + +if [[ $EXCLUDE_FALSE_POSITIVE_ERRORS == 'Y' ]]; then + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|elastalert_status_error" # false positive + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|elastalert_error.json" # false positive + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|error: '0'" # false positive + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|errors_index" # false positive + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|noerror" # false positive + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|error-template" # false positive (elastic templates) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|deprecated" # false positive (playbook) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|could cause errors" # false positive (playbook) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|id.orig_h" # false positive (zeek test data) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|emerging-all.rules" # false positive (error in rulename) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|invalid query input" # false positive (Invalid user input in hunt query) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|example" # false positive (example test data) + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|status 200" # false positive (request successful, contained error string in content) +fi + +if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|eof" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|raise error" # redis/python generic stack line, rely on other lines for actual error + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|fail\\(error\\)" # redis/python generic stack line, rely on other lines for actual error + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|urlerror" # idstools connection timeout + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|timeouterror" # idstools connection timeout + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|forbidden" # playbook + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|_ml" # Elastic ML errors + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|context canceled" # elastic agent during shutdown + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|iteration" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|communication packets" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|use of closed" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|bookkeeper" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|noindices" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|failed to start transient scope" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|so-user.lock exists" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|systemd-run" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|retcode: 1" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|telemetry-task" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|redisqueue" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|fleet_detail_query" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|num errors=0" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|provisioning/alerting" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|provisioning/notifiers" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|provisoning/plugins" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|active-responses.log" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|scanentropy" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|integration policy" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|blob unknown" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|token required" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|zeekcaptureloss" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|unable to create detection" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|error installing new prebuilt rules" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|parent.error" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|req.LocalMeta.host.ip" # known issue in GH + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|sendmail" # zeek + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|stats.log" + EXCLUDED_ERRORS="$EXCLUDED_ERRORS|context deadline exceeded" +fi + +RESULT=0 + +# Check Security Onion container stdout/stderr logs +CONTAINER_IDS=$(docker ps -q) +exclude_container so-kibana +exclude_container so-idstools + +for container_id in $CONTAINER_IDS; do + status "Checking container $container_id" + docker logs -n $RECENT_LOG_LINES $container_id > /tmp/log_check 2>&1 + check_for_errors +done + +# Check Security Onion related log files +find /opt/so/log/ /nsm -name \*.log > /tmp/log_check_files +if [[ -f /var/log/cron ]]; then + echo "/var/log/cron" >> /tmp/log_check_files +fi +exclude_log "kibana.log" +exclude_log "spool" +exclude_log "import" + +for log_file in $(cat /tmp/log_check_files); do + status "Checking log file $log_file" + tail -n $RECENT_LOG_LINES $log_file > /tmp/log_check + check_for_errors +done + +# Cleanup temp files +rm -f /tmp/log_check_files +rm -f /tmp/log_check + +if [[ $RESULT -eq 0 ]]; then + echo -e "\nResult: No errors found" +else + echo -e "\nResult: One or more errors found" +fi + +exit $RESULT \ No newline at end of file diff --git a/salt/nginx/etc/nginx.conf b/salt/nginx/etc/nginx.conf index 795663384..3ef0c5c1f 100644 --- a/salt/nginx/etc/nginx.conf +++ b/salt/nginx/etc/nginx.conf @@ -8,6 +8,7 @@ worker_processes auto; error_log /var/log/nginx/error.log; pid /run/nginx.pid; +user nobody; include /usr/share/nginx/modules/*.conf; diff --git a/salt/strelka/filestream/config.sls b/salt/strelka/filestream/config.sls index 833a08505..0f9f38914 100644 --- a/salt/strelka/filestream/config.sls +++ b/salt/strelka/filestream/config.sls @@ -108,6 +108,11 @@ filecheck_stdout.log: {% if GLOBALS.md_engine == 'ZEEK' %} +remove_filecheck_run: + cron.absent: + - identifier: filecheck_run + - user: socore + filecheck_run_socore: cron.present: - name: 'ps -ef | grep filecheck | grep -v grep > /dev/null 2>&1 || python3 /opt/so/conf/strelka/filecheck >> /opt/so/log/strelka/filecheck_stdout.log 2>&1 &' @@ -121,6 +126,11 @@ remove_filecheck_run_suricata: {% elif GLOBALS.md_engine == 'SURICATA'%} +remove_filecheck_run: + cron.absent: + - identifier: filecheck_run + - user: suricata + filecheck_run_suricata: cron.present: - name: 'ps -ef | grep filecheck | grep -v grep > /dev/null 2>&1 || python3 /opt/so/conf/strelka/filecheck >> /opt/so/log/strelka/filecheck_stdout.log 2>&1 &' diff --git a/salt/telegraf/defaults.yaml b/salt/telegraf/defaults.yaml index a87fa952b..ab8679e57 100644 --- a/salt/telegraf/defaults.yaml +++ b/salt/telegraf/defaults.yaml @@ -11,7 +11,6 @@ telegraf: quiet: 'false' scripts: eval: - - beatseps.sh - checkfiles.sh - influxdbsize.sh - oldpcap.sh @@ -23,7 +22,6 @@ telegraf: - zeekcaptureloss.sh - zeekloss.sh standalone: - - beatseps.sh - checkfiles.sh - eps.sh - influxdbsize.sh @@ -36,13 +34,11 @@ telegraf: - zeekcaptureloss.sh - zeekloss.sh manager: - - beatseps.sh - influxdbsize.sh - raid.sh - redis.sh - sostatus.sh managersearch: - - beatseps.sh - eps.sh - influxdbsize.sh - raid.sh @@ -51,7 +47,6 @@ telegraf: import: - sostatus.sh sensor: - - beatseps.sh - checkfiles.sh - oldpcap.sh - raid.sh @@ -61,7 +56,6 @@ telegraf: - zeekcaptureloss.sh - zeekloss.sh heavynode: - - beatseps.sh - checkfiles.sh - eps.sh - oldpcap.sh @@ -75,12 +69,10 @@ telegraf: idh: - sostatus.sh searchnode: - - beatseps.sh - eps.sh - raid.sh - sostatus.sh receiver: - - beatseps.sh - eps.sh - raid.sh - redis.sh diff --git a/salt/telegraf/scripts/beatseps.sh b/salt/telegraf/scripts/beatseps.sh deleted file mode 100644 index 5f3db53f8..000000000 --- a/salt/telegraf/scripts/beatseps.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# -# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one -# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at -# https://securityonion.net/license; you may not use this file except in compliance with the -# Elastic License 2.0. - - - -# if this script isn't already running -if [[ ! "`pidof -x $(basename $0) -o %PPID`" ]]; then - - PREVCOUNTFILE='/tmp/beatseps.txt' - EVENTCOUNTCURRENT="$(curl -s localhost:5066/stats | jq '.libbeat.output.events.acked')" - FAILEDEVENTCOUNT="$(curl -s localhost:5066/stats | jq '.libbeat.output.events.failed')" - - if [ ! -z "$EVENTCOUNTCURRENT" ]; then - - if [ -f "$PREVCOUNTFILE" ]; then - EVENTCOUNTPREVIOUS=`cat $PREVCOUNTFILE` - else - echo "${EVENTCOUNTCURRENT}" > $PREVCOUNTFILE - exit 0 - fi - - echo "${EVENTCOUNTCURRENT}" > $PREVCOUNTFILE - # the division by 30 is because the agent interval is 30 seconds - EVENTS=$(((EVENTCOUNTCURRENT - EVENTCOUNTPREVIOUS)/30)) - if [ "$EVENTS" -lt 0 ]; then - EVENTS=0 - fi - - echo "fbstats eps=${EVENTS%%.*},failed=$FAILEDEVENTCOUNT" - fi - -fi - -exit 0 diff --git a/salt/zeek/defaults.yaml b/salt/zeek/defaults.yaml index 8e6814b2e..4435670a2 100644 --- a/salt/zeek/defaults.yaml +++ b/salt/zeek/defaults.yaml @@ -8,9 +8,9 @@ zeek: buffer: 128*1024*1024 zeekctl: MailTo: root@localhost - MailConnectionSummary: 1 + MailConnectionSummary: 0 MinDiskSpace: 5 - MailHostUpDown: 1 + MailHostUpDown: 0 LogRotationInterval: 3600 LogExpireInterval: 0 StatsLogEnable: 1 @@ -28,7 +28,6 @@ zeek: - misc/loaded-scripts - tuning/defaults - misc/capture-loss - - misc/stats - frameworks/software/vulnerable - frameworks/software/version-changes - protocols/ftp/software