Compare commits

..

2 Commits

Author SHA1 Message Date
Mike Reeves cab57edf7c Harden reinstall_init and add post-saltify readiness gate
- setup/so-functions: dump pre-reinstall salt state (systemctl /
  journalctl / ls /etc/salt / .rpmnew diff) to the setup log so a
  failed reinstall leaves a usable post-mortem; swap the manual
  rm -rf of /etc/salt/* for `dnf -y remove salt` so package configs
  get cleaned up properly.
- setup/so-setup: replace the `sleep 2 / state.show_top / sleep 2`
  dance after saltify with a readiness gate that waits for
  /etc/salt/pki/master/master.pub, runs check_salt_master_status,
  and then wait_for_minion_key_pending before salt-key -ya. Fixes
  reinstalls on 3.x timing out on "Unable to sign_in to master".
- salt/common/tools/sbin/so-common: add wait_for_minion_key_pending
  helper, polls `salt-key -l pre` until the minion appears.
2026-04-23 17:43:39 -04:00
Josh Patterson 8b2064f1c1 uninstall salt during reinstall_init 2026-04-22 16:40:47 -04:00
63 changed files with 246 additions and 1991 deletions
-12
View File
@@ -1,12 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
# Per-minion Telegraf Postgres credentials. so-telegraf-cred on the manager is
# the single writer; it mutates /opt/so/saltstack/local/pillar/telegraf/creds.sls
# under flock. Pillar_roots order (local before default) means the populated
# copy shadows this default on any real grid; this file exists so the pillar
# key is always defined on fresh installs and when no minions have creds yet.
telegraf:
postgres_creds: {}
-21
View File
@@ -17,7 +17,6 @@ base:
- sensoroni.adv_sensoroni
- telegraf.soc_telegraf
- telegraf.adv_telegraf
- telegraf.creds
- versionlock.soc_versionlock
- versionlock.adv_versionlock
- soc.license
@@ -39,9 +38,6 @@ base:
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %}
- elasticsearch.auth
{% endif %}
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %}
- postgres.auth
{% endif %}
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/kibana/secrets.sls') %}
- kibana.secrets
{% endif %}
@@ -64,8 +60,6 @@ base:
- redis.adv_redis
- influxdb.soc_influxdb
- influxdb.adv_influxdb
- postgres.soc_postgres
- postgres.adv_postgres
- elasticsearch.nodes
- elasticsearch.soc_elasticsearch
- elasticsearch.adv_elasticsearch
@@ -106,9 +100,6 @@ base:
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %}
- elasticsearch.auth
{% endif %}
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %}
- postgres.auth
{% endif %}
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/kibana/secrets.sls') %}
- kibana.secrets
{% endif %}
@@ -134,8 +125,6 @@ base:
- redis.adv_redis
- influxdb.soc_influxdb
- influxdb.adv_influxdb
- postgres.soc_postgres
- postgres.adv_postgres
- backup.soc_backup
- backup.adv_backup
- zeek.soc_zeek
@@ -155,9 +144,6 @@ base:
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %}
- elasticsearch.auth
{% endif %}
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %}
- postgres.auth
{% endif %}
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/kibana/secrets.sls') %}
- kibana.secrets
{% endif %}
@@ -172,8 +158,6 @@ base:
- redis.adv_redis
- influxdb.soc_influxdb
- influxdb.adv_influxdb
- postgres.soc_postgres
- postgres.adv_postgres
- elasticsearch.nodes
- elasticsearch.soc_elasticsearch
- elasticsearch.adv_elasticsearch
@@ -273,9 +257,6 @@ base:
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/elasticsearch/auth.sls') %}
- elasticsearch.auth
{% endif %}
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %}
- postgres.auth
{% endif %}
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/kibana/secrets.sls') %}
- kibana.secrets
{% endif %}
@@ -301,8 +282,6 @@ base:
- redis.adv_redis
- influxdb.soc_influxdb
- influxdb.adv_influxdb
- postgres.soc_postgres
- postgres.adv_postgres
- zeek.soc_zeek
- zeek.adv_zeek
- bpf.soc_bpf
+1 -5
View File
@@ -29,14 +29,10 @@
'manager',
'nginx',
'influxdb',
'postgres',
'postgres.auth',
'soc',
'kratos',
'hydra',
'elasticfleet',
'elasticfleet.manager',
'elasticsearch.cluster',
'elastic-fleet-package-registry',
'utility'
] %}
@@ -81,7 +77,7 @@
),
'so-heavynode': (
sensor_states +
['elasticagent', 'elasticsearch', 'elasticsearch.cluster', 'logstash', 'redis', 'nginx']
['elasticagent', 'elasticsearch', 'logstash', 'redis', 'nginx']
),
'so-idh': (
['idh']
-1
View File
@@ -32,4 +32,3 @@ so_config_backup:
- daymonth: '*'
- month: '*'
- dayweek: '*'
-14
View File
@@ -54,20 +54,6 @@ x509_signing_policies:
- extendedKeyUsage: serverAuth
- days_valid: 820
- copypath: /etc/pki/issued_certs/
postgres:
- minions: '*'
- signing_private_key: /etc/pki/ca.key
- signing_cert: /etc/pki/ca.crt
- C: US
- ST: Utah
- L: Salt Lake City
- basicConstraints: "critical CA:false"
- keyUsage: "critical keyEncipherment"
- subjectKeyIdentifier: hash
- authorityKeyIdentifier: keyid,issuer:always
- extendedKeyUsage: serverAuth
- days_valid: 820
- copypath: /etc/pki/issued_certs/
elasticfleet:
- minions: '*'
- signing_private_key: /etc/pki/ca.key
+23
View File
@@ -162,6 +162,29 @@ check_salt_master_status() {
return 0
}
# Wait until $minion shows up in the salt master's unaccepted-keys list.
# Used after saltify on a reinstall to replace the old `sleep 2 / state.show_top /
# sleep 2` dance — the new minion's key takes longer to appear than 2s on
# salt 3006.x and the subsequent salt-key -ya needs something to accept.
# Returns 0 as soon as the key is pending, 1 after attempts*delay seconds.
wait_for_minion_key_pending() {
local minion="$1"
local attempts="${2:-30}"
local delay="${3:-2}"
local count=0
while ! salt-key -l pre --out=json 2>/dev/null \
| python3 -c "import json,sys; d=json.load(sys.stdin); sys.exit(0 if '$minion' in d.get('minions_pre', []) else 1)" 2>/dev/null; do
((count+=1))
if [[ $count -ge $attempts ]]; then
echo "Gave up waiting for $minion to appear in salt-master's pending keys"
return 1
fi
sleep "$delay"
done
echo "Minion $minion is pending acceptance after $((count * delay))s"
return 0
}
# this is only intended to be used to check the status of the minion from a salt master
check_salt_minion_status() {
local minion="$1"
+5 -20
View File
@@ -31,7 +31,6 @@ container_list() {
"so-hydra"
"so-nginx"
"so-pcaptools"
"so-postgres"
"so-soc"
"so-suricata"
"so-telegraf"
@@ -56,7 +55,6 @@ container_list() {
"so-logstash"
"so-nginx"
"so-pcaptools"
"so-postgres"
"so-redis"
"so-soc"
"so-strelka-backend"
@@ -164,8 +162,8 @@ update_docker_containers() {
# Pull down the trusted docker image
run_check_net_err \
"docker pull $CONTAINER_REGISTRY/$IMAGEREPO/$image" \
"Could not pull $image, please ensure connectivity to $CONTAINER_REGISTRY" >> "$LOG_FILE" 2>&1
"Could not pull $image, please ensure connectivity to $CONTAINER_REGISTRY" >> "$LOG_FILE" 2>&1
# Get signature
run_check_net_err \
"curl --retry 5 --retry-delay 60 -A '$CURLTYPE/$CURRENTVERSION/$OS/$(uname -r)' $sig_url --output $SIGNPATH/$image.sig" \
@@ -189,24 +187,11 @@ update_docker_containers() {
HOSTNAME=$(hostname)
fi
docker tag $CONTAINER_REGISTRY/$IMAGEREPO/$image $HOSTNAME:5000/$IMAGEREPO/$image >> "$LOG_FILE" 2>&1 || {
echo "Unable to tag $image" >> "$LOG_FILE" 2>&1
echo "Unable to tag $image" >> "$LOG_FILE" 2>&1
exit 1
}
# Push to the embedded registry via a registry-to-registry copy. Avoids
# `docker push`, which on Docker 29.x with the containerd image store
# represents freshly-pulled images as an index whose layer content
# isn't reachable through the push path. The local `docker tag` above
# is preserved so so-image-pull's `:5000` existence check still works.
# Pin to the digest already gpg-verified above so we copy exactly the
# bytes we approved.
local VERIFIED_REF
VERIFIED_REF=$(echo "$DOCKERINSPECT" | jq -r ".[0].RepoDigests[] | select(. | contains(\"$CONTAINER_REGISTRY\"))" | head -n 1)
if [ -z "$VERIFIED_REF" ] || [ "$VERIFIED_REF" = "null" ]; then
echo "Unable to determine verified digest for $image" >> "$LOG_FILE" 2>&1
exit 1
fi
docker buildx imagetools create --tag $HOSTNAME:5000/$IMAGEREPO/$image "$VERIFIED_REF" >> "$LOG_FILE" 2>&1 || {
echo "Unable to copy $image to embedded registry" >> "$LOG_FILE" 2>&1
docker push $HOSTNAME:5000/$IMAGEREPO/$image >> "$LOG_FILE" 2>&1 || {
echo "Unable to push $image" >> "$LOG_FILE" 2>&1
exit 1
}
fi
+1 -1
View File
@@ -227,7 +227,7 @@ if [[ $EXCLUDE_KNOWN_ERRORS == 'Y' ]]; then
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|from NIC checksum offloading" # zeek reporter.log
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|marked for removal" # docker container getting recycled
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|tcp 127.0.0.1:6791: bind: address already in use" # so-elastic-fleet agent restarting. Seen starting w/ 8.18.8 https://github.com/elastic/kibana/issues/201459
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint|armis|o365_metrics|microsoft_sentinel|snyk|cyera|island_browser).*user so_kibana lacks the required permissions \[(logs|metrics)-\1" # Known issue with integrations starting transform jobs that are explicitly not allowed to start as a system user. This error should not be seen on fresh ES 9.3.3 installs or after SO 3.1.0 with soups addition of check_transform_health_and_reauthorize()
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|TransformTask\] \[logs-(tychon|aws_billing|microsoft_defender_endpoint).*user so_kibana lacks the required permissions \[logs-\1" # Known issue with 3 integrations using kibana_system role vs creating unique api creds with proper permissions.
EXCLUDED_ERRORS="$EXCLUDED_ERRORS|manifest unknown" # appears in so-dockerregistry log for so-tcpreplay following docker upgrade to 29.2.1-1
fi
-8
View File
@@ -237,11 +237,3 @@ docker:
extra_hosts: []
extra_env: []
ulimits: []
'so-postgres':
final_octet: 47
port_bindings:
- 0.0.0.0:5432:5432
custom_bind_mounts: []
extra_hosts: []
extra_env: []
ulimits: []
+103 -4
View File
@@ -17,17 +17,65 @@ include:
- logstash.ssl
- elasticfleet.config
- elasticfleet.sostatus
{%- if GLOBALS.role != "so-fleet" %}
- elasticfleet.manager
{%- endif %}
{% if GLOBALS.role != "so-fleet" %}
{% if grains.role not in ['so-fleet'] %}
# Wait for Elasticsearch to be ready - no reason to try running Elastic Fleet server if ES is not ready
wait_for_elasticsearch_elasticfleet:
cmd.run:
- name: so-elasticsearch-wait
{% endif %}
# If enabled, automatically update Fleet Logstash Outputs
{% if ELASTICFLEETMERGED.config.server.enable_auto_configuration and grains.role not in ['so-import', 'so-eval', 'so-fleet'] %}
so-elastic-fleet-auto-configure-logstash-outputs:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-outputs-update
- retry:
attempts: 4
interval: 30
{# Separate from above in order to catch elasticfleet-logstash.crt changes and force update to fleet output policy #}
so-elastic-fleet-auto-configure-logstash-outputs-force:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-outputs-update --certs
- retry:
attempts: 4
interval: 30
- onchanges:
- x509: etc_elasticfleet_logstash_crt
- x509: elasticfleet_kafka_crt
{% endif %}
# If enabled, automatically update Fleet Server URLs & ES Connection
{% if ELASTICFLEETMERGED.config.server.enable_auto_configuration and grains.role not in ['so-fleet'] %}
so-elastic-fleet-auto-configure-server-urls:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-urls-update
- retry:
attempts: 4
interval: 30
{% endif %}
# Automatically update Fleet Server Elasticsearch URLs & Agent Artifact URLs
{% if grains.role not in ['so-fleet'] %}
so-elastic-fleet-auto-configure-elasticsearch-urls:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-es-url-update
- retry:
attempts: 4
interval: 30
so-elastic-fleet-auto-configure-artifact-urls:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-artifacts-url-update
- retry:
attempts: 4
interval: 30
{% endif %}
# Sync Elastic Agent artifacts to Fleet Node
{% if grains.role in ['so-fleet'] %}
elasticagent_syncartifacts:
file.recurse:
- name: /nsm/elastic-fleet/artifacts/beats
@@ -101,6 +149,57 @@ so-elastic-fleet:
- x509: etc_elasticfleet_crt
{% endif %}
{% if GLOBALS.role != "so-fleet" %}
so-elastic-fleet-package-statefile:
file.managed:
- name: /opt/so/state/elastic_fleet_packages.txt
- contents: {{ELASTICFLEETMERGED.packages}}
so-elastic-fleet-package-upgrade:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-package-upgrade
- retry:
attempts: 3
interval: 10
- onchanges:
- file: /opt/so/state/elastic_fleet_packages.txt
so-elastic-fleet-integrations:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-integration-policy-load
- retry:
attempts: 3
interval: 10
so-elastic-agent-grid-upgrade:
cmd.run:
- name: /usr/sbin/so-elastic-agent-grid-upgrade
- retry:
attempts: 12
interval: 5
so-elastic-fleet-integration-upgrade:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-integration-upgrade
- retry:
attempts: 3
interval: 10
{# Optional integrations script doesn't need the retries like so-elastic-fleet-integration-upgrade which loads the default integrations #}
so-elastic-fleet-addon-integrations:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-optional-integrations-load
{% if ELASTICFLEETMERGED.config.defend_filters.enable_auto_configuration %}
so-elastic-defend-manage-filters-file-watch:
cmd.run:
- name: python3 /sbin/so-elastic-defend-manage-filters.py -c /opt/so/conf/elasticsearch/curl.config -d /opt/so/conf/elastic-fleet/defend-exclusions/disabled-filters.yaml -i /nsm/securityonion-resources/event_filters/ -i /opt/so/conf/elastic-fleet/defend-exclusions/rulesets/custom-filters/ &>> /opt/so/log/elasticfleet/elastic-defend-manage-filters.log
- onchanges:
- file: elasticdefendcustom
- file: elasticdefenddisabled
{% endif %}
{% endif %}
delete_so-elastic-fleet_so-status.disabled:
file.uncomment:
- name: /opt/so/conf/so-status/so-status.conf
-101
View File
@@ -1,101 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'allowed_states.map.jinja' import allowed_states %}
{% if sls in allowed_states %}
{% from 'elasticfleet/map.jinja' import ELASTICFLEETMERGED %}
include:
- elasticfleet.config
# If enabled, automatically update Fleet Logstash Outputs
{% if ELASTICFLEETMERGED.config.server.enable_auto_configuration and grains.role not in ['so-import', 'so-eval'] %}
so-elastic-fleet-auto-configure-logstash-outputs:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-outputs-update
- retry:
attempts: 4
interval: 30
{% endif %}
# If enabled, automatically update Fleet Server URLs & ES Connection
so-elastic-fleet-auto-configure-server-urls:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-urls-update
- retry:
attempts: 4
interval: 30
# Automatically update Fleet Server Elasticsearch URLs & Agent Artifact URLs
so-elastic-fleet-auto-configure-elasticsearch-urls:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-es-url-update
- retry:
attempts: 4
interval: 30
so-elastic-fleet-auto-configure-artifact-urls:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-artifacts-url-update
- retry:
attempts: 4
interval: 30
so-elastic-fleet-package-statefile:
file.managed:
- name: /opt/so/state/elastic_fleet_packages.txt
- contents: {{ELASTICFLEETMERGED.packages}}
so-elastic-fleet-package-upgrade:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-package-upgrade
- retry:
attempts: 3
interval: 10
- onchanges:
- file: /opt/so/state/elastic_fleet_packages.txt
so-elastic-fleet-integrations:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-integration-policy-load
- retry:
attempts: 3
interval: 10
so-elastic-agent-grid-upgrade:
cmd.run:
- name: /usr/sbin/so-elastic-agent-grid-upgrade
- retry:
attempts: 12
interval: 5
so-elastic-fleet-integration-upgrade:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-integration-upgrade
- retry:
attempts: 3
interval: 10
{# Optional integrations script doesn't need the retries like so-elastic-fleet-integration-upgrade which loads the default integrations #}
so-elastic-fleet-addon-integrations:
cmd.run:
- name: /usr/sbin/so-elastic-fleet-optional-integrations-load
{% if ELASTICFLEETMERGED.config.defend_filters.enable_auto_configuration %}
so-elastic-defend-manage-filters-file-watch:
cmd.run:
- name: python3 /sbin/so-elastic-defend-manage-filters.py -c /opt/so/conf/elasticsearch/curl.config -d /opt/so/conf/elastic-fleet/defend-exclusions/disabled-filters.yaml -i /nsm/securityonion-resources/event_filters/ -i /opt/so/conf/elastic-fleet/defend-exclusions/rulesets/custom-filters/ &>> /opt/so/log/elasticfleet/elastic-defend-manage-filters.log
- onchanges:
- file: elasticdefendcustom
- file: elasticdefenddisabled
{% endif %}
{% else %}
{{sls}}_state_not_allowed:
test.fail_without_changes:
- name: {{sls}}_state_not_allowed
{% endif %}
@@ -240,7 +240,7 @@ elastic_fleet_policy_create() {
--arg DESC "$DESC" \
--arg TIMEOUT $TIMEOUT \
--arg FLEETSERVER "$FLEETSERVER" \
'{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER,"advanced_settings":{"agent_logging_level": "warning"}}'
'{"name": $NAME,"id":$NAME,"description":$DESC,"namespace":"default","monitoring_enabled":["logs"],"inactivity_timeout":$TIMEOUT,"has_fleet_server":$FLEETSERVER}'
)
# Create Fleet Policy
if ! fleet_api "agent_policies" -XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$JSON_STRING"; then
@@ -235,16 +235,6 @@ function update_kafka_outputs() {
{% endif %}
# Compare the current Elastic Fleet certificate against what is on disk
POLICY_CERT_SHA=$(jq -r '.item.ssl.certificate' <<< $RAW_JSON | openssl x509 -noout -sha256 -fingerprint)
DISK_CERT_SHA=$(openssl x509 -in /etc/pki/elasticfleet-logstash.crt -noout -sha256 -fingerprint)
if [[ "$POLICY_CERT_SHA" != "$DISK_CERT_SHA" ]]; then
printf "Certificate on disk doesn't match certificate in policy - forcing update\n"
UPDATE_CERTS=true
FORCE_UPDATE=true
fi
# Sort & hash the new list of Logstash Outputs
NEW_LIST_JSON=$(jq --compact-output --null-input '$ARGS.positional' --args -- "${NEW_LIST[@]}")
NEW_HASH=$(sha256sum <<< "$NEW_LIST_JSON" | awk '{print $1}')
+1 -1
View File
@@ -4,7 +4,7 @@
# Elastic License 2.0.
{% from 'allowed_states.map.jinja' import allowed_states %}
{% if sls in allowed_states %}
{% if sls.split('.')[0] in allowed_states %}
{% from 'vars/globals.map.jinja' import GLOBALS %}
{% from 'elasticsearch/config.map.jinja' import ELASTICSEARCHMERGED %}
{% from 'elasticsearch/template.map.jinja' import ES_INDEX_SETTINGS, SO_MANAGED_INDICES %}
+7 -6
View File
@@ -17,7 +17,7 @@ include:
- elasticsearch.ssl
- elasticsearch.config
- elasticsearch.sostatus
{%- if GLOBALS.role != "so-searchnode" %}
{%- if GLOBALS.role != 'so-searchode' %}
- elasticsearch.cluster
{%- endif%}
@@ -102,6 +102,11 @@ so-elasticsearch:
- cmd: auth_users_roles_inode
- cmd: auth_users_inode
delete_so-elasticsearch_so-status.disabled:
file.uncomment:
- name: /opt/so/conf/so-status/so-status.conf
- regex: ^so-elasticsearch$
wait_for_so-elasticsearch:
http.wait_for_successful_query:
- name: "https://localhost:9200/"
@@ -112,14 +117,10 @@ wait_for_so-elasticsearch:
- status: 200
- wait_for: 300
- request_interval: 15
- backend: requests
- require:
- docker_container: so-elasticsearch
delete_so-elasticsearch_so-status.disabled:
file.uncomment:
- name: /opt/so/conf/so-status/so-status.conf
- regex: ^so-elasticsearch$
{% else %}
{{sls}}_state_not_allowed:
@@ -103,13 +103,11 @@ load_component_templates() {
local pattern="${ELASTICSEARCH_TEMPLATES_DIR}/component/$2"
local append_mappings="${3:-"false"}"
# current state of nullglob shell option
shopt -q nullglob && nullglob_set=1 || nullglob_set=0
shopt -s nullglob
echo -e "\nLoading $printed_name component templates...\n"
if ! compgen -G "${pattern}/*.json" > /dev/null; then
echo "No $printed_name component templates found in ${pattern}, skipping."
return
fi
for component in "$pattern"/*.json; do
tmpl_name=$(basename "${component%.json}")
@@ -123,6 +121,11 @@ load_component_templates() {
SO_LOAD_FAILURES_NAMES+=("$component")
fi
done
# restore nullglob shell option if needed
if [[ $nullglob_set -eq 1 ]]; then
shopt -u nullglob
fi
}
check_elasticsearch_responsive() {
@@ -133,32 +136,7 @@ check_elasticsearch_responsive() {
fail "Elasticsearch is not responding. Please review Elasticsearch logs /opt/so/log/elasticsearch/securityonion.log for more details. Additionally, consider running so-elasticsearch-troubleshoot."
}
index_templates_exist() {
local templates_dir="$1"
if [[ ! -d "$templates_dir" ]]; then
return 1
fi
compgen -G "${templates_dir}/*.json" > /dev/null
}
should_load_addon_templates() {
if [[ "$IS_HEAVYNODE" == "true" ]]; then
return 1
fi
# Skip statefile checks when forcing template load
if [[ "$FORCE" != "true" ]]; then
if [[ ! -f "$SO_STATEFILE_SUCCESS" || -f "$ADDON_STATEFILE_SUCCESS" ]]; then
return 1
fi
fi
index_templates_exist "$ADDON_TEMPLATES_DIR"
}
if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]] && index_templates_exist "$SO_TEMPLATES_DIR"; then
if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]]; then
check_elasticsearch_responsive
if [[ "$IS_HEAVYNODE" == "false" ]]; then
@@ -223,14 +201,13 @@ if [[ "$FORCE" == "true" || ! -f "$SO_STATEFILE_SUCCESS" ]] && index_templates_e
fail "Failed to load all Security Onion core templates successfully."
fi
fi
elif ! index_templates_exist "$SO_TEMPLATES_DIR"; then
echo "No Security Onion core index templates found in ${SO_TEMPLATES_DIR}, skipping."
elif [[ -f "$SO_STATEFILE_SUCCESS" ]]; then
else
echo "Security Onion core templates already loaded"
fi
# Start loading addon templates
if should_load_addon_templates; then
if [[ (-d "$ADDON_TEMPLATES_DIR" && -f "$SO_STATEFILE_SUCCESS" && "$IS_HEAVYNODE" == "false" && ! -f "$ADDON_STATEFILE_SUCCESS") || (-d "$ADDON_TEMPLATES_DIR" && "$IS_HEAVYNODE" == "false" && "$FORCE" == "true") ]]; then
check_elasticsearch_responsive
-3
View File
@@ -11,7 +11,6 @@
'so-kratos',
'so-hydra',
'so-nginx',
'so-postgres',
'so-redis',
'so-soc',
'so-strelka-coordinator',
@@ -35,7 +34,6 @@
'so-hydra',
'so-logstash',
'so-nginx',
'so-postgres',
'so-redis',
'so-soc',
'so-strelka-coordinator',
@@ -79,7 +77,6 @@
'so-kratos',
'so-hydra',
'so-nginx',
'so-postgres',
'so-soc'
] %}
-42
View File
@@ -98,10 +98,6 @@ firewall:
tcp:
- 8086
udp: []
postgres:
tcp:
- 5432
udp: []
kafka_controller:
tcp:
- 9093
@@ -197,7 +193,6 @@ firewall:
- kibana
- redis
- influxdb
- postgres
- elasticsearch_rest
- elasticsearch_node
- localrules
@@ -384,7 +379,6 @@ firewall:
- kibana
- redis
- influxdb
- postgres
- elasticsearch_rest
- elasticsearch_node
- docker_registry
@@ -398,7 +392,6 @@ firewall:
- elasticsearch_rest
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- beats_5044
@@ -411,7 +404,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- beats_5044
@@ -429,7 +421,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- sensoroni
searchnode:
portgroups:
@@ -440,7 +431,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -454,7 +444,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -464,7 +453,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -498,7 +486,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- elastic_agent_control
@@ -509,7 +496,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -604,7 +590,6 @@ firewall:
- kibana
- redis
- influxdb
- postgres
- elasticsearch_rest
- elasticsearch_node
- docker_registry
@@ -618,7 +603,6 @@ firewall:
- elasticsearch_rest
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- beats_5044
@@ -631,7 +615,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- beats_5044
@@ -649,7 +632,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- sensoroni
searchnode:
portgroups:
@@ -660,7 +642,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -674,7 +655,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -684,7 +664,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -716,7 +695,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- elastic_agent_control
@@ -727,7 +705,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -822,7 +799,6 @@ firewall:
- kibana
- redis
- influxdb
- postgres
- elasticsearch_rest
- elasticsearch_node
- docker_registry
@@ -836,7 +812,6 @@ firewall:
- elasticsearch_rest
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- beats_5044
@@ -849,7 +824,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- beats_5044
@@ -867,7 +841,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- sensoroni
searchnode:
portgroups:
@@ -877,7 +850,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -890,7 +862,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -900,7 +871,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -934,7 +904,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- elastic_agent_control
@@ -945,7 +914,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -1043,7 +1011,6 @@ firewall:
- kibana
- redis
- influxdb
- postgres
- elasticsearch_rest
- elasticsearch_node
- docker_registry
@@ -1064,7 +1031,6 @@ firewall:
- elasticsearch_rest
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- beats_5044
@@ -1077,7 +1043,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- beats_5044
@@ -1089,7 +1054,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- beats_5044
@@ -1101,7 +1065,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- redis
@@ -1111,7 +1074,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- redis
@@ -1122,7 +1084,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -1159,7 +1120,6 @@ firewall:
portgroups:
- docker_registry
- influxdb
- postgres
- sensoroni
- yum
- elastic_agent_control
@@ -1170,7 +1130,6 @@ firewall:
- yum
- docker_registry
- influxdb
- postgres
- elastic_agent_control
- elastic_agent_data
- elastic_agent_update
@@ -1514,7 +1473,6 @@ firewall:
- kibana
- redis
- influxdb
- postgres
- elasticsearch_rest
- elasticsearch_node
- elastic_agent_control
+1 -1
View File
@@ -22,7 +22,7 @@ kibana:
- default
- file
migrations:
discardCorruptObjects: "9.3.3"
discardCorruptObjects: "8.18.8"
telemetry:
enabled: False
xpack:
+1 -1
View File
@@ -3,8 +3,8 @@ kratos:
description: Enables or disables the Kratos authentication system. WARNING - Disabling this process will cause the grid to malfunction. Re-enabling this setting will require manual effort via SSH.
forcedType: bool
advanced: True
readonly: True
helpLink: kratos
oidc:
enabled:
description: Set to True to enable OIDC / Single Sign-On (SSO) to SOC. Requires a valid Security Onion license key.
+1 -46
View File
@@ -273,7 +273,7 @@ function deleteMinionFiles () {
log "ERROR" "Failed to delete $PILLARFILE"
return 1
fi
rm -f $ADVPILLARFILE
if [ $? -ne 0 ]; then
log "ERROR" "Failed to delete $ADVPILLARFILE"
@@ -281,39 +281,6 @@ function deleteMinionFiles () {
fi
}
# Remove this minion's postgres Telegraf credential from the shared creds
# pillar and drop the matching role in Postgres. Always returns 0 so a dead
# or unreachable so-postgres doesn't block minion deletion — in that case we
# log a warning and leave the role behind for manual cleanup.
function remove_postgres_telegraf_from_minion() {
local MINION_SAFE
MINION_SAFE=$(echo "$MINION_ID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]')
local PG_USER="so_telegraf_${MINION_SAFE}"
log "INFO" "Removing postgres telegraf cred for $MINION_ID"
so-telegraf-cred remove "$MINION_ID" >/dev/null 2>&1 || true
if docker ps --format '{{.Names}}' 2>/dev/null | grep -q '^so-postgres$'; then
if ! docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf >/dev/null 2>&1 <<EOSQL
DO \$\$
BEGIN
IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '$PG_USER') THEN
EXECUTE format('REASSIGN OWNED BY %I TO so_telegraf', '$PG_USER');
EXECUTE format('DROP OWNED BY %I', '$PG_USER');
EXECUTE format('DROP ROLE %I', '$PG_USER');
END IF;
END
\$\$;
EOSQL
then
log "WARN" "Failed to drop postgres role $PG_USER; pillar entry was removed — drop manually if the role persists"
fi
else
log "WARN" "so-postgres container is not running; skipping DB role cleanup for $PG_USER"
fi
}
# Create the minion file
function ensure_socore_ownership() {
log "INFO" "Setting socore ownership on minion files"
@@ -575,17 +542,6 @@ function add_telegraf_to_minion() {
log "ERROR" "Failed to add telegraf configuration to $PILLARFILE"
return 1
fi
# Provision the per-minion postgres Telegraf credential in the shared
# telegraf/creds.sls pillar. so-telegraf-cred is the only writer; it
# generates a password on first add and is a no-op on re-add so the cred
# is stable across repeated so-minion runs. postgres.telegraf_users on the
# manager creates/updates the DB role from the same pillar.
so-telegraf-cred add "$MINION_ID"
if [ $? -ne 0 ]; then
log "ERROR" "Failed to provision postgres telegraf cred for $MINION_ID"
return 1
fi
}
function add_influxdb_to_minion() {
@@ -1113,7 +1069,6 @@ case "$OPERATION" in
"delete")
log "INFO" "Removing minion $MINION_ID"
remove_postgres_telegraf_from_minion
deleteMinionFiles || {
log "ERROR" "Failed to delete minion files for $MINION_ID"
exit 1
-54
View File
@@ -1,54 +0,0 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
# Single writer for the Telegraf Postgres credentials pillar. Thin wrapper
# around so-yaml.py that generates a password on first add and no-ops on
# re-add so the cred is stable across repeated so-minion runs.
#
# Note: so-yaml.py splits keys on '.' with no escape. SO minion ids are
# dot-free by construction (setup/so-functions:1884 takes the short_name
# before the first '.'), so using the raw minion id as the key is safe.
CREDS=/opt/so/saltstack/local/pillar/telegraf/creds.sls
usage() {
echo "Usage: $0 <add|remove> <minion_id>" >&2
exit 2
}
seed_creds_file() {
mkdir -p "$(dirname "$CREDS")" || return 1
if [[ ! -f "$CREDS" ]]; then
(umask 027 && printf 'telegraf:\n postgres_creds: {}\n' > "$CREDS") || return 1
chown socore:socore "$CREDS" 2>/dev/null || true
chmod 640 "$CREDS" || return 1
fi
}
OP=$1
MID=$2
[[ -z "$OP" || -z "$MID" ]] && usage
case "$OP" in
add)
SAFE=$(echo "$MID" | tr '.-' '__' | tr '[:upper:]' '[:lower:]')
seed_creds_file || exit 1
if so-yaml.py get -r "$CREDS" "telegraf.postgres_creds.${MID}.user" >/dev/null 2>&1; then
exit 0
fi
PASS=$(tr -dc 'A-Za-z0-9~!@#^&*()_=+[]|;:,.<>?-' < /dev/urandom | head -c 72)
so-yaml.py replace "$CREDS" "telegraf.postgres_creds.${MID}.user" "so_telegraf_${SAFE}" >/dev/null
so-yaml.py replace "$CREDS" "telegraf.postgres_creds.${MID}.pass" "$PASS" >/dev/null
;;
remove)
[[ -f "$CREDS" ]] || exit 0
so-yaml.py remove "$CREDS" "telegraf.postgres_creds.${MID}" >/dev/null 2>&1 || true
;;
*)
usage
;;
esac
+4 -12
View File
@@ -39,16 +39,9 @@ def showUsage(args):
def loadYaml(filename):
try:
with open(filename, "r") as file:
content = file.read()
return yaml.safe_load(content)
except FileNotFoundError:
print(f"File not found: {filename}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(f"Error reading file {filename}: {e}", file=sys.stderr)
sys.exit(1)
file = open(filename, "r")
content = file.read()
return yaml.safe_load(content)
def writeYaml(filename, content):
@@ -292,8 +285,7 @@ def add(args):
def removeKey(content, key):
pieces = key.split(".", 1)
if len(pieces) > 1:
if pieces[0] in content:
removeKey(content[pieces[0]], pieces[1])
removeKey(content[pieces[0]], pieces[1])
else:
content.pop(key, None)
-18
View File
@@ -973,21 +973,3 @@ class TestReplaceListObject(unittest.TestCase):
expected = "key1:\n- id: '1'\n status: updated\n- id: '2'\n status: inactive\n"
self.assertEqual(actual, expected)
class TestLoadYaml(unittest.TestCase):
def test_load_yaml_missing_file(self):
with patch('sys.exit', new=MagicMock()) as sysmock:
with patch('sys.stderr', new=StringIO()) as mock_stderr:
soyaml.loadYaml("/tmp/so-yaml_test-does-not-exist.yaml")
sysmock.assert_called_with(1)
self.assertIn("File not found:", mock_stderr.getvalue())
def test_load_yaml_read_error(self):
with patch('sys.exit', new=MagicMock()) as sysmock:
with patch('sys.stderr', new=StringIO()) as mock_stderr:
with patch('builtins.open', side_effect=PermissionError("denied")):
soyaml.loadYaml("/tmp/so-yaml_test-unreadable.yaml")
sysmock.assert_called_with(1)
self.assertIn("Error reading file", mock_stderr.getvalue())
-181
View File
@@ -485,168 +485,7 @@ elasticsearch_backup_index_templates() {
tar -czf /nsm/backup/3.0.0_elasticsearch_index_templates.tar.gz -C /opt/so/conf/elasticsearch/templates/index/ .
}
elasticfleet_set_agent_logging_level_warn() {
. /usr/sbin/so-elastic-fleet-common
local current_agent_policies
if ! current_agent_policies=$(fleet_api "agent_policies?perPage=1000"); then
echo "Warning: unable to retrieve Fleet agent policies"
return 0
fi
# Only updating policies that are within Security Onion defaults and do not already have any user configured advanced_settings.
local policies_to_update
policies_to_update=$(jq -c '
.items[]
| select(has("advanced_settings") | not)
| select(
.id == "so-grid-nodes_general"
or .id == "so-grid-nodes_heavy"
or .id == "endpoints-initial"
or (.id | startswith("FleetServer_"))
)
' <<< "$current_agent_policies")
if [[ -z "$policies_to_update" ]]; then
return 0
fi
while IFS= read -r policy; do
[[ -z "$policy" ]] && continue
local policy_id policy_name policy_namespace
policy_id=$(jq -r '.id' <<< "$policy")
policy_name=$(jq -r '.name' <<< "$policy")
policy_namespace=$(jq -r '.namespace' <<< "$policy")
local update_logging
update_logging=$(jq -n \
--arg name "$policy_name" \
--arg namespace "$policy_namespace" \
'{name: $name, namespace: $namespace, advanced_settings: {agent_logging_level: "warning"}}'
)
echo "Setting elastic agent_logging_level to warning on policy '$policy_name' ($policy_id)."
if ! fleet_api "agent_policies/$policy_id" -XPUT -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d "$update_logging" >/dev/null; then
echo " warning: failed to update agent policy '$policy_name' ($policy_id)" >&2
fi
done <<< "$policies_to_update"
}
check_transform_health_and_reauthorize() {
. /usr/sbin/so-elastic-fleet-common
echo "Checking integration transform jobs for unhealthy / unauthorized status..."
local transforms_doc stats_doc installed_doc
if ! transforms_doc=$(so-elasticsearch-query "_transform/_all?size=1000" --fail --retry 3 --retry-delay 5 2>/dev/null); then
echo "Unable to query for transform jobs, skipping reauthorization."
return 0
fi
if ! stats_doc=$(so-elasticsearch-query "_transform/_all/_stats?size=1000" --fail --retry 3 --retry-delay 5 2>/dev/null); then
echo "Unable to query for transform job stats, skipping reauthorization."
return 0
fi
if ! installed_doc=$(fleet_api "epm/packages/installed?perPage=500"); then
echo "Unable to list installed Fleet packages, skipping reauthorization."
return 0
fi
# Get all transforms that meet the following
# - unhealthy (any non-green health status)
# - metadata has run_as_kibana_system: false (this fix is specific to transforms started prior to Kibana 9.3.3)
# - are not orphaned (integration is not somehow missing/corrupt/uninstalled)
local unhealthy_transforms
unhealthy_transforms=$(jq -c -n \
--argjson t "$transforms_doc" \
--argjson s "$stats_doc" \
--argjson i "$installed_doc" '
($i.items | map({key: .name, value: .version}) | from_entries) as $pkg_ver
| ($s.transforms | map({key: .id, value: .health.status}) | from_entries) as $health
| [ $t.transforms[]
| select(._meta.run_as_kibana_system == false)
| select(($health[.id] // "unknown") != "green")
| {id, pkg: ._meta.package.name, ver: ($pkg_ver[._meta.package.name])}
]
| if length == 0 then empty else . end
| (map(select(.ver == null)) | map({orphan: .id})[]),
(map(select(.ver != null))
| group_by(.pkg)
| map({pkg: .[0].pkg, ver: .[0].ver, transformIds: map(.id)})[])
')
if [[ -z "$unhealthy_transforms" ]]; then
return 0
fi
local unhealthy_count
unhealthy_count=$(jq -s '[.[].transformIds? // empty | .[]] | length' <<< "$unhealthy_transforms")
echo "Found $unhealthy_count transform(s) needing reauthorization."
local total_failures=0
while IFS= read -r transform; do
[[ -z "$transform" ]] && continue
if jq -e 'has("orphan")' <<< "$transform" >/dev/null 2>&1; then
echo "Skipping transform not owned by any installed Fleet package: $(jq -r '.orphan' <<< "$transform")"
continue
fi
local pkg ver body resp
pkg=$(jq -r '.pkg' <<< "$transform")
ver=$(jq -r '.ver' <<< "$transform")
body=$(jq -c '{transforms: (.transformIds | map({transformId: .}))}' <<< "$transform")
echo "Reauthorizing transform(s) for ${pkg}-${ver}..."
resp=$(fleet_api "epm/packages/${pkg}/${ver}/transforms/authorize" \
-XPOST -H 'kbn-xsrf: true' -H 'Content-Type: application/json' \
-d "$body") || { echo "Could not reauthorize transform(s) for ${pkg}-${ver}"; continue; }
(( total_failures += $(jq 'map(select(.success != true)) | length' <<< "$resp" 2>/dev/null) ))
done <<< "$unhealthy_transforms"
if [[ "$total_failures" -gt 0 ]]; then
echo "Some transform(s) failed to reauthorize."
fi
}
ensure_postgres_local_pillar() {
# Postgres was added as a service after 3.0.0, so the new pillar/top.sls
# references postgres.soc_postgres / postgres.adv_postgres unconditionally.
# Managers upgrading from 3.0.0 have no /opt/so/saltstack/local/pillar/postgres/
# (make_some_dirs only runs at install time), so the stubs must be created
# here before salt-master restarts against the new top.sls.
echo "Ensuring postgres local pillar stubs exist."
local dir=/opt/so/saltstack/local/pillar/postgres
mkdir -p "$dir"
[[ -f "$dir/soc_postgres.sls" ]] || touch "$dir/soc_postgres.sls"
[[ -f "$dir/adv_postgres.sls" ]] || touch "$dir/adv_postgres.sls"
chown -R socore:socore "$dir"
}
ensure_postgres_secret() {
# On a fresh install, generate_passwords + secrets_pillar seed
# secrets:postgres_pass in /opt/so/saltstack/local/pillar/secrets.sls. That
# code path is skipped on upgrade (secrets.sls already exists from 3.0.0
# with import_pass/influx_pass but no postgres_pass), so the postgres
# container's POSTGRES_PASSWORD_FILE and SOC's PG_ADMIN_PASS would be empty
# after highstate. Generate one now if missing.
local secrets_file=/opt/so/saltstack/local/pillar/secrets.sls
if [[ ! -f "$secrets_file" ]]; then
echo "WARNING: $secrets_file missing; skipping postgres_pass backfill."
return 0
fi
if so-yaml.py get -r "$secrets_file" secrets.postgres_pass >/dev/null 2>&1; then
echo "secrets.postgres_pass already set; leaving as-is."
return 0
fi
echo "Seeding secrets.postgres_pass in $secrets_file."
so-yaml.py add "$secrets_file" secrets.postgres_pass "$(get_random_value)"
chown socore:socore "$secrets_file"
}
up_to_3.1.0() {
ensure_postgres_local_pillar
ensure_postgres_secret
determine_elastic_agent_upgrade
elasticsearch_backup_index_templates
# Clear existing component template state file.
@@ -663,26 +502,6 @@ post_to_3.1.0() {
salt-call state.apply salt.cloud.config concurrent=True
fi
# Backfill the Telegraf creds pillar for every accepted minion. so-telegraf-cred
# add is idempotent — it no-ops when an entry already exists — so this is safe
# to run on every soup. The subsequent state.apply creates/updates the matching
# Postgres roles from the reconciled pillar.
echo "Reconciling Telegraf Postgres creds for accepted minions."
for mid in $(salt-key --out=json --list=accepted 2>/dev/null | jq -r '.minions[]?' 2>/dev/null); do
[[ -n "$mid" ]] || continue
/usr/sbin/so-telegraf-cred add "$mid" || echo " warning: so-telegraf-cred add $mid failed" >&2
done
# Run through the master (not --local) so state compilation uses the
# master's configured file_roots; the manager's /etc/salt/minion has no
# file_roots of its own and --local would fail with "No matching sls found".
salt-call state.apply postgres.telegraf_users queue=True || true
# Update default agent policies to use logging level warn.
elasticfleet_set_agent_logging_level_warn || true
# Check for unhealthy / unauthorized integration transform jobs and attempt reauthorizations
check_transform_health_and_reauthorize || true
POSTVERSION=3.1.0
}
-25
View File
@@ -25,33 +25,8 @@ manager_run_es_soc:
- salt: {{NEWNODE}}_update_mine
{% endif %}
# so-minion has already added the new minion's entry to telegraf/creds.sls
# via so-telegraf-cred before this orch fires. Reconcile the Postgres role
# on the manager so the new minion can authenticate on its first highstate,
# then refresh the minion's pillar so its telegraf.conf renders with the
# freshly-written cred.
manager_create_postgres_telegraf_role:
salt.state:
- tgt: {{ MANAGER }}
- sls:
- postgres.telegraf_users
- queue: True
- require:
- salt: {{NEWNODE}}_update_mine
{{NEWNODE}}_refresh_pillar:
salt.function:
- name: saltutil.refresh_pillar
- tgt: {{ NEWNODE }}
- kwarg:
wait: True
- require:
- salt: manager_create_postgres_telegraf_role
{{NEWNODE}}_run_highstate:
salt.state:
- tgt: {{ NEWNODE }}
- highstate: True
- queue: True
- require:
- salt: {{NEWNODE}}_refresh_pillar
-37
View File
@@ -1,37 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'allowed_states.map.jinja' import allowed_states %}
{% if sls in allowed_states %}
{% set DIGITS = "1234567890" %}
{% set LOWERCASE = "qwertyuiopasdfghjklzxcvbnm" %}
{% set UPPERCASE = "QWERTYUIOPASDFGHJKLZXCVBNM" %}
{% set SYMBOLS = "~!@#^&*()-_=+[]|;:,.<>?" %}
{% set CHARS = DIGITS~LOWERCASE~UPPERCASE~SYMBOLS %}
{% set so_postgres_user_pass = salt['pillar.get']('postgres:auth:users:so_postgres_user:pass', salt['random.get_str'](72, chars=CHARS)) %}
# Admin cred only. Per-minion Telegraf creds live in telegraf/creds.sls,
# managed by /usr/sbin/so-telegraf-cred (called from so-minion).
postgres_auth_pillar:
file.managed:
- name: /opt/so/saltstack/local/pillar/postgres/auth.sls
- mode: 640
- reload_pillar: True
- contents: |
postgres:
auth:
users:
so_postgres_user:
user: so_postgres
pass: "{{ so_postgres_user_pass }}"
- show_changes: False
{% else %}
{{sls}}_state_not_allowed:
test.fail_without_changes:
- name: {{sls}}_state_not_allowed
{% endif %}
-111
View File
@@ -1,111 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'allowed_states.map.jinja' import allowed_states %}
{% if sls.split('.')[0] in allowed_states %}
{% from 'postgres/map.jinja' import PGMERGED %}
# Postgres Setup
postgresconfdir:
file.directory:
- name: /opt/so/conf/postgres
- user: 939
- group: 939
- makedirs: True
postgressecretsdir:
file.directory:
- name: /opt/so/conf/postgres/secrets
- user: 939
- group: 939
- mode: 700
- require:
- file: postgresconfdir
postgresdatadir:
file.directory:
- name: /nsm/postgres
- user: 939
- group: 939
- makedirs: True
postgreslogdir:
file.directory:
- name: /opt/so/log/postgres
- user: 939
- group: 939
- makedirs: True
postgresinitdir:
file.directory:
- name: /opt/so/conf/postgres/init
- user: 939
- group: 939
- require:
- file: postgresconfdir
postgresinitusers:
file.managed:
- name: /opt/so/conf/postgres/init/init-users.sh
- source: salt://postgres/files/init-users.sh
- user: 939
- group: 939
- mode: 755
postgresconf:
file.managed:
- name: /opt/so/conf/postgres/postgresql.conf
- source: salt://postgres/files/postgresql.conf.jinja
- user: 939
- group: 939
- template: jinja
- defaults:
PGMERGED: {{ PGMERGED }}
postgreshba:
file.managed:
- name: /opt/so/conf/postgres/pg_hba.conf
- source: salt://postgres/files/pg_hba.conf
- user: 939
- group: 939
- mode: 640
postgres_super_secret:
file.managed:
- name: /opt/so/conf/postgres/secrets/postgres_password
- user: 939
- group: 939
- mode: 600
- contents_pillar: 'secrets:postgres_pass'
- show_changes: False
- require:
- file: postgressecretsdir
postgres_app_secret:
file.managed:
- name: /opt/so/conf/postgres/secrets/so_postgres_pass
- user: 939
- group: 939
- mode: 600
- contents_pillar: 'postgres:auth:users:so_postgres_user:pass'
- show_changes: False
- require:
- file: postgressecretsdir
postgres_sbin:
file.recurse:
- name: /usr/sbin
- source: salt://postgres/tools/sbin
- user: root
- group: root
- file_mode: 755
{% else %}
{{sls}}_state_not_allowed:
test.fail_without_changes:
- name: {{sls}}_state_not_allowed
{% endif %}
-19
View File
@@ -1,19 +0,0 @@
postgres:
enabled: True
telegraf:
retention_days: 14
config:
listen_addresses: '*'
port: 5432
max_connections: 100
shared_buffers: 256MB
ssl: 'on'
ssl_cert_file: '/conf/postgres.crt'
ssl_key_file: '/conf/postgres.key'
ssl_ca_file: '/conf/ca.crt'
hba_file: '/conf/pg_hba.conf'
log_destination: 'stderr'
logging_collector: 'off'
log_min_messages: 'warning'
shared_preload_libraries: pg_cron
cron.database_name: so_telegraf
-33
View File
@@ -1,33 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'allowed_states.map.jinja' import allowed_states %}
{% if sls.split('.')[0] in allowed_states %}
include:
- postgres.sostatus
so-postgres:
docker_container.absent:
- force: True
so-postgres_so-status.disabled:
file.comment:
- name: /opt/so/conf/so-status/so-status.conf
- regex: ^so-postgres$
so_postgres_backup:
cron.absent:
- name: /usr/sbin/so-postgres-backup > /dev/null 2>&1
- identifier: so_postgres_backup
- user: root
{% else %}
{{sls}}_state_not_allowed:
test.fail_without_changes:
- name: {{sls}}_state_not_allowed
{% endif %}
-109
View File
@@ -1,109 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'allowed_states.map.jinja' import allowed_states %}
{% if sls.split('.')[0] in allowed_states %}
{% from 'vars/globals.map.jinja' import GLOBALS %}
{% from 'docker/docker.map.jinja' import DOCKERMERGED %}
{% set SO_POSTGRES_USER = salt['pillar.get']('postgres:auth:users:so_postgres_user:user', 'so_postgres') %}
include:
- postgres.auth
- postgres.ssl
- postgres.config
- postgres.sostatus
- postgres.telegraf_users
so-postgres:
docker_container.running:
- image: {{ GLOBALS.registry_host }}:5000/{{ GLOBALS.image_repo }}/so-postgres:{{ GLOBALS.so_version }}
- hostname: so-postgres
- networks:
- sobridge:
- ipv4_address: {{ DOCKERMERGED.containers['so-postgres'].ip }}
- port_bindings:
{% for BINDING in DOCKERMERGED.containers['so-postgres'].port_bindings %}
- {{ BINDING }}
{% endfor %}
- environment:
- POSTGRES_DB=securityonion
# Passwords are delivered via mounted 0600 secret files, not plaintext env vars.
# The upstream postgres image resolves POSTGRES_PASSWORD_FILE; entrypoint.sh and
# init-users.sh resolve SO_POSTGRES_PASS_FILE the same way.
- POSTGRES_PASSWORD_FILE=/run/secrets/postgres_password
- SO_POSTGRES_USER={{ SO_POSTGRES_USER }}
- SO_POSTGRES_PASS_FILE=/run/secrets/so_postgres_pass
{% if DOCKERMERGED.containers['so-postgres'].extra_env %}
{% for XTRAENV in DOCKERMERGED.containers['so-postgres'].extra_env %}
- {{ XTRAENV }}
{% endfor %}
{% endif %}
- binds:
- /opt/so/log/postgres/:/log:rw
- /nsm/postgres:/var/lib/postgresql/data:rw
- /opt/so/conf/postgres/postgresql.conf:/conf/postgresql.conf:ro
- /opt/so/conf/postgres/pg_hba.conf:/conf/pg_hba.conf:ro
- /opt/so/conf/postgres/secrets:/run/secrets:ro
- /opt/so/conf/postgres/init/init-users.sh:/docker-entrypoint-initdb.d/init-users.sh:ro
- /etc/pki/postgres.crt:/conf/postgres.crt:ro
- /etc/pki/postgres.key:/conf/postgres.key:ro
- /etc/pki/tls/certs/intca.crt:/conf/ca.crt:ro
{% if DOCKERMERGED.containers['so-postgres'].custom_bind_mounts %}
{% for BIND in DOCKERMERGED.containers['so-postgres'].custom_bind_mounts %}
- {{ BIND }}
{% endfor %}
{% endif %}
{% if DOCKERMERGED.containers['so-postgres'].extra_hosts %}
- extra_hosts:
{% for XTRAHOST in DOCKERMERGED.containers['so-postgres'].extra_hosts %}
- {{ XTRAHOST }}
{% endfor %}
{% endif %}
{% if DOCKERMERGED.containers['so-postgres'].ulimits %}
- ulimits:
{% for ULIMIT in DOCKERMERGED.containers['so-postgres'].ulimits %}
- {{ ULIMIT.name }}={{ ULIMIT.soft }}:{{ ULIMIT.hard }}
{% endfor %}
{% endif %}
- watch:
- file: postgresconf
- file: postgreshba
- file: postgresinitusers
- file: postgres_super_secret
- file: postgres_app_secret
- x509: postgres_crt
- x509: postgres_key
- require:
- file: postgresconf
- file: postgreshba
- file: postgresinitusers
- file: postgres_super_secret
- file: postgres_app_secret
- x509: postgres_crt
- x509: postgres_key
delete_so-postgres_so-status.disabled:
file.uncomment:
- name: /opt/so/conf/so-status/so-status.conf
- regex: ^so-postgres$
so_postgres_backup:
cron.present:
- name: /usr/sbin/so-postgres-backup > /dev/null 2>&1
- identifier: so_postgres_backup
- user: root
- minute: '5'
- hour: '0'
- daymonth: '*'
- month: '*'
- dayweek: '*'
{% else %}
{{sls}}_state_not_allowed:
test.fail_without_changes:
- name: {{sls}}_state_not_allowed
{% endif %}
-34
View File
@@ -1,34 +0,0 @@
#!/bin/bash
set -e
# Create or update application user for SOC platform access
# This script runs on first database initialization via docker-entrypoint-initdb.d
# The password is properly escaped to handle special characters
if [ -z "${SO_POSTGRES_PASS:-}" ] && [ -n "${SO_POSTGRES_PASS_FILE:-}" ] && [ -r "$SO_POSTGRES_PASS_FILE" ]; then
SO_POSTGRES_PASS="$(< "$SO_POSTGRES_PASS_FILE")"
fi
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
DO \$\$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '${SO_POSTGRES_USER}') THEN
EXECUTE format('CREATE ROLE %I WITH LOGIN PASSWORD %L', '${SO_POSTGRES_USER}', '${SO_POSTGRES_PASS}');
ELSE
EXECUTE format('ALTER ROLE %I WITH PASSWORD %L', '${SO_POSTGRES_USER}', '${SO_POSTGRES_PASS}');
END IF;
END
\$\$;
GRANT ALL PRIVILEGES ON DATABASE "$POSTGRES_DB" TO "$SO_POSTGRES_USER";
-- Lock the SOC database down at the connect layer; PUBLIC gets CONNECT
-- by default, which would let per-minion telegraf roles open sessions
-- here. They have no schema/table grants inside so reads fail, but
-- revoking CONNECT closes the soft edge entirely.
REVOKE CONNECT ON DATABASE "$POSTGRES_DB" FROM PUBLIC;
GRANT CONNECT ON DATABASE "$POSTGRES_DB" TO "$SO_POSTGRES_USER";
EOSQL
# Bootstrap the Telegraf metrics database. Per-minion roles + schemas are
# reconciled on every state.apply by postgres/telegraf_users.sls; this block
# only ensures the shared database exists on first initialization.
if ! psql -U "$POSTGRES_USER" -tAc "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then
psql -v ON_ERROR_STOP=1 -U "$POSTGRES_USER" -c "CREATE DATABASE so_telegraf"
fi
-16
View File
@@ -1,16 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
#
# Managed by Salt — do not edit by hand.
# Client authentication config: only local (Unix socket) connections and TLS-wrapped TCP
# connections are accepted. Plain-text `host ...` lines are intentionally omitted so a
# misconfigured client with sslmode=disable cannot negotiate a cleartext session.
# Local connections (Unix socket, container-internal) use peer/trust.
local all all trust
# TCP connections MUST use TLS (hostssl) and authenticate with SCRAM.
hostssl all all 0.0.0.0/0 scram-sha-256
hostssl all all ::/0 scram-sha-256
@@ -1,8 +0,0 @@
{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
https://securityonion.net/license; you may not use this file except in compliance with the
Elastic License 2.0. #}
{% for key, value in PGMERGED.config.items() %}
{{ key }} = '{{ value | string | replace("'", "''") }}'
{% endfor %}
-13
View File
@@ -1,13 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'postgres/map.jinja' import PGMERGED %}
include:
{% if PGMERGED.enabled %}
- postgres.enabled
{% else %}
- postgres.disabled
{% endif %}
-7
View File
@@ -1,7 +0,0 @@
{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
https://securityonion.net/license; you may not use this file except in compliance with the
Elastic License 2.0. #}
{% import_yaml 'postgres/defaults.yaml' as PGDEFAULTS %}
{% set PGMERGED = salt['pillar.get']('postgres', PGDEFAULTS.postgres, merge=True) %}
-89
View File
@@ -1,89 +0,0 @@
postgres:
enabled:
description: Whether the PostgreSQL database container is enabled on this grid. Backs the assistant store and the Telegraf metrics database.
forcedType: bool
readonly: True
helpLink: influxdb
telegraf:
retention_days:
description: Number of days of Telegraf metrics to keep in the so_telegraf database. Older partitions are dropped hourly by pg_partman.
forcedType: int
helpLink: postgres
config:
max_connections:
description: Maximum number of concurrent PostgreSQL connections.
forcedType: int
global: True
helpLink: postgres
shared_buffers:
description: Amount of memory PostgreSQL uses for shared buffers (e.g. 256MB, 1GB). Raising this improves read cache hit rate at the cost of system RAM.
global: True
helpLink: postgres
log_min_messages:
description: Minimum severity of server messages written to the PostgreSQL log.
options:
- debug1
- info
- notice
- warning
- error
- log
- fatal
global: True
helpLink: postgres
listen_addresses:
description: Interfaces PostgreSQL listens on. Must remain '*' so clients on the docker bridge network can connect.
global: True
advanced: True
helpLink: postgres
port:
description: TCP port PostgreSQL listens on inside the container. Firewall rules and container port mapping assume 5432.
forcedType: int
global: True
advanced: True
helpLink: postgres
ssl:
description: Whether PostgreSQL accepts TLS connections. Must remain 'on' — pg_hba.conf requires hostssl for TCP.
global: True
advanced: True
helpLink: postgres
ssl_cert_file:
description: Path (inside the container) to the TLS server certificate. Salt-managed.
global: True
advanced: True
helpLink: postgres
ssl_key_file:
description: Path (inside the container) to the TLS server private key. Salt-managed.
global: True
advanced: True
helpLink: postgres
ssl_ca_file:
description: Path (inside the container) to the CA bundle PostgreSQL uses to verify client certificates. Salt-managed.
global: True
advanced: True
helpLink: postgres
hba_file:
description: Path (inside the container) to the pg_hba.conf authentication file. Salt-managed — edit salt/postgres/files/pg_hba.conf.
global: True
advanced: True
helpLink: postgres
log_destination:
description: Where PostgreSQL writes its server log. 'stderr' routes to the container log stream.
global: True
advanced: True
helpLink: postgres
logging_collector:
description: Whether to run a separate logging collector process. Disabled because the docker log stream already captures stderr.
global: True
advanced: True
helpLink: postgres
shared_preload_libraries:
description: Comma-separated list of extensions loaded at server start. Required for pg_cron which drives pg_partman maintenance — do not remove.
global: True
advanced: True
helpLink: postgres
cron.database_name:
description: Database pg_cron schedules jobs in. Must be so_telegraf so partman maintenance runs in the right database context.
global: True
advanced: True
helpLink: postgres
-21
View File
@@ -1,21 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'allowed_states.map.jinja' import allowed_states %}
{% if sls.split('.')[0] in allowed_states %}
append_so-postgres_so-status.conf:
file.append:
- name: /opt/so/conf/so-status/so-status.conf
- text: so-postgres
- unless: grep -q so-postgres /opt/so/conf/so-status/so-status.conf
{% else %}
{{sls}}_state_not_allowed:
test.fail_without_changes:
- name: {{sls}}_state_not_allowed
{% endif %}
-55
View File
@@ -1,55 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'allowed_states.map.jinja' import allowed_states %}
{% if sls.split('.')[0] in allowed_states %}
{% from 'vars/globals.map.jinja' import GLOBALS %}
{% from 'ca/map.jinja' import CA %}
postgres_key:
x509.private_key_managed:
- name: /etc/pki/postgres.key
- keysize: 4096
- backup: True
- new: True
{% if salt['file.file_exists']('/etc/pki/postgres.key') -%}
- prereq:
- x509: /etc/pki/postgres.crt
{%- endif %}
- retry:
attempts: 5
interval: 30
postgres_crt:
x509.certificate_managed:
- name: /etc/pki/postgres.crt
- ca_server: {{ CA.server }}
- subjectAltName: DNS:{{ GLOBALS.hostname }}, IP:{{ GLOBALS.node_ip }}
- signing_policy: postgres
- private_key: /etc/pki/postgres.key
- CN: {{ GLOBALS.hostname }}
- days_remaining: 7
- days_valid: 820
- backup: True
- timeout: 30
- retry:
attempts: 5
interval: 30
postgresKeyperms:
file.managed:
- replace: False
- name: /etc/pki/postgres.key
- mode: 400
- user: 939
- group: 939
{% else %}
{{sls}}_state_not_allowed:
test.fail_without_changes:
- name: {{sls}}_state_not_allowed
{% endif %}
-157
View File
@@ -1,157 +0,0 @@
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
{% from 'allowed_states.map.jinja' import allowed_states %}
{% if sls.split('.')[0] in allowed_states %}
{% from 'vars/globals.map.jinja' import GLOBALS %}
{% from 'telegraf/map.jinja' import TELEGRAFMERGED %}
{# postgres_wait_ready below requires `docker_container: so-postgres`, which is
declared in postgres.enabled. Include it here so state.apply postgres.telegraf_users
on its own (e.g. from orch.deploy_newnode) still has that ID in scope. Salt
de-duplicates the circular include. #}
include:
- postgres.enabled
{% set TG_OUT = TELEGRAFMERGED.output | upper %}
{% if TG_OUT in ['POSTGRES', 'BOTH'] %}
# docker_container.running returns as soon as the container starts, but on
# first-init docker-entrypoint.sh starts a temporary postgres with
# `listen_addresses=''` to run /docker-entrypoint-initdb.d scripts, then
# shuts it down before exec'ing the real CMD. A default pg_isready check
# (Unix socket) passes during that ephemeral phase and races the shutdown
# with "the database system is shutting down". Checking TCP readiness on
# 127.0.0.1 only succeeds after the final postgres binds the port.
postgres_wait_ready:
cmd.run:
- name: |
for i in $(seq 1 60); do
if docker exec so-postgres pg_isready -h 127.0.0.1 -U postgres -q 2>/dev/null; then
exit 0
fi
sleep 2
done
echo "so-postgres did not accept TCP connections within 120s" >&2
exit 1
- require:
- docker_container: so-postgres
# Ensure the shared Telegraf database exists. init-users.sh only runs on a
# fresh data dir, so hosts upgraded onto an existing /nsm/postgres volume
# would otherwise never get so_telegraf.
postgres_create_telegraf_db:
cmd.run:
- name: |
if ! docker exec so-postgres psql -U postgres -tAc "SELECT 1 FROM pg_database WHERE datname='so_telegraf'" | grep -q 1; then
docker exec so-postgres psql -v ON_ERROR_STOP=1 -U postgres -c "CREATE DATABASE so_telegraf"
fi
- require:
- cmd: postgres_wait_ready
# Provision the shared group role and schema once. Every per-minion role is a
# member of so_telegraf, and each Telegraf connection does SET ROLE so_telegraf
# (via options='-c role=so_telegraf' in the connection string) so tables created
# on first write are owned by the group role and every member can INSERT/SELECT.
postgres_telegraf_group_role:
cmd.run:
- name: |
docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'so_telegraf') THEN
CREATE ROLE so_telegraf NOLOGIN;
END IF;
END
$$;
GRANT CONNECT ON DATABASE so_telegraf TO so_telegraf;
CREATE SCHEMA IF NOT EXISTS telegraf AUTHORIZATION so_telegraf;
GRANT USAGE, CREATE ON SCHEMA telegraf TO so_telegraf;
CREATE SCHEMA IF NOT EXISTS partman;
CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman;
CREATE EXTENSION IF NOT EXISTS pg_cron;
-- Telegraf (running as so_telegraf) calls partman.create_parent()
-- on first write of each metric, which needs USAGE on the partman
-- schema, EXECUTE on its functions/procedures, and write access to
-- partman.part_config so it can register new partitioned parents.
GRANT USAGE, CREATE ON SCHEMA partman TO so_telegraf;
GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA partman TO so_telegraf;
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA partman TO so_telegraf;
GRANT EXECUTE ON ALL PROCEDURES IN SCHEMA partman TO so_telegraf;
-- partman creates per-parent template tables (partman.template_*) at
-- runtime; default privileges extend DML/sequence access to them.
ALTER DEFAULT PRIVILEGES IN SCHEMA partman
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO so_telegraf;
ALTER DEFAULT PRIVILEGES IN SCHEMA partman
GRANT USAGE, SELECT, UPDATE ON SEQUENCES TO so_telegraf;
-- Hourly partman maintenance. cron.schedule is idempotent by jobname.
SELECT cron.schedule(
'telegraf-partman-maintenance',
'17 * * * *',
'CALL partman.run_maintenance_proc()'
);
EOSQL
- require:
- cmd: postgres_create_telegraf_db
{% set creds = salt['pillar.get']('telegraf:postgres_creds', {}) %}
{% for mid, entry in creds.items() %}
{% if entry.get('user') and entry.get('pass') %}
{% set u = entry.user %}
{% set p = entry.pass | replace("'", "''") %}
postgres_telegraf_role_{{ u }}:
cmd.run:
- name: |
docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{{ u }}') THEN
EXECUTE format('CREATE ROLE %I WITH LOGIN PASSWORD %L', '{{ u }}', '{{ p }}');
ELSE
EXECUTE format('ALTER ROLE %I WITH PASSWORD %L', '{{ u }}', '{{ p }}');
END IF;
END
$$;
GRANT CONNECT ON DATABASE so_telegraf TO "{{ u }}";
GRANT so_telegraf TO "{{ u }}";
EOSQL
- require:
- cmd: postgres_telegraf_group_role
{% endif %}
{% endfor %}
# Reconcile partman retention from pillar. Runs after role/schema setup so
# any partitioned parents Telegraf has already created get their retention
# refreshed whenever postgres.telegraf.retention_days changes.
{% set retention = salt['pillar.get']('postgres:telegraf:retention_days', 14) | int %}
postgres_telegraf_retention_reconcile:
cmd.run:
- name: |
docker exec -i so-postgres psql -v ON_ERROR_STOP=1 -U postgres -d so_telegraf <<'EOSQL'
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM pg_catalog.pg_extension WHERE extname = 'pg_partman') THEN
UPDATE partman.part_config
SET retention = '{{ retention }} days',
retention_keep_table = false
WHERE parent_table LIKE 'telegraf.%';
END IF;
END
$$;
EOSQL
- require:
- cmd: postgres_telegraf_group_role
{% endif %}
{% else %}
{{sls}}_state_not_allowed:
test.fail_without_changes:
- name: {{sls}}_state_not_allowed
{% endif %}
@@ -1,39 +0,0 @@
#!/bin/bash
#
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
. /usr/sbin/so-common
# Backups contain role password hashes and full chat data; keep them 0600.
umask 0077
TODAY=$(date '+%Y_%m_%d')
BACKUPDIR=/nsm/backup
BACKUPFILE="$BACKUPDIR/so-postgres-backup-$TODAY.sql.gz"
MAXBACKUPS=7
mkdir -p $BACKUPDIR
# Skip if already backed up today
if [ -f "$BACKUPFILE" ]; then
exit 0
fi
# Skip if container isn't running
if ! docker ps --format '{{.Names}}' | grep -q '^so-postgres$'; then
exit 0
fi
# Dump all databases and roles, compress
docker exec so-postgres pg_dumpall -U postgres | gzip > "$BACKUPFILE"
# Retention cleanup
NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l)
while [ "$NUMBACKUPS" -gt "$MAXBACKUPS" ]; do
OLDEST=$(find $BACKUPDIR -type f -name "so-postgres-backup*" -printf '%T+ %p\n' | sort | head -n 1 | awk -F" " '{print $2}')
rm -f "$OLDEST"
NUMBACKUPS=$(find $BACKUPDIR -type f -name "so-postgres-backup*" | wc -l)
done
@@ -1,80 +0,0 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
. /usr/sbin/so-common
usage() {
echo "Usage: $0 <operation> [args]"
echo ""
echo "Supported Operations:"
echo " sql Execute a SQL command, requires: <sql>"
echo " sqlfile Execute a SQL file, requires: <path>"
echo " shell Open an interactive psql shell"
echo " dblist List databases"
echo " userlist List database roles"
echo ""
exit 1
}
if [ $# -lt 1 ]; then
usage
fi
# Check for prerequisites
if [ "$(id -u)" -ne 0 ]; then
echo "This script must be run using sudo!"
exit 1
fi
COMMAND=$(basename $0)
OP=$1
shift
set -eo pipefail
log() {
echo -e "$(date) | $COMMAND | $@" >&2
}
so_psql() {
docker exec so-postgres psql -U postgres -d securityonion "$@"
}
case "$OP" in
sql)
[ $# -lt 1 ] && usage
so_psql -c "$1"
;;
sqlfile)
[ $# -ne 1 ] && usage
if [ ! -f "$1" ]; then
log "File not found: $1"
exit 1
fi
docker cp "$1" so-postgres:/tmp/sqlfile.sql
docker exec so-postgres psql -U postgres -d securityonion -f /tmp/sqlfile.sql
docker exec so-postgres rm -f /tmp/sqlfile.sql
;;
shell)
docker exec -it so-postgres psql -U postgres -d securityonion
;;
dblist)
so_psql -c "\l"
;;
userlist)
so_psql -c "\du"
;;
*)
usage
;;
esac
@@ -1,10 +0,0 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
. /usr/sbin/so-common
/usr/sbin/so-restart postgres $1
@@ -1,10 +0,0 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
. /usr/sbin/so-common
/usr/sbin/so-start postgres $1
-10
View File
@@ -1,10 +0,0 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
. /usr/sbin/so-common
/usr/sbin/so-stop postgres $1
-157
View File
@@ -1,157 +0,0 @@
#!/bin/bash
# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
# or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
# https://securityonion.net/license; you may not use this file except in compliance with the
# Elastic License 2.0.
# Point-in-time host metrics from the Telegraf Postgres backend.
# Sanity-check tool for verifying metrics are landing before the grid
# dashboards consume them.
#
# Assumes Telegraf's postgresql output is configured with
# tags_as_foreign_keys = true, tags_as_jsonb = true, fields_as_jsonb = true,
# so metric tables are (time, tag_id, fields jsonb) and tag tables are
# (tag_id, tags jsonb).
. /usr/sbin/so-common
usage() {
cat <<EOF
Usage: $0 [host]
Shows the most recent CPU, memory, disk, and load metrics for each host
from the so_telegraf Postgres database. Without an argument, reports on
every host that has data. With a host, limits output to that one.
Requires: sudo, so-postgres running, telegraf.output set to
POSTGRES or BOTH.
EOF
exit 1
}
if [ "$(id -u)" -ne 0 ]; then
echo "This script must be run using sudo!"
exit 1
fi
case "${1:-}" in
-h|--help) usage ;;
esac
FILTER_HOST="${1:-}"
SCHEMA="telegraf"
# Host values are interpolated into SQL below. Hostnames are [A-Za-z0-9._-];
# any other character in a tag value or CLI arg is rejected to prevent a
# stored-tag (or CLI) → SQL injection via a compromised Telegraf writer.
HOST_RE='^[A-Za-z0-9._-]+$'
if [ -n "$FILTER_HOST" ] && ! [[ "$FILTER_HOST" =~ $HOST_RE ]]; then
echo "Invalid host filter: $FILTER_HOST" >&2
exit 1
fi
so_psql() {
docker exec so-postgres psql -U postgres -d so_telegraf -At -F $'\t' "$@"
}
if ! docker exec so-postgres psql -U postgres -lqt 2>/dev/null | cut -d\| -f1 | grep -qw so_telegraf; then
echo "Database so_telegraf not found. Is telegraf.output set to POSTGRES or BOTH?"
exit 2
fi
table_exists() {
local table="$1"
[ -n "$(so_psql -c "SELECT 1 FROM information_schema.tables WHERE table_schema='${SCHEMA}' AND table_name='${table}' LIMIT 1;")" ]
}
# Discover hosts from cpu_tag (every minion reports cpu).
if ! table_exists "cpu_tag"; then
echo "${SCHEMA}.cpu_tag not found. Has Telegraf written any rows yet?"
exit 0
fi
HOSTS=$(so_psql -c "
SELECT DISTINCT tags->>'host'
FROM \"${SCHEMA}\".cpu_tag
WHERE tags ? 'host'
ORDER BY 1;")
if [ -z "$HOSTS" ]; then
echo "No hosts found in ${SCHEMA}. Is Telegraf configured to write to Postgres?"
exit 0
fi
print_metric() {
so_psql -c "$1"
}
for host in $HOSTS; do
if ! [[ "$host" =~ $HOST_RE ]]; then
echo "Skipping host with invalid characters in tag value: $host" >&2
continue
fi
if [ -n "$FILTER_HOST" ] && [ "$host" != "$FILTER_HOST" ]; then
continue
fi
echo "===================================================================="
echo " Host: $host"
echo "===================================================================="
if table_exists "cpu"; then
print_metric "
SELECT 'cpu ' AS metric,
to_char(c.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
round((100 - (c.fields->>'usage_idle')::numeric), 1) || '% used'
FROM \"${SCHEMA}\".cpu c
JOIN \"${SCHEMA}\".cpu_tag t USING (tag_id)
WHERE t.tags->>'host' = '${host}' AND t.tags->>'cpu' = 'cpu-total'
ORDER BY c.time DESC LIMIT 1;"
fi
if table_exists "mem"; then
print_metric "
SELECT 'memory ' AS metric,
to_char(m.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
round((m.fields->>'used_percent')::numeric, 1) || '% used (' ||
pg_size_pretty((m.fields->>'used')::bigint) || ' of ' ||
pg_size_pretty((m.fields->>'total')::bigint) || ')'
FROM \"${SCHEMA}\".mem m
JOIN \"${SCHEMA}\".mem_tag t USING (tag_id)
WHERE t.tags->>'host' = '${host}'
ORDER BY m.time DESC LIMIT 1;"
fi
if table_exists "disk"; then
print_metric "
SELECT 'disk ' || rpad(t.tags->>'path', 12) AS metric,
to_char(d.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
round((d.fields->>'used_percent')::numeric, 1) || '% used (' ||
pg_size_pretty((d.fields->>'used')::bigint) || ' of ' ||
pg_size_pretty((d.fields->>'total')::bigint) || ')'
FROM \"${SCHEMA}\".disk d
JOIN \"${SCHEMA}\".disk_tag t USING (tag_id)
WHERE t.tags->>'host' = '${host}'
AND d.time = (SELECT max(d2.time)
FROM \"${SCHEMA}\".disk d2
JOIN \"${SCHEMA}\".disk_tag t2 USING (tag_id)
WHERE t2.tags->>'host' = '${host}')
ORDER BY t.tags->>'path';"
fi
if table_exists "system"; then
print_metric "
SELECT 'load ' AS metric,
to_char(s.time, 'YYYY-MM-DD HH24:MI:SS') AS ts,
(s.fields->>'load1') || ' / ' ||
(s.fields->>'load5') || ' / ' ||
(s.fields->>'load15') || ' (1/5/15m)'
FROM \"${SCHEMA}\".system s
JOIN \"${SCHEMA}\".system_tag t USING (tag_id)
WHERE t.tags->>'host' = '${host}'
ORDER BY s.time DESC LIMIT 1;"
fi
echo ""
done
+18 -53
View File
@@ -6,74 +6,39 @@
# Elastic License 2.0.
import logging
import os
import re
import shlex
import subprocess
from subprocess import call
import yaml
log = logging.getLogger(__name__)
SO_MINION = '/usr/sbin/so-minion'
_NODETYPE_RE = re.compile(r'^[A-Z][A-Z0-9_]{0,31}$')
_MINIONID_RE = re.compile(r'^[A-Za-z0-9._-]{1,253}$')
_HOSTPART_RE = re.compile(r'^[A-Za-z0-9._-]{1,253}$')
_IPV4_RE = re.compile(
r'^(?:(?:25[0-5]|2[0-4]\d|[01]?\d?\d)\.){3}'
r'(?:25[0-5]|2[0-4]\d|[01]?\d?\d)$'
)
_HEAP_RE = re.compile(r'^\d{1,6}[kKmMgG]?$')
def _check(name, value, pattern):
s = str(value)
if not pattern.match(s):
raise ValueError("sominion_setup_reactor: refusing unsafe %s=%r" % (name, value))
return s
def run():
log.info('sominion_setup_reactor: Running')
minionid = data['id']
DATA = data['data']
hv_name = DATA['HYPERVISOR_HOST']
log.info('sominion_setup_reactor: DATA: %s' % DATA)
nodetype = _check('NODETYPE', DATA['NODETYPE'], _NODETYPE_RE)
argv = [
SO_MINION,
'-o=addVM',
'-m=' + _check('minionid', minionid, _MINIONID_RE),
'-n=' + _check('MNIC', DATA['MNIC'], _HOSTPART_RE),
'-i=' + _check('MAINIP', DATA['MAINIP'], _IPV4_RE),
'-c=' + str(int(DATA['CPUCORES'])),
'-d=' + str(DATA['NODE_DESCRIPTION']),
]
# Build the base command
cmd = "NODETYPE=" + DATA['NODETYPE'] + " /usr/sbin/so-minion -o=addVM -m=" + minionid + " -n=" + DATA['MNIC'] + " -i=" + DATA['MAINIP'] + " -c=" + str(DATA['CPUCORES']) + " -d='" + DATA['NODE_DESCRIPTION'] + "'"
# Add optional arguments only if they exist in DATA
if 'CORECOUNT' in DATA:
argv.append('-C=' + str(int(DATA['CORECOUNT'])))
cmd += " -C=" + str(DATA['CORECOUNT'])
if 'INTERFACE' in DATA:
argv.append('-a=' + _check('INTERFACE', DATA['INTERFACE'], _HOSTPART_RE))
cmd += " -a=" + DATA['INTERFACE']
if 'ES_HEAP_SIZE' in DATA:
argv.append('-e=' + _check('ES_HEAP_SIZE', DATA['ES_HEAP_SIZE'], _HEAP_RE))
cmd += " -e=" + DATA['ES_HEAP_SIZE']
if 'LS_HEAP_SIZE' in DATA:
argv.append('-l=' + _check('LS_HEAP_SIZE', DATA['LS_HEAP_SIZE'], _HEAP_RE))
cmd += " -l=" + DATA['LS_HEAP_SIZE']
if 'LSHOSTNAME' in DATA:
argv.append('-L=' + _check('LSHOSTNAME', DATA['LSHOSTNAME'], _HOSTPART_RE))
env = os.environ.copy()
env['NODETYPE'] = nodetype
log.info(
'sominion_setup_reactor: argv: %s (NODETYPE=%s)',
' '.join(shlex.quote(a) for a in argv),
shlex.quote(nodetype),
)
rc = subprocess.call(argv, shell=False, env=env)
cmd += " -L=" + DATA['LSHOSTNAME']
log.info('sominion_setup_reactor: Command: %s' % cmd)
rc = call(cmd, shell=True)
log.info('sominion_setup_reactor: rc: %s' % rc)
-1
View File
@@ -3,7 +3,6 @@ soc:
description: Enables or disables SOC. WARNING - Disabling this setting is unsupported and will cause the grid to malfunction. Re-enabling this setting is a manual effort via SSH.
forcedType: bool
advanced: True
readonly: True
telemetryEnabled:
title: SOC Telemetry
description: When this setting is enabled and the grid is not in airgap mode, SOC will provide feature usage data to the Security Onion development team via Google Analytics. This data helps Security Onion developers determine which product features are being used and can also provide insight into improving the user interface. When changing this setting, wait for the grid to fully synchronize and then perform a hard browser refresh on SOC, to force the browser cache to update and reflect the new setting.
-1
View File
@@ -1,6 +1,5 @@
telegraf:
enabled: False
output: BOTH
config:
interval: '30s'
metric_batch_size: 1000
-44
View File
@@ -8,14 +8,6 @@
{%- set ZEEK_ENABLED = salt['pillar.get']('zeek:enabled', True) %}
{%- set MDENGINE = GLOBALS.md_engine %}
{%- set LOGSTASH_ENABLED = LOGSTASH_MERGED.enabled %}
{%- set TG_OUT = TELEGRAFMERGED.output | upper %}
{%- set PG_HOST = GLOBALS.manager_ip %}
{#- Per-minion telegraf creds live in the grid-wide telegraf/creds.sls pillar,
written by /usr/sbin/so-telegraf-cred on the manager. Each minion looks up
its own entry by grains.id. #}
{%- set PG_ENTRY = salt['pillar.get']('telegraf:postgres_creds:' ~ grains.id, {}) %}
{%- set PG_USER = PG_ENTRY.get('user', '') %}
{%- set PG_PASS = PG_ENTRY.get('pass', '') %}
# Global tags can be specified here in key="value" format.
[global_tags]
role = "{{ GLOBALS.role.split('-') | last }}"
@@ -80,7 +72,6 @@
# OUTPUT PLUGINS #
###############################################################################
{%- if TG_OUT in ['INFLUXDB', 'BOTH'] %}
# Configuration for sending metrics to InfluxDB
[[outputs.influxdb_v2]]
urls = ["https://{{ INFLUXDBHOST }}:8086"]
@@ -94,41 +85,6 @@
tls_key = "/etc/telegraf/telegraf.key"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
{%- endif %}
{%- if TG_OUT in ['POSTGRES', 'BOTH'] and PG_USER and PG_PASS %}
# Configuration for sending metrics to PostgreSQL.
# options='-c role=so_telegraf' makes every connection SET ROLE to the shared
# group role so tables created on first write are owned by so_telegraf, and
# all per-minion members can INSERT/SELECT them via role inheritance.
# fields_as_jsonb/tags_as_jsonb keep metric tables at a fixed column count so
# high-cardinality inputs (docker, procstat, kafka) don't blow past the
# Postgres 1600-column-per-table limit.
[[outputs.postgresql]]
connection = "host={{ PG_HOST }} port=5432 user={{ PG_USER }} password={{ PG_PASS }} dbname=so_telegraf sslmode=verify-full sslrootcert=/etc/telegraf/ca.crt options='-c role=so_telegraf'"
schema = "telegraf"
tags_as_foreign_keys = true
tags_as_jsonb = true
fields_as_jsonb = true
# Every metric table is a daily time-range partitioned parent managed by
# pg_partman. Retention drops old partitions instead of row-by-row DELETEs.
{% raw %}
# pg_partman 5.x requires the control column (time) to be NOT NULL, so
# ALTER it before create_parent(). And create_parent() splits
# p_parent_table on '.' to look up raw identifiers, so the literal must
# be 'schema.name' (not '"schema"."name"' as .table|quoteLiteral emits).
# IF NOT EXISTS keeps the three templates idempotent so a Telegraf
# restart after any DB-side surgery re-runs them safely.
create_templates = [
'''CREATE TABLE IF NOT EXISTS {{ .table }} ({{ .columns }}) PARTITION BY RANGE ("time")''',
'''ALTER TABLE {{ .table }} ALTER COLUMN "time" SET NOT NULL''',
'''SELECT partman.create_parent(p_parent_table := {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }}, p_control := 'time', p_type := 'range', p_interval := '1 day', p_premake := 3) WHERE NOT EXISTS (SELECT 1 FROM partman.part_config WHERE parent_table = {{ printf "%s.%s" .table.Schema .table.Name | quoteLiteral }})'''
]
tag_table_create_templates = [
'''CREATE TABLE IF NOT EXISTS {{ .table }} ({{ .columns }}, PRIMARY KEY (tag_id))'''
]
{% endraw %}
{%- endif %}
###############################################################################
# PROCESSOR PLUGINS #
-9
View File
@@ -4,15 +4,6 @@ telegraf:
forcedType: bool
advanced: True
helpLink: influxdb
output:
description: Selects the backend(s) Telegraf writes metrics to. INFLUXDB keeps the current behavior; POSTGRES writes to the grid's Postgres instance; BOTH dual-writes for migration validation.
options:
- INFLUXDB
- POSTGRES
- BOTH
global: True
advanced: True
helpLink: influxdb
config:
interval:
description: Data collection interval.
-5
View File
@@ -68,7 +68,6 @@ base:
- backup.config_backup
- nginx
- influxdb
- postgres
- soc
- kratos
- hydra
@@ -96,7 +95,6 @@ base:
- backup.config_backup
- nginx
- influxdb
- postgres
- soc
- kratos
- hydra
@@ -125,7 +123,6 @@ base:
- registry
- nginx
- influxdb
- postgres
- strelka.manager
- soc
- kratos
@@ -156,7 +153,6 @@ base:
- registry
- nginx
- influxdb
- postgres
- strelka.manager
- soc
- kratos
@@ -185,7 +181,6 @@ base:
- manager
- nginx
- influxdb
- postgres
- strelka.manager
- soc
- kratos
-2
View File
@@ -1,5 +1,4 @@
{% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %}
{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %}
{% from 'vars/sensor.map.jinja' import SENSOR_GLOBALS %}
{% set ROLE_GLOBALS = {} %}
@@ -7,7 +6,6 @@
{% set EVAL_GLOBALS =
[
ELASTICSEARCH_GLOBALS,
POSTGRES_GLOBALS,
SENSOR_GLOBALS
]
%}
-2
View File
@@ -1,5 +1,4 @@
{% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %}
{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %}
{% from 'vars/sensor.map.jinja' import SENSOR_GLOBALS %}
{% set ROLE_GLOBALS = {} %}
@@ -7,7 +6,6 @@
{% set IMPORT_GLOBALS =
[
ELASTICSEARCH_GLOBALS,
POSTGRES_GLOBALS,
SENSOR_GLOBALS
]
%}
+1 -3
View File
@@ -1,14 +1,12 @@
{% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %}
{% from 'vars/logstash.map.jinja' import LOGSTASH_GLOBALS %}
{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %}
{% set ROLE_GLOBALS = {} %}
{% set MANAGER_GLOBALS =
[
ELASTICSEARCH_GLOBALS,
LOGSTASH_GLOBALS,
POSTGRES_GLOBALS
LOGSTASH_GLOBALS
]
%}
+1 -3
View File
@@ -1,14 +1,12 @@
{% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %}
{% from 'vars/logstash.map.jinja' import LOGSTASH_GLOBALS %}
{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %}
{% set ROLE_GLOBALS = {} %}
{% set MANAGERSEARCH_GLOBALS =
[
ELASTICSEARCH_GLOBALS,
LOGSTASH_GLOBALS,
POSTGRES_GLOBALS
LOGSTASH_GLOBALS
]
%}
-16
View File
@@ -1,16 +0,0 @@
{# Copyright Security Onion Solutions LLC and/or licensed to Security Onion Solutions LLC under one
or more contributor license agreements. Licensed under the Elastic License 2.0 as shown at
https://securityonion.net/license; you may not use this file except in compliance with the
Elastic License 2.0. #}
{% import 'vars/init.map.jinja' as INIT %}
{%
set POSTGRES_GLOBALS = {
'postgres': {}
}
%}
{% if salt['file.file_exists']('/opt/so/saltstack/local/pillar/postgres/auth.sls') %}
{% do POSTGRES_GLOBALS.postgres.update({'auth': INIT.PILLAR.postgres.auth}) %}
{% endif %}
-2
View File
@@ -1,6 +1,5 @@
{% from 'vars/elasticsearch.map.jinja' import ELASTICSEARCH_GLOBALS %}
{% from 'vars/logstash.map.jinja' import LOGSTASH_GLOBALS %}
{% from 'vars/postgres.map.jinja' import POSTGRES_GLOBALS %}
{% from 'vars/sensor.map.jinja' import SENSOR_GLOBALS %}
{% set ROLE_GLOBALS = {} %}
@@ -9,7 +8,6 @@
[
ELASTICSEARCH_GLOBALS,
LOGSTASH_GLOBALS,
POSTGRES_GLOBALS,
SENSOR_GLOBALS
]
%}
+49 -116
View File
@@ -202,10 +202,10 @@ check_service_status() {
systemctl status $service_name > /dev/null 2>&1
local status=$?
if [ $status -gt 0 ]; then
info "$service_name is not running"
info " $service_name is not running"
return 1;
else
info "$service_name is running"
info " $service_name is running"
return 0;
fi
@@ -745,56 +745,6 @@ configure_network_sensor() {
return $err
}
configure_management_bond() {
local bond_name="bond1"
local bond_mode=${MBOND_MODE:-active-backup}
info "Setting up $bond_name management interface with mode $bond_mode"
if [[ ${#MBNICS[@]} -eq 0 ]]; then
error "[ERROR] No management bond NICs were selected."
fail_setup
fi
nmcli -t -f NAME con show | grep -Fxq "$bond_name"
local found_int=$?
if [[ $found_int != 0 ]]; then
nmcli con add type bond ifname "$bond_name" con-name "$bond_name" mode "$bond_mode" -- \
ipv6.method ignore \
connection.autoconnect yes >> "$setup_log" 2>&1
else
nmcli con mod "$bond_name" \
bond.options "mode=$bond_mode" \
ipv6.method ignore \
connection.autoconnect yes >> "$setup_log" 2>&1
fi
local err=0
for MBNIC in "${MBNICS[@]}"; do
local slave_name="$bond_name-slave-$MBNIC"
nmcli -t -f NAME con show | grep -Fxq "$slave_name"
found_int=$?
if [[ $found_int != 0 ]]; then
nmcli con add type ethernet ifname "$MBNIC" con-name "$slave_name" master "$bond_name" -- \
connection.autoconnect yes >> "$setup_log" 2>&1
else
nmcli con mod "$slave_name" \
connection.master "$bond_name" \
connection.slave-type bond \
connection.autoconnect yes >> "$setup_log" 2>&1
fi
nmcli con up "$slave_name" >> "$setup_log" 2>&1
local ret=$?
[[ $ret -eq 0 ]] || err=$ret
done
return $err
}
configure_hyper_bridge() {
info "Setting up hypervisor bridge"
info "Checking $MNIC ipv4.method is auto or manual"
@@ -871,7 +821,6 @@ create_manager_pillars() {
soc_pillar
idh_pillar
influxdb_pillar
postgres_pillar
logrotate_pillar
patch_pillar
nginx_pillar
@@ -1049,11 +998,6 @@ filter_unused_nics() {
grep_string="$grep_string\|$BONDNIC"
done
fi
if [[ $MBNICS ]]; then
for BONDNIC in "${MBNICS[@]}"; do
grep_string="$grep_string\|$BONDNIC"
done
fi
# Finally, set filtered_nics to any NICs we aren't using (and ignore interfaces that aren't of use)
filtered_nics=$(ip link | awk -F: '$0 !~ "lo|vir|veth|br|docker|wl|^[^0-9]"{print $2}' | grep -vwe "$grep_string" | sed 's/ //g' | sed -r 's/(.*)(\.[0-9]+)@\1/\1\2/g')
@@ -1109,7 +1053,6 @@ generate_passwords(){
HYDRAKEY=$(get_random_value)
HYDRASALT=$(get_random_value)
REDISPASS=$(get_random_value)
POSTGRESPASS=$(get_random_value)
SOCSRVKEY=$(get_random_value 64)
IMPORTPASS=$(get_random_value)
}
@@ -1412,12 +1355,6 @@ influxdb_pillar() {
" token: $INFLUXTOKEN" > $local_salt_dir/pillar/influxdb/token.sls
}
postgres_pillar() {
title "Create the postgres pillar file"
touch $adv_postgres_pillar_file
touch $postgres_pillar_file
}
make_some_dirs() {
mkdir -p /nsm
mkdir -p "$default_salt_dir"
@@ -1427,7 +1364,7 @@ make_some_dirs() {
mkdir -p $local_salt_dir/salt/firewall/portgroups
mkdir -p $local_salt_dir/salt/firewall/ports
for THEDIR in bpf elasticsearch ntp firewall redis backup influxdb postgres strelka sensoroni soc docker zeek suricata nginx telegraf logstash soc manager kratos hydra idh elastalert stig global kafka versionlock hypervisor vm; do
for THEDIR in bpf elasticsearch ntp firewall redis backup influxdb strelka sensoroni soc docker zeek suricata nginx telegraf logstash soc manager kratos hydra idh elastalert stig global kafka versionlock hypervisor vm; do
mkdir -p $local_salt_dir/pillar/$THEDIR
touch $local_salt_dir/pillar/$THEDIR/adv_$THEDIR.sls
touch $local_salt_dir/pillar/$THEDIR/soc_$THEDIR.sls
@@ -1443,7 +1380,7 @@ network_init() {
title "Initializing Network"
disable_ipv6
set_hostname
if [[ $is_iso || $is_desktop_iso ]]; then
if [[ ( $is_iso || $is_desktop_iso ) ]]; then
set_management_interface
fi
}
@@ -1604,10 +1541,28 @@ clear_previous_setup_results() {
reinstall_init() {
info "Putting system in state to run setup again"
# Always include both services. check_service_status skips units that aren't present.
local salt_services=( "salt-master" "salt-minion" )
if [[ $install_type =~ ^(MANAGER|EVAL|MANAGERSEARCH|MANAGERHYPE|STANDALONE|FLEET|IMPORT)$ ]]; then
local salt_services=( "salt-master" "salt-minion" )
else
local salt_services=( "salt-minion" )
fi
local service_retry_count=20
{
# Snapshot pre-reinstall salt state before any destructive step so a
# failed reinstall leaves a usable post-mortem in the setup log.
echo "=== pre-reinstall salt diagnostic $(date -Iseconds) ==="
systemctl status salt-master --no-pager 2>&1 | head -40 || true
systemctl status salt-minion --no-pager 2>&1 | head -40 || true
journalctl -u salt-master --no-pager --since "-10 minutes" 2>&1 | tail -80 || true
journalctl -u salt-minion --no-pager --since "-10 minutes" 2>&1 | tail -80 || true
ls -laR /etc/salt 2>&1 | head -60 || true
ls -la /var/cache/salt 2>&1 | head -40 || true
[[ -f /etc/salt/master.rpmnew ]] && diff -u /etc/salt/master /etc/salt/master.rpmnew 2>&1 | head -80 || true
[[ -f /etc/salt/minion.rpmnew ]] && diff -u /etc/salt/minion /etc/salt/minion.rpmnew 2>&1 | head -40 || true
echo "=== end diagnostic ==="
# remove all of root's cronjobs
crontab -r -u root
@@ -1621,51 +1576,31 @@ reinstall_init() {
salt-call state.apply ca.remove -linfo --local --file-root=../salt
# Stop salt services and force-kill any lingering salt processes (including orphans
# from an earlier reinstall attempt where the unit file is gone but processes survive)
# so dnf remove salt can run cleanly
# Kill any salt processes (safely)
for service in "${salt_services[@]}"; do
# Stop the service in the background so we can exit after a certain amount of time
if check_service_status "$service"; then
info "Stopping $service via systemctl"
systemctl stop "$service"
systemctl stop "$service" &
fi
local pid=$!
local count=0
while check_service_status "$service"; do
if [[ $count -gt $service_retry_count ]]; then
echo "Could not stop $service after 1 minute, exiting setup."
# Stop the systemctl process trying to kill the service, show user a message, then exit setup
kill -9 $pid
fail_setup
fi
sleep 5
((count++))
done
done
# Unconditionally force-kill any remaining salt binaries — these may be orphaned
# from a prior aborted reinstall (no unit file, so systemctl can't see them).
for salt_bin in salt-master salt-minion salt-call salt-cloud; do
if pgrep -f "/usr/bin/${salt_bin}" > /dev/null 2>&1; then
info "Force-killing lingering $salt_bin processes"
pkill -9 -ef "/usr/bin/${salt_bin}" 2>/dev/null
fi
done
# Catch stray `salt` CLI children from saltutil.kill_all_jobs / state.apply invocations
pkill -9 -ef "/usr/bin/python3 /bin/salt" 2>/dev/null
# Give the kernel a moment to reap the killed processes before dnf removes the binaries
local kill_wait=0
while pgrep -f "/usr/bin/salt-" > /dev/null 2>&1; do
if [[ $kill_wait -gt 10 ]]; then
info "Salt processes still present after SIGKILL + 10s wait; proceeding anyway"
pgrep -af "/usr/bin/salt-" | while read -r line; do info " lingering: $line"; done
break
fi
sleep 1
((kill_wait++))
done
# Clear the 'failed' state SIGKILL left on the units before removing the package
systemctl reset-failed salt-master.service salt-minion.service 2>/dev/null || true
# Remove all salt configs
# Uninstall salt so configs and directories are removed and reinstall reconfigures directories
dnf -y remove salt
rm -rf /etc/salt/ /var/cache/salt/
# Drop systemd's in-memory references to the now-removed units
systemctl daemon-reload
# Uninstall local Elastic Agent, if installed
elastic-agent uninstall -f
if command -v docker &> /dev/null; then
# Stop and remove all so-* containers so files can be changed with more safety
@@ -1689,7 +1624,10 @@ reinstall_init() {
backup_dir /nsm/hydra "$date_string"
backup_dir /nsm/influxdb "$date_string"
} 2>&1 | tee -a "$setup_log"
# Uninstall local Elastic Agent, if installed
elastic-agent uninstall -f
} >> "$setup_log" 2>&1
info "System reinstall init has been completed."
}
@@ -1907,8 +1845,7 @@ secrets_pillar(){
printf '%s\n'\
"secrets:"\
" import_pass: $IMPORTPASS"\
" influx_pass: $INFLUXPASS"\
" postgres_pass: $POSTGRESPASS" > $local_salt_dir/pillar/secrets.sls
" influx_pass: $INFLUXPASS" > $local_salt_dir/pillar/secrets.sls
fi
}
@@ -2139,12 +2076,8 @@ set_initial_firewall_access() {
# Set up the management interface on the ISO
set_management_interface() {
title "Setting up the main interface"
if [[ $MNIC == "bond1" ]]; then
configure_management_bond || fail_setup
fi
if [ "$address_type" = 'DHCP' ]; then
logCmd "nmcli con mod $MNIC connection.autoconnect yes ipv4.method auto"
logCmd "nmcli con mod $MNIC connection.autoconnect yes"
logCmd "nmcli con up $MNIC"
logCmd "nmcli -p connection show $MNIC"
else
+12 -4
View File
@@ -219,7 +219,7 @@ if [ -n "$test_profile" ]; then
WEBUSER=onionuser@somewhere.invalid
WEBPASSWD1=0n10nus3r
WEBPASSWD2=0n10nus3r
NODE_DESCRIPTION="${HOSTNAME} - ${install_type} - ${MSRVIP_OFFSET}"
NODE_DESCRIPTION="${HOSTNAME} - ${install_type} - ${MAINIP}"
update_sudoers_for_testing
fi
@@ -724,10 +724,18 @@ if ! [[ -f $install_opt_file ]]; then
# Install salt
saltify
check_sos_appliance
# Wait for salt-master to be actually running and have its PKI
# ready after a fresh saltify. Without this, salt-key operations
# silently race the daemon and the key accept no-ops, which is
# what was causing reinstalls on 3.x to hang on state.show_top.
retry 30 2 "test -f /etc/salt/pki/master/master.pub" \
|| fail "salt-master did not initialize PKI after saltify"
check_salt_master_status \
|| fail "salt-master not accepting calls after saltify"
logCmd "salt-key -yd $MINION_ID"
sleep 2 # Debug RSA Key format errors
logCmd "salt-call state.show_top"
sleep 2 # Debug RSA Key format errors
wait_for_minion_key_pending "$MINION_ID" 30 2 \
|| fail "salt-minion never presented its key to salt-master"
logCmd "salt-key -ya $MINION_ID"
logCmd "salt-call saltutil.sync_all"
# we need to sync the runner and generate the soqemussh user keys so that first highstate after license created
-6
View File
@@ -202,12 +202,6 @@ export influxdb_pillar_file
adv_influxdb_pillar_file="$local_salt_dir/pillar/influxdb/adv_influxdb.sls"
export adv_influxdb_pillar_file
postgres_pillar_file="$local_salt_dir/pillar/postgres/soc_postgres.sls"
export postgres_pillar_file
adv_postgres_pillar_file="$local_salt_dir/pillar/postgres/adv_postgres.sls"
export adv_postgres_pillar_file
logrotate_pillar_file="$local_salt_dir/pillar/logrotate/soc_logrotate.sls"
export logrotate_pillar_file
+1 -2
View File
@@ -71,8 +71,7 @@ log_has_errors() {
grep -vE "remove_failed_vm.sls" | \
grep -vE "failed to copy: httpReadSeeker" | \
grep -vE "Error response from daemon: failed to resolve reference" | \
grep -vE "log-.*-pipeline_failed_attempts" | \
grep -vE " -v ON_ERROR_STOP=1" &> "$error_log"
grep -vE "log-.*-pipeline_failed_attempts" &> "$error_log"
if [[ $? -eq 0 ]]; then
# This function succeeds (returns 0) if errors are detected
+2 -83
View File
@@ -845,99 +845,18 @@ whiptail_management_nic() {
[ -n "$TESTING" ] && return
filter_unused_nics
local management_nic_options=( "${nic_list_management[@]}" )
if [[ $is_iso || $is_desktop_iso ]]; then
management_nic_options+=( "BOND" "Configure a bonded management interface" )
fi
MNIC=$(whiptail --title "$whiptail_title" --menu "Please select the NIC you would like to use for management.\n\nUse the arrow keys to move around and the Enter key to select." 20 75 12 "${management_nic_options[@]}" 3>&1 1>&2 2>&3 )
MNIC=$(whiptail --title "$whiptail_title" --menu "Please select the NIC you would like to use for management.\n\nUse the arrow keys to move around and the Enter key to select." 20 75 12 "${nic_list_management[@]}" 3>&1 1>&2 2>&3 )
local exitstatus=$?
whiptail_check_exitstatus $exitstatus
while [ -z "$MNIC" ]
do
MNIC=$(whiptail --title "$whiptail_title" --menu "Please select the NIC you would like to use for management.\n\nUse the arrow keys to move around and the Enter key to select." 22 75 12 "${management_nic_options[@]}" 3>&1 1>&2 2>&3 )
MNIC=$(whiptail --title "$whiptail_title" --menu "Please select the NIC you would like to use for management.\n\nUse the arrow keys to move around and the Enter key to select." 22 75 12 "${nic_list_management[@]}" 3>&1 1>&2 2>&3 )
local exitstatus=$?
whiptail_check_exitstatus $exitstatus
done
if [[ $MNIC == "BOND" ]]; then
whiptail_management_bond
fi
}
whiptail_management_bond() {
[ -n "$TESTING" ] && return
MBOND_MODE=$(whiptail --title "$whiptail_title" --menu \
"Choose the bond mode for the management interface.\n\nThe management bond will be created as bond1." 20 75 7 \
"active-backup" "One active NIC with failover (recommended)" \
"balance-rr" "Round-robin transmit policy" \
"balance-xor" "Transmit based on selected hash policy" \
"broadcast" "Transmit everything on all slave interfaces" \
"802.3ad" "Dynamic link aggregation (requires switch support)" \
"balance-tlb" "Adaptive transmit load balancing" \
"balance-alb" "Adaptive load balancing" 3>&1 1>&2 2>&3)
local exitstatus=$?
whiptail_check_exitstatus $exitstatus
while [ -z "$MBOND_MODE" ]
do
MBOND_MODE=$(whiptail --title "$whiptail_title" --menu \
"Choose the bond mode for the management interface.\n\nThe management bond will be created as bond1." 20 75 7 \
"active-backup" "One active NIC with failover (recommended)" \
"balance-rr" "Round-robin transmit policy" \
"balance-xor" "Transmit based on selected hash policy" \
"broadcast" "Transmit everything on all slave interfaces" \
"802.3ad" "Dynamic link aggregation (requires switch support)" \
"balance-tlb" "Adaptive transmit load balancing" \
"balance-alb" "Adaptive load balancing" 3>&1 1>&2 2>&3)
local exitstatus=$?
whiptail_check_exitstatus $exitstatus
done
whiptail_management_bond_nics
MNIC="bond1"
export MBOND_MODE MNIC
}
whiptail_management_bond_nics() {
[ -n "$TESTING" ] && return
MBNICS=()
filter_unused_nics
MBNICS=$(whiptail --title "$whiptail_title" --checklist "Please add NICs to the Management Interface:" 20 75 12 "${nic_list[@]}" 3>&1 1>&2 2>&3)
local exitstatus=$?
whiptail_check_exitstatus $exitstatus
while [ -z "$MBNICS" ]
do
MBNICS=$(whiptail --title "$whiptail_title" --checklist "Please add NICs to the Management Interface:" 20 75 12 "${nic_list[@]}" 3>&1 1>&2 2>&3)
local exitstatus=$?
whiptail_check_exitstatus $exitstatus
done
MBNICS=$(echo "$MBNICS" | tr -d '"')
IFS=' ' read -ra MBNICS <<< "$MBNICS"
for bond_nic in "${MBNICS[@]}"; do
for dev_status in "${nmcli_dev_status_list[@]}"; do
if [[ $dev_status == "${bond_nic}:unmanaged" ]]; then
whiptail \
--title "$whiptail_title" \
--msgbox "$bond_nic is unmanaged by Network Manager. Please remove it from other network management tools then re-run setup." \
8 75
exit
fi
done
done
export MBNICS
}
whiptail_net_method() {