diff --git a/pillar/healthcheck/eval.sls b/pillar/healthcheck/eval.sls
new file mode 100644
index 000000000..09efb7ba7
--- /dev/null
+++ b/pillar/healthcheck/eval.sls
@@ -0,0 +1,5 @@
+healthcheck:
+  enabled: False
+  schedule: 10
+  checks:
+    - zeek
diff --git a/pillar/healthcheck/sensor.sls b/pillar/healthcheck/sensor.sls
new file mode 100644
index 000000000..09efb7ba7
--- /dev/null
+++ b/pillar/healthcheck/sensor.sls
@@ -0,0 +1,5 @@
+healthcheck:
+  enabled: False
+  schedule: 10
+  checks:
+    - zeek
diff --git a/pillar/top.sls b/pillar/top.sls
index f1cde7853..7ebd8ada2 100644
--- a/pillar/top.sls
+++ b/pillar/top.sls
@@ -13,6 +13,7 @@ base:
     - static
     - firewall.*
     - brologs
+    - healthcheck.sensor
     - minions.{{ grains.id }}
 
   '*_master or *_mastersearch':
@@ -35,6 +36,7 @@ base:
     - auth
     - logstash
     - logstash.eval
+    - healthcheck.eval
     - minions.{{ grains.id }}
 
   '*_node':
diff --git a/salt/_modules/healthcheck.py b/salt/_modules/healthcheck.py
new file mode 100644
index 000000000..26e021322
--- /dev/null
+++ b/salt/_modules/healthcheck.py
@@ -0,0 +1,149 @@
+#!py
+'''
+Run service health checks on a minion and repair what they find broken.
+
+Checks are functions in this module whose names appear in
+``allowed_functions``. A check that needs a state re-applied appends the
+state name to ``states_to_apply``; ``apply_states`` drains that queue.
+'''
+
+import logging
+import sys
+
+log = logging.getLogger(__name__)
+
+allowed_functions = ['zeek']
+states_to_apply = []
+
+
+def apply_states(states=''):
+    '''
+    Apply the given states, or every state queued in ``states_to_apply``.
+
+    states
+        Comma-separated state names. When empty, the queued states are
+        applied instead and the queue is emptied, so a later call does
+        not apply them a second time.
+
+    CLI Example:
+
+    .. code-block:: bash
+
+        salt '*' healthcheck.apply_states zeek
+    '''
+    calling_func = sys._getframe().f_back.f_code.co_name
+    log.debug('healthcheck module: apply_states function caller: %s', calling_func)
+
+    if not states:
+        # Drain the queue, dropping duplicates but keeping first-seen order.
+        queued = []
+        for state in states_to_apply:
+            if state not in queued:
+                queued.append(state)
+        del states_to_apply[:]
+        states = ','.join(queued)
+
+    if states:
+        log.info('healthcheck module: apply_states states: %s', states)
+        __salt__['state.apply'](states)
+
+
+def docker_restart(container):
+    '''
+    Stop and remove ``container`` so that its state can recreate it.
+
+    Nothing is started here; the caller queues or applies the state that
+    brings the container back. Errors are logged instead of raised.
+
+    CLI Example:
+
+    .. code-block:: bash
+
+        salt '*' healthcheck.docker_restart so-zeek
+    '''
+    try:
+        # ``stop`` must be a real keyword argument: the string 'stop=True'
+        # is only parsed on the command line, and passed positionally it
+        # lands in docker.rm's ``force`` parameter instead.
+        __salt__['docker.rm'](container, stop=True)
+    except Exception as e:
+        log.error('healthcheck module: %s', e)
+
+
+def run(checks=''):
+    '''
+    Run the requested checks, then apply any states they queued.
+
+    checks
+        Comma-separated check names; a list is accepted as well. When
+        empty, the ``healthcheck:checks`` pillar list is used.
+
+    Returns the list of check names that were run. Names that are not in
+    ``allowed_functions`` are logged and skipped.
+
+    CLI Example:
+
+    .. code-block:: bash
+
+        salt '*' healthcheck.run
+        salt '*' healthcheck.run checks=zeek
+    '''
+    retval = []
+    calling_func = sys._getframe().f_back.f_code.co_name
+    log.debug('healthcheck module: run function caller: %s', calling_func)
+
+    if not checks:
+        checks = __salt__['pillar.get']('healthcheck:checks', [])
+    elif hasattr(checks, 'split'):
+        checks = [name.strip() for name in checks.split(',') if name.strip()]
+
+    log.debug('healthcheck module: run checks to be run: %s', checks)
+    for check in checks:
+        if check in allowed_functions:
+            retval.append(check)
+            # Call from this frame: checks read their caller's name to tell
+            # a run() pass from a direct invocation.
+            check_func = getattr(sys.modules[__name__], check)
+            check_func()
+        else:
+            log.warning('healthcheck module: attempted to run function %s', check)
+
+    # If you want to apply states at the end of the run,
+    # be sure to append the state name to states_to_apply[]
+    apply_states()
+
+    return retval
+
+
+def zeek():
+    '''
+    Check Zeek with ``zeekctl.status`` and rebuild its container on failure.
+
+    A non-zero return code removes the ``so-zeek`` container and queues the
+    ``zeek`` state. The outcome is reported through ``telegraf.send`` as
+    ``healthcheck zeek_restarted=True`` or ``...=False``.
+
+    CLI Example:
+
+    .. code-block:: bash
+
+        salt '*' healthcheck.zeek
+    '''
+    calling_func = sys._getframe().f_back.f_code.co_name
+    log.debug('healthcheck module: zeek function caller: %s', calling_func)
+
+    retcode = __salt__['zeekctl.status'](verbose=False)
+    log.debug('zeekctl.status retcode: %s', retcode)
+    zeek_restarted = bool(retcode)
+    if zeek_restarted:
+        docker_restart('so-zeek')
+        states_to_apply.append('zeek')
+
+    # NOTE(review): 'execute' is presumably the Salt frame that invokes a
+    # module function called directly; under run() the states are applied
+    # once at the end of the pass instead. Confirm on Salt upgrades.
+    if calling_func == 'execute':
+        apply_states()
+
+    __salt__['telegraf.send']('healthcheck zeek_restarted=%s' % zeek_restarted)
+    return 'zeek_restarted: %s' % zeek_restarted
diff --git a/salt/_modules/telegraf.py b/salt/_modules/telegraf.py
new file mode 100644
index 000000000..ee564a44d
--- /dev/null
+++ b/salt/_modules/telegraf.py
@@ -0,0 +1,43 @@
+#!py
+'''
+Send metrics to the Telegraf socket listener running on this host.
+'''
+
+import logging
+import socket
+
+log = logging.getLogger(__name__)
+
+
+def send(data, host='127.0.0.1', port=8094):
+    '''
+    Send one line of InfluxDB line protocol to Telegraf over UDP.
+
+    data
+        The metric line, for example ``healthcheck zeek_restarted=True``.
+
+    host
+        Address of the Telegraf ``socket_listener`` input. The container
+        uses host networking and the listener binds loopback, so the
+        default reaches it without a pillar or grains lookup.
+
+    port
+        UDP port of the listener.
+
+    Returns the number of bytes sent.
+
+    CLI Example:
+
+    .. code-block:: bash
+
+        salt '*' telegraf.send 'healthcheck zeek_restarted=False'
+    '''
+    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+    try:
+        sent = sock.sendto(data.encode('utf-8'), (host, port))
+    finally:
+        # Close explicitly rather than leaving the descriptor to the GC.
+        sock.close()
+
+    log.debug('telegraf module: sent %s bytes to %s:%s', sent, host, port)
+    return sent
diff --git a/salt/_modules/zeekctl.py b/salt/_modules/zeekctl.py
index e84173f50..16d6bf6f1 100644
--- a/salt/_modules/zeekctl.py
+++ b/salt/_modules/zeekctl.py
@@ -134,10 +134,13 @@ def start():
     return retval
 
 
-def status():
+def status(verbose=True):
 
     cmd = "runuser -l zeek -c '/opt/zeek/bin/zeekctl status'"
     retval = __salt__['docker.run']('so-zeek', cmd)
+    if not verbose:
+        retval = __context__['retcode']
+
     return retval
 
 
diff --git a/salt/common/init.sls b/salt/common/init.sls
index 8146f281b..6e8a3ea65 100644
--- a/salt/common/init.sls
+++ b/salt/common/init.sls
@@ -192,6 +192,8 @@ so-telegraf:
       - HOST_SYS=/host/sys
       - HOST_MOUNT_PREFIX=/host
     - network_mode: host
+    # No port_bindings: Docker discards published ports under network_mode: host.
+    # The socket_listener input in telegraf.conf binds 127.0.0.1:8094 on the host itself.
     - binds:
       - /opt/so/log/telegraf:/var/log/telegraf:rw
       - /opt/so/conf/telegraf/etc/telegraf.conf:/etc/telegraf/telegraf.conf:ro
@@ -313,7 +315,9 @@ grafanaconf:
     - source: salt://common/grafana/etc
 
 {% if salt['pillar.get']('mastertab', False) %}
-{%- for SN, SNDATA in salt['pillar.get']('mastertab', {}).items() %}
+{% for SN, SNDATA in salt['pillar.get']('mastertab', {}).items() %}
+{% set NODETYPE = SN.split('_')|last %}
+{% set SN = SN.rsplit('_', 1)[0] %}
 dashboard-master:
   file.managed:
     - name: /opt/so/conf/grafana/grafana_dashboards/master/{{ SN }}-Master.json
@@ -330,11 +334,13 @@ dashboard-master:
       ROOTFS: {{ SNDATA.rootfs }}
       NSMFS: {{ SNDATA.nsmfs }}
 
-{%- endfor %}
+{% endfor %}
 {% endif %}
 
 {% if salt['pillar.get']('sensorstab', False) %}
-{%- for SN, SNDATA in salt['pillar.get']('sensorstab', {}).items() %}
+{% for SN, SNDATA in salt['pillar.get']('sensorstab', {}).items() %}
+{% set NODETYPE = SN.split('_')|last %}
+{% set SN = SN.rsplit('_', 1)[0] %}
 dashboard-{{ SN }}:
   file.managed:
     - name: /opt/so/conf/grafana/grafana_dashboards/forward_nodes/{{ SN }}-Sensor.json
@@ -355,7 +361,9 @@ dashboard-{{ SN }}:
 {% endif %}
 
 {% if salt['pillar.get']('nodestab', False) %}
-{%- for SN, SNDATA in salt['pillar.get']('nodestab', {}).items() %}
+{% for SN, SNDATA in salt['pillar.get']('nodestab', {}).items() %}
+{% set NODETYPE = SN.split('_')|last %}
+{% set SN = SN.rsplit('_', 1)[0] %}
 dashboardsearch-{{ SN }}:
   file.managed:
     - name: /opt/so/conf/grafana/grafana_dashboards/search_nodes/{{ SN }}-Node.json
@@ -376,7 +384,9 @@ dashboardsearch-{{ SN }}:
 {% endif %}
 
 {% if salt['pillar.get']('evaltab', False) %}
-{%- for SN, SNDATA in salt['pillar.get']('evaltab', {}).items() %}
+{% for SN, SNDATA in salt['pillar.get']('evaltab', {}).items() %}
+{% set NODETYPE = SN.split('_')|last %}
+{% set SN = SN.rsplit('_', 1)[0] %}
 dashboard-{{ SN }}:
   file.managed:
     - name: /opt/so/conf/grafana/grafana_dashboards/eval/{{ SN }}-Node.json
diff --git a/salt/common/telegraf/etc/telegraf.conf b/salt/common/telegraf/etc/telegraf.conf
index b08f2aac2..5b3a9ce55 100644
--- a/salt/common/telegraf/etc/telegraf.conf
+++ b/salt/common/telegraf/etc/telegraf.conf
@@ -498,10 +498,10 @@
 [[inputs.disk]]
   ## By default stats will be gathered for all mount points.
   ## Set mount_points will restrict the stats to only the specified mount points.
-  # mount_points = ["/"]
+  mount_points = ["/","/nsm"]
 
   ## Ignore mount points by filesystem type.
-  ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"]
+  #ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"]
 
 
 # Read metrics about disk IO by device
@@ -2053,6 +2053,9 @@
 #   ## more about them here:
 #   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
 #   # data_format = "influx"
+[[inputs.socket_listener]]
+  service_address = "udp://127.0.0.1:8094"
+  data_format = "influx"
 
 
 # # Statsd UDP/TCP Server
diff --git a/salt/common/telegraf/scripts/influxdbsize.sh b/salt/common/telegraf/scripts/influxdbsize.sh
index a469da8ae..f3b3b2a3c 100644
--- a/salt/common/telegraf/scripts/influxdbsize.sh
+++ b/salt/common/telegraf/scripts/influxdbsize.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 
-INFLUXSIZE=$(du -s -B1 /host/nsm/influxdb | awk {'print $1'}
+INFLUXSIZE=$(du -s -B1 /host/nsm/influxdb | awk '{print $1}')
 
 echo "influxsize bytes=$INFLUXSIZE"
diff --git a/salt/healthcheck/init.sls b/salt/healthcheck/init.sls
new file mode 100644
index 000000000..356b8381b
--- /dev/null
+++ b/salt/healthcheck/init.sls
@@ -0,0 +1,29 @@
+# Manage the minion schedule entry that runs healthcheck.run.
+# The job is present and enabled only when the healthcheck pillar is
+# enabled and lists at least one check; otherwise it is removed.
+
+{% set CHECKS = salt['pillar.get']('healthcheck:checks', []) %}
+{% set ENABLED = salt['pillar.get']('healthcheck:enabled', False) %}
+{% set SCHEDULE = salt['pillar.get']('healthcheck:schedule', 30) %}
+
+{% if CHECKS and ENABLED %}
+  {% set STATUS = ['present','enabled'] %}
+{% else %}
+  {% set STATUS = ['absent','disabled'] %}
+nohealthchecks:
+  test.configurable_test_state:
+    - name: nohealthchecks
+    - changes: False
+    - result: True
+    - comment: 'No checks are enabled for the healthcheck schedule'
+{% endif %}
+
+healthcheck_schedule_{{ STATUS[0] }}:
+  schedule.{{ STATUS[0] }}:
+    - name: healthcheck
+    - function: healthcheck.run
+    - minutes: {{ SCHEDULE }}
+
+healthcheck_schedule_{{ STATUS[1] }}:
+  schedule.{{ STATUS[1] }}:
+    - name: healthcheck
diff --git a/salt/top.sls b/salt/top.sls
index 5026caffd..10ef82f9a 100644
--- a/salt/top.sls
+++ b/salt/top.sls
@@ -35,6 +35,7 @@ base:
     - firewall
     - pcap
     - suricata
+    - healthcheck
     {%- if BROVER != 'SURICATA' %}
     - zeek
     {%- endif %}
@@ -55,6 +56,7 @@ base:
     - firewall
     - idstools
     - auth
+    - healthcheck
     {%- if FLEETMASTER or FLEETNODE %}
     - mysql
     {%- endif %}