diff --git a/roles/common/tasks/monit.yml b/roles/common/tasks/monit.yml
index 9068afe..121cb34 100644
--- a/roles/common/tasks/monit.yml
+++ b/roles/common/tasks/monit.yml
@@ -1,27 +1,36 @@
 ---
 
 - name: Create nagios directory
   file: state="directory" path="/etc/nagios"
 
 - name: Create user nrpe
   user: name=nrpe
 
 - name: Monitoring task
   template:
     src: nrpe_local.cfg
     dest: /etc/nagios/nrpe_local.cfg
     owner: nrpe
     group: nrpe
     mode: 0640
   notify: restart nrpe
 
+# Installs the memory/swap check that nrpe_local.cfg exposes as check_mem.
+- name: Memory check for nagios
+  template:
+    src: check_mem.sh
+    dest: /usr/local/bin/check_mem.sh
+    owner: nrpe
+    group: nrpe
+    mode: "0750"
+
 - name: Remote logging with rsyslog
   lineinfile:
     dest: /etc/rsyslog.conf
     line: "{{ item }}"
     state: present
   with_items:
     - '$ModLoad imklog'
     - "*.* @{{ groups['monit'][0] }}:514"
   notify: restart rsyslog
   when: "'{{ inventory_hostname }}' != '{{ groups.monit[0] }}'"
diff --git a/roles/common/templates/check_mem.sh b/roles/common/templates/check_mem.sh
new file mode 100644
index 0000000..c83b43e
--- /dev/null
+++ b/roles/common/templates/check_mem.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+#
+# Nagios/NRPE plugin: reports RAM and swap usage read from /proc/meminfo.
+# Prints one status line with perfdata and exits with the Nagios state:
+#   0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN (usage or parse error).
+# NOTE: installed via the Ansible template module, so this file must not
+# contain Jinja2 opening delimiters (doubled braces, brace+percent, brace+hash).
+
+#Set script name
+SCRIPT=$(basename "${BASH_SOURCE[0]}")
+
+#Set default values (thresholds are percent used)
+optMW=95
+optMC=98
+optSW=95
+optSC=98
+
+# Print usage and exit UNKNOWN (3), so a help or usage-error run is never
+# reported to Nagios as WARNING (1).
+function printHelp {
+  echo -e \\n"Help for $SCRIPT"\\n
+  echo -e "Basic usage: $SCRIPT -w {warning} -c {critical} -W {warning} -C {critical}"\\n
+  echo "Command switches are optional, default values for warning is 95% and critical is 98%"
+  echo "-w - Sets warning value for Memory Usage. Default is 95%"
+  echo "-c - Sets critical value for Memory Usage. Default is 98%"
+  echo "-W - Sets warning value for Swap Usage. Default is 95%"
+  echo "-C - Sets critical value for Swap Usage. Default is 98%"
+  echo -e "-h - Displays this help message"\\n
+  echo -e "Example: $SCRIPT -w 80 -c 90 -W 40 -C 60"\\n
+  echo -e \\n\\n"Author: Lukasz Gogolin, lukasz.gogolin@gmail.com"
+  echo -e "Git: http://bitbucket.org/lgogolin/nagios_plugins"
+  exit 3
+}
+
+# regex to check is OPTARG an integer
+re='^[0-9]+$'
+
+# Exit UNKNOWN (3) unless the given option argument is a non-negative integer.
+function requireNumber {
+  if ! [[ $1 =~ $re ]]; then
+    echo "error: Not a number" >&2
+    exit 3
+  fi
+}
+
+while getopts :w:c:W:C:h FLAG; do
+  case $FLAG in
+    w) requireNumber "$OPTARG"; optMW=$OPTARG ;;
+    c) requireNumber "$OPTARG"; optMC=$OPTARG ;;
+    W) requireNumber "$OPTARG"; optSW=$OPTARG ;;
+    C) requireNumber "$OPTARG"; optSC=$OPTARG ;;
+    h) printHelp ;;
+    :)
+      echo -e \\n"Option - $OPTARG requires an argument."
+      printHelp
+      ;;
+    \?)
+      echo -e \\n"Option - $OPTARG not allowed."
+      printHelp
+      ;;
+  esac
+done
+
+shift $((OPTIND-1))
+
+# Read the counters (kB) from /proc/meminfo by key name instead of by line
+# position, so extra or reordered kernel fields cannot shift the values.
+read -r memTotal_k memFree_k memBuffer_k memCache_k swapTotal_k swapFree_k < <(
+  awk '$1 == "MemTotal:"  { mt = $2 }
+       $1 == "MemFree:"   { mf = $2 }
+       $1 == "Buffers:"   { bu = $2 }
+       $1 == "Cached:"    { ca = $2 }
+       $1 == "SwapTotal:" { st = $2 }
+       $1 == "SwapFree:"  { sf = $2 }
+       END { printf "%d %d %d %d %d %d\n", mt, mf, bu, ca, st, sf }' /proc/meminfo
+)
+
+# Without a usable MemTotal the percentages below cannot be computed.
+if [ -z "$memTotal_k" ] || [ "$memTotal_k" -eq 0 ]; then
+  echo "UNKNOWN - could not read memory totals from /proc/meminfo"
+  exit 3
+fi
+
+memTotal_b=$((memTotal_k*1024))
+memFree_b=$((memFree_k*1024))
+memBuffer_b=$((memBuffer_k*1024))
+memCache_b=$((memCache_k*1024))
+memTotal_m=$((memTotal_k/1024))
+memFree_m=$((memFree_k/1024))
+memBuffer_m=$((memBuffer_k/1024))
+memCache_m=$((memCache_k/1024))
+# Used = total minus free, buffers and page cache.
+memUsed_b=$((memTotal_b-memFree_b-memBuffer_b-memCache_b))
+memUsed_m=$((memTotal_m-memFree_m-memBuffer_m-memCache_m))
+memUsedPrc=$(((memUsed_b*100)/memTotal_b))
+
+swapTotal_b=$((swapTotal_k*1024))
+swapUsed_k=$((swapTotal_k-swapFree_k))
+swapUsed_b=$((swapUsed_k*1024))
+swapTotal_m=$((swapTotal_k/1024))
+swapFree_m=$((swapFree_k/1024))
+swapUsed_m=$((swapTotal_m-swapFree_m))
+
+# Hosts without swap report 0% instead of dividing by zero.
+if [ "$swapTotal_k" -eq 0 ]; then
+  swapUsedPrc=0
+else
+  swapUsedPrc=$(((swapUsed_k*100)/swapTotal_k))
+fi
+
+message="[MEMORY] Total: $memTotal_m MB - Used: $memUsed_m MB - $memUsedPrc% [SWAP] Total: $swapTotal_m MB - Used: $swapUsed_m MB - $swapUsedPrc% | MTOTAL=$memTotal_b;;;; MUSED=$memUsed_b;;;; MCACHE=$memCache_b;;;; MBUFFER=$memBuffer_b;;;; STOTAL=$swapTotal_b;;;; SUSED=$swapUsed_b;;;;"
+
+# Quoted on purpose: unquoted, "[MEMORY]" and "[SWAP]" are glob patterns
+# that the shell could expand against file names in the working directory.
+echo "$message"
+
+# Map the thresholds to the plugin exit status; exit directly rather than
+# via $(exit N), which only sets the status of a subshell.
+if [ "$memUsedPrc" -ge "$optMC" ] || [ "$swapUsedPrc" -ge "$optSC" ]; then
+  exit 2
+elif [ "$memUsedPrc" -ge "$optMW" ] || [ "$swapUsedPrc" -ge "$optSW" ]; then
+  exit 1
+fi
+exit 0
diff --git a/roles/common/templates/nrpe_local.cfg b/roles/common/templates/nrpe_local.cfg
index 7398aaf..d70a7a8 100644
--- a/roles/common/templates/nrpe_local.cfg
+++ b/roles/common/templates/nrpe_local.cfg
@@ -1,22 +1,22 @@
 command[check_ssh]=/usr/lib64/nagios/plugins/check_ssh -H 127.0.0.1
 command[check_ssh_phab]=/usr/lib64/nagios/plugins/check_ssh -H 127.0.0.1 -p {{ vcs_port }}
 command[check_disk_vda]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /dev/vda1
 command[check_disk_vdb]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /dev/vdb
 command[check_disk_repo]=/usr/lib64/nagios/plugins/check_disk -X ext4 {{ repositories_path }}
 command[check_http_phab]=/usr/lib64/nagios/plugins/check_http -I {{ inventory_hostname }} -H {{ domain }}
 command[check_http_ex_phab]=/usr/lib64/nagios/plugins/check_http -H {{ domain }}
 command[check_http_ex_phab_ssl]=/usr/lib64/nagios/plugins/check_http -H {{ domain }} --ssl
 command[check_http_jenkins]=/usr/lib64/nagios/plugins/check_http -H jenkins.{{ domain }} --ssl
 command[check_mysql_remote]=/usr/lib64/nagios/plugins/check_mysql -H {{ hostvars['127.0.0.1']['openstackjump'].results[0]['openstack']['private_v4'] }} -u {{ mysql_app_user }} -p {{ mysql_app_pass }}
 command[check_mysql_local]=/usr/lib64/nagios/plugins/check_mysql -u root
 command[check_phd]=/usr/lib64/nagios/plugins/check_procs -C 'php' -a {{ phabricator_path }}phabricator/scripts/daemon/phd-daemon -w 5: -c 1:
 command[check_gluster]=/usr/lib64/nagios/plugins/check_procs -C 'glusterd' -c 1
 command[check_gmond]=/usr/lib64/nagios/plugins/check_procs -C 'gmond' -c 1
 command[check_gmetad]=/usr/lib64/nagios/plugins/check_procs -C 'gmetad' -c 1
 command[check_httpd]=/usr/lib64/nagios/plugins/check_procs -C 'httpd' -c 1:
 command[check_java_jenkins]=/usr/lib64/nagios/plugins/check_procs -C 'java' -c 1
-command[check_swap]=/usr/lib64/nagios/plugins/check_swap -w 50 -c 20
 command[check_shibd]=/usr/lib64/nagios/plugins/check_procs -C 'shibd' -c 1
 command[check_postfix_master]=/usr/lib64/nagios/plugins/check_procs -C master -a '-w' -c 1
 command[check_postfix_pickup]=/usr/lib64/nagios/plugins/check_procs -C pickup -c 1
 command[check_postfix_qmgr]=/usr/lib64/nagios/plugins/check_procs -C qmgr -c 1
+command[check_mem]=/usr/local/bin/check_mem.sh -w 80 -c 90 -W 5 -C 10
diff --git a/roles/nagios/templates/services.cfg b/roles/nagios/templates/services.cfg
index cf6033d..5f5996f 100644
--- a/roles/nagios/templates/services.cfg
+++ b/roles/nagios/templates/services.cfg
@@ -1,219 +1,219 @@
 ### Common ###
 
 {% for host in groups['all'] %}
 
 {% if hostvars[host]['host_name'] is defined and hostvars[host]['host_name'] != 'c4science-ci-slave00' %}
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description CPU Load
     check_command       check_nrpe!check_load
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description SSH
     check_command       check_nrpe!check_ssh
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Disk Usage vda1
     check_command       check_nrpe!check_disk_vda
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
-    service_description Swap usage
-    check_command       check_nrpe!check_swap
+    service_description Memory usage
+    check_command       check_nrpe!check_mem
 }
 
 {% if hostvars[host]['host_name'] != 'c4science-backup' %}
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Ganglia
     check_command       check_nrpe!check_gmond
 }
 
 {% endif %}
 
 {% endif %}
 
 {% endfor %}
 
 ### APP ###
 
 {% for host in groups['app'] %}
 
 {% if hostvars[host]['host_name'] is defined %}
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Disk Usage vdb
     check_command       check_nrpe!check_disk_vdb
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Disk repo mount
     check_command       check_nrpe!check_disk_repo
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description SSH Phabricator
     check_command       check_nrpe!check_ssh_phab
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description HTTP Phabricator
     check_command       check_nrpe!check_http_phab
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Phabricator Daemon
     check_command       check_nrpe!check_phd
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Apache
     check_command       check_nrpe!check_httpd
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Gluster Daemon
     check_command       check_nrpe!check_gluster
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description MySQL Remote
     check_command       check_nrpe!check_mysql_remote
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Shibboleth daemon
     check_command       check_nrpe!check_shibd
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Postfix Mailer
     check_command       check_nrpe!check_postfix_master
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Postfix Pickup
     check_command       check_nrpe!check_postfix_pickup
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Postfix Queue
     check_command       check_nrpe!check_postfix_qmgr
 }
 
 {% endif %}
 
 {% endfor %}
 
 ### LOAD BALANCER ###
 
 {% for host in groups['lbs'] %}
 
 {% if hostvars[host]['host_name'] is defined %}
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description HTTP Phabricator
     check_command       check_nrpe!check_http_ex_phab
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description HTTP Phabricator SSL
     check_command       check_nrpe!check_http_ex_phab_ssl
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description HTTP Jenkins
     check_command       check_nrpe!check_http_jenkins
 }
 
 {% endif %}
 
 {% endfor %}
 
 ### CI ###
 
 {% for host in groups['ci'] %}
 
 {% if hostvars[host]['host_name'] is defined %}
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description HTTP Jenkins
     check_command       check_nrpe!check_http_jenkins
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Java Jenkins
     check_command       check_nrpe!check_java_jenkins
 }
 
 {% endif %}
 
 {% endfor %}
 
 ### MONIT ###
 
 {% for host in groups['monit'] %}
 
 {% if hostvars[host]['host_name'] is defined %}
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Ganglia server
     check_command       check_nrpe!check_gmetad
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Apache
     check_command       check_nrpe!check_httpd
 }
 
 {% endif %}
 
 {% endfor %}
 
 ### Databases ###
 
 {% for host in groups['dbs'] %}
 
 {% if hostvars[host]['host_name'] is defined %}
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description MySQL
     check_command       check_nrpe!check_mysql_local
 }
 
 define service{
     use                 generic-service
     host_name           {{ hostvars[host]['host_name'] }}
     service_description Disk Usage vdb
     check_command       check_nrpe!check_disk_vdb
 }
 
 {% endif %}
 
 {% endfor %}