From: Tom Hughes Date: Tue, 2 Apr 2019 17:27:36 +0000 (+0100) Subject: Add local implementation of cciss-vol-statusd X-Git-Url: https://git.openstreetmap.org/chef.git/commitdiff_plain/00ac1bd304754ca934889a92a75b622f1b3a40f0?ds=inline Add local implementation of cciss-vol-statusd --- diff --git a/cookbooks/hardware/recipes/default.rb b/cookbooks/hardware/recipes/default.rb index 2fa629adf..66babcc94 100644 --- a/cookbooks/hardware/recipes/default.rb +++ b/cookbooks/hardware/recipes/default.rb @@ -247,6 +247,26 @@ else end end +if status_packages.include?("cciss-vol-status") + template "/usr/local/bin/cciss-vol-statusd" do + source "cciss-vol-statusd.erb" + owner "root" + group "root" + mode 0o755 + notifies :restart, "service[cciss-vol-statusd]" + end + + systemd_service "cciss-vol-statusd" do + description "Check cciss_vol_status values in the background" + exec_start "/usr/local/bin/cciss-vol-statusd" + private_tmp true + protect_system "full" + protect_home true + no_new_privileges true + notifies :restart, "service[cciss-vol-statusd]" + end +end + ["cciss-vol-status", "mpt-status", "sas2ircu-status", "megaraid-status", "megaclisas-status", "aacraid-status"].each do |status_package| if status_packages.include?(status_package) package status_package diff --git a/cookbooks/hardware/templates/default/cciss-vol-statusd.erb b/cookbooks/hardware/templates/default/cciss-vol-statusd.erb new file mode 100755 index 000000000..8f256507f --- /dev/null +++ b/cookbooks/hardware/templates/default/cciss-vol-statusd.erb @@ -0,0 +1,77 @@ +#!/bin/sh + +NAME="cciss-vol-statusd" +STATUSFILE=/var/run/$NAME.status + +# Do not touch; you can configure this in /etc/default/cciss-vol-statusd +MAILTO=root # Where to report problems +PERIOD=600 # Seconds between each check (default 10 minutes) +REMIND=7200 # Seconds between each reminder (default 2 hours) +ID=/dev/cciss/c0d0 + +[ -e /etc/default/cciss-vol-statusd ] && . 
/etc/default/cciss-vol-statusd + + # Gracefully exit if the package has been removed. +test -x /usr/bin/cciss_vol_status || exit 0 + +while true ; do + # Check every $PERIOD seconds, send email on every status + # change and repeat every $REMIND seconds if the raid is still + # bad. + if (cciss_vol_status $ID); then + BADRAID=false + else + BADRAID=true + logger -t cciss-vol-statusd "detected non-optimal RAID status" + fi + STATUSCHANGE=false + if [ true = "$BADRAID" ] ; then + # RAID not OK + (cciss_vol_status $ID) > $STATUSFILE.new + if [ ! -f $STATUSFILE ] ; then # RAID just became broken + STATUSCHANGE=true + mv $STATUSFILE.new $STATUSFILE + elif cmp -s $STATUSFILE $STATUSFILE.new ; then + # No change. Should we send reminder? + LASTTIME="`stat -c '%Z' $STATUSFILE`" + NOW="`date +%s`" + SINCELAST="`expr $NOW - $LASTTIME`" + if [ $REMIND -le "$SINCELAST" ]; then + # Time to send reminder + STATUSCHANGE=true + mv $STATUSFILE.new $STATUSFILE + else + rm $STATUSFILE.new + fi + else + STATUSCHANGE=true + mv $STATUSFILE.new $STATUSFILE + fi + else + # RAID OK + if [ -f $STATUSFILE ] ; then + rm $STATUSFILE + STATUSCHANGE=true + fi + fi + + if [ true = "$STATUSCHANGE" ]; then + hostname="`uname -n`" + ( + cat <