[Nagiosplug-devel] check_sensors rework
Mikael Lammentausta
Mikael.Lammentausta at student.savonia-amk.fi
Fri Sep 8 10:14:56 CEST 2006
The official check_sensors plugin only reported whether the lm_sensors output matched the regexp 'ALARM'; it did not report any other information. I rewrote the plugin to send data, and have been making nice plots with nagios-grapher for a few days now.
This version reports CPU, motherboard and hard drive temperatures, fan speeds and voltages (in separate instances, one data type per run). The warning and critical level settings only affect the CPU temperature and the fans.
By default this plugin should work like the earlier version; the switches modify the operation mode. See --help for more information.
--
#!/usr/bin/env bash
# Gets temperature, fan speed and voltages from lm_sensors,
# and the HDD temperature from smartctl.
# Copyright (C) 2006 Mikael Lammentausta

# Fixed search path covering the usual system and Nagios directories.
PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:/usr/nagios/sbin:/usr/local/nagios/sbin

# Script name without its directory. "$0" is quoted throughout so that an
# install path containing whitespace is not word-split.
PROGNAME=$(basename "$0")

# "$0" with its last path component removed (either '/' or '\' counts as a
# separator). When "$0" contains no separator it is left unchanged.
PROGPATH=$(printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,')

# Keep only the digits and dots of the revision keyword, i.e. "2.0".
REVISION=$(printf '%s\n' '$Revision: 2.0 $' | sed -e 's/[^0-9.]//g')

# The helper library is intentionally left unsourced here.
# NOTE(review): print_revision and support are called elsewhere in this script
# but not defined in it - presumably they come from utils.sh; confirm.
#. $PROGPATH/utils.sh
#######################################
# Print the command-line synopsis and the option list.
# Globals:   PROGNAME (read)
# Arguments: none
# Outputs:   usage text on stdout
#######################################
print_usage() {
  echo "Usage: $PROGNAME [options]"
  echo " e.g. $PROGNAME -T temperature -w 50 -c 60 -d /dev/hda,/dev/hdb"
  echo
  echo "Options:"
  echo -e "\t --help | -h print help"
  echo -e "\t --version | -V print version"
  echo -e "\t --verbose | -v be verbose"
  echo -e "\t --type | -T [type] "
  echo -e "\t temperature check and print temperature data"
  echo -e "\t voltages check and print voltage data"
  echo -e "\t fan check and print fan data"
  echo -e "\t -w [value] set warning value"
  echo -e "\t -c [value] set critical value"
  echo -e "\t -d [hdd drives] set hard drive to scan, comma-separated"
  echo -e "\t -dd [driver type] set hard drive type for smartctl (default ata)"
  echo
  # Was "$PROGRAME" (a typo), which expanded to an empty string.
  echo "If no options are given, $PROGNAME will print only status."
  echo "NOTE: you can only ask one data type at once, $PROGNAME will"
  echo " not output temp, voltage and fan info at the same time."
  # The thresholds are applied to the CPU temperature and to every fan.
  echo " Critical/warning levels watch the CPU temperature and the fan speeds."
  echo " You cannot set levels for voltages."
  echo
}
#######################################
# Print the revision line, a short description and the usage text, then exit.
# Globals:   PROGNAME, REVISION (read)
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; exits with status 3
#######################################
print_help() {
  # print_revision and support are not defined in this script (the line that
  # would source the helper library is commented out), so only call them when
  # they exist instead of failing with "command not found".
  if declare -F print_revision > /dev/null; then
    print_revision "$PROGNAME" "$REVISION"
  else
    echo "$PROGNAME v$REVISION"
  fi
  echo
  echo "This plugin checks hardware status using the lm_sensors package."
  echo
  print_usage
  echo
  if declare -F support > /dev/null; then
    support
  fi
  exit 3
}
# set defaults
check_temp=0
check_voltages=0
check_fan=0
check_smart=0
isverbose=0
hddtype="ata"

# Make sure lm_sensors is installed BEFORE trying to run it.
# Exit statuses follow the Nagios convention (1 = WARNING, 3 = UNKNOWN);
# the former "exit -1" wrapped around to 255, which is outside that range.
if [ -z "$(type -p sensors)" ]; then
  echo "SENSORS UNKNOWN - command not found (did you install lmsensors?)"
  exit 3
fi

# get the data (stderr is captured too, so error text ends up in sensordata)
sensordata=$(sensors 2>&1)
status=$?

if [ "${status}" -ne 0 ]; then
  echo "WARNING - sensors returned state $status"
  exit 1
fi

# NOTE: the command line is parsed further down in this file, so check_smart
# and isverbose still hold the defaults assigned above when these tests run.
if [ "${check_smart}" -eq 1 ]; then
  if [ -z "$(type -p smartctl)" ]; then
    echo "Smartctl binary not found - no data available from hard drives"
    echo
    check_smart=0
  elif [ "${#hdd_drives[@]}" -eq 0 ]; then
    # The drive list lives in the hdd_drives array; the old test looked at
    # "$hdd_drive", a name nothing ever assigns.
    echo "No HDD drive defined. Use the -d switch."
    exit 3
  fi
fi

# all is ok
if [ "${isverbose}" -eq 1 ]; then
  echo -e "${sensordata}"
fi
#######################################
# Split a comma-separated drive list and append the items to hdd_drives.
# Index 0 is always the placeholder "reserved", so real drives start at 1.
# Globals:   hdd_drives (written)
# Arguments: $1 - comma-separated list, e.g. "/dev/hda,/dev/hdb"
#######################################
form_hdd_array() {
  local drive
  local -a items=()
  hdd_drives[0]="reserved"
  # IFS is changed for this one "read" only. The previous save/restore through
  # a global turned an unset IFS into an empty one (disabling word splitting
  # for the rest of the script), and the unquoted expansion it relied on also
  # glob-expanded patterns such as /dev/sd*.
  IFS=',' read -r -a items <<< "$1"
  for drive in "${items[@]}"; do
    # A doubled or stray comma must not create a nameless drive.
    [ -n "$drive" ] || continue
    hdd_drives[${#hdd_drives[@]}]="$drive"
  done
}
# check functions

#######################################
# Collect CPU, motherboard and hard drive temperatures.
# Globals:   sensordata, hdd_drives, hddtype (read)
#            cpu_temp, mobo_temp, hdd_temp (written)
# Arguments: none
# hdd_temp uses the same indexes as hdd_drives (index 0 is "reserved"), so
# hdd_temp[i] always belongs to hdd_drives[i].
#######################################
check_temp() {
  local i drive reading

  # First temperature-looking token on the "cpu temp" / "M/B temp" lines.
  cpu_temp="$(grep -i 'cpu temp' <<< "${sensordata}" \
    | grep -Eo '[0-9\.]+[[:punct:]]?[ ]?[CF]+' | head -n 1)"
  mobo_temp="$(grep -i 'M/B temp' <<< "${sensordata}" \
    | grep -Eo '\+[0-9]*[[:punct:]]?[ ]?[CF]+' | head -n 1)"

  hdd_temp=("reserved")
  # Stop at the last real index. The old "seq 1 1 count" loop ran one slot
  # past the end of the array because the count includes the reserved slot.
  for ((i = 1; i < ${#hdd_drives[@]}; i++)); do
    drive="${hdd_drives[i]}"
    if [ -e "$drive" ]; then
      # Fields 10..NF of the smartctl attribute line(s) mentioning temperature.
      reading="$(smartctl -A "$drive" -d "$hddtype" \
        | grep -i temperature \
        | awk '{for (i=10; i<=NF; i++) printf("%s ",$i);printf ("\n")}')"
      hdd_temp[i]="${reading} C"
    else
      # Still fill the slot: appending only for existing drives shifted every
      # later reading onto the wrong drive name.
      hdd_temp[i]="n/a"
    fi
  done
}
#######################################
# Extract the fan lines from the cached sensors output.
# Globals:   sensordata (read), fans (written)
# Arguments: none
# fans receives every line containing "fan" (case-insensitive), cut off just
# before the first '(' on that line.
#######################################
check_fan() {
  fans=$(printf '%s\n' "${sensordata}" | grep -i 'fan' | cut -d '(' -f 1)
}
#######################################
# Collect all voltage readings into one ';'-terminated string.
# Globals:   sensordata (read), voltages (written)
# Arguments: none
# Each entry looks like "NAME = VALUE" with the blanks inside NAME and VALUE
# removed.
#######################################
check_voltages() {
  # Work on the cached output like the other check functions do, instead of
  # running "sensors" a second time.
  # Pipeline: keep lines with two digits followed by " V", drop the "(min/max"
  # part, squeeze out blanks, turn the first ':' into " = ", join with ';'.
  # (tr maps character by character, so the former '; ' second set only ever
  # produced ';' - it is now spelled the way it behaves.)
  voltages="$(grep -E '[0-9]{2} V' <<< "${sensordata}" \
    | cut -d '(' -f 1 \
    | sed 's/ *//g ; s/:/ = /' \
    | tr '\n' ';')"
}
#######################################
# Run the selected check, print one status line and exit.
# Globals:   check_temp, check_fan, check_voltages, critical, warning,
#            sensordata, hdd_drives (read); the variables filled in by the
#            check_* functions (cpu_temp, mobo_temp, hdd_temp, fans, voltages)
# Arguments: none
# Returns:   never returns; exits 0 (ok), 1 (warning) or 2 (critical)
#######################################
main() {
  local i cpu_int cpu_status fan fan_name speed fan_status worst_state

  # temperature
  if [ "$check_temp" -eq 1 ]; then
    check_temp
    cpu_status=0
    # Integer part only: a reading such as "45.0 C" used to expand to two
    # words ("45" and "0") and break the numeric test.
    cpu_int="$(grep -Eo '[0-9]+' <<< "${cpu_temp}" | head -n 1)"
    if [ -n "$cpu_int" ]; then
      # Critical is tested first and wins; previously the warning test ran
      # afterwards and downgraded a critical state to warning.
      if [ -n "$critical" ] && [ "$cpu_int" -ge "$critical" ]; then
        echo -n "[CRITICAL] "
        cpu_status=2
      elif [ -n "$warning" ] && [ "$cpu_int" -ge "$warning" ]; then
        echo -n "[WARNING] "
        cpu_status=1
      fi
    fi
    # output data
    echo -n "CPU = ${cpu_temp} MOTHERBOARD = ${mobo_temp} "
    # Index 0 of hdd_drives is a placeholder; real drives are 1..count-1.
    for ((i = 1; i < ${#hdd_drives[@]}; i++)); do
      echo -n "${hdd_drives[i]} = ${hdd_temp[i]} "
    done
    echo
    exit "$cpu_status"

  # fan
  elif [ "$check_fan" -eq 1 ]; then
    check_fan
    worst_state=0
    while IFS= read -r fan; do
      [ -n "$fan" ] || continue
      fan_status=0
      fan_name="${fan%%:*}"
      # First number after the colon, so names containing blanks work too.
      speed="$(awk -F: '{ if (match($2, /[0-9]+/)) print substr($2, RSTART, RLENGTH) }' <<< "$fan")"
      # A fan is in trouble when it spins at or BELOW a threshold.
      if [[ "$speed" =~ ^[0-9]+$ ]]; then
        if [ -n "$critical" ] && [ "$speed" -le "$critical" ]; then
          fan_status=2
        elif [ -n "$warning" ] && [ "$speed" -le "$warning" ]; then
          fan_status=1
        fi
      fi
      case "$fan_status" in
        2) echo -n "[CRITICAL]: " ;;
        1) echo -n "[WARNING]: " ;;
      esac
      echo -n "${fan_name} = ${speed} RPM "
      # remember worst state (was "$worst_state=...", which is a command
      # lookup, not an assignment, so the exit status always stayed 0)
      if [ "$fan_status" -gt "$worst_state" ]; then
        worst_state=$fan_status
      fi
    done <<< "$fans"
    echo
    exit "$worst_state"

  # voltages
  elif [ "$check_voltages" -eq 1 ]; then
    # no critical/warning for voltages!
    check_voltages
    echo "$voltages"
    exit 0

  # default operation: only look for the ALARM marker
  else
    if grep -q ALARM <<< "${sensordata}"; then
      echo SENSOR CRITICAL - Sensor alarm detected!
      exit 2
    else
      echo sensor ok
      exit 0
    fi
  fi
}
#######################################
# Parse the command line into the global option variables.
# Globals:   isverbose, check_temp, check_smart, check_voltages, check_fan,
#            critical, warning, hddtype (written); hdd_drives is filled
#            through form_hdd_array; PROGNAME, REVISION (read)
# Arguments: the script's command-line arguments
# Returns:   0 when everything was parsed; exits with status 3 on --help,
#            --version, an unknown option/type or a missing option value
#######################################
parse_args() {
  while [ "$#" -gt 0 ]; do
    # Options that take a value must actually have one: "shift 2" fails and
    # shifts nothing when only one argument is left, which looped forever.
    case "$1" in
      -T | --type | -c | -w | -d | -dd)
        if [ "$#" -lt 2 ]; then
          echo "Option $1 requires a value!"
          print_usage
          exit 3
        fi
        ;;
    esac

    case "$1" in
      --help | -h)
        print_help
        exit 3
        ;;
      --version | -V)
        # print_revision is not defined in this script; fall back to a plain
        # version line when it is unavailable.
        if declare -F print_revision > /dev/null; then
          print_revision "$PROGNAME" "$REVISION"
        else
          echo "$PROGNAME v$REVISION"
        fi
        exit 3
        ;;
      --verbose | -v)
        isverbose=1
        shift 1
        ;;
      -T | --type)
        case "$2" in
          temp | temperature)
            check_temp=1
            check_smart=1
            ;;
          voltages)
            check_voltages=1
            ;;
          fan)
            check_fan=1
            ;;
          *)
            echo "Unknown type!"
            exit 3
            ;;
        esac
        shift 2
        ;;
      -c)
        critical="$2"
        shift 2
        ;;
      -w)
        warning="$2"
        shift 2
        ;;
      -d)
        form_hdd_array "$2"
        # drives are now in $hdd_drives[]
        shift 2
        ;;
      -dd)
        hddtype="$2"
        # This shift was missing, so "-dd" never left the argument list.
        shift 2
        ;;
      *)
        echo "Unknown option!"
        print_usage
        exit 3
        ;;
    esac
  done
}

# parse cmd arguments
parse_args "$@"

# The verbose flag is only known once the arguments have been parsed, so the
# raw sensors output is dumped here, right before the selected check runs.
if [ "${isverbose:-0}" -eq 1 ]; then
  printf '%s\n' "${sensordata}"
fi

main
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://nagios-plugins.org/archive/devel/attachments/20060908/32583506/attachment.html>
More information about the Devel
mailing list