New path for Xymon

This commit is contained in:
gardouille 2024-12-01 16:50:27 +01:00
parent 20581a9fa0
commit eed91835a4
Signed by: gardouille
GPG Key ID: E759BAA22501AF32
22 changed files with 1988 additions and 0 deletions

23
xymon/get.xymon.alert.vars.sh Executable file
View File

@ -0,0 +1,23 @@
#!/bin/sh
# Dump the variables received for an alert on a specific host/service into
# a file named after ${BBHOSTSVC}, so they can be inspected afterwards.
log_file="/tmp/xymon.alert.${BBHOSTSVC}.vars"

# Start from a fresh file for every alert.
rm -f -- "${log_file}"

# One NAME="value" line per variable. Each value is passed as a '%s' argument
# so backslashes it may contain (eg. in the alert text) are written verbatim
# instead of being expanded as escape sequences.
{
  printf '%s="%s"\n' "BBCOLORLEVEL" "${BBCOLORLEVEL}"
  printf '%s="%s"\n' "BBALPHAMSG" "${BBALPHAMSG}"
  printf '%s="%s"\n' "ACKCODE" "${ACKCODE}"
  printf '%s="%s"\n' "RCPT" "${RCPT}"
  printf '%s="%s"\n' "BBHOSTNAME" "${BBHOSTNAME}"
  printf '%s="%s"\n' "MACHIP" "${MACHIP}"
  printf '%s="%s"\n' "BBSVCNAME" "${BBSVCNAME}"
  printf '%s="%s"\n' "BBSVCNUM" "${BBSVCNUM}"
  printf '%s="%s"\n' "BBHOSTSVC" "${BBHOSTSVC}"
  printf '%s="%s"\n' "BBHOSTSVCCOMMAS" "${BBHOSTSVCCOMMAS}"
  printf '%s="%s"\n' "BBNUMERIC" "${BBNUMERIC}"
  printf '%s="%s"\n' "RECOVERED" "${RECOVERED}"
  printf '%s="%s"\n' "DOWNSECS" "${DOWNSECS}"
  printf '%s="%s"\n' "DOWNSECSMSG" "${DOWNSECSMSG}"
} >> "${log_file}"

exit 0

View File

@ -0,0 +1,8 @@
3&green /dev/sda auto
3&green /dev/sdb auto
3&green /dev/sdc auto
3&green /dev/sdd auto
3&green /dev/sde auto
4&clear /dev/sdf unsupported
3&green /dev/bus/0 megaraid,12
3&green /dev/bus/0 megaraid,13

View File

@ -0,0 +1,258 @@
&green /dev/sda auto
SMART Health Status: OK
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
=== START OF INFORMATION SECTION ===
Vendor: HGST
Product: HUS728T8TAL5200
Revision: RS01
Compliance: SPC-4
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
Logical block size: 512 bytes
Physical block size: 4096 bytes
Formatted with type 2 protection
LU is fully provisioned
Rotation Rate: 7200 rpm
Form Factor: 3.5 inches
Logical Unit id: 0x5000cca09976b8c4
Serial number: VAJ392BL
Device type: disk
Transport protocol: SAS (SPL-3)
Local Time is: Fri Feb 28 15:07:11 2020 CET
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
Temperature Warning: Disabled or Not Supported
=== START OF READ SMART DATA SECTION ===
SMART Self-test log
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
Description number (hours)
# 1 Background short Completed 96 5771 - [- - -]
# 2 Background short Completed 96 5747 - [- - -]
------------------------------------------------------------
&green /dev/sdb auto
SMART Health Status: OK
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
=== START OF INFORMATION SECTION ===
Vendor: HGST
Product: HUS728T8TAL5200
Revision: RS01
Compliance: SPC-4
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
Logical block size: 512 bytes
Physical block size: 4096 bytes
Formatted with type 2 protection
LU is fully provisioned
Rotation Rate: 7200 rpm
Form Factor: 3.5 inches
Logical Unit id: 0x5000cca09975fc04
Serial number: VAJ2WHPL
Device type: disk
Transport protocol: SAS (SPL-3)
Local Time is: Fri Feb 28 15:07:11 2020 CET
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
Temperature Warning: Disabled or Not Supported
=== START OF READ SMART DATA SECTION ===
SMART Self-test log
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
Description number (hours)
# 1 Background short Completed 96 5607 - [- - -]
# 2 Background short Completed 96 5583 - [- - -]
------------------------------------------------------------
&green /dev/sdc auto
SMART Health Status: OK
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
=== START OF INFORMATION SECTION ===
Vendor: HGST
Product: HUS728T8TAL5200
Revision: RS01
Compliance: SPC-4
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
Logical block size: 512 bytes
Physical block size: 4096 bytes
Formatted with type 2 protection
LU is fully provisioned
Rotation Rate: 7200 rpm
Form Factor: 3.5 inches
Logical Unit id: 0x5000cca099757c5c
Serial number: VAJ2M04L
Device type: disk
Transport protocol: SAS (SPL-3)
Local Time is: Fri Feb 28 15:07:11 2020 CET
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
Temperature Warning: Disabled or Not Supported
=== START OF READ SMART DATA SECTION ===
SMART Self-test log
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
Description number (hours)
# 1 Background short Completed 96 5859 - [- - -]
# 2 Background short Completed 96 5835 - [- - -]
------------------------------------------------------------
&green /dev/sdd auto
SMART Health Status: OK
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
=== START OF INFORMATION SECTION ===
Vendor: HGST
Product: HUS728T8TAL5200
Revision: RS01
Compliance: SPC-4
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
Logical block size: 512 bytes
Physical block size: 4096 bytes
Formatted with type 2 protection
LU is fully provisioned
Rotation Rate: 7200 rpm
Form Factor: 3.5 inches
Logical Unit id: 0x5000cca099765214
Serial number: VAJ327BL
Device type: disk
Transport protocol: SAS (SPL-3)
Local Time is: Fri Feb 28 15:07:11 2020 CET
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
Temperature Warning: Disabled or Not Supported
=== START OF READ SMART DATA SECTION ===
SMART Self-test log
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
Description number (hours)
# 1 Background short Completed 96 5599 - [- - -]
# 2 Background short Completed 96 5575 - [- - -]
------------------------------------------------------------
&green /dev/sde auto
SMART Health Status: OK
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
=== START OF INFORMATION SECTION ===
Vendor: HGST
Product: HUS728T8TAL5200
Revision: RS01
Compliance: SPC-4
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
Logical block size: 512 bytes
Physical block size: 4096 bytes
Formatted with type 2 protection
LU is fully provisioned
Rotation Rate: 7200 rpm
Form Factor: 3.5 inches
Logical Unit id: 0x5000cca09976e460
Serial number: VAJ3BZDL
Device type: disk
Transport protocol: SAS (SPL-3)
Local Time is: Fri Feb 28 15:07:11 2020 CET
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
Temperature Warning: Disabled or Not Supported
=== START OF READ SMART DATA SECTION ===
SMART Self-test log
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
Description number (hours)
# 1 Background short Completed 96 5599 - [- - -]
# 2 Background short Completed 96 5575 - [- - -]
------------------------------------------------------------
&clear /dev/sdf unsupported
SMART Health Status can't be determine because of:
SMART support is: Unavailable - device lacks SMART capability.
=== START OF INFORMATION SECTION ===
Vendor: DELL
Product: PERC H730P Mini
Revision: 4.30
User Capacity: 146,163,105,792 bytes [146 GB]
Logical block size: 512 bytes
Logical Unit id: 0x61866da06192eb00256e8c0a2d73f5b6
Serial number: 00b6f5732d0a8c6e2500eb9261a06d86
Device type: disk
Local Time is: Fri Feb 28 15:07:12 2020 CET
SMART support is: Unavailable - device lacks SMART capability.
------------------------------------------------------------
&green /dev/bus/0 megaraid,12
SMART Health Status: OK
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
=== START OF INFORMATION SECTION ===
Vendor: SEAGATE
Product: ST9146803SS
Revision: FS64
User Capacity: 146,815,733,760 bytes [146 GB]
Logical block size: 512 bytes
Rotation Rate: 10000 rpm
Form Factor: 2.5 inches
Logical Unit id: 0x5000c5003ac7ef07
Serial number: 6SD3HJV0
Device type: disk
Transport protocol: SAS (SPL-3)
Local Time is: Fri Feb 28 15:07:13 2020 CET
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
Temperature Warning: Disabled or Not Supported
=== START OF READ SMART DATA SECTION ===
SMART Self-test log
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
Description number (hours)
# 1 Background short Completed 16 62479 - [- - -]
# 2 Background short Completed 16 62455 - [- - -]
------------------------------------------------------------
&green /dev/bus/0 megaraid,13
SMART Health Status: OK
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
=== START OF INFORMATION SECTION ===
Vendor: SEAGATE
Product: ST9146803SS
Revision: FS64
User Capacity: 146,815,733,760 bytes [146 GB]
Logical block size: 512 bytes
Rotation Rate: 10000 rpm
Form Factor: 2.5 inches
Logical Unit id: 0x5000c5003ac956db
Serial number: 6SD3HH6J
Device type: disk
Transport protocol: SAS (SPL-3)
Local Time is: Fri Feb 28 15:07:14 2020 CET
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
Temperature Warning: Disabled or Not Supported
=== START OF READ SMART DATA SECTION ===
SMART Self-test log
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
Description number (hours)
# 1 Background short Completed 16 61374 - [- - -]
# 2 Background short Completed 16 61350 - [- - -]
------------------------------------------------------------

View File

@ -0,0 +1,13 @@
/dev/sda -d scsi # /dev/sda, SCSI device
/dev/sdb -d scsi # /dev/sdb, SCSI device
/dev/sdc -d scsi # /dev/sdc, SCSI device
/dev/sdd -d scsi # /dev/sdd, SCSI device
/dev/sde -d scsi # /dev/sde, SCSI device
/dev/sdf -d scsi # /dev/sdf, SCSI device
/dev/bus/0 -d megaraid,0 # /dev/bus/0 [megaraid_disk_00], SCSI device
/dev/bus/0 -d megaraid,1 # /dev/bus/0 [megaraid_disk_01], SCSI device
/dev/bus/0 -d megaraid,2 # /dev/bus/0 [megaraid_disk_02], SCSI device
/dev/bus/0 -d megaraid,3 # /dev/bus/0 [megaraid_disk_03], SCSI device
/dev/bus/0 -d megaraid,4 # /dev/bus/0 [megaraid_disk_04], SCSI device
/dev/bus/0 -d megaraid,12 # /dev/bus/0 [megaraid_disk_12], SCSI device
/dev/bus/0 -d megaraid,13 # /dev/bus/0 [megaraid_disk_13], SCSI device

242
xymon/plugins/client/ext/sge.sh Executable file
View File

@ -0,0 +1,242 @@
#!/bin/sh
#
# SGE: Sun Grid Engine check - Xymon external script test
#
##### Purpose is to report back to a central server, all Sun
##### Grid Engine software faults.
#####
#
# version 0.4
#
# BIG BROTHER / XXXXXXXXXXXXXXXX status
#
# Written by Butch Deal <butchdeal@yahoo.com>
# Daniel Gomez <dgomez@tigr.org,daniel@ixplosive.com>
# Jérémy Gardais <jeremy.gardais@univ-rennes1.fr>
#
# v0.4 09/06/20 clean, correction,… for Xymon 4.3.28
# v0.3e 10/14/08 cut down on the number of qhost runs
# v0.3d 03/31/06 added alarm/suspend state identification
# v0.3c 03/01/06 propagated yellow state upon UNAVAILABLE queue instances
# v0.3b 01/31/06 fixed yellow warning queue status for ambiguous config test
# v0.3a 01/31/06 added unknown queue status and ambiguous config test
# v0.3 01/26/06 fixed status reporting and optimized job status
# v0.2 08/03/05 flag disabled queues as clear
# v0.1 07/28/05 authored
########################################
# NOTE
# The version v0.4 has only been tested with Xymon (server and client) 4.2.x.
#
# The color status with respects to queue status is arbitrary and should be
# reviewed for your particular environment.
#
# Tested on :
# Solaris & Linux
# Linux only (for v0.4)
########################################
########################################
# INSTALLATION
# step 1 - copy to Xymon client's ext dir
# step 2 - New clientlaunch.d/sge.cfg file
# step 3 - restart Xymon client
#
# NOTE - the TEST variable in the configuration section, this is the name used
# as the column header.
########################################
##################################
# CONFIGURE IT HERE
##################################
# Name of this script and name of the test (the TEST value is the one used
# in the status message sent to the Xymon server).
PLUGIN_NAME=$(basename "${0}")
TEST="sge"
readonly PLUGIN_NAME TEST

# Work files: the report itself and the weighted state of each queue instance.
PLUGIN_RESULT="${XYMONTMP}/${MACHINEDOTS}.${TEST}.plugin_result"
PLUGIN_STATE="${XYMONTMP}/${MACHINEDOTS}.${TEST}.plugin_state"
readonly PLUGIN_RESULT PLUGIN_STATE

# The state file is always started empty.
true > "${PLUGIN_STATE}"

# Locate the SGE commands through the current PATH and hand them over to
# child processes.
QSTAT=$(command -v qstat)
QHOST=$(command -v qhost)
QSELECT=$(command -v qselect)
readonly QSTAT QHOST QSELECT
export QSTAT QHOST QSELECT

# define colours for graphics
# Comment these out if using older BB versions
CLEAR_PIC='&clear'
RED_PIC='&red'
YELLOW_PIC='&yellow'
GREEN_PIC='&green'
UNKNOWN_PIC='&purple'
##################################
# Start of script
##################################
# Print an empty line followed by the HTML title line of a report section.
# $1 - value of the FONT SIZE attribute (eg. "+2")
# $2 - title text
# printf is used instead of echo so the title is written verbatim whatever
# shell runs the script (echo's handling of backslashes is not portable).
_print_section_title()
{
  printf '\n%s\n' "<FONT SIZE=${1}><b>${2}</b></FONT> <BR>"
}

# Print the title of a main section.
# $1 - title text
# $2 - command line related to the section; only used by the alternative
#      layouts kept in comments below
get_header()
{
  _print_section_title "+2" "${1}"
  # Alternative layouts:
  #echo "<FONT SIZE=+2><b>$1</b></FONT> ($2)<BR>"
  # If you do not want the header in a bigger font use line below instead
  #echo "<b>$1</b> ($2)"
  # If you want the "Paul Luzzi" look uncomment this section and comment
  # out the above sections:
  #echo "<P><DIV ALIGN=\"CENTER\"><HR>"
  #echo "<B>============== $1 ==============</B>"
  #echo "<B>--- ($2) ---</B>"
  #echo "<HR></DIV>"
  #echo "<BLOCKQUOTE>"
}

# Print the title of a sub-section (same as get_header with a smaller font).
# $1 - title text
# $2 - command line related to the section; only used by the alternative
#      layouts kept in comments below
get_header_small()
{
  _print_section_title "+1" "${1}"
  # Alternative layouts:
  #echo "<FONT SIZE=+2><b>$1</b></FONT> ($2)<BR>"
  # If you do not want the header in a bigger font use line below instead
  # echo "<b>$1</b> ($2)"
  # If you want the "Paul Luzzi" look uncomment this section and comment
  # out the above sections:
  #echo "<P><DIV ALIGN=\"CENTER\"><HR>"
  #echo "<B>============== $1 ==============</B>"
  #echo "<B>--- ($2) ---</B>"
  #echo "<HR></DIV>"
  #echo "<BLOCKQUOTE>"
}
# Close a report section: writes a single empty line.
get_footer()
{
  # If you want the "Paul Luzzi" look, print the closing tag below as well
  # (to be paired with the matching layout of the header functions):
  #echo "</BLOCKQUOTE>"
  printf '\n'
}
#####
##### Get Status proc - used to get all responses
#####
# Build the SGE report of ${MACHINEDOTS}.
# Outputs:
#   stdout                   - the report: running jobs, host line, state of
#                              every queue instance
#   ${PLUGIN_STATE}          - one "<weight>&<color> <text>" line appended per
#                              queue instance
#   ${PLUGIN_RESULT}.QSTATE  - scratch file holding the queue lines of qhost
# Returns 0.
get_status()
{
  # Work on private copies of the command paths: QSTAT, QHOST and QSELECT are
  # declared readonly at the top of the script, and assigning a readonly
  # variable again makes a POSIX sh abort the whole script.
  _qstat="${QSTAT}"
  if [ -z "${_qstat}" ]; then
    _qstat=$(command -v qstat)
    echo ""
    echo "$YELLOW_PIC QSTAT command is not defined in etc/bbsys.local - using default: ${_qstat}"
  fi
  _qhost="${QHOST}"
  if [ -z "${_qhost}" ]; then
    _qhost=$(command -v qhost)
    echo ""
    echo "$YELLOW_PIC QHOST command is not defined in etc/bbsys.local - using default: ${_qhost}"
  fi
  # qselect is not called below, so its absence is only reported.
  _qselect="${QSELECT}"
  if [ -z "${_qselect}" ]; then
    _qselect=$(command -v qselect)
    echo ""
    echo "$YELLOW_PIC QSELECT command is not defined in etc/bbsys.local - using default: ${_qselect}"
  fi

  # Nothing can be checked without qstat and qhost: say so and stop here
  # rather than running an empty command name.
  if [ -z "${_qstat}" ] || [ -z "${_qhost}" ]; then
    echo ""
    echo "$RED_PIC qstat and/or qhost command not found, SGE can not be checked"
    echo "1&red SGE commands not found" >> "${PLUGIN_STATE}"
    return 0
  fi

  ###
  ### Check the jobs
  ###
  get_header "Jobs" "${_qstat} -l hostname=$MACHINEDOTS"
  # qstat is run only once; its captured output is printed when not empty.
  _jobs=$("${_qstat}" -l hostname="${MACHINEDOTS}" -s r -u '*')
  if [ -z "${_jobs}" ]; then
    echo "No Running Jobs"
  else
    printf '%s\n' "${_jobs}"
  fi
  get_footer

  ###
  ### Check the host
  ###
  get_header "Host" "$METAHS -i"
  "${_qhost}" -h "${MACHINEDOTS}" | grep -v "global"
  get_footer

  ###
  ### Identify queue memberships
  ###
  #get_header "Queue Membership" "$QHOST -q"
  #${QHOST} -h ${MACHINEDOTS} -q | tail +5
  #get_footer

  ###
  ### Check queue instance states
  ###
  # Keep the output from its 5th line on ("tail -n +5" is the portable
  # spelling of the obsolete "tail +5").
  "${_qhost}" -h "${MACHINEDOTS}" -q | tail -n +5 > "${PLUGIN_RESULT}.QSTATE"
  queueMsg=""
  while IFS= read -r _LINE; do
    # A blank line describes no queue instance
    case "${_LINE}" in
      *[![:space:]]*) ;;
      *) continue ;;
    esac
    # First field: queue name; fourth field: state letters (may be missing)
    queue=$(printf -- '%s' "${_LINE}" | awk '{ print $1 }')
    qstate=$(printf -- '%s' "${_LINE}" | awk '{ print $4 }')
    # Order determines more significant alert status: the first matching
    # pattern wins.
    # NOTE(review): $HOST is not set anywhere in this script; presumably it
    # comes from the environment - confirm (MACHINEDOTS may have been meant).
    case "${qstate}" in
      *d*)
        echo "4&clear $queue@$HOST is DISABLED" >> "${PLUGIN_STATE}"
        queueMsg="$queueMsg<BR>$CLEAR_PIC $queue@$HOST is DISABLED<BR>${_LINE}"
        ;;
      *E*)
        echo "1&red $queue@$HOST is in ERROR!" >> "${PLUGIN_STATE}"
        queueMsg="$queueMsg<BR>$RED_PIC $queue@$HOST is in ERROR!<BR>${_LINE}"
        ;;
      *c*)
        echo "2&yellow $queue@$HOST has an ambigious configuration!" >> "${PLUGIN_STATE}"
        queueMsg="$queueMsg<BR>$YELLOW_PIC $queue@$HOST has an ambigious configuration!<BR>${_LINE}"
        ;;
      *[aA]*)
        echo "2&yellow $queue@$HOST is in ALARM" >> "${PLUGIN_STATE}"
        queueMsg="$queueMsg<BR>$YELLOW_PIC $queue@$HOST is in ALARM<BR>${_LINE}"
        ;;
      *[sS]*)
        echo "2&yellow $queue@$HOST is SUSPENDED" >> "${PLUGIN_STATE}"
        queueMsg="$queueMsg<BR>$YELLOW_PIC $queue@$HOST is SUSPENDED<BR>${_LINE}"
        ;;
      *u*)
        echo "2&yellow $queue@$HOST is UNAVAILABLE" >> "${PLUGIN_STATE}"
        queueMsg="$queueMsg<BR>$YELLOW_PIC $queue@$HOST is UNAVAILABLE!<BR>${_LINE}"
        ;;
      "")
        echo "3&green $queue@$HOST is OK" >> "${PLUGIN_STATE}"
        queueMsg="$queueMsg<BR>$GREEN_PIC $queue@$HOST is OK<BR>${_LINE}"
        ;;
      *)
        echo "5&purple $queue@$HOST is UNKNOWN" >> "${PLUGIN_STATE}"
        queueMsg="$queueMsg<BR>$UNKNOWN_PIC $queue@$HOST is UNKNOWN<BR>${_LINE}"
        ;;
    esac
  done < "${PLUGIN_RESULT}.QSTATE"

  get_header "Queue Instance Status Report"
  printf '%s\n' "${queueMsg}"
  get_footer
  #####
  ##### End of get_status proc
  #####
}
#####
##### Main body
#####
# Run the checks: the report goes to ${PLUGIN_RESULT}, the weighted state of
# each queue instance is appended to ${PLUGIN_STATE} by get_status.
get_status > "${PLUGIN_RESULT}"

# Set the global color according to the highest alert.
# State lines start with "<weight>&<color>" (1&red ... 5&purple): once sorted,
# the first entry carries the lowest weight, and "cut -c3-" drops the
# "<weight>&" prefix to keep the color name only.
COLOR=$(awk '{ print $1 }' "${PLUGIN_STATE}" | sort -u | head -n 1 | cut -c3-)
# No state recorded at all: send an explicit "clear" instead of a status
# message without any color.
COLOR="${COLOR:-clear}"

# NOW USE THE XYMON COMMAND TO SEND THE DATA ACROSS
# NOTE(review): $XYMON and $DATE are left unquoted as in the original;
# presumably they may hold a command followed by options - confirm.
$XYMON "${XYMSRV}" "status ${MACHINE}.${TEST} ${COLOR} $($DATE) $(cat "${PLUGIN_RESULT}")"
#For testing only
# echo $XYMON "${XYMSRV}" "status ${MACHINE}.${TEST} ${COLOR} $($DATE) $(cat ${PLUGIN_RESULT})" > /tmp/sgetmp

# Clean up our mess
# "rm -f" stays silent about files that do not exist, so no prior existence
# check is needed even if the test did not produce every file.
rm -f -- "${PLUGIN_RESULT}" "${PLUGIN_STATE}" "${PLUGIN_RESULT}.QSTATE"
##############################################
# end of script
##############################################

202
xymon/plugins/client/ext/smart Executable file
View File

@ -0,0 +1,202 @@
#!/usr/bin/perl
# $Id: sensors 70 2011-11-25 09:21:18Z skazi $
# Author: Jacek Tomasiak <jacek.tomasiak@gmail.com>
# https://github.com/skazi0/xymon-plugins/blob/master/client/ext/smart
use strict;
# add script's directory to module search path for Hobbit.pm on non-debian systems
use FindBin;
use lib $FindBin::Bin;
use Hobbit;
use Data::Dumper;
# Reporter object for the 'smart' test.
my $bb = Hobbit->new('smart');
# List of disks to check when none is configured, one device path per line.
my $temp_disk_list = "$ENV{'XYMONTMP'}/$ENV{'MACHINEDOTS'}.smart.drivedb.list";
# Maximum age of that list before it gets rebuilt.
my $DISK_LIST_MAX_AGE = 600; # minutes
my @disks = ();
# Error count already known for a disk (filled by load_config). Must start as
# an empty list: "{}" would store a stringified hash reference as a key.
my %olderr = ();
my $CACHETIME = 10; # minutes
my $CACHEFILE = "$ENV{'XYMONTMP'}/$ENV{'MACHINEDOTS'}.smart.cache";

&load_config("$ENV{'XYMONTMP'}/logfetch.$ENV{'MACHINEDOTS'}.cfg");

my @disks_stat = stat($temp_disk_list);
my $disks_mtime = scalar @disks_stat ? $disks_stat[9] : 0;
#
# Regenerate disks list if the file is too old (600 minutes).
# time() and the mtime are in seconds, hence the conversion.
if (time() - $disks_mtime > $DISK_LIST_MAX_AGE * 60)
{
    unlink $temp_disk_list;
}

unless (-e $temp_disk_list) {
    # Create a file with the list of disks: every /dev/sd* entry whose path
    # holds no digit (entries such as /dev/sda1 are left out).
    my @found = grep { !/[0-9]/ } glob('/dev/sd*');
    @found
        or die "no /dev/sd* disk found to create $temp_disk_list";
    open(my $list, '>', $temp_disk_list)
        or die "Could not create file '$temp_disk_list' $!";
    print {$list} "$_\n" foreach @found;
    close($list)
        or die "Could not write file '$temp_disk_list' $!";
}

# fallback to disk detection if nothing defined in the config
unless (@disks) {
    ## Put temp_disk_list content to disks array
    open(my $fh, '<:encoding(UTF-8)', $temp_disk_list)
        or die "Could not open file '$temp_disk_list' $!";
    while (my $row = <$fh>) {
        chomp $row;
        push(@disks, "$row");
    }
    close($fh);
}
my @stat = stat($CACHEFILE);
my $mtime = scalar @stat ? $stat[9] : 0;
# regenerate sensors cache if outdated
if (time() - $mtime > $CACHETIME * 60)
{
    # The new content is built in a scratch file and only moved over the cache
    # once every disk has been processed. Writing the cache in place would
    # leave a truncated file with a fresh modification time when the script
    # dies half-way, and that partial content would be sent until it expires.
    my $tmpcache = "$CACHEFILE.$$";
    open(my $out, '>', $tmpcache) or die "cannot open $tmpcache";
    # Drop the scratch file before giving up.
    my $abort = sub {
        close $out;
        unlink $tmpcache;
        die @_;
    };
    foreach my $name (@disks)
    {
        my $rule = '=' x 20;
        print $out "$rule $name $rule\n";
        my @output = `sudo smartctl -AHi -l error -l selftest $name 2>&1`;
        $abort->("no output from smartctl for $name\n") unless @output;
        my $ncv = '';       # "name : value" lines, sent inside an HTML comment
        my $newerr = 1;     # errors beyond the count known for this disk
        my $ponhours = undef;   # raw value of the Power_On_Hours attribute
        my $lasttest = undef;   # lifetime of the first self-test line found
        foreach my $line (@output)
        {
            # skip header
            next if ($line =~ /smartctl|Copyright|Home page|===/);
            if ($line =~ /.*overall-health.*:\s*(.*)/)
            {
                my $lstatus = ($1 eq 'PASSED') ? 'green' : 'red';
                print $out "&$lstatus $line";
            }
            # attribute table row
            elsif ($line =~ /^\s*(\d+)\s+(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+(\d+)\s+\S+\s+\S+\s+(\S+)\s+(.*)$/)
            {
                my ($aname, $value, $worst, $thresh, $failure, $raw) = ($2, $3, $4, $5, $6, $7);
                my $lstatus = 'green';
                if ($aname =~ /Current_Pending_Sector|Offline_Uncorrectable/ and int($raw) > 0)
                {
                    $lstatus = 'yellow';
                }
                elsif ($aname =~ /Power_On_Hours/)
                {
                    $ponhours = $raw;
                }
                if ($failure =~ /FAIL/) {
                    $lstatus = 'red';
                }
                print $out "&$lstatus $line";
                $ncv .= "$name-$aname-value : $value\n";
                $ncv .= "$name-$aname-worst : $worst\n";
                $ncv .= "$name-$aname-thresh : $thresh\n";
                $ncv .= "$name-$aname-raw : $raw\n";
            }
            elsif ($line =~ /^\s*No Errors Logged\s*$/)
            {
                $newerr = 0;
                print $out "&green $line";
            }
            elsif ($line =~ /Error Count:\s*(\d+)/)
            {
                # only errors above the configured count turn the line red
                $newerr = $1 - $olderr{$name};
                my $lstatus = $newerr > 0 ? 'red' : 'green';
                print $out "&$lstatus $line";
            }
            elsif ($line =~ /^\s*Error \d+ occurred/)
            {
                my $lstatus = $newerr > 0 ? 'red' : 'green';
                print $out "&$lstatus $line";
            }
            # self-test log row
            elsif ($line =~ /^\s*#\s*\d+\s+(Conveyance offline|Extended offline|Short offline|Extended captive)\s+(.*)\s+\d+%\s+(\d+)/)
            {
                my $status = $2;
                my $lifetime = $3;
                my $lstatus = 'red';
                $lasttest = $lifetime if (!defined($lasttest));
                $lstatus = 'yellow' if ($status =~ /Aborted by host|Interrupted \(host reset\)/);
                $lstatus = 'green' if ($status =~ /Completed without error|Self-test routine in progress|Interrupted \(host reset\)/);
                print $out "&$lstatus $line";
            }
            else
            {
                print $out " $line";
            }
        }
        # test status footer
        my $lasttestmsg;
        my $lasttestcolor = 'green';
        if (!defined($lasttest))
        {
            $lasttestcolor = 'yellow';
            $lasttestmsg = 'no test performed';
        }
        elsif (!defined($ponhours))
        {
            # No Power_On_Hours attribute was seen: the age can't be computed
            # (it used to show up as a negative number of hours). The color is
            # left green as before.
            $lasttestmsg = 'age unknown (Power_On_Hours not reported)';
        }
        else
        {
            my $lasttestage = $ponhours % 65536 - $lasttest;
            $lasttestmsg = "$lasttestage hours ago";
            if ($lasttestage > 24 * 7)
            {
                $lasttestcolor = 'red';
            }
            elsif ($lasttestage > 24 * 2)
            {
                $lasttestcolor = 'yellow';
            }
        }
        print $out "&$lasttestcolor Last Self-test: $lasttestmsg\n";
        # hidden output for ncv
        print $out "<!--\n$ncv\n-->\n";
    }
    close($out) or $abort->("cannot write $tmpcache: $!\n");
    rename($tmpcache, $CACHEFILE) or $abort->("cannot replace $CACHEFILE: $!\n");
}
# send cached content
{
    # Three-argument open with a lexical handle: the file name can never be
    # read as an open mode, and the handle is not a package-wide bareword.
    open(my $in, '<', $CACHEFILE) or die "cannot open $CACHEFILE";
    while (my $line = <$in>)
    {
        # Lines starting with "&<color>" are passed together with that color,
        # any other line is passed as plain text.
        if ($line =~ /^\s*&(\S+)/)
        {
            $bb->color_print($1, $line);
        }
        else
        {
            $bb->print($line);
        }
    }
    close $in;
}
$bb->send;
# Read the plugin settings from a configuration file.
# Argument: path of the file; nothing is changed when it can't be opened.
# Lines starting with "#" are skipped. Recognised settings, stored in the
# file-level variables:
#   DISKS = "dev1 dev2 ..."     -> @disks (split on whitespace)
#   SMARTOLDERROR[dev] N        -> $olderr{dev} = N
#   SMARTCACHETIME=N            -> $CACHETIME = N
sub load_config
{
    my $path = shift;
    # Three-argument open with a lexical handle: the path can never be read as
    # an open mode, and the handle is not a package-wide bareword.
    open(my $cfg, '<', $path) or return;
    # print "loading config from $path\n";
    while (my $line = <$cfg>)
    {
        next if ($line =~ /^\s*#/);
        if ($line =~ /DISKS\s*=\s*['"](.*?)["']/)
        {
            @disks = split(/\s+/, $1);
        }
        if ($line =~ /SMARTOLDERROR\[([\w\/]+)\]\s+(\d+)/)
        {
            $olderr{$1} = $2;
        }
        if ($line =~ /SMARTCACHETIME=(\d+)/)
        {
            $CACHETIME = $1;
        }
    }
    close $cfg;
}

View File

@ -0,0 +1,300 @@
#!/bin/sh
# .. vim: foldmarker=[[[,]]]:foldmethod=marker
# NOTE: Must be run as root, so you probably need to setup sudo for this.
# This script is mostly intended to be used with Xymon, and is aimed at devices unknown to the smartmontools database.
# Based on xymon.com's script: https://www.xymon.com/xymon-cgi/viewconf.sh?smart
# The script will scan all devices compatible with SMART and for each disk, it will: [[[
# * try to guess the expected TYPE (even megaraid,…).
# * display health status.
# * set a "clear" state for incompatible device.
# * display last selftests.
# * set an "error" state if no selftest is recorded.
# * display basic information.
# * recommend a more advanced SMART script if the disk is known to smartmontools's database (drivedb.h) or redirect to smartmontools's FAQ if not.
# ]]]
# Things the script CAN'T do: [[[
# * ensure a recent selftest was run.
# * compare current value with vendor's one (for failure prediction or error).
# * give detail about errors.
# * Take a look at this more advanced script for such features: https://github.com/skazi0/xymon-plugins/blob/master/client/ext/smart
# ]]]
# Vars [[[
# Debug messages are printed when this is "0"
debug="1"

## Colors [[[
c_redb='\033[1;31m'
c_magentab='\033[1;35m'
c_reset='\033[0m'
## ]]]

plugin_name=$(basename "${0}")

# Work files of this plugin, all sharing the same prefix
_smartoverall_prefix="${XYMONTMP}/${MACHINEDOTS}.smartoverall"
plugin_result="${_smartoverall_prefix}.plugin_result"
plugin_state="${_smartoverall_prefix}.plugin_state"
device_list="${_smartoverall_prefix}.dscan"

## List of devices known from the smartmontools base and compatible with test logging
## This file might be used by a more advanced script such as skazi0's one
drivedb_list="${XYMONTMP}/${MACHINEDOTS}.smart.drivedb.list"

# By default, don't empty files newer than 10hours (600 minutes)
default_mtime_minutes='600'

# Owner given to the files created by this script
xymon_username='xymon'
xymon_groupname='xymon'
# ]]]
# Functions
## Create or empty a file if it's too old [[[
## First argument (required): absolute path to the file
## Second argument (optional): maximum number of minutes since last
##   modification (default: ${default_mtime_minutes}); extra arguments are
##   ignored
## A file that doesn't exist yet counts as too old and gets created.
## The (re)created file is given to ${xymon_username}:${xymon_groupname}.
## Debug messages go to stderr, so they never end up in a file the caller
## redirects stdout to.
## Exits 1 when called without argument, returns 0 otherwise.
regenerate_if_too_old() {
  ## Set variables according to the number of passed arguments [[[
  case $# in
    0 )
      [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: regenerate_if_too_old func Need at least 1 argument." >&2
      exit 1
      ;;
    1 )
      _file="${1}"
      [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: regenerate_if_too_old func Use default_mtime_minutes value: ${default_mtime_minutes}." >&2
      _max_mtime_minutes="${default_mtime_minutes}"
      ;;
    * )
      _file="${1}"
      _max_mtime_minutes="${2}"
      ;;
  esac
  ## ]]]

  _current_timestamp=$(date "+%s")
  ## Only ask stat about an existing file: a missing one gets timestamp 0,
  ## which makes it older than any limit.
  if [ -e "${_file}" ]; then
    _file_mtime_timestamp=$(stat --format="%Y" -- "${_file}")
  else
    _file_mtime_timestamp="0"
  fi

  ## Subtract last modification timestamp of the file from current timestamp
  _file_mtime_seconds=$(( _current_timestamp - ${_file_mtime_timestamp:-0} ))
  ## Get maximum allowed mtime in seconds
  _max_mtime_seconds=$(( _max_mtime_minutes * 60 ))

  ## Compare last modification mtime with the maximum allowed
  if [ "${_file_mtime_seconds}" -gt "${_max_mtime_seconds}" ]; then
    [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: regenerate_if_too_old func Need to empty or create ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})." >&2
    true > "${_file}"
    chown -- "${xymon_username}":"${xymon_groupname}" "${_file}"
  else
    [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: regenerate_if_too_old func Don't need to empty ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})." >&2
  fi

  ## The status must not depend on the debug switch
  return 0
}
## ]]]
## Test if a disk really support SMART [[[
## Smartctl can give an health status even without a full support
## of SMART for some type (eg. scsi or megaraid).
## Exemple: SMART support is: Unavailable - device lacks SMART capability.
## First argument: path of the device (eg. /dev/sda)
## Second argument: smartctl TYPE to try with this device (eg. scsi, auto)
## Result, in the global smart_support_msg:
##   * empty if every "SMART support is:" line is an Enabled/Available one;
##   * otherwise the last line that isn't, or an error text if smartctl gave
##     no such line at all.
## The "SMART support is:" lines are kept in a per device/type file of
## ${XYMONTMP}, reused until it is older than 24h.
## Debug messages go to stderr: the caller redirects stdout to a file.
## Always returns 0.
is_disk_support_smart() {
  _disk="${1}"
  _type="${2}"
  _smarctl_support_result="${XYMONTMP}/${MACHINEDOTS}.smartoverall.support.$(basename "${_disk}").${_type}"
  smart_support_msg=""

  [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: is_disk_support_smart func check if SMART is supported on: ${_disk}." >&2

  ## Create or empty previous file only if older than 24h (1440 minutes)
  regenerate_if_too_old "${_smarctl_support_result}" 1440

  ## Grep only "support" lines from disk's informations only if the file was emptied
  if test ! -s "${_smarctl_support_result}"; then
    smartctl -d "${_type}" -i -- "${_disk}" | grep -E "^SMART support is:" -- >> "${_smarctl_support_result}"
  fi

  ## If the file is not empty
  if test -s "${_smarctl_support_result}"; then
    ## Parse all "support" lines, keeping the last one which reports
    ## neither "Enabled" nor "Available"
    while IFS= read -r _LINE; do
      case "${_LINE}" in
        *Enabled*|*Available*) ;;
        *) smart_support_msg="${_LINE}" ;;
      esac
    done < "${_smarctl_support_result}"
  else
    smart_support_msg="smartctl was not able to open ${_disk} DEVICE with ${_type} TYPE."
  fi

  if [ -z "${smart_support_msg}" ]; then
    [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: is_disk_support_smart func SMART seems fully supported on: ${_disk} with ${_type} type." >&2
  else
    [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: is_disk_support_smart func SMART is not fully supported on: ${_disk} with ${_type} type. See smartctl informations:\n${smart_support_msg}" >&2
  fi

  ## Clean temp files
  ### As the Xymon's tmpdir is used to store log files, no need to delete them at
  ### the end of the script. They will be emptied, reused or regenerate (if oldest
  ### than the expected interval) at the next run.

  ## The status must not depend on the debug switch
  return 0
}
## ]]]
## Test the type of disk with smartctl [[[
## Cause the scanned one might not be the one to use
## First argument: path of the device
## Second argument: TYPE reported for this device by the scan
## Tries the scanned TYPE first, then "auto". Results are left in globals:
##   TYPE              - first type accepted by is_disk_support_smart, empty
##                       if none was
##   SMART_SUPPORT_MSG - empty on success, otherwise the message of the last
##                       type tried
choose_correct_type() {
  _disk="${1}"
  _scanned_type="${2}"
  _default_type="auto"

  ## Until a type proves to be correct, there is none
  TYPE=""
  SMART_SUPPORT_MSG=""

  for _candidate_type in "${_scanned_type}" "${_default_type}"; do
    is_disk_support_smart "${_disk}" "${_candidate_type}"

    ## A message means this type is not the correct one: remember why
    ## and try the next one
    if [ -n "${smart_support_msg}" ]; then
      SMART_SUPPORT_MSG="${smart_support_msg}"
      continue
    fi

    ## No message, the type is correct
    TYPE="${_candidate_type}"
    SMART_SUPPORT_MSG=""
    return
  done
}
## ]]]
# Create or empty previous files
# Both the detailed result and the state summary always start empty and are
# handed over to the Xymon user.
for _fresh_file in "${plugin_result}" "${plugin_state}"; do
  true > "${_fresh_file}"
  chown -- "${xymon_username}":"${xymon_groupname}" "${_fresh_file}"
done

## Create or empty previous file only if older than 24h (1440 minutes)
regenerate_if_too_old "${device_list}" 1440
regenerate_if_too_old "${drivedb_list}" 1440

# Get the list of all available devices if the previous list was emptied
if [ ! -s "${device_list}" ]; then
  smartctl --scan >> "${device_list}"
fi
# If the file is not empty
# For every scanned device: one summary line "<weight>&<color> DISK TYPE" is
# written to stdout (redirected to ${plugin_state} for the whole loop) and a
# detailed section is appended to ${plugin_result}. Debug messages go to
# stderr so they never end up in the state file.
if test -s "${device_list}"; then
  while IFS= read -r LINE; do
    ## Get device path
    DISK=$(printf '%s\n' "${LINE}" | cut -d" " -f1)
    ## Try to determine the best type
    SCANNED_TYPE=$(printf '%s\n' "${LINE}" | cut -d" " -f3)
    choose_correct_type "${DISK}" "${SCANNED_TYPE}"

    ## If no correct type was found for this device
    if [ -z "${TYPE}" ]; then
      [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: SMART is not fully supported." >&2
      ### Two real lines: nothing has to expand a "\n" later on
      DRES=$(printf '%s\n%s' "SMART Health Status can't be determine because of:" "${SMART_SUPPORT_MSG}")
      ### Code 2 selects the "clear" color in the health test below
      DCODE="2"
      TYPE="unsupported"
      ### Still try to display informations about unsupported device (eg. RAID controller,…)
      DID="unsupported-${DISK}"
      DINFO=$(smartctl -i -d "${SCANNED_TYPE}" "${DISK}" | grep -v -E "^smartctl|^Copyright|^$" || printf '%s' "Can't get informations due to no SMART support.")
      DDRIVEDB_MSG=""
      ### Reset as well, or the message of the previous disk would be shown
      DSELFTEST_MSG=""
      DSELFTEST=""
    else
      [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: SMART seems fully supported, proceed normally." >&2
      ### Get SMART Health Status and return code
      DRES=$(/usr/sbin/smartctl -H -d "${TYPE}" -n standby "${DISK}")
      DCODE=$?

      ### Get disk's serial number and informations (from a single smartctl run)
      _info_raw=$(smartctl -i -d "${TYPE}" "${DISK}")
      DID=$(printf '%s\n' "${_info_raw}" | awk '/.erial .umber:/ { print $NF }')
      DINFO=$(printf '%s\n' "${_info_raw}" | grep -v -E "^smartctl|^Copyright|^$")

      ## If the model of the disk is known from smartmontools database
      if smartctl -d "${TYPE}" -P show "${DISK}" | grep -qi -- "drive found in"; then
        DDRIVEDB_MSG="&green Device is known in smartmontools database. You might consider using a more advanced plugin such as:
https://github.com/skazi0/xymon-plugins/blob/master/client/ext/smart"
      else
        DDRIVEDB_MSG="&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase"
      fi

      ### Self-test log (from a single smartctl run as well)
      _selftest_raw=$(smartctl -d "${TYPE}" -l selftest "${DISK}")
      DSELFTEST=$(printf '%s\n' "${_selftest_raw}" | grep -v -E -- "^smartctl|^Copyright|^$")

      ## If no selftest have been recorded
      if printf '%s\n' "${_selftest_raw}" | grep -qi -- "No self-tests"; then
        DSELFTEST_MSG="&red No self-tests recorded:"
        ### Code 8 selects the "red" color in the health test below
        DCODE="8"
      ## If the device doesn't support test logging
      elif printf '%s\n' "${_selftest_raw}" | grep -qEi -- "(does not support.*logging|Log not supported)"; then
        DSELFTEST_MSG="&clear Test logging are not supported:"
      else
        DSELFTEST_MSG=""
        ### If the device is also known from smartmontools database
        ### and not already present in the list of compatible disk
        ### (whole line, literal comparison: /dev/sda must not match /dev/sdaa)
        case "${DDRIVEDB_MSG}" in
          *green*)
            if ! grep -q -x -F -- "${DISK}" "${drivedb_list}"; then
              printf '%s\n' "${DISK}" >> "${drivedb_list}"
            fi
            ;;
        esac
      fi
    fi

    ## Test health status
    ## Bits of smartctl's exit status, see "EXIT STATUS" in smartctl(8):
    ## 2 is used here for standby/unsupported, 8 for failure, 32 for warning
    DSTBY=$(( DCODE & 2 ))
    DFAIL=$(( DCODE & 8 ))
    DWARN=$(( DCODE & 32 ))

    ## According to health, give a weight to each color to easily get the page status
    if test $DSTBY -ne 0
    then
      COLOR="4&clear"
    elif test $DFAIL -ne 0
    then
      COLOR="1&red"
    elif test $DWARN -ne 0
    then
      COLOR="2&yellow"
    else
      COLOR="3&green"
    fi

    ## Avoid duplicate device
    ## A device already reported shows its identifier in the detailed result.
    ## Without identifier nothing can be compared (an empty pattern would
    ## match any line), so the device is always reported.
    if [ -z "${DID}" ] || ! grep -q -F -- "${DID}" "${plugin_result}"; then
      ## For summary
      echo "${COLOR} $DISK ${TYPE}"

      ## For detailed informations
      {
        echo "${COLOR} $DISK ${TYPE}" | cut -c2-
        echo ""
        printf '%s\n' "$DRES" | grep -v -E "^smartctl|^Copyright|^$|^==="
        printf '%s\n' "${DDRIVEDB_MSG}"
        printf '%s\n' "${DINFO}"
        printf '%s\n' "${DSELFTEST_MSG}"
        printf '%s\n' "${DSELFTEST}" | head -n12
        echo "------------------------------------------------------------"
        echo ""
        echo ""
      } >> "${plugin_result}"
    fi
  done < "${device_list}" >> "${plugin_state}"
# If the file is empty
else
  echo "1&red Error while scanning devices with smartctl" >> "${plugin_state}"
fi
# Set the global color according to the highest alert
# Every state line starts with "<weight>&<color>": the first entry of the
# sorted list is kept and its first two characters are dropped.
COLOR=$(awk '{print $1}' "${plugin_state}" | sort -u | head -n 1 | cut -c3-)

# Send informations to Xymon server
# The message holds the summary (state lines without their weight) followed by
# the detailed result.
_status_message="status ${MACHINE}.${plugin_name} ${COLOR} SMART health check
$(cut -c2- "${plugin_state}")
==================== Detailed status ====================
$(cat "${plugin_result}")
"
$XYMON "${XYMSRV}" "${_status_message}"

## Clean temp files
### As the Xymon's tmpdir is used to store log files, no need to delete them at
### the end of the script. They will be emptied, reused or regenerate (if oldest
### than the expected interval) at the next run.
exit 0

View File

@ -0,0 +1,66 @@
#!/bin/sh
# .. vim: foldmarker=[[[,]]]:foldmethod=marker
# Vars [[[
## Debug messages are printed when debug is "0"
debug="0"
## Colors [[[
### Escape sequences used in the printf format strings of this script
c_redb='\033[1;31m'
c_magentab='\033[1;35m'
c_reset='\033[0m'
## ]]]
## Maximum age (in minutes) used by regenerate_if_too_old when it is called
## with a single argument
default_mtime_minutes="600"
## Sample values for the drivedb check done at the end of this script:
## the list file, the message to look for "green" in and the disk to register
drivedb_list="/tmp/test.css"
DDRIVEDB_MSG="green"
#DDRIVEDB_MSG="red"
DISK="/dev/sda"
# ]]]
## Create or empty a file if it's too old [[[
## Usage: regenerate_if_too_old FILE [MAX_MTIME_MINUTES]
##   FILE              : path of the file to check.
##   MAX_MTIME_MINUTES : maximum allowed age of FILE in minutes; when omitted,
##                       the value of default_mtime_minutes is used.
## The file is emptied when its last modification is older than the allowed
## age and it is created (empty) when it doesn't exist yet.
## Globals read: debug (0 = print debug messages), default_mtime_minutes,
##               c_magentab, c_reset.
## Globals written: the _file, _max_mtime_*, _file_mtime_* and
##                  _current_timestamp helpers (no local variables in POSIX sh).
## Return: 0 on success, 1 on wrong usage or if FILE can't be inspected,
##         the status of the failed redirection if FILE can't be written.
regenerate_if_too_old() {
	## Set variables according to the number of passed arguments
	case $# in
		1 )
			_file="${1}"
			_max_mtime_minutes="${default_mtime_minutes}"
			if [ "${debug}" -eq "0" ]; then
				printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: regenerate_if_too_old func Use default_mtime_minutes value: ${default_mtime_minutes}."
			fi
			;;
		2 )
			_file="${1}"
			_max_mtime_minutes="${2}"
			;;
		* )
			## Don't silently work on the file of a previous call
			printf '%s\n' "regenerate_if_too_old: expects 1 or 2 arguments, got $#." >&2
			return 1
			;;
	esac

	## A missing file has no modification time: create it right away instead
	## of letting stat fail and computing an age from an empty timestamp
	if [ ! -e "${_file}" ]; then
		if [ "${debug}" -eq "0" ]; then
			printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: regenerate_if_too_old func Need to create ${_file}, it doesn't exist yet."
		fi
		true > "${_file}"
		return
	fi

	_current_timestamp=$(date "+%s")
	_file_mtime_timestamp=$(stat --format="%Y" -- "${_file}") || return 1

	## Substract last modification timestamp of the file to current timestamp
	_file_mtime_seconds=$(( _current_timestamp - _file_mtime_timestamp ))
	## Get maximum allowed mtime in seconds
	_max_mtime_seconds=$(( _max_mtime_minutes * 60 ))

	## Compare last modification mtime with the maximum allowed
	if [ "${_file_mtime_seconds}" -gt "${_max_mtime_seconds}" ]; then
		if [ "${debug}" -eq "0" ]; then
			printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: regenerate_if_too_old func Need to empty or create ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})."
		fi
		true > "${_file}" || return
	elif [ "${debug}" -eq "0" ]; then
		printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: regenerate_if_too_old func Don't need to empty ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})."
	fi

	## Don't leak the status of a skipped debug message to the caller
	return 0
}
## ]]]
# Exercise the function: default maximum age first, then explicit ones (minutes)
regenerate_if_too_old /tmp/css_style.css
regenerate_if_too_old /tmp/font.css 60
regenerate_if_too_old /tmp/user/1337/serverauth.qGdeK8OOzr 1440
regenerate_if_too_old /tmp/test.css 600

# Add the disk to the list only if the drivedb message contains "green" and the
# disk isn't listed yet.
# -x -F: compare whole lines as fixed strings, so "/dev/sda" is neither read as
# a regular expression nor considered present because of a "/dev/sdaa" line.
if printf -- '%s' "${DDRIVEDB_MSG}" | grep -q -E -- "green" &&
	! grep -q -x -F -- "${DISK}" "${drivedb_list}"
then
	echo "${DISK}" >> "${drivedb_list}"
fi

exit 0

View File

@ -0,0 +1,28 @@
BBCOLORLEVEL="red"
BBALPHAMSG="HOST.DOMAIN.ORG:apt red [168321]
red Wed Aug 22 11:26:34 2018 - apt NOT ok
Debian GNU/Linux 9.5 (stretch)
&red Security updates (4): apt-get install openssh-client openssh-server openssh-sftp-server ssh
openssh-client (1:7.4p1-10+deb9u3 1:7.4p1-10+deb9u4)
openssh-server (1:7.4p1-10+deb9u3 1:7.4p1-10+deb9u4)
openssh-sftp-server (1:7.4p1-10+deb9u3 1:7.4p1-10+deb9u4)
ssh (1:7.4p1-10+deb9u3 1:7.4p1-10+deb9u4)
&red Last apt update: 3.0 day(s) ago
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=apt
"
ACKCODE="168321"
RCPT="1234567890"
BBHOSTNAME="HOST.DOMAIN.ORG"
MACHIP="111222333444"
BBSVCNAME="apt"
BBSVCNUM="0"
BBHOSTSVC="HOST.DOMAIN.ORG.apt"
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.apt"
BBNUMERIC="000111222333444168321"
RECOVERED="0"
DOWNSECS="36"
DOWNSECSMSG=""

View File

@ -0,0 +1,62 @@
BBCOLORLEVEL="yellow"
BBALPHAMSG="HOST.DOMAIN.ORG:cpu yellow [520216]
yellow Tue Aug 21 11:43:22 CEST 2018 up: 00:00, 0 users, 29 procs, load=1.90
&yellow Machine recently rebooted
System clock is 0 seconds off
top - 11:43:23 up 0 min, 0 users, load average: 1.49, 1.90, 3.03
Tasks: 33 total, 2 running, 31 sleeping, 0 stopped, 0 zombie
%Cpu(s): 3.8 us, 5.9 sy, 0.0 ni, 89.3 id, 0.8 wa, 0.0 hi, 0.1 si, 0.0 st
KiB Mem : 1048576 total, 897468 free, 33268 used, 117840 buff/cache
KiB Swap: 1048576 total, 1048576 free, 0 used. 1015308 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
263 xymon 20 0 64284 6264 5072 D 100.0 0.6 0:00.05 apt-cache
1 root 20 0 69588 7680 5968 S 0.0 0.7 0:00.08 systemd
43 root 20 0 93256 16952 16356 S 0.0 1.6 0:00.03 systemd-j+
89 root 20 0 22560 1052 0 S 0.0 0.1 0:00.00 dhclient
121 root 20 0 29668 2804 2520 S 0.0 0.3 0:00.00 cron
122 root 20 0 321836 2756 2348 S 0.0 0.3 0:00.00 rsyslogd
124 root 20 0 171944 9704 8308 S 0.0 0.9 0:00.00 sssd
130 message+ 20 0 59424 4036 3576 S 0.0 0.4 0:00.00 dbus-daem+
145 daemon 20 0 27968 2120 1916 S 0.0 0.2 0:00.00 atd
147 root 20 0 71988 5648 4896 S 0.0 0.5 0:00.00 sshd
164 xymon 20 0 4272 1488 1332 S 0.0 0.1 0:00.00 xymonlaun+
167 xymon 20 0 4292 712 640 S 0.0 0.1 0:00.00 xymonclie+
168 xymon 20 0 18300 5152 3940 S 0.0 0.5 0:00.00 apt
176 root 20 0 201132 15804 10264 S 0.0 1.5 0:00.02 sssd_be
177 root 20 0 17076 160 0 S 0.0 0.0 0:00.00 in.tftpd
191 root 20 0 168252 33204 32060 S 0.0 3.2 0:00.02 sssd_nss
192 root 20 0 147780 7452 6396 S 0.0 0.7 0:00.00 sssd_pam
193 root 20 0 141356 7364 6344 S 0.0 0.7 0:00.00 sssd_auto+
198 xymon 20 0 4292 1360 1252 S 0.0 0.1 0:00.00 xymonclie+
211 arpwatch 20 0 31872 4792 4104 S 0.0 0.5 0:00.00 arpwatch
214 root 20 0 54532 2768 2276 S 0.0 0.3 0:00.00 systemd-l+
223 root 20 0 69592 1716 4 S 0.0 0.2 0:00.00 (agetty)
224 root 20 0 69592 1716 4 S 0.0 0.2 0:00.00 (agetty)
251 root 20 0 361140 15748 6296 S 0.0 1.5 0:00.04 fail2ban-+
292 root 20 0 81152 6396 5636 S 0.0 0.6 0:00.00 postmulti
298 root 20 0 4292 752 680 S 0.0 0.1 0:00.00 postfix-s+
306 xymon 20 0 38236 2984 2584 R 0.0 0.3 0:00.00 top
345 root 20 0 4292 712 636 S 0.0 0.1 0:00.00 sh
349 root 20 0 22536 1392 1164 D 0.0 0.1 0:00.00 nft
351 root 20 0 4292 760 688 S 0.0 0.1 0:00.00 postfix-s+
353 root 20 0 4292 96 0 S 0.0 0.0 0:00.00 postfix-s+
354 root 20 0 79236 4740 4064 R 0.0 0.5 0:00.00 postconf
355 root 20 0 13216 1004 900 S 0.0 0.1 0:00.00 sed
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=cpu
"
ACKCODE="520216"
RCPT="1234567890"
BBHOSTNAME="HOST.DOMAIN.ORG"
MACHIP="111222333444"
BBSVCNAME="cpu"
BBSVCNUM="200"
BBHOSTSVC="HOST.DOMAIN.ORG.cpu"
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.cpu"
BBNUMERIC="200111222333444520216"
RECOVERED="0"
DOWNSECS="0"
DOWNSECSMSG=""

View File

@ -0,0 +1,41 @@
BBCOLORLEVEL="yellow"
BBALPHAMSG="HOST.DOMAIN.ORG:files yellow [275849]
yellow Thu Oct 10 11:17:18 CEST 2019 - Files NOT ok
&yellow <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&amp;SECTION=file:/var/log/cron.log">/var/log/cron.log</a>
File was modified 4642 seconds ago - should be <3800
&yellow <a href="/xymon-cgi/svcstatus.sh?CLIENT=vmgit.101010.virtual&amp;SECTION=file:/tmp/.github.TEST.upgrade">/tmp/.github.TEST.upgrade</a>
File exists
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&amp;SECTION=logfile:/var/log/kern.log">/var/log/kern.log</a>
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&amp;SECTION=file:/var/log/messages">/var/log/messages</a>
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&amp;SECTION=file:/var/log/syslog">/var/log/syslog</a>
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&amp;SECTION=dir:/var/log">/var/log</a>
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&amp;SECTION=dir:/tmp/">/tmp/</a>
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&amp;SECTION=file:/bin/su">/bin/su</a>
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&amp;SECTION=file:/usr/bin/sudo">/usr/bin/sudo</a>
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&amp;SECTION=file:/var/log/installer">/var/log/installer</a>
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=files
"
ACKCODE="275849"
RCPT="1234567890"
BBHOSTNAME="HOST.DOMAIN.ORG"
MACHIP="037187001062"
BBSVCNAME="files"
BBSVCNUM="0"
BBHOSTSVC="HOST.DOMAIN.ORG.files"
BBHOSTSVCCOMMAS="HOST,DOMAIN.ORG.files"
BBNUMERIC="000037187001062275849"
RECOVERED="0"
DOWNSECS="603"
DOWNSECSMSG=""

View File

@ -0,0 +1,29 @@
BBCOLORLEVEL="yellow"
BBALPHAMSG="HOST.DOMAIN.ORG:libs yellow [0]
yellow Thu Aug 16 16:41:56 2018 - libs NOT ok
&yellow Machine should be rebooted. Running not the newest installed kernel:
Running kernel=\"4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u1 (2018-08-03)\"
Newest installed kernel=\"4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u2 (2018-08-13)\"
&yellow The following processes have libs linked that were upgraded:
root:
/lib/systemd/systemd-udevd (546)
systemd-timesync:
/usr/lib/postfix/qmgr (52880)
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=libs"
ACKCODE="0"
RCPT="1234567890"
BBHOSTNAME="HOST.DOMAIN.ORG"
MACHIP="111222333444"
BBSVCNAME="libs"
BBSVCNUM="0"
BBHOSTSVC="HOST.DOMAIN.ORG.libs"
BBHOSTSVCCOMMAS="HOST.DOMAIN.ORG.libs"
BBNUMERIC="0001112223334440"
RECOVERED="0"
DOWNSECS="36320"
DOWNSECSMSG=""

View File

@ -0,0 +1,22 @@
BBCOLORLEVEL="red"
BBALPHAMSG="HOST.DOMAIN.ORG:memory red [251314]
red Tue Aug 21 11:42:42 CEST 2018 - Memory CRITICAL
Memory Used Total Percentage
&green Real/Physical 1017M 1536M 66%
&green Actual/Virtual 692M 1536M 45%
&red Swap/Page 1024M 1024M 100%
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=memory
"
ACKCODE="251314"
RCPT="1234567890"
BBHOSTNAME="HOST.DOMAIN.ORG"
MACHIP="111222333444"
BBSVCNAME="memory"
BBSVCNUM="0"
BBHOSTSVC="HOST.DOMAIN.ORG.memory"
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.memory"
BBNUMERIC="000111222333444251314"
RECOVERED="0"
DOWNSECS="0"
DOWNSECSMSG=""

View File

@ -0,0 +1,38 @@
BBCOLORLEVEL="red"
BBALPHAMSG="HOST.DOMAIN.ORG:ntpq red [842850]
red Tue Aug 21 11:20:53 2018 - ntpq NOT ok
NTP peers:
remote refid st t when poll reach delay offset jitter
==============================================================================
&clear WWW.XXX.YYY.ZZZ 213.251.53.11 3 u 18 64 1 0.542 -2.836 0.000
&red No system peer entry ("*") found
remote refid st t when poll reach delay offset jitter
==============================================================================
&green *WWW.XXX.YYY.ZZZ 51.15.178.157 3 u 243 1024 377 1.194 102.549 83.035
SyspeerDelay: 1.194
SyspeerOffset: 102.549
SyspeerJitter: 83.035
SyspeerOffset thresholds:
Warning: 100ms
Critical: 2000ms
&yellow SyspeerOffset > 100ms
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=ntpq
"
ACKCODE="842850"
RCPT="1234567890"
BBHOSTNAME="HOST.DOMAIN.ORG"
MACHIP="111222333444"
BBSVCNAME="ntpq"
BBSVCNUM="0"
BBHOSTSVC="HOST.DOMAIN.ORG.ntpq"
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.ntpq"
BBNUMERIC="000111222333444842850"
RECOVERED="0"
DOWNSECS="1544"
DOWNSECSMSG=""

View File

@ -0,0 +1,31 @@
BBCOLORLEVEL="yellow"
BBALPHAMSG="HOST.DOMAIN.ORG:ports yellow [40450]
yellow Tue Aug 21 11:43:22 CEST 2018 - Ports NOT ok
&yellow SMTP listener (found 0, req. between 1 and 2)
&green SSH logins (found 0, req. at most 10)
&green Bad listeners (found 0, req. at most 0)
Active Internet connections (servers and established)
Proto Recv-Q Send-Q Local Address Foreign Address State
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN
tcp 0 0 111.222.333.444:45250 WWW.XXX.YYY.ZZZ:1984 TIME_WAIT
tcp 0 0 111.222.333.444:54522 444.333.222.111:389 ESTABLISHED
tcp 0 0 111.222.333.444:45244 WWW.XXX.YYY.ZZZ:1984 TIME_WAIT
tcp 0 0 111.222.333.444:45242 WWW.XXX.YYY.ZZZ:1984 TIME_WAIT
tcp6 0 0 :::22 :::* LISTEN
udp 0 0 0.0.0.0:68 0.0.0.0:*
udp6 0 0 :::69 :::*
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=ports
"
ACKCODE="40450"
RCPT="1234567890"
BBHOSTNAME="HOST.DOMAIN.ORG"
MACHIP="111222333444"
BBSVCNAME="ports"
BBSVCNUM="0"
BBHOSTSVC="HOST.DOMAIN.ORG.ports"
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.ports"
BBNUMERIC="00011122233344440450"
RECOVERED="0"
DOWNSECS="6"
DOWNSECSMSG=""

View File

@ -0,0 +1,71 @@
BBCOLORLEVEL="red"
BBALPHAMSG="HOST.DOMAIN.ORG:procs red [757744]
red Tue Aug 21 13:29:28 CEST 2018 - Processes NOT ok
&green systemd-journald (found 1, req. between 1 and 1)
&green systemd-logind (found 1, req. between 1 and 1)
&green CRON (found 1, req. between 1 and 999)
&green ATD (found 1, req. between 1 and 999)
&green MTA-Stretch (found 1, req. between 1 and 1)
&green SSHD (found 3, req. between 1 and 20)
&green SSSD (found 1, req. between 1 and 1)
&green Fail2Ban (found 1, req. between 1 and 1)
&red tftpd-hpa (found 0, req. between 1 and 1)
&yellow ARPwatch (found 0, req. between 1 and 1)
&red Jenkins (found 0, req. between 1 and 5)
PID PPID USER STARTED S PRI %CPU TIME %MEM RSZ VSZ CMD
1 0 root 11:43:19 S 19 0.0 00:00:00 0.7 7816 69724 /sbin/init
43 1 root 11:43:20 S 19 0.0 00:00:00 2.4 25848 109744 /lib/systemd/systemd-journald
89 1 root 11:43:20 S 19 0.0 00:00:00 0.1 1052 22560 /sbin/dhclient -4 -v -pf /run/dhclient.eth0.pid -lf /var/lib/dhcp/dhclient.eth0.leases -I -df /var/lib/dhcp/dhclient6.eth0.leases eth0
121 1 root 11:43:21 S 19 0.0 00:00:00 0.2 2804 29668 /usr/sbin/cron -f
122 1 root 11:43:21 S 19 0.0 00:00:00 0.2 2756 321836 /usr/sbin/rsyslogd -n
124 1 root 11:43:21 S 19 0.0 00:00:00 0.9 9704 171944 /usr/sbin/sssd -i -f
176 124 root 11:43:21 S 19 0.0 00:00:00 1.7 18244 212360 \_ /usr/lib/x86_64-linux-gnu/sssd/sssd_be --domain ur1 --uid 0 --gid 0 --debug-to-files
191 124 root 11:43:21 S 19 0.0 00:00:00 3.1 33204 168252 \_ /usr/lib/x86_64-linux-gnu/sssd/sssd_nss --uid 0 --gid 0 --debug-to-files
192 124 root 11:43:21 S 19 0.0 00:00:00 0.8 8672 147912 \_ /usr/lib/x86_64-linux-gnu/sssd/sssd_pam --uid 0 --gid 0 --debug-to-files
193 124 root 11:43:21 S 19 0.0 00:00:00 0.7 7364 141356 \_ /usr/lib/x86_64-linux-gnu/sssd/sssd_autofs --uid 0 --gid 0 --debug-to-files
145 1 daemon 11:43:21 S 19 0.0 00:00:00 0.2 2120 27968 /usr/sbin/atd -f -l 9.6 -b 98
147 1 root 11:43:21 S 19 0.0 00:00:00 0.5 6180 71988 /usr/sbin/sshd -D
4810 147 root 13:29:14 S 19 0.0 00:00:00 0.7 7468 134176 \_ sshd: USER [priv]
5023 4810 USER 13:29:14 S 19 0.0 00:00:00 0.3 3952 134176 \_ sshd: USER@pts/2
5024 5023 USER 13:29:14 S 19 0.0 00:00:00 0.5 5608 59296 \_ zsh
5057 5024 USER 13:29:15 S 19 0.0 00:00:00 0.2 2988 19336 \_ tmux
223 1 root 11:43:21 S 19 0.0 00:00:00 0.1 2060 14316 /sbin/agetty -o -p -- \u --noclear --keep-baud tty1 115200,38400,9600 linux
224 1 root 11:43:21 S 19 0.0 00:00:00 0.1 2056 14316 /sbin/agetty -o -p -- \u --noclear --keep-baud console 115200,38400,9600 linux
251 1 root 11:43:22 S 19 0.0 00:00:01 1.5 15800 361140 /usr/bin/python3 /usr/bin/fail2ban-server -s /var/run/fail2ban/fail2ban.sock -p /var/run/fail2ban/fail2ban.pid -x -b
412 1 root 11:43:22 S 19 0.0 00:00:00 0.4 4244 83252 /usr/lib/postfix/sbin/master -w
415 412 postfix 11:43:22 S 19 0.0 00:00:00 0.6 6628 95528 \_ qmgr -l -t unix -u
4453 412 postfix 13:19:59 S 19 0.0 00:00:00 0.6 6532 95480 \_ showq -t unix -u -c
4504 412 postfix 13:23:22 S 19 0.0 00:00:00 0.6 6524 95480 \_ pickup -l -t unix -u -c
416 1 netdata 11:43:22 S 19 0.0 00:02:06 7.1 75324 224012 /usr/sbin/netdata -D
432 416 netdata 11:43:23 R 19 0.0 00:00:16 0.2 2628 28108 \_ /usr/lib/x86_64-linux-gnu/netdata/plugins.d/apps.plugin 1
433 416 netdata 11:43:23 S 19 0.0 00:01:30 4.6 48576 121208 \_ /usr/bin/python /usr/lib/x86_64-linux-gnu/netdata/plugins.d/python.d.plugin 1
4643 416 netdata 13:25:26 S 19 0.0 00:00:00 0.2 2688 9700 \_ bash /usr/lib/x86_64-linux-gnu/netdata/plugins.d/tc-qos-helper.sh 1
660 1 xymon-s+ 11:43:27 S 19 0.0 00:00:00 0.6 6788 68928 /lib/systemd/systemd --user
662 660 xymon-s+ 11:43:27 S 19 0.0 00:00:00 0.1 1904 123912 \_ (sd-pam)
881 1 message+ 11:43:27 S 19 0.0 00:00:00 0.4 4384 67636 /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation
883 1 root 11:43:27 S 19 0.0 00:00:00 0.5 5552 73268 /lib/systemd/systemd-logind
1377 1 USER 11:49:11 S 19 0.0 00:00:00 0.6 6716 68932 /lib/systemd/systemd --user
1379 1377 USER 11:49:11 S 19 0.0 00:00:00 0.1 2040 132260 \_ (sd-pam)
4768 1 xymon 13:28:48 S 19 0.0 00:00:00 0.0 756 4292 sh -c vmstat 300 2 1&gt;/var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG.4719 2&gt;&amp;1; mv /var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG.4719 /var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG
4770 4768 xymon 13:28:48 S 19 0.0 00:00:00 0.1 1400 24900 \_ vmstat 300 2
5059 1 USER 13:29:15 S 19 0.0 00:00:00 0.3 3364 28044 tmux
5067 5059 USER 13:29:15 S 19 0.0 00:00:00 0.6 6396 62052 \_ -zsh
5187 1 xymon 13:29:27 S 19 0.0 00:00:00 0.1 1576 4272 /usr/lib/xymon/client/bin/xymonlaunch --config=/etc/xymon/clientlaunch.cfg --log=/var/log/xymon/clientlaunch.log --pidfile=/var/run/xymon/clientlaunch.pid
5191 5187 xymon 13:29:27 S 19 0.0 00:00:00 0.1 1572 4292 \_ /bin/sh /usr/lib/xymon/client/bin/xymonclient.sh
5214 5191 xymon 13:29:27 S 19 0.0 00:00:00 0.1 1600 4292 | \_ /bin/sh /usr/lib/xymon/client/bin/xymonclient-linux.sh
5256 5214 xymon 13:29:28 R 19 0.0 00:00:00 0.2 2804 44404 | \_ ps -Aww f -o pid,ppid,user,start,state,pri,pcpu,time:12,pmem,rsz:10,vsz:10,cmd
5192 5187 xymon 13:29:27 S 19 0.0 00:00:00 0.4 5164 18300 \_ /usr/bin/perl -w /usr/lib/xymon/client/ext/apt
5231 5192 xymon 13:29:28 R 19 0.0 00:00:00 3.9 41128 70068 \_ apt-cache policy acl adduser apt apt-listchanges apt-transport-https apt-utils aptitude aptitude-common arpwatch at base-files base-passwd bash bash-completion bind9-host binutils bsd-mailx bsdmainutils bsdutils bzip2 ca-certificates coreutils cpio cpp cpp-6 cracklib-runtime cron curl dash dbus dctrl-tools debconf debconf-i18n debian-archive-keyring debian-faq debian-goodies debianutils debsecan debsums dh-python diffutils dirmngr distro-info-data dmidecode dmsetup doc-debian dpkg e2fslibs:amd64 e2fsprogs ed etckeeper fail2ban file findutils fontconfig-config fonts-dejavu-core fonts-font-awesome fping gcc-6-base:amd64 gettext-base git git-man gnupg gnupg-agent gnutls-bin gpgv grep groff-base gzip hobbit-plugins hostname htop iftop ifupdown init init-system-helpers DOMAINoute2 iputils-ping isc-dhcp-client isc-dhcp-common kmod krb5-locales less libacl1:amd64 libapparmor1:amd64 libapt-inst2.0:amd64 libapt-pkg5.0:amd64 libasprintf0v5:amd64 libassuan0:amd64 libattr1:amd64 libaudit-common libaudit1:amd64 libavahi-client3:amd64 libavahi-common-data:amd64 libavahi-common3:amd64 libbasicobjects0:amd64 libbind9-140:amd64 libblkid1:amd64 libboost-filesystem1.62.0:amd64 libboost-iostreams1.62.0:amd64 libboost-system1.62.0:amd64 libbsd0:amd64 libbz2-1.0:amd64 libc-ares2:amd64 libc-bin libc-l10n libc6:amd64 libcap-ng0:amd64 libcap2-bin libcap2:amd64 libclass-isa-perl libcollection4:amd64 libcomerr2:amd64 libcrack2:amd64 libcryptsetup4:amd64 libcups2:amd64 libcurl3-gnutls:amd64 libcurl3:amd64 libcwidget3v5:amd64 libdb5.3:amd64 libdbus-1-3:amd64 libdebconfclient0:amd64 libdevmapper1.02.1:amd64 libdhash1:amd64 libdns-export162 libdns162:amd64 libdpkg-perl libdrm2:amd64 libedit2:amd64 libelf1:amd64 liberror-perl libes"
ACKCODE="757744"
RCPT="1234567890"
BBHOSTNAME="HOST.DOMAIN.ORG"
MACHIP="111222333444"
BBSVCNAME="procs"
BBSVCNUM="300"
BBHOSTSVC="HOST.DOMAIN.ORG.procs"
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.procs"
BBNUMERIC="300111222333444757744"
RECOVERED="0"
DOWNSECS="0"
DOWNSECSMSG=""

65
xymon/tar.client.logfiles.sh Executable file
View File

@ -0,0 +1,65 @@
#!/bin/sh

# Purpose:
#   Create an XZ archive of all files between 2 dates.
#   Then remove these files.
#   Call this script from an /var/lib/xymon/hostdata subdirectory or
#   /var/lib/xymon/histlogs subdirectory.
# Exit status:
#   0 if the archive was created or if there was nothing to archive,
#   1 if the archive already exists or couldn't be created.

# Vars {{{
## Enable (0) or disable (1) debug
debug=0

## Colors {{{
c_redb='\033[1;31m'
c_magentab='\033[1;35m'
c_reset='\033[0m'
## }}}

## Manage files of year
date_year="2019"
## Compress files between these dates
date_start="${date_year}-01-01 00:00:01"
date_end="${date_year}-12-31 23:59:59"

## Best XZ compression level
xz_compression_lvl="-9"
## Fastest XZ compression level
#xz_compression_lvl="-0"

## Get current directory name
current_dir=${PWD##*/}
## Archive name
tar_file_name="${date_year}.${current_dir}${xz_compression_lvl}.tar.xz"
# }}}

# Functions {{{
## Run find on the files modified between the 2 dates and ignore tar files.
## Extra arguments (-print0, -delete,…) are appended to the expression, so the
## count, the archive and the removal always work on the same selection.
find_matching_files() {
	find . -type f -newermt "${date_start}" -not -newermt "${date_end}" -not -iname "*.tar*" "$@"
}

## Print the message given as first argument if debug is enabled
debug_message() {
	if [ "${debug}" -eq "0" ]; then
		printf "${c_magentab}%-6b${c_reset}\n" "DEBUG: ${1}"
	fi
}
# }}}

## Count the number of files
match_files=$(find_matching_files | wc -l)

# If archive already exists
if [ -s "${tar_file_name}" ]; then
	printf "${c_redb}%-6b${c_reset}\n" "ERROR: ${tar_file_name} already exists (also ${match_files} files match the expected pattern). Please manage this directory manually or remove the archive or files then restart." >&2
	exit 1
fi

# If no file matches, there is nothing to do
if [ "${match_files}" -eq "0" ]; then
	debug_message "Skip ${current_dir}, no files found between ${date_start} and ${date_end}."
	exit 0
fi

debug_message "Create an archive for ${current_dir} files between ${date_start} and ${date_end} (${match_files} files) using XZ's compression level: ${xz_compression_lvl}."

## Get the list of files between the 2 dates and archive them.
## XZ_OPT hands the compression level over to the xz process started by tar
## (J option); without it xz would use its default level whatever the value
## of xz_compression_lvl.
## The files are deleted only if tar succeeded and the archive isn't empty.
if find_matching_files -print0 | XZ_OPT="${xz_compression_lvl}" tar cJf "${tar_file_name}" --null -T - &&
	[ -s "${tar_file_name}" ]
then
	debug_message "${tar_file_name} successfully created, the files can be deleted."
	find_matching_files -delete
else
	printf "${c_redb}%-6b${c_reset}\n" "ERROR: ${tar_file_name} couldn't be created, no file was deleted." >&2
	exit 1
fi

exit 0

59
xymon/xymon.alert.sh Executable file
View File

@ -0,0 +1,59 @@
#!/bin/sh

# Purpose {{{
## If Xymon server says that a probe is in error on a remote host, try to call the appropriate script.
## For debugging messages, you can check xymon's logs (/var/log/xymon/alert.log)
## How-to use: {{{
### Define an alert in Xymon configuration file (/etc/xymon/alerts.cfg)
#HOST=HOST.DOMAIN.ORG
#	SCRIPT /PATH/TO/SCRIPT/xymon.alert.sh 1234567890 FORMAT=SCRIPT DURATION<20
## }}}
## Environment read by this script: BBHOSTNAME, BBSVCNAME and BBALPHAMSG.
# }}}
# Vars {{{
## Enable (0) or disable (1) debug messages
DEBUG=1

## Directory of this script (quoted to support a path with spaces)
script_path="$(dirname -- "${0}")"

script_apt="${script_path}/xymon.apt.alert.sh"
script_files="${script_path}/xymon.files.alert.sh"
script_libs="${script_path}/xymon.libs.alert.sh"
script_procs="${script_path}/xymon.procs.alert.sh"
# }}}
# Functions {{{
## Print the message given as first argument if DEBUG is enabled.
## \033 is used for the escape character: \e isn't a POSIX printf escape
## sequence and some /bin/sh print it literally.
debug_message() {
	if [ "${DEBUG}" -eq "0" ]; then
		printf '\033[1;35m%-6s\033[m\n' "DEBUG: ${1}"
	fi
}
# }}}

debug_message "${BBHOSTNAME}${BBSVCNAME} is in error."

# Match probe name with the script {{{
case "${BBSVCNAME}" in
	'apt' )
		script_to_run="${script_apt}"
		;;
	'files' )
		script_to_run="${script_files}"
		;;
	'libs' )
		script_to_run="${script_libs}"
		;;
	'procs' )
		script_to_run="${script_procs}"
		;;
	# default
	* )
		script_to_run="NOT.MANAGED"
		;;
esac
# }}}

# Call the next script if managed {{{
if [ "${script_to_run}" != "NOT.MANAGED" ]; then
	# Export vars {{{
	## The called script reads these variables from its environment
	debug_message "${BBHOSTNAME} — Export vars for ${script_to_run}"
	export BBALPHAMSG
	export BBHOSTNAME
	export BBSVCNAME
	# }}}

	debug_message "${BBHOSTNAME} — Run ${script_to_run} script."
	"${script_to_run}"
fi
# }}}

## Always report a success, whatever the status of the called script
exit 0

62
xymon/xymon.apt.alert.sh Executable file
View File

@ -0,0 +1,62 @@
#!/bin/sh

# Purpose {{{
## If Xymon server says that the last apt update is too old, try to run a new one.
## 1. Create a ssh keyring for xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure to have the ${REMOTE_SSH_USER} available on remote hosts and allowed to connect with SSH.
# Restrict the SSH access to a single SSH key from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys):
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow sudo commands to restart services and run apt update (/etc/sudoers.d/xymon-ssh):
## xymon-ssh ALL=(root:root) NOPASSWD: /usr/bin/apt update
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}
## Environment read by this script: BBSVCNAME, BBHOSTNAME and BBALPHAMSG.
# }}}
# Vars {{{
## Enable (0) or disable (1) debug; the log files of ${temp_dir} are kept only
## when debug is enabled
DEBUG=1

## NOTE: LOCAL_SSH_USER isn't referenced anywhere else in this script
LOCAL_SSH_USER="xymon"
REMOTE_SSH_USER="xymon-ssh"

## Stop here if the directory can't be created: with an empty ${temp_dir} the
## log files would be written at the root of the filesystem
temp_dir=$(mktemp -d -t xymon-apt-alert-XXXXXX.tmp) || {
	printf '%s\n' "ERROR: can't create the temporary directory." >&2
	exit 1
}
debug_stdout="${temp_dir}/debug.stdout"
debug_stderr="${temp_dir}/debug.stderr"
# }}}
# Functions {{{
## Print the message given as first argument if DEBUG is enabled.
## \033 is used for the escape character: \e isn't a POSIX printf escape
## sequence and some /bin/sh print it literally.
debug_message() {
	if [ "${DEBUG}" -eq "0" ]; then
		printf '\033[1;35m%-6s\033[m\n' "DEBUG: ${1}"
	fi
}
# }}}

# Create log files
touch "${debug_stdout}" "${debug_stderr}"

# Manage only apt probe {{{
if [ "${BBSVCNAME}" = "apt" ]; then
	debug_message "${BBHOSTNAME}${BBSVCNAME} error" >> "${debug_stdout}"
else
	debug_message "${BBHOSTNAME}${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
	[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
	exit 0
fi
# }}}

# Check if repos need to be updated {{{
## printf is used instead of echo: the alert message is arbitrary text and the
## way echo handles backslashes depends on the shell
if printf '%s\n' "${BBALPHAMSG}" | grep -qE "\\&(red|yellow) Last apt update.*ago$" ; then
	debug_message "Test APT repos — APT repos need to be updated." >> "${debug_stdout}"
	debug_message "Test APT repos — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo apt update" >> "${debug_stdout}"
	## NOTE(review): the host key of the remote host is never verified
	## (StrictHostKeyChecking=no and no known_hosts file) — confirm that this
	## is acceptable on this network.
	ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo apt update" >> "${debug_stdout}" 2>> "${debug_stderr}"

	# Also restart xymon-client service {{{
	debug_message "Test APT repos — xymon-client also need to be restarted." >> "${debug_stdout}"
	debug_message "Test APT repos — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service" >> "${debug_stdout}"
	ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart xymon-client.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
	# }}}
fi
# }}}

# Remove empty error file
[ -s "${debug_stderr}" ] || rm -f -- "${debug_stderr}"

# Remove temp_dir if DEBUG is disabled
[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"

exit 0

102
xymon/xymon.files.alert.sh Executable file
View File

@ -0,0 +1,102 @@
#!/bin/sh

# Purpose {{{
## If Xymon server says that a file is in error on a remote host, try to restart the related service.
## 1. Create a ssh keyring for xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure to have the ${REMOTE_SSH_USER} available on remote hosts and allowed to connect with SSH.
# Restrict the SSH access to a single SSH key from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys):
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh):
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}
## Environment read by this script: BBSVCNAME, BBHOSTNAME and BBALPHAMSG.
# }}}
# Vars {{{
## Enable (0) or disable (1) debug; the log files of ${temp_dir} are kept only
## when debug is enabled
DEBUG=1

## NOTE: LOCAL_SSH_USER isn't referenced anywhere else in this script
LOCAL_SSH_USER="xymon"
REMOTE_SSH_USER="xymon-ssh"

## Stop here if the directory can't be created: with an empty ${temp_dir} the
## log files would be written at the root of the filesystem
temp_dir=$(mktemp -d -t xymon-files-alert-XXXXXX.tmp) || {
	printf '%s\n' "ERROR: can't create the temporary directory." >&2
	exit 1
}
debug_stdout="${temp_dir}/debug.stdout"
debug_stderr="${temp_dir}/debug.stderr"
file_list="${temp_dir}/services.error.list"

## Last restarted service, to avoid restarting the same one twice in a row
previous_service_name=""
# }}}
# Functions {{{
## Print the message given as first argument if DEBUG is enabled.
## \033 is used for the escape character: \e isn't a POSIX printf escape
## sequence and some /bin/sh print it literally.
debug_message() {
	if [ "${DEBUG}" -eq "0" ]; then
		printf '\033[1;35m%-6s\033[m\n' "DEBUG: ${1}"
	fi
}
# }}}

# Create log files
touch "${debug_stdout}" "${debug_stderr}"

# Manage only files probe {{{
if [ "${BBSVCNAME}" = "files" ]; then
	debug_message "${BBHOSTNAME}${BBSVCNAME} error" >> "${debug_stdout}"
else
	debug_message "${BBHOSTNAME}${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
	[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
	exit 0
fi
# }}}

# Check if a file exceeds it's modification time {{{
## printf is used instead of echo: the alert message is arbitrary text and the
## way echo handles backslashes depends on the shell
if printf '%s\n' "${BBALPHAMSG}" | grep -qE "File was modified.*ago - should be.*" ; then
	debug_message "Test file — Some files exceeds their modification time." >> "${debug_stdout}"

	# First restart Rsyslog service {{{
	debug_message "Test service — First restart rsyslog service." >> "${debug_stdout}"
	debug_message "Test service — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart rsyslog.service" >> "${debug_stdout}"
	## NOTE(review): the host key of the remote host is never verified
	## (StrictHostKeyChecking=no and no known_hosts file) — confirm that this
	## is acceptable on this network.
	ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart rsyslog.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
	# }}}

	## Get the list of files path
	## TODO: Set a pattern for both yellow and red colors
	printf '%s\n' "${BBALPHAMSG}" | sed -n 's;^\&yellow <a.*>\(.*\)</a>;\1;p' > "${file_list}"

	while IFS= read -r file_path; do
		debug_message "while file_path loop — ${file_path} exceeds it's modification time." >> "${debug_stdout}"

		# Match files path and services name {{{
		case "${file_path}" in
			# Cron
			'/var/log/cron.log' )
				service_name="cron"
				;;
			# default
			* )
				service_name="NOT.MANAGED"
				;;
		esac
		# }}}

		# Restart service if needed {{{
		if [ "${service_name}" = "NOT.MANAGED" ]; then
			debug_message "while file_path loop — service for ${file_path} is not managed." >> "${debug_stdout}"
		elif [ "${service_name}" = "${previous_service_name}" ]; then
			debug_message "while file_path loop — ${service_name} was already restarted." >> "${debug_stdout}"
		else
			debug_message "while file_path loop — ${service_name} need to be restarted." >> "${debug_stdout}"
			debug_message "while file_path loop — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${service_name}.service" >> "${debug_stdout}"
			## -n keeps ssh away from stdin, which is the file list read by
			## the loop
			ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart ${service_name}.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
			previous_service_name="${service_name}"
		fi
		# }}}
	done < "${file_list}"

	# Also restart xymon-client service {{{
	debug_message "Test service — xymon-client also need to be restarted." >> "${debug_stdout}"
	debug_message "Test service — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service" >> "${debug_stdout}"
	ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart xymon-client.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
	# }}}
else
	debug_message "Test file — All files seems up to date." >> "${debug_stdout}"
fi
# }}}

# Remove empty error file
[ -s "${debug_stderr}" ] || rm -f -- "${debug_stderr}"

# Remove temp_dir if DEBUG is disabled
[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"

exit 0

159
xymon/xymon.libs.alert.sh Executable file
View File

@ -0,0 +1,159 @@
#!/bin/sh
# Purpose {{{
## If the Xymon "libs" probe reports processes that still use outdated libraries on a remote host, try to restart the matching services.
## 1. Create a ssh keyring for xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure to have the ${REMOTE_SSH_USER} available on remote hosts and allowed to connect with SSH.
# Restrict the SSH access to a single SSH key from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys):
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh):
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}
# }}}
# Vars {{{
# DEBUG follows the shell convention: 0 enables the debug traces and keeps
# ${temp_dir} once the script ends, any other number disables both.
DEBUG=1
# NOTE: LOCAL_SSH_USER is not referenced anywhere else in this script.
LOCAL_SSH_USER="xymon"
REMOTE_SSH_USER="xymon-ssh"
# Stop right away if the working directory can't be created: with an empty
# ${temp_dir} every log file below would point to the root of the filesystem.
temp_dir=$(mktemp -d -t xymon-libs-alert-XXXXXX.tmp) || {
  printf '%s\n' "ERROR: unable to create a temporary directory." >&2
  exit 1
}
debug_stdout="${temp_dir}/debug.stdout"
debug_stderr="${temp_dir}/debug.stderr"
service_list="${temp_dir}/services.error.list"
# }}}
# Create log files
touch "${debug_stdout}" "${debug_stderr}"
# Manage only libs probe {{{
# Any probe other than "libs" is only logged (when debugging) and ends the
# script; the temporary directory is removed unless debugging is enabled.
case "${BBSVCNAME}" in
  "libs" )
    if [ "${DEBUG}" -eq "0" ]; then
      printf '\e[1;35m%-6s\e[m\n' "DEBUG: ${BBHOSTNAME}${BBSVCNAME} error" >> "${debug_stdout}"
    fi
    ;;
  * )
    if [ "${DEBUG}" -eq "0" ]; then
      printf '\e[1;35m%-6s\e[m\n' "DEBUG: ${BBHOSTNAME}${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
    else
      rm -rf -- "${temp_dir}"
    fi
    exit 0
    ;;
esac
# }}}
# Check if host need to reboot {{{
# Succeed if the alert message (BBALPHAMSG) holds the yellow "Machine should be
# reboot" line.
# printf is used rather than echo: some /bin/sh implementations expand
# backslash sequences given to echo, which would alter the alert message
# before it is searched.
host_needs_reboot() {
  printf '%s\n' "${BBALPHAMSG}" | grep -qE "\\&yellow Machine should be reboot.*"
}
# The reboot itself is never triggered from here, the need is only traced.
if host_needs_reboot; then
  [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: Test kernel — The host need to be rebooted." >> "${debug_stdout}"
fi
# }}}
# Check if a service need to restart

# Print on stdout the systemd service name that owns the binary path given as
# first argument, or "NOT.MANAGED" when the binary is not known.
service_for_binary() {
  # Match binaries path and services name {{{
  case "${1}" in
    # Apache2
    '/usr/sbin/apache2' )
      printf '%s\n' "apache2"
      ;;
    # Arpwatch
    '/usr/sbin/arpwatch' )
      printf '%s\n' "arpwatch"
      ;;
    # Dbus
    '/usr/bin/dbus-daemon' )
      printf '%s\n' "dbus"
      ;;
    # lvmetad LVM metadata cache daemon
    '/sbin/lvmetad' )
      printf '%s\n' "lvm2-lvmetad"
      ;;
    # Netdata
    '/usr/sbin/netdata' )
      printf '%s\n' "netdata"
      ;;
    # blkmapd pNFS block layout mapping daemon
    '/usr/sbin/blkmapd' )
      printf '%s\n' "nfs-utils"
      ;;
    # Mumble-server
    '/usr/sbin/murmurd' )
      printf '%s\n' "mumble-server"
      ;;
    # Nginx
    '/usr/sbin/nginx' )
      printf '%s\n' "nginx"
      ;;
    # Ntp
    '/usr/sbin/ntpd' )
      printf '%s\n' "ntp"
      ;;
    # Nslcd
    '/usr/sbin/nslcd' )
      printf '%s\n' "nslcd"
      ;;
    # PHP-FPM 7.0
    '/usr/sbin/php-fpm7.0' )
      printf '%s\n' "php7.0-fpm"
      ;;
    # PHP-FPM 7.3
    '/usr/sbin/php-fpm7.3' )
      printf '%s\n' "php7.3-fpm"
      ;;
    # Postfix
    '/usr/lib/postfix/sbin/pickup' | '/usr/lib/postfix/qmgr' | '/usr/lib/postfix/sbin/tlsmgr' | '/usr/lib/postfix/sbin/qmgr' )
      printf '%s\n' "postfix"
      ;;
    # Rdnssd
    '/sbin/rdnssd' )
      printf '%s\n' "rdnssd"
      ;;
    # Systemd-journald
    '/lib/systemd/systemd-journald' )
      printf '%s\n' "systemd-journald"
      ;;
    # Systemd-logind
    '/lib/systemd/systemd-logind' )
      printf '%s\n' "systemd-logind"
      ;;
    # default
    * )
      printf '%s\n' "NOT.MANAGED"
      ;;
  esac
  # }}}
}

# Read binary paths (one per line) from the file given as first argument and
# restart, through SSH on ${BBHOSTNAME}, the service owning each of them.
# Every service is restarted at most once, wherever its binaries appear in the
# list (several binaries can map to the same service, e.g. Postfix).
# Uses: DEBUG, REMOTE_SSH_USER, BBHOSTNAME, debug_stdout, debug_stderr.
restart_outdated_services() {
  # Space-delimited list of the services restarted so far; the surrounding
  # spaces allow an exact-word match in the case pattern below.
  restarted_services=" "
  while IFS= read -r bin_path; do
    [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while bin_path loop — ${bin_path} use old libs." >> "${debug_stdout}"
    service_name="$(service_for_binary "${bin_path}")"

    # Restart service if needed {{{
    if [ "${service_name}" = "NOT.MANAGED" ]; then
      [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while bin_path loop — service for ${bin_path} is not managed." >> "${debug_stdout}"
      continue
    fi

    case "${restarted_services}" in
      *" ${service_name} "* )
        [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while bin_path loop — ${service_name} was already restarted." >> "${debug_stdout}"
        continue
        ;;
    esac

    [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while bin_path loop — ${service_name} need to be restarted." >> "${debug_stdout}"
    [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while bin_path loop — ssh -n -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${service_name}.service" >> "${debug_stdout}"
    # -n keeps ssh away from stdin, which is the list read by this loop
    ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart ${service_name}.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
    restarted_services="${restarted_services}${service_name} "
    # }}}
  done < "${1}"
}

# printf is used rather than echo to feed the alert message to grep/sed: some
# /bin/sh implementations expand backslash sequences given to echo.
if printf '%s\n' "${BBALPHAMSG}" | grep -qE "\\&yellow The following processes.*" ; then
  [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: Test service — Some services need to be restarted." >> "${debug_stdout}"
  # Get the list of binaries path
  printf '%s\n' "${BBALPHAMSG}" | sed -n 's/^ \(\/.*\) (.*)/\1/p' > "${service_list}"
  restart_outdated_services "${service_list}"
  # Also restart xymon-client service {{{
  [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: Test service — xymon-client also need to be restarted." >> "${debug_stdout}"
  [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: Test service — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service" >> "${debug_stdout}"
  ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart xymon-client.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
  # }}}
fi
# Drop the error log when nothing was written to it
if [ ! -s "${debug_stderr}" ]; then
  rm -f "${debug_stderr}"
fi
# Keep temp_dir only while debugging (DEBUG=0), delete it otherwise
if ! [ "${DEBUG}" -eq "0" ]; then
  rm -rf -- "${temp_dir}"
fi
exit 0

107
xymon/xymon.procs.alert.sh Executable file
View File

@ -0,0 +1,107 @@
#!/bin/sh
# Purpose {{{
## If Xymon server says that a service is in error on a remote host, try to
## restart this service.
## 1. Create a ssh keyring for xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure to have the ${REMOTE_SSH_USER} available on remote hosts and allowed
# to connect with SSH.
# Restrict the SSH access to a single SSH key from the Xymon server IP
# (~${REMOTE_SSH_USER}/.ssh/authorized_keys):
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh):
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}
## 3. Xymon Configuration {{{
# PROC monitoring needs to display the real service name in its description:
## PROC %^/sbin/rpcbind MIN=1 MAX=1 COLOR=red "TEXT=rpcbind"
# You can add more information about this proc if you use an underscore "_":
## PROC %^/usr/sbin/rpc.idmapd MIN=1 MAX=1 COLOR=red "TEXT=NFS-server_rpc.idmapd"
## This way, the script will only take the text before the underscore "_" as the
## service name to be restarted.
# Don't add whitespaces in the description of a process.
## }}}
# }}}
# Vars {{{
# DEBUG follows the shell convention: 0 enables the debug traces and keeps
# ${temp_dir} once the script ends, any other number disables both.
DEBUG=1
REMOTE_SSH_USER="xymon-ssh"
# Stop right away if the working directory can't be created: with an empty
# ${temp_dir} every log file below would point to the root of the filesystem.
temp_dir=$(mktemp -d -t xymon-procs-alert-XXXXXX.tmp) || {
  printf '%s\n' "ERROR: unable to create a temporary directory." >&2
  exit 1
}
debug_stdout="${temp_dir}/debug.stdout"
debug_stderr="${temp_dir}/debug.stderr"
service_list="${temp_dir}/services.error.list"
# }}}
# Create log files
touch "${debug_stdout}" "${debug_stderr}"
# Manage only procs probe {{{
# Any probe other than "procs" is only logged (when debugging) and ends the
# script; the temporary directory is removed unless debugging is enabled.
case "${BBSVCNAME}" in
  "procs" )
    if [ "${DEBUG}" -eq "0" ]; then
      printf '\e[1;35m%-6s\e[m\n' "DEBUG: ${BBHOSTNAME}${BBSVCNAME} error" >> "${debug_stdout}"
    fi
    ;;
  * )
    if [ "${DEBUG}" -eq "0" ]; then
      printf '\e[1;35m%-6s\e[m\n' "DEBUG: ${BBHOSTNAME}${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
    else
      rm -rf -- "${temp_dir}"
    fi
    exit 0
    ;;
esac
# }}}
# Get the list of processes with an error
# Keep the red/yellow lines of the alert message, drop their first field (the
# color marker) and lowercase what is left.
# printf is used rather than echo: some /bin/sh implementations expand
# backslash sequences given to echo, which would alter the alert message.
printf '%s\n' "${BBALPHAMSG}" | grep -E "&(red|yellow)" | cut -d" " -f2- | tr '[:upper:]' '[:lower:]' > "${service_list}"

# Succeed only if the first argument is a non-empty string of digits.
is_unsigned_integer() {
  case "${1}" in
    '' | *[!0-9]* ) return 1 ;;
    * ) return 0 ;;
  esac
}

# Succeed only if the first argument can safely be used as a service name in
# the remote command line: non-empty, not starting with a dash (it would be
# read as an option) and made only of letters, digits and "@", ".", "_", ":",
# "-". The name comes from the alert message and ends up in a command parsed
# by the remote shell, so anything else is refused.
is_safe_service_name() {
  case "${1}" in
    '' | -* | *[!A-Za-z0-9@._:-]* ) return 1 ;;
    * ) return 0 ;;
  esac
}

# Parse one line of the processes list (first argument) and set the variables
# service_name, process_found, process_min and process_max.
# All four are emptied first, so a line matching none of the known patterns
# can't inherit the values of the previous line.
parse_process_line() {
  service_name=""
  process_found=""
  process_min=""
  process_max=""
  ## Pattern "req. between" {{{
  if printf '%s\n' "${1}" | grep -q -E -- ".* \\(found .*, req. between .* and .*\\)" ; then
    [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while process loop — Pattern \"req. between\"." >> "${debug_stdout}"
    # Only the text before the first underscore is the service name
    service_name="$(printf '%s\n' "${1}" | cut -d" " -f1 | sed 's/_.*//')"
    process_found="$(printf '%s\n' "${1}" | cut -d" " -f3 | tr -d ',')"
    process_min="$(printf '%s\n' "${1}" | cut -d" " -f6)"
    process_max="$(printf '%s\n' "${1}" | cut -d" " -f8 | tr -d ')')"
  fi
  ## }}}
  ## Pattern "req. .* or more" {{{
  if printf '%s\n' "${1}" | grep -q -E -- ".* \\(found .*, req. .* or more\\)" ; then
    [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while process loop — Pattern \"req. .* or more\"." >> "${debug_stdout}"
    service_name="$(printf '%s\n' "${1}" | cut -d" " -f1 | sed 's/_.*//')"
    process_found="$(printf '%s\n' "${1}" | cut -d" " -f3 | tr -d ',')"
    process_min="$(printf '%s\n' "${1}" | cut -d" " -f5)"
    process_max="nolimit"
  fi
  ## }}}
  return 0
}

# Read the processes list (one process per line) from the file given as first
# argument and restart, through SSH on ${BBHOSTNAME}, every service with fewer
# running processes than its required minimum.
# Uses: DEBUG, REMOTE_SSH_USER, BBHOSTNAME, debug_stdout, debug_stderr.
restart_failed_services() {
  while IFS= read -r line; do
    parse_process_line "${line}"
    [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while process loop — Found ${process_found} process(es) for ${service_name} service and require between ${process_min} and ${process_max}." >> "${debug_stdout}"
    # Restart service if needed {{{
    if is_unsigned_integer "${process_found}" && is_unsigned_integer "${process_min}" \
      && is_safe_service_name "${service_name}" \
      && [ "${process_found}" -lt "${process_min}" ]; then
      [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while process loop — ${service_name} need to be restarted." >> "${debug_stdout}"
      [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while process loop — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${service_name}.service" >> "${debug_stdout}"
      # -n keeps ssh away from stdin, which is the list read by this loop
      ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart ${service_name}.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
    else
      [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: while process loop — ${service_name} service is not managed." >> "${debug_stdout}"
    fi
    # }}}
  done < "${1}"
}

# If any error on a process
if [ -s "${service_list}" ]; then
  [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: process list — Some processes seems to be in error." >> "${debug_stdout}"
  restart_failed_services "${service_list}"
  # Also restart xymon-client service {{{
  [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: process list — xymon-client also need to be restarted." >> "${debug_stdout}"
  [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: process list — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service" >> "${debug_stdout}"
  ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart xymon-client.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
  # }}}
else
  [ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG: process list — No error on any process." >> "${debug_stdout}"
fi
# Drop the error log when nothing was written to it
if [ ! -s "${debug_stderr}" ]; then
  rm -f "${debug_stderr}"
fi
# Keep temp_dir only while debugging (DEBUG=0), delete it otherwise
if ! [ "${DEBUG}" -eq "0" ]; then
  rm -rf -- "${temp_dir}"
fi
exit 0