New path for Xymon
This commit is contained in:
parent
20581a9fa0
commit
eed91835a4
23
xymon/get.xymon.alert.vars.sh
Executable file
23
xymon/get.xymon.alert.vars.sh
Executable file
@ -0,0 +1,23 @@
|
|||||||
|
#!/bin/sh

# Debug helper: dump the alert-related environment variables listed below
# into a per-host/service file under /tmp, one NAME="value" pair per line.
#
# NOTE(review): the file name is predictable on purpose (it is derived from
# ${BBHOSTSVC}), which means any local user can pre-create it. Keep this
# script for troubleshooting only.
log_file="/tmp/xymon.alert.${BBHOSTSVC}.vars"

# Print variables for an alert on a specific host.
# The previous file is removed first so every run starts from scratch, then
# all lines are written through a single redirection.
# Values are passed as '%s' arguments (never through '%b' or the format
# string) so that backslash sequences contained in an alert message are
# stored verbatim instead of being interpreted.
rm -f -- "${log_file}"
{
  printf 'BBCOLORLEVEL="%s"\n' "${BBCOLORLEVEL}"
  printf 'BBALPHAMSG="%s"\n' "${BBALPHAMSG}"
  printf 'ACKCODE="%s"\n' "${ACKCODE}"
  printf 'RCPT="%s"\n' "${RCPT}"
  printf 'BBHOSTNAME="%s"\n' "${BBHOSTNAME}"
  printf 'MACHIP="%s"\n' "${MACHIP}"
  printf 'BBSVCNAME="%s"\n' "${BBSVCNAME}"
  printf 'BBSVCNUM="%s"\n' "${BBSVCNUM}"
  printf 'BBHOSTSVC="%s"\n' "${BBHOSTSVC}"
  printf 'BBHOSTSVCCOMMAS="%s"\n' "${BBHOSTSVCCOMMAS}"
  printf 'BBNUMERIC="%s"\n' "${BBNUMERIC}"
  printf 'RECOVERED="%s"\n' "${RECOVERED}"
  printf 'DOWNSECS="%s"\n' "${DOWNSECS}"
  printf 'DOWNSECSMSG="%s"\n' "${DOWNSECSMSG}"
} > "${log_file}"

exit 0
|
8
xymon/plugins/client/ext/dcheck
Normal file
8
xymon/plugins/client/ext/dcheck
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
3&green /dev/sda auto
|
||||||
|
3&green /dev/sdb auto
|
||||||
|
3&green /dev/sdc auto
|
||||||
|
3&green /dev/sdd auto
|
||||||
|
3&green /dev/sde auto
|
||||||
|
4&clear /dev/sdf unsupported
|
||||||
|
3&green /dev/bus/0 megaraid,12
|
||||||
|
3&green /dev/bus/0 megaraid,13
|
258
xymon/plugins/client/ext/dres
Normal file
258
xymon/plugins/client/ext/dres
Normal file
@ -0,0 +1,258 @@
|
|||||||
|
&green /dev/sda auto
|
||||||
|
|
||||||
|
SMART Health Status: OK
|
||||||
|
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
|
||||||
|
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Vendor: HGST
|
||||||
|
Product: HUS728T8TAL5200
|
||||||
|
Revision: RS01
|
||||||
|
Compliance: SPC-4
|
||||||
|
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
|
||||||
|
Logical block size: 512 bytes
|
||||||
|
Physical block size: 4096 bytes
|
||||||
|
Formatted with type 2 protection
|
||||||
|
LU is fully provisioned
|
||||||
|
Rotation Rate: 7200 rpm
|
||||||
|
Form Factor: 3.5 inches
|
||||||
|
Logical Unit id: 0x5000cca09976b8c4
|
||||||
|
Serial number: VAJ392BL
|
||||||
|
Device type: disk
|
||||||
|
Transport protocol: SAS (SPL-3)
|
||||||
|
Local Time is: Fri Feb 28 15:07:11 2020 CET
|
||||||
|
SMART support is: Available - device has SMART capability.
|
||||||
|
SMART support is: Enabled
|
||||||
|
Temperature Warning: Disabled or Not Supported
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART Self-test log
|
||||||
|
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
|
||||||
|
Description number (hours)
|
||||||
|
# 1 Background short Completed 96 5771 - [- - -]
|
||||||
|
# 2 Background short Completed 96 5747 - [- - -]
|
||||||
|
------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
&green /dev/sdb auto
|
||||||
|
|
||||||
|
SMART Health Status: OK
|
||||||
|
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
|
||||||
|
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Vendor: HGST
|
||||||
|
Product: HUS728T8TAL5200
|
||||||
|
Revision: RS01
|
||||||
|
Compliance: SPC-4
|
||||||
|
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
|
||||||
|
Logical block size: 512 bytes
|
||||||
|
Physical block size: 4096 bytes
|
||||||
|
Formatted with type 2 protection
|
||||||
|
LU is fully provisioned
|
||||||
|
Rotation Rate: 7200 rpm
|
||||||
|
Form Factor: 3.5 inches
|
||||||
|
Logical Unit id: 0x5000cca09975fc04
|
||||||
|
Serial number: VAJ2WHPL
|
||||||
|
Device type: disk
|
||||||
|
Transport protocol: SAS (SPL-3)
|
||||||
|
Local Time is: Fri Feb 28 15:07:11 2020 CET
|
||||||
|
SMART support is: Available - device has SMART capability.
|
||||||
|
SMART support is: Enabled
|
||||||
|
Temperature Warning: Disabled or Not Supported
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART Self-test log
|
||||||
|
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
|
||||||
|
Description number (hours)
|
||||||
|
# 1 Background short Completed 96 5607 - [- - -]
|
||||||
|
# 2 Background short Completed 96 5583 - [- - -]
|
||||||
|
------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
&green /dev/sdc auto
|
||||||
|
|
||||||
|
SMART Health Status: OK
|
||||||
|
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
|
||||||
|
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Vendor: HGST
|
||||||
|
Product: HUS728T8TAL5200
|
||||||
|
Revision: RS01
|
||||||
|
Compliance: SPC-4
|
||||||
|
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
|
||||||
|
Logical block size: 512 bytes
|
||||||
|
Physical block size: 4096 bytes
|
||||||
|
Formatted with type 2 protection
|
||||||
|
LU is fully provisioned
|
||||||
|
Rotation Rate: 7200 rpm
|
||||||
|
Form Factor: 3.5 inches
|
||||||
|
Logical Unit id: 0x5000cca099757c5c
|
||||||
|
Serial number: VAJ2M04L
|
||||||
|
Device type: disk
|
||||||
|
Transport protocol: SAS (SPL-3)
|
||||||
|
Local Time is: Fri Feb 28 15:07:11 2020 CET
|
||||||
|
SMART support is: Available - device has SMART capability.
|
||||||
|
SMART support is: Enabled
|
||||||
|
Temperature Warning: Disabled or Not Supported
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART Self-test log
|
||||||
|
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
|
||||||
|
Description number (hours)
|
||||||
|
# 1 Background short Completed 96 5859 - [- - -]
|
||||||
|
# 2 Background short Completed 96 5835 - [- - -]
|
||||||
|
------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
&green /dev/sdd auto
|
||||||
|
|
||||||
|
SMART Health Status: OK
|
||||||
|
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
|
||||||
|
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Vendor: HGST
|
||||||
|
Product: HUS728T8TAL5200
|
||||||
|
Revision: RS01
|
||||||
|
Compliance: SPC-4
|
||||||
|
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
|
||||||
|
Logical block size: 512 bytes
|
||||||
|
Physical block size: 4096 bytes
|
||||||
|
Formatted with type 2 protection
|
||||||
|
LU is fully provisioned
|
||||||
|
Rotation Rate: 7200 rpm
|
||||||
|
Form Factor: 3.5 inches
|
||||||
|
Logical Unit id: 0x5000cca099765214
|
||||||
|
Serial number: VAJ327BL
|
||||||
|
Device type: disk
|
||||||
|
Transport protocol: SAS (SPL-3)
|
||||||
|
Local Time is: Fri Feb 28 15:07:11 2020 CET
|
||||||
|
SMART support is: Available - device has SMART capability.
|
||||||
|
SMART support is: Enabled
|
||||||
|
Temperature Warning: Disabled or Not Supported
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART Self-test log
|
||||||
|
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
|
||||||
|
Description number (hours)
|
||||||
|
# 1 Background short Completed 96 5599 - [- - -]
|
||||||
|
# 2 Background short Completed 96 5575 - [- - -]
|
||||||
|
------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
&green /dev/sde auto
|
||||||
|
|
||||||
|
SMART Health Status: OK
|
||||||
|
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
|
||||||
|
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Vendor: HGST
|
||||||
|
Product: HUS728T8TAL5200
|
||||||
|
Revision: RS01
|
||||||
|
Compliance: SPC-4
|
||||||
|
User Capacity: 8,001,563,222,016 bytes [8.00 TB]
|
||||||
|
Logical block size: 512 bytes
|
||||||
|
Physical block size: 4096 bytes
|
||||||
|
Formatted with type 2 protection
|
||||||
|
LU is fully provisioned
|
||||||
|
Rotation Rate: 7200 rpm
|
||||||
|
Form Factor: 3.5 inches
|
||||||
|
Logical Unit id: 0x5000cca09976e460
|
||||||
|
Serial number: VAJ3BZDL
|
||||||
|
Device type: disk
|
||||||
|
Transport protocol: SAS (SPL-3)
|
||||||
|
Local Time is: Fri Feb 28 15:07:11 2020 CET
|
||||||
|
SMART support is: Available - device has SMART capability.
|
||||||
|
SMART support is: Enabled
|
||||||
|
Temperature Warning: Disabled or Not Supported
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART Self-test log
|
||||||
|
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
|
||||||
|
Description number (hours)
|
||||||
|
# 1 Background short Completed 96 5599 - [- - -]
|
||||||
|
# 2 Background short Completed 96 5575 - [- - -]
|
||||||
|
------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
&clear /dev/sdf unsupported
|
||||||
|
|
||||||
|
SMART Health Status can't be determine because of:
|
||||||
|
SMART support is: Unavailable - device lacks SMART capability.
|
||||||
|
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Vendor: DELL
|
||||||
|
Product: PERC H730P Mini
|
||||||
|
Revision: 4.30
|
||||||
|
User Capacity: 146,163,105,792 bytes [146 GB]
|
||||||
|
Logical block size: 512 bytes
|
||||||
|
Logical Unit id: 0x61866da06192eb00256e8c0a2d73f5b6
|
||||||
|
Serial number: 00b6f5732d0a8c6e2500eb9261a06d86
|
||||||
|
Device type: disk
|
||||||
|
Local Time is: Fri Feb 28 15:07:12 2020 CET
|
||||||
|
SMART support is: Unavailable - device lacks SMART capability.
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
&green /dev/bus/0 megaraid,12
|
||||||
|
|
||||||
|
SMART Health Status: OK
|
||||||
|
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
|
||||||
|
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Vendor: SEAGATE
|
||||||
|
Product: ST9146803SS
|
||||||
|
Revision: FS64
|
||||||
|
User Capacity: 146,815,733,760 bytes [146 GB]
|
||||||
|
Logical block size: 512 bytes
|
||||||
|
Rotation Rate: 10000 rpm
|
||||||
|
Form Factor: 2.5 inches
|
||||||
|
Logical Unit id: 0x5000c5003ac7ef07
|
||||||
|
Serial number: 6SD3HJV0
|
||||||
|
Device type: disk
|
||||||
|
Transport protocol: SAS (SPL-3)
|
||||||
|
Local Time is: Fri Feb 28 15:07:13 2020 CET
|
||||||
|
SMART support is: Available - device has SMART capability.
|
||||||
|
SMART support is: Enabled
|
||||||
|
Temperature Warning: Disabled or Not Supported
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART Self-test log
|
||||||
|
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
|
||||||
|
Description number (hours)
|
||||||
|
# 1 Background short Completed 16 62479 - [- - -]
|
||||||
|
# 2 Background short Completed 16 62455 - [- - -]
|
||||||
|
------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
&green /dev/bus/0 megaraid,13
|
||||||
|
|
||||||
|
SMART Health Status: OK
|
||||||
|
&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
|
||||||
|
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Vendor: SEAGATE
|
||||||
|
Product: ST9146803SS
|
||||||
|
Revision: FS64
|
||||||
|
User Capacity: 146,815,733,760 bytes [146 GB]
|
||||||
|
Logical block size: 512 bytes
|
||||||
|
Rotation Rate: 10000 rpm
|
||||||
|
Form Factor: 2.5 inches
|
||||||
|
Logical Unit id: 0x5000c5003ac956db
|
||||||
|
Serial number: 6SD3HH6J
|
||||||
|
Device type: disk
|
||||||
|
Transport protocol: SAS (SPL-3)
|
||||||
|
Local Time is: Fri Feb 28 15:07:14 2020 CET
|
||||||
|
SMART support is: Available - device has SMART capability.
|
||||||
|
SMART support is: Enabled
|
||||||
|
Temperature Warning: Disabled or Not Supported
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART Self-test log
|
||||||
|
Num Test Status segment LifeTime LBA_first_err [SK ASC ASQ]
|
||||||
|
Description number (hours)
|
||||||
|
# 1 Background short Completed 16 61374 - [- - -]
|
||||||
|
# 2 Background short Completed 16 61350 - [- - -]
|
||||||
|
------------------------------------------------------------
|
||||||
|
|
||||||
|
|
13
xymon/plugins/client/ext/dscan
Normal file
13
xymon/plugins/client/ext/dscan
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
/dev/sda -d scsi # /dev/sda, SCSI device
|
||||||
|
/dev/sdb -d scsi # /dev/sdb, SCSI device
|
||||||
|
/dev/sdc -d scsi # /dev/sdc, SCSI device
|
||||||
|
/dev/sdd -d scsi # /dev/sdd, SCSI device
|
||||||
|
/dev/sde -d scsi # /dev/sde, SCSI device
|
||||||
|
/dev/sdf -d scsi # /dev/sdf, SCSI device
|
||||||
|
/dev/bus/0 -d megaraid,0 # /dev/bus/0 [megaraid_disk_00], SCSI device
|
||||||
|
/dev/bus/0 -d megaraid,1 # /dev/bus/0 [megaraid_disk_01], SCSI device
|
||||||
|
/dev/bus/0 -d megaraid,2 # /dev/bus/0 [megaraid_disk_02], SCSI device
|
||||||
|
/dev/bus/0 -d megaraid,3 # /dev/bus/0 [megaraid_disk_03], SCSI device
|
||||||
|
/dev/bus/0 -d megaraid,4 # /dev/bus/0 [megaraid_disk_04], SCSI device
|
||||||
|
/dev/bus/0 -d megaraid,12 # /dev/bus/0 [megaraid_disk_12], SCSI device
|
||||||
|
/dev/bus/0 -d megaraid,13 # /dev/bus/0 [megaraid_disk_13], SCSI device
|
242
xymon/plugins/client/ext/sge.sh
Executable file
242
xymon/plugins/client/ext/sge.sh
Executable file
@ -0,0 +1,242 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# SGE: Sun Grid Engine check - Xymon external script test
|
||||||
|
#
|
||||||
|
##### Purpose is to report back to a central server, all Sun
|
||||||
|
##### Grid Engine software faults.
|
||||||
|
#####
|
||||||
|
#
|
||||||
|
# version 0.4
|
||||||
|
#
|
||||||
|
# BIG BROTHER / XXXXXXXXXXXXXXXX status
|
||||||
|
#
|
||||||
|
# Written by Butch Deal <butchdeal@yahoo.com>
|
||||||
|
# Daniel Gomez <dgomez@tigr.org,daniel@ixplosive.com>
|
||||||
|
# Jérémy Gardais <jeremy.gardais@univ-rennes1.fr>
|
||||||
|
#
|
||||||
|
# v0.4 09/06/20 clean, correction,… for Xymon 4.3.28
|
||||||
|
# v0.3e 10/14/08 cut down on the number of qhost runs
|
||||||
|
# v0.3d 03/31/06 added alarm/suspend state identification
|
||||||
|
# v0.3c 03/01/06 propagated yellow state upon UNAVAILABLE queue instances
|
||||||
|
# v0.3b 01/31/06 fixed yellow warning queue status for ambiguous config test
|
||||||
|
# v0.3a 01/31/06 added unknown queue status and ambiguous config test
|
||||||
|
# v0.3 01/26/06 fixed status reporting and optimized job status
|
||||||
|
# v0.2 08/03/05 flag disabled queues as clear
|
||||||
|
# v0.1 07/28/05 authored
|
||||||
|
|
||||||
|
########################################
|
||||||
|
# NOTE
|
||||||
|
# The version v0.4 has only been tested with Xymon (server and client) 4.2.x.
|
||||||
|
#
|
||||||
|
# The color status with respects to queue status is arbitrary and should be
|
||||||
|
# reviewed for your particular environment.
|
||||||
|
#
|
||||||
|
# Tested on :
|
||||||
|
# Solaris & Linux
|
||||||
|
# Linux only (for v0.4)
|
||||||
|
########################################
|
||||||
|
|
||||||
|
########################################
|
||||||
|
# INSTALLATION
|
||||||
|
# step 1 - copy to Xymon client's ext dir
|
||||||
|
# step 2 - New clientlaunch.d/sge.cfg file
|
||||||
|
# step 3 - restart Xymon client
|
||||||
|
#
|
||||||
|
# NOTE - the TEST variable in the configuration section, this is the name used
|
||||||
|
# as the column header.
|
||||||
|
########################################
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# CONFIGURE IT HERE
|
||||||
|
##################################
|
||||||
|
# Every value obtained from a command substitution is assigned first and
# marked read-only afterwards: under a POSIX /bin/sh such as dash the
# arguments of `readonly` are field-split, so `readonly X=$(cmd)` breaks as
# soon as the output contains whitespace, and it also hides cmd's status.

# Name under which this plugin was invoked.
PLUGIN_NAME=$(basename "${0}")
readonly PLUGIN_NAME

# Test name; it is used in the temporary file names below and in the
# status message ("${MACHINE}.${TEST}").
TEST="sge"
readonly TEST

# Working files: the report body and the per-queue verdicts.
PLUGIN_RESULT="${XYMONTMP}/${MACHINEDOTS}.${TEST}.plugin_result"
PLUGIN_STATE="${XYMONTMP}/${MACHINEDOTS}.${TEST}.plugin_state"
readonly PLUGIN_RESULT PLUGIN_STATE
# Start every run with an empty state file.
true > "${PLUGIN_STATE}"

# Locate the Grid Engine commands; a variable stays empty when the command
# is not found in PATH.
QSTAT=$(command -v qstat)
QHOST=$(command -v qhost)
QSELECT=$(command -v qselect)
readonly QSTAT QHOST QSELECT
export QSTAT QHOST QSELECT

# define colours for graphics
# Comment these out if using older BB versions
CLEAR_PIC="&clear"
RED_PIC="&red"
YELLOW_PIC="&yellow"
GREEN_PIC="&green"
UNKNOWN_PIC="&purple"
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Start of script
|
||||||
|
##################################
|
||||||
|
|
||||||
|
# Print a section header for the status report: an empty line followed by
# the title in a large bold font.
#   $1 - section title
#   $2 - command behind the section; accepted but not displayed by the
#        active layout (only the commented-out variants below use it)
get_header()
{
  # printf keeps the title verbatim; echo would interpret backslashes under
  # some /bin/sh implementations.
  printf '\n<FONT SIZE=+2><b>%s</b></FONT> <BR>\n' "$1"
  # Variant showing the command as well:
  #echo "<FONT SIZE=+2><b>$1</b></FONT> ($2)<BR>"
  # If you do not want the header in a bigger font use line below instead
  #echo "<b>$1</b> ($2)"
  # If you want the "Paul Luzzi" look uncomment this section and comment
  # out the above sections:
  #echo "<P><DIV ALIGN=\"CENTER\"><HR>"
  #echo "<B>============== $1 ==============</B>"
  #echo "<B>--- ($2) ---</B>"
  #echo "<HR></DIV>"
  #echo "<BLOCKQUOTE>"
}
|
||||||
|
# Same as a regular section header, one font size smaller: an empty line
# followed by the title in bold.
#   $1 - section title
#   $2 - command behind the section; accepted but not displayed by the
#        active layout (only the commented-out variants below use it)
get_header_small()
{
  # printf keeps the title verbatim; echo would interpret backslashes under
  # some /bin/sh implementations.
  printf '\n<FONT SIZE=+1><b>%s</b></FONT> <BR>\n' "$1"
  # Variant showing the command as well:
  #echo "<FONT SIZE=+2><b>$1</b></FONT> ($2)<BR>"
  # If you do not want the header in a bigger font use line below instead
  # echo "<b>$1</b> ($2)"
  # If you want the "Paul Luzzi" look uncomment this section and comment
  # out the above sections:
  #echo "<P><DIV ALIGN=\"CENTER\"><HR>"
  #echo "<B>============== $1 ==============</B>"
  #echo "<B>--- ($2) ---</B>"
  #echo "<HR></DIV>"
  #echo "<BLOCKQUOTE>"
}
|
||||||
|
|
||||||
|
|
||||||
|
# Close a report section; the active layout only emits an empty line.
get_footer()
{
  printf '\n'
  # For the "Paul Luzzi" look, enable the line below together with the
  # matching header variant:
  #echo "</BLOCKQUOTE>"
}
|
||||||
|
|
||||||
|
#####
|
||||||
|
##### Get Status proc - used to get all responses
|
||||||
|
#####
|
||||||
|
# Record one queue-instance verdict in the state file and in the HTML report.
#   $1 - sortable severity tag, e.g. "1&red" (the lowest number sorts first)
#   $2 - icon markup shown in the report, e.g. "&red"
#   $3 - text stored in the state file
#   $4 - text shown in the report
#   $5 - raw qhost line the verdict was derived from
# Globals: PLUGIN_STATE (appended to), queueMsg (extended)
_sge_record_queue()
{
  printf '%s %s\n' "$1" "$3" >> "${PLUGIN_STATE}"
  queueMsg="${queueMsg}<BR>$2 $4<BR>$5"
}

# Build the whole report on stdout: running jobs, host summary and the state
# of every queue instance of this host. One verdict line per queue instance
# is appended to ${PLUGIN_STATE}.
# Globals read: QSTAT, QHOST, MACHINEDOTS, HOST, PLUGIN_RESULT, *_PIC
# Returns: 1 when qstat or qhost is unavailable, otherwise the status of the
#          last command.
get_status()
{
  # QSTAT/QHOST are resolved once at start-up and declared read-only there,
  # so they must not be assigned again here (that aborts a POSIX shell).
  # Without them nothing below can work: report it and stop.
  # QSELECT is not used by this function, so its absence is not checked.
  _sge_missing=""
  [ -n "${QSTAT}" ] || _sge_missing="${_sge_missing} qstat"
  [ -n "${QHOST}" ] || _sge_missing="${_sge_missing} qhost"
  if [ -n "${_sge_missing}" ]; then
    echo ""
    echo "$YELLOW_PIC SGE command(s) not found in PATH:${_sge_missing}"
    printf '%s\n' "2&yellow SGE command(s) not found in PATH:${_sge_missing}" >> "${PLUGIN_STATE}"
    return 1
  fi

  ###
  ### Check the jobs
  ###
  get_header "Jobs" "$QSTAT -l hostname=$MACHINEDOTS"
  # Run qstat only once and print what was captured.
  jobs=$("${QSTAT}" -l hostname="${MACHINEDOTS}" -s r -u '*')
  if [ -z "$jobs" ]; then
    echo "No Running Jobs"
  else
    printf '%s\n' "$jobs"
  fi
  get_footer

  ###
  ### Check the host
  ###
  get_header "Host" "$QHOST -h $MACHINEDOTS"
  "${QHOST}" -h "${MACHINEDOTS}" | grep -v "global"
  get_footer

  ###
  ### Identify queue memberships
  ###
  #get_header "Queue Membership" "$QHOST -q"
  #${QHOST} -h ${MACHINEDOTS} -q | tail -n +5
  #get_footer

  ###
  ### Check queue instance states
  ###
  queueMsg=""
  # HOST is not set anywhere in this script; fall back to the name that is
  # passed to qhost so the messages never read "queue@".
  _sge_host="${HOST:-${MACHINEDOTS}}"

  # Queue lines start at the 5th line of the "qhost -q" output.
  "${QHOST}" -h "${MACHINEDOTS}" -q | tail -n +5 > "${PLUGIN_RESULT}.QSTATE"
  while IFS= read -r _LINE; do
    queue=$(printf -- '%s' "${_LINE}" | awk '{ print $1 }')
    qstate=$(printf -- '%s' "${_LINE}" | awk '{ print $4 }')

    # Ignore blank lines instead of reporting a nameless queue.
    [ -n "${queue}" ] || continue
    _sge_inst="${queue}@${_sge_host}"

    # Order determines more significant alert status: the first pattern
    # that matches the state letters wins.
    case "${qstate}" in
      *d*)
        _sge_record_queue "4&clear" "$CLEAR_PIC" \
          "${_sge_inst} is DISABLED" "${_sge_inst} is DISABLED" "${_LINE}" ;;
      *E*)
        _sge_record_queue "1&red" "$RED_PIC" \
          "${_sge_inst} is in ERROR!" "${_sge_inst} is in ERROR!" "${_LINE}" ;;
      *c*)
        _sge_record_queue "2&yellow" "$YELLOW_PIC" \
          "${_sge_inst} has an ambigious configuration!" \
          "${_sge_inst} has an ambigious configuration!" "${_LINE}" ;;
      *[aA]*)
        _sge_record_queue "2&yellow" "$YELLOW_PIC" \
          "${_sge_inst} is in ALARM" "${_sge_inst} is in ALARM" "${_LINE}" ;;
      *[sS]*)
        _sge_record_queue "2&yellow" "$YELLOW_PIC" \
          "${_sge_inst} is SUSPENDED" "${_sge_inst} is SUSPENDED" "${_LINE}" ;;
      *u*)
        _sge_record_queue "2&yellow" "$YELLOW_PIC" \
          "${_sge_inst} is UNAVAILABLE" "${_sge_inst} is UNAVAILABLE!" "${_LINE}" ;;
      "")
        _sge_record_queue "3&green" "$GREEN_PIC" \
          "${_sge_inst} is OK" "${_sge_inst} is OK" "${_LINE}" ;;
      *)
        _sge_record_queue "5&purple" "$UNKNOWN_PIC" \
          "${_sge_inst} is UNKNOWN" "${_sge_inst} is UNKNOWN" "${_LINE}" ;;
    esac
  done < "${PLUGIN_RESULT}.QSTATE"

  get_header "Queue Instance Status Report"
  echo "$queueMsg"
  get_footer

  #####
  ##### End of get_status proc
  #####
}
|
||||||
|
|
||||||
|
#####
|
||||||
|
##### Main body
|
||||||
|
#####
|
||||||
|
# Build the report body; the per-queue verdicts land in ${PLUGIN_STATE}.
get_status > "${PLUGIN_RESULT}"

# Set the global color according to the highest alert.
# Verdict lines start with "<rank>&<color>": the smallest rank sorts first
# and the two leading characters are cut off to keep the color name only.
COLOR=$(awk '{ print $1 }' "${PLUGIN_STATE}" | sort -u | head -n 1 | cut -c3-)
# No verdict recorded at all: send "clear" rather than an empty color,
# which would leave the status message without its color word.
COLOR="${COLOR:-clear}"

# NOW USE THE XYMON COMMAND TO SEND THE DATA ACROSS
# $XYMON and $DATE are left unquoted on purpose: they come from the
# environment and may carry arguments besides the command name.
$XYMON "${XYMSRV}" "status ${MACHINE}.${TEST} ${COLOR} $($DATE) $(cat "${PLUGIN_RESULT}")"
#For testing only
# echo $XYMON "${XYMSRV}" "status ${MACHINE}.${TEST} ${COLOR} $($DATE) $(cat "${PLUGIN_RESULT}")" > /tmp/sgetmp

# Clean up our mess
# rm -f is silent about files that do not exist, so no existence test is
# needed even when part of the run did not produce its file.
rm -f -- "${PLUGIN_RESULT}" "${PLUGIN_STATE}" "${PLUGIN_RESULT}.QSTATE"
|
||||||
|
##############################################
|
||||||
|
# end of script
|
||||||
|
##############################################
|
202
xymon/plugins/client/ext/smart
Executable file
202
xymon/plugins/client/ext/smart
Executable file
@ -0,0 +1,202 @@
|
|||||||
|
#!/usr/bin/perl
|
||||||
|
# $Id: sensors 70 2011-11-25 09:21:18Z skazi $
|
||||||
|
# Author: Jacek Tomasiak <jacek.tomasiak@gmail.com>
|
||||||
|
# https://github.com/skazi0/xymon-plugins/blob/master/client/ext/smart
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
# add script's directory to module search path for Hobbit.pm on non-debian systems
|
||||||
|
use FindBin;
|
||||||
|
use lib $FindBin::Bin;
|
||||||
|
|
||||||
|
use Hobbit;
|
||||||
|
use Data::Dumper;
|
||||||
|
|
||||||
|
# Report object for the 'smart' test (method-call syntax instead of the
# ambiguous indirect-object form "new Hobbit(...)").
my $bb = Hobbit->new('smart');

# File holding the list of detected disks, one device path per line.
my $temp_disk_list = "$ENV{'XYMONTMP'}/$ENV{'MACHINEDOTS'}.smart.drivedb.list";
# Disks to check; filled by load_config() or, failing that, from the file above.
my @disks = ();
# Error counts per disk that are already known (SMARTOLDERROR[...] in the
# config); subtracted from the reported "Error Count".
# Must be initialised with an empty list: "{}" is a hash reference and
# would be stored as a single bogus key.
my %olderr = ();

my $CACHETIME = 10; # minutes
my $CACHEFILE = "$ENV{'XYMONTMP'}/$ENV{'MACHINEDOTS'}.smart.cache";

# May override @disks, %olderr and $CACHETIME.
&load_config("$ENV{'XYMONTMP'}/logfetch.$ENV{'MACHINEDOTS'}.cfg");
|
||||||
|
|
||||||
|
# Modification time of the cached disk list, 0 when the file cannot be stat'ed.
my @disks_stat = stat($temp_disk_list);
my $disks_mtime = @disks_stat ? $disks_stat[9] : 0;

# Drop the list once it is too old. The age is compared with time(), i.e.
# the limit of 600 is expressed in seconds (10 minutes).
unlink $temp_disk_list if (time() - $disks_mtime > 600);

# An existing file is used as is; otherwise create a file with the list of
# disks: every /dev/sd* entry whose name contains no digit.
unless (-e $temp_disk_list) {
    my $list_cmd = "ls -1 /dev/sd* | grep -vE '[0-9]' > $temp_disk_list";
    system($list_cmd) == 0
        or die "system command to create $temp_disk_list failed: $?";
}
|
||||||
|
|
||||||
|
# fallback to disk detection if nothing defined in the config
|
||||||
|
# No disk came from the configuration: take every line of the disk list
# file, without its line ending, as a disk to check.
unless (@disks) {
    open(my $list_fh, '<:encoding(UTF-8)', $temp_disk_list)
        or die "Could not open file '$temp_disk_list' $!";
    chomp(my @rows = <$list_fh>);
    push(@disks, @rows);
}
|
||||||
|
|
||||||
|
# Modification time of the cache file, 0 when it cannot be stat'ed.
my @stat = stat($CACHEFILE);
my $mtime = scalar @stat ? $stat[9] : 0;
# Regenerate the cache if it is older than $CACHETIME minutes: run smartctl
# for every disk and store its output with a "&color" prefix on the lines
# that carry a verdict.
if (time() - $mtime > $CACHETIME * 60)
{
    # Lexical handle and explicit mode: the file name is never parsed for
    # mode characters and the handle cannot clash with another one.
    open(my $out, '>', $CACHEFILE) or die "cannot open $CACHEFILE: $!";

    foreach my $name (@disks)
    {
        print {$out} '=' x 20, " $name ", '=' x 20, "\n";
        # NOTE(review): $name is interpolated into a shell command line; it
        # comes from the plugin configuration or from the /dev listing.
        my @output = `sudo smartctl -AHi -l error -l selftest $name 2>&1`
            or die "smartctl returned no output for $name";
        my $ncv = '';         # "name : value" pairs emitted in a hidden comment
        my $newerr = 1;       # assume new errors until the log says otherwise
        my $ponhours = undef; # raw value of Power_On_Hours
        my $lasttest = undef; # lifetime hours of the first self-test listed
        foreach my $line (@output)
        {
            # skip header
            next if ($line =~ /smartctl|Copyright|Home page|===/);

            if ($line =~ /.*overall-health.*:\s*(.*)/)
            {
                my $lstatus = ($1 eq 'PASSED') ? 'green' : 'red';
                print {$out} "&$lstatus $line";
            }
            # attribute table row: the name, value, worst, threshold,
            # failure and raw columns are captured
            elsif ($line =~ /^\s*(\d+)\s+(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+(\d+)\s+\S+\s+\S+\s+(\S+)\s+(.*)$/)
            {
                my ($aname, $value, $worst, $thresh, $failure, $raw) = ($2, $3, $4, $5, $6, $7);
                my $lstatus = 'green';
                if ($aname =~ /Current_Pending_Sector|Offline_Uncorrectable/ and int($raw) > 0)
                {
                    $lstatus = 'yellow';
                }
                elsif ($aname =~ /Power_On_Hours/)
                {
                    $ponhours = $raw;
                }
                if ($failure =~ /FAIL/) {
                    $lstatus = 'red';
                }

                print {$out} "&$lstatus $line";

                $ncv .= "$name-$aname-value : $value\n";
                $ncv .= "$name-$aname-worst : $worst\n";
                $ncv .= "$name-$aname-thresh : $thresh\n";
                $ncv .= "$name-$aname-raw : $raw\n";
            }
            elsif ($line =~ /^\s*No Errors Logged\s*$/)
            {
                $newerr = 0;
                print {$out} "&green $line";
            }
            elsif ($line =~ /Error Count:\s*(\d+)/)
            {
                # only errors beyond the configured, already known count matter
                $newerr = $1 - $olderr{$name};
                my $lstatus = $newerr > 0 ? 'red' : 'green';
                print {$out} "&$lstatus $line";
            }
            elsif ($line =~ /^\s*Error \d+ occurred/)
            {
                my $lstatus = $newerr > 0 ? 'red' : 'green';
                print {$out} "&$lstatus $line";
            }
            elsif ($line =~ /^\s*#\s*\d+\s+(Conveyance offline|Extended offline|Short offline|Extended captive)\s+(.*)\s+\d+%\s+(\d+)/)
            {
                my $status = $2;
                my $lifetime = $3;
                my $lstatus = 'red';
                # keep the first self-test line seen
                $lasttest = $lifetime if (!defined($lasttest));
                # NOTE(review): "Interrupted (host reset)" appears in both
                # patterns, so it always ends up green.
                $lstatus = 'yellow' if ($status =~ /Aborted by host|Interrupted \(host reset\)/);
                $lstatus = 'green' if ($status =~ /Completed without error|Self-test routine in progress|Interrupted \(host reset\)/);
                print {$out} "&$lstatus $line";
            }
            else
            {
                print {$out} " $line";
            }
        }
        # test status footer
        # the power-on hours are taken modulo 65536 before the lifetime of
        # the last test is subtracted
        my $lasttestage = $ponhours % 65536 - $lasttest;
        my $lasttestmsg = "$lasttestage hours ago";
        my $lasttestcolor = 'green';
        if (!defined($lasttest))
        {
            $lasttestcolor = 'yellow';
            $lasttestmsg = 'no test performed';
        }
        elsif ($lasttestage > 24 * 7)
        {
            $lasttestcolor = 'red';
        }
        elsif ($lasttestage > 24 * 2)
        {
            $lasttestcolor = 'yellow';
        }
        print {$out} "&$lasttestcolor Last Self-test: $lasttestmsg\n";

        # hidden output for ncv
        print {$out} "<!--\n$ncv\n-->\n";
    }

    # a failing close means the cache was not written completely
    close($out) or die "cannot write $CACHEFILE: $!";
}
|
||||||
|
|
||||||
|
# send cached content
|
||||||
|
# send cached content
# Lines starting with "&<color>" are passed to color_print() together with
# that color; every other line is printed as is.
{
    # Explicit read mode and a lexical handle: a cache path starting with a
    # mode character can no longer change how the file is opened.
    open(my $in, '<', $CACHEFILE) or die "cannot open $CACHEFILE: $!";
    while (my $line = <$in>)
    {
        if ($line =~ /^\s*&(\S+)/)
        {
            $bb->color_print($1, $line);
        }
        else
        {
            $bb->print($line);
        }
    }
    close $in;
}


$bb->send;
|
||||||
|
|
||||||
|
# Read the plugin settings from a configuration file.
#   $path - file to read; when it cannot be opened the sub returns silently
#           and the current settings are kept.
# Recognised on non-comment lines:
#   DISKS = "..."               whitespace separated list stored in @disks
#   SMARTOLDERROR[disk] count   stored in %olderr for that disk
#   SMARTCACHETIME=number       stored in $CACHETIME
sub load_config
{
    my $path = shift;

    # Three-argument open with a lexical handle: the path is taken literally
    # and no global bareword handle is left behind.
    open(my $cfg, '<', $path) or return;
#    print "loading config from $path\n";
    while (my $line = <$cfg>)
    {
        next if ($line =~ /^\s*#/);
        if ($line =~ /DISKS\s*=\s*['"](.*?)["']/)
        {
            @disks = split(/\s+/, $1);
        }
        if ($line =~ /SMARTOLDERROR\[([\w\/]+)\]\s+(\d+)/)
        {
            $olderr{$1} = $2;
        }
        if ($line =~ /SMARTCACHETIME=(\d+)/)
        {
            $CACHETIME = $1;
        }
    }
    close $cfg;
}
|
300
xymon/plugins/client/ext/smartoverall
Executable file
300
xymon/plugins/client/ext/smartoverall
Executable file
@ -0,0 +1,300 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# .. vim: foldmarker=[[[,]]]:foldmethod=marker
|
||||||
|
|
||||||
|
# NOTE: Must be run as root, so you probably need to setup sudo for this.
|
||||||
|
|
||||||
|
# This script is mostly intended to be used with Xymon, mainly for devices unknown to the smartmontools database.
|
||||||
|
# Based on xymon.com's script : https://www.xymon.com/xymon-cgi/viewconf.sh?smart
|
||||||
|
# The script will scan all devices compatible with SMART and for each disk, it will : [[[
|
||||||
|
# * try to guess the expected TYPE (even megaraid,…).
|
||||||
|
# * display health status.
|
||||||
|
# * set a "clear" state for incompatible device.
|
||||||
|
# * display last selftests.
|
||||||
|
# * set a "error" state if no selftest is recorded.
|
||||||
|
# * display basic informations.
|
||||||
|
# * recommend a more advanced SMART script if the disk is known to the smartmontools database (drivedb.h), or point to the smartmontools FAQ if not.
|
||||||
|
# ]]]
|
||||||
|
# Things the script CAN'T do : [[[
|
||||||
|
# * ensure a recent selftest was run.
|
||||||
|
# * compare current value with vendor's one (for failure prediction or error).
|
||||||
|
# * give detail about errors.
|
||||||
|
# * Take a look at this more advanced script for such features : https://github.com/skazi0/xymon-plugins/blob/master/client/ext/smart
|
||||||
|
# ]]]
|
||||||
|
|
||||||
|
# Vars [[[
## Debug switch: the DEBUG printf calls of this script are guarded by
## `[ "${debug}" -eq "0" ]`, so "0" turns the messages on and "1" keeps
## them off.
debug="1"

## Colors [[[
c_redb='\033[1;31m'
c_magentab='\033[1;35m'
c_reset='\033[0m'
## ]]]

## Name under which the status is reported (the script's own file name)
plugin_name=$(basename "${0}")

## Account that must own the generated files
xymon_username="xymon"
xymon_groupname="xymon"

## By default, don't empty files newer than 10 hours (600 minutes)
default_mtime_minutes="600"

## Working files, all stored in Xymon's temp directory
_smartoverall_tmp_prefix="${XYMONTMP}/${MACHINEDOTS}.smartoverall"
plugin_result="${_smartoverall_tmp_prefix}.plugin_result"
plugin_state="${_smartoverall_tmp_prefix}.plugin_state"
device_list="${_smartoverall_tmp_prefix}.dscan"
## List of devices known from the smartmontools base and compatible with test logging
## This file might be used by a more advanced script such as skazi0's one
drivedb_list="${XYMONTMP}/${MACHINEDOTS}.smart.drivedb.list"
# ]]]
|
||||||
|
|
||||||
|
# Functions
|
||||||
|
## Create or empty a file if it's too old [[[
## First argument (required): absolute path to the file
## Second argument (optional): maximum number of minutes since the last
##   modification; defaults to ${default_mtime_minutes}. Extra arguments
##   are ignored.
## A file that does not exist yet is created directly, without calling stat
## on the missing path. Each time the file is created or emptied, it is
## chown'ed to ${xymon_username}:${xymon_groupname}.
## Debug messages (printed when debug is 0) are written to stderr so they
## never mix with the data a caller collects from stdout.
## Exits the whole script with status 1 when called without argument.
regenerate_if_too_old() {
	## Set variables according to the number of passed arguments
	case $# in
		0 )
			[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Need at least 1 argument." >&2
			exit 1
			;;
		1 )
			_file="${1}"
			[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Use default_mtime_minutes value: ${default_mtime_minutes}." >&2
			_max_mtime_minutes="${default_mtime_minutes}"
			;;
		* )
			## 2 arguments or more: never fall through with the values
			## left by a previous call
			_file="${1}"
			_max_mtime_minutes="${2}"
			;;
	esac

	if [ -e "${_file}" ]; then
		_current_timestamp=$(date "+%s")
		_file_mtime_timestamp=$(stat --format="%Y" -- "${_file}")

		## Age of the file and maximum allowed age, both in seconds
		_file_mtime_seconds=$(( _current_timestamp - _file_mtime_timestamp ))
		_max_mtime_seconds=$(( _max_mtime_minutes * 60 ))

		## Recent enough: keep the content
		if [ "${_file_mtime_seconds}" -le "${_max_mtime_seconds}" ]; then
			[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Don't need to empty ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})." >&2
			return 0
		fi

		[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Need to empty or create ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})." >&2
	else
		[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Need to create ${_file}, it does not exist yet." >&2
	fi

	## Create or truncate, then hand the file over to the Xymon account
	true > "${_file}"
	chown -- "${xymon_username}":"${xymon_groupname}" "${_file}"
}
|
||||||
|
## ]]]
|
||||||
|
## Test if a disk really supports SMART [[[
## smartctl can give a health status even without full support
## of SMART for some types (eg. scsi or megaraid).
## Example: SMART support is: Unavailable - device lacks SMART capability.
## First argument: device path
## Second argument: device type given to smartctl -d
## Result, in the global smart_support_msg:
##   * empty when every "SMART support is:" line contains Enabled or Available;
##   * otherwise the last "SMART support is:" line that contains neither;
##   * an error text when smartctl printed no "SMART support is:" line.
## The "SMART support is:" lines are cached in a file of Xymon's temp
## directory, which is regenerated once older than 24h.
## Debug messages go to stderr: the caller appends the stdout of its device
## loop to the state file, so they must not be printed on stdout.
is_disk_support_smart() {
	_disk="${1}"
	_type="${2}"

	_smarctl_support_result="${XYMONTMP}/${MACHINEDOTS}.smartoverall.support.$(basename "${_disk}").${_type}"

	smart_support_msg=""

	[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : is_disk_support_smart func − check if SMART is supported on : ${_disk}." >&2

	## Create or empty previous file only if older than 24h (1440 minutes)
	## (the helper only prints debug messages, keep them off stdout too)
	regenerate_if_too_old "${_smarctl_support_result}" 1440 >&2

	## Grep only "support" lines from disk's informations only if the file was emptied
	if test ! -s "${_smarctl_support_result}"; then
		smartctl -d "${_type}" -i -- "${_disk}" | grep -E -- "^SMART support is:" >> "${_smarctl_support_result}"
	fi

	if test -s "${_smarctl_support_result}"; then
		## Keep the last "support" line that reports neither Enabled nor
		## Available (empty result when all of them do)
		smart_support_msg=$(grep -v -E -- "(Enabled|Available)" "${_smarctl_support_result}" | tail -n 1)
	else
		smart_support_msg="smartctl was not able to open ${_disk} DEVICE with ${_type} TYPE."
	fi

	if [ -z "${smart_support_msg}" ]; then
		[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : is_disk_support_smart func − SMART seems fully supported on : ${_disk} with ${_type} type." >&2
	else
		[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : is_disk_support_smart func − SMART is not fully supported on : ${_disk} with ${_type} type. See smartctl informations :\n${smart_support_msg}" >&2
	fi

	## Clean temp files
	### As the Xymon's tmpdir is used to store log files, no need to delete them at
	### the end of the script. They will be emptied, reused or regenerate (if oldest
	### than the expected interval) at the next run.

}
|
||||||
|
## ]]]
|
||||||
|
## Test the type of disk with smartctl [[[
## Because the scanned one might not be the one to use
## First argument: device path
## Second argument: type reported by the device scan
## The scanned type is tried first, then "auto". On the first type for which
## is_disk_support_smart leaves smart_support_msg empty, TYPE is set to that
## type and SMART_SUPPORT_MSG is emptied. If no type fits, TYPE stays empty
## and SMART_SUPPORT_MSG holds the message of the last attempt.
choose_correct_type() {
	_disk="${1}"
	_scanned_type="${2}"
	_default_type="auto"

	TYPE=""
	SMART_SUPPORT_MSG=""

	for test_type in "${_scanned_type}" "${_default_type}"; do
		is_disk_support_smart "${_disk}" "${test_type}"

		## A message means this type is not usable: remember it and go on
		if [ -n "${smart_support_msg}" ]; then
			SMART_SUPPORT_MSG="${smart_support_msg}"
			continue
		fi

		## No message, the type is correct
		TYPE="${test_type}"
		SMART_SUPPORT_MSG=""
		return
	done

}
|
||||||
|
## ]]]
|
||||||
|
|
||||||
|
# Create or empty previous files
true > "${plugin_result}"
chown -- "${xymon_username}":"${xymon_groupname}" "${plugin_result}"
true > "${plugin_state}"
chown -- "${xymon_username}":"${xymon_groupname}" "${plugin_state}"
## Create or empty previous file only if older than 24h (1440 minutes)
regenerate_if_too_old "${device_list}" 1440
regenerate_if_too_old "${drivedb_list}" 1440

# Get the list of all available devices if the previous list was emptied
if test ! -s "${device_list}"; then
	smartctl --scan >> "${device_list}"
fi

# If the file is not empty
if test -s "${device_list}"; then
	## Everything this loop prints on stdout is appended to ${plugin_state}
	## (one "<weight>&<color> <disk> <type>" line per device), so debug
	## messages are sent to stderr.
	while IFS= read -r LINE; do
		## Get device path
		DISK=$(printf '%s\n' "${LINE}" | cut -d" " -f1)
		## Try to determine the best type
		SCANNED_TYPE=$(printf '%s\n' "${LINE}" | cut -d" " -f3)
		## The function and its helpers only print debug messages
		choose_correct_type "${DISK}" "${SCANNED_TYPE}" >&2

		## If no correct type was found for this device
		if [ -z "${TYPE}" ]; then
			[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : SMART is not fully supported." >&2
			## Real line break between the two parts (a "\n" given as a %s
			## argument is not interpreted by printf)
			DRES=$(printf '%s\n%s' "SMART Health Status can't be determine because of:" "${SMART_SUPPORT_MSG}")
			DCODE="2"
			TYPE="unsupported"
			### Still try to display informations about unsupported device (eg. RAID controller,…)
			DID="unsupported-${DISK}"
			DINFO=$(smartctl -i -d "${SCANNED_TYPE}" "${DISK}" | grep -v -E "^smartctl|^Copyright|^$" || printf '%s' "Can't get informations due to no SMART support.")
			DDRIVEDB_MSG=""
			## Reset it as well, otherwise the message of the previous
			## device would be displayed for this one
			DSELFTEST_MSG=""
			DSELFTEST=""
		else
			[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : SMART seems fully supported, proceed normally." >&2
			### Get SMART Health Status and return code
			DRES=$(/usr/sbin/smartctl -H -d "${TYPE}" -n standby "${DISK}")
			DCODE=$?
			### Get disk's serial number and informations (single smartctl call)
			DINFO_RAW=$(smartctl -i -d "${TYPE}" "${DISK}")
			DID=$(printf '%s\n' "${DINFO_RAW}" | awk '/.erial .umber:/ { print $NF }')
			DINFO=$(printf '%s\n' "${DINFO_RAW}" | grep -v -E "^smartctl|^Copyright|^$")

			## If the model of the disk is known from smartmontools database
			if smartctl -d "${TYPE}" -P show "${DISK}" | grep -qi -- "drive found in"; then
				DDRIVEDB_MSG="&green Device is known in smartmontools database. You might consider using a more advanced plugin such as:
https://github.com/skazi0/xymon-plugins/blob/master/client/ext/smart"
			else
				DDRIVEDB_MSG="&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase"
			fi

			### Read the selftest log once and reuse it for every check
			DSELFTEST_RAW=$(smartctl -d "${TYPE}" -l selftest "${DISK}")
			DSELFTEST=$(printf '%s\n' "${DSELFTEST_RAW}" | grep -v -E -- "^smartctl|^Copyright|^$")
			## If no selftest have been recorded
			if printf '%s\n' "${DSELFTEST_RAW}" | grep -qi -- "No self-tests"; then
				DSELFTEST_MSG="&red No self-tests recorded:"
				DCODE="8"
			## If the device doesn't support test logging
			elif printf '%s\n' "${DSELFTEST_RAW}" | grep -qEi -- "(does not support.*logging|Log not supported)"; then
				DSELFTEST_MSG="&clear Test logging are not supported:"
			else
				DSELFTEST_MSG=""
				### If the device is also known from smartmontools database
				### and not already present in the list of compatible disk
				### (whole-line, fixed-string match: /dev/sda must not match /dev/sdaa)
				case "${DDRIVEDB_MSG}" in
					*green* )
						if ! grep -q -x -F -- "${DISK}" "${drivedb_list}"; then
							printf '%s\n' "${DISK}" >> "${drivedb_list}"
						fi
						;;
				esac
			fi
		fi

		## Test health status: isolate bits 2, 8 and 32 of the return code
		## (their meaning is described in the EXIT STATUS section of smartctl(8))
		DSTBY=$(( DCODE & 2 ))
		DFAIL=$(( DCODE & 8 ))
		DWARN=$(( DCODE & 32 ))

		## According to health, give a weight to each color to easily get the page status
		## (the lowest weight found is used as global color below)
		if test "${DSTBY}" -ne 0; then
			COLOR="4&clear"
		elif test "${DFAIL}" -ne 0; then
			COLOR="1&red"
		elif test "${DWARN}" -ne 0; then
			COLOR="2&yellow"
		else
			COLOR="3&green"
		fi

		## Avoid duplicate device: skip it when its identifier is already in the report.
		## A device without identifier is always reported (an empty pattern would
		## match any line) and the identifier is searched as a fixed string.
		if [ -z "${DID}" ] || ! grep -q -F -- "${DID}" "${plugin_result}"; then
			## For summary
			printf '%s\n' "${COLOR} ${DISK} ${TYPE}"

			## For detailed informations
			{
				printf '%s\n' "${COLOR} ${DISK} ${TYPE}" | cut -c2-
				echo ""
				printf '%s\n' "${DRES}" | grep -v -E "^smartctl|^Copyright|^$|^==="
				printf '%s\n' "${DDRIVEDB_MSG}"
				printf '%s\n' "${DINFO}"
				printf '%s\n' "${DSELFTEST_MSG}"
				printf '%s\n' "${DSELFTEST}" | head -n 12
				echo "------------------------------------------------------------"
				echo ""
				echo ""
			} >> "${plugin_result}"
		fi
	done < "${device_list}" >> "${plugin_state}"

# If the file is empty
else
	echo "1&red Error while scanning devices with smartctl" >> "${plugin_state}"
fi

# Set the global color according to the highest alert
# (first field of each line, lowest weight first, weight and "&" removed)
COLOR=$(< "${plugin_state}" awk '{print $1}' | sort -u | head -n 1 | cut -c3-)

# Send informations to Xymon server
$XYMON "${XYMSRV}" "status ${MACHINE}.${plugin_name} ${COLOR} SMART health check

$(< "${plugin_state}" cut -c2-)

==================== Detailed status ====================

$(cat "${plugin_result}")
"

## Clean temp files
### As the Xymon's tmpdir is used to store log files, no need to delete them at
### the end of the script. They will be emptied, reused or regenerate (if oldest
### than the expected interval) at the next run.

exit 0
|
66
xymon/plugins/client/ext/test.int.compare.sh
Executable file
66
xymon/plugins/client/ext/test.int.compare.sh
Executable file
@ -0,0 +1,66 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# .. vim: foldmarker=[[[,]]]:foldmethod=marker
|
||||||
|
|
||||||
|
# Vars [[[
## "0" turns on the DEBUG messages, which are guarded by `[ "${debug}" -eq "0" ]`
debug="0"

## Colors [[[
c_redb='\033[1;31m'
c_magentab='\033[1;35m'
c_reset='\033[0m'
## ]]]

## Maximum age (in minutes) used when the function gets a single argument
default_mtime_minutes="600"

## Sample values for the list update tested at the end of the script
drivedb_list="/tmp/test.css"
DISK="/dev/sda"
DDRIVEDB_MSG="green"
#DDRIVEDB_MSG="red"
# ]]]
|
||||||
|
## Create or empty a file if it's too old [[[
## First argument (required): path to the file
## Second argument (optional): maximum number of minutes since the last
##   modification; defaults to ${default_mtime_minutes}. Extra arguments
##   are ignored.
## A file that does not exist yet is created directly, without calling stat
## on the missing path.
## Debug messages (printed when debug is 0) are written to stderr.
## Exits the whole script with status 1 when called without argument.
regenerate_if_too_old() {
	## Set variables according to the number of passed arguments
	case $# in
		0 )
			[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Need at least 1 argument." >&2
			exit 1
			;;
		1 )
			_file="${1}"
			[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Use default_mtime_minutes value: ${default_mtime_minutes}." >&2
			_max_mtime_minutes="${default_mtime_minutes}"
			;;
		* )
			## 2 arguments or more: never reuse the values of a previous call
			_file="${1}"
			_max_mtime_minutes="${2}"
			;;
	esac

	if [ -e "${_file}" ]; then
		_current_timestamp=$(date "+%s")
		_file_mtime_timestamp=$(stat --format="%Y" -- "${_file}")

		## Age of the file and maximum allowed age, both in seconds
		_file_mtime_seconds=$(( _current_timestamp - _file_mtime_timestamp ))
		_max_mtime_seconds=$(( _max_mtime_minutes * 60 ))

		## Recent enough: keep the content
		if [ "${_file_mtime_seconds}" -le "${_max_mtime_seconds}" ]; then
			[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Don't need to empty ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})." >&2
			return 0
		fi

		[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Need to empty or create ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})." >&2
	else
		[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Need to create ${_file}, it does not exist yet." >&2
	fi

	## Create or truncate the file
	true > "${_file}"
}
|
||||||
|
## ]]]
|
||||||
|
|
||||||
|
# Exercise the function: default maximum age, then explicit ones
regenerate_if_too_old /tmp/css_style.css
regenerate_if_too_old /tmp/font.css 60
regenerate_if_too_old /tmp/user/1337/serverauth.qGdeK8OOzr 1440
regenerate_if_too_old /tmp/test.css 600

# Add the disk to the list when the message contains "green" and the list
# does not mention the disk yet
case "${DDRIVEDB_MSG}" in
	*green* )
		grep -q -- "${DISK}" "${drivedb_list}" || echo "${DISK}" >> "${drivedb_list}"
		;;
esac


exit 0
|
28
xymon/sample.messages/apt.alert
Normal file
28
xymon/sample.messages/apt.alert
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
BBCOLORLEVEL="red"
|
||||||
|
BBALPHAMSG="HOST.DOMAIN.ORG:apt red [168321]
|
||||||
|
red Wed Aug 22 11:26:34 2018 - apt NOT ok
|
||||||
|
Debian GNU/Linux 9.5 (stretch)
|
||||||
|
|
||||||
|
&red Security updates (4): apt-get install openssh-client openssh-server openssh-sftp-server ssh
|
||||||
|
openssh-client (1:7.4p1-10+deb9u3 1:7.4p1-10+deb9u4)
|
||||||
|
openssh-server (1:7.4p1-10+deb9u3 1:7.4p1-10+deb9u4)
|
||||||
|
openssh-sftp-server (1:7.4p1-10+deb9u3 1:7.4p1-10+deb9u4)
|
||||||
|
ssh (1:7.4p1-10+deb9u3 1:7.4p1-10+deb9u4)
|
||||||
|
|
||||||
|
&red Last apt update: 3.0 day(s) ago
|
||||||
|
|
||||||
|
|
||||||
|
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=apt
|
||||||
|
"
|
||||||
|
ACKCODE="168321"
|
||||||
|
RCPT="1234567890"
|
||||||
|
BBHOSTNAME="HOST.DOMAIN.ORG"
|
||||||
|
MACHIP="111222333444"
|
||||||
|
BBSVCNAME="apt"
|
||||||
|
BBSVCNUM="0"
|
||||||
|
BBHOSTSVC="HOST.DOMAIN.ORG.apt"
|
||||||
|
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.apt"
|
||||||
|
BBNUMERIC="000111222333444168321"
|
||||||
|
RECOVERED="0"
|
||||||
|
DOWNSECS="36"
|
||||||
|
DOWNSECSMSG=""
|
62
xymon/sample.messages/cpu.alert
Normal file
62
xymon/sample.messages/cpu.alert
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
BBCOLORLEVEL="yellow"
|
||||||
|
BBALPHAMSG="HOST.DOMAIN.ORG:cpu yellow [520216]
|
||||||
|
yellow Tue Aug 21 11:43:22 CEST 2018 up: 00:00, 0 users, 29 procs, load=1.90
|
||||||
|
&yellow Machine recently rebooted
|
||||||
|
System clock is 0 seconds off
|
||||||
|
|
||||||
|
|
||||||
|
top - 11:43:23 up 0 min, 0 users, load average: 1.49, 1.90, 3.03
|
||||||
|
Tasks: 33 total, 2 running, 31 sleeping, 0 stopped, 0 zombie
|
||||||
|
%Cpu(s): 3.8 us, 5.9 sy, 0.0 ni, 89.3 id, 0.8 wa, 0.0 hi, 0.1 si, 0.0 st
|
||||||
|
KiB Mem : 1048576 total, 897468 free, 33268 used, 117840 buff/cache
|
||||||
|
KiB Swap: 1048576 total, 1048576 free, 0 used. 1015308 avail Mem
|
||||||
|
|
||||||
|
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
|
||||||
|
263 xymon 20 0 64284 6264 5072 D 100.0 0.6 0:00.05 apt-cache
|
||||||
|
1 root 20 0 69588 7680 5968 S 0.0 0.7 0:00.08 systemd
|
||||||
|
43 root 20 0 93256 16952 16356 S 0.0 1.6 0:00.03 systemd-j+
|
||||||
|
89 root 20 0 22560 1052 0 S 0.0 0.1 0:00.00 dhclient
|
||||||
|
121 root 20 0 29668 2804 2520 S 0.0 0.3 0:00.00 cron
|
||||||
|
122 root 20 0 321836 2756 2348 S 0.0 0.3 0:00.00 rsyslogd
|
||||||
|
124 root 20 0 171944 9704 8308 S 0.0 0.9 0:00.00 sssd
|
||||||
|
130 message+ 20 0 59424 4036 3576 S 0.0 0.4 0:00.00 dbus-daem+
|
||||||
|
145 daemon 20 0 27968 2120 1916 S 0.0 0.2 0:00.00 atd
|
||||||
|
147 root 20 0 71988 5648 4896 S 0.0 0.5 0:00.00 sshd
|
||||||
|
164 xymon 20 0 4272 1488 1332 S 0.0 0.1 0:00.00 xymonlaun+
|
||||||
|
167 xymon 20 0 4292 712 640 S 0.0 0.1 0:00.00 xymonclie+
|
||||||
|
168 xymon 20 0 18300 5152 3940 S 0.0 0.5 0:00.00 apt
|
||||||
|
176 root 20 0 201132 15804 10264 S 0.0 1.5 0:00.02 sssd_be
|
||||||
|
177 root 20 0 17076 160 0 S 0.0 0.0 0:00.00 in.tftpd
|
||||||
|
191 root 20 0 168252 33204 32060 S 0.0 3.2 0:00.02 sssd_nss
|
||||||
|
192 root 20 0 147780 7452 6396 S 0.0 0.7 0:00.00 sssd_pam
|
||||||
|
193 root 20 0 141356 7364 6344 S 0.0 0.7 0:00.00 sssd_auto+
|
||||||
|
198 xymon 20 0 4292 1360 1252 S 0.0 0.1 0:00.00 xymonclie+
|
||||||
|
211 arpwatch 20 0 31872 4792 4104 S 0.0 0.5 0:00.00 arpwatch
|
||||||
|
214 root 20 0 54532 2768 2276 S 0.0 0.3 0:00.00 systemd-l+
|
||||||
|
223 root 20 0 69592 1716 4 S 0.0 0.2 0:00.00 (agetty)
|
||||||
|
224 root 20 0 69592 1716 4 S 0.0 0.2 0:00.00 (agetty)
|
||||||
|
251 root 20 0 361140 15748 6296 S 0.0 1.5 0:00.04 fail2ban-+
|
||||||
|
292 root 20 0 81152 6396 5636 S 0.0 0.6 0:00.00 postmulti
|
||||||
|
298 root 20 0 4292 752 680 S 0.0 0.1 0:00.00 postfix-s+
|
||||||
|
306 xymon 20 0 38236 2984 2584 R 0.0 0.3 0:00.00 top
|
||||||
|
345 root 20 0 4292 712 636 S 0.0 0.1 0:00.00 sh
|
||||||
|
349 root 20 0 22536 1392 1164 D 0.0 0.1 0:00.00 nft
|
||||||
|
351 root 20 0 4292 760 688 S 0.0 0.1 0:00.00 postfix-s+
|
||||||
|
353 root 20 0 4292 96 0 S 0.0 0.0 0:00.00 postfix-s+
|
||||||
|
354 root 20 0 79236 4740 4064 R 0.0 0.5 0:00.00 postconf
|
||||||
|
355 root 20 0 13216 1004 900 S 0.0 0.1 0:00.00 sed
|
||||||
|
|
||||||
|
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=cpu
|
||||||
|
"
|
||||||
|
ACKCODE="520216"
|
||||||
|
RCPT="1234567890"
|
||||||
|
BBHOSTNAME="HOST.DOMAIN.ORG"
|
||||||
|
MACHIP="111222333444"
|
||||||
|
BBSVCNAME="cpu"
|
||||||
|
BBSVCNUM="200"
|
||||||
|
BBHOSTSVC="HOST.DOMAIN.ORG.cpu"
|
||||||
|
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.cpu"
|
||||||
|
BBNUMERIC="200111222333444520216"
|
||||||
|
RECOVERED="0"
|
||||||
|
DOWNSECS="0"
|
||||||
|
DOWNSECSMSG=""
|
41
xymon/sample.messages/files.alert
Normal file
41
xymon/sample.messages/files.alert
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
BBCOLORLEVEL="yellow"
|
||||||
|
BBALPHAMSG="HOST.DOMAIN.ORG:files yellow [275849]
|
||||||
|
yellow Thu Oct 10 11:17:18 CEST 2019 - Files NOT ok
|
||||||
|
|
||||||
|
&yellow <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&SECTION=file:/var/log/cron.log">/var/log/cron.log</a>
|
||||||
|
File was modified 4642 seconds ago - should be <3800
|
||||||
|
|
||||||
|
&yellow <a href="/xymon-cgi/svcstatus.sh?CLIENT=vmgit.101010.virtual&SECTION=file:/tmp/.github.TEST.upgrade">/tmp/.github.TEST.upgrade</a>
|
||||||
|
File exists
|
||||||
|
|
||||||
|
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&SECTION=logfile:/var/log/kern.log">/var/log/kern.log</a>
|
||||||
|
|
||||||
|
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&SECTION=file:/var/log/messages">/var/log/messages</a>
|
||||||
|
|
||||||
|
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&SECTION=file:/var/log/syslog">/var/log/syslog</a>
|
||||||
|
|
||||||
|
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&SECTION=dir:/var/log">/var/log</a>
|
||||||
|
|
||||||
|
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&SECTION=dir:/tmp/">/tmp/</a>
|
||||||
|
|
||||||
|
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&SECTION=file:/bin/su">/bin/su</a>
|
||||||
|
|
||||||
|
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&SECTION=file:/usr/bin/sudo">/usr/bin/sudo</a>
|
||||||
|
|
||||||
|
&green <a href="/xymon-cgi/svcstatus.sh?CLIENT=HOST.DOMAIN.ORG&SECTION=file:/var/log/installer">/var/log/installer</a>
|
||||||
|
|
||||||
|
|
||||||
|
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=files
|
||||||
|
"
|
||||||
|
ACKCODE="275849"
|
||||||
|
RCPT="1234567890"
|
||||||
|
BBHOSTNAME="HOST.DOMAIN.ORG"
|
||||||
|
MACHIP="037187001062"
|
||||||
|
BBSVCNAME="files"
|
||||||
|
BBSVCNUM="0"
|
||||||
|
BBHOSTSVC="HOST.DOMAIN.ORG.files"
|
||||||
|
BBHOSTSVCCOMMAS="HOST,DOMAIN.ORG.files"
|
||||||
|
BBNUMERIC="000037187001062275849"
|
||||||
|
RECOVERED="0"
|
||||||
|
DOWNSECS="603"
|
||||||
|
DOWNSECSMSG=""
|
29
xymon/sample.messages/libs.alert
Normal file
29
xymon/sample.messages/libs.alert
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
BBCOLORLEVEL="yellow"
|
||||||
|
BBALPHAMSG="HOST.DOMAIN.ORG:libs yellow [0]
|
||||||
|
yellow Thu Aug 16 16:41:56 2018 - libs NOT ok
|
||||||
|
&yellow Machine should be rebooted. Running not the newest installed kernel:
|
||||||
|
|
||||||
|
Running kernel=\"4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u1 (2018-08-03)\"
|
||||||
|
Newest installed kernel=\"4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u2 (2018-08-13)\"
|
||||||
|
|
||||||
|
&yellow The following processes have libs linked that were upgraded:
|
||||||
|
|
||||||
|
root:
|
||||||
|
/lib/systemd/systemd-udevd (546)
|
||||||
|
systemd-timesync:
|
||||||
|
/usr/lib/postfix/qmgr (52880)
|
||||||
|
|
||||||
|
|
||||||
|
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=libs"
|
||||||
|
ACKCODE="0"
|
||||||
|
RCPT="1234567890"
|
||||||
|
BBHOSTNAME="HOST.DOMAIN.ORG"
|
||||||
|
MACHIP="111222333444"
|
||||||
|
BBSVCNAME="libs"
|
||||||
|
BBSVCNUM="0"
|
||||||
|
BBHOSTSVC="HOST.DOMAIN.ORG.libs"
|
||||||
|
BBHOSTSVCCOMMAS="HOST.DOMAIN.ORG.libs"
|
||||||
|
BBNUMERIC="0001112223334440"
|
||||||
|
RECOVERED="0"
|
||||||
|
DOWNSECS="36320"
|
||||||
|
DOWNSECSMSG=""
|
22
xymon/sample.messages/memory.alert
Normal file
22
xymon/sample.messages/memory.alert
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
BBCOLORLEVEL="red"
|
||||||
|
BBALPHAMSG="HOST.DOMAIN.ORG:memory red [251314]
|
||||||
|
red Tue Aug 21 11:42:42 CEST 2018 - Memory CRITICAL
|
||||||
|
Memory Used Total Percentage
|
||||||
|
&green Real/Physical 1017M 1536M 66%
|
||||||
|
&green Actual/Virtual 692M 1536M 45%
|
||||||
|
&red Swap/Page 1024M 1024M 100%
|
||||||
|
|
||||||
|
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=memory
|
||||||
|
"
|
||||||
|
ACKCODE="251314"
|
||||||
|
RCPT="1234567890"
|
||||||
|
BBHOSTNAME="HOST.DOMAIN.ORG"
|
||||||
|
MACHIP="111222333444"
|
||||||
|
BBSVCNAME="memory"
|
||||||
|
BBSVCNUM="0"
|
||||||
|
BBHOSTSVC="HOST.DOMAIN.ORG.memory"
|
||||||
|
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.memory"
|
||||||
|
BBNUMERIC="000111222333444251314"
|
||||||
|
RECOVERED="0"
|
||||||
|
DOWNSECS="0"
|
||||||
|
DOWNSECSMSG=""
|
38
xymon/sample.messages/ntpq.alert
Normal file
38
xymon/sample.messages/ntpq.alert
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
BBCOLORLEVEL="red"
|
||||||
|
BBALPHAMSG="HOST.DOMAIN.ORG:ntpq red [842850]
|
||||||
|
red Tue Aug 21 11:20:53 2018 - ntpq NOT ok
|
||||||
|
NTP peers:
|
||||||
|
|
||||||
|
remote refid st t when poll reach delay offset jitter
|
||||||
|
==============================================================================
|
||||||
|
&clear WWW.XXX.YYY.ZZZ 213.251.53.11 3 u 18 64 1 0.542 -2.836 0.000
|
||||||
|
|
||||||
|
&red No system peer entry ("*") found
|
||||||
|
|
||||||
|
remote refid st t when poll reach delay offset jitter
|
||||||
|
==============================================================================
|
||||||
|
&green *WWW.XXX.YYY.ZZZ 51.15.178.157 3 u 243 1024 377 1.194 102.549 83.035
|
||||||
|
|
||||||
|
SyspeerDelay: 1.194
|
||||||
|
SyspeerOffset: 102.549
|
||||||
|
SyspeerJitter: 83.035
|
||||||
|
|
||||||
|
SyspeerOffset thresholds:
|
||||||
|
Warning: 100ms
|
||||||
|
Critical: 2000ms
|
||||||
|
&yellow SyspeerOffset > 100ms
|
||||||
|
|
||||||
|
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=ntpq
|
||||||
|
"
|
||||||
|
ACKCODE="842850"
|
||||||
|
RCPT="1234567890"
|
||||||
|
BBHOSTNAME="HOST.DOMAIN.ORG"
|
||||||
|
MACHIP="111222333444"
|
||||||
|
BBSVCNAME="ntpq"
|
||||||
|
BBSVCNUM="0"
|
||||||
|
BBHOSTSVC="HOST.DOMAIN.ORG.ntpq"
|
||||||
|
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.ntpq"
|
||||||
|
BBNUMERIC="000111222333444842850"
|
||||||
|
RECOVERED="0"
|
||||||
|
DOWNSECS="1544"
|
||||||
|
DOWNSECSMSG=""
|
31
xymon/sample.messages/ports.alert
Normal file
31
xymon/sample.messages/ports.alert
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
BBCOLORLEVEL="yellow"
|
||||||
|
BBALPHAMSG="HOST.DOMAIN.ORG:ports yellow [40450]
|
||||||
|
yellow Tue Aug 21 11:43:22 CEST 2018 - Ports NOT ok
|
||||||
|
&yellow SMTP listener (found 0, req. between 1 and 2)
|
||||||
|
&green SSH logins (found 0, req. at most 10)
|
||||||
|
&green Bad listeners (found 0, req. at most 0)
|
||||||
|
|
||||||
|
Active Internet connections (servers and established)
|
||||||
|
Proto Recv-Q Send-Q Local Address Foreign Address State
|
||||||
|
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN
|
||||||
|
tcp 0 0 111.222.333.444:45250 WWW.XXX.YYY.ZZZ:1984 TIME_WAIT
|
||||||
|
tcp 0 0 111.222.333.444:54522 444.333.222.111:389 ESTABLISHED
|
||||||
|
tcp 0 0 111.222.333.444:45244 WWW.XXX.YYY.ZZZ:1984 TIME_WAIT
|
||||||
|
tcp 0 0 111.222.333.444:45242 WWW.XXX.YYY.ZZZ:1984 TIME_WAIT
|
||||||
|
tcp6 0 0 :::22 :::* LISTEN
|
||||||
|
udp 0 0 0.0.0.0:68 0.0.0.0:*
|
||||||
|
udp6 0 0 :::69 :::*
|
||||||
|
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=ports
|
||||||
|
"
|
||||||
|
ACKCODE="40450"
|
||||||
|
RCPT="1234567890"
|
||||||
|
BBHOSTNAME="HOST.DOMAIN.ORG"
|
||||||
|
MACHIP="111222333444"
|
||||||
|
BBSVCNAME="ports"
|
||||||
|
BBSVCNUM="0"
|
||||||
|
BBHOSTSVC="HOST.DOMAIN.ORG.ports"
|
||||||
|
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.ports"
|
||||||
|
BBNUMERIC="00011122233344440450"
|
||||||
|
RECOVERED="0"
|
||||||
|
DOWNSECS="6"
|
||||||
|
DOWNSECSMSG=""
|
71
xymon/sample.messages/procs.alert
Normal file
71
xymon/sample.messages/procs.alert
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
BBCOLORLEVEL="red"
|
||||||
|
BBALPHAMSG="HOST.DOMAIN.ORG:procs red [757744]
|
||||||
|
red Tue Aug 21 13:29:28 CEST 2018 - Processes NOT ok
|
||||||
|
&green systemd-journald (found 1, req. between 1 and 1)
|
||||||
|
&green systemd-logind (found 1, req. between 1 and 1)
|
||||||
|
&green CRON (found 1, req. between 1 and 999)
|
||||||
|
&green ATD (found 1, req. between 1 and 999)
|
||||||
|
&green MTA-Stretch (found 1, req. between 1 and 1)
|
||||||
|
&green SSHD (found 3, req. between 1 and 20)
|
||||||
|
&green SSSD (found 1, req. between 1 and 1)
|
||||||
|
&green Fail2Ban (found 1, req. between 1 and 1)
|
||||||
|
&red tftpd-hpa (found 0, req. between 1 and 1)
|
||||||
|
&yellow ARPwatch (found 0, req. between 1 and 1)
|
||||||
|
&red Jenkins (found 0, req. between 1 and 5)
|
||||||
|
|
||||||
|
PID PPID USER STARTED S PRI %CPU TIME %MEM RSZ VSZ CMD
|
||||||
|
1 0 root 11:43:19 S 19 0.0 00:00:00 0.7 7816 69724 /sbin/init
|
||||||
|
43 1 root 11:43:20 S 19 0.0 00:00:00 2.4 25848 109744 /lib/systemd/systemd-journald
|
||||||
|
89 1 root 11:43:20 S 19 0.0 00:00:00 0.1 1052 22560 /sbin/dhclient -4 -v -pf /run/dhclient.eth0.pid -lf /var/lib/dhcp/dhclient.eth0.leases -I -df /var/lib/dhcp/dhclient6.eth0.leases eth0
|
||||||
|
121 1 root 11:43:21 S 19 0.0 00:00:00 0.2 2804 29668 /usr/sbin/cron -f
|
||||||
|
122 1 root 11:43:21 S 19 0.0 00:00:00 0.2 2756 321836 /usr/sbin/rsyslogd -n
|
||||||
|
124 1 root 11:43:21 S 19 0.0 00:00:00 0.9 9704 171944 /usr/sbin/sssd -i -f
|
||||||
|
176 124 root 11:43:21 S 19 0.0 00:00:00 1.7 18244 212360 \_ /usr/lib/x86_64-linux-gnu/sssd/sssd_be --domain ur1 --uid 0 --gid 0 --debug-to-files
|
||||||
|
191 124 root 11:43:21 S 19 0.0 00:00:00 3.1 33204 168252 \_ /usr/lib/x86_64-linux-gnu/sssd/sssd_nss --uid 0 --gid 0 --debug-to-files
|
||||||
|
192 124 root 11:43:21 S 19 0.0 00:00:00 0.8 8672 147912 \_ /usr/lib/x86_64-linux-gnu/sssd/sssd_pam --uid 0 --gid 0 --debug-to-files
|
||||||
|
193 124 root 11:43:21 S 19 0.0 00:00:00 0.7 7364 141356 \_ /usr/lib/x86_64-linux-gnu/sssd/sssd_autofs --uid 0 --gid 0 --debug-to-files
|
||||||
|
145 1 daemon 11:43:21 S 19 0.0 00:00:00 0.2 2120 27968 /usr/sbin/atd -f -l 9.6 -b 98
|
||||||
|
147 1 root 11:43:21 S 19 0.0 00:00:00 0.5 6180 71988 /usr/sbin/sshd -D
|
||||||
|
4810 147 root 13:29:14 S 19 0.0 00:00:00 0.7 7468 134176 \_ sshd: USER [priv]
|
||||||
|
5023 4810 USER 13:29:14 S 19 0.0 00:00:00 0.3 3952 134176 \_ sshd: USER@pts/2
|
||||||
|
5024 5023 USER 13:29:14 S 19 0.0 00:00:00 0.5 5608 59296 \_ zsh
|
||||||
|
5057 5024 USER 13:29:15 S 19 0.0 00:00:00 0.2 2988 19336 \_ tmux
|
||||||
|
223 1 root 11:43:21 S 19 0.0 00:00:00 0.1 2060 14316 /sbin/agetty -o -p -- \u --noclear --keep-baud tty1 115200,38400,9600 linux
|
||||||
|
224 1 root 11:43:21 S 19 0.0 00:00:00 0.1 2056 14316 /sbin/agetty -o -p -- \u --noclear --keep-baud console 115200,38400,9600 linux
|
||||||
|
251 1 root 11:43:22 S 19 0.0 00:00:01 1.5 15800 361140 /usr/bin/python3 /usr/bin/fail2ban-server -s /var/run/fail2ban/fail2ban.sock -p /var/run/fail2ban/fail2ban.pid -x -b
|
||||||
|
412 1 root 11:43:22 S 19 0.0 00:00:00 0.4 4244 83252 /usr/lib/postfix/sbin/master -w
|
||||||
|
415 412 postfix 11:43:22 S 19 0.0 00:00:00 0.6 6628 95528 \_ qmgr -l -t unix -u
|
||||||
|
4453 412 postfix 13:19:59 S 19 0.0 00:00:00 0.6 6532 95480 \_ showq -t unix -u -c
|
||||||
|
4504 412 postfix 13:23:22 S 19 0.0 00:00:00 0.6 6524 95480 \_ pickup -l -t unix -u -c
|
||||||
|
416 1 netdata 11:43:22 S 19 0.0 00:02:06 7.1 75324 224012 /usr/sbin/netdata -D
|
||||||
|
432 416 netdata 11:43:23 R 19 0.0 00:00:16 0.2 2628 28108 \_ /usr/lib/x86_64-linux-gnu/netdata/plugins.d/apps.plugin 1
|
||||||
|
433 416 netdata 11:43:23 S 19 0.0 00:01:30 4.6 48576 121208 \_ /usr/bin/python /usr/lib/x86_64-linux-gnu/netdata/plugins.d/python.d.plugin 1
|
||||||
|
4643 416 netdata 13:25:26 S 19 0.0 00:00:00 0.2 2688 9700 \_ bash /usr/lib/x86_64-linux-gnu/netdata/plugins.d/tc-qos-helper.sh 1
|
||||||
|
660 1 xymon-s+ 11:43:27 S 19 0.0 00:00:00 0.6 6788 68928 /lib/systemd/systemd --user
|
||||||
|
662 660 xymon-s+ 11:43:27 S 19 0.0 00:00:00 0.1 1904 123912 \_ (sd-pam)
|
||||||
|
881 1 message+ 11:43:27 S 19 0.0 00:00:00 0.4 4384 67636 /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation
|
||||||
|
883 1 root 11:43:27 S 19 0.0 00:00:00 0.5 5552 73268 /lib/systemd/systemd-logind
|
||||||
|
1377 1 USER 11:49:11 S 19 0.0 00:00:00 0.6 6716 68932 /lib/systemd/systemd --user
|
||||||
|
1379 1377 USER 11:49:11 S 19 0.0 00:00:00 0.1 2040 132260 \_ (sd-pam)
|
||||||
|
4768 1 xymon 13:28:48 S 19 0.0 00:00:00 0.0 756 4292 sh -c vmstat 300 2 1>/var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG.4719 2>&1; mv /var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG.4719 /var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG
|
||||||
|
4770 4768 xymon 13:28:48 S 19 0.0 00:00:00 0.1 1400 24900 \_ vmstat 300 2
|
||||||
|
5059 1 USER 13:29:15 S 19 0.0 00:00:00 0.3 3364 28044 tmux
|
||||||
|
5067 5059 USER 13:29:15 S 19 0.0 00:00:00 0.6 6396 62052 \_ -zsh
|
||||||
|
5187 1 xymon 13:29:27 S 19 0.0 00:00:00 0.1 1576 4272 /usr/lib/xymon/client/bin/xymonlaunch --config=/etc/xymon/clientlaunch.cfg --log=/var/log/xymon/clientlaunch.log --pidfile=/var/run/xymon/clientlaunch.pid
|
||||||
|
5191 5187 xymon 13:29:27 S 19 0.0 00:00:00 0.1 1572 4292 \_ /bin/sh /usr/lib/xymon/client/bin/xymonclient.sh
|
||||||
|
5214 5191 xymon 13:29:27 S 19 0.0 00:00:00 0.1 1600 4292 | \_ /bin/sh /usr/lib/xymon/client/bin/xymonclient-linux.sh
|
||||||
|
5256 5214 xymon 13:29:28 R 19 0.0 00:00:00 0.2 2804 44404 | \_ ps -Aww f -o pid,ppid,user,start,state,pri,pcpu,time:12,pmem,rsz:10,vsz:10,cmd
|
||||||
|
5192 5187 xymon 13:29:27 S 19 0.0 00:00:00 0.4 5164 18300 \_ /usr/bin/perl -w /usr/lib/xymon/client/ext/apt
|
||||||
|
5231 5192 xymon 13:29:28 R 19 0.0 00:00:00 3.9 41128 70068 \_ apt-cache policy acl adduser apt apt-listchanges apt-transport-https apt-utils aptitude aptitude-common arpwatch at base-files base-passwd bash bash-completion bind9-host binutils bsd-mailx bsdmainutils bsdutils bzip2 ca-certificates coreutils cpio cpp cpp-6 cracklib-runtime cron curl dash dbus dctrl-tools debconf debconf-i18n debian-archive-keyring debian-faq debian-goodies debianutils debsecan debsums dh-python diffutils dirmngr distro-info-data dmidecode dmsetup doc-debian dpkg e2fslibs:amd64 e2fsprogs ed etckeeper fail2ban file findutils fontconfig-config fonts-dejavu-core fonts-font-awesome fping gcc-6-base:amd64 gettext-base git git-man gnupg gnupg-agent gnutls-bin gpgv grep groff-base gzip hobbit-plugins hostname htop iftop ifupdown init init-system-helpers DOMAINoute2 iputils-ping isc-dhcp-client isc-dhcp-common kmod krb5-locales less libacl1:amd64 libapparmor1:amd64 libapt-inst2.0:amd64 libapt-pkg5.0:amd64 libasprintf0v5:amd64 libassuan0:amd64 libattr1:amd64 libaudit-common libaudit1:amd64 libavahi-client3:amd64 libavahi-common-data:amd64 libavahi-common3:amd64 libbasicobjects0:amd64 libbind9-140:amd64 libblkid1:amd64 libboost-filesystem1.62.0:amd64 libboost-iostreams1.62.0:amd64 libboost-system1.62.0:amd64 libbsd0:amd64 libbz2-1.0:amd64 libc-ares2:amd64 libc-bin libc-l10n libc6:amd64 libcap-ng0:amd64 libcap2-bin libcap2:amd64 libclass-isa-perl libcollection4:amd64 libcomerr2:amd64 libcrack2:amd64 libcryptsetup4:amd64 libcups2:amd64 libcurl3-gnutls:amd64 libcurl3:amd64 libcwidget3v5:amd64 libdb5.3:amd64 libdbus-1-3:amd64 libdebconfclient0:amd64 libdevmapper1.02.1:amd64 libdhash1:amd64 libdns-export162 libdns162:amd64 libdpkg-perl libdrm2:amd64 libedit2:amd64 libelf1:amd64 liberror-perl libes"
|
||||||
|
ACKCODE="757744"
|
||||||
|
RCPT="1234567890"
|
||||||
|
BBHOSTNAME="HOST.DOMAIN.ORG"
|
||||||
|
MACHIP="111222333444"
|
||||||
|
BBSVCNAME="procs"
|
||||||
|
BBSVCNUM="300"
|
||||||
|
BBHOSTSVC="HOST.DOMAIN.ORG.procs"
|
||||||
|
BBHOSTSVCCOMMAS="HOST,DOMAIN,ORG.procs"
|
||||||
|
BBNUMERIC="300111222333444757744"
|
||||||
|
RECOVERED="0"
|
||||||
|
DOWNSECS="0"
|
||||||
|
DOWNSECSMSG=""
|
65
xymon/tar.client.logfiles.sh
Executable file
65
xymon/tar.client.logfiles.sh
Executable file
@ -0,0 +1,65 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# Purpose :
|
||||||
|
# Create an XZ archive of all files between 2 dates.
|
||||||
|
# Then remove these files
|
||||||
|
|
||||||
|
# Call this script from a /var/lib/xymon/hostdata subdirectory or
|
||||||
|
# /var/lib/xymon/histlogs subdirectory.
|
||||||
|
|
||||||
|
# Vars {{{
|
||||||
|
## Enable (0) or disable (1) debug
|
||||||
|
debug=0
|
||||||
|
|
||||||
|
## Colors {{{
|
||||||
|
c_redb='\033[1;31m'
|
||||||
|
c_magentab='\033[1;35m'
|
||||||
|
c_reset='\033[0m'
|
||||||
|
## }}}
|
||||||
|
|
||||||
|
## Manage files of year
|
||||||
|
date_year="2019"
|
||||||
|
|
||||||
|
## Compress files between these dates
|
||||||
|
date_start="${date_year}-01-01 00:00:01"
|
||||||
|
date_end="${date_year}-12-31 23:59:59"
|
||||||
|
|
||||||
|
## Best XZ compression level
|
||||||
|
xz_compression_lvl="-9"
|
||||||
|
## Fastest XZ compression level
|
||||||
|
#xz_compression_lvl="-0"
|
||||||
|
|
||||||
|
## Get current directory name
|
||||||
|
current_dir=${PWD##*/}
|
||||||
|
|
||||||
|
## Count the number of files
|
||||||
|
match_files=$(find . -type f -newermt "${date_start}" -not -newermt "${date_end}" -not -iname "*.tar*" | wc -l)
|
||||||
|
|
||||||
|
## Archive name
|
||||||
|
tar_file_name="${date_year}.${current_dir}${xz_compression_lvl}.tar.xz"
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# If archive already exists
|
||||||
|
if [ -s "${tar_file_name}" ]; then
|
||||||
|
printf "${c_redb}%-6b${c_reset}\n" "ERROR : ${tar_file_name} already exists (also ${match_files} files match the expected pattern). Please manage this directory manually or remove the archive or files then restart."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# If some files match
|
||||||
|
if [ ! ${match_files} -eq "0" ]; then
|
||||||
|
[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : Create an archive for ${current_dir} files between ${date_start} and ${date_end} (${match_files} files) using XZ's compression level : ${xz_compression_lvl}."
|
||||||
|
|
||||||
|
## Get the list of files between the 2 dates and ignore tar files
|
||||||
|
find . -type f -newermt "${date_start}" -not -newermt "${date_end}" -not -iname "*.tar*" -print0 | tar cJf "${tar_file_name}" --null -T -
|
||||||
|
|
||||||
|
## Check previous return code and if the archive exists with size > 0
|
||||||
|
if [ "${?}" -eq "0" ] && [ -s "${tar_file_name}" ]; then
|
||||||
|
[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : ${tar_file_name} successfully created, the files can be deleted."
|
||||||
|
find . -type f -newermt "${date_start}" -not -newermt "${date_end}" -not -iname "*.tar*" -delete
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
[ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : Skip ${current_dir}, no files found between ${date_start} and ${date_end}."
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
59
xymon/xymon.alert.sh
Executable file
59
xymon/xymon.alert.sh
Executable file
@ -0,0 +1,59 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Purpose {{{
|
||||||
|
## If Xymon server says that a probe is in error on a remote host, try to call the appropriate script.
|
||||||
|
## For debugging messages, you can check xymon's logs (/var/log/xymon/alert.log)
|
||||||
|
|
||||||
|
## How-to use : {{{
|
||||||
|
### Define an alert in Xymon configuration file (/etc/xymon/alerts.cfg)
|
||||||
|
#HOST=HOST.DOMAIN.ORG
|
||||||
|
# SCRIPT /PATH/TO/SCRIPT/xymon.alert.sh 1234567890 FORMAT=SCRIPT DURATION<20
|
||||||
|
|
||||||
|
## }}}
|
||||||
|
# }}}
|
||||||
|
# Vars {{{
|
||||||
|
DEBUG=1
|
||||||
|
|
||||||
|
# Directory containing this script. ${0} is quoted so that a path holding
# whitespace or glob characters reaches dirname as a single argument.
script_path="$(dirname -- "${0}")"
|
||||||
|
script_apt="${script_path}/xymon.apt.alert.sh"
|
||||||
|
script_files="${script_path}/xymon.files.alert.sh"
|
||||||
|
script_libs="${script_path}/xymon.libs.alert.sh"
|
||||||
|
script_procs="${script_path}/xymon.procs.alert.sh"
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} is in error."
|
||||||
|
|
||||||
|
# Match probe name with the script {{{
|
||||||
|
case "${BBSVCNAME}" in
|
||||||
|
'apt' )
|
||||||
|
script_to_run="${script_apt}"
|
||||||
|
;;
|
||||||
|
'files' )
|
||||||
|
script_to_run="${script_files}"
|
||||||
|
;;
|
||||||
|
'libs' )
|
||||||
|
script_to_run="${script_libs}"
|
||||||
|
;;
|
||||||
|
'procs' )
|
||||||
|
script_to_run="${script_procs}"
|
||||||
|
;;
|
||||||
|
# default
|
||||||
|
* )
|
||||||
|
script_to_run="NOT.MANAGED"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Call the next script if managed {{{
|
||||||
|
if [ "${script_to_run}" != "NOT.MANAGED" ]; then
|
||||||
|
# Export vars {{{
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : ${BBHOSTNAME} — Export vars for ${script_to_run}"
|
||||||
|
export BBALPHAMSG
|
||||||
|
export BBHOSTNAME
|
||||||
|
export BBSVCNAME
|
||||||
|
# }}}
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : ${BBHOSTNAME} — Run ${script_to_run} script."
|
||||||
|
"${script_to_run}"
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
exit 0
|
62
xymon/xymon.apt.alert.sh
Executable file
62
xymon/xymon.apt.alert.sh
Executable file
@ -0,0 +1,62 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Purpose {{{
|
||||||
|
## If Xymon server says that the last apt update is too old, try to run a new one.
|
||||||
|
## 1. Create a ssh keyring for xymon user {{{
|
||||||
|
# sudo mkdir -p -- /var/lib/xymon/.ssh/
|
||||||
|
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
|
||||||
|
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
|
||||||
|
## }}}
|
||||||
|
## 2. Remote user {{{
|
||||||
|
# Ensure to have the ${REMOTE_SSH_USER} available on remote hosts and allowed to connect with SSH.
|
||||||
|
# Restrict the SSH access to a single SSH key from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys) :
|
||||||
|
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
|
||||||
|
# Allow sudo commands to restart services and run apt update (/etc/sudoers.d/xymon-ssh) :
|
||||||
|
## xymon-ssh ALL=(root:root) NOPASSWD: /usr/bin/apt update
|
||||||
|
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
|
||||||
|
## }}}
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
# Vars {{{
|
||||||
|
DEBUG=1
|
||||||
|
|
||||||
|
LOCAL_SSH_USER="xymon"
|
||||||
|
REMOTE_SSH_USER="xymon-ssh"
|
||||||
|
|
||||||
|
# Private scratch directory for this run's debug files. Stop here if it cannot
# be created: every later redirection and the final cleanup rely on this path.
temp_dir=$(mktemp -d -t xymon-apt-alert-XXXXXX.tmp) || { printf '%s\n' "ERROR : unable to create a temporary directory." >&2; exit 1; }
|
||||||
|
debug_stdout="${temp_dir}/debug.stdout"
|
||||||
|
debug_stderr="${temp_dir}/debug.stderr"
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Create log files
|
||||||
|
touch "${debug_stdout}" "${debug_stderr}"
|
||||||
|
|
||||||
|
# Manage only apt probe {{{
|
||||||
|
if [ "${BBSVCNAME}" = "apt" ]; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} error" >> "${debug_stdout}"
|
||||||
|
else
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Check if repos need to be updated {{{
|
||||||
|
if echo "${BBALPHAMSG}" | grep -qE "\\&(red|yellow) Last apt update.*ago$" ; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test APT repos — APT repos need to be updated." >> "${debug_stdout}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test APT repos — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo apt update" >> "${debug_stdout}"
|
||||||
|
ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo apt update" >> "${debug_stdout}" 2>> "${debug_stderr}"
|
||||||
|
# Also restart xymon-client service {{{
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test APT repos — xymon-client also need to be restarted." >> "${debug_stdout}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test APT repos — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service" >> "${debug_stdout}"
|
||||||
|
ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart xymon-client.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
|
||||||
|
# }}}
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Remove empty error file
|
||||||
|
[ -s "${debug_stderr}" ] || rm -f "${debug_stderr}"
|
||||||
|
|
||||||
|
# Remove temp_dir if DEBUG is disabled
|
||||||
|
[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
|
||||||
|
|
||||||
|
exit 0
|
102
xymon/xymon.files.alert.sh
Executable file
102
xymon/xymon.files.alert.sh
Executable file
@ -0,0 +1,102 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Purpose {{{
|
||||||
|
## If Xymon server says that a file is in error on a remote host, try to restart the related service.
|
||||||
|
## 1. Create a ssh keyring for xymon user {{{
|
||||||
|
# sudo mkdir -p -- /var/lib/xymon/.ssh/
|
||||||
|
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
|
||||||
|
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
|
||||||
|
## }}}
|
||||||
|
## 2. Remote user {{{
|
||||||
|
# Ensure to have the ${REMOTE_SSH_USER} available on remote hosts and allowed to connect with SSH.
|
||||||
|
# Restrict the SSH access to a single SSH key from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys) :
|
||||||
|
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
|
||||||
|
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh) :
|
||||||
|
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
|
||||||
|
## }}}
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
# Vars {{{
|
||||||
|
DEBUG=1
|
||||||
|
|
||||||
|
LOCAL_SSH_USER="xymon"
|
||||||
|
REMOTE_SSH_USER="xymon-ssh"
|
||||||
|
|
||||||
|
# Private scratch directory for this run's debug files. Stop here if it cannot
# be created: every later redirection and the final cleanup rely on this path.
temp_dir=$(mktemp -d -t xymon-files-alert-XXXXXX.tmp) || { printf '%s\n' "ERROR : unable to create a temporary directory." >&2; exit 1; }
|
||||||
|
debug_stdout="${temp_dir}/debug.stdout"
|
||||||
|
debug_stderr="${temp_dir}/debug.stderr"
|
||||||
|
file_list="${temp_dir}/services.error.list"
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Create log files
|
||||||
|
touch "${debug_stdout}" "${debug_stderr}"
|
||||||
|
|
||||||
|
# Manage only files probe {{{
|
||||||
|
if [ "${BBSVCNAME}" = "files" ]; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} error" >> "${debug_stdout}"
|
||||||
|
else
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Check if a file exceeds its modification time {{{
|
||||||
|
if echo "${BBALPHAMSG}" | grep -qE "File was modified.*ago - should be.*" ; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test file — Some files exceeds their modification time." >> "${debug_stdout}"
|
||||||
|
# First restart Rsyslog service {{{
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test service — First restart rsyslog service." >> "${debug_stdout}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test service — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart rsyslog.service" >> "${debug_stdout}"
|
||||||
|
ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart rsyslog.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
|
||||||
|
# }}}
|
||||||
|
## Get the list of files path
|
||||||
|
## TODO : Set a pattern for both yellow and red colors
|
||||||
|
echo "${BBALPHAMSG}" | sed -n 's;^\&yellow <a.*>\(.*\)</a>;\1;p' > "${file_list}"
|
||||||
|
|
||||||
|
while IFS= read -r file_path; do
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while file_path loop — ${file_path} exceeds it's modification time." >> "${debug_stdout}"
|
||||||
|
# Match files path and services name {{{
|
||||||
|
case "${file_path}" in
|
||||||
|
# Cron
|
||||||
|
'/var/log/cron.log' )
|
||||||
|
service_name="cron"
|
||||||
|
;;
|
||||||
|
# default
|
||||||
|
* )
|
||||||
|
service_name="NOT.MANAGED"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Restart service if needed {{{
|
||||||
|
if [ "${service_name}" != "NOT.MANAGED" ] && [ "${service_name}" != "${previous_service_name}" ]; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while file_path loop — ${service_name} need to be restarted." >> "${debug_stdout}"
|
||||||
|
# Trace the exact ssh command issued on the next statement, including the
# UserKnownHostsFile option, so the debug log matches what is really run.
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while file_path loop — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${service_name}.service" >> "${debug_stdout}"
|
||||||
|
ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart ${service_name}.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
|
||||||
|
previous_service_name="${service_name}"
|
||||||
|
else
|
||||||
|
if [ "${service_name}" = "NOT.MANAGED" ]; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while file_path loop — service for ${file_path} is not managed." >> "${debug_stdout}"
|
||||||
|
else
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while file_path loop — ${service_name} was already restarted." >> "${debug_stdout}"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
|
done < "${file_list}"
|
||||||
|
|
||||||
|
# Also restart xymon-client service {{{
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test service — xymon-client also need to be restarted." >> "${debug_stdout}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test service — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service" >> "${debug_stdout}"
|
||||||
|
ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart xymon-client.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
|
||||||
|
# }}}
|
||||||
|
else
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test file — All files seems up to date." >> "${debug_stdout}"
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Remove empty error file
|
||||||
|
[ -s "${debug_stderr}" ] || rm -f "${debug_stderr}"
|
||||||
|
|
||||||
|
# Remove temp_dir if DEBUG is disabled
|
||||||
|
[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
|
||||||
|
|
||||||
|
exit 0
|
159
xymon/xymon.libs.alert.sh
Executable file
159
xymon/xymon.libs.alert.sh
Executable file
@ -0,0 +1,159 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Purpose {{{
|
||||||
|
## If Xymon server says that a service is in error on a remote host, try to restart this service.
|
||||||
|
## 1. Create a ssh keyring for xymon user {{{
|
||||||
|
# sudo mkdir -p -- /var/lib/xymon/.ssh/
|
||||||
|
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
|
||||||
|
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
|
||||||
|
## }}}
|
||||||
|
## 2. Remote user {{{
|
||||||
|
# Ensure to have the ${REMOTE_SSH_USER} available on remote hosts and allowed to connect with SSH.
|
||||||
|
# Restrict the SSH access to a single SSH key from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys) :
|
||||||
|
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
|
||||||
|
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh) :
|
||||||
|
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
|
||||||
|
## }}}
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
# Vars {{{
|
||||||
|
DEBUG=1
|
||||||
|
|
||||||
|
LOCAL_SSH_USER="xymon"
|
||||||
|
REMOTE_SSH_USER="xymon-ssh"
|
||||||
|
|
||||||
|
# Private scratch directory for this run's debug files. Stop here if it cannot
# be created: every later redirection and the final cleanup rely on this path.
temp_dir=$(mktemp -d -t xymon-libs-alert-XXXXXX.tmp) || { printf '%s\n' "ERROR : unable to create a temporary directory." >&2; exit 1; }
|
||||||
|
debug_stdout="${temp_dir}/debug.stdout"
|
||||||
|
debug_stderr="${temp_dir}/debug.stderr"
|
||||||
|
service_list="${temp_dir}/services.error.list"
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Create log files
|
||||||
|
touch "${debug_stdout}" "${debug_stderr}"
|
||||||
|
|
||||||
|
# Manage only libs probe {{{
|
||||||
|
if [ "${BBSVCNAME}" = "libs" ]; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} error" >> "${debug_stdout}"
|
||||||
|
else
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Check if host needs to reboot {{{
|
||||||
|
if echo "${BBALPHAMSG}" | grep -qE "\\&yellow Machine should be reboot.*" ; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test kernel — The host need to be rebooted." >> "${debug_stdout}"
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Check if a service needs to restart
|
||||||
|
if echo "${BBALPHAMSG}" | grep -qE "\\&yellow The following processes.*" ; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test service — Some services need to be restarted." >> "${debug_stdout}"
|
||||||
|
# Get the list of binaries path
|
||||||
|
echo "${BBALPHAMSG}" | sed -n 's/^ \(\/.*\) (.*)/\1/p' > "${service_list}"
|
||||||
|
|
||||||
|
while IFS= read -r bin_path; do
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while bin_path loop — ${bin_path} use old libs." >> "${debug_stdout}"
|
||||||
|
# Match binaries path and services name {{{
|
||||||
|
case "${bin_path}" in
|
||||||
|
# Apache2
|
||||||
|
'/usr/sbin/apache2' )
|
||||||
|
service_name="apache2"
|
||||||
|
;;
|
||||||
|
# Arpwatch
|
||||||
|
'/usr/sbin/arpwatch' )
|
||||||
|
service_name="arpwatch"
|
||||||
|
;;
|
||||||
|
# Dbus
|
||||||
|
'/usr/bin/dbus-daemon' )
|
||||||
|
service_name="dbus"
|
||||||
|
;;
|
||||||
|
# lvmetad − LVM metadata cache daemon
|
||||||
|
'/sbin/lvmetad' )
|
||||||
|
service_name="lvm2-lvmetad"
|
||||||
|
;;
|
||||||
|
# Netdata
|
||||||
|
'/usr/sbin/netdata' )
|
||||||
|
service_name="netdata"
|
||||||
|
;;
|
||||||
|
# blkmapd − pNFS block layout mapping daemon
|
||||||
|
'/usr/sbin/blkmapd' )
|
||||||
|
service_name="nfs-utils"
|
||||||
|
;;
|
||||||
|
# Mumble-server
|
||||||
|
'/usr/sbin/murmurd' )
|
||||||
|
service_name="mumble-server"
|
||||||
|
;;
|
||||||
|
# Nginx
|
||||||
|
'/usr/sbin/nginx' )
|
||||||
|
service_name="nginx"
|
||||||
|
;;
|
||||||
|
# Ntp
|
||||||
|
'/usr/sbin/ntpd' )
|
||||||
|
service_name="ntp"
|
||||||
|
;;
|
||||||
|
# Nslcd
|
||||||
|
'/usr/sbin/nslcd' )
|
||||||
|
service_name="nslcd"
|
||||||
|
;;
|
||||||
|
# PHP-FPM 7.0
|
||||||
|
'/usr/sbin/php-fpm7.0' )
|
||||||
|
service_name="php7.0-fpm"
|
||||||
|
;;
|
||||||
|
# PHP-FPM 7.3
|
||||||
|
'/usr/sbin/php-fpm7.3' )
|
||||||
|
service_name="php7.3-fpm"
|
||||||
|
;;
|
||||||
|
# Postfix
|
||||||
|
'/usr/lib/postfix/sbin/pickup' | '/usr/lib/postfix/qmgr' | '/usr/lib/postfix/sbin/tlsmgr' | '/usr/lib/postfix/sbin/qmgr' )
|
||||||
|
service_name="postfix"
|
||||||
|
;;
|
||||||
|
# Rdnssd
|
||||||
|
'/sbin/rdnssd' )
|
||||||
|
service_name="rdnssd"
|
||||||
|
;;
|
||||||
|
# Systemd-journald
|
||||||
|
'/lib/systemd/systemd-journald' )
|
||||||
|
service_name="systemd-journald"
|
||||||
|
;;
|
||||||
|
# Systemd-logind
|
||||||
|
'/lib/systemd/systemd-logind' )
|
||||||
|
service_name="systemd-logind"
|
||||||
|
;;
|
||||||
|
# default
|
||||||
|
* )
|
||||||
|
service_name="NOT.MANAGED"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Restart service if needed {{{
|
||||||
|
if [ "${service_name}" != "NOT.MANAGED" ] && [ "${service_name}" != "${previous_service_name}" ]; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while bin_path loop — ${service_name} need to be restarted." >> "${debug_stdout}"
|
||||||
|
# Trace the exact ssh command issued on the next statement, including the
# UserKnownHostsFile option, so the debug log matches what is really run.
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while bin_path loop — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${service_name}.service" >> "${debug_stdout}"
|
||||||
|
ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart ${service_name}.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
|
||||||
|
previous_service_name="${service_name}"
|
||||||
|
else
|
||||||
|
if [ "${service_name}" = "NOT.MANAGED" ]; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while bin_path loop — service for ${bin_path} is not managed." >> "${debug_stdout}"
|
||||||
|
else
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while bin_path loop — ${service_name} was already restarted." >> "${debug_stdout}"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
|
done < "${service_list}"
|
||||||
|
|
||||||
|
# Also restart xymon-client service {{{
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test service — xymon-client also need to be restarted." >> "${debug_stdout}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : Test service — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service" >> "${debug_stdout}"
|
||||||
|
ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart xymon-client.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
|
||||||
|
# }}}
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Remove empty error file
|
||||||
|
[ -s "${debug_stderr}" ] || rm -f "${debug_stderr}"
|
||||||
|
|
||||||
|
# Remove temp_dir if DEBUG is disabled
|
||||||
|
[ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
|
||||||
|
|
||||||
|
exit 0
|
107
xymon/xymon.procs.alert.sh
Executable file
107
xymon/xymon.procs.alert.sh
Executable file
@ -0,0 +1,107 @@
|
|||||||
|
#!/bin/sh

# Purpose {{{
## If Xymon server says that a service is in error on a remote host, try to
## restart this service.
## 1. Create a ssh keyring for xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure to have the ${REMOTE_SSH_USER} available on remote hosts and allowed
# to connect with SSH.
# Restrict the SSH access to a single SSH key from the Xymon server IP
# (~${REMOTE_SSH_USER}/.ssh/authorized_keys) :
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh) :
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}
## 3. Xymon Configuration {{{
# PROC monitoring needs to display the real service name in its description :
## PROC %^/sbin/rpcbind MIN=1 MAX=1 COLOR=red "TEXT=rpcbind"
# You can add more information about this proc if you use an underscore "_" :
## PROC %^/usr/sbin/rpc.idmapd MIN=1 MAX=1 COLOR=red "TEXT=NFS-server_rpc.idmapd"
## This way, the script will only take the text before the underscore "_" as the
## service name to be restarted.
# Don't add whitespaces in the description of a process.

## }}}
# }}}

# Vars {{{
# DEBUG follows the exit-status convention used throughout this script:
# 0 enables debug logging (and keeps the temp directory), anything else
# disables it.
DEBUG=1

REMOTE_SSH_USER="xymon-ssh"
# }}}

# Helpers {{{

# Append a coloured debug message to a log file when DEBUG is enabled (0).
#   $1 : message
#   $2 : destination file (defaults to ${debug_stdout})
# Always returns 0 so it can be used anywhere without affecting control flow.
debug_log() {
  if [ "${DEBUG}" -eq "0" ]; then
    # \033 rather than \e : \e is not a POSIX printf escape and is printed
    # literally by some /bin/sh implementations.
    printf '\033[1;35m%-6s\033[m\n' "${1}" >> "${2:-${debug_stdout}}"
  fi
  return 0
}

# Succeed only if $1 is a non-empty string made of decimal digits.
is_uint() {
  case "${1}" in
    ''|*[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Succeed only if $1 can safely be interpolated in the remote command line.
# The name comes from the alert message and ends up in a string evaluated by
# the remote shell under sudo, so anything outside a conservative unit-name
# alphabet (or a leading "-", which would be read as an option) is refused.
is_safe_service_name() {
  case "${1}" in
    ''|-*|*[!a-z0-9@.:-]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Parse one line of the error list and set the global variables
# service_name, process_found, process_min and process_max.
#   $1 : line such as "rpcbind (found 0, req. 1 or more)"
# All four variables are emptied first, so a line matching no known pattern
# never inherits the values of the previous line.
parse_proc_line() {
  service_name=""
  process_found=""
  process_min=""
  process_max=""

  ## Pattern "req. between" {{{
  if printf '%s\n' "${1}" | grep -q -E -- ".* \\(found .*, req. between .* and .*\\)" ; then
    debug_log "DEBUG : while process loop — Pattern \"req. between\"."
    service_name="$(printf '%s\n' "${1}" | cut -d" " -f1 | sed 's/_.*//')"
    process_found="$(printf '%s\n' "${1}" | cut -d" " -f3 | tr -d ',')"
    process_min="$(printf '%s\n' "${1}" | cut -d" " -f6)"
    process_max="$(printf '%s\n' "${1}" | cut -d" " -f8 | tr -d ')')"
  fi
  ## }}}
  ## Pattern "req. .* or more" {{{
  if printf '%s\n' "${1}" | grep -q -E -- ".* \\(found .*, req. .* or more\\)" ; then
    debug_log "DEBUG : while process loop — Pattern \"req. .* or more\"."
    service_name="$(printf '%s\n' "${1}" | cut -d" " -f1 | sed 's/_.*//')"
    process_found="$(printf '%s\n' "${1}" | cut -d" " -f3 | tr -d ',')"
    process_min="$(printf '%s\n' "${1}" | cut -d" " -f5)"
    process_max="nolimit"
  fi
  ## }}}
  return 0
}

# Restart a systemd service on ${BBHOSTNAME} through SSH.
#   $1 : service name, without the ".service" suffix (must already be trusted)
#   $2 : context label used as prefix of the debug message
# Best effort : the ssh output is appended to the debug files and a failure
# does not stop the script.
restart_service() {
  debug_log "DEBUG : ${2} — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${1}.service"
  # -n : keep ssh away from stdin, which may be the list read by the caller.
  ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart ${1}.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
  return 0
}

# Remove the error file if it is empty, and the whole temp directory if DEBUG
# is disabled.
cleanup() {
  [ -s "${debug_stderr}" ] || rm -f -- "${debug_stderr}"
  [ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir:?}"
  return 0
}

# }}}

# Entry point : reads BBHOSTNAME, BBSVCNAME and BBALPHAMSG from the
# environment. Returns 0, or 1 if the temp directory can't be created.
main() {
  # Without a working directory nothing below can work (and the final
  # "rm -rf" would receive an empty path), so give up right away.
  temp_dir=$(mktemp -d -t xymon-procs-alert-XXXXXX.tmp) || return 1
  [ -d "${temp_dir}" ] || return 1
  debug_stdout="${temp_dir}/debug.stdout"
  debug_stderr="${temp_dir}/debug.stderr"
  service_list="${temp_dir}/services.error.list"

  # Create log files
  touch "${debug_stdout}" "${debug_stderr}"

  # Manage only procs probe {{{
  if [ "${BBSVCNAME}" != "procs" ]; then
    debug_log "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} probe is not managed." "${debug_stderr}"
    cleanup
    return 0
  fi
  debug_log "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} error"
  # }}}

  # Get the list of processes with an error
  # (printf rather than echo : the message is arbitrary data)
  printf '%s\n' "${BBALPHAMSG}" | grep -E "&(red|yellow)" | cut -d" " -f2- | tr '[:upper:]' '[:lower:]' > "${service_list}"

  # If any error on a process
  if [ -s "${service_list}" ]; then
    debug_log "DEBUG : process list — Some processes seems to be in error."

    while IFS= read -r line; do
      parse_proc_line "${line}"
      debug_log "DEBUG : while process loop — Found ${process_found} process(es) for ${service_name} service and require between ${process_min} and ${process_max}."

      # Restart service if needed {{{
      # Only when the name is safe, both counters are real numbers and fewer
      # processes than the required minimum were found.
      if is_safe_service_name "${service_name}" \
        && is_uint "${process_found}" \
        && is_uint "${process_min}" \
        && [ "${process_found}" -lt "${process_min}" ]; then
        debug_log "DEBUG : while process loop — ${service_name} need to be restarted."
        restart_service "${service_name}" "while process loop"
      else
        debug_log "DEBUG : while process loop — ${service_name} service is not managed."
      fi
      # }}}
    done < "${service_list}"

    # Also restart xymon-client service {{{
    debug_log "DEBUG : process list — xymon-client also need to be restarted."
    restart_service "xymon-client" "process list"
    # }}}
  else
    debug_log "DEBUG : process list — No error on any process."
  fi

  cleanup
  return 0
}

main "$@"
|
Loading…
Reference in New Issue
Block a user