The smallest watchdog on earth. Tiny, monitoring-plugins compatible monitoring with a status page.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

166 lines
5.8 KiB

package chihuahua
import (
"github.com/carlescere/scheduler"
"sync"
"time"
)
// Notifier is implemented by notification providers. Notify receives the
// configuration, the check concerned and a check result.
// NOTE(review): when Notify is invoked (after every run or only on status
// changes) is not visible in this part of the file — confirm against the caller.
// NOTE(review): Check is passed by value although it contains a sync.Mutex
// (JobLock), so every call copies the lock (see go vet's copylocks check).
// Switching to *Check would break existing implementers, so it is only noted here.
type Notifier interface {
Notify(*Config, Check, CheckResult)
}
// NotifierWithContext is a Notifier that can additionally hand out an opaque
// value via Export and accept one via Import.
// NOTE(review): presumably this is used to carry notifier state from one
// instance to another (e.g. across a configuration reload) — confirm against
// the caller.
type NotifierWithContext interface{
Notifier
// Export returns the notifier's context as an untyped value.
Export() interface{}
// Import receives an untyped context value; presumably one previously
// returned by Export of the same notifier type — TODO confirm.
Import(interface{})
}
// Notifiers is a map of registered notification providers, keyed by a string
// (presumably the provider name — confirm against the registration code).
// It is a plain map with no locking of its own.
var Notifiers = map[string]Notifier{}
// Working is a package-level read/write lock.
// NOTE(review): what it protects is not visible in this part of the file —
// presumably it serialises configuration changes against running checks;
// confirm against its users.
var Working = sync.RWMutex{}
// Config is the root of the monitoring configuration: a tree of servers and
// groups plus the notifiers available to their checks.
type Config struct {
// Servers holds the top-level entries of the server/group tree; nested
// entries are reachable through ServerOrGroup.Children (see Walk).
Servers []*ServerOrGroup
// Notifiers maps a string key to a notifier; presumably the keys are the
// names referenced by Check.Notifiers — TODO confirm.
Notifiers map[string]Notifier
// RootURL is presumably the externally reachable base URL of the status
// page — confirm against its users.
RootURL string
}
// walk visits every entry of lst depth-first in pre-order: fn is called on an
// entry before its Children are descended into, and siblings are visited in
// slice order.
func (cfg *Config) walk(lst []*ServerOrGroup, fn func(*ServerOrGroup)) {
	for i := range lst {
		node := lst[i]
		fn(node)
		// Children is read only after fn has run, so changes made by fn are seen.
		if len(node.Children) == 0 {
			continue
		}
		cfg.walk(node.Children, fn)
	}
}
// Walk calls fn for every server and group in the configuration, starting at
// cfg.Servers and descending into Children; the traversal order is that of walk.
func (cfg *Config) Walk(fn func(*ServerOrGroup)) {
	roots := cfg.Servers
	cfg.walk(roots, fn)
}
// ServerOrGroup is one node of the configuration tree. A node can carry checks
// of its own as well as child nodes, which lets the same type represent both a
// single server and a group of servers.
type ServerOrGroup struct {
// ID identifies the node; NOTE(review): uniqueness scope (global or per
// parent) is not visible here — confirm.
ID string
// Name is the node's name; presumably the human-readable label shown on the
// status page — TODO confirm.
Name string
// ConnectionType names how the node is reached; left out of the JSON output
// when empty. Valid values are not visible in this part of the file.
ConnectionType string `json:",omitempty"`
// ConnectionParams holds the parameters for ConnectionType; never exposed
// through JSON.
ConnectionParams string `json:"-"`
// Checks are the checks attached directly to this node.
Checks []*Check `json:",omitempty"`
// Children are the nodes nested below this one.
Children []*ServerOrGroup `json:",omitempty"`
// Parent points back at the enclosing node; presumably nil for top-level
// nodes — confirm against the code that builds the tree.
Parent *ServerOrGroup `json:"-"` // would lead to a loop if exposed to JSON!
}
// Check describes a command that shall be run in a specific shell, and (if the
// check has already been run) the result of that command interpreted according
// to the monitoring-plugins documentation
// (https://www.monitoring-plugins.org/doc/guidelines.html).
//
// A Check contains a sync.Mutex (JobLock) and therefore should be handled
// through a pointer rather than copied.
type Check struct {
// ID identifies the check; NOTE(review): uniqueness scope is not visible
// here — confirm.
ID string
// Name is the check's name; presumably the label shown on the status page —
// TODO confirm.
Name string
// Command is the check command line to run inside the shell (e.g. `/usr/lib/monitoring-plugins/check_ping -H 8.8.8.8 -w 100,25% -c 200,50%`)
Command string `json:"-"`
// Disable presumably switches the check off when true — confirm against the
// scheduling code.
Disable bool
// Notifiers lists notifier keys for this check; presumably keys into
// Config.Notifiers — TODO confirm. Not exposed through JSON.
Notifiers []string `json:"-"`
// Verify is an unsigned count; presumably the number of confirming re-runs
// before a status change is reported — TODO confirm. Not exposed through JSON.
Verify uint `json:"-"`
// Interval is presumably the time between two runs of the check — confirm
// against the scheduling code. Not exposed through JSON.
Interval time.Duration `json:"-"`
// Result is the most recently stored result of the check.
Result CheckResult
// Parent points at the server or group this check belongs to.
Parent *ServerOrGroup `json:"-"` // would lead to a loop if exposed to JSON!
// Job is the scheduler job associated with this check; presumably nil while
// the check is not scheduled — TODO confirm.
Job *scheduler.Job `json:"-"`
// JobLock is a mutex; presumably it guards Job — confirm against its users.
JobLock sync.Mutex `json:"-"`
}
// CheckResult holds the outcome of a single execution of a check: its status,
// the captured output streams, the parsed performance data and the time of
// execution.
type CheckResult struct {
// Status is the result of the check after it has been run
Status CheckStatus
// Error contains the STDERR output of the check command, and should normally be empty - if it is non-empty, it is very probable that the check couldn't be initiated correctly
Error string
// Details contains the STDOUT output of the check command
Details string
// Performance contains the performance data parts of the check, mapped to their label
Performance map[string]CheckPerformance
// LastUpdate is the last execution date of the check
LastUpdate time.Time
}
// CheckRange describes a range for warning and critical values for a
// performance data part of a completed check. By default a value outside
// [Start, End] triggers the alert; Inside inverts that.
type CheckRange struct {
// Start is the lower bound of the value (will send an alert if the actual value is smaller), or -Inf if it does not apply
Start float64
// End is the upper bound of the value (will send an alert if the actual value is bigger), or Inf if it does not apply
End float64
// Inside changes the behaviour (if set to true) to send an alert if the actual value is BIGGER than Start AND SMALLER than End
Inside bool
}
// CheckPerformance describes a performance data part of a completed check
type CheckPerformance struct {
// Unit is the unit of measurement (UOM) of the part
Unit CheckUnit
// Value is the current value, or NaN if the actual value couldn't be determined (UOM "U" or parsing issues (which additionally cause a warning))
Value float64
// Min is the smallest possible value, or NaN if it does not apply or in the case of parsing issues (which additionally cause a warning)
Min float64
// Max is the biggest possible value, or NaN if it does not apply or in the case of parsing issues (which additionally cause a warning)
Max float64
// Warning is the range definition that will result in a warning alert.
// NOTE(review): the field is a CheckRange value, not a pointer, so it can
// never be nil; how "does not apply" or a parsing issue (which additionally
// causes a warning) is represented is not visible here — presumably via the
// -Inf/Inf bounds described on CheckRange; confirm against the parser.
Warning CheckRange
// Critical is the range definition that will result in a critical alert.
// NOTE(review): as with Warning, this is a value and cannot be nil; confirm
// against the parser how "does not apply" is represented.
Critical CheckRange
}
// CheckStatus describes the result of a check (StatusOk, StatusWarning, StatusCritical, StatusUnknown)
type CheckStatus int

// String returns the upper-case label of the status: "OK", "WARNING" or
// "CRITICAL" for the values 0, 1 and 2, and "UNKNOWN" for every other value
// (including negative ones).
func (s CheckStatus) String() string {
	// Labels are indexed by the numeric status value.
	labels := [...]string{"OK", "WARNING", "CRITICAL"}
	if s >= 0 && int(s) < len(labels) {
		return labels[s]
	}
	return "UNKNOWN"
}
// CheckUnit describes the unit of measurement (UOM) for a check value. The
// units known to this package are declared as the Unit* constants.
type CheckUnit string
// Check statuses, numbered like the exit codes of the check command.
const (
	// StatusOk is the result of a check that returned with the exit code 0
	StatusOk CheckStatus = 0
	// StatusWarning is the result of a check that returned with the exit code 1
	StatusWarning CheckStatus = 1
	// StatusCritical is the result of a check that returned with the exit code 2
	StatusCritical CheckStatus = 2
	// StatusUnknown is the result of a check that returned with a different exit code or threw an error during execution
	StatusUnknown CheckStatus = 3
)

// Units of measurement (UOM) that a performance value can carry.
const (
	// UnitNumber is the unit used for a number of things (e.g. users, processes, load averages)
	UnitNumber CheckUnit = ""

	// UnitSeconds is the unit used for an elapsed time in seconds
	UnitSeconds CheckUnit = "s"
	// UnitMilliseconds is the unit used for an elapsed time in milliseconds
	UnitMilliseconds CheckUnit = "ms"
	// UnitMicroseconds is the unit used for an elapsed time in microseconds
	UnitMicroseconds CheckUnit = "us"

	// UnitPercentage is the unit used for a percentage, normally between 0 and 100
	UnitPercentage CheckUnit = "%"

	// UnitBytes is the unit used for data sizes in bytes
	UnitBytes CheckUnit = "B"
	// UnitKilobytes is the unit used for data sizes in kilobytes
	UnitKilobytes CheckUnit = "KB"
	// UnitMegabytes is the unit used for data sizes in megabytes
	UnitMegabytes CheckUnit = "MB"
	// UnitGigabytes is the unit used for data sizes in gigabytes
	UnitGigabytes CheckUnit = "GB"
	// UnitTerabytes is the unit used for data sizes in terabytes
	UnitTerabytes CheckUnit = "TB"

	// UnitCounter is the unit used for a continuous counter (such as bytes transmitted on an interface)
	UnitCounter CheckUnit = "c"
)