The smallest watchdog on earth. Tiny, monitoring-plugins compatible monitoring with a status page.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

166 lines
4.1 KiB

package chihuahua
import (
"bytes"
"context"
"errors"
"github.com/carlescere/scheduler"
"github.com/rs/zerolog/log"
"net/url"
"os/exec"
"strings"
"time"
)
// FullID returns the ID path of the check: the FullID of its parent followed
// by the check's own ID, path-escaped.
func (chk *Check) FullID() []string {
	path := chk.Parent.FullID()
	path = append(path, url.PathEscape(chk.ID))
	return path
}
// FullName returns the name path of the check: the FullName of its parent
// followed by the check's own Name.
func (chk *Check) FullName() []string {
	names := chk.Parent.FullName()
	names = append(names, chk.Name)
	return names
}
// FullID returns the ID path of the server or group, from the outermost
// ancestor down to sog itself. Every segment is path-escaped so that the
// segments can be joined with "/" without ambiguity. A node without a parent
// and without an ID contributes no segment and yields an empty, non-nil slice.
func (sog *ServerOrGroup) FullID() []string {
	if sog.Parent != nil {
		return append(sog.Parent.FullID(), url.PathEscape(sog.ID))
	}
	if sog.ID != "" {
		// Escape the top-level segment as well; previously only segments
		// below a parent were escaped, so the same ID produced different
		// path segments depending on its depth.
		return []string{url.PathEscape(sog.ID)}
	}
	return []string{}
}
// FullName returns the name path of the server or group, from the outermost
// ancestor down to sog itself. A node without a parent contributes its Name
// only when its ID is non-empty; otherwise the result is an empty, non-nil
// slice.
func (sog *ServerOrGroup) FullName() []string {
	switch {
	case sog.Parent != nil:
		ancestors := sog.Parent.FullName()
		return append(ancestors, sog.Name)
	case sog.ID != "":
		// The ID, not the Name, decides whether a parentless node is named.
		return []string{sog.Name}
	default:
		return []string{}
	}
}
// ConnectionTimeout bounds how long a single check command may run; Check.Run
// uses it as the timeout of the context passed to exec.CommandContext.
var ConnectionTimeout = 30 * time.Second
// Run executes the check's command once and stores the outcome in chk.Result.
//
// The command line is assembled from the parent's ConnectionType and
// ConnectionParams: "ssh" prefixes an ssh invocation, "push" is not
// implemented yet and returns without touching the result, and any other type
// uses the connection parameters themselves as the shell, falling back to
// "sh -c" when there are none.
//
// The command is killed after ConnectionTimeout. Its exit status is mapped to
// a check status: 0 is ok, 1 is warning, any other exit status is critical,
// and a command that did not exit normally (could not be started, was killed
// by a signal, ...) is unknown. The first line of stdout, up to an optional
// "|", becomes the details; stderr becomes the error text. Finally chk.Notify
// is called with the previous result, followed by SendUpdate.
func (chk *Check) Run(cfg *Config) {
	Working.RLock()
	// TODO: read config
	Working.RUnlock()

	previous := chk.Result
	current := CheckResult{}

	// Split the connection parameters on single spaces. Empty fields (caused
	// by repeated spaces) are dropped, while a literal '' or "" is kept as an
	// explicit empty argument.
	params := strings.Split(chk.Parent.ConnectionParams, " ")
	for i := 0; i < len(params); i++ {
		params[i] = strings.TrimSpace(params[i])
		if params[i] == "''" || params[i] == `""` {
			params[i] = ""
		} else if params[i] == "" {
			params = append(params[:i], params[i+1:]...)
			i--
		}
	}

	var shell []string
	switch chk.Parent.ConnectionType {
	case "ssh":
		shell = append([]string{"ssh", "-o", "StrictHostKeyChecking accept-new", "-o", "UpdateHostKeys yes"}, params...)
	case "push":
		// TODO: create a push cache
		return
	default:
		if len(params) > 0 {
			shell = params
		} else {
			shell = []string{"sh", "-c"}
		}
	}

	// The full slice expressions force append to copy, so neither the log
	// call nor the argument list writes into shell's backing array.
	log.Debug().Str("id", strings.Join(chk.FullID(), "/")).Strs("command", append(shell[:len(shell):len(shell)], chk.Command)).Msg("Executing check command")

	ctx, cancel := context.WithTimeout(context.Background(), ConnectionTimeout)
	defer cancel()

	// NOTE(review): /etc/chihuahua.rc is executed and its output evaluated,
	// whereas ~/.chihuahuarc is read with cat - confirm this asymmetry is
	// intended.
	script := "cd ~; [ -e /etc/chihuahua.rc ] && eval \"$(/etc/chihuahua.rc)\"; [ -e ~/.chihuahuarc ] && eval \"$(cat ~/.chihuahuarc)\"; " + chk.Command
	c := exec.CommandContext(ctx, shell[0], append(shell[1:len(shell):len(shell)], script)...)
	var errbuf bytes.Buffer
	c.Stderr = &errbuf
	output, err := c.Output()

	// Take all of stderr; reading up to a NUL delimiter would silently
	// truncate output that happens to contain one.
	current.Error = strings.TrimSpace(errbuf.String())

	// ExitCode reports -1 when the process did not exit on its own (for
	// example when it was killed by a signal), which falls through to unknown.
	// NOTE(review): exit status 3 is UNKNOWN in the monitoring-plugins
	// convention but is reported as critical here - confirm this is intended.
	var exitErr *exec.ExitError
	switch {
	case err == nil:
		current.Status = StatusOk
	case errors.As(err, &exitErr) && exitErr.ExitCode() == 1:
		current.Status = StatusWarning
	case errors.As(err, &exitErr) && exitErr.ExitCode() >= 0:
		current.Status = StatusCritical
	default:
		current.Status = StatusUnknown
		current.Error = strings.TrimSpace(err.Error() + "\n" + current.Error)
	}

	// Only the first output line counts; anything after "|" is performance data.
	current.Details = strings.TrimSpace(strings.SplitN(strings.SplitN(string(output), "\n", 2)[0], "|", 2)[0])
	// TODO: parse performance data
	current.LastUpdate = time.Now()

	log.Debug().Str("id", strings.Join(chk.FullID(), "/")).Str("status", current.Status.String()).Msg("check completed")
	chk.Result = current
	chk.Notify(cfg, previous)
	SendUpdate(cfg)
}
// Schedule (re)registers the check with the scheduler so that Run is invoked
// every chk.Interval, truncated to whole seconds. A job already attached to
// the check is told to quit first; a disabled check is left without a job.
// If a check takes longer than the interval, the next occurrence is skipped.
// A scheduling error is reported through log.Fatal.
func (chk *Check) Schedule(cfg *Config) {
	// TODO: make sure that if the interval didn't change the check won't be run again
	// maybe just use lastchecktime + interval?!
	reschedule := func() error {
		chk.JobLock.Lock()
		defer chk.JobLock.Unlock()

		if running := chk.Job; running != nil {
			running.Quit <- true
		}
		chk.Job = nil
		if chk.Disable {
			return nil
		}

		// TODO: random offset to distribute checks over time?!
		seconds := int(chk.Interval.Seconds())
		var err error
		chk.Job, err = scheduler.Every(seconds).Seconds().Run(func() {
			chk.Run(cfg)
		})
		return err
	}

	if err := reschedule(); err != nil {
		// TODO: when does this happen?!
		log.Fatal().Err(err).Msg("Scheduling error, exiting!")
	}
}
// RunOnce runs every enabled check reachable through cfg.Walk exactly once and
// collects one error for each check whose status is not StatusOk afterwards.
// It returns nil when no check failed.
func RunOnce(cfg *Config) []error {
	var failures []error
	cfg.Walk(func(server *ServerOrGroup) {
		if server.Checks == nil {
			return
		}
		for _, chk := range server.Checks {
			if chk.Disable {
				continue
			}
			chk.Run(cfg)
			if chk.Result.Status == StatusOk {
				continue
			}
			id := strings.Join(chk.FullID(), "/")
			failures = append(failures, errors.New("check failed: \""+id+"\" returned "+chk.Result.Status.String()))
		}
	})
	if len(failures) == 0 {
		return nil
	}
	return failures
}
// Schedule calls Schedule on every check of every server or group that
// cfg.Walk visits.
func Schedule(cfg *Config) {
	scheduleAll := func(server *ServerOrGroup) {
		if server.Checks == nil {
			return
		}
		for _, chk := range server.Checks {
			chk.Schedule(cfg)
		}
	}
	cfg.Walk(scheduleAll)
}