The smallest watchdog on earth. Tiny, monitoring-plugins compatible monitoring with a status page. https://cloud.docker.com/repository/docker/momar/chihuahua/general
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

167 lines
4.1 KiB

  1. package chihuahua
  2. import (
  3. "bytes"
  4. "context"
  5. "errors"
  6. "github.com/carlescere/scheduler"
  7. "github.com/rs/zerolog/log"
  8. "net/url"
  9. "os/exec"
  10. "strings"
  11. "time"
  12. )
  13. func (chk *Check) FullID() []string {
  14. return append(chk.Parent.FullID(), url.PathEscape(chk.ID))
  15. }
  16. func (chk *Check) FullName() []string {
  17. return append(chk.Parent.FullName(), chk.Name)
  18. }
  19. func (sog *ServerOrGroup) FullID() []string {
  20. if sog.Parent != nil {
  21. return append(sog.Parent.FullID(), url.PathEscape(sog.ID))
  22. }
  23. if sog.ID != "" {
  24. return []string{sog.ID}
  25. }
  26. return []string{}
  27. }
  28. func (sog *ServerOrGroup) FullName() []string {
  29. if sog.Parent != nil {
  30. return append(sog.Parent.FullName(), sog.Name)
  31. }
  32. if sog.ID != "" {
  33. return []string{sog.Name}
  34. }
  35. return []string{}
  36. }
  37. var ConnectionTimeout = 30 * time.Second
  38. func (chk *Check) Run(cfg *Config) {
  39. Working.RLock()
  40. // TODO: read config
  41. Working.RUnlock()
  42. previous := chk.Result
  43. current := CheckResult{}
  44. var shell []string
  45. var params = strings.Split(chk.Parent.ConnectionParams, " ")
  46. for i := 0; i < len(params); i++ {
  47. params[i] = strings.TrimSpace(params[i])
  48. if params[i] == "''" || params[i] == `""` {
  49. params[i] = ""
  50. } else if params[i] == "" {
  51. params = append(params[:i], params[i+1:]...)
  52. i--
  53. }
  54. }
  55. switch chk.Parent.ConnectionType {
  56. case "ssh":
  57. shell = append([]string{"ssh", "-o", "StrictHostKeyChecking accept-new", "-o", "UpdateHostKeys yes"}, params...)
  58. case "push":
  59. // TODO: create a push cache
  60. return
  61. default:
  62. if len(params) > 0 {
  63. shell = params
  64. } else {
  65. shell = []string{"sh", "-c"}
  66. }
  67. }
  68. log.Debug().Str("id", strings.Join(chk.FullID(), "/")).Strs("command", append(shell, chk.Command)).Msg("Executing check command")
  69. ctx, cancel := context.WithTimeout(context.Background(), ConnectionTimeout)
  70. defer cancel()
  71. c := exec.CommandContext(ctx, shell[0], append(shell[1:], "cd ~; [ -e /etc/chihuahua.rc ] && eval \"$(/etc/chihuahua.rc)\"; [ -e ~/.chihuahuarc ] && eval \"$(cat ~/.chihuahuarc)\"; "+chk.Command)...)
  72. var errbuf bytes.Buffer
  73. c.Stderr = &errbuf
  74. output, err := c.Output()
  75. stderr, _ := errbuf.ReadString(0)
  76. current.Error = strings.TrimSpace(string(stderr))
  77. if err == nil {
  78. current.Status = StatusOk
  79. } else if err.Error() == "exit status 1" {
  80. current.Status = StatusWarning
  81. } else if strings.HasPrefix(err.Error(), "exit status ") {
  82. current.Status = StatusCritical
  83. } else {
  84. current.Status = StatusUnknown
  85. current.Error = strings.TrimSpace(err.Error() + "\n" + current.Error)
  86. }
  87. current.Details = strings.TrimSpace(strings.SplitN(strings.SplitN(string(output), "\n", 2)[0], "|", 2)[0])
  88. // TODO: parse performance data
  89. current.LastUpdate = time.Now()
  90. log.Debug().Str("id", strings.Join(chk.FullID(), "/")).Str("status", current.Status.String()).Msg("check completed")
  91. chk.Result = current
  92. chk.Notify(cfg, previous)
  93. SendUpdate(cfg)
  94. }
  95. // if a check takes longer than the interval, the next occurence is skipped.
  96. func (chk *Check) Schedule(cfg *Config) {
  97. // TODO: make sure that if the interval didn't change the check won't be run again
  98. // maybe just use lastchecktime + interval?!
  99. chk.JobLock.Lock()
  100. var err error
  101. if chk.Job != nil {
  102. chk.Job.Quit <- true
  103. }
  104. chk.Job = nil
  105. if chk.Disable {
  106. chk.JobLock.Unlock()
  107. return
  108. }
  109. // TODO: random offset to distribute checks over time?!
  110. chk.Job, err = scheduler.Every(int(chk.Interval.Seconds())).Seconds().Run(func() {
  111. chk.Run(cfg)
  112. })
  113. chk.JobLock.Unlock()
  114. if err != nil {
  115. // TODO: when does this happen?!
  116. log.Fatal().Err(err).Msg("Scheduling error, exiting!")
  117. }
  118. }
  119. func RunOnce(cfg *Config) []error {
  120. errs := []error{}
  121. cfg.Walk(func(server *ServerOrGroup) {
  122. if server.Checks != nil {
  123. for _, chk := range server.Checks {
  124. if !chk.Disable {
  125. chk.Run(cfg)
  126. if chk.Result.Status != StatusOk {
  127. errs = append(errs, errors.New("check failed: \""+strings.Join(chk.FullID(), "/")+"\" returned "+chk.Result.Status.String()))
  128. }
  129. }
  130. }
  131. }
  132. })
  133. if len(errs) == 0 {
  134. return nil
  135. }
  136. return errs
  137. }
  138. func Schedule(cfg *Config) {
  139. cfg.Walk(func(server *ServerOrGroup) {
  140. if server.Checks != nil {
  141. for _, chk := range server.Checks {
  142. chk.Schedule(cfg)
  143. }
  144. }
  145. })
  146. }