HCL & more features from v2 #35

Manually merged
zottelchin merged 32 commits from feature-hcl into master 2 years ago
  1. 1
      .dockerignore
  2. 7
      .gitignore
  3. 19
      Dockerfile
  4. 93
      README.md
  5. 184
      api.go
  6. 182
      check.go
  7. 17
      chihuahua.example.yml
  8. 67
      cmd/log.go
  9. 87
      cmd/main.go
  10. 70
      config/model.go
  11. 427
      config/parse.go
  12. 84
      config/setup.go
  13. 55
      config/watch.go
  14. 249
      docs/Configuration.md
  15. 14
      go.mod
  16. 76
      go.sum
  17. 40
      notifiers/batch.go
  18. 40
      notifiers/console.go
  19. 257
      notifiers/email.go
  20. 57
      notifiers/gotify.go
  21. 136
      notify.go
  22. 45
      print.go
  23. 64
      resources/chihuahua.hcl
  24. 116
      resources/notification-email.html
  25. 36
      resources/notification-email.subject
  26. 25
      resources/notification-email.txt
  27. 55
      resources/setup.sh
  28. 39
      setup.go
  29. 98
      types.go
  30. 33
      types/config.go
  31. 10
      types/server.go
  32. 658
      web/data.go
  33. 18
      web/index.html
  34. 10
      web/script.js

1
.dockerignore

@ -2,3 +2,4 @@ chihuahua
chihuahua.yml
Dockerfile
cache.json
go.sum

7
.gitignore vendored

@ -1,4 +1,7 @@
/chihuahua
/chihuahua.yml
/web/data.go
/chihuahua.hcl
/cache.json
/go.sum
/pkged.go
/data/
/.idea/

19
Dockerfile

@ -1,13 +1,16 @@
FROM golang:1-alpine AS build
RUN apk --no-cache add git
RUN go get github.com/go-bindata/go-bindata/...
COPY ./go.mod /build/go.mod
WORKDIR /build
RUN go mod download
COPY . /go/src/codeberg.org/momar/chihuahua
RUN go get codeberg.org/momar/chihuahua/cmd
WORKDIR /go/src/codeberg.org/momar/chihuahua
# Using v0.12.8 because of https://github.com/markbates/pkger/issues/56
RUN go get github.com/markbates/pkger@v0.12.8 && go build -o /bin/pkger github.com/markbates/pkger/cmd/pkger
COPY . /build
RUN go generate
RUN go build -ldflags '-s -w' -o /tmp/chihuahua codeberg.org/momar/chihuahua/cmd
RUN go build -ldflags '-s -w' -o chihuahua ./cmd
FROM alpine
@ -16,10 +19,10 @@ RUN apk add --no-cache openssh ca-certificates monitoring-plugins
RUN mkdir -p /data/.ssh &&\
echo -e "Host *\n StrictHostKeyChecking accept-new\n UpdateHostKeys yes\n IdentityFile /data/.ssh/id_rsa\n IdentityFile /data/.ssh/id_dsa\n IdentityFile /data/.ssh/id_ecdsa\n IdentityFile /data/.ssh/id_ed25519\n UserKnownHostsFile /data/.ssh/known_hosts" >> /etc/ssh/ssh_config
COPY --from=build /tmp/chihuahua /bin/chihuahua
COPY --from=build /build/chihuahua /bin/chihuahua
EXPOSE 80
ENV ADDRESS :80
ENV PORT 80
ENV PATH /usr/lib/monitoring-plugins:/usr/local/bin:/usr/bin:/bin
ENV HOME /data
WORKDIR /data
CMD ["/bin/chihuahua", "-c", "/data/chihuahua.yml"]
ENTRYPOINT ["/bin/chihuahua", "-c", "/data/chihuahua.hcl"]

93
README.md

@ -1,21 +1,54 @@
# chihuahua
**The smallest watchdog on earth.** Tiny, [monitoring-plugins](https://www.monitoring-plugins.org/) compatible monitoring with a status page, built with [Go](https://golang.org), [Air](https://github.com/aofei/air), [Vue.js](https://vuejs.org/) and [Bulma](https://bulma.io/).
**The smallest watchdog on earth.** Tiny, [monitoring-plugins](https://www.monitoring-plugins.org/) compatible monitoring with a status page, built with [Go](https://golang.org).
- Run nagios checks on a remote server
- Run nagios/monitoring-plugins checks on a remote server
- Alert on state change via [Gotify](https://gotify.net) or email
- Show a nice little status page
![Screenshot](https://i.vgy.me/WKL8Dl.png)
## Getting Started
```bash
mkdir -p ./data/.ssh
ssh-keygen -b 2048 -f ./data/.ssh/id_rsa -P "" -C "Chihuahua Monitoring"
nano ./data/chihuahua.yml
docker run -d -p 8080:80 -v "$PWD/data:/data" momar/chihuahua
nano data/chihuahua.hcl
```
Chihuahua now runs at http://localhost:8080.
### Using a systemd service (without Docker)
You can find a full example configuration file at [chihuahua.hcl](https://codeberg.org/momar/chihuahua/src/branch/master/resources/chihuahua.hcl), and should also read the [full configuration manual](https://codeberg.org/momar/chihuahua/src/branch/master/docs/Configuration.md), but to get started, you can use a simple configuration like this:
```hcl
check "cpu" {
# This check will be run on every server, unless the
# server specifies an overriding check called "cpu".
name = "CPU Load"
command = "check_load -r -w 3,2,1.25 -c 4,3,2"
}
server "server-local" {
connection = "local"
}
server "server-01" {
connection = "ssh chihuahua@example.org"
check "ram" {
command = "check_memory -w 8 -c 3" # parameters in percent
}
}
notifier "email-myself" {
# emails are delayed by 5 minutes by default to accumulate
# multiple notifications into a single notification email.
type = "smtp"
server = "user:password@smtp.example.org"
from = "Chihuahua <chihuahua@example.org>"
to = ["myself@example.org"]
}
```
<details>
<summary>Use a systemd service to install the server without Docker (unsupported)</summary>
```bash
wget https://codeberg.org/momar/chihuahua/releases/download/v1.4/chihuahua-x64.gz -O- | gunzip > /tmp/chihuahua
sudo install -m755 /tmp/chihuahua /usr/local/bin/chihuahua
@ -29,44 +62,26 @@ sudo wget https://codeberg.org/momar/chihuahua/raw/branch/master/chihuahua.servi
sudo systemctl enable chihuahua.service
sudo systemctl start chihuahua.service
```
</details>
## Example chihuahua.yml
```yaml
notifications:
- provider: gotify # Send notifications via https://gotify.net/
server: https://gotify.example.org
token: QwErTyUiOpAsDfG
- provider: email # Send notifications via E-Mail
host: smtp.example.org
port: 25
username: example
password: QwErTyUiOpAsDfG
from: noreply@example.org
to: postmaster@example.org
servers:
example:
ssh: monitoring@example.org -p 2222 # SSH connection parameters for the server. Leave away to run checks locally. You can add a ~/.chihuahuarc on the server that's being run before each check.
checks:
disk-space: check_disk -w 15% -c 5% # any monitoring-plugins compatible check command works here - you can basically use every command that returns 0 on success, 1 on warnings and 2 on critical errors
sudo-check: check_sudo check_disk -w 15% -c 5% # run checks as root
ram-check: check_linux_memory # run checks in /var/chihuahua - download like that: wget https://raw.githubusercontent.com/hugme/Nag_checks/master/check_linux_memory -O /var/chihuahua/ && chmod +x /var/chihuahua/check_linux_memory
# Examples for basic checks:
http: check_http -H example.org -w 2 -c 15 -t 20 --sni --ssl -r "This domain is established to be used for illustrative examples in documents."
ssh: check_ssh -t 15 example.org
process: "check_procs -c 1: -C nginx" # check if nginx is running (has at least 1 process)
errorInLogfile: check_sudo check_log -F /var/log/auth.log -O ./logs/auth.log -q "session opened for user root" # check for root sessions
backupInLast24h: check_file_age -c 86400 /mnt/backup/meta.json # check if there was a backup in the last 24 hours (86400 seconds)
### Set up a server for connections (Debian/Ubuntu/Alpine/...)
```bash
sudo sh <(curl http://status.example.org:8080/setup.sh)
```
## Example debian server setup
You can now use `connection = "ssh chihuahua@example.org"` to connect to your server with a limited user.
To completely **remove** the Chihuahua setup from your server, use the following commands:
```bash
sudo sh <(curl http://status.example.org:8080/setup.sh)
sudo userdel -r chihuahua
sudo apt-get remove --auto-remove monitoring-plugins # or "apk del monitoring-plugins" on Alpine
sudo rm /usr/local/bin/check_sudo
sudo sed -i '/^chihuahua /d' /etc/sudoers
```
## API
TODO: this should be documented more thoroughly - maybe provide an API Blueprint?
```
GET /setup.sh
GET /checks
@ -78,8 +93,8 @@ GET /checks/:server/:check
Requires [Go](https://golang.org)
```bash
git clone https://codeberg.org/momar/chihuahua.git && cd chihuahua
cp chihuahua.example.yml chihuahua.yml && nano chihuahua.yml
go generate && DEBUG=1 go run ./cmd -c chihuahua.yml
cp resources/chihuahua.hcl .
go run ./cmd --debug
```
## Roadmap

184
api.go

@ -1,89 +1,169 @@
package chihuahua
//go:generate go-bindata -pkg web -fs -prefix web -o web/data.go -ignore data\\.go web web/icons web/lib
//go:generate pkger
import (
"bytes"
"github.com/rs/zerolog/log"
"io/ioutil"
"net/http"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"codeberg.org/momar/chihuahua/types"
"codeberg.org/momar/chihuahua/web"
"codeberg.org/momar/logg"
"codeberg.org/momar/ternary"
"github.com/aofei/air"
"github.com/markbates/pkger"
)
func Api(servers map[string]*types.Server) {
var keyList string
func GenerateKeys() {
// Get SSH keys for the setup script
// getCheck returns the check results
getCheck := func(req *air.Request, res *air.Response) error {
result := map[string]map[string]*types.Check{}
for serverName, server := range servers {
result[serverName] = map[string]*types.Check{}
for checkName, check := range server.Checks {
result[serverName][checkName] = check
// chown if someone already created a .ssh directory; mostly relevant for Docker
filepath.Walk(filepath.Join(os.Getenv("HOME"), ".ssh/"), func(path string, info os.FileInfo, err error) error {
os.Chown(path, os.Getuid(), os.Getgid())
return nil
})
keyFiles, _ := filepath.Glob(filepath.Join(os.Getenv("HOME"), ".ssh/id_*"))
keyList = ""
if keyFiles != nil {
for _, file := range keyFiles {
if !strings.HasSuffix(file, ".pub") {
continue // it's a private key, and we don't want those to be reachable unauthenticated via HTTP :)
}
key, _ := ioutil.ReadFile(file)
keyList += strings.TrimSpace(string(key)) + "\n"
}
}
if keyList == "" {
if info, err := os.Stat(filepath.Join(os.Getenv("HOME"), ".ssh")); err != nil || !info.IsDir() {
err := os.MkdirAll(filepath.Join(os.Getenv("HOME"), ".ssh"), 755)
if err != nil {
log.Warn().Str("caller", "api").Err(err).Msg("couldn't generate an SSH key")
}
}
cmd := exec.Command("ssh-keygen", "-t", "ed25519", "-N", "", "-C", "Chihuahua Monitoring", "-f", filepath.Join(os.Getenv("HOME"), ".ssh/id_ed25519"))
cmd.Stderr = os.Stderr
cmd.Stdout = os.Stdout
log.Info().Str("caller", "api").Msgf("no SSH key found, generating one at %s", cmd.Args[len(cmd.Args)-1])
err := cmd.Run()
if err != nil {
log.Warn().Str("caller", "api").Err(err).Msg("couldn't generate an SSH key")
}
}
}
func Api(cfg *Config) {
// TODO: switch to Gin, for SSE and more
s := req.Param("server").Value()
c := req.Param("check").Value()
if c != nil || s != nil {
sres, ok := result[s.String()]
if ok && c != nil {
cres, ok := sres[c.String()]
if ok {
return res.WriteJSON(cres)
// getCheck returns the check results
getCheck := func(req *air.Request, res *air.Response) error {
param := req.Param("*")
if param == nil {
// All servers
return res.WriteJSON(cfg.Servers)
}
path := param.Value()
if path == nil {
// All servers
return res.WriteJSON(cfg.Servers)
}
id := strings.Split(path.String(), "/")
parent := &ServerOrGroup{Children: cfg.Servers}
for i := 0; i < len(id); i++ {
idPart, err := url.QueryUnescape(id[i])
if err != nil {
idPart = id[i]
}
found := false
if parent.Children != nil {
for _, child := range parent.Children {
if child.ID == idPart {
parent = child
found = true
break
}
}
} else if ok {
return res.WriteJSON(sres)
}
res.Status = 404
return res.WriteJSON(map[string]bool{})
if !found {
// allow single checks
if parent.Checks != nil {
for _, check := range parent.Checks {
if check.ID == idPart {
if i+1 >= len(id) {
return res.WriteJSON(check)
}
}
}
}
res.Status = 404
return res.WriteJSON(map[string]struct{}{})
}
}
return res.WriteJSON(result)
return res.WriteJSON(parent)
}
// putMessage adds a message to a check
putMessage := func(req *air.Request, res *air.Response) error {
// TODO
return nil
}
//putMessage := func(req *air.Request, res *air.Response) error {
// // TODO
// return nil
//}
// deleteMessage deletes the message from a check
deleteMessage := func(req *air.Request, res *air.Response) error {
// TODO:
return nil
}
//deleteMessage := func(req *air.Request, res *air.Response) error {
// // TODO:
// return nil
//}
// Get the setup script
setupFile, err := pkger.Open("codeberg.org/momar/chihuahua:/resources/setup.sh")
if err != nil {
log.Error().Str("caller", "api").Err(err).Msg("couldn't open setup.sh from web resources")
os.Exit(1)
}
setupScript, err := ioutil.ReadAll(setupFile)
if err != nil {
log.Error().Str("caller", "api").Err(err).Msg("couldn't read setup.sh from web resources")
os.Exit(1)
}
setupScript = bytes.ReplaceAll(setupScript, []byte("[CHIHUAHUA_PUBLIC_KEYS]"), []byte(keyList))
getSetupScript := func(req *air.Request, res *air.Response) error {
files, _ := filepath.Glob(filepath.Join(os.Getenv("HOME"), ".ssh/id_*.pub"))
keys := ""
for _, file := range files {
key, _ := ioutil.ReadFile(file)
keys += strings.TrimSpace(string(key)) + "\n"
}
res.WriteString(SetupScript(string(keys)))
return nil
return res.Write(bytes.NewReader(setupScript))
}
app := air.New()
app.GET("/setup.sh", getSetupScript)
app.GET("/checks", getCheck)
app.GET("/checks/:server", getCheck)
app.GET("/checks/:server/:check", getCheck)
app.PUT("/checks/:server/:check", putMessage)
app.DELETE("/checks/:server/:check", deleteMessage)
app.GET("/checks/*", getCheck)
//app.PUT("/checks/*", putMessage)
//app.DELETE("/checks/:server/:check", deleteMessage)
app.NotFoundHandler = air.WrapHTTPHandler(http.FileServer(web.AssetFile()))
app.NotFoundHandler = air.WrapHTTPHandler(http.FileServer(pkger.Dir("codeberg.org/momar/chihuahua:/web")))
app.ErrorHandler = func(err error, request *air.Request, response *air.Response) {
log.Error().Err(err).Msg("internal server error")
}
app.Address = ternary.Default(os.Getenv("ADDRESS"), ":8080").(string)
logg.Tag("server").Info("Listening on %s", app.Address)
err := app.Serve()
app.Address = ":"
if host := os.Getenv("HOST"); host != "" {
app.Address = host + app.Address
}
if port := os.Getenv("PORT"); port != "" {
app.Address += port
} else {
app.Address += "8080"
}
log.Info().Str("caller", "api").Str("address", app.Address).Msg("starting API web server")
err = app.Serve()
if err != nil {
logg.Error("%s", err)
os.Exit(2)
log.Error().Str("caller", "api").Err(err).Msg("web server threw an error")
os.Exit(1)
}
}
func SendUpdate(cfg *Config) {
// TODO: send current check state to browsers via SSE
}

182
check.go

@ -3,108 +3,146 @@ package chihuahua
import (
"bytes"
"context"
"errors"
"github.com/carlescere/scheduler"
"github.com/rs/zerolog/log"
"net/url"
"os/exec"
"strings"
"time"
"codeberg.org/momar/chihuahua/types"
"codeberg.org/momar/logg"
)
// MaxConnections defines the maximum number of simultaneous connections against a single server
const MaxConnections = 5
func (chk *Check) FullID() string {
return chk.Parent.FullID() + "/" + url.PathEscape(chk.ID)
}
const ConnectionTimeout = 30 * time.Second
func (sog *ServerOrGroup) FullID() string {
if sog.Parent != nil {
return sog.Parent.FullID() + "/" + url.PathEscape(sog.ID)
}
return sog.ID
}
// RunCheck runs a check and populates it with the interpreted result
func RunCheck(checkName string, check *types.Check, shell []string) {
logg.Tag("check", checkName).Debug("Executing command: %#v", append(shell, check.Command))
var ConnectionTimeout = 30 * time.Second
func (chk *Check) Run(cfg *Config) {
Working.RLock()
// TODO: read config
Working.RUnlock()
previous := chk.Result
current := CheckResult{}
var shell []string
var params = strings.Split(chk.Parent.ConnectionParams, " ")
for i := 0; i < len(params); i++ {
params[i] = strings.TrimSpace(params[i])
if params[i] == "''" || params[i] == `""` {
params[i] = ""
} else if params[i] == "" {
params = append(params[:i], params[i+1:]...)
i--
}
}
switch chk.Parent.ConnectionType {
case "ssh":
shell = append([]string{"ssh"}, params...)
case "push":
// TODO: create a push cache
return
default:
if len(params) > 0 {
shell = params
} else {
shell = []string{"sh", "-c"}
}
}
log.Debug().Str("id", chk.FullID()).Strs("command", append(shell, chk.Command)).Msg("Executing check command",)
ctx, cancel := context.WithTimeout(context.Background(), ConnectionTimeout)
defer cancel()
c := exec.CommandContext(ctx, shell[0], append(shell[1:], "[ -e ~/.chihuahuarc ] && eval \"$(cat ~/.chihuahuarc)\"; "+check.Command)...)
c := exec.CommandContext(ctx, shell[0], append(shell[1:], "[ -e ~/.chihuahuarc ] && eval \"$(cat ~/.chihuahuarc)\"; "+chk.Command)...)
var errbuf bytes.Buffer
c.Stderr = &errbuf
output, err := c.Output()
stderr, _ := errbuf.ReadString(0)
check.Error = strings.TrimSpace(string(stderr))
current.Error = strings.TrimSpace(string(stderr))
if err == nil {
check.Status = types.StatusOk
current.Status = StatusOk
} else if err.Error() == "exit status 1" {
check.Status = types.StatusWarning
} else if err.Error() == "exit status 2" {
check.Status = types.StatusCritical
current.Status = StatusWarning
} else if strings.HasPrefix(err.Error(), "exit status ") {
current.Status = StatusCritical
} else {
check.Status = types.StatusUnknown
if err.Error() != "exit status 3" {
check.Error = strings.TrimSpace(err.Error() + "\n" + check.Error)
}
current.Status = StatusUnknown
current.Error = strings.TrimSpace(err.Error() + "\n" + current.Error)
}
check.Details = strings.SplitN(strings.SplitN(string(output), "\n", 2)[0], "|", 2)[0]
current.Details = strings.TrimSpace(strings.SplitN(strings.SplitN(string(output), "\n", 2)[0], "|", 2)[0])
// TODO: parse performance data
check.LastUpdate = time.Now()
logg.Tag("check", checkName).Debug("Check completed, result %d", check.Status)
}
// RunServerChecks runs all checks on a server asynchronously (up to MaxConnections checks at the same time) and populates them with their interpreted results
func RunServerChecks(serverName string, server *types.Server) {
completed := 0
channel := make(chan bool)
connections := 0
current.LastUpdate = time.Now()
log.Debug().Str("id", chk.FullID()).Str("status", current.Status.String()).Msg("check completed")
if len(server.Shell) <= 0 {
server.Shell = []string{"sh", "-c"}
}
processCheck := func(checkName string, check *types.Check) {
// limit to 5 simultaneous connections
for connections > MaxConnections {
time.Sleep(250 * time.Millisecond)
}
connections++
logg.Tag("check", serverName).Debug("Processing check: %s", checkName)
oldStatus := check.Status
RunCheck(serverName+"/"+checkName, check, server.Shell)
// repeat the check if the check failed to mitigate hiccups
if oldStatus != check.Status && check.Status > 0 {
RunCheck(serverName+"/"+checkName, check, server.Shell)
}
chk.Result = current
chk.Notify(cfg, previous)
SendUpdate(cfg)
}
connections--
completed++
channel <- true
// if a check takes longer than the interval, the next occurence is skipped.
func (chk *Check) Schedule(cfg *Config) {
// TODO: make sure that if the interval didn't change the check won't be run again
// maybe just use lastchecktime + interval?!
chk.JobLock.Lock()
var err error
if chk.Job != nil {
chk.Job.Quit <- true
}
for checkName, check := range server.Checks {
go processCheck(checkName, check)
chk.Job = nil
if chk.Disable {
chk.JobLock.Unlock()
return
}
// Wait for checks to complete
for completed < len(server.Checks) && <-channel {
logg.Tag("check", serverName).Debug("%d checks left", len(server.Checks)-completed)
// TODO: random offset to distribute checks over time?!
chk.Job, err = scheduler.Every(int(chk.Interval.Seconds())).Seconds().Run(func() {
chk.Run(cfg)
})
chk.JobLock.Unlock()
if err != nil {
// TODO: when does this happen?!
log.Fatal().Err(err).Msg("Scheduling error, exiting!")
}
}
// RunAllChecks runs all checks on all servers asynchronously and populates them with their interpreted results
func RunAllChecks(servers map[string]*types.Server) {
completed := 0
channel := make(chan bool)
func RunOnce(cfg *Config) []error {
errs := []error{}
cfg.Walk(func(server *ServerOrGroup) {
if server.Checks != nil {
for _, chk := range server.Checks {
if !chk.Disable {
chk.Run(cfg)
if chk.Result.Status != StatusOk {
errs = append(errs, errors.New("check failed: \""+chk.FullID()+"\" returned "+chk.Result.Status.String()))
}
}
}
}
})
processServer := func(serverName string, server *types.Server) {
logg.Tag("check").Debug("Processing server: %s", serverName)
RunServerChecks(serverName, server)
completed++
channel <- true
}
for serverName, server := range servers {
go processServer(serverName, server)
if len(errs) == 0 {
return nil
}
return errs
}
// Wait for checks to complete
for completed < len(servers) && <-channel {
logg.Tag("check").Debug("%d servers left", len(servers)-completed)
}
func Schedule(cfg *Config) {
cfg.Walk(func(server *ServerOrGroup) {
if server.Checks != nil {
for _, chk := range server.Checks {
chk.Schedule(cfg)
}
}
})
}

17
chihuahua.example.yml

@ -1,17 +0,0 @@
notifications:
- provider: gotify # Send notifications via https://gotify.net/
server: https://gotify.example.org
token: QwErTyUiOpAsDfG
- provider: email # Send notifications via E-Mail
host: smtp.example.org
port: 25
username: example
password: QwErTyUiOpAsDfG
from: noreply@example.org
to: postmaster@example.org
servers:
example:
ssh: monitoring@example.org -p 2222 # SSH connection parameters for the server. Leave away to run checks locally.
checks:
disk-space: check_disk -w 15% -c 5% # monitoring-plugins compatible check command

67
cmd/log.go

@ -0,0 +1,67 @@
package main
import (
"fmt"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"os"
"path"
"runtime"
"strings"
"time"
"strconv"
)
var IgnoreCallers = []string{
"main.GetCaller",
"main.init.0.func1",
}
var IgnorePackages = []string{
"github.com/rs/zerolog",
"github.com/rs/zerolog/log",
}
func sliceContains(s []string, v string) bool {
for _, x := range s { if x == v { return true } }
return false
}
func GetCaller() string {
// TODO: automatically add timer for JSON logging
programCounters := make([]uintptr, 15)
n := runtime.Callers(0, programCounters)
if n > 0 {
frames := runtime.CallersFrames(programCounters[:n])
for more := true; more; {
var frameCandidate runtime.Frame
frameCandidate, more = frames.Next()
pkg := strings.TrimPrefix(path.Dir(frameCandidate.Function) + "/" + strings.SplitN(path.Base(frameCandidate.Function), ".", 2)[0], "./")
if frameCandidate.Function != "runtime.Callers" && (strings.Contains(pkg, "/") || pkg == "main") && !sliceContains(IgnorePackages, pkg) && !sliceContains(IgnoreCallers, frameCandidate.Function) {
return frameCandidate.Function + " [" + path.Base(frameCandidate.File) + ":" + strconv.Itoa(frameCandidate.Line) + "]"
}
}
}
return ""
}
func init() {
if os.Getenv("LOG") != "" && os.Getenv("LOG") != "console" {
return
}
cw := zerolog.NewConsoleWriter()
cw.FormatCaller = func(i interface{}) string {
if i != nil {
return fmt.Sprintf("\x1b[%sm%v\x1b[0m", "1;36", i)
}
c := GetCaller()
if c != "" {
return fmt.Sprintf("\x1b[%sm%v\x1b[0m", "1;36", strings.SplitN(path.Base(c), ".", 2)[0])
} else {
return ""
}
}
cw.FormatTimestamp = func(i interface{}) string {
return fmt.Sprintf("\x1b[%sm%v\x1b[0m", "90", time.Now().UTC().Format("2006-01-02 15:04:05 "))
}
log.Logger = zerolog.New(cw)
}

87
cmd/main.go

@ -1,79 +1,56 @@
package main
import (
"io/ioutil"
"codeberg.org/momar/chihuahua"
"codeberg.org/momar/chihuahua/config"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"os"
"strconv"
"time"
"codeberg.org/momar/chihuahua"
"codeberg.org/momar/chihuahua/types"
"codeberg.org/momar/logg"
"codeberg.org/momar/ternary"
"github.com/teris-io/cli"
"gopkg.in/yaml.v2"
// Register notifiers
_ "codeberg.org/momar/chihuahua/notifiers"
)
func main() {
app := cli.New("the smallest watchdog on earth")
app.WithOption(cli.NewOption("config", "path to the configuration file").WithChar('c').WithType(cli.TypeString)).
WithOption(cli.NewOption("interval", "check interval in seconds").WithChar('i').WithType(cli.TypeInt)).
app.WithOption(cli.NewOption("config", "set the path to the configuration file").WithChar('c').WithType(cli.TypeString)).
WithOption(cli.NewOption("once", "only run checks once and print the result - doesn't start the webserver").WithType(cli.TypeBool)).
WithOption(cli.NewOption("silent", "disables notifications; use with care!").WithType(cli.TypeBool)).
WithOption(cli.NewOption("silent", "disables notifications - use with care!").WithType(cli.TypeBool)).
WithOption(cli.NewOption("debug", "enable debugging output").WithType(cli.TypeBool)).
WithAction(func(args []string, options map[string]string) int {
// Parse the config
cfg := getConfig(options["config"])
servers := cfg.GetServers()
// Parse the interval
i, _ := strconv.Atoi(options["interval"])
id := time.Duration(ternary.Default(i, 300).(int)) * time.Second
first := true
if options["once"] == "" {
// Start the webserver
go chihuahua.Api(servers)
if options["debug"] == "" {
log.Logger = log.Logger.Level(zerolog.InfoLevel)
}
for options["once"] == "" || first {
// Run the actual checks
chihuahua.RunAllChecks(servers)
if options["silent"] == "" {
// Send notifications
chihuahua.Notify(cfg.Notifications, servers)
}
// Parse the config
cfg := config.Setup(options["config"], options["once"] == "")
if options["once"] == "" {
logg.Debug("Waiting for next check at %s...", time.Now().Add(id).UTC().Format("2006-01-02 15:04:05"))
time.Sleep(id)
}
first = false
// Remove notifiers if --silent is given
if options["silent"] != "" {
log.Warn().Msg("notifications are disabled as --silent is given")
cfg.Notifiers = map[string]chihuahua.Notifier{}
}
// Only run once if --once is given
if options["once"] != "" {
// Print the output if --once is set
chihuahua.Print(servers)
errs := chihuahua.RunOnce(cfg)
if errs != nil {
os.Exit(2)
} else {
os.Exit(0)
}
}
// Start scheduler & API/webserver otherwise
chihuahua.GenerateKeys()
chihuahua.Schedule(cfg)
chihuahua.Api(cfg)
return 0
})
os.Exit(app.Run(os.Args, os.Stdout))
}
func getConfig(cfgPath string) *types.Config {
if cfgPath == "" {
cfgPath = "/etc/chihuahua.yml"
}
cfgFile, err := ioutil.ReadFile(cfgPath)
if err != nil {
logg.Tag("config", "file").Error("%s", err)
os.Exit(2)
}
cfg := &types.Config{}
err = yaml.Unmarshal(cfgFile, cfg)
if err != nil {
logg.Tag("config", "yaml").Error("%s", err)
os.Exit(2)
}
return cfg
}

70
config/model.go

@ -0,0 +1,70 @@
package config
import (
"github.com/hashicorp/hcl/v2"
"time"
)
type Config struct {
Check []Check `hcl:"check,block"`
Group []ServerOrGroup `hcl:"group,block"`
Server []ServerOrGroup `hcl:"server,block"`
Notifier []Notifier `hcl:"notifier,block"`
RootURL *string `hcl:"root_url"`
Disable *bool `hcl:"disable"`
Notify *[]string `hcl:"notify"`
Verify *uint `hcl:"verify"`
Interval *string `hcl:"interval"`
interval *time.Duration
}
type Context struct {
Parent []string
Disable bool
Notify []string
Verify uint
Interval time.Duration
Checks []Check
}
type Check struct {
ID string `hcl:"id,label"`
Name *string `hcl:"name"`
Command string `hcl:"command"`
Disable *bool `hcl:"disable"`
Notify *[]string `hcl:"notify"`
Verify *uint `hcl:"verify"`
Interval *string `hcl:"interval"`
interval *time.Duration
}
// Server and Group exist just to check if there are any invalid fields
type Empty struct {}
type ServerOrGroup struct {
ID string `hcl:"id,label"`
Name *string `hcl:"name"`
Connection *string `hcl:"connection"`
connectionType string
Disable *bool `hcl:"disable"`
Notify *[]string `hcl:"notify"`
Verify *uint `hcl:"verify"`
Interval *string `hcl:"interval"`
interval *time.Duration
Check []Check `hcl:"check,block"`
Group []ServerOrGroup `hcl:"group,block"`
Server []ServerOrGroup `hcl:"server,block"`
Info hcl.Body `hcl:",remain"`
}
type Notifier struct{
ID string `hcl:"id,label"`
Type string `hcl:"type"`
Remain hcl.Body `hcl:",remain"`
}

427
config/parse.go

@ -0,0 +1,427 @@
package config
import (
"codeberg.org/momar/chihuahua"
"errors"
"github.com/hashicorp/hcl/v2/gohcl"
"github.com/hashicorp/hcl/v2/hclsimple"
"github.com/rs/zerolog/log"
"reflect"
"strings"
"time"
)
func New() *chihuahua.Config {
return &chihuahua.Config{
Servers: []*chihuahua.ServerOrGroup{},
Notifiers: map[string]chihuahua.Notifier{},
}
}
// TODO: lots of variable names just don't make any sense at all!
// TODO: better error messages (most are good already but the error position is missing for some)
// TODO: check if references vs. values are correctly handled
func Parse(path string, cfg *chihuahua.Config) error {
if path == "" {
path = "/etc/chihuahua.hcl"
}
src := &Config{}
err := hclsimple.DecodeFile(path, nil, src)
if err != nil {
return err
}
log.Trace().Msg("configuration has been parsed by HCL")
// parse notifiers, but don't apply them yet (so applying doesn't throw errors)
notifierStructs := map[string]chihuahua.Notifier{}
for _, notifierConfig := range src.Notifier {
if providerType, ok := chihuahua.Notifiers[notifierConfig.Type]; ok {
var provider = reflect.New(reflect.TypeOf(providerType).Elem()).Interface()
diag := gohcl.DecodeBody(notifierConfig.Remain, nil, provider)
if diag.HasErrors() {
return errors.New(diag.Error())
}
notifierStructs[notifierConfig.ID] = provider.(chihuahua.Notifier)
} else {
mir := notifierConfig.Remain.MissingItemRange()
notifiers := []string{}
for id, _ := range chihuahua.Notifiers {
notifiers = append(notifiers, id)
}
return errors.New(mir.String() + ": Invalid value; The argument \"type\" must be a valid notifier type (available types: " + strings.Join(notifiers, ", ") + ").")
}
}
// parse intervals recursively
var parseIntervals func(serversOrGroups []ServerOrGroup) error
parseIntervals = func(serversOrGroups []ServerOrGroup) error {
for i, serverOrGroup := range serversOrGroups {
if serverOrGroup.Interval != nil {
interval, err := time.ParseDuration(*serverOrGroup.Interval)
if err != nil {
return err // TODO: print position
}
serversOrGroups[i].interval = &interval
}
if serverOrGroup.Check != nil {
for j, chk := range serverOrGroup.Check {
if chk.Interval != nil {
interval, err := time.ParseDuration(*chk.Interval)
if err != nil {
return err // TODO: print position
}
serversOrGroups[i].Check[j].interval = &interval
}
if chk.Notify != nil {
for _, notifierID := range *chk.Notify {
if _, ok := notifierStructs[notifierID]; !ok {
return errors.New("notifier not found: " + notifierID) // TODO: print position
}
}
}
}
}
if serverOrGroup.Notify != nil {
for _, notifierID := range *serverOrGroup.Notify {
if _, ok := notifierStructs[notifierID]; !ok {
return errors.New("notifier not found: " + notifierID) // TODO: print position
}
}
}
if serverOrGroup.Server != nil {
if err := parseIntervals(serversOrGroups[i].Server); err != nil {
return err
}
}
if serverOrGroup.Group != nil {
if err := parseIntervals(serversOrGroups[i].Group); err != nil {
return err
}
}
}
return nil
}
if src.Interval != nil {
interval, err := time.ParseDuration(*src.Interval)
if err != nil {
return err // TODO: print position
}
src.interval = &interval
}
if err := parseIntervals(src.Server); err != nil {
return err
}
if err := parseIntervals(src.Group); err != nil {
return err
}
ctx := Context{
Parent: []string{},
Disable: false,
Notify: nil,
Verify: 0,
Interval: 5 * time.Minute,
Checks: src.Check,
}
// apply values from src to ctx
if src.Disable != nil {
ctx.Disable = *src.Disable
}
if src.Notify != nil {
ctx.Notify = *src.Notify
for _, notifierID := range ctx.Notify {
if _, ok := notifierStructs[notifierID]; !ok {
return errors.New("notifier not found: " + notifierID) // TODO: print position
}
}
}
if src.Verify != nil {
ctx.Verify = *src.Verify
}
if src.interval != nil {
ctx.Interval = *src.interval
}
// merge server & group array to keep the order from the configuration file
src.Group, err = Merge(src.Server, src.Group)
if err != nil {
return err
}
src.Server = nil
// FROM THIS POINT, NO ERROR SHALL BE THROWN!
// apply the server/group config to the target configuration
tmp := &chihuahua.ServerOrGroup{Children:cfg.Servers}
Apply(src.Group, ctx, tmp)
cfg.Servers = tmp.Children // if the reference has changed, e.g. by append(), override it
notifierIDs := map[string]struct{}{}
// apply notifiers (which have been parsed before so now there will be no error)
for id, provider := range notifierStructs {
providerOld, okOld := chihuahua.Notifiers[id]
if okOld {
ctxOld, okOld := providerOld.(chihuahua.NotifierWithContext)
ctxNew, okNew := provider.(chihuahua.NotifierWithContext)
if cfg.Notifiers[id] == nil || reflect.TypeOf(cfg.Notifiers[id]) != reflect.TypeOf(provider) || !okOld || !okNew {
// modify existing one without context
cfg.Notifiers[id] = provider.(chihuahua.Notifier)
} else {
// modify existing notifier with context
ctxNew.Import(ctxOld.Export())
cfg.Notifiers[id] = ctxNew
}
} else {
// add new notifier
cfg.Notifiers[id] = provider.(chihuahua.Notifier)
}
notifierIDs[id] = struct{}{}
}
// remove stray notifiers
for id := range cfg.Notifiers {
if _, ok := notifierIDs[id]; !ok {
delete(cfg.Notifiers, id)
}
}
// apply root URL
if src.RootURL != nil {
cfg.RootURL = *src.RootURL
} else {
cfg.RootURL = ""
}
log.Trace().Interface("config", cfg).Msg("configuration has been updated")
return nil
}
// Convert applies the parsed check configuration "in" to the runtime check
// "out". Settings defined on the check itself override the inherited context
// values (disable/notify/verify/interval); the display name defaults to the
// check ID. Checks that have never run get a placeholder "unknown" result.
func (in Check) Convert(out *chihuahua.Check, ctx Context) {
	// Check-level overrides take precedence over the inherited context.
	if d := in.Disable; d != nil {
		ctx.Disable = *d
	}
	if n := in.Notify; n != nil {
		ctx.Notify = *n
	}
	if v := in.Verify; v != nil {
		ctx.Verify = *v
	}
	if iv := in.interval; iv != nil {
		ctx.Interval = *iv
	}

	out.Disable = ctx.Disable
	out.Notifiers = nil
	if ctx.Notify != nil {
		// Re-slice the context slice header (backing array stays shared,
		// exactly as before).
		out.Notifiers = ctx.Notify[:]
	}
	out.Verify = ctx.Verify
	out.Interval = ctx.Interval

	// The human-readable name falls back to the ID when not set explicitly.
	name := in.ID
	if in.Name != nil {
		name = *in.Name
	}
	out.Name = name
	out.ID = in.ID
	out.Command = in.Command

	// First-time initialization: a zero LastUpdate means this check has
	// never produced a result yet.
	if out.Result.LastUpdate.IsZero() {
		out.Result.Status = chihuahua.StatusUnknown
		out.Result.LastUpdate = time.Now()
		out.Result.Details = "Waiting for first check..."
	}
}
// Apply merges the parsed configuration entries "in" into the runtime tree
// rooted at "out": existing children and checks are updated in place where
// their IDs match, new ones are created, and entries no longer present in
// the configuration are removed (their scheduled jobs are stopped first).
// ctx carries the defaults inherited from parent groups (disable, notify,
// verify, interval) as well as the checks accumulated along the way.
func Apply(in []ServerOrGroup, ctx Context, out *chihuahua.ServerOrGroup) {
	// index the existing children by ID so they can be updated in place
	childIndices := map[string]int{}
	for i, child := range out.Children {
		childIndices[child.ID] = i
	}
	childIDs := map[string]struct{}{}
	for _, src := range in {
		childIDs[src.ID] = struct{}{}
		var res *chihuahua.ServerOrGroup
		if i, ok := childIndices[src.ID]; ok {
			res = out.Children[i]
		} else {
			res = &chihuahua.ServerOrGroup{
				ID:               src.ID,
				Name:             src.ID,
				ConnectionType:   "",
				ConnectionParams: "",
				Checks:           []*chihuahua.Check{},
				Children:         []*chihuahua.ServerOrGroup{},
				Parent:           out,
			}
			out.Children = append(out.Children, res)
		}
		res.Parent = out
		if src.Name != nil {
			res.Name = *src.Name
		}
		// Checks inherited from the parents plus the ones defined here.
		// Copy into a fresh slice: a bare append(ctx.Checks, ...) could
		// reuse ctx.Checks' backing array and let sibling iterations
		// overwrite each other's appended checks.
		checks := make([]Check, 0, len(ctx.Checks)+len(src.Check))
		checks = append(checks, ctx.Checks...)
		checks = append(checks, src.Check...)
		ctx2 := Context{
			Disable:  ctx.Disable,
			Notify:   ctx.Notify,
			Verify:   ctx.Verify,
			Interval: ctx.Interval,
			Checks:   checks,
		}
		if src.Disable != nil {
			ctx2.Disable = *src.Disable
		}
		if src.Notify != nil {
			ctx2.Notify = *src.Notify
		}
		if src.Verify != nil {
			ctx2.Verify = *src.Verify
		}
		if src.interval != nil {
			ctx2.Interval = *src.interval
		}
		if src.connectionType != "" {
			res.Children = nil // we're in a server now!
			res.ConnectionType = src.connectionType
			res.ConnectionParams = *src.Connection
			// index the existing checks by ID for in-place updates
			checkIndices := map[string]int{}
			for i, chk := range res.Checks {
				checkIndices[chk.ID] = i
			}
			checkIDs := map[string]struct{}{}
			for _, chk := range checks {
				checkIDs[chk.ID] = struct{}{}
				if i, ok := checkIndices[chk.ID]; ok { // TODO: doesn't work for whatever reason?!
					chk.Convert(res.Checks[i], ctx2)
					res.Checks[i].Parent = res
				} else {
					chkOut := &chihuahua.Check{}
					chk.Convert(chkOut, ctx2)
					chkOut.Parent = res
					res.Checks = append(res.Checks, chkOut)
					// the freshly appended check lives at the LAST index
					// (was len(res.Checks), an off-by-one pointing past it)
					checkIndices[chk.ID] = len(res.Checks) - 1
				}
			}
			// drop checks that are no longer configured, stopping their jobs
			for i := 0; i < len(res.Checks); i++ {
				if _, ok := checkIDs[res.Checks[i].ID]; !ok {
					if res.Checks[i].Job != nil {
						res.Checks[i].Job.Quit <- true
					}
					res.Checks = append(res.Checks[:i], res.Checks[i+1:]...)
					i--
				}
			}
		} else {
			res.Checks = nil // we're in a group now
			Apply(src.Group, ctx2, res)
		}
	}
	// stopChecks terminates all scheduled jobs of a server before removal.
	var stopChecks = func(s *chihuahua.ServerOrGroup) {
		if s.Checks != nil {
			for _, chk := range s.Checks {
				if chk.Job != nil {
					chk.Job.Quit <- true
				}
			}
		}
	}
	// drop children that are no longer configured
	for i := 0; i < len(out.Children); i++ {
		if _, ok := childIDs[out.Children[i].ID]; !ok {
			stopChecks(out.Children[i])
			out.Children = append(out.Children[:i], out.Children[i+1:]...)
			i--
		}
	}
}
// Merge combines the "server" and "group" blocks into a single slice ordered
// by their position in the configuration file (compared via the HCL source
// ranges), validating block-specific constraints on the way:
//   - groups must not carry a "connection" argument;
//   - servers require a "connection" argument starting with "local", "ssh"
//     or "push" ("ssh" additionally needs parameters, e.g. "ssh root@host");
//   - servers must not contain nested "server" or "group" blocks.
//
// A group's own children are merged recursively. On any violation an error
// containing the HCL source position is returned.
func Merge(servers []ServerOrGroup, groups []ServerOrGroup) ([]ServerOrGroup, error) {
	result := make([]ServerOrGroup, len(servers)+len(groups))
	var serversPos, groupsPos = 0, 0
	var err error
	for i := 0; i < len(result) && (serversPos < len(servers) || groupsPos < len(groups)); i++ {
		// Pick whichever block appears earlier in the source file.
		if serversPos >= len(servers) || (groupsPos < len(groups) && servers[serversPos].Info.MissingItemRange().Start.Line > groups[groupsPos].Info.MissingItemRange().Start.Line) {
			// Next one is the group
			group := groups[groupsPos]
			// check for server-only fields
			if group.Connection != nil {
				mir := group.Info.MissingItemRange()
				return nil, errors.New(mir.String() + ": Unsupported argument; An argument named \"connection\" is not expected here.")
			}
			// check for remaining, required or invalid fields
			if diag := gohcl.DecodeBody(group.Info, nil, &Empty{}); diag.HasErrors() {
				return nil, errors.New(diag.Error())
			}
			group.Group, err = Merge(group.Server, group.Group)
			if err != nil {
				return nil, err
			}
			group.Server = nil
			result[i] = group
			groupsPos++
		} else {
			// Next one is the server
			server := servers[serversPos]
			// check for group-only and server-required fields
			if server.Connection == nil {
				mir := server.Info.MissingItemRange()
				return nil, errors.New(mir.String() + ": Missing required argument; The argument \"connection\" is required, but no definition was found.")
			}
			// split "type params..." into the connection type and its parameters
			connectionParts := strings.SplitN(*server.Connection, " ", 2)
			server.connectionType = connectionParts[0]
			if len(connectionParts) > 1 {
				*server.Connection = strings.TrimSpace(connectionParts[1])
			} else {
				*server.Connection = ""
			}
			if server.connectionType != "local" && server.connectionType != "ssh" && server.connectionType != "push" {
				mir := server.Info.MissingItemRange()
				return nil, errors.New(mir.String() + ": Invalid value; The argument \"connection\" must start with \"local\", \"ssh\" or \"push\".")
			}
			if server.connectionType == "ssh" && strings.TrimSpace(*server.Connection) == "" {
				mir := server.Info.MissingItemRange()
				return nil, errors.New(mir.String() + ": Invalid value; The argument \"connection\" must contain additional parameters when starting with \"ssh\" (e.g. \"ssh -i /tmp/id_rsa root@example.org\").")
			}
			// NOTE: the original repeated the Connection == nil check here;
			// it was unreachable (Connection was already checked above and
			// dereferenced in between) and has been removed.
			// len() is nil-safe, so a separate != nil check is unnecessary.
			if len(server.Group) > 0 {
				mir := server.Info.MissingItemRange()
				return nil, errors.New(mir.String() + ": Unsupported block type; Blocks of type \"group\" are not expected here.")
			}
			if len(server.Server) > 0 {
				mir := server.Info.MissingItemRange()
				return nil, errors.New(mir.String() + ": Unsupported block type; Blocks of type \"server\" are not expected here.")
			}
			// check for remaining, required or invalid fields
			if diag := gohcl.DecodeBody(server.Info, nil, &Empty{}); diag.HasErrors() {
				return nil, errors.New(diag.Error())
			}
			server.Info = nil
			server.Group = nil
			server.Server = nil
			result[i] = server
			serversPos++
		}
	}
	return result, nil
}

84
config/setup.go

@ -0,0 +1,84 @@
package config
import (
"codeberg.org/momar/chihuahua"
"github.com/markbates/pkger"
"github.com/rs/zerolog/log"
"io/ioutil"
"os"
"path/filepath"
)
func Setup(sourcePath string, watch bool) *chihuahua.Config {
f := sourcePath
if f == "" {
if info, err := os.Stat("./chihuahua.hcl"); err == nil && !info.IsDir() {
f = "./chihuahua.hcl"
} else if info, err := os.Stat(filepath.Join(os.Getenv("HOME"), ".config/chihuahua.hcl")); err == nil && !info.IsDir() {
f = filepath.Join(os.Getenv("HOME"), ".config/chihuahua.hcl")
} else if info, err := os.Stat("/etc/chihuahua.hcl"); err == nil && !info.IsDir() {
f = "/etc/chihuahua.hcl"
} else {
// Write config
configText := loadConfig()
err := ioutil.WriteFile("/etc/chihuahua.hcl", configText, 0644)
if err != nil {
err = ioutil.WriteFile(filepath.Join(os.Getenv("HOME"), ".config/chihuahua.hcl"), configText, 0644)
if err != nil {
log.Error().Err(err).Msg("couldn't write default config to /etc/chihuahua.hcl or ~/.config/chihuahua.html")