author    Jeff Carr <[email protected]>  2024-10-27 11:02:50 -0500
committer Jeff Carr <[email protected]>  2024-10-27 11:02:50 -0500
commit    8fc2fbd9c9a6f05852cd597b246f0b4e634af7d7
tree      707c6044781eb4ef1f3ed9c4b756f6e605e4b6c8
parent    d38865a6cf3d9e11803e9f565a0dd0c763de479d
track droplets reported from each hypervisor
Signed-off-by: Jeff Carr <[email protected]>
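
The heart of this commit is the new lastDroplets map on HyperT: every poll stamps each droplet name a hypervisor reports with time.Now(), and anything whose stamp goes stale is presumed powered down. A minimal sketch of that pattern, with names simplified (the real fields live on HyperT in structs.go):

    package main

    import (
        "fmt"
        "time"
    )

    type hyper struct {
        lastDroplets map[string]time.Time // droplet name -> last time this hypervisor reported it
    }

    // record refreshes the last-seen timestamp on every poll.
    func (h *hyper) record(name string) {
        h.lastDroplets[name] = time.Now()
    }

    // stale returns droplets not reported within the cutoff window.
    func (h *hyper) stale(cutoff time.Duration) []string {
        var missing []string
        for name, t := range h.lastDroplets {
            if time.Since(t) > cutoff {
                missing = append(missing, name)
            }
        }
        return missing
    }

    func main() {
        h := &hyper{lastDroplets: make(map[string]time.Time)}
        h.record("vm1")
        fmt.Println(h.stale(5 * time.Second)) // empty right after recording
    }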
-rw-r--r--  dump.go      | 22
-rw-r--r--  http.go      | 19
-rw-r--r--  main.go      | 13
-rw-r--r--  poll.go      | 52
-rw-r--r--  structs.go   | 11
-rw-r--r--  watchdog.go  |  2
6 files changed, 86 insertions, 33 deletions
diff --git a/dump.go b/dump.go
index 57a3404..9291bb1 100644
--- a/dump.go
+++ b/dump.go
@@ -4,7 +4,9 @@ import (
"fmt"
"net/http"
"strings"
+ "time"
+ "go.wit.com/lib/gui/shell"
pb "go.wit.com/lib/protobuf/virtbuf"
)
@@ -44,3 +46,23 @@ func dumpDroplets(w http.ResponseWriter) {
}
}
}
+
+// status of the hypervisors
+func dumpHypervisors(w http.ResponseWriter) {
+ for _, h := range me.hypers {
+ // lastpoll time.Time // the last time the hypervisor polled
+ dur := time.Since(h.lastpoll)
+ tmp := shell.FormatDuration(dur)
+ fmt.Fprintln(w, h.pb.Hostname, "killcount =", h.killcount, "lastpoll:", tmp)
+ for name, t := range h.lastDroplets {
+ dur := time.Since(t)
+ tmp := shell.FormatDuration(dur)
+ d := findDroplet(name)
+ if d == nil {
+ fmt.Fprintln(w, "\t", h.pb.Hostname, "name =", name, "lastpoll:", tmp)
+ } else {
+ fmt.Fprintln(w, "\t", h.pb.Hostname, "name =", name, "lastpoll:", tmp, d.CurrentState)
+ }
+ }
+ }
+}
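
dumpHypervisors only needs an http.ResponseWriter, so it can be exercised directly with an httptest recorder. A rough test sketch, assuming the package-level `me` has been populated as in main.go:

    package main

    import (
        "fmt"
        "net/http/httptest"
        "testing"
    )

    func TestDumpHypervisors(t *testing.T) {
        rec := httptest.NewRecorder() // satisfies http.ResponseWriter
        dumpHypervisors(rec)
        fmt.Print(rec.Body.String()) // one line per hypervisor, an indented line per droplet
    }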
diff --git a/http.go b/http.go
index 937f9a7..2dddeb4 100644
--- a/http.go
+++ b/http.go
@@ -77,7 +77,7 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
}
if route == "/uptime" {
- ok, s := clusterHealthy()
+ ok, s := uptimeCheck()
if ok {
log.Info("Handling URL:", route, "cluster is ok", s)
fmt.Fprintln(w, s)
@@ -113,6 +113,18 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
return
}
+ // toggle poll logging
+ if route == "/poll" {
+ if POLL.Get() {
+ fmt.Fprintln(w, "POLL is true")
+ POLL.SetBool(false)
+ } else {
+ fmt.Fprintln(w, "POLL is false")
+ POLL.SetBool(true)
+ }
+ return
+ }
+
if route == "/dumpcluster" {
dumpCluster(w)
return
@@ -123,6 +135,11 @@ func okHandler(w http.ResponseWriter, r *http.Request) {
return
}
+ if route == "/dumphypervisors" {
+ dumpHypervisors(w)
+ return
+ }
+
if route == "/dumplibvirtxml" {
virtigoxml.DumpLibvirtxmlDomainNames()
return
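
Note the /poll handler prints the flag before flipping it, so the response reports the state being switched away from. A client sketch for the new route (the listen address is an assumption; virtigo's real port is configured elsewhere):

    package main

    import (
        "io"
        "log"
        "net/http"
        "os"
    )

    func main() {
        resp, err := http.Get("http://localhost:8080/poll") // port assumed
        if err != nil {
            log.Fatal(err)
        }
        defer resp.Body.Close()
        io.Copy(os.Stdout, resp.Body) // "POLL is true" means logging was on and is now off
    }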
diff --git a/main.go b/main.go
index 1168355..fe827eb 100644
--- a/main.go
+++ b/main.go
@@ -38,16 +38,17 @@ func main() {
}
// set defaults
- me.unstable = time.Now() // initialize the grid as unstable
- me.delay = 5 * time.Second // how often to poll the hypervisors
+ me.unstable = time.Now() // initialize the grid as unstable
me.changed = false
- // me.dmap = make(map[*pb.Droplet]*DropletT)
me.hmap = make(map[*pb.Hypervisor]*HyperT)
// how long a droplet can be missing until it's declared dead
me.unstableTimeout = 17 * time.Second
me.missingDropletTimeout = time.Minute // not sure of the difference between these values
+ // how often to poll the hypervisors
+ me.hyperPollDelay = 5 * time.Second
+
// how long the cluster must be stable before new droplets can be started
me.clusterStableDuration = 37 * time.Second
@@ -141,12 +142,14 @@ func main() {
log.Println("result:", result)
os.Exit(0)
}
+
// initialize each hypervisor
for _, pbh := range me.cluster.Hypervisors {
// create the local struct for this hypervisor from the config file
- h := new(HyperT)
+ var h *HyperT
+ h = new(HyperT)
h.pb = pbh
-
+ h.lastDroplets = make(map[string]time.Time)
h.lastpoll = time.Now()
me.hmap[pbh] = h
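
The make() call here is load-bearing: pollHypervisor assigns into h.lastDroplets, and assigning into a nil map panics in Go, so the map must be initialized before the first poll. A tiny illustration:

    package main

    import "time"

    func main() {
        var m map[string]time.Time
        // m["vm1"] = time.Now() // would panic: assignment to entry in nil map
        m = make(map[string]time.Time) // as done above for each HyperT
        m["vm1"] = time.Now() // ok
    }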
diff --git a/poll.go b/poll.go
index 3e9caa7..73cbbea 100644
--- a/poll.go
+++ b/poll.go
@@ -18,6 +18,7 @@ func (h *HyperT) pollHypervisor() {
if s == nil {
return
}
+
var bytesSplice []byte
bytesSplice = s.Bytes()
// fmt.Fprintln(w, string(bytesSplice))
@@ -31,12 +32,26 @@ func (h *HyperT) pollHypervisor() {
}
state := fields[0]
name := fields[1]
+ if state == "OFF" {
+ // skip locally defined libvirt vms
+ continue
+ }
+ h.lastDroplets[name] = time.Now()
+ // if _, ok := h.lastDroplets[name]; ok {
+ // h.lastDroplets[name] = time.Now()
+ // }
+
+ // try the protobuf
+ d := findDroplet(name)
+ if d == nil {
+ // this droplet is not in the protobuf config; warn and skip it
+ log.Log(WARN, name, "is unknown on", h.pb.Hostname, "state =", state)
+ log.Log(WARN, name, "this vm was probably started by hand using virsh")
+ log.Log(WARN, name, "todo: import vm from libvirt")
+ continue
+ }
if state == "ON" {
log.Log(POLL, h.pb.Hostname, "STATE:", state, "HOST:", name, "rest:", fields[2:])
- d := findDroplet(name)
- if d == nil {
- // not sure what now?
- }
log.Log(INFO, "ALREADY RECORDED", d.Hostname)
// update the status to ON
@@ -69,30 +84,25 @@ func (h *HyperT) pollHypervisor() {
}
d.CurrentHypervisor = h.pb.Hostname
}
- continue
}
- h.lastpoll = time.Now()
- h.killcount = 0 // poll worked. reset killcount
-}
-
-/*
-func findHypervisor(name string) *HyperT {
- if h, ok := me.hmap[name]; ok {
- return h
- }
- return nil
- for _, h := range me.hypers {
- if h.pb.Hostname == name {
- return h
+ for name, t := range h.lastDroplets {
+ dur := time.Since(t)
+ if dur > me.hyperPollDelay {
+ log.Info("droplet has probably powered down", name)
+ d := findDroplet(name)
+ if d != nil {
+ d.CurrentState = pb.DropletState_UNKNOWN
+ log.Info("set state UNKNOWN here", name)
+ }
}
}
- return nil
+ h.lastpoll = time.Now()
+ h.killcount = 0 // poll worked. reset killcount
}
-*/
// check the state of the cluster and return a string
// that is intended to be sent to an uptime monitor like Kuma
-func clusterHealthy() (bool, string) {
+func uptimeCheck() (bool, string) {
var good bool = true
var total int
var working int
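
The sweep at the end of pollHypervisor depends on ordering: names seen in the current poll were just restamped, so only droplets absent for longer than one poll window (me.hyperPollDelay, 5 seconds by default) are flagged UNKNOWN. A condensed sketch of the same check:

    package main

    import (
        "fmt"
        "time"
    )

    func main() {
        cutoff := 5 * time.Second // me.hyperPollDelay's default
        last := map[string]time.Time{
            "vm1": time.Now(),                       // reported in the current poll
            "vm2": time.Now().Add(-6 * time.Second), // missed at least one poll
        }
        for name, t := range last {
            if time.Since(t) > cutoff {
                fmt.Println(name, "has probably powered down") // only vm2 prints
            }
        }
    }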
diff --git a/structs.go b/structs.go
index 2671b52..715231a 100644
--- a/structs.go
+++ b/structs.go
@@ -21,13 +21,13 @@ func (b *virtigoT) Enable() {
// this app's variables
type virtigoT struct {
cluster *pb.Cluster // basic cluster settings
- delay time.Duration // how often to poll the hypervisors
hmap map[*pb.Hypervisor]*HyperT // map to the local struct
names []string
hypers []*HyperT
killcount int
unstable time.Time // the last time the cluster was incorrect
changed bool
+ hyperPollDelay time.Duration // how often to poll the hypervisors
unstableTimeout time.Duration // how long a droplet can be unstable until it's declared dead
clusterStableDuration time.Duration // how long the cluster must be stable before new droplets can be started
missingDropletTimeout time.Duration // how long a droplet can be missing for
@@ -35,8 +35,9 @@ type virtigoT struct {
// the stuff that is needed for a hypervisor
type HyperT struct {
- pb *pb.Hypervisor // the Hypervisor protobuf
- dog *time.Ticker // the watchdog timer itself
- lastpoll time.Time // the last time the hypervisor polled
- killcount int
+ pb *pb.Hypervisor // the Hypervisor protobuf
+ dog *time.Ticker // the watchdog timer itself
+ lastpoll time.Time // the last time the hypervisor polled
+ lastDroplets map[string]time.Time // the vm's in the last poll
+ killcount int // how many times the daemon has been forcibly killed
}
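
Since a HyperT is only safe to poll once lastDroplets is non-nil, a constructor would keep that invariant in one place. A hypothetical helper, not part of this commit:

    // newHyperT builds a HyperT ready for its first poll.
    func newHyperT(pbh *pb.Hypervisor) *HyperT {
        return &HyperT{
            pb:           pbh,
            lastpoll:     time.Now(),
            lastDroplets: make(map[string]time.Time),
        }
    }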
diff --git a/watchdog.go b/watchdog.go
index be6f8cf..97baac4 100644
--- a/watchdog.go
+++ b/watchdog.go
@@ -39,7 +39,7 @@ func (h *HyperT) sendDirs() {
}
func (h *HyperT) NewWatchdog() {
- h.dog = time.NewTicker(me.delay)
+ h.dog = time.NewTicker(me.hyperPollDelay)
defer h.dog.Stop()
done := make(chan bool)
/*