Commit f9bdd382 authored by Francois Tur's avatar Francois Tur Committed by Yong Tang

Ensure Re-register of metrics variables after a reload (#2080)

* - ensure plugins that use prometheus.MustRegister, re-register after reload
- removing once.Do on the startup function was simplest way to do it.

* - fix underscored names (advice of bot)

* - tune existing UT for reload, and add a test verifying failing reload does not prevent correct registering for metrics

* - ensure different ports for tests that can run in same time ..
parent cb932ca2
package autopath package autopath
import ( import (
"sync"
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
...@@ -16,5 +14,3 @@ var ( ...@@ -16,5 +14,3 @@ var (
Help: "Counter of requests that did autopath.", Help: "Counter of requests that did autopath.",
}, []string{"server"}) }, []string{"server"})
) )
var once sync.Once
...@@ -26,7 +26,7 @@ func setup(c *caddy.Controller) error { ...@@ -26,7 +26,7 @@ func setup(c *caddy.Controller) error {
} }
c.OnStartup(func() error { c.OnStartup(func() error {
once.Do(func() { metrics.MustRegister(c, autoPathCount) }) metrics.MustRegister(c, autoPathCount)
return nil return nil
}) })
......
...@@ -3,7 +3,6 @@ package cache ...@@ -3,7 +3,6 @@ package cache
import ( import (
"context" "context"
"math" "math"
"sync"
"time" "time"
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin"
...@@ -126,5 +125,3 @@ var ( ...@@ -126,5 +125,3 @@ var (
Help: "The number responses that are not cached, because the reply is malformed.", Help: "The number responses that are not cached, because the reply is malformed.",
}, []string{"server"}) }, []string{"server"})
) )
var once sync.Once
...@@ -34,11 +34,9 @@ func setup(c *caddy.Controller) error { ...@@ -34,11 +34,9 @@ func setup(c *caddy.Controller) error {
}) })
c.OnStartup(func() error { c.OnStartup(func() error {
once.Do(func() { metrics.MustRegister(c,
metrics.MustRegister(c, cacheSize, cacheHits, cacheMisses,
cacheSize, cacheHits, cacheMisses, cachePrefetches, cacheDrops)
cachePrefetches, cacheDrops)
})
return nil return nil
}) })
......
...@@ -2,7 +2,6 @@ package dnssec ...@@ -2,7 +2,6 @@ package dnssec
import ( import (
"context" "context"
"sync"
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics" "github.com/coredns/coredns/plugin/metrics"
...@@ -81,5 +80,3 @@ var ( ...@@ -81,5 +80,3 @@ var (
// Name implements the Handler interface. // Name implements the Handler interface.
func (d Dnssec) Name() string { return "dnssec" } func (d Dnssec) Name() string { return "dnssec" }
var once sync.Once
...@@ -35,9 +35,7 @@ func setup(c *caddy.Controller) error { ...@@ -35,9 +35,7 @@ func setup(c *caddy.Controller) error {
}) })
c.OnStartup(func() error { c.OnStartup(func() error {
once.Do(func() { metrics.MustRegister(c, cacheSize, cacheHits, cacheMisses)
metrics.MustRegister(c, cacheSize, cacheHits, cacheMisses)
})
return nil return nil
}) })
......
package forward package forward
import ( import (
"sync"
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
...@@ -48,5 +46,3 @@ var ( ...@@ -48,5 +46,3 @@ var (
Help: "Gauge of open sockets per upstream.", Help: "Gauge of open sockets per upstream.",
}, []string{"to"}) }, []string{"to"})
) )
var once sync.Once
...@@ -38,9 +38,7 @@ func setup(c *caddy.Controller) error { ...@@ -38,9 +38,7 @@ func setup(c *caddy.Controller) error {
}) })
c.OnStartup(func() error { c.OnStartup(func() error {
once.Do(func() { metrics.MustRegister(c, RequestCount, RcodeCount, RequestDuration, HealthcheckFailureCount, SocketGauge)
metrics.MustRegister(c, RequestCount, RcodeCount, RequestDuration, HealthcheckFailureCount, SocketGauge)
})
return f.OnStartup() return f.OnStartup()
}) })
......
...@@ -55,7 +55,7 @@ func setup(c *caddy.Controller) error { ...@@ -55,7 +55,7 @@ func setup(c *caddy.Controller) error {
}) })
c.OnStartup(func() error { c.OnStartup(func() error {
once.Do(func() { metrics.MustRegister(c, HealthDuration) }) metrics.MustRegister(c, HealthDuration)
return nil return nil
}) })
......
package proxy package proxy
import ( import (
"sync"
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
...@@ -36,5 +34,3 @@ func familyToString(f int) string { ...@@ -36,5 +34,3 @@ func familyToString(f int) string {
} }
return "" return ""
} }
var once sync.Once
...@@ -33,7 +33,7 @@ func setup(c *caddy.Controller) error { ...@@ -33,7 +33,7 @@ func setup(c *caddy.Controller) error {
}) })
c.OnStartup(func() error { c.OnStartup(func() error {
once.Do(func() { metrics.MustRegister(c, RequestCount, RequestDuration) }) metrics.MustRegister(c, RequestCount, RequestDuration)
return nil return nil
}) })
......
package template package template
import ( import (
"sync"
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics" "github.com/coredns/coredns/plugin/metrics"
...@@ -34,13 +32,9 @@ var ( ...@@ -34,13 +32,9 @@ var (
// OnStartupMetrics sets up the metrics on startup. // OnStartupMetrics sets up the metrics on startup.
func setupMetrics(c *caddy.Controller) error { func setupMetrics(c *caddy.Controller) error {
c.OnStartup(func() error { c.OnStartup(func() error {
metricsOnce.Do(func() { metrics.MustRegister(c, templateMatchesCount, templateFailureCount, templateRRFailureCount)
metrics.MustRegister(c, templateMatchesCount, templateFailureCount, templateRRFailureCount)
})
return nil return nil
}) })
return nil return nil
} }
var metricsOnce sync.Once
...@@ -119,22 +119,24 @@ func TestReloadMetricsHealth(t *testing.T) { ...@@ -119,22 +119,24 @@ func TestReloadMetricsHealth(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
const proc = "process_virtual_memory_bytes" const proc = "coredns_build_info"
metrics, _ := ioutil.ReadAll(resp.Body) metrics, _ := ioutil.ReadAll(resp.Body)
if !bytes.Contains(metrics, []byte(proc)) { if !bytes.Contains(metrics, []byte(proc)) {
t.Errorf("Failed to see %s in metric output", proc) t.Errorf("Failed to see %s in metric output", proc)
} }
} }
func collectMetricsInfo(addr, proc string) error { func collectMetricsInfo(addr string, procs ...string) error {
cl := &http.Client{} cl := &http.Client{}
resp, err := cl.Get(fmt.Sprintf("http://%s/metrics", addr)) resp, err := cl.Get(fmt.Sprintf("http://%s/metrics", addr))
if err != nil { if err != nil {
return err return err
} }
metrics, _ := ioutil.ReadAll(resp.Body) metrics, _ := ioutil.ReadAll(resp.Body)
if !bytes.Contains(metrics, []byte(proc)) { for _, p := range procs {
return fmt.Errorf("failed to see %s in metric output", proc) if !bytes.Contains(metrics, []byte(p)) {
return fmt.Errorf("failed to see %s in metric output \n%s", p, metrics)
}
} }
return nil return nil
} }
...@@ -202,4 +204,132 @@ func TestReloadSeveralTimeMetrics(t *testing.T) { ...@@ -202,4 +204,132 @@ func TestReloadSeveralTimeMetrics(t *testing.T) {
} }
} }
func TestMetricsAvailableAfterReload(t *testing.T) {
//TODO: add a tool that find an available port because this needs to be a port
// that is not used in another test
promAddress := "127.0.0.1:53186"
procMetric := "coredns_build_info"
procCache := "coredns_cache_size"
procForward := "coredns_dns_request_duration_seconds"
corefileWithMetrics := `
.:0 {
prometheus ` + promAddress + `
cache
forward . 8.8.8.8 {
force_tcp
}
}`
inst, _, tcp, err := CoreDNSServerAndPorts(corefileWithMetrics)
if err != nil {
if strings.Contains(err.Error(), inUse) {
return
}
t.Errorf("Could not get service instance: %s", err)
}
// send a query and check we can scrap corresponding metrics
cl := dns.Client{Net: "tcp"}
m := new(dns.Msg)
m.SetQuestion("www.example.org.", dns.TypeA)
if _, _, err := cl.Exchange(m, tcp); err != nil {
t.Fatalf("Could not send message: %s", err)
}
// we should have metrics from forward, cache, and metrics itself
if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil {
t.Errorf("Could not scrap one of expected stats : %s", err)
}
// now reload
instReload, err := inst.Restart(
NewInput(corefileWithMetrics),
)
if err != nil {
t.Errorf("Could not restart CoreDNS : %s", err)
instReload = inst
}
// check the metrics are available still
if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil {
t.Errorf("Could not scrap one of expected stats : %s", err)
}
instReload.Stop()
// verify that metrics have not been pushed
}
func TestMetricsAvailableAfterReloadAndFailedReload(t *testing.T) {
//TODO: add a tool that find an available port because this needs to be a port
// that is not used in another test
promAddress := "127.0.0.1:53187"
procMetric := "coredns_build_info"
procCache := "coredns_cache_size"
procForward := "coredns_dns_request_duration_seconds"
corefileWithMetrics := `
.:0 {
prometheus ` + promAddress + `
cache
forward . 8.8.8.8 {
force_tcp
}
}`
invalidCorefileWithMetrics := `
.:0 {
prometheus ` + promAddress + `
cache
forward . 8.8.8.8 {
force_tcp
}
invalid
}`
inst, _, tcp, err := CoreDNSServerAndPorts(corefileWithMetrics)
if err != nil {
if strings.Contains(err.Error(), inUse) {
return
}
t.Errorf("Could not get service instance: %s", err)
}
// send a query and check we can scrap corresponding metrics
cl := dns.Client{Net: "tcp"}
m := new(dns.Msg)
m.SetQuestion("www.example.org.", dns.TypeA)
if _, _, err := cl.Exchange(m, tcp); err != nil {
t.Fatalf("Could not send message: %s", err)
}
// we should have metrics from forward, cache, and metrics itself
if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil {
t.Errorf("Could not scrap one of expected stats : %s", err)
}
for i := 0; i < 2; i++ {
// now provide a failed reload
invInst, err := inst.Restart(
NewInput(invalidCorefileWithMetrics),
)
if err == nil {
t.Errorf("Invalid test - this reload should fail")
inst = invInst
}
}
// now reload with correct corefile
instReload, err := inst.Restart(
NewInput(corefileWithMetrics),
)
if err != nil {
t.Errorf("Could not restart CoreDNS : %s", err)
instReload = inst
}
// check the metrics are available still
if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil {
t.Errorf("Could not scrap one of expected stats : %s", err)
}
instReload.Stop()
// verify that metrics have not been pushed
}
const inUse = "address already in use" const inUse = "address already in use"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment