Commit 219bfd04 authored by Miek Gieben's avatar Miek Gieben Committed by GitHub

middleware/metrics: cleanup (#355)

* middleware/metrics: add more metrics

middleware/cache:
Add metrics for number of elements in the cache. Also export the total
size. Update README to detail the new metrics.

middleware/metrics

Move metrics into subpackage called "vars". This breaks the import
cycle and is cleaner. This allows vars.Report to be used in the
dnsserver to log refused queries.

middleware/metrics: tests

Add tests to the metrics framework. The metrics/test subpackage allows
scraping of the local server. Do a few test scrapes of the metrics that
are defined in the metrics middleware.

This also allows metrics integration tests to check if the caching and
dnssec middleware export their metrics correctly.

* update README

* typos

* fix tests
parent 6d9d6008
...@@ -41,3 +41,19 @@ func GetConfig(c *caddy.Controller) *Config { ...@@ -41,3 +41,19 @@ func GetConfig(c *caddy.Controller) *Config {
ctx.saveConfig(c.Key, &Config{}) ctx.saveConfig(c.Key, &Config{})
return GetConfig(c) return GetConfig(c)
} }
// GetMiddleware looks up the middleware handler that was added under name to
// the config belonging to c. It is useful for finding out whether a certain
// middleware is active in this server. It returns nil when no handler with
// that name is present.
//
// The result is order dependent; the order is defined in directives.go. If the
// calling middleware comes before the one being looked up, the latter will not
// be there (yet).
func GetMiddleware(c *caddy.Controller, name string) middleware.Handler {
	// TODO(miek): calling the handler h(nil) should be a noop...
	for _, build := range GetConfig(c).Middleware {
		// Each entry is invoked with a nil argument only to obtain a handler
		// whose name can be compared.
		handler := build(nil)
		if name != handler.Name() {
			continue
		}
		return handler
	}
	return nil
}
...@@ -10,7 +10,9 @@ import ( ...@@ -10,7 +10,9 @@ import (
"time" "time"
"github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware"
"github.com/miekg/coredns/middleware/metrics/vars"
"github.com/miekg/coredns/middleware/pkg/edns" "github.com/miekg/coredns/middleware/pkg/edns"
"github.com/miekg/coredns/middleware/pkg/rcode"
"github.com/miekg/coredns/request" "github.com/miekg/coredns/request"
"github.com/miekg/dns" "github.com/miekg/dns"
...@@ -247,14 +249,16 @@ func (s *Server) OnStartupComplete() { ...@@ -247,14 +249,16 @@ func (s *Server) OnStartupComplete() {
} }
// DefaultErrorFunc responds to an DNS request with an error. // DefaultErrorFunc responds to an DNS request with an error.
func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rcode int) { func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) {
state := request.Request{W: w, Req: r} state := request.Request{W: w, Req: r}
answer := new(dns.Msg) answer := new(dns.Msg)
answer.SetRcode(r, rcode) answer.SetRcode(r, rc)
state.SizeAndDo(answer) state.SizeAndDo(answer)
vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
w.WriteMsg(answer) w.WriteMsg(answer)
} }
......
...@@ -36,7 +36,8 @@ TODO(miek): text here on how to hook up middleware. ...@@ -36,7 +36,8 @@ TODO(miek): text here on how to hook up middleware.
## Metrics ## Metrics
When exporting metrics the *Namespace* should be `middleware.Namespace` (="coredns"), and the When exporting metrics the *Namespace* should be `middleware.Namespace` (="coredns"), and the
*Subsystem* should be the name of the middleware. *Subsystem* should be the name of the middleware. The README.md for the middleware should then
also contain a *Metrics* section detailing the metrics.
## Documentation ## Documentation
......
...@@ -8,6 +8,7 @@ import ( ...@@ -8,6 +8,7 @@ import (
"github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware"
"github.com/miekg/coredns/middleware/file" "github.com/miekg/coredns/middleware/file"
"github.com/miekg/coredns/middleware/metrics"
"github.com/miekg/coredns/request" "github.com/miekg/coredns/request"
"github.com/miekg/dns" "github.com/miekg/dns"
...@@ -20,6 +21,7 @@ type ( ...@@ -20,6 +21,7 @@ type (
Next middleware.Handler Next middleware.Handler
*Zones *Zones
metrics *metrics.Metrics
loader loader
} }
...@@ -97,3 +99,5 @@ func (a Auto) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i ...@@ -97,3 +99,5 @@ func (a Auto) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i
w.WriteMsg(m) w.WriteMsg(m)
return dns.RcodeSuccess, nil return dns.RcodeSuccess, nil
} }
// Name returns the fixed name of the auto middleware.
func (a Auto) Name() string {
	return "auto"
}
...@@ -11,6 +11,7 @@ import ( ...@@ -11,6 +11,7 @@ import (
"github.com/miekg/coredns/core/dnsserver" "github.com/miekg/coredns/core/dnsserver"
"github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware"
"github.com/miekg/coredns/middleware/file" "github.com/miekg/coredns/middleware/file"
"github.com/miekg/coredns/middleware/metrics"
"github.com/mholt/caddy" "github.com/mholt/caddy"
) )
...@@ -28,10 +29,16 @@ func setup(c *caddy.Controller) error { ...@@ -28,10 +29,16 @@ func setup(c *caddy.Controller) error {
return middleware.Error("auto", err) return middleware.Error("auto", err)
} }
// If we have enabled prometheus we should add newly discovered zones to it.
met := dnsserver.GetMiddleware(c, "prometheus")
if met != nil {
a.metrics = met.(*metrics.Metrics)
}
walkChan := make(chan bool) walkChan := make(chan bool)
c.OnStartup(func() error { c.OnStartup(func() error {
err := a.Zones.Walk(a.loader) err := a.Walk()
if err != nil { if err != nil {
return err return err
} }
...@@ -43,7 +50,7 @@ func setup(c *caddy.Controller) error { ...@@ -43,7 +50,7 @@ func setup(c *caddy.Controller) error {
case <-walkChan: case <-walkChan:
return return
case <-ticker.C: case <-ticker.C:
a.Zones.Walk(a.loader) a.Walk()
} }
} }
}() }()
......
...@@ -13,26 +13,26 @@ import ( ...@@ -13,26 +13,26 @@ import (
) )
// Walk will recursively walk of the file under l.directory and adds the one that match l.re. // Walk will recursively walk of the file under l.directory and adds the one that match l.re.
func (z *Zones) Walk(l loader) error { func (a Auto) Walk() error {
// TODO(miek): should add something so that we don't stomp on each other. // TODO(miek): should add something so that we don't stomp on each other.
toDelete := make(map[string]bool) toDelete := make(map[string]bool)
for _, n := range z.Names() { for _, n := range a.Zones.Names() {
toDelete[n] = true toDelete[n] = true
} }
filepath.Walk(l.directory, func(path string, info os.FileInfo, err error) error { filepath.Walk(a.loader.directory, func(path string, info os.FileInfo, err error) error {
if info.IsDir() { if info.IsDir() {
return nil return nil
} }
match, origin := matches(l.re, info.Name(), l.template) match, origin := matches(a.loader.re, info.Name(), a.loader.template)
if !match { if !match {
return nil return nil
} }
if _, ok := z.Z[origin]; ok { if _, ok := a.Zones.Z[origin]; ok {
// we already have this zone // we already have this zone
toDelete[origin] = false toDelete[origin] = false
return nil return nil
...@@ -50,10 +50,14 @@ func (z *Zones) Walk(l loader) error { ...@@ -50,10 +50,14 @@ func (z *Zones) Walk(l loader) error {
return nil return nil
} }
zo.NoReload = l.noReload zo.NoReload = a.loader.noReload
zo.TransferTo = l.transferTo zo.TransferTo = a.loader.transferTo
z.Insert(zo, origin) a.Zones.Add(zo, origin)
if a.metrics != nil {
a.metrics.AddZone(origin)
}
zo.Notify() zo.Notify()
...@@ -68,7 +72,13 @@ func (z *Zones) Walk(l loader) error { ...@@ -68,7 +72,13 @@ func (z *Zones) Walk(l loader) error {
if !ok { if !ok {
continue continue
} }
z.Delete(origin)
if a.metrics != nil {
a.metrics.RemoveZone(origin)
}
a.Zones.Remove(origin)
log.Printf("[INFO] Deleting zone `%s'", origin) log.Printf("[INFO] Deleting zone `%s'", origin)
} }
......
...@@ -37,13 +37,16 @@ func TestWalk(t *testing.T) { ...@@ -37,13 +37,16 @@ func TestWalk(t *testing.T) {
template: `${1}`, template: `${1}`,
} }
z := &Zones{} a := Auto{
loader: ldr,
Zones: &Zones{},
}
z.Walk(ldr) a.Walk()
// db.example.org and db.example.com should be here (created in createFiles) // db.example.org and db.example.com should be here (created in createFiles)
for _, name := range []string{"example.com.", "example.org."} { for _, name := range []string{"example.com.", "example.org."} {
if _, ok := z.Z[name]; !ok { if _, ok := a.Zones.Z[name]; !ok {
t.Errorf("%s should have been added", name) t.Errorf("%s should have been added", name)
} }
} }
......
...@@ -27,15 +27,18 @@ func TestWatcher(t *testing.T) { ...@@ -27,15 +27,18 @@ func TestWatcher(t *testing.T) {
template: `${1}`, template: `${1}`,
} }
z := &Zones{} a := Auto{
loader: ldr,
Zones: &Zones{},
}
z.Walk(ldr) a.Walk()
// example.org and example.com should exist // example.org and example.com should exist
if x := len(z.Z["example.org."].All()); x != 4 { if x := len(a.Zones.Z["example.org."].All()); x != 4 {
t.Fatalf("expected 4 RRs, got %d", x) t.Fatalf("expected 4 RRs, got %d", x)
} }
if x := len(z.Z["example.com."].All()); x != 4 { if x := len(a.Zones.Z["example.com."].All()); x != 4 {
t.Fatalf("expected 4 RRs, got %d", x) t.Fatalf("expected 4 RRs, got %d", x)
} }
...@@ -44,5 +47,6 @@ func TestWatcher(t *testing.T) { ...@@ -44,5 +47,6 @@ func TestWatcher(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
z.Walk(ldr) a.Walk()
// TODO(miek): check
} }
...@@ -40,9 +40,9 @@ func (z *Zones) Zones(name string) *file.Zone { ...@@ -40,9 +40,9 @@ func (z *Zones) Zones(name string) *file.Zone {
return zo return zo
} }
// Insert inserts a new zone into z. If zo.NoReload is false, the // Add adds a new zone into z. If zo.NoReload is false, the
// reload goroutine is started. // reload goroutine is started.
func (z *Zones) Insert(zo *file.Zone, name string) { func (z *Zones) Add(zo *file.Zone, name string) {
z.Lock() z.Lock()
if z.Z == nil { if z.Z == nil {
...@@ -51,14 +51,13 @@ func (z *Zones) Insert(zo *file.Zone, name string) { ...@@ -51,14 +51,13 @@ func (z *Zones) Insert(zo *file.Zone, name string) {
z.Z[name] = zo z.Z[name] = zo
z.names = append(z.names, name) z.names = append(z.names, name)
zo.Reload() zo.Reload()
z.Unlock() z.Unlock()
} }
// Delete removes the zone named name from z. It also stop the the zone's reload goroutine. // Remove removes the zone named name from z. It also stop the the zone's reload goroutine.
func (z *Zones) Delete(name string) { func (z *Zones) Remove(name string) {
z.Lock() z.Lock()
if zo, ok := z.Z[name]; ok && !zo.NoReload { if zo, ok := z.Z[name]; ok && !zo.NoReload {
...@@ -67,10 +66,11 @@ func (z *Zones) Delete(name string) { ...@@ -67,10 +66,11 @@ func (z *Zones) Delete(name string) {
delete(z.Z, name) delete(z.Z, name)
// just regenerate Names (might be bad if you have a lot of zones...) // TODO(miek): just regenerate Names (might be bad if you have a lot of zones...)
z.names = []string{} z.names = []string{}
for n := range z.Z { for n := range z.Z {
z.names = append(z.names, n) z.names = append(z.names, n)
} }
z.Unlock() z.Unlock()
} }
...@@ -35,24 +35,24 @@ There is a third category (`error`) but those responses are never cached. ...@@ -35,24 +35,24 @@ There is a third category (`error`) but those responses are never cached.
The minimum TTL allowed on resource records is 5 seconds. The minimum TTL allowed on resource records is 5 seconds.
If monitoring is enabled (via the *prometheus* directive) then the following extra metrics are added: ## Metrics
* coredns_cache_hit_count_total, and If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
* coredns_cache_miss_count_total
They both work on a per-zone basis and just count the hit and miss counts for each query. * coredns_cache_size_guage{type} - total elements in the cache, type is either "denial" or "success".
* coredns_cache_capacity_gauge{type} - total capacity of the cache, type is either "denial" or "success".
## Examples ## Examples
Enable caching for all zones, but cap everything to a TTL of 10 seconds:
~~~ ~~~
cache 10 cache 10
~~~ ~~~
Enable caching for all zones, but cap everything to a TTL of 10 seconds. Proxy to Google Public DNS and only cache responses for example.org (or below).
~~~ ~~~
proxy . 8.8.8.8:53 proxy . 8.8.8.8:53
cache example.org cache example.org
~~~ ~~~
Proxy to Google Public DNS and only cache responses for example.org (or below).
...@@ -79,6 +79,9 @@ func (c *ResponseWriter) WriteMsg(res *dns.Msg) error { ...@@ -79,6 +79,9 @@ func (c *ResponseWriter) WriteMsg(res *dns.Msg) error {
if key != "" { if key != "" {
c.set(res, key, mt, duration) c.set(res, key, mt, duration)
cacheSize.WithLabelValues(Success).Set(float64(c.pcache.Len()))
cacheSize.WithLabelValues(Denial).Set(float64(c.ncache.Len()))
} }
setMsgTTL(res, uint32(duration.Seconds())) setMsgTTL(res, uint32(duration.Seconds()))
...@@ -103,7 +106,6 @@ func (c *ResponseWriter) set(m *dns.Msg, key string, mt response.Type, duration ...@@ -103,7 +106,6 @@ func (c *ResponseWriter) set(m *dns.Msg, key string, mt response.Type, duration
case response.OtherError: case response.OtherError:
// don't cache these // don't cache these
// TODO(miek): what do we do with these?
default: default:
log.Printf("[WARNING] Caching called with unknown classification: %d", mt) log.Printf("[WARNING] Caching called with unknown classification: %d", mt)
} }
...@@ -122,4 +124,9 @@ const ( ...@@ -122,4 +124,9 @@ const (
minTTL = 5 * time.Second minTTL = 5 * time.Second
defaultCap = 10000 // default capacity of the cache. defaultCap = 10000 // default capacity of the cache.
// Success is the class defined for positive caching.
Success = "success"
// Denial is the class defined for negative caching.
Denial = "denial"
) )
...@@ -30,17 +30,15 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ...@@ -30,17 +30,15 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
state.SizeAndDo(resp) state.SizeAndDo(resp)
w.WriteMsg(resp) w.WriteMsg(resp)
cacheHitCount.WithLabelValues(zone).Inc()
return dns.RcodeSuccess, nil return dns.RcodeSuccess, nil
} }
cacheMissCount.WithLabelValues(zone).Inc()
crr := &ResponseWriter{w, c} crr := &ResponseWriter{w, c}
return c.Next.ServeDNS(ctx, crr, r) return c.Next.ServeDNS(ctx, crr, r)
} }
// Name returns the fixed name of the cache middleware.
func (c *Cache) Name() string {
	return "cache"
}
func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) { func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) {
k := rawKey(qname, qtype, do) k := rawKey(qname, qtype, do)
...@@ -55,24 +53,24 @@ func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) { ...@@ -55,24 +53,24 @@ func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) {
} }
var ( var (
cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{ cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "hit_count_total", Name: "size_guage",
Help: "Counter of DNS requests that were found in the cache.", Help: "Gauge of number of elements in the cache.",
}, []string{"zone"}) }, []string{"type"})
cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{ cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "miss_count_total", Name: "capacity_gauge",
Help: "Counter of DNS requests that were not found in the cache.", Help: "Gauge of cache's capacity.",
}, []string{"zone"}) }, []string{"type"})
) )
const subsystem = "cache" const subsystem = "cache"
func init() { func init() {
prometheus.MustRegister(cacheHitCount) prometheus.MustRegister(cacheSize)
prometheus.MustRegister(cacheMissCount) prometheus.MustRegister(cacheCapacity)
} }
...@@ -28,6 +28,10 @@ func setup(c *caddy.Controller) error { ...@@ -28,6 +28,10 @@ func setup(c *caddy.Controller) error {
return ca return ca
}) })
// Export the capacity for the metrics. This only happens once, because this is a re-load change only.
cacheCapacity.WithLabelValues(Success).Set(float64(ca.pcap))
cacheCapacity.WithLabelValues(Denial).Set(float64(ca.ncap))
return nil return nil
} }
...@@ -58,7 +62,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) { ...@@ -58,7 +62,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
for c.NextBlock() { for c.NextBlock() {
switch c.Val() { switch c.Val() {
// first number is cap, second is an new ttl // first number is cap, second is an new ttl
case "success": case Success:
args := c.RemainingArgs() args := c.RemainingArgs()
if len(args) == 0 { if len(args) == 0 {
return nil, c.ArgErr() return nil, c.ArgErr()
...@@ -75,7 +79,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) { ...@@ -75,7 +79,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
} }
ca.pttl = time.Duration(pttl) * time.Second ca.pttl = time.Duration(pttl) * time.Second
} }
case "denial": case Denial:
args := c.RemainingArgs() args := c.RemainingArgs()
if len(args) == 0 { if len(args) == 0 {
return nil, c.ArgErr() return nil, c.ArgErr()
......
...@@ -51,6 +51,8 @@ func (c Chaos) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( ...@@ -51,6 +51,8 @@ func (c Chaos) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
return 0, nil return 0, nil
} }
// Name returns the fixed name of the chaos middleware.
func (c Chaos) Name() string {
	return "chaos"
}
func trim(s string) string { func trim(s string) string {
if len(s) < 256 { if len(s) < 256 {
return s return s
......
...@@ -34,9 +34,14 @@ dnssec [ZONES... ] { ...@@ -34,9 +34,14 @@ dnssec [ZONES... ] {
will be signed with all keys. Generating a key can be done with `dnssec-keygen`: `dnssec-keygen -a will be signed with all keys. Generating a key can be done with `dnssec-keygen`: `dnssec-keygen -a
ECDSAP256SHA256 <zonename>`. A key created for zone *A* can be safely used for zone *B*. ECDSAP256SHA256 <zonename>`. A key created for zone *A* can be safely used for zone *B*.
* `cache_capacity` indicates the capacity of the LRU cache. The dnssec middleware uses LRU cache to manage * `cache_capacity` indicates the capacity of the LRU cache. The dnssec middleware uses LRU cache to manage
objects and the default capacity is 10000. objects and the default capacity is 10000.
## Metrics
If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
* coredns_dnssec_size_guage{type} - total elements in the cache, type is "signature".
* coredns_dnssec_capacity_gauge{type} - total capacity of the cache, type is "signature".
## Examples ## Examples
...@@ -40,24 +40,26 @@ func (d Dnssec) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ...@@ -40,24 +40,26 @@ func (d Dnssec) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
} }
var ( var (
cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{ cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "hit_count_total", Name: "size_guage",
Help: "Counter of signatures that were found in the cache.", Help: "Gauge of number of elements in the cache.",
}, []string{"zone"}) }, []string{"type"})
cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{ cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "miss_count_total", Name: "capacity_gauge",
Help: "Counter of signatures that were not found in the cache.", Help: "Gauge of cache's capacity.",
}, []string{"zone"}) }, []string{"type"})
) )
// Name returns the fixed name of the dnssec middleware.
func (d Dnssec) Name() string {
	return "dnssec"
}
const subsystem = "dnssec" const subsystem = "dnssec"
func init() { func init() {
prometheus.MustRegister(cacheHitCount) prometheus.MustRegister(cacheSize)
prometheus.MustRegister(cacheMissCount) prometheus.MustRegister(cacheCapacity)
} }
...@@ -30,6 +30,8 @@ func (d *ResponseWriter) WriteMsg(res *dns.Msg) error { ...@@ -30,6 +30,8 @@ func (d *ResponseWriter) WriteMsg(res *dns.Msg) error {
if state.Do() { if state.Do() {
res = d.d.Sign(state, zone, time.Now().UTC()) res = d.d.Sign(state, zone, time.Now().UTC())
cacheSize.WithLabelValues("signature").Set(float64(d.d.cache.Len()))
} }
state.SizeAndDo(res) state.SizeAndDo(res)
......
...@@ -32,6 +32,9 @@ func setup(c *caddy.Controller) error { ...@@ -32,6 +32,9 @@ func setup(c *caddy.Controller) error {
return New(zones, keys, next, cache) return New(zones, keys, next, cache)
}) })
// Export the capacity for the metrics. This only happens once, because this is a re-load change only.
cacheCapacity.WithLabelValues("signature").Set(float64(capacity))
return nil return nil
} }
......
...@@ -48,6 +48,8 @@ func (h errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns ...@@ -48,6 +48,8 @@ func (h errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns
return rcode, err return rcode, err
} }
// Name returns the fixed name of the errors middleware.
func (h errorHandler) Name() string {
	return "errors"
}
func (h errorHandler) recovery(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) { func (h errorHandler) recovery(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) {
rec := recover() rec := recover()
if rec == nil { if rec == nil {
......
...@@ -117,6 +117,8 @@ func (e *Etcd) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( ...@@ -117,6 +117,8 @@ func (e *Etcd) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
return dns.RcodeSuccess, nil return dns.RcodeSuccess, nil
} }
// Name returns the fixed name of the etcd middleware.
func (e *Etcd) Name() string {
	return "etcd"
}
// Err write an error response to the client. // Err write an error response to the client.
func (e *Etcd) Err(zone string, rcode int, state request.Request, debug []msg.Service, err error, opt Options) (int, error) { func (e *Etcd) Err(zone string, rcode int, state request.Request, debug []msg.Service, err error, opt Options) (int, error) {
m := new(dns.Msg) m := new(dns.Msg)
......
...@@ -110,6 +110,8 @@ func (f File) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i ...@@ -110,6 +110,8 @@ func (f File) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i
return dns.RcodeSuccess, nil return dns.RcodeSuccess, nil
} }
// Name returns the fixed name of the file middleware.
func (f File) Name() string {
	return "file"
}
// Parse parses the zone in filename and returns a new Zone or an error. // Parse parses the zone in filename and returns a new Zone or an error.
func Parse(f io.Reader, origin, fileName string) (*Zone, error) { func Parse(f io.Reader, origin, fileName string) (*Zone, error) {
tokens := dns.ParseZone(f, dns.Fqdn(origin), fileName) tokens := dns.ParseZone(f, dns.Fqdn(origin), fileName)
......
...@@ -101,6 +101,8 @@ func (k Kubernetes) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.M ...@@ -101,6 +101,8 @@ func (k Kubernetes) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.M
return dns.RcodeSuccess, nil return dns.RcodeSuccess, nil
} }
// Name returns the fixed name of the kubernetes middleware.
func (k Kubernetes) Name() string {
	return "kubernetes"
}
// Err writes an error response back to the client. // Err writes an error response back to the client.
func (k Kubernetes) Err(zone string, rcode int, state request.Request) (int, error) { func (k Kubernetes) Err(zone string, rcode int, state request.Request) (int, error) {
m := new(dns.Msg) m := new(dns.Msg)
......
...@@ -18,3 +18,5 @@ func (rr RoundRobin) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns. ...@@ -18,3 +18,5 @@ func (rr RoundRobin) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.
wrr := &RoundRobinResponseWriter{w} wrr := &RoundRobinResponseWriter{w}
return rr.Next.ServeDNS(ctx, wrr, r) return rr.Next.ServeDNS(ctx, wrr, r)
} }
// Name returns the fixed name of the loadbalance middleware.
func (rr RoundRobin) Name() string {
	return "loadbalance"
}
...@@ -6,7 +6,7 @@ import ( ...@@ -6,7 +6,7 @@ import (
"time" "time"
"github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware"
"github.com/miekg/coredns/middleware/metrics" "github.com/miekg/coredns/middleware/metrics/vars"
"github.com/miekg/coredns/middleware/pkg/dnsrecorder" "github.com/miekg/coredns/middleware/pkg/dnsrecorder"
"github.com/miekg/coredns/middleware/pkg/rcode" "github.com/miekg/coredns/middleware/pkg/rcode"
"github.com/miekg/coredns/middleware/pkg/replacer" "github.com/miekg/coredns/middleware/pkg/replacer"
...@@ -45,7 +45,7 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ...@@ -45,7 +45,7 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
answer.SetRcode(r, rc) answer.SetRcode(r, rc)
state.SizeAndDo(answer) state.SizeAndDo(answer)
metrics.Report(state, metrics.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
w.WriteMsg(answer) w.WriteMsg(answer)
} }
...@@ -64,6 +64,8 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ...@@ -64,6 +64,8 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
return l.Next.ServeDNS(ctx, w, r) return l.Next.ServeDNS(ctx, w, r)
} }
// Name returns the fixed name of the log middleware.
func (l Logger) Name() string {
	return "log"
}
// Rule configures the logging middleware. // Rule configures the logging middleware.
type Rule struct { type Rule struct {
NameScope string NameScope string
......
...@@ -7,12 +7,10 @@ The following metrics are exported: ...@@ -7,12 +7,10 @@ The following metrics are exported:
* coredns_dns_request_count_total{zone, proto, family} * coredns_dns_request_count_total{zone, proto, family}
* coredns_dns_request_duration_milliseconds{zone} * coredns_dns_request_duration_milliseconds{zone}
* coredns_dns_request_size_bytes{zone,, proto} * coredns_dns_request_size_bytes{zone, proto}
* coredns_dns_request_transfer_size_bytes{zone,, proto}
* coredns_dns_request_do_count_total{zone} * coredns_dns_request_do_count_total{zone}
* coredns_dns_request_type_count_total{zone, type} * coredns_dns_request_type_count_total{zone, type}
* coredns_dns_response_size_bytes{zone, proto} * coredns_dns_response_size_bytes{zone, proto}
* coredns_dns_response_transfer_size_bytes{zone, proto}
* coredns_dns_response_rcode_count_total{zone, rcode} * coredns_dns_response_rcode_count_total{zone, rcode}
Each counter has a label `zone` which is the zonename used for the request/response. Each counter has a label `zone` which is the zonename used for the request/response.
...@@ -27,10 +25,7 @@ Extra labels used are: ...@@ -27,10 +25,7 @@ Extra labels used are:
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response. * The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
If monitoring is enabled, queries that do not enter the middleware chain are exported under the fake If monitoring is enabled, queries that do not enter the middleware chain are exported under the fake
domain "dropped" (without a closing dot). name "dropped" (without a closing dot - this is never a valid domain name).
Restarting CoreDNS will stop the monitoring. This is a bug. Also [this upstream
Caddy bug](https://github.com/mholt/caddy/issues/675).
## Syntax ## Syntax
...@@ -44,3 +39,9 @@ It optionally takes an address to which the metrics are exported; the default ...@@ -44,3 +39,9 @@ It optionally takes an address to which the metrics are exported; the default
is `localhost:9153`. The metrics path is fixed to `/metrics`. is `localhost:9153`. The metrics path is fixed to `/metrics`.
## Examples ## Examples
Use an alternative address:
~~~
prometheus localhost:9253
~~~
package metrics package metrics
import ( import (
"time"
"github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware"
"github.com/miekg/coredns/middleware/metrics/vars"
"github.com/miekg/coredns/middleware/pkg/dnsrecorder" "github.com/miekg/coredns/middleware/pkg/dnsrecorder"
"github.com/miekg/coredns/middleware/pkg/rcode" "github.com/miekg/coredns/middleware/pkg/rcode"
"github.com/miekg/coredns/request" "github.com/miekg/coredns/request"
...@@ -17,7 +16,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg ...@@ -17,7 +16,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
state := request.Request{W: w, Req: r} state := request.Request{W: w, Req: r}
qname := state.QName() qname := state.QName()
zone := middleware.Zones(m.ZoneNames).Matches(qname) zone := middleware.Zones(m.ZoneNames()).Matches(qname)
if zone == "" { if zone == "" {
zone = "." zone = "."
} }
...@@ -26,71 +25,9 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg ...@@ -26,71 +25,9 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
rw := dnsrecorder.New(w) rw := dnsrecorder.New(w)
status, err := m.Next.ServeDNS(ctx, rw, r) status, err := m.Next.ServeDNS(ctx, rw, r)
Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start) vars.Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start)
return status, err return status, err
} }
// Report is a plain reporting function that the server can use for REFUSED and other func (m *Metrics) Name() string { return "prometheus" }
// queries that are turned down because they don't match any middleware.
func Report(req request.Request, zone, rcode string, size int, start time.Time) {
if requestCount == nil {
// no metrics are enabled
return
}
// Proto and Family
net := req.Proto()
fam := "1"
if req.Family() == 2 {
fam = "2"
}
typ := req.QType()
requestCount.WithLabelValues(zone, net, fam).Inc()
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
if req.Do() {
requestDo.WithLabelValues(zone).Inc()
}
if _, known := monitorType[typ]; known {
requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
} else {
requestType.WithLabelValues(zone, other).Inc()
}
if typ == dns.TypeIXFR || typ == dns.TypeAXFR {
responseTransferSize.WithLabelValues(zone, net).Observe(float64(size))
requestTransferSize.WithLabelValues(zone, net).Observe(float64(req.Size()))
} else {
responseSize.WithLabelValues(zone, net).Observe(float64(size))
requestSize.WithLabelValues(zone, net).Observe(float64(req.Size()))
}
responseRcode.WithLabelValues(zone, rcode).Inc()
}
var monitorType = map[uint16]bool{
dns.TypeAAAA: true,
dns.TypeA: true,
dns.TypeCNAME: true,
dns.TypeDNSKEY: true,
dns.TypeDS: true,
dns.TypeMX: true,
dns.TypeNSEC3: true,
dns.TypeNSEC: true,
dns.TypeNS: true,
dns.TypePTR: true,
dns.TypeRRSIG: true,
dns.TypeSOA: true,
dns.TypeSRV: true,
dns.TypeTXT: true,
// Meta Qtypes
dns.TypeIXFR: true,
dns.TypeAXFR: true,
dns.TypeANY: true,
}
const other = "other"
// Package metrics implement a handler and middleware that provides Prometheus // Package metrics implement a handler and middleware that provides Prometheus metrics.
// metrics.
package metrics package metrics
import ( import (
...@@ -9,37 +8,51 @@ import ( ...@@ -9,37 +8,51 @@ import (
"sync" "sync"
"github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware"
"github.com/miekg/coredns/middleware/metrics/vars"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
) )
var (
requestCount *prometheus.CounterVec
requestDuration *prometheus.HistogramVec
requestSize *prometheus.HistogramVec
requestTransferSize *prometheus.HistogramVec
requestDo *prometheus.CounterVec
requestType *prometheus.CounterVec
responseSize *prometheus.HistogramVec
responseTransferSize *prometheus.HistogramVec
responseRcode *prometheus.CounterVec
)
// Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics // Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics
type Metrics struct { type Metrics struct {
Next middleware.Handler Next middleware.Handler
Addr string Addr string
ln net.Listener ln net.Listener
mux *http.ServeMux mux *http.ServeMux
Once sync.Once Once sync.Once
ZoneNames []string
zoneNames []string
zoneMap map[string]bool
zoneMu sync.RWMutex
}
// AddZone adds zone z to m and rebuilds the cached list of zone names.
// The update is performed while holding m.zoneMu. Adding a zone that is
// already present leaves the set of zones unchanged.
func (m *Metrics) AddZone(z string) {
	m.zoneMu.Lock()
	defer m.zoneMu.Unlock()

	// Writing to a nil map panics; allocate it lazily so that a Metrics value
	// created without initialising zoneMap can still be used.
	if m.zoneMap == nil {
		m.zoneMap = make(map[string]bool)
	}
	m.zoneMap[z] = true
	// Regenerate the name slice from the map so it reflects the new zone.
	m.zoneNames = keys(m.zoneMap)
}
// RemoveZone remove zone z from m.
func (m *Metrics) RemoveZone(z string) {
m.zoneMu.Lock()
delete(m.zoneMap, z)
m.zoneNames = keys(m.zoneMap)
m.zoneMu.Unlock()
}
// ZoneNames returns the zones of m.
func (m *Metrics) ZoneNames() []string {
m.zoneMu.RLock()
s := m.zoneNames
m.zoneMu.RUnlock()
return s
} }
// OnStartup sets up the metrics on startup. // OnStartup sets up the metrics on startup.
func (m *Metrics) OnStartup() error { func (m *Metrics) OnStartup() error {
m.Once.Do(func() { m.Once.Do(func() {
define()
ln, err := net.Listen("tcp", m.Addr) ln, err := net.Listen("tcp", m.Addr)
if err != nil { if err != nil {
...@@ -51,18 +64,16 @@ func (m *Metrics) OnStartup() error { ...@@ -51,18 +64,16 @@ func (m *Metrics) OnStartup() error {
m.mux = http.NewServeMux() m.mux = http.NewServeMux()
prometheus.MustRegister(requestCount) prometheus.MustRegister(vars.RequestCount)
prometheus.MustRegister(requestDuration) prometheus.MustRegister(vars.RequestDuration)
prometheus.MustRegister(requestSize) prometheus.MustRegister(vars.RequestSize)
prometheus.MustRegister(requestTransferSize) prometheus.MustRegister(vars.RequestDo)
prometheus.MustRegister(requestDo) prometheus.MustRegister(vars.RequestType)
prometheus.MustRegister(requestType)
prometheus.MustRegister(responseSize) prometheus.MustRegister(vars.ResponseSize)
prometheus.MustRegister(responseTransferSize) prometheus.MustRegister(vars.ResponseRcode)
prometheus.MustRegister(responseRcode)
m.mux.Handle(path, prometheus.Handler()) m.mux.Handle("/metrics", prometheus.Handler())
go func() { go func() {
http.Serve(m.ln, m.mux) http.Serve(m.ln, m.mux)
...@@ -79,79 +90,10 @@ func (m *Metrics) OnShutdown() error { ...@@ -79,79 +90,10 @@ func (m *Metrics) OnShutdown() error {
return nil return nil
} }
func define() { func keys(m map[string]bool) []string {
requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ sx := []string{}
Namespace: middleware.Namespace, for k := range m {
Subsystem: subsystem, sx = append(sx, k)
Name: "request_count_total", }
Help: "Counter of DNS requests made per zone, protocol and family.", return sx
}, []string{"zone", "proto", "family"})
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_duration_milliseconds",
Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...),
Help: "Histogram of the time (in milliseconds) each request took.",
}, []string{"zone"})
requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_size_bytes",
Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_transfer_size_bytes",
Help: "Size of the incoming zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_do_count_total",
Help: "Counter of DNS requests with DO bit set per zone.",
}, []string{"zone"})
requestType = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_type_count_total",
Help: "Counter of DNS requests per type, per zone.",
}, []string{"zone", "type"})
responseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "response_size_bytes",
Help: "Size of the returned response in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "response_transfer_size_bytes",
Help: "Size of the returned zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
responseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "response_rcode_count_total",
Help: "Counter of response status codes.",
}, []string{"zone", "rcode"})
} }
const (
// Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone.
Dropped = "dropped"
subsystem = "dns"
path = "/metrics"
)
package metrics
import (
"testing"
"github.com/miekg/coredns/middleware"
mtest "github.com/miekg/coredns/middleware/metrics/test"
"github.com/miekg/coredns/middleware/pkg/dnsrecorder"
"github.com/miekg/coredns/middleware/test"
"github.com/miekg/dns"
"golang.org/x/net/context"
)
// TestMetrics starts the metrics handler, pushes a series of queries through
// it and, after each query, scrapes the /metrics endpoint and compares the
// first value reported for the named metric with the expected one.
func TestMetrics(t *testing.T) {
	met := &Metrics{Addr: Addr, zoneMap: make(map[string]bool)}
	if err := met.OnStartup(); err != nil {
		t.Fatalf("Failed to start metrics handler: %s", err)
	}
	defer met.OnShutdown()

	met.AddZone("example.org.")

	type testCase struct {
		next          middleware.Handler
		qname         string
		qtype         uint16
		metric        string
		expectedValue string
	}

	// This all works because 1 bucket (1 zone, 1 type)
	cases := []testCase{
		{
			next:          test.NextHandler(dns.RcodeSuccess, nil),
			qname:         "example.org",
			metric:        "coredns_dns_request_count_total",
			expectedValue: "1",
		},
		{
			next:          test.NextHandler(dns.RcodeSuccess, nil),
			qname:         "example.org",
			metric:        "coredns_dns_request_count_total",
			expectedValue: "2",
		},
		{
			next:          test.NextHandler(dns.RcodeSuccess, nil),
			qname:         "example.org",
			metric:        "coredns_dns_request_type_count_total",
			expectedValue: "3",
		},
		{
			next:          test.NextHandler(dns.RcodeSuccess, nil),
			qname:         "example.org",
			metric:        "coredns_dns_response_rcode_count_total",
			expectedValue: "4",
		},
	}

	ctx := context.TODO()

	for i, tc := range cases {
		// A zero qtype in the table means "query for an A record".
		qtype := tc.qtype
		if qtype == 0 {
			qtype = dns.TypeA
		}

		req := new(dns.Msg)
		req.SetQuestion(dns.Fqdn(tc.qname), qtype)
		met.Next = tc.next

		rec := dnsrecorder.New(&test.ResponseWriter{})
		if _, err := met.ServeDNS(ctx, rec, req); err != nil {
			t.Fatalf("Test %d: Expected no error, but got %s", i, err)
		}

		// The endpoint is scraped for every case, even when nothing is compared.
		result := mtest.Scrape(t, "http://"+Addr+"/metrics")
		if tc.expectedValue == "" {
			continue
		}
		if got, _ := mtest.MetricValue(tc.metric, result); got != tc.expectedValue {
			t.Errorf("Test %d: Expected value %s for metrics %s, but got %s", i, tc.expectedValue, tc.metric, got)
		}
	}
}
...@@ -38,18 +38,17 @@ func setup(c *caddy.Controller) error { ...@@ -38,18 +38,17 @@ func setup(c *caddy.Controller) error {
func prometheusParse(c *caddy.Controller) (*Metrics, error) { func prometheusParse(c *caddy.Controller) (*Metrics, error) {
var ( var (
met = &Metrics{Addr: addr} met = &Metrics{Addr: Addr, zoneMap: make(map[string]bool)}
err error err error
) )
for c.Next() { for c.Next() {
if len(met.ZoneNames) > 0 { if len(met.ZoneNames()) > 0 {
return met, c.Err("metrics: can only have one metrics module per server") return met, c.Err("can only have one metrics module per server")
} }
met.ZoneNames = make([]string, len(c.ServerBlockKeys))
copy(met.ZoneNames, c.ServerBlockKeys) for _, z := range c.ServerBlockKeys {
for i := range met.ZoneNames { met.AddZone(middleware.Host(z).Normalize())
met.ZoneNames[i] = middleware.Host(met.ZoneNames[i]).Normalize()
} }
args := c.RemainingArgs() args := c.RemainingArgs()
...@@ -78,7 +77,7 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) { ...@@ -78,7 +77,7 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) {
return met, e return met, e
} }
default: default:
return met, c.Errf("metrics: unknown item: %s", c.Val()) return met, c.Errf("unknown item: %s", c.Val())
} }
} }
...@@ -88,4 +87,4 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) { ...@@ -88,4 +87,4 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) {
var metricsOnce sync.Once var metricsOnce sync.Once
const addr = "localhost:9153" const Addr = "localhost:9153"
// Adapted by Miek Gieben for CoreDNS testing.
//
// License from prom2json
// Copyright 2014 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package test scrapes a Prometheus target so that tests can inspect the
// exported variables. Basic usage, from inside a test that has a *testing.T:
//
//	result := Scrape(t, "http://localhost:9153/metrics")
//	v, labels := MetricValue("coredns_cache_capacity_gauge", result)
//
package test
import (
"fmt"
"io"
"mime"
"net/http"
"testing"
"github.com/matttproud/golang_protobuf_extensions/pbutil"
"github.com/prometheus/common/expfmt"
dto "github.com/prometheus/client_model/go"
)
// MetricFamily holds a prometheus metric.
type MetricFamily struct {
	Name    string        `json:"name"`
	Help    string        `json:"help"`
	Type    string        `json:"type"`
	Metrics []interface{} `json:"metrics,omitempty"` // Each element is a metric, summary or histogram.
}

// metric is for all "single value" metrics.
type metric struct {
	Labels map[string]string `json:"labels,omitempty"`
	Value  string            `json:"value"`
}

// summary carries the labels, quantiles, sample count and sample sum of one
// summary metric, all rendered as strings.
type summary struct {
	Labels    map[string]string `json:"labels,omitempty"`
	Quantiles map[string]string `json:"quantiles,omitempty"`
	Count     string            `json:"count"`
	Sum       string            `json:"sum"`
}

// histogram carries the labels, buckets, sample count and sample sum of one
// histogram metric, all rendered as strings.
type histogram struct {
	Labels  map[string]string `json:"labels,omitempty"`
	Buckets map[string]string `json:"buckets,omitempty"`
	Count   string            `json:"count"`
	Sum     string            `json:"sum"`
}
// Scrape fetches url and returns all the metric families found there as a
// []*MetricFamily. The fetch runs in its own goroutine via
// fetchMetricFamilies; Scrape returns once that goroutine has closed the
// channel. The returned slice is never nil.
func Scrape(t *testing.T, url string) []*MetricFamily {
	families := make(chan *dto.MetricFamily, 1024)
	go fetchMetricFamilies(t, url, families)

	out := make([]*MetricFamily, 0)
	for {
		mf, open := <-families
		if !open {
			return out
		}
		out = append(out, newMetricFamily(mf))
	}
}
// MetricValue returns the value associated with name as a string as well as the labels.
// It only looks at the first metric of the first family called name.
//
// Only "single value" metrics (stored as metric) are supported. If the family
// has no metrics at all, or its first entry is a summary or histogram,
// MetricValue returns "" and nil instead of panicking. The same is returned
// when no family is called name.
func MetricValue(name string, mfs []*MetricFamily) (string, map[string]string) {
	for _, mf := range mfs {
		if mf == nil || mf.Name != name {
			continue
		}
		if len(mf.Metrics) == 0 {
			return "", nil
		}
		// Only works with Gauge and Counter...
		m, ok := mf.Metrics[0].(metric)
		if !ok {
			return "", nil
		}
		return m.Value, m.Labels
	}
	return "", nil
}
// MetricValueLabel returns the value for name *and* label *value*: the first
// metric in a family called name that has label among its label values. The
// label names are not considered.
//
// Entries that are not "single value" metrics (summaries and histograms) are
// skipped rather than causing a panic. If nothing matches, "" and nil are
// returned.
func MetricValueLabel(name, label string, mfs []*MetricFamily) (string, map[string]string) {
	for _, mf := range mfs {
		if mf == nil || mf.Name != name {
			continue
		}
		for _, entry := range mf.Metrics {
			m, ok := entry.(metric)
			if !ok {
				continue
			}
			for _, v := range m.Labels {
				if v == label {
					return m.Value, m.Labels
				}
			}
		}
	}
	return "", nil
}
// newMetricFamily converts a scraped dto.MetricFamily into a MetricFamily.
// Each entry becomes a summary, a histogram or, for every other type, a
// metric, with all numeric values rendered as strings via fmt.Sprint.
func newMetricFamily(dtoMF *dto.MetricFamily) *MetricFamily {
	mf := &MetricFamily{
		Name:    dtoMF.GetName(),
		Help:    dtoMF.GetHelp(),
		Type:    dtoMF.GetType().String(),
		Metrics: make([]interface{}, len(dtoMF.Metric)),
	}
	for i, m := range dtoMF.Metric {
		switch dtoMF.GetType() {
		case dto.MetricType_SUMMARY:
			mf.Metrics[i] = summary{
				Labels:    makeLabels(m),
				Quantiles: makeQuantiles(m),
				Count:     fmt.Sprint(m.GetSummary().GetSampleCount()),
				Sum:       fmt.Sprint(m.GetSummary().GetSampleSum()),
			}
		case dto.MetricType_HISTOGRAM:
			mf.Metrics[i] = histogram{
				Labels:  makeLabels(m),
				Buckets: makeBuckets(m),
				Count:   fmt.Sprint(m.GetHistogram().GetSampleCount()),
				// The sum must come from the histogram; a histogram metric
				// has no summary, so reading it from there always gave 0.
				Sum: fmt.Sprint(m.GetHistogram().GetSampleSum()),
			}
		default:
			mf.Metrics[i] = metric{
				Labels: makeLabels(m),
				Value:  fmt.Sprint(value(m)),
			}
		}
	}
	return mf
}
// value returns the numeric value of m, taken from the first of gauge,
// counter and untyped that is set, in that order. It returns 0 when none of
// them is set.
func value(m *dto.Metric) float64 {
	switch {
	case m.Gauge != nil:
		return m.GetGauge().GetValue()
	case m.Counter != nil:
		return m.GetCounter().GetValue()
	case m.Untyped != nil:
		return m.GetUntyped().GetValue()
	default:
		return 0
	}
}
// makeLabels returns the label pairs of m as a name -> value map. The map is
// empty, not nil, when m carries no labels.
func makeLabels(m *dto.Metric) map[string]string {
	labels := make(map[string]string, len(m.Label))
	for _, pair := range m.Label {
		name, val := pair.GetName(), pair.GetValue()
		labels[name] = val
	}
	return labels
}
// makeQuantiles returns the quantiles of m's summary as a map from quantile
// to value, both formatted with fmt.Sprint.
func makeQuantiles(m *dto.Metric) map[string]string {
	quantiles := make(map[string]string)
	for _, q := range m.GetSummary().Quantile {
		key := fmt.Sprint(q.GetQuantile())
		quantiles[key] = fmt.Sprint(q.GetValue())
	}
	return quantiles
}
// makeBuckets returns the buckets of m's histogram as a map from upper bound
// to cumulative count, both formatted with fmt.Sprint.
func makeBuckets(m *dto.Metric) map[string]string {
	buckets := make(map[string]string)
	for _, b := range m.GetHistogram().Bucket {
		bound := fmt.Sprint(b.GetUpperBound())
		buckets[bound] = fmt.Sprint(b.GetCumulativeCount())
	}
	return buckets
}
// fetchMetricFamilies GETs url, decodes the metric families in the response
// and sends them on ch. ch is always closed before the function returns.
//
// The response is decoded as delimited protocol buffers when the Content-Type
// says so, and as the text exposition format otherwise.
//
// Scrape runs this function in its own goroutine. The testing package only
// allows FailNow (and therefore Fatal/Fatalf) on the goroutine running the
// test, so failures are reported with Error/Errorf followed by a return. The
// test is still marked as failed and the channel is still closed.
func fetchMetricFamilies(t *testing.T, url string, ch chan<- *dto.MetricFamily) {
	defer close(ch)

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		t.Errorf("creating GET request for URL %q failed: %s", url, err)
		return
	}
	req.Header.Add("Accept", acceptHeader)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		t.Errorf("executing GET request for URL %q failed: %s", url, err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		t.Errorf("GET request for URL %q returned HTTP status %s", url, resp.Status)
		return
	}

	mediatype, params, err := mime.ParseMediaType(resp.Header.Get("Content-Type"))
	isProtobuf := err == nil &&
		mediatype == "application/vnd.google.protobuf" &&
		params["encoding"] == "delimited" &&
		params["proto"] == "io.prometheus.client.MetricFamily"

	if isProtobuf {
		for {
			mf := &dto.MetricFamily{}
			if _, err = pbutil.ReadDelimited(resp.Body, mf); err != nil {
				if err != io.EOF {
					t.Errorf("reading metric family protocol buffer failed: %s", err)
				}
				return
			}
			ch <- mf
		}
	}

	// We could do further content-type checks here, but the
	// fallback for now will anyway be the text format
	// version 0.0.4, so just go for it and see if it works.
	var parser expfmt.TextParser
	metricFamilies, err := parser.TextToMetricFamilies(resp.Body)
	if err != nil {
		t.Error("reading text format failed:", err)
		return
	}
	for _, mf := range metricFamilies {
		ch <- mf
	}
}
// acceptHeader is sent as the Accept header when scraping: delimited protobuf
// is offered with q=0.7 and the text format version 0.0.4 with q=0.3.
const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,` +
	`text/plain;version=0.0.4;q=0.3`
package vars
import (
"time"
"github.com/miekg/coredns/request"
"github.com/miekg/dns"
)
// Report reports the metrics data associated with request.
//
// zone and rcode are used verbatim as label values, size is the response size
// in bytes and start is the time the request was received. Report updates the
// request count, duration, DO-bit, type and size metrics, plus the response
// size and rcode metrics.
func Report(req request.Request, zone, rcode string, size int, start time.Time) {
	// Proto and Family
	net := req.Proto()
	fam := "1"
	if req.Family() == 2 {
		fam = "2"
	}

	typ := req.QType()

	RequestCount.WithLabelValues(zone, net, fam).Inc()

	// Convert to float before dividing. Dividing the Duration first truncates
	// to whole milliseconds, so sub-millisecond requests were observed as 0
	// and the fractional buckets of the histogram were never used.
	elapsed := float64(time.Since(start)) / float64(time.Millisecond)
	RequestDuration.WithLabelValues(zone).Observe(elapsed)

	if req.Do() {
		RequestDo.WithLabelValues(zone).Inc()
	}

	// Types outside monitorType share one label value to bound the number of
	// time series.
	typeLabel := other
	if monitorType[typ] {
		typeLabel = dns.Type(typ).String()
	}
	RequestType.WithLabelValues(zone, typeLabel).Inc()

	ResponseSize.WithLabelValues(zone, net).Observe(float64(size))
	RequestSize.WithLabelValues(zone, net).Observe(float64(req.Size()))

	ResponseRcode.WithLabelValues(zone, rcode).Inc()
}
// monitorType is the set of query types that Report counts under their own
// "type" label value; every other type is counted under other.
var monitorType = map[uint16]bool{
	// Regular record types.
	dns.TypeA:      true,
	dns.TypeAAAA:   true,
	dns.TypeCNAME:  true,
	dns.TypeDNSKEY: true,
	dns.TypeDS:     true,
	dns.TypeMX:     true,
	dns.TypeNS:     true,
	dns.TypeNSEC:   true,
	dns.TypeNSEC3:  true,
	dns.TypePTR:    true,
	dns.TypeRRSIG:  true,
	dns.TypeSOA:    true,
	dns.TypeSRV:    true,
	dns.TypeTXT:    true,

	// Meta Qtypes
	dns.TypeANY:  true,
	dns.TypeAXFR: true,
	dns.TypeIXFR: true,
}

// other is the "type" label value used for query types not in monitorType.
const other = "other"
package vars
import (
"github.com/miekg/coredns/middleware"
"github.com/prometheus/client_golang/prometheus"
)
// RequestCount counts DNS requests, labelled by zone, proto and family.
var RequestCount = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: middleware.Namespace,
		Subsystem: subsystem,
		Name:      "request_count_total",
		Help:      "Counter of DNS requests made per zone, protocol and family.",
	},
	[]string{"zone", "proto", "family"},
)

// RequestDuration is a histogram of the request duration in milliseconds,
// labelled by zone. Its buckets are prometheus.DefBuckets followed by the
// larger bounds listed here.
var RequestDuration = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: middleware.Namespace,
		Subsystem: subsystem,
		Name:      "request_duration_milliseconds",
		Help:      "Histogram of the time (in milliseconds) each request took.",
		Buckets:   append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...),
	},
	[]string{"zone"},
)

// RequestSize is a histogram of the request size in bytes, labelled by zone
// and proto.
var RequestSize = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: middleware.Namespace,
		Subsystem: subsystem,
		Name:      "request_size_bytes",
		Help:      "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
		Buckets:   []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
	},
	[]string{"zone", "proto"},
)

// RequestDo counts DNS requests that have the DO bit set, labelled by zone.
var RequestDo = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: middleware.Namespace,
		Subsystem: subsystem,
		Name:      "request_do_count_total",
		Help:      "Counter of DNS requests with DO bit set per zone.",
	},
	[]string{"zone"},
)

// RequestType counts DNS requests, labelled by zone and query type.
var RequestType = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: middleware.Namespace,
		Subsystem: subsystem,
		Name:      "request_type_count_total",
		Help:      "Counter of DNS requests per type, per zone.",
	},
	[]string{"zone", "type"},
)

// ResponseSize is a histogram of the response size in bytes, labelled by
// zone and proto.
var ResponseSize = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: middleware.Namespace,
		Subsystem: subsystem,
		Name:      "response_size_bytes",
		Help:      "Size of the returned response in bytes.",
		Buckets:   []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
	},
	[]string{"zone", "proto"},
)

// ResponseRcode counts responses, labelled by zone and rcode.
var ResponseRcode = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: middleware.Namespace,
		Subsystem: subsystem,
		Name:      "response_rcode_count_total",
		Help:      "Counter of response status codes.",
	},
	[]string{"zone", "rcode"},
)
// Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone.
const Dropped = "dropped"

// subsystem is the Subsystem value given to the metrics declared in this package.
const subsystem = "dns"
...@@ -45,6 +45,7 @@ type ( ...@@ -45,6 +45,7 @@ type (
// chain by returning them unchanged. // chain by returning them unchanged.
Handler interface { Handler interface {
ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error) ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error)
Name() string
} }
// HandlerFunc is a convenience type like dns.HandlerFunc, except // HandlerFunc is a convenience type like dns.HandlerFunc, except
...@@ -58,6 +59,8 @@ func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns. ...@@ -58,6 +59,8 @@ func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.
return f(ctx, w, r) return f(ctx, w, r)
} }
func (f HandlerFunc) Name() string { return "handlerfunc" }
// Error returns err with 'middleware/name: ' prefixed to it. // Error returns err with 'middleware/name: ' prefixed to it.
func Error(name string, err error) error { return fmt.Errorf("%s/%s: %s", "middleware", name, err) } func Error(name string, err error) error { return fmt.Errorf("%s/%s: %s", "middleware", name, err) }
......
...@@ -51,7 +51,7 @@ func (n Name) Normalize() string { return strings.ToLower(dns.Fqdn(string(n))) } ...@@ -51,7 +51,7 @@ func (n Name) Normalize() string { return strings.ToLower(dns.Fqdn(string(n))) }
type ( type (
// Host represents a host from the Corefile, may contain port. // Host represents a host from the Corefile, may contain port.
Host string // Host represents a host from the Corefile, may contain port. Host string // Host represents a host from the Corefile, may contain port.
// Addr resprents an address in the Corefile. // Addr represents an address in the Corefile.
Addr string // Addr resprents an address in the Corefile. Addr string // Addr resprents an address in the Corefile.
) )
......
...@@ -102,5 +102,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( ...@@ -102,5 +102,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
return p.Next.ServeDNS(ctx, w, r) return p.Next.ServeDNS(ctx, w, r)
} }
func (p Proxy) Name() string { return "proxy" }
// defaultTimeout is the default networking timeout for DNS requests. // defaultTimeout is the default networking timeout for DNS requests.
const defaultTimeout = 5 * time.Second const defaultTimeout = 5 * time.Second
...@@ -52,6 +52,8 @@ func (rw Rewrite) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg ...@@ -52,6 +52,8 @@ func (rw Rewrite) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
return rw.Next.ServeDNS(ctx, w, r) return rw.Next.ServeDNS(ctx, w, r)
} }
func (rw Rewrite) Name() string { return "rewrite" }
// Rule describes an internal location rewrite rule. // Rule describes an internal location rewrite rule.
type Rule interface { type Rule interface {
// Rewrite rewrites the internal location of the current request. // Rewrite rewrites the internal location of the current request.
......
...@@ -283,6 +283,7 @@ type ( ...@@ -283,6 +283,7 @@ type (
// Handler interface defines a middleware. // Handler interface defines a middleware.
Handler interface { Handler interface {
ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error) ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error)
Name() string
} }
) )
...@@ -290,3 +291,5 @@ type ( ...@@ -290,3 +291,5 @@ type (
func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
return f(ctx, w, r) return f(ctx, w, r)
} }
func (f HandlerFunc) Name() string { return "handlerfunc" }
...@@ -55,3 +55,5 @@ func (wh Whoami) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ...@@ -55,3 +55,5 @@ func (wh Whoami) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
return 0, nil return 0, nil
} }
func (wh Whoami) Name() string { return "whoami" }
package test package test
import "testing" import (
"io/ioutil"
"log"
"os"
"path"
"testing"
"time"
"github.com/miekg/coredns/middleware/cache"
"github.com/miekg/coredns/middleware/metrics"
mtest "github.com/miekg/coredns/middleware/metrics/test"
"github.com/miekg/coredns/middleware/metrics/vars"
"github.com/miekg/dns"
)
// Start test server that has metrics enabled. Then tear it down again. // Start test server that has metrics enabled. Then tear it down again.
func TestMetricsServer(t *testing.T) { func TestMetricsServer(t *testing.T) {
corefile := `.:0 { corefile := `example.org:0 {
chaos CoreDNS-001 miek@miek.nl chaos CoreDNS-001 miek@miek.nl
prometheus localhost:0 prometheus
}
example.com:0 {
proxy . 8.8.4.4:53
prometheus
}
`
srv, err := CoreDNSServer(corefile)
if err != nil {
t.Fatalf("Could not get CoreDNS serving instance: %s", err)
}
defer srv.Stop()
}
// TestMetricsRefused starts a server for example.org, queries it for a name
// outside that zone and then checks the first coredns_dns_response_rcode_count_total
// metric: it should have value 1, the vars.Dropped zone label and the REFUSED
// rcode label.
func TestMetricsRefused(t *testing.T) {
	metricName := "coredns_dns_response_rcode_count_total"

	corefile := `example.org:0 {
proxy . 8.8.8.8:53
prometheus
}
`
	srv, err := CoreDNSServer(corefile)
	if err != nil {
		t.Fatalf("Could not get CoreDNS serving instance: %s", err)
	}
	defer srv.Stop()

	udp, _ := CoreDNSServerPorts(srv, 0)

	m := new(dns.Msg)
	m.SetQuestion("google.com.", dns.TypeA)

	if _, err = dns.Exchange(m, udp); err != nil {
		t.Fatalf("Could not send message: %s", err)
	}

	data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
	got, labels := mtest.MetricValue(metricName, data)

	if got != "1" {
		t.Errorf("Expected value %s for refused, but got %s", "1", got)
	}
	if labels["zone"] != vars.Dropped {
		t.Errorf("Expected zone value %s for refused, but got %s", vars.Dropped, labels["zone"])
	}
	// This check is about the rcode label; the message used to say "zone".
	if labels["rcode"] != "REFUSED" {
		t.Errorf("Expected rcode value %s for refused, but got %s", "REFUSED", labels["rcode"])
	}
}
func TestMetricsCache(t *testing.T) {
metricName := "coredns_cache_size_guage"
corefile := `example.net:0 {
proxy . 8.8.8.8:53
prometheus
cache
} }
` `
srv, err := CoreDNSServer(corefile) srv, err := CoreDNSServer(corefile)
...@@ -14,4 +86,88 @@ func TestMetricsServer(t *testing.T) { ...@@ -14,4 +86,88 @@ func TestMetricsServer(t *testing.T) {
t.Fatalf("Could not get CoreDNS serving instance: %s", err) t.Fatalf("Could not get CoreDNS serving instance: %s", err)
} }
defer srv.Stop() defer srv.Stop()
udp, _ := CoreDNSServerPorts(srv, 0)
m := new(dns.Msg)
m.SetQuestion("www.example.net.", dns.TypeA)
if _, err = dns.Exchange(m, udp); err != nil {
t.Fatalf("Could not send message: %s", err)
}
data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
// Get the value for the metrics where the one of the labels values matches "success"
got, _ := mtest.MetricValueLabel(metricName, cache.Success, data)
if got != "1" {
t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
}
}
func TestMetricsAuto(t *testing.T) {
tmpdir, err := ioutil.TempDir(os.TempDir(), "coredns")
if err != nil {
t.Fatal(err)
}
// TODO(miek): Random port as string and use that later?
corefile := `org:0 {
auto {
directory ` + tmpdir + ` db\.(.*) {1} 1
}
prometheus
}
`
i, err := CoreDNSServer(corefile)
if err != nil {
t.Fatalf("Could not get CoreDNS serving instance: %s", err)
}
udp, _ := CoreDNSServerPorts(i, 0)
if udp == "" {
t.Fatalf("Could not get UDP listening port")
}
defer i.Stop()
log.SetOutput(ioutil.Discard)
// Write db.example.org to get example.org.
if err = ioutil.WriteFile(path.Join(tmpdir, "db.example.org"), []byte(zoneContent), 0644); err != nil {
t.Fatal(err)
}
// TODO(miek): make the auto sleep even less.
time.Sleep(1100 * time.Millisecond) // wait for it to be picked up
m := new(dns.Msg)
m.SetQuestion("www.example.org.", dns.TypeA)
if _, err := dns.Exchange(m, udp); err != nil {
t.Fatalf("Could not send message: %s", err)
}
metricName := "coredns_dns_request_count_total" //{zone, proto, family}
data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
// Get the value for the metrics where the one of the labels values matches "example.org."
got, _ := mtest.MetricValueLabel(metricName, "example.org.", data)
if got != "1" {
t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
}
// Remove db.example.org again. And see if the metric stops increasing.
os.Remove(path.Join(tmpdir, "db.example.org"))
time.Sleep(1100 * time.Millisecond) // wait for it to be picked up
if _, err := dns.Exchange(m, udp); err != nil {
t.Fatalf("Could not send message: %s", err)
}
data = mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
got, _ = mtest.MetricValueLabel(metricName, "example.org.", data)
if got != "1" {
t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment