Commit d563c625 authored by Miek Gieben's avatar Miek Gieben Committed by GitHub

middleware/monitoring: add more monitoring (#182)

* Split response size into normal responses and AXFR|IXFR responses.
* Split out incoming sizes as well in normal and AXFR|IXFRs.
* Add meta qtype to the monitored qtypes.
* Make duration to be exported in milliseconds instead of seconds.
parent d277f21d
...@@ -5,23 +5,26 @@ This module enables prometheus metrics for CoreDNS. The default location for the ...@@ -5,23 +5,26 @@ This module enables prometheus metrics for CoreDNS. The default location for the
The following metrics are exported: The following metrics are exported:
* coredns_dns_request_count_total * coredns_dns_request_count_total{zone, proto, family}
* coredns_dns_request_duration_seconds * coredns_dns_request_duration_milliseconds{zone}
* coredns_dns_request_size_bytes * coredns_dns_request_size_bytes{zone, proto}
* coredns_dns_request_do_count_total * coredns_dns_request_transfer_size_bytes{zone, proto}
* coredns_dns_request_type_count_total * coredns_dns_request_do_count_total{zone}
* coredns_dns_response_size_bytes * coredns_dns_request_type_count_total{zone, type}
* coredns_dns_response_rcode_count_total * coredns_dns_response_size_bytes{zone, proto}
* coredns_dns_response_transfer_size_bytes{zone, proto}
Each counter has a label `zone` which is the zonename used for the request/response. and a label * coredns_dns_response_rcode_count_total{zone, rcode}
`qtype` which holds the query type. The `dns_request_count_total` has extra labels: `proto` which
holds the transport of the response ("udp" or "tcp") and the address family of the transport (1 Each counter has a label `zone` which is the zonename used for the request/response.
= IP (IP version 4), 2 = IP6 (IP version 6)). And `type_count_total` hold a per RR type counter, it
holds the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT, NS, SRV, DS, DNSKEY, RRSIG, NSEC, Extra labels used are:
NSEC3) and "other" which lumps together all other types.
* `proto` which holds the transport of the response ("udp" or "tcp")
The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response. * The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)).
The `*_size_bytes` counters also hold the protocol in the `proto` label ("udp" or "tcp"). * `type` which holds the query type, it holds the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT,
NS, SRV, DS, DNSKEY, RRSIG, NSEC, NSEC3, IXFR, AXFR and ANY) and "other" which lumps together all
other types.
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
If monitoring is enabled queries that do not enter the middleware chain are exported under the fake If monitoring is enabled queries that do not enter the middleware chain are exported under the fake
domain "dropped" (without a closing dot). domain "dropped" (without a closing dot).
......
...@@ -42,20 +42,29 @@ func Report(state middleware.State, zone, rcode string, size int, start time.Tim ...@@ -42,20 +42,29 @@ func Report(state middleware.State, zone, rcode string, size int, start time.Tim
fam = "2" fam = "2"
} }
typ := state.QType()
requestCount.WithLabelValues(zone, net, fam).Inc() requestCount.WithLabelValues(zone, net, fam).Inc()
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Second)) requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
if state.Do() { if state.Do() {
requestDo.WithLabelValues(zone).Inc() requestDo.WithLabelValues(zone).Inc()
} }
typ := state.QType()
if _, known := monitorType[typ]; known { if _, known := monitorType[typ]; known {
requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc() requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
} else { } else {
requestType.WithLabelValues(zone, other).Inc() requestType.WithLabelValues(zone, other).Inc()
} }
responseSize.WithLabelValues(zone, net).Observe(float64(size)) if typ == dns.TypeIXFR || typ == dns.TypeAXFR {
responseTransferSize.WithLabelValues(zone, net).Observe(float64(size))
requestTransferSize.WithLabelValues(zone, net).Observe(float64(size))
} else {
responseSize.WithLabelValues(zone, net).Observe(float64(size))
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
}
responseRcode.WithLabelValues(zone, rcode).Inc() responseRcode.WithLabelValues(zone, rcode).Inc()
} }
...@@ -74,6 +83,10 @@ var monitorType = map[uint16]bool{ ...@@ -74,6 +83,10 @@ var monitorType = map[uint16]bool{
dns.TypeSOA: true, dns.TypeSOA: true,
dns.TypeSRV: true, dns.TypeSRV: true,
dns.TypeTXT: true, dns.TypeTXT: true,
// Meta Qtypes
dns.TypeIXFR: true,
dns.TypeAXFR: true,
dns.TypeANY: true,
} }
const other = "other" const other = "other"
...@@ -12,14 +12,16 @@ import ( ...@@ -12,14 +12,16 @@ import (
) )
var ( var (
requestCount *prometheus.CounterVec requestCount *prometheus.CounterVec
requestDuration *prometheus.HistogramVec requestDuration *prometheus.HistogramVec
requestSize *prometheus.HistogramVec requestSize *prometheus.HistogramVec
requestDo *prometheus.CounterVec requestTransferSize *prometheus.HistogramVec
requestType *prometheus.CounterVec requestDo *prometheus.CounterVec
requestType *prometheus.CounterVec
responseSize *prometheus.HistogramVec
responseRcode *prometheus.CounterVec responseSize *prometheus.HistogramVec
responseTransferSize *prometheus.HistogramVec
responseRcode *prometheus.CounterVec
) )
// Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics // Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics
...@@ -47,10 +49,12 @@ func (m *Metrics) Start() error { ...@@ -47,10 +49,12 @@ func (m *Metrics) Start() error {
prometheus.MustRegister(requestCount) prometheus.MustRegister(requestCount)
prometheus.MustRegister(requestDuration) prometheus.MustRegister(requestDuration)
prometheus.MustRegister(requestSize) prometheus.MustRegister(requestSize)
prometheus.MustRegister(requestTransferSize)
prometheus.MustRegister(requestDo) prometheus.MustRegister(requestDo)
prometheus.MustRegister(requestType) prometheus.MustRegister(requestType)
prometheus.MustRegister(responseSize) prometheus.MustRegister(responseSize)
prometheus.MustRegister(responseTransferSize)
prometheus.MustRegister(responseRcode) prometheus.MustRegister(responseRcode)
m.mux.Handle(path, prometheus.Handler()) m.mux.Handle(path, prometheus.Handler())
...@@ -80,9 +84,9 @@ func define() { ...@@ -80,9 +84,9 @@ func define() {
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "request_duration_seconds", Name: "request_duration_milliseconds",
Buckets: append([]float64{.0001, .0005, .001, .0025}, prometheus.DefBuckets...), Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...),
Help: "Histogram of the time (in seconds) each request took.", Help: "Histogram of the time (in milliseconds) each request took.",
}, []string{"zone"}) }, []string{"zone"})
requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
...@@ -93,6 +97,14 @@ func define() { ...@@ -93,6 +97,14 @@ func define() {
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"}) }, []string{"zone", "proto"})
requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_transfer_size_bytes",
Help: "Size of the incoming zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
...@@ -111,7 +123,15 @@ func define() { ...@@ -111,7 +123,15 @@ func define() {
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "response_size_bytes", Name: "response_size_bytes",
Help: "Size of the returns response in bytes.", Help: "Size of the returned response in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "response_transfer_size_bytes",
Help: "Size of the returned zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"}) }, []string{"zone", "proto"})
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment