Commit d563c625 authored by Miek Gieben's avatar Miek Gieben Committed by GitHub

middleware/monitoring: add more monitoring (#182)

* Split response size into normal responses and AXFR|IXFR responses.
* Split out incoming sizes as well in normal and AXFR|IXFRs.
* Add meta qtype to the monitored qtypes.
* Make duration to be exported in milliseconds instead of seconds.
parent d277f21d
......@@ -5,23 +5,26 @@ This module enables prometheus metrics for CoreDNS. The default location for the
The following metrics are exported:
* coredns_dns_request_count_total
* coredns_dns_request_duration_seconds
* coredns_dns_request_size_bytes
* coredns_dns_request_do_count_total
* coredns_dns_request_type_count_total
* coredns_dns_response_size_bytes
* coredns_dns_response_rcode_count_total
Each counter has a label `zone` which is the zonename used for the request/response, and a label
`qtype` which holds the query type. The `dns_request_count_total` has extra labels: `proto` which
holds the transport of the response ("udp" or "tcp") and the address family of the transport (1
= IP (IP version 4), 2 = IP6 (IP version 6)). And `type_count_total` holds a per RR type counter; it
holds the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT, NS, SRV, DS, DNSKEY, RRSIG, NSEC,
NSEC3) and "other" which lumps together all other types.
The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
The `*_size_bytes` counters also hold the protocol in the `proto` label ("udp" or "tcp").
* coredns_dns_request_count_total{zone, proto, family}
* coredns_dns_request_duration_milliseconds{zone}
* coredns_dns_request_size_bytes{zone, proto}
* coredns_dns_request_transfer_size_bytes{zone, proto}
* coredns_dns_request_do_count_total{zone}
* coredns_dns_request_type_count_total{zone, type}
* coredns_dns_response_size_bytes{zone, proto}
* coredns_dns_response_transfer_size_bytes{zone, proto}
* coredns_dns_response_rcode_count_total{zone, rcode}
Each counter has a label `zone` which is the zonename used for the request/response.
Extra labels used are:
* `proto` which holds the transport of the response ("udp" or "tcp")
* The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)).
* `type` which holds the query type; it covers the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT,
NS, SRV, DS, DNSKEY, RRSIG, NSEC, NSEC3, IXFR, AXFR and ANY) and "other" which lumps together all
other types.
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
If monitoring is enabled, queries that do not enter the middleware chain are exported under the fake
domain "dropped" (without a closing dot).
......
......@@ -42,20 +42,29 @@ func Report(state middleware.State, zone, rcode string, size int, start time.Tim
fam = "2"
}
typ := state.QType()
requestCount.WithLabelValues(zone, net, fam).Inc()
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Second))
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
if state.Do() {
requestDo.WithLabelValues(zone).Inc()
}
typ := state.QType()
if _, known := monitorType[typ]; known {
requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
} else {
requestType.WithLabelValues(zone, other).Inc()
}
if typ == dns.TypeIXFR || typ == dns.TypeAXFR {
responseTransferSize.WithLabelValues(zone, net).Observe(float64(size))
requestTransferSize.WithLabelValues(zone, net).Observe(float64(size))
} else {
responseSize.WithLabelValues(zone, net).Observe(float64(size))
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
}
responseRcode.WithLabelValues(zone, rcode).Inc()
}
......@@ -74,6 +83,10 @@ var monitorType = map[uint16]bool{
dns.TypeSOA: true,
dns.TypeSRV: true,
dns.TypeTXT: true,
// Meta Qtypes
dns.TypeIXFR: true,
dns.TypeAXFR: true,
dns.TypeANY: true,
}
const other = "other"
......@@ -15,10 +15,12 @@ var (
requestCount *prometheus.CounterVec
requestDuration *prometheus.HistogramVec
requestSize *prometheus.HistogramVec
requestTransferSize *prometheus.HistogramVec
requestDo *prometheus.CounterVec
requestType *prometheus.CounterVec
responseSize *prometheus.HistogramVec
responseTransferSize *prometheus.HistogramVec
responseRcode *prometheus.CounterVec
)
......@@ -47,10 +49,12 @@ func (m *Metrics) Start() error {
prometheus.MustRegister(requestCount)
prometheus.MustRegister(requestDuration)
prometheus.MustRegister(requestSize)
prometheus.MustRegister(requestTransferSize)
prometheus.MustRegister(requestDo)
prometheus.MustRegister(requestType)
prometheus.MustRegister(responseSize)
prometheus.MustRegister(responseTransferSize)
prometheus.MustRegister(responseRcode)
m.mux.Handle(path, prometheus.Handler())
......@@ -80,9 +84,9 @@ func define() {
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_duration_seconds",
Buckets: append([]float64{.0001, .0005, .001, .0025}, prometheus.DefBuckets...),
Help: "Histogram of the time (in seconds) each request took.",
Name: "request_duration_milliseconds",
Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...),
Help: "Histogram of the time (in milliseconds) each request took.",
}, []string{"zone"})
requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
......@@ -93,6 +97,14 @@ func define() {
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_transfer_size_bytes",
Help: "Size of the incoming zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
......@@ -111,7 +123,15 @@ func define() {
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "response_size_bytes",
Help: "Size of the returns response in bytes.",
Help: "Size of the returned response in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "response_transfer_size_bytes",
Help: "Size of the returned zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment