Commit 30a4a87e authored by Ruslan Drozhdzh, committed by GitHub

plugin/forward: add hit/miss metrics for connection cache (#4114)

Signed-off-by: Ruslan Drozhdzh <rdrozhdzh@infoblox.com>
parent 2fe5d684
...@@ -112,10 +112,12 @@ If monitoring is enabled (via the *prometheus* plugin) then the following metric ...@@ -112,10 +112,12 @@ If monitoring is enabled (via the *prometheus* plugin) then the following metric
* `coredns_forward_healthcheck_failures_total{to}` - number of failed health checks per upstream. * `coredns_forward_healthcheck_failures_total{to}` - number of failed health checks per upstream.
* `coredns_forward_healthcheck_broken_total{}` - counter of when all upstreams are unhealthy, * `coredns_forward_healthcheck_broken_total{}` - counter of when all upstreams are unhealthy,
and we are randomly (this always uses the `random` policy) spraying to an upstream. and we are randomly (this always uses the `random` policy) spraying to an upstream.
* `max_concurrent_rejects_total{}` - counter of the number of queries rejected because the * `coredns_forward_max_concurrent_rejects_total{}` - counter of the number of queries rejected because the
number of concurrent queries were at maximum. number of concurrent queries were at maximum.
* `coredns_forward_conn_cache_hits_total{to, proto}` - counter of connection cache hits per upstream and protocol.
* `coredns_forward_conn_cache_misses_total{to, proto}` - counter of connection cache misses per upstream and protocol.
Where `to` is one of the upstream servers (**TO** from the config), `rcode` is the returned RCODE Where `to` is one of the upstream servers (**TO** from the config), `rcode` is the returned RCODE
from the upstream. from the upstream, `proto` is the transport protocol like `udp`, `tcp`, `tcp-tls`.
## Examples ## Examples
......
...@@ -54,8 +54,10 @@ func (t *Transport) Dial(proto string) (*persistConn, bool, error) { ...@@ -54,8 +54,10 @@ func (t *Transport) Dial(proto string) (*persistConn, bool, error) {
pc := <-t.ret pc := <-t.ret
if pc != nil { if pc != nil {
ConnCacheHitsCount.WithLabelValues(t.addr, proto).Add(1)
return pc, true, nil return pc, true, nil
} }
ConnCacheMissesCount.WithLabelValues(t.addr, proto).Add(1)
reqTime := time.Now() reqTime := time.Now()
timeout := t.dialTimeout() timeout := t.dialTimeout()
......
...@@ -52,4 +52,16 @@ var ( ...@@ -52,4 +52,16 @@ var (
Name: "max_concurrent_rejects_total", Name: "max_concurrent_rejects_total",
Help: "Counter of the number of queries rejected because the concurrent queries were at maximum.", Help: "Counter of the number of queries rejected because the concurrent queries were at maximum.",
}) })
// ConnCacheHitsCount counts connection cache hits, labelled by upstream
// ("to") and transport protocol ("proto"). Transport.Dial increments it
// when it receives a non-nil cached connection. It is documented in the
// README as coredns_forward_conn_cache_hits_total.
ConnCacheHitsCount = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "forward",
Name: "conn_cache_hits_total",
Help: "Counter of connection cache hits per upstream and protocol.",
}, []string{"to", "proto"})
// ConnCacheMissesCount counts connection cache misses, labelled by upstream
// ("to") and transport protocol ("proto"). Transport.Dial increments it
// when no cached connection is returned (pc is nil). It is documented in
// the README as coredns_forward_conn_cache_misses_total.
ConnCacheMissesCount = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "forward",
Name: "conn_cache_misses_total",
Help: "Counter of connection cache misses per upstream and protocol.",
}, []string{"to", "proto"})
) )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment