Skip to content

Commit

Permalink
Merge pull request #19133 from bparees/metrics
Browse files Browse the repository at this point in the history
TemplateInstance metrics update
  • Loading branch information
openshift-merge-robot authored Mar 29, 2018
2 parents bb8bbbe + 6386481 commit b58be93
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 95 deletions.
85 changes: 44 additions & 41 deletions pkg/template/controller/metrics.go
Original file line number Diff line number Diff line change
@@ -1,76 +1,79 @@
package controller

import (
"time"

templateapi "github.com/openshift/origin/pkg/template/apis/template"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/apimachinery/pkg/labels"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
kapi "k8s.io/kubernetes/pkg/apis/core"
)

// NOTE(review): this span comes from a rendered diff whose +/- markers were
// stripped, so lines of the previous and the new declarations appear
// interleaved — confirm against the committed file before relying on it.
var templateInstancesTotal = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "openshift_template_instance_total",
Help: "Counts TemplateInstance objects",
// templateInstanceCompleted is a counter vector exposed as
// openshift_template_instance_completed_total with a single "condition" label.
var templateInstanceCompleted = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "openshift_template_instance_completed_total",
Help: "Counts completed TemplateInstance objects by condition",
},
nil,
[]string{"condition"},
)

// templateInstanceStatusCondition is a gauge vector exposed as
// openshift_template_instance_status_condition_total with "type" and "status"
// labels.
var templateInstanceStatusCondition = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "openshift_template_instance_status_condition_total",
Help: "Counts TemplateInstance objects by condition type and status",
},
[]string{"type", "status"},
)
// newTemplateInstanceActiveAge returns a newly constructed Histogram named
// openshift_template_instance_active_age_seconds. Its buckets come from
// prometheus.LinearBuckets(600, 600, 7), i.e. seven upper bounds starting at
// 600 and increasing in steps of 600.
func newTemplateInstanceActiveAge() prometheus.Histogram {
// We recreate a new Histogram object every time Collect is called. This is
// because we are recording a series of point-in-time observations about the
// population of "active" TemplateInstances. Were we to use a singleton
// Histogram, we would only be able to observe TemplateInstances as they
// completed, which would add latency in reporting very long-running
// TemplateInstances and completely prevent reporting of non-completing
// TemplateInstances.
//
// Effectively, the resulting series is to Histogram what Gauge is to
// Counter. In the resulting series, _count and _sum are not monotonically
// increasing (because TemplateInstances are no longer part of the
// population once they terminate or are deleted), therefore it is not valid
// to use counter functions such as rate() on this series.

// NOTE(review): the gauge declaration below looks like a pre-change line run
// left over from the rendered diff (the +/- markers were stripped) — confirm
// against the committed file.
var templateInstancesActiveStartTime = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "openshift_template_instance_active_start_time_seconds",
Help: "Show the start time in unix epoch form of active TemplateInstance objects by namespace and name",
},
[]string{"namespace", "name"},
)
return prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "openshift_template_instance_active_age_seconds",
Help: "Shows the instantaneous age distribution of active TemplateInstance objects",
Buckets: prometheus.LinearBuckets(600, 600, 7),
},
)
}

// Describe forwards ch to the Describe method of each metric referenced below,
// including a Histogram freshly built by newTemplateInstanceActiveAge.
//
// NOTE(review): this body comes from a rendered diff whose +/- markers were
// stripped; the calls on templateInstancesTotal, templateInstanceStatusCondition
// and templateInstancesActiveStartTime are presumably the removed lines —
// confirm against the committed file.
func (c *TemplateInstanceController) Describe(ch chan<- *prometheus.Desc) {
templateInstancesTotal.Describe(ch)
templateInstanceStatusCondition.Describe(ch)
templateInstancesActiveStartTime.Describe(ch)
templateInstanceActiveAge := newTemplateInstanceActiveAge()

templateInstanceCompleted.Describe(ch)
templateInstanceActiveAge.Describe(ch)
}

// Collect emits templateInstanceCompleted on ch, then lists every
// TemplateInstance through c.lister using labels.Everything(). If listing
// fails, the error is handed to utilruntime.HandleError and Collect returns
// without emitting anything further. Otherwise it observes the age of listed
// TemplateInstances on a Histogram obtained from newTemplateInstanceActiveAge
// and emits that Histogram on ch.
//
// NOTE(review): this body comes from a rendered diff whose +/- markers were
// stripped, so statements of the previous and the new implementation appear
// interleaved (for example the condition check is present twice). The new
// version appears to skip, via "continue nextTemplateInstance", any
// TemplateInstance with a True InstantiateFailure or Ready condition — confirm
// against the committed file.
func (c *TemplateInstanceController) Collect(ch chan<- prometheus.Metric) {
templateInstanceCompleted.Collect(ch)

// Take a single timestamp from the controller's clock for all age calculations.
now := c.clock.Now()

templateInstances, err := c.lister.List(labels.Everything())
if err != nil {
utilruntime.HandleError(err)
return
}

templateInstancesTotal.Reset()
templateInstanceStatusCondition.Reset()
templateInstancesActiveStartTime.Reset()

templateInstancesTotal.WithLabelValues().Set(0)
templateInstanceActiveAge := newTemplateInstanceActiveAge()

nextTemplateInstance:
for _, templateInstance := range templateInstances {
waiting := true

templateInstancesTotal.WithLabelValues().Inc()

for _, cond := range templateInstance.Status.Conditions {
templateInstanceStatusCondition.WithLabelValues(string(cond.Type), string(cond.Status)).Inc()

if cond.Status == kapi.ConditionTrue &&
(cond.Type == templateapi.TemplateInstanceInstantiateFailure || cond.Type == templateapi.TemplateInstanceReady) {
waiting = false
(cond.Type == templateapi.TemplateInstanceInstantiateFailure ||
cond.Type == templateapi.TemplateInstanceReady) {
continue nextTemplateInstance
}
}

if waiting {
templateInstancesActiveStartTime.WithLabelValues(templateInstance.Namespace, templateInstance.Name).Set(float64(templateInstance.CreationTimestamp.Unix()))
}
// Dividing the Duration by time.Second is an integer division, so the
// observed age is a whole number of seconds.
templateInstanceActiveAge.Observe(float64(now.Sub(templateInstance.CreationTimestamp.Time) / time.Second))
}

templateInstancesTotal.Collect(ch)
templateInstanceStatusCondition.Collect(ch)
templateInstancesActiveStartTime.Collect(ch)
templateInstanceActiveAge.Collect(ch)
}
175 changes: 134 additions & 41 deletions pkg/template/controller/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,42 @@ import (
"time"

templateapi "github.com/openshift/origin/pkg/template/apis/template"
templateclient "github.com/openshift/origin/pkg/template/generated/internalclientset"
"github.com/openshift/origin/pkg/template/generated/internalclientset/fake"
"github.com/openshift/origin/pkg/template/generated/listers/template/internalversion"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/util/workqueue"
kapi "k8s.io/kubernetes/pkg/apis/core"
)

// fakeLister is the lister test double used by TestMetrics; its struct form
// wraps a template clientset that the List and Get methods delegate to.
//
// NOTE(review): both the slice form and the struct form are shown because this
// span comes from a rendered diff whose +/- markers were stripped; the slice
// form is presumably the removed line — confirm against the committed file.
type fakeLister []*templateapi.TemplateInstance
type fakeLister struct {
templateClient templateclient.Interface
}

// List asks the wrapped client for all TemplateInstances (empty namespace
// argument, default ListOptions) and returns a pointer to each returned item.
// The selector argument is ignored. Any client error is returned with a nil
// slice.
func (f *fakeLister) List(labels.Selector) ([]*templateapi.TemplateInstance, error) {
	result, err := f.templateClient.Template().TemplateInstances("").List(metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	// Point at the elements of result.Items directly rather than at copies.
	instances := make([]*templateapi.TemplateInstance, 0, len(result.Items))
	for idx := range result.Items {
		instances = append(instances, &result.Items[idx])
	}
	return instances, nil
}

// NOTE(review): this span comes from a rendered diff whose +/- markers were
// stripped; the value-receiver List and TemplateInstances lines are presumably
// the removed versions — confirm against the committed file.
func (f fakeLister) List(labels.Selector) ([]*templateapi.TemplateInstance, error) {
return f, nil
// Get fetches the named TemplateInstance through the wrapped client, passing
// an empty namespace argument and default GetOptions.
func (f *fakeLister) Get(name string) (*templateapi.TemplateInstance, error) {
return f.templateClient.Template().TemplateInstances("").Get(name, metav1.GetOptions{})
}
func (fakeLister) TemplateInstances(string) internalversion.TemplateInstanceNamespaceLister {
return nil

// TemplateInstances returns f itself as the namespace lister; the namespace
// argument is ignored.
func (f *fakeLister) TemplateInstances(string) internalversion.TemplateInstanceNamespaceLister {
return f
}

type fakeResponseWriter struct {
Expand All @@ -41,58 +60,132 @@ func (f *fakeResponseWriter) WriteHeader(statusCode int) {
}

// TestMetrics registers a TemplateInstanceController, backed by a fake
// clientset and a fake clock, with a fresh Prometheus registry. It then runs
// two rounds in which it calls c.sync on "/abouttofail", "/abouttotimeout" and
// "/abouttosucceed" and compares the text served by the promhttp handler with
// expectedResponse, so the output must match after the first round and stay
// unchanged after the second.
//
// NOTE(review): this body comes from a rendered diff whose +/- markers were
// stripped, so lines of the previous and the new test (including the expected
// output and the fixtures) appear interleaved — confirm against the committed
// file.
func TestMetrics(t *testing.T) {
expectedResponse := `# HELP openshift_template_instance_active_start_time_seconds Show the start time in unix epoch form of active TemplateInstance objects by namespace and name
# TYPE openshift_template_instance_active_start_time_seconds gauge
openshift_template_instance_active_start_time_seconds{name="testname",namespace="testnamespace"} 123
# HELP openshift_template_instance_status_condition_total Counts TemplateInstance objects by condition type and status
# TYPE openshift_template_instance_status_condition_total gauge
openshift_template_instance_status_condition_total{status="False",type="Ready"} 1
openshift_template_instance_status_condition_total{status="True",type="Ready"} 1
# HELP openshift_template_instance_total Counts TemplateInstance objects
# TYPE openshift_template_instance_total gauge
openshift_template_instance_total 2
expectedResponse := `# HELP openshift_template_instance_active_age_seconds Shows the instantaneous age distribution of active TemplateInstance objects
# TYPE openshift_template_instance_active_age_seconds histogram
openshift_template_instance_active_age_seconds_bucket{le="600"} 0
openshift_template_instance_active_age_seconds_bucket{le="1200"} 1
openshift_template_instance_active_age_seconds_bucket{le="1800"} 1
openshift_template_instance_active_age_seconds_bucket{le="2400"} 1
openshift_template_instance_active_age_seconds_bucket{le="3000"} 1
openshift_template_instance_active_age_seconds_bucket{le="3600"} 1
openshift_template_instance_active_age_seconds_bucket{le="4200"} 1
openshift_template_instance_active_age_seconds_bucket{le="+Inf"} 1
openshift_template_instance_active_age_seconds_sum 900
openshift_template_instance_active_age_seconds_count 1
# HELP openshift_template_instance_completed_total Counts completed TemplateInstance objects by condition
# TYPE openshift_template_instance_completed_total counter
openshift_template_instance_completed_total{condition="InstantiateFailure"} 2
openshift_template_instance_completed_total{condition="Ready"} 1
`

clock := &fakeClock{now: time.Unix(0, 0)}

registry := prometheus.NewRegistry()

c := &TemplateInstanceController{
lister: &fakeLister{
{
Status: templateapi.TemplateInstanceStatus{
Conditions: []templateapi.TemplateInstanceCondition{
{
Type: templateapi.TemplateInstanceReady,
Status: kapi.ConditionTrue,
},
fakeTemplateClient := fake.NewSimpleClientset(
// when sync is called on this TemplateInstance it should fail and
// increment openshift_template_instance_completed_total
// {condition="InstantiateFailure"}
&templateapi.TemplateInstance{
ObjectMeta: metav1.ObjectMeta{
Name: "abouttofail",
},
Spec: templateapi.TemplateInstanceSpec{
Template: templateapi.Template{
Objects: []runtime.Object{
&kapi.ConfigMap{},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "testnamespace",
Name: "testname",
CreationTimestamp: metav1.Time{
Time: time.Unix(123, 0),
},
// when sync is called on this TemplateInstance it should timeout and
// increment openshift_template_instance_completed_total
// {condition="InstantiateFailure"}
&templateapi.TemplateInstance{
ObjectMeta: metav1.ObjectMeta{
Name: "abouttotimeout",
},
Spec: templateapi.TemplateInstanceSpec{
Template: templateapi.Template{
Objects: []runtime.Object{
&kapi.ConfigMap{},
},
},
Status: templateapi.TemplateInstanceStatus{
Conditions: []templateapi.TemplateInstanceCondition{
{
Type: templateapi.TemplateInstanceReady,
Status: kapi.ConditionFalse,
},
Requester: &templateapi.TemplateInstanceRequester{},
},
Status: templateapi.TemplateInstanceStatus{
Objects: []templateapi.TemplateInstanceObject{
{},
},
},
},
// when sync is called on this TemplateInstance it should succeed and
// increment openshift_template_instance_completed_total
// {condition="Ready"}
&templateapi.TemplateInstance{
ObjectMeta: metav1.ObjectMeta{
Name: "abouttosucceed",
CreationTimestamp: metav1.Time{
Time: clock.now,
},
},
Spec: templateapi.TemplateInstanceSpec{
Template: templateapi.Template{
Objects: []runtime.Object{
&kapi.ConfigMap{},
},
},
Requester: &templateapi.TemplateInstanceRequester{},
},
Status: templateapi.TemplateInstanceStatus{
Objects: []templateapi.TemplateInstanceObject{
{},
},
},
},
// this TemplateInstance is in-flight, not timed out.
&templateapi.TemplateInstance{
ObjectMeta: metav1.ObjectMeta{
CreationTimestamp: metav1.Time{
Time: clock.now.Add(-900 * time.Second),
},
},
Status: templateapi.TemplateInstanceStatus{
Conditions: []templateapi.TemplateInstanceCondition{
{
Type: templateapi.TemplateInstanceReady,
Status: kapi.ConditionFalse,
},
},
},
},
)

c := &TemplateInstanceController{
lister: &fakeLister{fakeTemplateClient},
templateClient: fakeTemplateClient,
clock: clock,
readinessLimiter: &workqueue.BucketRateLimiter{},
}

registry.MustRegister(c)

h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorHandling: promhttp.PanicOnError})
rw := &fakeResponseWriter{header: http.Header{}}
h.ServeHTTP(rw, &http.Request{})

if rw.String() != expectedResponse {
t.Error(rw.String())
// We loop twice: we expect the metrics response to match after the first
// set of sync calls, and not change after the second set.
for i := 0; i < 2; i++ {
for _, key := range []string{"/abouttofail", "/abouttotimeout", "/abouttosucceed"} {
err := c.sync(key)
if err != nil {
t.Fatal(err)
}
}

rw := &fakeResponseWriter{header: http.Header{}}
h.ServeHTTP(rw, &http.Request{})

if rw.String() != expectedResponse {
t.Errorf("run %d: %s\n", i, rw.String())
}
}
}
2 changes: 1 addition & 1 deletion pkg/template/controller/readiness_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func TestCheckReadiness(t *testing.T) {
groupKind: batch.Kind("Job"),
object: &batch.Job{
Status: batch.JobStatus{
CompletionTime: &metav1.Time{Time: time.Now()},
CompletionTime: &metav1.Time{Time: time.Unix(0, 0)},
},
},
expectedReady: true,
Expand Down
Loading

0 comments on commit b58be93

Please sign in to comment.