-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TemplateInstance metrics update #19133
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,76 +1,79 @@ | ||
package controller | ||
|
||
import ( | ||
"time" | ||
|
||
templateapi "github.com/openshift/origin/pkg/template/apis/template" | ||
"github.com/prometheus/client_golang/prometheus" | ||
"k8s.io/apimachinery/pkg/labels" | ||
utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||
kapi "k8s.io/kubernetes/pkg/apis/core" | ||
) | ||
|
||
var templateInstancesTotal = prometheus.NewGaugeVec( | ||
prometheus.GaugeOpts{ | ||
Name: "openshift_template_instance_total", | ||
Help: "Counts TemplateInstance objects", | ||
var templateInstanceCompleted = prometheus.NewCounterVec( | ||
prometheus.CounterOpts{ | ||
Name: "openshift_template_instance_completed_total", | ||
Help: "Counts completed TemplateInstance objects by condition", | ||
}, | ||
nil, | ||
[]string{"condition"}, | ||
) | ||
|
||
var templateInstanceStatusCondition = prometheus.NewGaugeVec( | ||
prometheus.GaugeOpts{ | ||
Name: "openshift_template_instance_status_condition_total", | ||
Help: "Counts TemplateInstance objects by condition type and status", | ||
}, | ||
[]string{"type", "status"}, | ||
) | ||
func newTemplateInstanceActiveAge() prometheus.Histogram { | ||
// We recreate a new Histogram object every time Collect is called. This is | ||
// because we are recording a series of point-in-time observations about the | ||
// population of "active" TemplateInstances. Were we to use a singleton | ||
// Histogram, we would only be able to observe TemplateInstances as they | ||
// completed, which would add latency in reporting very long-running | ||
// TemplateInstances and completely prevent reporting of non-completing | ||
// TemplateInstances. | ||
// | ||
// Effectively, the resulting series is to Histogram what Gauge is to | ||
// Counter. In the resulting series, _count and _sum are not monotonically | ||
// increasing (because TemplateInstances are no longer part of the | ||
// population once they terminate or are deleted), therefore it is not valid | ||
// to use counter functions such as rate() on this series. | ||
|
||
var templateInstancesActiveStartTime = prometheus.NewGaugeVec( | ||
prometheus.GaugeOpts{ | ||
Name: "openshift_template_instance_active_start_time_seconds", | ||
Help: "Show the start time in unix epoch form of active TemplateInstance objects by namespace and name", | ||
}, | ||
[]string{"namespace", "name"}, | ||
) | ||
return prometheus.NewHistogram( | ||
prometheus.HistogramOpts{ | ||
Name: "openshift_template_instance_active_age_seconds", | ||
Help: "Shows the instantaneous age distribution of active TemplateInstance objects", | ||
Buckets: prometheus.LinearBuckets(600, 600, 7), | ||
}, | ||
) | ||
} | ||
|
||
func (c *TemplateInstanceController) Describe(ch chan<- *prometheus.Desc) { | ||
templateInstancesTotal.Describe(ch) | ||
templateInstanceStatusCondition.Describe(ch) | ||
templateInstancesActiveStartTime.Describe(ch) | ||
templateInstanceActiveAge := newTemplateInstanceActiveAge() | ||
|
||
templateInstanceCompleted.Describe(ch) | ||
templateInstanceActiveAge.Describe(ch) | ||
} | ||
|
||
func (c *TemplateInstanceController) Collect(ch chan<- prometheus.Metric) { | ||
templateInstanceCompleted.Collect(ch) | ||
|
||
now := c.clock.Now() | ||
|
||
templateInstances, err := c.lister.List(labels.Everything()) | ||
if err != nil { | ||
utilruntime.HandleError(err) | ||
return | ||
} | ||
|
||
templateInstancesTotal.Reset() | ||
templateInstanceStatusCondition.Reset() | ||
templateInstancesActiveStartTime.Reset() | ||
|
||
templateInstancesTotal.WithLabelValues().Set(0) | ||
templateInstanceActiveAge := newTemplateInstanceActiveAge() | ||
|
||
nextTemplateInstance: | ||
for _, templateInstance := range templateInstances { | ||
waiting := true | ||
|
||
templateInstancesTotal.WithLabelValues().Inc() | ||
|
||
for _, cond := range templateInstance.Status.Conditions { | ||
templateInstanceStatusCondition.WithLabelValues(string(cond.Type), string(cond.Status)).Inc() | ||
|
||
if cond.Status == kapi.ConditionTrue && | ||
(cond.Type == templateapi.TemplateInstanceInstantiateFailure || cond.Type == templateapi.TemplateInstanceReady) { | ||
waiting = false | ||
(cond.Type == templateapi.TemplateInstanceInstantiateFailure || | ||
cond.Type == templateapi.TemplateInstanceReady) { | ||
continue nextTemplateInstance | ||
} | ||
} | ||
|
||
if waiting { | ||
templateInstancesActiveStartTime.WithLabelValues(templateInstance.Namespace, templateInstance.Name).Set(float64(templateInstance.CreationTimestamp.Unix())) | ||
} | ||
templateInstanceActiveAge.Observe(float64(now.Sub(templateInstance.CreationTimestamp.Time) / time.Second)) | ||
} | ||
|
||
templateInstancesTotal.Collect(ch) | ||
templateInstanceStatusCondition.Collect(ch) | ||
templateInstancesActiveStartTime.Collect(ch) | ||
templateInstanceActiveAge.Collect(ch) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,23 +7,42 @@ import ( | |
"time" | ||
|
||
templateapi "github.com/openshift/origin/pkg/template/apis/template" | ||
templateclient "github.com/openshift/origin/pkg/template/generated/internalclientset" | ||
"github.com/openshift/origin/pkg/template/generated/internalclientset/fake" | ||
"github.com/openshift/origin/pkg/template/generated/listers/template/internalversion" | ||
|
||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/prometheus/client_golang/prometheus/promhttp" | ||
|
||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/labels" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
"k8s.io/client-go/util/workqueue" | ||
kapi "k8s.io/kubernetes/pkg/apis/core" | ||
) | ||
|
||
type fakeLister []*templateapi.TemplateInstance | ||
type fakeLister struct { | ||
templateClient templateclient.Interface | ||
} | ||
|
||
func (f *fakeLister) List(labels.Selector) ([]*templateapi.TemplateInstance, error) { | ||
list, err := f.templateClient.Template().TemplateInstances("").List(metav1.ListOptions{}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
templateInstances := make([]*templateapi.TemplateInstance, len(list.Items)) | ||
for i := range list.Items { | ||
templateInstances[i] = &list.Items[i] | ||
} | ||
return templateInstances, err | ||
} | ||
|
||
func (f fakeLister) List(labels.Selector) ([]*templateapi.TemplateInstance, error) { | ||
return f, nil | ||
func (f *fakeLister) Get(name string) (*templateapi.TemplateInstance, error) { | ||
return f.templateClient.Template().TemplateInstances("").Get(name, metav1.GetOptions{}) | ||
} | ||
func (fakeLister) TemplateInstances(string) internalversion.TemplateInstanceNamespaceLister { | ||
return nil | ||
|
||
func (f *fakeLister) TemplateInstances(string) internalversion.TemplateInstanceNamespaceLister { | ||
return f | ||
} | ||
|
||
type fakeResponseWriter struct { | ||
|
@@ -41,58 +60,132 @@ func (f *fakeResponseWriter) WriteHeader(statusCode int) { | |
} | ||
|
||
func TestMetrics(t *testing.T) { | ||
expectedResponse := `# HELP openshift_template_instance_active_start_time_seconds Show the start time in unix epoch form of active TemplateInstance objects by namespace and name | ||
# TYPE openshift_template_instance_active_start_time_seconds gauge | ||
openshift_template_instance_active_start_time_seconds{name="testname",namespace="testnamespace"} 123 | ||
# HELP openshift_template_instance_status_condition_total Counts TemplateInstance objects by condition type and status | ||
# TYPE openshift_template_instance_status_condition_total gauge | ||
openshift_template_instance_status_condition_total{status="False",type="Ready"} 1 | ||
openshift_template_instance_status_condition_total{status="True",type="Ready"} 1 | ||
# HELP openshift_template_instance_total Counts TemplateInstance objects | ||
# TYPE openshift_template_instance_total gauge | ||
openshift_template_instance_total 2 | ||
expectedResponse := `# HELP openshift_template_instance_active_age_seconds Shows the instantaneous age distribution of active TemplateInstance objects | ||
# TYPE openshift_template_instance_active_age_seconds histogram | ||
openshift_template_instance_active_age_seconds_bucket{le="600"} 0 | ||
openshift_template_instance_active_age_seconds_bucket{le="1200"} 1 | ||
openshift_template_instance_active_age_seconds_bucket{le="1800"} 1 | ||
openshift_template_instance_active_age_seconds_bucket{le="2400"} 1 | ||
openshift_template_instance_active_age_seconds_bucket{le="3000"} 1 | ||
openshift_template_instance_active_age_seconds_bucket{le="3600"} 1 | ||
openshift_template_instance_active_age_seconds_bucket{le="4200"} 1 | ||
openshift_template_instance_active_age_seconds_bucket{le="+Inf"} 1 | ||
openshift_template_instance_active_age_seconds_sum 900 | ||
openshift_template_instance_active_age_seconds_count 1 | ||
# HELP openshift_template_instance_completed_total Counts completed TemplateInstance objects by condition | ||
# TYPE openshift_template_instance_completed_total counter | ||
openshift_template_instance_completed_total{condition="InstantiateFailure"} 2 | ||
openshift_template_instance_completed_total{condition="Ready"} 1 | ||
` | ||
|
||
clock := &fakeClock{now: time.Unix(0, 0)} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, here we are using Unix epoch ... |
||
|
||
registry := prometheus.NewRegistry() | ||
|
||
c := &TemplateInstanceController{ | ||
lister: &fakeLister{ | ||
{ | ||
Status: templateapi.TemplateInstanceStatus{ | ||
Conditions: []templateapi.TemplateInstanceCondition{ | ||
{ | ||
Type: templateapi.TemplateInstanceReady, | ||
Status: kapi.ConditionTrue, | ||
}, | ||
fakeTemplateClient := fake.NewSimpleClientset( | ||
// when sync is called on this TemplateInstance it should fail and | ||
// increment openshift_template_instance_completed_total | ||
// {condition="InstantiateFailure"} | ||
&templateapi.TemplateInstance{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "abouttofail", | ||
}, | ||
Spec: templateapi.TemplateInstanceSpec{ | ||
Template: templateapi.Template{ | ||
Objects: []runtime.Object{ | ||
&kapi.ConfigMap{}, | ||
}, | ||
}, | ||
}, | ||
{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Namespace: "testnamespace", | ||
Name: "testname", | ||
CreationTimestamp: metav1.Time{ | ||
Time: time.Unix(123, 0), | ||
}, | ||
// when sync is called on this TemplateInstance it should timeout and | ||
// increment openshift_template_instance_completed_total | ||
// {condition="InstantiateFailure"} | ||
&templateapi.TemplateInstance{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "abouttotimeout", | ||
}, | ||
Spec: templateapi.TemplateInstanceSpec{ | ||
Template: templateapi.Template{ | ||
Objects: []runtime.Object{ | ||
&kapi.ConfigMap{}, | ||
}, | ||
}, | ||
Status: templateapi.TemplateInstanceStatus{ | ||
Conditions: []templateapi.TemplateInstanceCondition{ | ||
{ | ||
Type: templateapi.TemplateInstanceReady, | ||
Status: kapi.ConditionFalse, | ||
}, | ||
Requester: &templateapi.TemplateInstanceRequester{}, | ||
}, | ||
Status: templateapi.TemplateInstanceStatus{ | ||
Objects: []templateapi.TemplateInstanceObject{ | ||
{}, | ||
}, | ||
}, | ||
}, | ||
// when sync is called on this TemplateInstance it should succeed and | ||
// increment openshift_template_instance_completed_total | ||
// {condition="Ready"} | ||
&templateapi.TemplateInstance{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "abouttosucceed", | ||
CreationTimestamp: metav1.Time{ | ||
Time: clock.now, | ||
}, | ||
}, | ||
Spec: templateapi.TemplateInstanceSpec{ | ||
Template: templateapi.Template{ | ||
Objects: []runtime.Object{ | ||
&kapi.ConfigMap{}, | ||
}, | ||
}, | ||
Requester: &templateapi.TemplateInstanceRequester{}, | ||
}, | ||
Status: templateapi.TemplateInstanceStatus{ | ||
Objects: []templateapi.TemplateInstanceObject{ | ||
{}, | ||
}, | ||
}, | ||
}, | ||
// this TemplateInstance is in-flight, not timed out. | ||
&templateapi.TemplateInstance{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
CreationTimestamp: metav1.Time{ | ||
Time: clock.now.Add(-900 * time.Second), | ||
}, | ||
}, | ||
Status: templateapi.TemplateInstanceStatus{ | ||
Conditions: []templateapi.TemplateInstanceCondition{ | ||
{ | ||
Type: templateapi.TemplateInstanceReady, | ||
Status: kapi.ConditionFalse, | ||
}, | ||
}, | ||
}, | ||
}, | ||
) | ||
|
||
c := &TemplateInstanceController{ | ||
lister: &fakeLister{fakeTemplateClient}, | ||
templateClient: fakeTemplateClient, | ||
clock: clock, | ||
readinessLimiter: &workqueue.BucketRateLimiter{}, | ||
} | ||
|
||
registry.MustRegister(c) | ||
|
||
h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorHandling: promhttp.PanicOnError}) | ||
rw := &fakeResponseWriter{header: http.Header{}} | ||
h.ServeHTTP(rw, &http.Request{}) | ||
|
||
if rw.String() != expectedResponse { | ||
t.Error(rw.String()) | ||
// We loop twice: we expect the metrics response to match after the first | ||
// set of sync calls, and not change after the second set. | ||
for i := 0; i < 2; i++ { | ||
for _, key := range []string{"/abouttofail", "/abouttotimeout", "/abouttosucceed"} { | ||
err := c.sync(key) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
} | ||
|
||
rw := &fakeResponseWriter{header: http.Header{}} | ||
h.ServeHTTP(rw, &http.Request{}) | ||
|
||
if rw.String() != expectedResponse { | ||
t.Errorf("run %d: %s\n", i, rw.String()) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I forget: was there a reason that Unix epoch time was not used here?
Curious about the requirement here vs. what we had to do for the build's active metric.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, c.clock will be an instance of RealClock, whose Now implementation simply returns the current wall-clock time (time.Now()).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(It was done this way to provide an abstraction over the clock implementation, so that tests can plug in their own clock implementation.)