Skip to content

Commit

Permalink
Limit some high cardinality metrics by default
Browse files (browse the repository at this point in the history)
  • Loading branch information
smarterclayton committed Dec 11, 2017
1 parent db29c1c commit 3287329
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 257 deletions.
60 changes: 39 additions & 21 deletions examples/prometheus/prometheus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ parameters:
name: SESSION_SECRET
generate: expression
from: "[a-zA-Z0-9]{43}"

objects:
# Authorize the prometheus service account to read data about the cluster
- apiVersion: v1
Expand Down Expand Up @@ -255,18 +256,20 @@ objects:
miqTarget: "ContainerNode"
severity: "HIGH"
message: "{{$labels.instance}} is down"
recording.rules: |
groups:
- name: aggregate_container_resources
rules:
- record: container_cpu_usage_rate
expr: sum without (cpu) (rate(container_cpu_usage_seconds_total[3m]))
expr: sum without (cpu) (rate(container_cpu_usage_seconds_total[5m]))
- record: container_memory_rss_by_type
expr: container_memory_rss{id=~"/|/system.slice|/kubepods.slice"} > 0
- record: container_cpu_usage_percent_by_host
expr: sum by (hostname,type)(rate(container_cpu_usage_seconds_total{id="/"}[3m])) / on (hostname,type) machine_cpu_cores
- record: apiserver_request_count_by_resources
expr: sum by (hostname,type)(rate(container_cpu_usage_seconds_total{id="/"}[5m])) / on (hostname,type) machine_cpu_cores
- record: apiserver_request_count_rate_by_resources
expr: sum without (client,instance,contentType) (rate(apiserver_request_count[5m]))
prometheus.yml: |
rule_files:
- '*.rules'
Expand Down Expand Up @@ -304,24 +307,6 @@ objects:
action: keep
regex: default;kubernetes;https
# Scrape config for nodes.
#
# Each node exposes a /metrics endpoint that contains operational metrics for
# the Kubelet and other components.
- job_name: 'kubernetes-nodes'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
# Scrape config for controllers.
#
# Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
Expand Down Expand Up @@ -352,6 +337,31 @@ objects:
regex: (.+)(?::\d+)
replacement: $1:8444
# Scrape config for nodes.
#
# Each node exposes a /metrics endpoint that contains operational metrics for
# the Kubelet and other components.
- job_name: 'kubernetes-nodes'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
# Drop a very high cardinality metric that is incorrect in 3.7. It will be
# fixed in 3.9.
metric_relabel_configs:
- source_labels: [__name__]
action: drop
regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
# Scrape config for cAdvisor.
#
# Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
Expand All @@ -368,6 +378,14 @@ objects:
kubernetes_sd_configs:
- role: node
# Exclude a set of high cardinality metrics that can contribute to significant
# memory use in large clusters. These can be selectively enabled as necessary
# for medium or small clusters.
metric_relabel_configs:
- source_labels: [__name__]
action: drop
regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
Expand Down
4 changes: 4 additions & 0 deletions hack/update-generated-bindata.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ pushd "${OS_ROOT}" > /dev/null
-ignore ".*\.go$" \
-ignore "\.DS_Store" \
-ignore application-template.json \
-ignore "prometheus-standalone.yaml" \
-ignore "node-exporter.yaml" \
examples/image-streams/... \
examples/db-templates/... \
examples/jenkins \
Expand All @@ -44,6 +46,8 @@ pushd "${OS_ROOT}" > /dev/null
-ignore "OWNERS" \
-ignore "\.DS_Store" \
-ignore ".*\.(go|md)$" \
-ignore "prometheus-standalone.yaml" \
-ignore "node-exporter.yaml" \
test/extended/testdata/... \
test/integration/testdata \
examples/db-templates \
Expand Down
168 changes: 50 additions & 118 deletions pkg/oc/bootstrap/bindata.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3287329

Please sign in to comment.