Skip to content

Commit

Permalink
Merge pull request #14790 from smarterclayton/router_metrics_port
Browse files Browse the repository at this point in the history
Merged by openshift-bot
  • Loading branch information
OpenShift Bot authored Jun 23, 2017
2 parents 75c6d6e + 778c6d4 commit 1b8ade7
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 30 deletions.
10 changes: 8 additions & 2 deletions images/router/haproxy/conf/haproxy-config.template
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,11 @@ defaults
{{- end }}
{{- end }}

{{ if (gt .StatsPort -1) }}
{{ if (gt .StatsPort 0) }}
listen stats :{{.StatsPort}}
listen stats :{{.StatsPort}}
{{- else }}
listen stats :1936
listen stats :1936
{{- end }}
mode http
# Health check monitoring uri.
Expand All @@ -158,6 +159,7 @@ defaults
stats uri /
stats auth {{.StatsUser}}:{{.StatsPassword}}
{{- end }}
{{- end }}

{{ if .BindPorts -}}
frontend public
Expand All @@ -166,6 +168,10 @@ frontend public
tcp-request inspect-delay 5s
tcp-request content accept if HTTP

{{- if (eq .StatsPort -1) }}
monitor-uri /_______internal_router_healthz
{{- end }}

# check if we need to redirect/force using https.
acl secure_redirect base,map_reg(/var/lib/haproxy/conf/os_route_http_redirect.map) -m found
redirect scheme https if secure_redirect
Expand Down
37 changes: 30 additions & 7 deletions images/router/haproxy/reload-haproxy
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ readonly numeric_re='^[0-9]+$'

function haproxyHealthCheck() {
local wait_time=${MAX_RELOAD_WAIT_TIME:-$max_wait_time}
local port=${STATS_PORT:-"1936"}
local port=${ROUTER_SERVICE_HTTP_PORT:-"80"}
local url="http://localhost:${port}"
local retries=0
local start_ts=$(date +"%s")
local proxy_proto="${ROUTER_USE_PROXY_PROTOCOL-}"

if ! [[ $wait_time =~ $numeric_re ]]; then
echo " - Invalid max reload wait time, using default $max_wait_time ..."
Expand All @@ -23,18 +25,39 @@ function haproxyHealthCheck() {

local end_ts=$((start_ts + wait_time))

local proxy_proto="${ROUTER_USE_PROXY_PROTOCOL:-FALSE}"
echo " - Proxy protocol '${proxy_proto}'. Checking HAProxy /healthz on port $port ..."
# test with proxy protocol on
if [[ "${proxy_proto}" == "TRUE" || "${proxy_proto}" == "true" ]]; then
echo " - Proxy protocol on, checking ${url} ..."
while true; do
local statusline=$(echo $'PROXY UNKNOWN\r\nGET / HTTP/1.1\r\n' | socat tcp-connect:localhost:${port} stdio | head -1)

if [[ "$statusline" == *" 503 "* ]]; then
echo " - Health check ok : $retries retry attempt(s)."
return 0
fi

if [ $(date +"%s") -ge $end_ts ]; then
echo " - Exceeded max wait time ($wait_time) in health check - $retries retry attempt(s)."
return 1
fi

sleep 0.5
retries=$((retries + 1))
done
return 0
fi

echo " - Checking ${url} ..."
while true; do
local httpcode=$(curl $timeout_opts -s -o /dev/null -I -w "%{http_code}" http://localhost:${port}/healthz)
local httpcode=$(curl $timeout_opts -s -o /dev/null -I -w "%{http_code}" ${url})

if [ "$httpcode" == "200" ]; then
echo " - HAProxy port $port health check ok : $retries retry attempt(s)."
if [ "$httpcode" == "503" ]; then
echo " - Health check ok : $retries retry attempt(s)."
return 0
fi

if [ $(date +"%s") -ge $end_ts ]; then
echo " - Exceeded max wait time ($wait_time) in HAProxy health check - $retries retry attempt(s)."
echo " - Exceeded max wait time ($wait_time) in health check - $retries retry attempt(s)."
return 1
fi

Expand Down
13 changes: 4 additions & 9 deletions pkg/cmd/admin/router/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -682,13 +682,8 @@ func RunCmdRouter(f *clientcmd.Factory, cmd *cobra.Command, out, errout io.Write
}
// automatically start the internal metrics agent if we are handling a known type
if cfg.Type == "haproxy-router" {
env["ROUTER_LISTEN_ADDR"] = fmt.Sprintf("0.0.0.0:%d", defaultStatsPort-1)
env["ROUTER_LISTEN_ADDR"] = fmt.Sprintf("0.0.0.0:%d", cfg.StatsPort)
env["ROUTER_METRICS_TYPE"] = "haproxy"
ports = append(ports, kapi.ContainerPort{
Name: "router-stats",
ContainerPort: int32(defaultStatsPort - 1),
Protocol: kapi.ProtocolTCP,
})
}
env.Add(secretEnv)
if len(defaultCert) > 0 {
Expand Down Expand Up @@ -803,9 +798,9 @@ func RunCmdRouter(f *clientcmd.Factory, cmd *cobra.Command, out, errout io.Write
t.Annotations = make(map[string]string)
}
t.Annotations["prometheus.io/scrape"] = "true"
t.Annotations["prometheus.io/port"] = "1935"
t.Annotations["prometheus.io/username"] = cfg.StatsUsername
t.Annotations["prometheus.io/password"] = cfg.StatsPassword
t.Annotations["prometheus.io/port"] = "1936"
t.Annotations["prometheus.openshift.io/username"] = cfg.StatsUsername
t.Annotations["prometheus.openshift.io/password"] = cfg.StatsPassword
t.Spec.ClusterIP = clusterIP
for j, servicePort := range t.Spec.Ports {
for _, targetPort := range ports {
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/infra/router/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func (o *RouterSelection) Bind(flag *pflag.FlagSet) {
flag.BoolVar(&o.AllowWildcardRoutes, "allow-wildcard-routes", cmdutil.Env("ROUTER_ALLOW_WILDCARD_ROUTES", "") == "true", "Allow wildcard host names for routes")
flag.BoolVar(&o.DisableNamespaceOwnershipCheck, "disable-namespace-ownership-check", cmdutil.Env("ROUTER_DISABLE_NAMESPACE_OWNERSHIP_CHECK", "") == "true", "Disables the namespace ownership checks for a route host with different paths or for overlapping host names in the case of wildcard routes. Please be aware that if namespace ownership checks are disabled, routes in a different namespace can use this mechanism to 'steal' sub-paths for existing domains. This is only safe if route creation privileges are restricted, or if all the users can be trusted.")
flag.BoolVar(&o.EnableIngress, "enable-ingress", cmdutil.Env("ROUTER_ENABLE_INGRESS", "") == "true", "Enable configuration via ingress resources")
flag.StringVar(&o.ListenAddr, "listen-addr", cmdutil.Env("ROUTER_LISTEN_ADDR", ""), "The name of an interface to listen on to expose metrics and health checking. If not specified, will not listen.")
flag.StringVar(&o.ListenAddr, "listen-addr", cmdutil.Env("ROUTER_LISTEN_ADDR", ""), "The name of an interface to listen on to expose metrics and health checking. If not specified, will not listen. Overrides stats port.")
}

// RouteSelectionFunc returns a func that identifies the host for a route.
Expand Down
47 changes: 38 additions & 9 deletions pkg/cmd/infra/router/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package router
import (
"errors"
"fmt"
"net"
"net/url"
"os"
"strconv"
"strings"
Expand Down Expand Up @@ -116,7 +118,7 @@ type RouterStats struct {
}

func (o *RouterStats) Bind(flag *pflag.FlagSet) {
flag.StringVar(&o.StatsPortString, "stats-port", util.Env("STATS_PORT", ""), "If the underlying router implementation can provide statistics this is a hint to expose it on this port.")
flag.StringVar(&o.StatsPortString, "stats-port", util.Env("STATS_PORT", ""), "If the underlying router implementation can provide statistics this is a hint to expose it on this port. Ignored if listen-addr is specified.")
flag.StringVar(&o.StatsPassword, "stats-password", util.Env("STATS_PASSWORD", ""), "If the underlying router implementation can provide statistics this is the requested password for auth.")
flag.StringVar(&o.StatsUsername, "stats-user", util.Env("STATS_USERNAME", ""), "If the underlying router implementation can provide statistics this is the requested username for auth.")
}
Expand Down Expand Up @@ -179,6 +181,22 @@ func (o *TemplateRouterOptions) Complete() error {
}
o.StatsPort = statsPort
}
if len(o.ListenAddr) > 0 {
_, port, err := net.SplitHostPort(o.ListenAddr)
if err != nil {
return fmt.Errorf("listen-addr is not valid: %v", err)
}
// stats port on listen-addr overrides stats port argument
statsPort, err := strconv.Atoi(port)
if err != nil {
return fmt.Errorf("listen-addr port is not valid: %v", err)
}
o.StatsPort = statsPort
} else {
if o.StatsPort != 0 {
o.ListenAddr = fmt.Sprintf("0.0.0.0:%d", o.StatsPort)
}
}

if nsecs := int(o.ReloadInterval.Seconds()); nsecs < 1 {
return fmt.Errorf("invalid reload interval: %v - must be a positive duration", nsecs)
Expand Down Expand Up @@ -222,9 +240,10 @@ func (o *TemplateRouterOptions) Validate() error {

// Run launches a template router using the provided options. It never exits.
func (o *TemplateRouterOptions) Run() error {
var reloadCallbacks []func()
statsPort := o.StatsPort

switch {
case o.MetricsType == "haproxy" && len(o.ListenAddr) > 0:
case o.MetricsType == "haproxy":
if len(o.StatsUsername) == 0 || len(o.StatsPassword) == 0 {
glog.Warningf("Metrics were requested but no username or password has been provided - the metrics endpoint will not be accessible to prevent accidental security breaches")
}
Expand Down Expand Up @@ -266,6 +285,7 @@ func (o *TemplateRouterOptions) Run() error {
exported = append(exported, i)
}
}

_, err := haproxy.NewPrometheusCollector(haproxy.PrometheusOptions{
// Only template router customizers who alter the image should need this
ScrapeURI: util.Env("ROUTER_METRICS_HAPROXY_SCRAPE_URI", ""),
Expand All @@ -279,23 +299,32 @@ func (o *TemplateRouterOptions) Run() error {
if err != nil {
return err
}
//reloadCallbacks = append(reloadCallbacks, e.CollectNow)
}
if len(o.ListenAddr) > 0 {
metrics.Listen(o.ListenAddr, o.StatsUsername, o.StatsPassword)

// Metrics will handle healthz on the stats port, and instruct the template router to disable stats completely.
// The underlying router must provide a custom health check if customized which will be called into.
statsPort = -1
httpURL := util.Env("ROUTER_METRICS_READY_HTTP_URL", fmt.Sprintf("http://%s:%s/_______internal_router_healthz", "localhost", util.Env("ROUTER_SERVICE_HTTP_PORT", "80")))
u, err := url.Parse(httpURL)
if err != nil {
return fmt.Errorf("ROUTER_METRICS_READY_HTTP_URL must be a valid URL or empty: %v", err)
}
check := metrics.HTTPBackendAvailable(u)
if useProxy := util.Env("ROUTER_USE_PROXY_PROTOCOL", ""); useProxy == "true" || useProxy == "TRUE" {
check = metrics.ProxyProtocolHTTPBackendAvailable(u)
}
metrics.Listen(o.ListenAddr, o.StatsUsername, o.StatsPassword, check)
}

pluginCfg := templateplugin.TemplatePluginConfig{
WorkingDir: o.WorkingDir,
TemplatePath: o.TemplateFile,
ReloadScriptPath: o.ReloadScript,
ReloadInterval: o.ReloadInterval,
ReloadCallbacks: reloadCallbacks,
DefaultCertificate: o.DefaultCertificate,
DefaultCertificatePath: o.DefaultCertificatePath,
DefaultCertificateDir: o.DefaultCertificateDir,
DefaultDestinationCAPath: o.DefaultDestinationCAPath,
StatsPort: o.StatsPort,
StatsPort: statsPort,
StatsUsername: o.StatsUsername,
StatsPassword: o.StatsPassword,
PeerService: o.RouterService,
Expand Down
77 changes: 77 additions & 0 deletions pkg/router/metrics/health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package metrics

import (
"bufio"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"net/url"
"time"

"github.com/golang/glog"

"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/kubernetes/pkg/probe"
probehttp "k8s.io/kubernetes/pkg/probe/http"
)

// errBackend is the error the health checks in this package return when the probed
// backend answered but did not report success.
var errBackend = fmt.Errorf("backend reported failure")

// HTTPBackendAvailable builds the "backend-http" healthz check. Every time the check
// runs it probes u over HTTP with a two second timeout (the backend is expected to
// answer the GET with a 2xx or 3xx response). A probe error is returned unchanged and
// any probe result other than probe.Success is reported as errBackend.
func HTTPBackendAvailable(u *url.URL) healthz.HealthzChecker {
	prober := probehttp.New()
	check := func(_ *http.Request) error {
		outcome, _, probeErr := prober.Probe(u, nil, 2*time.Second)
		switch {
		case probeErr != nil:
			return probeErr
		case outcome != probe.Success:
			return errBackend
		default:
			return nil
		}
	}
	return healthz.NamedCheck("backend-http", check)
}

// ProxyProtocolHTTPBackendAvailable returns a healthz check that verifies a backend supporting
// the HAProxy PROXY protocol responds to a GET to the provided URL with a 2xx or 3xx response.
//
// Each invocation of the check dials u.Host over TCP, writes a "PROXY UNKNOWN" header line
// followed by an HTTP/1.1 GET for u, and reads a single response. The dial and the subsequent
// exchange are each bounded by a two second timeout. I/O errors are returned unchanged; a
// response whose status code is outside [200, 400) is reported as errBackend.
func ProxyProtocolHTTPBackendAvailable(u *url.URL) healthz.HealthzChecker {
	dialer := &net.Dialer{
		Timeout:   2 * time.Second,
		DualStack: true,
	}
	return healthz.NamedCheck("backend-proxy-http", func(r *http.Request) error {
		conn, err := dialer.Dial("tcp", u.Host)
		if err != nil {
			return err
		}
		// A new connection is opened on every check, so it has to be released on every
		// return path or each health probe leaks a socket.
		defer conn.Close()

		// Bound the whole write/read exchange so a stalled backend cannot hang the check.
		if err := conn.SetDeadline(time.Now().Add(2 * time.Second)); err != nil {
			return err
		}
		br := bufio.NewReader(conn)
		// The PROXY header must precede the HTTP request on the wire.
		if _, err := conn.Write([]byte("PROXY UNKNOWN\r\n")); err != nil {
			return err
		}
		req := &http.Request{Method: "GET", URL: u, Proto: "HTTP/1.1", ProtoMajor: 1, ProtoMinor: 1}
		if err := req.Write(conn); err != nil {
			return err
		}
		res, err := http.ReadResponse(br, req)
		if err != nil {
			return err
		}

		// read full body
		defer res.Body.Close()
		if _, err := io.Copy(ioutil.Discard, res.Body); err != nil {
			glog.V(4).Infof("Error discarding probe body contents: %v", err)
		}

		// Anything below 200 or at/above 400 is a failure. The two bounds must be OR-ed:
		// no status code can be both < 200 and >= 400 at the same time.
		if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusBadRequest {
			glog.V(4).Infof("Probe failed for %s, Response: %v", u.String(), res)
			return errBackend
		}
		glog.V(4).Infof("Probe succeeded for %s, Response: %v", u.String(), res)
		return nil
	})
}
4 changes: 2 additions & 2 deletions pkg/router/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ import (
// Listen starts a server for health, metrics, and profiling on the provided listen port.
// It will terminate the process if the server fails. Metrics and profiling are only exposed
// if username and password are provided and the user's input matches.
func Listen(listenAddr string, username, password string) {
func Listen(listenAddr string, username, password string, checks ...healthz.HealthzChecker) {
go func() {
mux := http.NewServeMux()
healthz.InstallHandler(mux)
healthz.InstallHandler(mux, checks...)

// TODO: exclude etcd and other unused metrics

Expand Down
4 changes: 4 additions & 0 deletions test/integration/router_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1673,6 +1673,10 @@ func TestRouterBindsPortsAfterSync(t *testing.T) {
err := wait.Poll(time.Millisecond*100, time.Duration(reloadInterval)*2*time.Second, func() (bool, error) {
_, err := getRoute(routeAddress, routeAddress, scheme, nil, "")
lastErr = nil

if err != nil && strings.Contains(err.Error(), "connection refused") {
err = ErrUnavailable
}
switch err {
case ErrUnavailable:
return true, nil
Expand Down

0 comments on commit 1b8ade7

Please sign in to comment.