Skip to content

Commit

Permalink
A new liveness probe for router pod
Browse files Browse the repository at this point in the history
To ultimately prevent bug 1405440, a new, implementation-independent HTTP-Get
health check is introduced by this PR. This new health check is provided
by the `openshift-router` process itself. For the HAProxy-based router, the
health check uses HAProxy's CLI over the stats socket for the liveness probe, and
`/healthz` for the readiness probe.
  • Loading branch information
louyihua committed Feb 16, 2017
1 parent 912c6c4 commit be23bb9
Show file tree
Hide file tree
Showing 18 changed files with 199 additions and 38 deletions.
10 changes: 10 additions & 0 deletions contrib/completions/bash/openshift
Original file line number Diff line number Diff line change
Expand Up @@ -21852,6 +21852,10 @@ _openshift_infra_f5-router()
local_nonpersistent_flags+=("--namespace-labels=")
flags+=("--override-hostname")
local_nonpersistent_flags+=("--override-hostname")
flags+=("--probe-endpoint=")
local_nonpersistent_flags+=("--probe-endpoint=")
flags+=("--probe-timeout=")
local_nonpersistent_flags+=("--probe-timeout=")
flags+=("--project-labels=")
local_nonpersistent_flags+=("--project-labels=")
flags+=("--request-timeout=")
Expand Down Expand Up @@ -22025,6 +22029,12 @@ _openshift_infra_router()
local_nonpersistent_flags+=("--namespace-labels=")
flags+=("--override-hostname")
local_nonpersistent_flags+=("--override-hostname")
flags+=("--probe-endpoint=")
local_nonpersistent_flags+=("--probe-endpoint=")
flags+=("--probe-socket=")
local_nonpersistent_flags+=("--probe-socket=")
flags+=("--probe-timeout=")
local_nonpersistent_flags+=("--probe-timeout=")
flags+=("--project-labels=")
local_nonpersistent_flags+=("--project-labels=")
flags+=("--reload=")
Expand Down
10 changes: 10 additions & 0 deletions contrib/completions/zsh/openshift
Original file line number Diff line number Diff line change
Expand Up @@ -22000,6 +22000,10 @@ _openshift_infra_f5-router()
local_nonpersistent_flags+=("--namespace-labels=")
flags+=("--override-hostname")
local_nonpersistent_flags+=("--override-hostname")
flags+=("--probe-endpoint=")
local_nonpersistent_flags+=("--probe-endpoint=")
flags+=("--probe-timeout=")
local_nonpersistent_flags+=("--probe-timeout=")
flags+=("--project-labels=")
local_nonpersistent_flags+=("--project-labels=")
flags+=("--request-timeout=")
Expand Down Expand Up @@ -22173,6 +22177,12 @@ _openshift_infra_router()
local_nonpersistent_flags+=("--namespace-labels=")
flags+=("--override-hostname")
local_nonpersistent_flags+=("--override-hostname")
flags+=("--probe-endpoint=")
local_nonpersistent_flags+=("--probe-endpoint=")
flags+=("--probe-socket=")
local_nonpersistent_flags+=("--probe-socket=")
flags+=("--probe-timeout=")
local_nonpersistent_flags+=("--probe-timeout=")
flags+=("--project-labels=")
local_nonpersistent_flags+=("--project-labels=")
flags+=("--reload=")
Expand Down
8 changes: 8 additions & 0 deletions docs/man/man1/openshift-infra-f5-router.1
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,14 @@ You may restrict the set of routes exposed to a single project (with \-\-namespa
\fB\-\-override\-hostname\fP=false
Override the spec.host value for a route with \-\-hostname\-template

.PP
\fB\-\-probe\-endpoint\fP="0.0.0.0:1935"
The http endpoint that router listens on for accepting incoming probes

.PP
\fB\-\-probe\-timeout\fP="1s"
The timeout that router waits for underlying implementation to reply a probe

.PP
\fB\-\-project\-labels\fP=""
A label selector to apply to projects to watch; if '*' watches all projects the client can access
Expand Down
12 changes: 12 additions & 0 deletions docs/man/man1/openshift-infra-router.1
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,18 @@ You may restrict the set of routes exposed to a single project (with \-\-namespa
\fB\-\-override\-hostname\fP=false
Override the spec.host value for a route with \-\-hostname\-template

.PP
\fB\-\-probe\-endpoint\fP="0.0.0.0:1935"
The http endpoint that router listens on for accepting incoming probes

.PP
\fB\-\-probe\-socket\fP="/var/lib/haproxy/run/haproxy.sock"
The unix socket for accessing underlying router implementation's CLI.

.PP
\fB\-\-probe\-timeout\fP="1s"
The timeout that router waits for underlying implementation to reply a probe

.PP
\fB\-\-project\-labels\fP=""
A label selector to apply to projects to watch; if '*' watches all projects the client can access
Expand Down
50 changes: 17 additions & 33 deletions pkg/cmd/admin/router/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ const (

// Default stats and healthz port.
defaultStatsPort = 1936
defaultHealthzPort = defaultStatsPort
defaultHealthzPort = 1935
)

// NewCmdRouter implements the OpenShift CLI router command.
Expand Down Expand Up @@ -402,49 +402,33 @@ func generateSecretsConfig(cfg *RouterConfig, namespace string, defaultCert []by
return secrets, volumes, mounts, nil
}

func generateProbeConfigForRouter(cfg *RouterConfig, ports []kapi.ContainerPort) *kapi.Probe {
var probe *kapi.Probe
func generateProbeConfigForRouter(cfg *RouterConfig, ports []kapi.ContainerPort, probePath string, initialDelay int32) *kapi.Probe {
probe := &kapi.Probe{InitialDelaySeconds: initialDelay}

if cfg.Type == "haproxy-router" {
probe = &kapi.Probe{}
healthzPort := defaultHealthzPort
if cfg.StatsPort > 0 {
healthzPort = cfg.StatsPort
}

probe.Handler.HTTPGet = &kapi.HTTPGetAction{
Path: "/healthz",
Port: intstr.IntOrString{
Type: intstr.Int,
IntVal: int32(healthzPort),
},
}
probe.Handler.HTTPGet = &kapi.HTTPGetAction{
Path: probePath,
Port: intstr.IntOrString{
Type: intstr.Int,
IntVal: int32(defaultHealthzPort),
},
}

// Workaround for misconfigured environments where the Node's InternalIP is
// physically present on the Node. In those environments the probes will
// fail unless a host firewall port is opened
if cfg.HostNetwork {
probe.Handler.HTTPGet.Host = "localhost"
}
// Workaround for misconfigured environments where the Node's InternalIP is
// physically present on the Node. In those environments the probes will
// fail unless a host firewall port is opened
if cfg.HostNetwork {
probe.Handler.HTTPGet.Host = "localhost"
}

return probe
}

func generateLivenessProbeConfig(cfg *RouterConfig, ports []kapi.ContainerPort) *kapi.Probe {
probe := generateProbeConfigForRouter(cfg, ports)
if probe != nil {
probe.InitialDelaySeconds = 10
}
return probe
return generateProbeConfigForRouter(cfg, ports, "/alive", 10)
}

func generateReadinessProbeConfig(cfg *RouterConfig, ports []kapi.ContainerPort) *kapi.Probe {
probe := generateProbeConfigForRouter(cfg, ports)
if probe != nil {
probe.InitialDelaySeconds = 10
}
return probe
return generateProbeConfigForRouter(cfg, ports, "/ready", 10)
}

func generateMetricsExporterContainer(cfg *RouterConfig, env app.Environment) *kapi.Container {
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/infra/router/f5.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ func (o *F5RouterOptions) Run() error {

factory := o.RouterSelection.NewFactory(oc, kc)
watchNodes := (len(o.InternalAddress) != 0 && len(o.VxlanGateway) != 0)
controller := factory.Create(plugin, watchNodes, o.EnableIngress)
controller := factory.Create(plugin, watchNodes, o.EnableIngress, o.ProbeEndpoint, o.ProbeTimeout)
controller.Run()

select {}
Expand Down
12 changes: 12 additions & 0 deletions pkg/cmd/infra/router/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ type RouterSelection struct {
DisableNamespaceOwnershipCheck bool

EnableIngress bool

ProbeEndpoint string
ProbeTimeoutStr string
ProbeTimeout time.Duration
}

// Bind sets the appropriate labels
Expand All @@ -73,6 +77,8 @@ func (o *RouterSelection) Bind(flag *pflag.FlagSet) {
flag.BoolVar(&o.AllowWildcardRoutes, "allow-wildcard-routes", cmdutil.Env("ROUTER_ALLOW_WILDCARD_ROUTES", "") == "true", "Allow wildcard host names for routes")
flag.BoolVar(&o.DisableNamespaceOwnershipCheck, "disable-namespace-ownership-check", cmdutil.Env("ROUTER_DISABLE_NAMESPACE_OWNERSHIP_CHECK", "") == "true", "Disables the namespace ownership checks for a route host with different paths or for overlapping host names in the case of wildcard routes. Please be aware that if namespace ownership checks are disabled, routes in a different namespace can use this mechanism to 'steal' sub-paths for existing domains. This is only safe if route creation privileges are restricted, or if all the users can be trusted.")
flag.BoolVar(&o.EnableIngress, "enable-ingress", cmdutil.Env("ROUTER_ENABLE_INGRESS", "") == "true", "Enable configuration via ingress resources")
flag.StringVar(&o.ProbeEndpoint, "probe-endpoint", cmdutil.Env("ROUTER_PROBE_ENDPOINT", "0.0.0.0:1935"), "The http endpoint that router listens on for accepting incoming probes")
flag.StringVar(&o.ProbeTimeoutStr, "probe-timeout", cmdutil.Env("ROUTER_PROBE_TIMEOUT", "1s"), "The timeout that router waits for underlying implementation to reply a probe")
}

// RouteSelectionFunc returns a func that identifies the host for a route.
Expand Down Expand Up @@ -213,6 +219,12 @@ func (o *RouterSelection) Complete() error {
o.BlacklistedDomains = sets.NewString(o.DeniedDomains...)
o.WhitelistedDomains = sets.NewString(o.AllowedDomains...)

if probeTimeout, err := time.ParseDuration(o.ProbeTimeoutStr); err != nil {
o.ProbeTimeout = time.Second
} else {
o.ProbeTimeout = probeTimeout
}

return nil
}

Expand Down
5 changes: 4 additions & 1 deletion pkg/cmd/infra/router/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ type TemplateRouter struct {
RouterService *ktypes.NamespacedName
BindPortsAfterSync bool
MaxConnections string
ProbeSocket string
}

// reloadInterval returns how often to run the router reloads. The interval
Expand Down Expand Up @@ -93,6 +94,7 @@ func (o *TemplateRouter) Bind(flag *pflag.FlagSet) {
flag.BoolVar(&o.ExtendedValidation, "extended-validation", util.Env("EXTENDED_VALIDATION", "true") == "true", "If set, then an additional extended validation step is performed on all routes admitted in by this router. Defaults to true and enables the extended validation checks.")
flag.BoolVar(&o.BindPortsAfterSync, "bind-ports-after-sync", util.Env("ROUTER_BIND_PORTS_AFTER_SYNC", "") == "true", "Bind ports only after route state has been synchronized")
flag.StringVar(&o.MaxConnections, "max-connections", util.Env("ROUTER_MAX_CONNECTIONS", ""), "Specifies the maximum number of concurrent connections.")
flag.StringVar(&o.ProbeSocket, "probe-socket", util.Env("ROUTER_PROBE_SOCKET", "/var/lib/haproxy/run/haproxy.sock"), "The unix socket for accessing underlying router implementation's CLI.")
}

type RouterStats struct {
Expand Down Expand Up @@ -209,6 +211,7 @@ func (o *TemplateRouterOptions) Run() error {
IncludeUDP: o.RouterSelection.IncludeUDP,
AllowWildcardRoutes: o.RouterSelection.AllowWildcardRoutes,
MaxConnections: o.MaxConnections,
ProbeSocket: o.ProbeSocket,
}

oc, kc, err := o.Config.Clients()
Expand All @@ -231,7 +234,7 @@ func (o *TemplateRouterOptions) Run() error {
plugin := controller.NewHostAdmitter(uniqueHostPlugin, o.RouteAdmissionFunc(), o.AllowWildcardRoutes, o.RouterSelection.DisableNamespaceOwnershipCheck, controller.RejectionRecorder(statusPlugin))

factory := o.RouterSelection.NewFactory(oc, kc)
controller := factory.Create(plugin, false, o.EnableIngress)
controller := factory.Create(plugin, false, o.EnableIngress, o.ProbeEndpoint, o.ProbeTimeout)
controller.Run()

proc.StartReaper()
Expand Down
24 changes: 24 additions & 0 deletions pkg/router/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package controller

import (
"fmt"
"net/http"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -63,6 +65,9 @@ type RouterController struct {

EnableIngress bool
IngressTranslator *IngressTranslator

ProbeEndpoint string
ProbeTimeout time.Duration
}

// Run begins watching and syncing.
Expand All @@ -81,6 +86,9 @@ func (c *RouterController) Run() {
go utilwait.Forever(c.HandleIngress, 0)
go utilwait.Forever(c.HandleSecret, 0)
}
if len(c.ProbeEndpoint) > 0 {
go http.ListenAndServe(c.ProbeEndpoint, c)
}
go c.watchForFirstSync()
}

Expand Down Expand Up @@ -321,3 +329,19 @@ func (c *RouterController) processIngressEvents(events []ingressRouteEvents) {
}
}
}

// ServeHTTP answers probe requests. The request path is matched
// case-insensitively: "/alive" and "/healthz" are forwarded to the plugin as
// liveness probes, "/ready" as a readiness probe, and any other path yields
// 404 with the body "Invalid probe". The status code and body returned by the
// plugin are written to the response as-is.
func (c *RouterController) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
	// Start from the "unknown path" answer and overwrite it on a match.
	status, body := http.StatusNotFound, []byte("Invalid probe")

	path := strings.ToLower(req.URL.Path)
	switch path {
	case "/ready":
		status, body = c.Plugin.HandleProbe(router.ReadinessProbe, c.ProbeTimeout)
	case "/alive", "/healthz":
		status, body = c.Plugin.HandleProbe(router.LivenessProbe, c.ProbeTimeout)
	}

	resp.WriteHeader(status)
	resp.Write(body)
}
9 changes: 7 additions & 2 deletions pkg/router/controller/extended_validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package controller
import (
"fmt"
"reflect"
"time"

"github.com/golang/glog"
kapi "k8s.io/kubernetes/pkg/api"
Expand All @@ -27,7 +28,7 @@ type ExtendedValidator struct {
invalidRoutes map[string]routeapi.Route
}

// ExtendedValidator creates a plugin wrapper that ensures only routes that
// NewExtendedValidator creates a plugin wrapper that ensures only routes that
// pass extended validation are relayed to the next plugin in the chain.
// Recorder is an interface for indicating why a route was rejected.
func NewExtendedValidator(plugin router.Plugin, recorder RejectionRecorder) *ExtendedValidator {
Expand Down Expand Up @@ -73,12 +74,16 @@ func (p *ExtendedValidator) HandleRoute(eventType watch.EventType, route *routea
return p.plugin.HandleRoute(eventType, route)
}

// HandleAllowedNamespaces limits the scope of valid routes to only those that match
// HandleNamespaces limits the scope of valid routes to only those that match
// the provided namespace list.
func (p *ExtendedValidator) HandleNamespaces(namespaces sets.String) error {
return p.plugin.HandleNamespaces(namespaces)
}

// HandleProbe passes the probe and its timeout straight through to the
// wrapped plugin and returns that plugin's result unchanged; the validator
// adds no probe logic of its own.
func (p *ExtendedValidator) HandleProbe(probe router.ProbeType, timeout time.Duration) (int, []byte) {
return p.plugin.HandleProbe(probe, timeout)
}

// Commit delegates to the wrapped plugin's Commit and returns its error.
func (p *ExtendedValidator) Commit() error {
return p.plugin.Commit()
}
4 changes: 3 additions & 1 deletion pkg/router/controller/factory/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func NewDefaultRouterControllerFactory(oc osclient.RoutesNamespacer, kc kclients

// Create begins listing and watching against the API server for the desired route and endpoint
// resources. It spawns child goroutines that cannot be terminated.
func (factory *RouterControllerFactory) Create(plugin router.Plugin, watchNodes, enableIngress bool) *controller.RouterController {
func (factory *RouterControllerFactory) Create(plugin router.Plugin, watchNodes, enableIngress bool, probeEndpoint string, probeTimeout time.Duration) *controller.RouterController {
routeEventQueue := oscache.NewEventQueue(cache.MetaNamespaceKeyFunc)
cache.NewReflector(&routeLW{
client: factory.OSClient,
Expand Down Expand Up @@ -188,6 +188,8 @@ func (factory *RouterControllerFactory) Create(plugin router.Plugin, watchNodes,
WatchNodes: watchNodes,
EnableIngress: enableIngress,
IngressTranslator: ingressTranslator,
ProbeEndpoint: probeEndpoint,
ProbeTimeout: probeTimeout,
}
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/router/controller/host_admitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package controller

import (
"fmt"
"time"

"github.com/golang/glog"
kapi "k8s.io/kubernetes/pkg/api"
Expand Down Expand Up @@ -153,6 +154,10 @@ func (p *HostAdmitter) HandleNamespaces(namespaces sets.String) error {
return p.plugin.HandleNamespaces(namespaces)
}

// HandleProbe passes the probe and its timeout straight through to the
// wrapped plugin and returns that plugin's result unchanged; the admitter
// adds no probe logic of its own.
func (p *HostAdmitter) HandleProbe(probe router.ProbeType, timeout time.Duration) (int, []byte) {
return p.plugin.HandleProbe(probe, timeout)
}

// Commit delegates to the wrapped plugin's Commit and returns its error.
func (p *HostAdmitter) Commit() error {
return p.plugin.Commit()
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/router/controller/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,10 @@ func (a *StatusAdmitter) HandleNamespaces(namespaces sets.String) error {
return a.plugin.HandleNamespaces(namespaces)
}

// HandleProbe passes the probe and its timeout straight through to the
// wrapped plugin and returns that plugin's result unchanged; the admitter
// adds no probe logic of its own.
func (a *StatusAdmitter) HandleProbe(probe router.ProbeType, timeout time.Duration) (int, []byte) {
return a.plugin.HandleProbe(probe, timeout)
}

// Commit delegates to the wrapped plugin's Commit and returns its error.
func (a *StatusAdmitter) Commit() error {
return a.plugin.Commit()
}
5 changes: 5 additions & 0 deletions pkg/router/controller/unique_host.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package controller
import (
"fmt"
"strings"
"time"

"github.com/golang/glog"
kapi "k8s.io/kubernetes/pkg/api"
Expand Down Expand Up @@ -255,6 +256,10 @@ func (p *UniqueHost) HandleNamespaces(namespaces sets.String) error {
return p.plugin.HandleNamespaces(namespaces)
}

// HandleProbe passes the probe and its timeout straight through to the
// wrapped plugin and returns that plugin's result unchanged; UniqueHost
// adds no probe logic of its own.
func (p *UniqueHost) HandleProbe(probe router.ProbeType, timeout time.Duration) (int, []byte) {
return p.plugin.HandleProbe(probe, timeout)
}

// Commit delegates to the wrapped plugin's Commit and returns its error.
func (p *UniqueHost) Commit() error {
return p.plugin.Commit()
}
Expand Down
Loading

0 comments on commit be23bb9

Please sign in to comment.