Skip to content

Commit

Permalink
KEP-4603: Node specific kubelet config for maximum backoff down to 1 …
Browse files Browse the repository at this point in the history
…second (kubernetes#128374)

* Add feature gate, API, and conflict validation tests for enablecrashloopbackoffmax

Signed-off-by: Laura Lorenz <[email protected]>

* Handle when current base is longer than node max

Signed-off-by: Laura Lorenz <[email protected]>

* Update pkg/features/kube_features.go

Co-authored-by: Tsubasa Nagasawa <[email protected]>

* Fix indentation

Signed-off-by: Laura Lorenz <[email protected]>

* Follow convention for success test

Signed-off-by: Laura Lorenz <[email protected]>

* Normalize casing, and change field to Duration

Signed-off-by: Laura Lorenz <[email protected]>

* Fix json name and some other casing errors

Signed-off-by: Laura Lorenz <[email protected]>

* Another one I missed before

Signed-off-by: Laura Lorenz <[email protected]>

* Don't clobber global max function

Signed-off-by: Laura Lorenz <[email protected]>

* Change to flat value in defaults.go

Signed-off-by: Laura Lorenz <[email protected]>

* Streamline validation and defaults

Signed-off-by: Laura Lorenz <[email protected]>

* Fix typecheck

Signed-off-by: Laura Lorenz <[email protected]>

* Lint

Signed-off-by: Laura Lorenz <[email protected]>

* Tighten up validation for subsecond values

Signed-off-by: Laura Lorenz <[email protected]>

* Rename field from MaxBackOffPeriod to MaxContainerRestartPeriod

Signed-off-by: Laura Lorenz <[email protected]>

* A few missed references to renames

Signed-off-by: Laura Lorenz <[email protected]>

* Only compare flags in flags test

Signed-off-by: Laura Lorenz <[email protected]>

* Don't mess with SetDefault signature

Nobody messes with SetDefault signature

Signed-off-by: Laura Lorenz <[email protected]>

* Fix stale signature change, and update test data

Signed-off-by: Laura Lorenz <[email protected]>

* Inspect current feature gates at defaulting time

Signed-off-by: Laura Lorenz <[email protected]>

* Don't use the global feature gate for temp usage

Signed-off-by: Laura Lorenz <[email protected]>

* Expose default error, and some comments

Signed-off-by: Laura Lorenz <[email protected]>

* Hint fuzzer for less arbitrary values to FeatureGates

Signed-off-by: Laura Lorenz <[email protected]>

---------

Signed-off-by: Laura Lorenz <[email protected]>
Co-authored-by: Tsubasa Nagasawa <[email protected]>
  • Loading branch information
lauralorenz and toVersus authored Nov 9, 2024
1 parent 591c75e commit 7fe41da
Show file tree
Hide file tree
Showing 20 changed files with 664 additions and 276 deletions.
4 changes: 2 additions & 2 deletions cmd/kubelet/app/options/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ func TestRoundTrip(t *testing.T) {
}
continue
}
if !reflect.DeepEqual(modifiedFlags, outputFlags) {
t.Errorf("%s: flags did not round trip: %s", testCase.name, cmp.Diff(modifiedFlags, outputFlags))
if !reflect.DeepEqual(modifiedFlags.KubeletFlags, outputFlags.KubeletFlags) {
t.Errorf("%s: flags did not round trip: %s", testCase.name, cmp.Diff(modifiedFlags.KubeletFlags, outputFlags.KubeletFlags))
continue
}
}
Expand Down
1 change: 1 addition & 0 deletions cmd/kubelet/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ func mergeKubeletConfigurations(kubeletConfig *kubeletconfiginternal.KubeletConf
}
// apply defaulting after decoding
kubeletconfigv1beta1conversion.SetDefaults_KubeletConfiguration(versionedConfig)

// convert back to internal config
if err := kubeletconfigv1beta1conversion.Convert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(versionedConfig, kubeletConfig, nil); err != nil {
return fmt.Errorf("failed to convert merged config to internal kubelet configuration: %w", err)
Expand Down
9 changes: 9 additions & 0 deletions pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,15 @@ const (
// status from DRA drivers.
DRAResourceClaimDeviceStatus featuregate.Feature = "DRAResourceClaimDeviceStatus"

// owner: @lauralorenz
// kep: https://kep.k8s.io/4603
// owner: @lauralorenz
// kep: https://kep.k8s.io/4603
//
// Enables support for configurable per-node backoff maximums for restarting
// containers (aka containers in CrashLoopBackOff)
KubeletCrashLoopBackOffMax featuregate.Feature = "KubeletCrashLoopBackOffMax"

// owner: @harche
// kep: http://kep.k8s.io/3386
//
Expand Down
4 changes: 4 additions & 0 deletions pkg/features/versioned_kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
{Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha},
},

KubeletCrashLoopBackOffMax: {
{Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha},
},

ElasticIndexedJob: {
{Version: version.MustParse("1.27"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.31"), Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // GA in 1.31, remove in 1.32
Expand Down
30 changes: 29 additions & 1 deletion pkg/generated/openapi/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/kubelet/apis/config/fuzzer/fuzzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
obj.EnableSystemLogHandler = true
obj.MemoryThrottlingFactor = ptr.To(rand.Float64())
obj.LocalStorageCapacityIsolation = true
obj.FeatureGates = map[string]bool{
"AllAlpha": false,
"AllBeta": true,
}
},
}
}
1 change: 1 addition & 0 deletions pkg/kubelet/apis/config/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,5 +302,6 @@ var (
"Tracing.SamplingRatePerMillion",
"LocalStorageCapacityIsolation",
"FailCgroupV1",
"CrashLoopBackOff.MaxContainerRestartPeriod",
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ cpuCFSQuota: true
cpuCFSQuotaPeriod: 100ms
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
crashLoopBackOff: {}
enableControllerAttachDetach: true
enableDebugFlagsHandler: true
enableDebuggingHandlers: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ cpuCFSQuota: true
cpuCFSQuotaPeriod: 100ms
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
crashLoopBackOff: {}
enableControllerAttachDetach: true
enableDebugFlagsHandler: true
enableDebuggingHandlers: true
Expand Down
16 changes: 16 additions & 0 deletions pkg/kubelet/apis/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,12 @@ type KubeletConfiguration struct {
// option is explicitly enabled.
// +optional
FailCgroupV1 bool

// CrashLoopBackOff contains config to modify node-level parameters for
// container restart behavior
// +featureGate=KubeletCrashLoopBackoffMax
// +optional
CrashLoopBackOff CrashLoopBackOffConfig
}

// KubeletAuthorizationMode denotes the authorization mode for the kubelet
Expand Down Expand Up @@ -684,3 +690,13 @@ type MemorySwapConfiguration struct {
// +optional
SwapBehavior string
}

// CrashLoopBackOffConfig is used for setting configuration for this kubelet's
// container restart behavior
type CrashLoopBackOffConfig struct {
// MaxContainerRestartPeriod is the maximum duration the backoff delay can accrue
// to for container restarts, minimum 1 second, maximum 300 seconds.
// +featureGate=KubeletCrashLoopBackOffMax
// +optional
MaxContainerRestartPeriod *metav1.Duration
}
24 changes: 24 additions & 0 deletions pkg/kubelet/apis/config/v1beta1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,18 @@ limitations under the License.
package v1beta1

import (
"fmt"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kruntime "k8s.io/apimachinery/pkg/runtime"
kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"

// TODO: Cut references to k8s.io/kubernetes, eventually there should be none from this package
utilfeature "k8s.io/apiserver/pkg/util/feature"
logsapi "k8s.io/component-base/logs/api/v1"
"k8s.io/kubernetes/pkg/cluster/ports"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/qos"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/utils/ptr"
Expand All @@ -39,6 +42,8 @@ const (
DefaultPodLogsDir = "/var/log/pods"
// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2570-memory-qos
DefaultMemoryThrottlingFactor = 0.9
// MaxContainerBackOff is the max backoff period for container restarts, exported for the e2e test
MaxContainerBackOff = 300 * time.Second
)

var (
Expand All @@ -53,6 +58,19 @@ func addDefaultingFuncs(scheme *kruntime.Scheme) error {
}

func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfiguration) {

// TODO(lauralorenz): Reasses conditional feature gating on defaults. Here
// we 1) copy the gates to a local var, unilaterally merge it with the gate
// config while being defaulted. Alternatively we could unilaterally set the
// default value, later check the gate and wipe it if needed, like API
// strategy does for gate-disabled fields. Meanwhile, KubeletConfiguration
// is increasingly dynamic and the configured gates may change depending on
// when this is called. See also validation.go.
localFeatureGate := utilfeature.DefaultMutableFeatureGate.DeepCopy()
if err := localFeatureGate.SetFromMap(obj.FeatureGates); err != nil {
panic(fmt.Sprintf("failed to merge global and in-flight KubeletConfiguration while setting defaults, error: %v", err))
}

if obj.EnableServer == nil {
obj.EnableServer = ptr.To(true)
}
Expand Down Expand Up @@ -286,4 +304,10 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura
if obj.PodLogsDir == "" {
obj.PodLogsDir = DefaultPodLogsDir
}

if localFeatureGate.Enabled(features.KubeletCrashLoopBackOffMax) {
if obj.CrashLoopBackOff.MaxContainerRestartPeriod == nil {
obj.CrashLoopBackOff.MaxContainerRestartPeriod = &metav1.Duration{Duration: MaxContainerBackOff}
}
}
}
Loading

0 comments on commit 7fe41da

Please sign in to comment.