Merge pull request #17000 from mfojtik/dc-timeout
Automatic merge from submit-queue (batch tested with PRs 17020, 17026, 17000, 17010).

apps: deployment config stuck in the new state should respect timeoutSeconds

Fixes: #16962

With this patch, the deployment config controller marks a deployment as failed (timed out) once it exceeds timeoutSeconds while its status is still 'new'. This generally happens when the deployer pod cannot be created (for example, because of a quota); we should not wait indefinitely for quota to become available.
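
As a rough, standalone illustration of the check this PR introduces (simplified names; the merged implementation is RolloutExceededTimeoutSeconds in pkg/apps/util/util.go below), the decision reduces to comparing the age of the deployment's replication controller against the strategy's timeoutSeconds:

package main

import (
	"fmt"
	"time"
)

// rolloutExceededTimeout is a simplified stand-in for the helper added in
// pkg/apps/util/util.go: it reports whether a rollout stuck in the "new"
// state has outlived the strategy's timeoutSeconds. The real helper reads
// the timeout from the strategy params and the creation timestamp from the
// replication controller; here both are passed in directly.
func rolloutExceededTimeout(timeoutSeconds int64, rcCreated time.Time) bool {
	if timeoutSeconds <= 0 {
		// A timeout of 0 means "never time out".
		return false
	}
	return int64(time.Since(rcCreated).Seconds()) > timeoutSeconds
}

func main() {
	created := time.Now().Add(-20 * time.Second)     // RC created 20s ago
	fmt.Println(rolloutExceededTimeout(10, created)) // true: 20s > 10s timeout
	fmt.Println(rolloutExceededTimeout(0, created))  // false: timeout disabled
}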
openshift-merge-robot authored Oct 25, 2017
2 parents 6cf2dbd + 81ab37e commit 58d89a4
Showing 5 changed files with 182 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/apps/apis/apps/types.go
@@ -113,6 +113,7 @@ const (
DeploymentCancelledNewerDeploymentExists = "newer deployment was found running"
DeploymentFailedUnrelatedDeploymentExists = "unrelated pod with the same name as this deployment is already running"
DeploymentFailedDeployerPodNoLongerExists = "deployer pod no longer exists"
DeploymentFailedUnableToCreateDeployerPod = "unable to create deployer pod"
)

// DeploymentStatus describes the possible states a deployment can be in.
15 changes: 15 additions & 0 deletions pkg/apps/controller/deployer/deployer_controller.go
@@ -127,6 +127,21 @@ func (c *DeploymentController) handle(deployment *v1.ReplicationController, will
}
break
}
// In case the deployment is stuck in the "new" state because we failed to create
// the deployer pod (quota, etc.), we should respect the timeoutSeconds set in the
// config strategy and transition the rollout to failed instead of waiting for the
// deployer pod forever.
config, err := deployutil.DecodeDeploymentConfig(deployment, c.codec)
if err != nil {
return err
}
if deployutil.RolloutExceededTimeoutSeconds(config, deployment) {
nextStatus = deployapi.DeploymentStatusFailed
updatedAnnotations[deployapi.DeploymentStatusReasonAnnotation] = deployapi.DeploymentFailedUnableToCreateDeployerPod
c.emitDeploymentEvent(deployment, v1.EventTypeWarning, "RolloutTimeout", fmt.Sprintf("Rollout for %q failed to create deployer pod (timeoutSeconds: %ds)", deployutil.LabelForDeploymentV1(deployment), deployutil.GetTimeoutSecondsForStrategy(config)))
glog.V(4).Infof("Failing deployment %s/%s as we reached timeout while waiting for the deployer pod to be created", deployment.Namespace, deployment.Name)
break
}

switch {
case kerrors.IsNotFound(deployerErr):
16 changes: 16 additions & 0 deletions pkg/apps/controller/deployer/deployer_controller_test.go
@@ -138,6 +138,7 @@ func TestHandle_createPodOk(t *testing.T) {
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.Spec.Template.Spec.NodeSelector = map[string]string{"labelKey1": "labelValue1", "labelKey2": "labelValue2"}
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)

@@ -227,6 +228,7 @@ func TestHandle_createPodFail(t *testing.T) {
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)

@@ -282,6 +284,7 @@ func TestHandle_deployerPodAlreadyExists(t *testing.T) {
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.CreationTimestamp = metav1.Now()
deployerPodName := deployutil.DeployerPodNameForDeployment(deployment.Name)

client := &fake.Clientset{}
@@ -321,6 +324,7 @@ func TestHandle_unrelatedPodAlreadyExists(t *testing.T) {

config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)

client := &fake.Clientset{}
@@ -362,6 +366,7 @@ func TestHandle_unrelatedPodAlreadyExistsTestScaled(t *testing.T) {
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.CreationTimestamp = metav1.Now()
one := int32(1)
deployment.Spec.Replicas = &one

@@ -433,6 +438,7 @@ func TestHandle_noop(t *testing.T) {

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, deployment, nil, true, test.podPhase)

@@ -476,6 +482,7 @@ func TestHandle_failedTest(t *testing.T) {
// Verify successful cleanup
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
one := int32(1)
deployment.Spec.Replicas = &one
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusRunning)
@@ -519,6 +526,7 @@ func TestHandle_cleanupPodOk(t *testing.T) {
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, deployment, hookPods, true, v1.PodSucceeded)
hookPods = append(hookPods, deployment.Name)
@@ -562,6 +570,7 @@ func TestHandle_cleanupPodOkTest(t *testing.T) {
// Verify successful cleanup
config := deploytest.TestDeploymentConfig(deploytest.OkDeploymentConfig(1))
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
one := int32(1)
deployment.Spec.Replicas = &one
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusRunning)
@@ -606,6 +615,7 @@ func TestHandle_cleanupPodNoop(t *testing.T) {
// Verify no-op
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)

controller := okDeploymentController(client, deployment, nil, true, v1.PodSucceeded)
@@ -637,6 +647,7 @@ func TestHandle_cleanupPodFail(t *testing.T) {
// Verify error
config := deploytest.OkDeploymentConfig(1)
deployment, _ := deployutil.MakeDeploymentV1(config, codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusComplete)

controller := okDeploymentController(client, deployment, nil, true, v1.PodSucceeded)
@@ -667,6 +678,7 @@ func TestHandle_cancelNew(t *testing.T) {
})

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue

@@ -688,6 +700,7 @@ func TestHandle_cleanupNewWithDeployers(t *testing.T) {
deletedDeployer := false

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue

@@ -782,6 +795,7 @@ func TestHandle_cleanupPostNew(t *testing.T) {
})

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.CreationTimestamp = metav1.Now()
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)

@@ -845,6 +859,7 @@ func TestHandle_deployerPodDisappeared(t *testing.T) {
continue
}
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.phase)
deployment.CreationTimestamp = metav1.Now()
updatedDeployment = deployment

controller := okDeploymentController(client, nil, nil, true, v1.PodUnknown)
@@ -980,6 +995,7 @@ func TestHandle_transitionFromDeployer(t *testing.T) {

deployment, _ := deployutil.MakeDeploymentV1(deploytest.OkDeploymentConfig(1), codec)
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(test.deploymentPhase)
deployment.CreationTimestamp = metav1.Now()

controller := okDeploymentController(client, deployment, nil, true, test.podPhase)

42 changes: 42 additions & 0 deletions pkg/apps/util/util.go
@@ -652,6 +652,12 @@ func IsTerminatedDeployment(deployment runtime.Object) bool {
return IsCompleteDeployment(deployment) || IsFailedDeployment(deployment)
}

// IsNewDeployment returns true if the passed deployment is in the new state.
func IsNewDeployment(deployment runtime.Object) bool {
current := DeploymentStatusFor(deployment)
return current == deployapi.DeploymentStatusNew
}

// IsCompleteDeployment returns true if the passed deployment is in state complete.
func IsCompleteDeployment(deployment runtime.Object) bool {
current := DeploymentStatusFor(deployment)
@@ -782,6 +788,42 @@ func DeploymentsForCleanup(configuration *deployapi.DeploymentConfig, deployment
return relevantDeployments
}

// GetTimeoutSecondsForStrategy returns the timeout in seconds defined in the
// deployment config strategy.
func GetTimeoutSecondsForStrategy(config *deployapi.DeploymentConfig) int64 {
var timeoutSeconds int64
switch config.Spec.Strategy.Type {
case deployapi.DeploymentStrategyTypeRolling:
timeoutSeconds = deployapi.DefaultRollingTimeoutSeconds
if t := config.Spec.Strategy.RollingParams.TimeoutSeconds; t != nil {
timeoutSeconds = *t
}
case deployapi.DeploymentStrategyTypeRecreate:
timeoutSeconds = deployapi.DefaultRecreateTimeoutSeconds
if t := config.Spec.Strategy.RecreateParams.TimeoutSeconds; t != nil {
timeoutSeconds = *t
}
case deployapi.DeploymentStrategyTypeCustom:
timeoutSeconds = deployapi.DefaultRecreateTimeoutSeconds
}
return timeoutSeconds
}

// RolloutExceededTimeoutSeconds returns true if the current deployment exceeded
// the timeoutSeconds defined for its strategy.
// Note that this is different from activeDeadlineSeconds, which is the timeout
// set for the deployer pod. In some cases the deployer pod cannot be created
// (quota, etc.). In that case the deployer controller uses this function to
// determine whether the deployment (RC) it created has exceeded the timeout.
func RolloutExceededTimeoutSeconds(config *deployapi.DeploymentConfig, latestRC *v1.ReplicationController) bool {
timeoutSeconds := GetTimeoutSecondsForStrategy(config)
// If the user set timeoutSeconds to 0, we assume there should be no timeout.
if timeoutSeconds <= 0 {
return false
}
return int64(time.Since(latestRC.CreationTimestamp.Time).Seconds()) > timeoutSeconds
}

// WaitForRunningDeployerPod waits a given period of time until the deployer pod
// for given replication controller is not running.
func WaitForRunningDeployerPod(podClient kcoreclient.PodsGetter, rc *api.ReplicationController, timeout time.Duration) error {
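
For context, a small standalone sketch of the defaulting behavior that GetTimeoutSecondsForStrategy implements: an explicit TimeoutSeconds on the strategy params wins, otherwise the per-strategy default applies (Custom falls back to the Recreate default). The constant values and names below are stand-ins, not taken from this PR:

package main

import "fmt"

// Simplified stand-ins for deployapi.DefaultRollingTimeoutSeconds and
// deployapi.DefaultRecreateTimeoutSeconds; the real values live in the API package.
const (
	defaultRollingTimeout  int64 = 600
	defaultRecreateTimeout int64 = 600
)

// timeoutForStrategy mirrors the defaulting logic of GetTimeoutSecondsForStrategy:
// an explicit TimeoutSeconds on the strategy params wins, otherwise the
// per-strategy default applies.
func timeoutForStrategy(strategyType string, explicit *int64) int64 {
	switch strategyType {
	case "Rolling":
		if explicit != nil {
			return *explicit
		}
		return defaultRollingTimeout
	case "Recreate":
		if explicit != nil {
			return *explicit
		}
		return defaultRecreateTimeout
	default: // Custom
		return defaultRecreateTimeout
	}
}

func main() {
	thirty := int64(30)
	fmt.Println(timeoutForStrategy("Rolling", &thirty)) // 30: explicit value wins
	fmt.Println(timeoutForStrategy("Recreate", nil))    // 600: default applies
}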
108 changes: 108 additions & 0 deletions pkg/apps/util/util_test.go
@@ -586,3 +586,111 @@ func TestRemoveCondition(t *testing.T) {
}
}
}

func TestRolloutExceededTimeoutSeconds(t *testing.T) {
now := time.Now()
tests := []struct {
name string
config *deployapi.DeploymentConfig
deploymentCreationTime time.Time
expectTimeout bool
}{
// Recreate strategy with deployment running for 20s (exceeding 10s timeout)
{
name: "recreate timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy.RecreateParams.TimeoutSeconds = &timeoutSeconds
return config
}(int64(10)),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: true,
},
// Recreate strategy with no timeout
{
name: "recreate no timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy.RecreateParams.TimeoutSeconds = &timeoutSeconds
return config
}(int64(0)),
deploymentCreationTime: now.Add(-700 * time.Second),
expectTimeout: false,
},

// Rolling strategy with deployment running for 20s (exceeding 10s timeout)
{
name: "rolling timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkRollingStrategy()
config.Spec.Strategy.RollingParams.TimeoutSeconds = &timeoutSeconds
return config
}(int64(10)),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: true,
},
// Rolling strategy with no explicit timeout (strategy default applies).
{
name: "rolling using default timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkRollingStrategy()
config.Spec.Strategy.RollingParams.TimeoutSeconds = nil
return config
}(0),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: false,
},
// Recreate strategy with no explicit timeout (strategy default applies).
{
name: "recreate using default timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy.RecreateParams.TimeoutSeconds = nil
return config
}(0),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: false,
},
// Custom strategy with no explicit timeout (strategy default applies).
{
name: "custom using default timeout",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkCustomStrategy()
return config
}(0),
deploymentCreationTime: now.Add(-20 * time.Second),
expectTimeout: false,
},
// Custom strategy using the default timeout and exceeding it.
{
name: "custom using default timeout timing out",
config: func(timeoutSeconds int64) *deployapi.DeploymentConfig {
config := deploytest.OkDeploymentConfig(1)
config.Spec.Strategy = deploytest.OkCustomStrategy()
return config
}(0),
deploymentCreationTime: now.Add(-700 * time.Second),
expectTimeout: true,
},
}

for _, tc := range tests {
config := tc.config
deployment, err := MakeDeploymentV1(config, kapi.Codecs.LegacyCodec(deployv1.SchemeGroupVersion))
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
deployment.ObjectMeta.CreationTimestamp = metav1.Time{Time: tc.deploymentCreationTime}
gotTimeout := RolloutExceededTimeoutSeconds(config, deployment)
if tc.expectTimeout && !gotTimeout {
t.Errorf("[%s]: expected timeout, but got no timeout", tc.name)
}
if !tc.expectTimeout && gotTimeout {
t.Errorf("[%s]: expected no timeout, but got timeout", tc.name)
}

}
}
