Skip to content

Commit

Permalink
Merge pull request #20297 from wozniakjan/bug-1596440/surface_oomkill…
Browse files Browse the repository at this point in the history
…ed_in_build

Bug 1596440 - surface OOMKilled pod to build
  • Loading branch information
openshift-merge-robot authored Jul 19, 2018
2 parents 656dd2f + 01eab5b commit 6b0c540
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 2 deletions.
4 changes: 4 additions & 0 deletions pkg/build/apis/build/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,9 @@ const (
// range of build failures.
StatusReasonGenericBuildFailed StatusReason = "GenericBuildFailed"

// StatusReasonOutOfMemoryKilled indicates that the build pod was killed for its memory consumption
StatusReasonOutOfMemoryKilled StatusReason = "OutOfMemoryKilled"

// StatusCannotRetrieveServiceAccount is the reason associated with a failure
// to look up the service account associated with the BuildConfig.
StatusReasonCannotRetrieveServiceAccount StatusReason = "CannotRetrieveServiceAccount"
Expand All @@ -540,6 +543,7 @@ const (
StatusMessageNoBuildContainerStatus = "The pod for this build has no container statuses indicating success or failure."
StatusMessageFailedContainer = "The pod for this build has at least one container with a non-zero exit status."
StatusMessageGenericBuildFailed = "Generic Build failure - check logs for details."
StatusMessageOutOfMemoryKilled = "The build pod was killed due to an out of memory condition."
StatusMessageUnresolvableEnvironmentVariable = "Unable to resolve build environment variable reference."
StatusMessageCannotRetrieveServiceAccount = "Unable to look up the service account secrets for this build."
)
Expand Down
26 changes: 25 additions & 1 deletion pkg/build/controller/build/build_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,9 @@ func (bc *BuildController) handleActiveBuild(build *buildapi.Build, pod *v1.Pod)
}
}
case v1.PodFailed:
if build.Status.Phase != buildapi.BuildPhaseFailed {
if isOOMKilled(pod) {
update = transitionToPhase(buildapi.BuildPhaseFailed, buildapi.StatusReasonOutOfMemoryKilled, buildapi.StatusMessageOutOfMemoryKilled)
} else if build.Status.Phase != buildapi.BuildPhaseFailed {
// If a DeletionTimestamp has been set, it means that the pod will
// soon be deleted. The build should be transitioned to the Error phase.
if pod.DeletionTimestamp != nil {
Expand All @@ -1014,11 +1016,33 @@ func (bc *BuildController) handleActiveBuild(build *buildapi.Build, pod *v1.Pod)
return update, nil
}

func isOOMKilled(pod *v1.Pod) bool {
if pod.Status.Reason == "OOMKilled" {
return true
}
for _, c := range pod.Status.InitContainerStatuses {
terminated := c.State.Terminated
if terminated != nil && terminated.Reason == "OOMKilled" {
return true
}
}
for _, c := range pod.Status.ContainerStatuses {
terminated := c.State.Terminated
if terminated != nil && terminated.Reason == "OOMKilled" {
return true
}
}
return false
}

// handleCompletedBuild will only be called on builds that are already in a terminal phase. It is used to setup the
// completion timestamp and failure logsnippet as needed.
func (bc *BuildController) handleCompletedBuild(build *buildapi.Build, pod *v1.Pod) (*buildUpdate, error) {

update := &buildUpdate{}
if isOOMKilled(pod) {
update = transitionToPhase(buildapi.BuildPhaseFailed, buildapi.StatusReasonOutOfMemoryKilled, buildapi.StatusMessageOutOfMemoryKilled)
}
setBuildCompletionData(build, pod, update)

return update, nil
Expand Down
6 changes: 5 additions & 1 deletion pkg/build/registry/build/strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ func (strategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object) {
// of the reason and message. This is to prevent the build controller from
// overwriting the reason and message that was set by the builder pod
// when it updated the build's details.
if oldBuild.Status.Phase == buildapi.BuildPhaseFailed {
// Only allow OOMKilled override because various processes in a container
// can get OOMKilled and this confuses builder to prematurely populate
// failure reason
if oldBuild.Status.Phase == buildapi.BuildPhaseFailed &&
newBuild.Status.Reason != buildapi.StatusReasonOutOfMemoryKilled {
newBuild.Status.Reason = oldBuild.Status.Reason
newBuild.Status.Message = oldBuild.Status.Message
}
Expand Down
27 changes: 27 additions & 0 deletions test/extended/builds/failure_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ var _ = g.Describe("[Feature:Builds][Slow] update failure status", func() {
fetchDockerSrc = exutil.FixturePath("testdata", "builds", "statusfail-fetchsourcedocker.yaml")
fetchS2ISrc = exutil.FixturePath("testdata", "builds", "statusfail-fetchsources2i.yaml")
badContextDirS2ISrc = exutil.FixturePath("testdata", "builds", "statusfail-badcontextdirs2i.yaml")
oomkilled = exutil.FixturePath("testdata", "builds", "statusfail-oomkilled.yaml")
builderImageFixture = exutil.FixturePath("testdata", "builds", "statusfail-fetchbuilderimage.yaml")
pushToRegistryFixture = exutil.FixturePath("testdata", "builds", "statusfail-pushtoregistry.yaml")
failedAssembleFixture = exutil.FixturePath("testdata", "builds", "statusfail-failedassemble.yaml")
Expand Down Expand Up @@ -130,6 +131,32 @@ var _ = g.Describe("[Feature:Builds][Slow] update failure status", func() {
})
})

g.Describe("Build status OutOfMemoryKilled", func() {
g.It("should contain OutOfMemoryKilled failure reason and message", func() {
err := oc.Run("create").Args("-f", oomkilled).Execute()
o.Expect(err).NotTo(o.HaveOccurred())

br, err := exutil.StartBuildAndWait(oc, "statusfail-oomkilled", "--build-loglevel=5")
o.Expect(err).NotTo(o.HaveOccurred())
br.AssertFailure()
br.DumpLogs()

var build *buildapi.Build
wait.PollImmediate(200*time.Millisecond, 30*time.Second, func() (bool, error) {
build, err = oc.BuildClient().Build().Builds(oc.Namespace()).Get(br.Build.Name, metav1.GetOptions{})
if build.Status.Reason != buildapi.StatusReasonOutOfMemoryKilled {
return false, nil
}
return true, nil
})
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(build.Status.Reason).To(o.Equal(buildapi.StatusReasonOutOfMemoryKilled))
o.Expect(build.Status.Message).To(o.Equal(buildapi.StatusMessageOutOfMemoryKilled))

exutil.CheckForBuildEvent(oc.KubeClient().Core(), br.Build, buildapi.BuildFailedEventReason, buildapi.BuildFailedEventMessage)
})
})

g.Describe("Build status S2I bad context dir failure", func() {
g.It("should contain the S2I bad context dir failure reason and message", func() {
err := oc.Run("create").Args("-f", badContextDirS2ISrc).Execute()
Expand Down
37 changes: 37 additions & 0 deletions test/extended/testdata/bindata.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions test/extended/testdata/builds/statusfail-oomkilled.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
kind: BuildConfig
apiVersion: v1
metadata:
name: statusfail-oomkilled
spec:
resources:
limits:
memory: 10Mi
source:
git:
uri: "https://github.com/openshift/ruby-hello-world"
strategy:
type: Source
sourceStrategy:
from:
kind: DockerImage
name: centos/ruby-23-centos7:latest

0 comments on commit 6b0c540

Please sign in to comment.