Alright, so I put together a very rough lab setup for reproducing this. I'm happy to clean things up and change things around later; this is just for illustration.
The ideal solution would be to add an attribute to a step that lets it forcefully fail the rest of the pipeline. In this case I just called it fail_all (boolean); it could make more sense as fail_all_on_failure, but that can be changed easily enough.
Ideally I would put it on each of the parallel steps, but for this example I only put it on the one step that fails quickly.
Desired Result
When several parallel steps are running simultaneously and one of them fails, fail all of the steps right away, so that we are not wasting resources waiting for a pipeline that is already going to fail. It should fail fast and immediately.
In our current pipeline we have 5 special Build Runner images with Docker Compose / Docker clients that talk to a Docker-in-Docker (DinD) instance and then run test suites that interact with multiple codebases and services that all talk to each other. Each runner stands up an identical instance of the application using all of these services. One test suite could fail very early, but since we want to reuse the cache of the DinD instance per project, we make other builds wait until the current pipeline is 100% done, which could take another 10-15 minutes at least. This can be very annoying for developers: the tests keep running, and even after a cancellation the containers do not get killed until the command is done running.
Mock Lab Pipeline Setup
I quickly realized it made more sense to just put a quick mock pipeline together instead of using one of our CI pipelines. Here I just used a few Debian containers with some sleeps and echos.
kind: pipeline
type: docker
name: default
clone:
disable: true
steps:
- name: setup
image: debian:stable-slim
commands:
- sleep 1 && echo "Done with Setup"
- name: test-1
image: debian:stable-slim
commands:
- sleep 60 && echo "Done 1"
depends_on:
- setup
- name: test-2
image: debian:stable-slim
commands:
- sleep 60 && echo "Done 2"
depends_on:
- setup
- name: test-3
image: debian:stable-slim
fail_all: true
commands:
- sleep 2 && echo "Attempting early failure" && exit 1
depends_on:
- setup
- name: test-4
image: debian:stable-slim
commands:
- sleep 70 && echo "Done 4"
depends_on:
- setup
Pipeline Run Example
./drone-runner-docker exec --debug --dump ./.drone.yml
Executing step ["setup"] [fail_all: false]
Executing step ["test-1"] [fail_all: false]
Executing step ["test-2"] [fail_all: false]
Executing step ["test-3"] [fail_all: true]
Executing step ["test-4"] [fail_all: false]
DEBU[0000] Running Step step.command="[echo \"$DRONE_SCRIPT\" | /bin/sh]" step.image="docker.io/library/debian:stable-slim" step.name=setup
[setup:1] + sleep 1 && echo "Done with Setup"
[setup:2] Done with Setup
step exited
DEBU[0002] Running Step step.command="[echo \"$DRONE_SCRIPT\" | /bin/sh]" step.image="docker.io/library/debian:stable-slim" step.name=test-4
DEBU[0002] Running Step step.command="[echo \"$DRONE_SCRIPT\" | /bin/sh]" step.image="docker.io/library/debian:stable-slim" step.name=test-1
DEBU[0002] Running Step step.command="[echo \"$DRONE_SCRIPT\" | /bin/sh]" step.image="docker.io/library/debian:stable-slim" step.name=test-2
DEBU[0002] Running Step step.command="[echo \"$DRONE_SCRIPT\" | /bin/sh]" step.image="docker.io/library/debian:stable-slim" step.name=test-3
[test-1:3] + sleep 60 && echo "Done 1"
[test-4:4] + sleep 70 && echo "Done 4"
[test-2:5] + sleep 60 && echo "Done 2"
[test-3:6] + sleep 2 && echo "Attempting early failure" && exit 1
[test-3:7] Attempting early failure
step exited
DEBU[0007] Failing all steps in build step.name=test-3
step exited
step exited
step exited
{
"Build": {
"id": 1,
"repo_id": 0,
"trigger": "",
"number": 1,
"status": "killed",
"event": "push",
"action": "",
"link": "",
"timestamp": 0,
"message": "",
"before": "",
"after": "",
"ref": "",
"source_repo": "",
"source": "",
"target": "",
"author_login": "",
"author_name": "",
"author_email": "",
"author_avatar": "",
"sender": "",
"started": 0,
"finished": 0,
"created": 1574231669,
"updated": 1574231669,
"version": 0
},
"Repo": {
"id": 1,
"uid": "",
"user_id": 0,
"namespace": "",
"name": "",
"slug": "",
"scm": "",
"git_http_url": "",
"git_ssh_url": "",
"link": "",
"default_branch": "",
"private": false,
"visibility": "",
"active": false,
"config_path": "",
"trusted": false,
"protected": false,
"ignore_forks": false,
"ignore_pull_requests": false,
"timeout": 60,
"counter": 0,
"synced": 0,
"created": 1574231669,
"updated": 1574231669,
"version": 0
},
"Stage": {
"id": 1,
"build_id": 0,
"number": 1,
"name": "default",
"status": "killed",
"errignore": false,
"exit_code": 137,
"os": "",
"arch": "",
"started": 1574231677,
"stopped": 1574231677,
"created": 1574231669,
"updated": 1574231669,
"version": 0,
"on_success": false,
"on_failure": false,
"steps": [
{
"id": 0,
"step_id": 1,
"number": 1,
"name": "setup",
"status": "success",
"exit_code": 0,
"started": 1574231669,
"stopped": 1574231671,
"version": 0
},
{
"id": 0,
"step_id": 1,
"number": 2,
"name": "test-1",
"status": "killed",
"exit_code": 137,
"started": 1574231671,
"stopped": 1574231677,
"version": 0
},
{
"id": 0,
"step_id": 1,
"number": 3,
"name": "test-2",
"status": "killed",
"exit_code": 137,
"started": 1574231671,
"stopped": 1574231677,
"version": 0
},
{
"id": 0,
"step_id": 1,
"number": 4,
"name": "test-3",
"status": "failure",
"exit_code": 1,
"started": 1574231671,
"stopped": 1574231677,
"version": 0
},
{
"id": 0,
"step_id": 1,
"number": 5,
"name": "test-4",
"status": "killed",
"exit_code": 137,
"started": 1574231671,
"stopped": 1574231677,
"version": 0
}
]
},
"System": {}
}
Diff
diff --git a/.gitignore b/.gitignore
index 1a47b29..bd2f260 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ release/*
.docker
.env
NOTES*
+.idea
\ No newline at end of file
diff --git a/engine/compiler/step.go b/engine/compiler/step.go
index d0bd093..74a0152 100644
--- a/engine/compiler/step.go
+++ b/engine/compiler/step.go
@@ -5,6 +5,7 @@
package compiler
import (
+ "fmt"
"strings"
"github.com/drone-runners/drone-runner-docker/engine"
@@ -22,6 +23,7 @@ func createStep(spec *resource.Pipeline, src *resource.Step) *engine.Step {
Entrypoint: src.Entrypoint,
Detach: src.Detach,
DependsOn: src.DependsOn,
+ FailAll: src.FailAll,
DNS: src.DNS,
DNSSearch: src.DNSSearch,
Envs: convertStaticEnv(src.Environment),
@@ -47,6 +49,8 @@ func createStep(spec *resource.Pipeline, src *resource.Step) *engine.Step {
// Resources: toResources(src), // TODO
}
+ fmt.Println(fmt.Sprintf("Executing step [%q] [fail_all: %t]", src.Name, src.FailAll))
+
// appends the volumes to the container def.
for _, vol := range src.Volumes {
dst.Volumes = append(dst.Volumes, &engine.VolumeMount{
diff --git a/engine/resource/pipeline.go b/engine/resource/pipeline.go
index c992fff..5e121f5 100644
--- a/engine/resource/pipeline.go
+++ b/engine/resource/pipeline.go
@@ -94,6 +94,7 @@ type (
Environment map[string]*manifest.Variable `json:"environment,omitempty"`
ExtraHosts []string `json:"extra_hosts,omitempty" yaml:"extra_hosts"`
Failure string `json:"failure,omitempty"`
+ FailAll bool `json:"fail_all,omitempty" yaml:"fail_all"`
Image string `json:"image,omitempty"`
Network string `json:"network_mode,omitempty" yaml:"network_mode"`
Name string `json:"name,omitempty"`
diff --git a/engine/spec.go b/engine/spec.go
index 9b524dc..83d2390 100644
--- a/engine/spec.go
+++ b/engine/spec.go
@@ -40,6 +40,7 @@ type (
MemSwapLimit int64 `json:"memswap_limit,omitempty"`
MemLimit int64 `json:"mem_limit,omitempty"`
Name string `json:"name,omitempty"`
+ FailAll bool `json:"fail_all,omitempty"`
Network string `json:"network,omitempty"`
Networks []string `json:"networks,omitempty"`
Privileged bool `json:"privileged,omitempty"`
diff --git a/runtime/execer.go b/runtime/execer.go
index 9071abf..45cfa5c 100644
--- a/runtime/execer.go
+++ b/runtime/execer.go
@@ -8,6 +8,7 @@ package runtime
import (
"context"
+ "fmt"
"sync"
"github.com/drone-runners/drone-runner-docker/engine"
@@ -72,6 +73,13 @@ func (e *execer) Exec(ctx context.Context, spec *engine.Spec, state *pipeline.St
for _, s := range spec.Steps {
step := s
d.AddVertex(step.Name, func() error {
+ log := logger.FromContext(ctx).
+ WithField("step.name", step.Name).
+ WithField("step.command", step.Command).
+ WithField("step.image", step.Image)
+
+ log.Debug("Running Step")
+
return e.exec(ctx, state, spec, step)
})
}
@@ -197,6 +205,16 @@ func (e *execer) exec(ctx context.Context, state *pipeline.State, spec *engine.S
if err != nil {
multierror.Append(result, err)
}
+
+ fmt.Println("step exited")
+
+ if step.FailAll {
+ log.Debug("Failing all steps in build")
+ state.Cancel()
+ e.engine.Destroy(noContext, spec)
+ return nil
+ }
+
// if the exit code is 78 the system will skip all
// subsequent pending steps in the pipeline.
if exited.ExitCode == 78 {
Let me know your thoughts.