feat(actions): implement jobs.<job_id>.continue-on-error (#38100)

Support `continue-on-error` for workflow jobs when aggregating an
Actions workflow run status.

Previously, `continue-on-error` was parsed from workflow YAML but was
not persisted or used when calculating the overall run result. As a
result, a failed job could incorrectly fail the entire workflow even
when the workflow explicitly allowed that job to fail.

This PR stores the parsed `continue-on-error` value on each action run
job and treats failed jobs with `continue-on-error: true` as successful
when computing the workflow run status, matching GitHub Actions
behavior.

## Changes

- Add `ContinueOnError` to `jobparser.Job`.
- Add `continue_on_error` to `ActionRunJob` with a `NOT NULL DEFAULT
FALSE` migration.
- Populate `ActionRunJob.ContinueOnError` when creating workflow run
jobs.
- Update workflow status aggregation so failed `continue-on-error` jobs
do not fail the overall run.
- Update `resolveCheckNeeds` so a failed need with `continue-on-error: true` is
treated as successful, and dependent jobs with an implicit `success()` are not skipped.

## Compatibility

This is backward compatible.

If only the runner or only the server is updated, `continue-on-error`
continues to degrade to the previous behavior and is effectively ignored
until both sides support it.

Related runner PR: https://gitea.com/gitea/runner/pulls/1032

---------

Signed-off-by: bircni <bircni@icloud.com>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
bircni
2026-06-22 06:51:16 +02:00
committed by GitHub
parent 2c2611eab9
commit 7684221ed4
16 changed files with 268 additions and 36 deletions

View File

@@ -108,6 +108,10 @@ type ActionRunJob struct {
// ParentJobID scopes `Needs` resolution: name lookups happen only among rows sharing the same ParentJobID. 0 for top-level rows.
ParentJobID int64 `xorm:"index NOT NULL DEFAULT 0"`
// ContinueOnError mirrors the job-level continue-on-error field from the workflow YAML.
// When true, a failure of this job does not fail the overall workflow run.
ContinueOnError bool `xorm:"NOT NULL DEFAULT FALSE"`
Started timeutil.TimeStamp
Stopped timeutil.TimeStamp
Created timeutil.TimeStamp `xorm:"created"`
@@ -500,9 +504,12 @@ func AggregateJobStatus(jobs []*ActionRunJob) Status {
allSkipped := len(jobs) != 0
var hasFailure, hasCancelled, hasCancelling, hasWaiting, hasRunning, hasBlocked bool
for _, job := range jobs {
allSuccessOrSkipped = allSuccessOrSkipped && (job.Status == StatusSuccess || job.Status == StatusSkipped)
// A failed job with continue-on-error:true does not fail the workflow run.
// It counts as a "continued failure" and is treated like success for aggregation.
isContinuedFailure := job.ContinueOnError && job.Status == StatusFailure
allSuccessOrSkipped = allSuccessOrSkipped && (job.Status == StatusSuccess || job.Status == StatusSkipped || isContinuedFailure)
allSkipped = allSkipped && job.Status == StatusSkipped
hasFailure = hasFailure || job.Status == StatusFailure
hasFailure = hasFailure || (job.Status == StatusFailure && !job.ContinueOnError)
hasCancelled = hasCancelled || job.Status == StatusCancelled
hasCancelling = hasCancelling || job.Status == StatusCancelling
hasWaiting = hasWaiting || job.Status == StatusWaiting

View File

@@ -48,3 +48,57 @@ func TestStatusFromResult(t *testing.T) {
assert.Equal(t, tt.want, StatusFromResult(tt.result), "result=%s", tt.result)
}
}
// newJob is a test helper that builds an ActionRunJob with only the status and
// the continue-on-error flag populated; every other field keeps its zero value.
func newJob(status Status, continueOnError bool) *ActionRunJob {
	job := new(ActionRunJob)
	job.Status = status
	job.ContinueOnError = continueOnError
	return job
}
// TestAggregateJobStatusContinueOnError pins the aggregate status produced by
// AggregateJobStatus for combinations of succeeded, skipped and failed jobs,
// with and without the continue-on-error flag set on the failed job.
func TestAggregateJobStatusContinueOnError(t *testing.T) {
	type scenario struct {
		name string
		jobs []*ActionRunJob
		want Status
	}

	scenarios := []scenario{
		{
			"all success",
			[]*ActionRunJob{newJob(StatusSuccess, false), newJob(StatusSuccess, false)},
			StatusSuccess,
		},
		{
			"one failure without continue-on-error",
			[]*ActionRunJob{newJob(StatusSuccess, false), newJob(StatusFailure, false)},
			StatusFailure,
		},
		{
			"one failure with continue-on-error",
			[]*ActionRunJob{newJob(StatusSuccess, false), newJob(StatusFailure, true)},
			StatusSuccess,
		},
		{
			"only continued-failure",
			[]*ActionRunJob{newJob(StatusFailure, true)},
			StatusSuccess,
		},
		{
			// A tolerated failure must not hide a genuine one.
			"continued-failure plus real failure",
			[]*ActionRunJob{newJob(StatusFailure, true), newJob(StatusFailure, false)},
			StatusFailure,
		},
		{
			"all skipped",
			[]*ActionRunJob{newJob(StatusSkipped, false), newJob(StatusSkipped, false)},
			StatusSkipped,
		},
		{
			"continued-failure plus skipped counts as success",
			[]*ActionRunJob{newJob(StatusFailure, true), newJob(StatusSkipped, false)},
			StatusSuccess,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			got := AggregateJobStatus(sc.jobs)
			assert.Equal(t, sc.want, got)
		})
	}
}

View File

@@ -417,6 +417,7 @@ func prepareMigrationTasks() []*migration {
newMigration(337, "Add visibility to team", v1_27.AddVisibilityToTeam),
newMigration(338, "Expand legacy MSSQL issue/comment long-text columns", v1_27.ExpandIssueAndCommentLongTextFieldsForMSSQL),
newMigration(339, "Extend action c_u index to include created_unix for faster dashboard feed queries", v1_27.AddCreatedUnixToActionUserIsDeletedIndex),
newMigration(340, "Add ContinueOnError column to ActionRunJob", v1_27.AddContinueOnErrorToActionRunJob),
}
return preparedMigrations
}

View File

@@ -0,0 +1,24 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package v1_27
import (
"gitea.dev/models/db"
"xorm.io/xorm"
)
// AddContinueOnErrorToActionRunJob syncs a ContinueOnError column (NOT NULL,
// default FALSE) onto the ActionRunJob table. The column holds the job-level
// continue-on-error value taken from the workflow YAML.
func AddContinueOnErrorToActionRunJob(x db.EngineMigration) error {
	// Migration-local view of the table: only the new column is declared.
	type ActionRunJob struct {
		ContinueOnError bool `xorm:"NOT NULL DEFAULT FALSE"`
	}

	syncOpts := xorm.SyncOptions{
		IgnoreDropIndices: true,
		IgnoreConstrains:  true,
	}
	if _, err := x.SyncWithOptions(syncOpts, new(ActionRunJob)); err != nil {
		return err
	}
	return nil
}

View File

@@ -69,6 +69,9 @@ func Parse(content []byte, options ...ParseOption) ([]*SingleWorkflow, error) {
runsOn[i] = evaluator.Interpolate(v)
}
job.RawRunsOn = encodeRunsOn(runsOn)
if err := evaluator.EvaluateYamlNode(&job.RawContinueOnError); err != nil {
return nil, fmt.Errorf("evaluate continue-on-error for job %q: %w", id, err)
}
swf := &SingleWorkflow{
Name: workflow.Name,
RawOn: workflow.RawOn,

View File

@@ -58,6 +58,11 @@ func TestParse(t *testing.T) {
options: nil,
wantErr: false,
},
{
name: "continue_on_error_expr",
options: nil,
wantErr: false,
},
}
invalidFileTests := []struct {
name string

View File

@@ -79,23 +79,37 @@ func (w *SingleWorkflow) Marshal() ([]byte, error) {
}
type Job struct {
Name string `yaml:"name,omitempty"`
RawNeeds yaml.Node `yaml:"needs,omitempty"`
RawRunsOn yaml.Node `yaml:"runs-on,omitempty"`
Env yaml.Node `yaml:"env,omitempty"`
If yaml.Node `yaml:"if,omitempty"`
Steps []*Step `yaml:"steps,omitempty"`
TimeoutMinutes string `yaml:"timeout-minutes,omitempty"`
Services map[string]*ContainerSpec `yaml:"services,omitempty"`
Strategy Strategy `yaml:"strategy,omitempty"`
RawContainer yaml.Node `yaml:"container,omitempty"`
Defaults Defaults `yaml:"defaults,omitempty"`
Outputs map[string]string `yaml:"outputs,omitempty"`
Uses string `yaml:"uses,omitempty"`
With map[string]any `yaml:"with,omitempty"`
RawSecrets yaml.Node `yaml:"secrets,omitempty"`
RawConcurrency *model.RawConcurrency `yaml:"concurrency,omitempty"`
RawPermissions yaml.Node `yaml:"permissions,omitempty"`
Name string `yaml:"name,omitempty"`
RawNeeds yaml.Node `yaml:"needs,omitempty"`
RawRunsOn yaml.Node `yaml:"runs-on,omitempty"`
Env yaml.Node `yaml:"env,omitempty"`
If yaml.Node `yaml:"if,omitempty"`
Steps []*Step `yaml:"steps,omitempty"`
TimeoutMinutes string `yaml:"timeout-minutes,omitempty"`
RawContinueOnError yaml.Node `yaml:"continue-on-error,omitempty"`
Services map[string]*ContainerSpec `yaml:"services,omitempty"`
Strategy Strategy `yaml:"strategy,omitempty"`
RawContainer yaml.Node `yaml:"container,omitempty"`
Defaults Defaults `yaml:"defaults,omitempty"`
Outputs map[string]string `yaml:"outputs,omitempty"`
Uses string `yaml:"uses,omitempty"`
With map[string]any `yaml:"with,omitempty"`
RawSecrets yaml.Node `yaml:"secrets,omitempty"`
RawConcurrency *model.RawConcurrency `yaml:"concurrency,omitempty"`
RawPermissions yaml.Node `yaml:"permissions,omitempty"`
}
// GetContinueOnError reports the job's continue-on-error setting as a bool.
// The raw node may hold a literal bool or an expression that was already
// evaluated. An unset node (Kind == 0) and a node that does not decode into
// a bool both yield false.
func (j *Job) GetContinueOnError() bool {
	var enabled bool
	if j.RawContinueOnError.Kind != 0 && j.RawContinueOnError.Decode(&enabled) == nil {
		return enabled
	}
	return false
}
func (j *Job) Clone() *Job {
@@ -103,23 +117,24 @@ func (j *Job) Clone() *Job {
return nil
}
return &Job{
Name: j.Name,
RawNeeds: j.RawNeeds,
RawRunsOn: j.RawRunsOn,
Env: j.Env,
If: j.If,
Steps: j.Steps,
TimeoutMinutes: j.TimeoutMinutes,
Services: j.Services,
Strategy: j.Strategy,
RawContainer: j.RawContainer,
Defaults: j.Defaults,
Outputs: j.Outputs,
Uses: j.Uses,
With: j.With,
RawSecrets: j.RawSecrets,
RawConcurrency: j.RawConcurrency,
RawPermissions: j.RawPermissions,
Name: j.Name,
RawNeeds: j.RawNeeds,
RawRunsOn: j.RawRunsOn,
Env: j.Env,
If: j.If,
Steps: j.Steps,
TimeoutMinutes: j.TimeoutMinutes,
RawContinueOnError: j.RawContinueOnError,
Services: j.Services,
Strategy: j.Strategy,
RawContainer: j.RawContainer,
Defaults: j.Defaults,
Outputs: j.Outputs,
Uses: j.Uses,
With: j.With,
RawSecrets: j.RawSecrets,
RawConcurrency: j.RawConcurrency,
RawPermissions: j.RawPermissions,
}
}

View File

@@ -336,6 +336,52 @@ func TestSingleWorkflow_SetJob(t *testing.T) {
})
}
// TestGetContinueOnError parses small workflows and checks the value that
// Job.GetContinueOnError reports for an absent field, literal true/false, and
// a matrix expression that is expanded into one workflow per variant.
func TestGetContinueOnError(t *testing.T) {
	// Absent or literal values: each document parses into a single workflow.
	for _, tc := range []struct {
		name string
		yaml string
		want bool
	}{
		{
			name: "absent",
			yaml: "name: test\non: push\njobs:\n job1:\n runs-on: ubuntu-22.04\n steps:\n - run: echo hi\n",
			want: false,
		},
		{
			name: "static true",
			yaml: "name: test\non: push\njobs:\n job1:\n runs-on: ubuntu-22.04\n continue-on-error: true\n steps:\n - run: echo hi\n",
			want: true,
		},
		{
			name: "static false",
			yaml: "name: test\non: push\njobs:\n job1:\n runs-on: ubuntu-22.04\n continue-on-error: false\n steps:\n - run: echo hi\n",
			want: false,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			workflows, err := Parse([]byte(tc.yaml))
			require.NoError(t, err)
			require.Len(t, workflows, 1)

			_, job := workflows[0].Job()
			assert.Equal(t, tc.want, job.GetContinueOnError())
		})
	}

	// Expression case: ${{ matrix.experimental }} must resolve per matrix variant.
	t.Run("matrix expression", func(t *testing.T) {
		workflows, err := Parse(ReadTestdata(t, "continue_on_error_expr.in.yaml"))
		require.NoError(t, err)
		require.Len(t, workflows, 2)

		// The variants are expected in matrix-name order: (false) first, then (true).
		_, falseVariant := workflows[0].Job()
		_, trueVariant := workflows[1].Job()
		assert.False(t, falseVariant.GetContinueOnError())
		assert.True(t, trueVariant.GetContinueOnError())
	})
}
func TestParseMappingNode(t *testing.T) {
tests := []struct {
input string

View File

@@ -0,0 +1,10 @@
name: test
jobs:
job1:
strategy:
matrix:
experimental: [false, true]
runs-on: ubuntu-22.04
continue-on-error: ${{ matrix.experimental }}
steps:
- run: echo hi

View File

@@ -0,0 +1,25 @@
name: test
jobs:
job1:
name: job1 (false)
runs-on: ubuntu-22.04
steps:
- run: echo hi
continue-on-error: false
strategy:
matrix:
experimental:
- false
---
name: test
jobs:
job1:
name: job1 (true)
runs-on: ubuntu-22.04
steps:
- run: echo hi
continue-on-error: true
strategy:
matrix:
experimental:
- true

View File

@@ -380,6 +380,11 @@ func (r *jobStatusResolver) resolveCheckNeeds(id int64) (allDone, allSucceed boo
if !needStatus.IsDone() {
allDone = false
}
// A failed need with continue-on-error:true is treated as success, matching AggregateJobStatus,
// so a downstream job with an implicit `success()` is not skipped.
if needJob := r.jobMap[need]; needJob != nil && needJob.ContinueOnError && needStatus == actions_model.StatusFailure {
continue
}
if needStatus.In(actions_model.StatusFailure, actions_model.StatusCancelled, actions_model.StatusSkipped) {
allSucceed = false
}

View File

@@ -131,6 +131,24 @@ jobs:
},
want: map[int64]actions_model.Status{2: actions_model.StatusSkipped},
},
{
name: "`if` is empty and a failed need has continue-on-error",
jobs: actions_model.ActionJobList{
{ID: 1, JobID: "job1", Status: actions_model.StatusFailure, ContinueOnError: true, Needs: []string{}},
{ID: 2, JobID: "job2", Status: actions_model.StatusBlocked, Needs: []string{"job1"}, WorkflowPayload: []byte(
`
name: test
on: push
jobs:
job2:
runs-on: ubuntu-latest
needs: job1
steps:
- run: echo "should run, job1 failure is masked by continue-on-error"
`)},
},
want: map[int64]actions_model.Status{2: actions_model.StatusWaiting},
},
}
assert.NoError(t, unittest.PrepareTestDatabase())
ctx := t.Context()

View File

@@ -506,6 +506,7 @@ func cloneRunJobForAttempt(templateJob *actions_model.ActionRunJob, attempt *act
AttemptJobID: templateJob.AttemptJobID,
Needs: slices.Clone(templateJob.Needs),
RunsOn: slices.Clone(templateJob.RunsOn),
ContinueOnError: templateJob.ContinueOnError,
Status: templateJob.Status,
RawConcurrency: templateJob.RawConcurrency,
IsConcurrencyEvaluated: templateJob.IsConcurrencyEvaluated,

View File

@@ -80,6 +80,22 @@ func TestGetFailedJobsForRerun(t *testing.T) {
})
}
// TestCloneRunJobForAttempt checks that cloneRunJobForAttempt copies the
// template job's ContinueOnError flag onto the clone, for both true and false.
func TestCloneRunJobForAttempt(t *testing.T) {
	attempt := &actions_model.ActionRunAttempt{ID: 42, Attempt: 2}

	t.Run("preserves continue-on-error", func(t *testing.T) {
		src := &actions_model.ActionRunJob{
			ContinueOnError: true,
			Status:          actions_model.StatusFailure,
		}
		got := cloneRunJobForAttempt(src, attempt)
		assert.True(t, got.ContinueOnError)
	})

	t.Run("defaults to false when template has it unset", func(t *testing.T) {
		src := &actions_model.ActionRunJob{ContinueOnError: false}
		got := cloneRunJobForAttempt(src, attempt)
		assert.False(t, got.ContinueOnError)
	})
}
func TestRerunValidation(t *testing.T) {
runningRun := &actions_model.ActionRun{Status: actions_model.StatusRunning}

View File

@@ -325,6 +325,7 @@ func insertCallerChildren(ctx context.Context, run *actions_model.ActionRun, att
AttemptJobID: attemptJobID,
Needs: needs,
RunsOn: parsedChild.RunsOn(),
ContinueOnError: parsedChild.GetContinueOnError(),
Status: actions_model.StatusBlocked,
ParentJobID: caller.ID,
WorkflowSourceRepoID: sourceRepoID,

View File

@@ -164,6 +164,7 @@ func InsertRun(ctx context.Context, run *actions_model.ActionRun, content []byte
Status: util.Iif(shouldBlockJob, actions_model.StatusBlocked, actions_model.StatusWaiting),
WorkflowSourceRepoID: run.RepoID,
WorkflowSourceCommitSHA: run.CommitSHA,
ContinueOnError: job.GetContinueOnError(),
}
// Parse workflow/job permissions (no clamping here)
if perms := ExtractJobPermissionsFromWorkflow(v, job); perms != nil {