From 7684221ed47b49d572e2177cc2258e66a4fcdf99 Mon Sep 17 00:00:00 2001
From: bircni <bircni@icloud.com>
Date: Mon, 22 Jun 2026 06:51:16 +0200
Subject: [PATCH] feat(actions): implement `jobs.<job_id>.continue-on-error`
 (#38100)

Support `continue-on-error` for workflow jobs when aggregating an
Actions workflow run status.

Previously, `continue-on-error` was parsed from workflow YAML but was
not persisted or used when calculating the overall run result. As a
result, a failed job could incorrectly fail the entire workflow even
when the workflow explicitly allowed that job to fail.

This PR stores the parsed `continue-on-error` value on each action run
job and treats failed jobs with `continue-on-error: true` as successful
when computing the workflow run status, matching GitHub Actions
behavior.

## Changes

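- Add `RawContinueOnError` and a `GetContinueOnError()` accessor to
`jobparser.Job`; expressions such as `${{ matrix.experimental }}` are
evaluated at parse time.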
- Add `continue_on_error` to `ActionRunJob` with a `NOT NULL DEFAULT
FALSE` migration.
- Populate `ActionRunJob.ContinueOnError` when creating workflow run
jobs.
- Update workflow status aggregation so failed `continue-on-error` jobs
do not fail the overall run.
- Update `resolveCheckNeeds` so a failed need with `continue-on-error:
true` is treated as success, matching the run status aggregation, and
dependent jobs with an implicit `success()` condition are not skipped.

## Compatibility

This is backward compatible.

If only the runner or only the server is updated, `continue-on-error`
continues to degrade to the previous behavior and is effectively ignored
until both sides support it.

Related runner PR: https://gitea.com/gitea/runner/pulls/1032

---------

Signed-off-by: bircni <bircni@icloud.com>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
---
 models/actions/run_job.go                     | 11 ++-
 models/actions/status_test.go                 | 54 ++++++++++++
 models/migrations/migrations.go               |  1 +
 models/migrations/v1_27/v340.go               | 24 ++++++
 modules/actions/jobparser/jobparser.go        |  3 +
 modules/actions/jobparser/jobparser_test.go   |  5 ++
 modules/actions/jobparser/model.go            | 83 +++++++++++--------
 modules/actions/jobparser/model_test.go       | 46 ++++++++++
 .../testdata/continue_on_error_expr.in.yaml   | 10 +++
 .../testdata/continue_on_error_expr.out.yaml  | 25 ++++++
 services/actions/job_emitter.go               |  5 ++
 services/actions/job_emitter_test.go          | 18 ++++
 services/actions/rerun.go                     |  1 +
 services/actions/rerun_test.go                | 16 ++++
 services/actions/reusable_workflow.go         |  1 +
 services/actions/run.go                       |  1 +
 16 files changed, 268 insertions(+), 36 deletions(-)
 create mode 100644 models/migrations/v1_27/v340.go
 create mode 100644 modules/actions/jobparser/testdata/continue_on_error_expr.in.yaml
 create mode 100644 modules/actions/jobparser/testdata/continue_on_error_expr.out.yaml

diff --git a/models/actions/run_job.go b/models/actions/run_job.go
index df01546fd8..02877e0e2c 100644
--- a/models/actions/run_job.go
+++ b/models/actions/run_job.go
@@ -108,6 +108,10 @@ type ActionRunJob struct {
 	// ParentJobID scopes `Needs` resolution: name lookups happen only among rows sharing the same ParentJobID. 0 for top-level rows.
 	ParentJobID int64 `xorm:"index NOT NULL DEFAULT 0"`
 
+	// ContinueOnError mirrors the job-level continue-on-error field from the workflow YAML.
+	// When true, a failure of this job does not fail the overall workflow run.
+	ContinueOnError bool `xorm:"NOT NULL DEFAULT FALSE"`
+
 	Started timeutil.TimeStamp
 	Stopped timeutil.TimeStamp
 	Created timeutil.TimeStamp `xorm:"created"`
@@ -500,9 +504,12 @@ func AggregateJobStatus(jobs []*ActionRunJob) Status {
 	allSkipped := len(jobs) != 0
 	var hasFailure, hasCancelled, hasCancelling, hasWaiting, hasRunning, hasBlocked bool
 	for _, job := range jobs {
-		allSuccessOrSkipped = allSuccessOrSkipped && (job.Status == StatusSuccess || job.Status == StatusSkipped)
+		// A failed job with continue-on-error:true does not fail the workflow run.
+		// It counts as a "continued failure" and is treated like success for aggregation.
+		isContinuedFailure := job.ContinueOnError && job.Status == StatusFailure
+		allSuccessOrSkipped = allSuccessOrSkipped && (job.Status == StatusSuccess || job.Status == StatusSkipped || isContinuedFailure)
 		allSkipped = allSkipped && job.Status == StatusSkipped
-		hasFailure = hasFailure || job.Status == StatusFailure
+		hasFailure = hasFailure || (job.Status == StatusFailure && !job.ContinueOnError)
 		hasCancelled = hasCancelled || job.Status == StatusCancelled
 		hasCancelling = hasCancelling || job.Status == StatusCancelling
 		hasWaiting = hasWaiting || job.Status == StatusWaiting
diff --git a/models/actions/status_test.go b/models/actions/status_test.go
index f1551b2892..eed45af9b0 100644
--- a/models/actions/status_test.go
+++ b/models/actions/status_test.go
@@ -48,3 +48,57 @@ func TestStatusFromResult(t *testing.T) {
 		assert.Equal(t, tt.want, StatusFromResult(tt.result), "result=%s", tt.result)
 	}
 }
+
+func newJob(status Status, continueOnError bool) *ActionRunJob {
+	return &ActionRunJob{Status: status, ContinueOnError: continueOnError}
+}
+
+func TestAggregateJobStatusContinueOnError(t *testing.T) {
+	cases := []struct {
+		name string
+		jobs []*ActionRunJob
+		want Status
+	}{
+		{
+			name: "all success",
+			jobs: []*ActionRunJob{newJob(StatusSuccess, false), newJob(StatusSuccess, false)},
+			want: StatusSuccess,
+		},
+		{
+			name: "one failure without continue-on-error",
+			jobs: []*ActionRunJob{newJob(StatusSuccess, false), newJob(StatusFailure, false)},
+			want: StatusFailure,
+		},
+		{
+			name: "one failure with continue-on-error",
+			jobs: []*ActionRunJob{newJob(StatusSuccess, false), newJob(StatusFailure, true)},
+			want: StatusSuccess,
+		},
+		{
+			name: "only continued-failure",
+			jobs: []*ActionRunJob{newJob(StatusFailure, true)},
+			want: StatusSuccess,
+		},
+		{
+			name: "continued-failure plus real failure",
+			jobs: []*ActionRunJob{newJob(StatusFailure, true), newJob(StatusFailure, false)},
+			want: StatusFailure,
+		},
+		{
+			name: "all skipped",
+			jobs: []*ActionRunJob{newJob(StatusSkipped, false), newJob(StatusSkipped, false)},
+			want: StatusSkipped,
+		},
+		{
+			name: "continued-failure plus skipped counts as success",
+			jobs: []*ActionRunJob{newJob(StatusFailure, true), newJob(StatusSkipped, false)},
+			want: StatusSuccess,
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.want, AggregateJobStatus(tt.jobs))
+		})
+	}
+}
diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go
index a026ee7a52..006016f447 100644
--- a/models/migrations/migrations.go
+++ b/models/migrations/migrations.go
@@ -417,6 +417,7 @@ func prepareMigrationTasks() []*migration {
 		newMigration(337, "Add visibility to team", v1_27.AddVisibilityToTeam),
 		newMigration(338, "Expand legacy MSSQL issue/comment long-text columns", v1_27.ExpandIssueAndCommentLongTextFieldsForMSSQL),
 		newMigration(339, "Extend action c_u index to include created_unix for faster dashboard feed queries", v1_27.AddCreatedUnixToActionUserIsDeletedIndex),
+		newMigration(340, "Add ContinueOnError column to ActionRunJob", v1_27.AddContinueOnErrorToActionRunJob),
 	}
 	return preparedMigrations
 }
diff --git a/models/migrations/v1_27/v340.go b/models/migrations/v1_27/v340.go
new file mode 100644
index 0000000000..dcb942dfb4
--- /dev/null
+++ b/models/migrations/v1_27/v340.go
@@ -0,0 +1,24 @@
+// Copyright 2026 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package v1_27
+
+import (
+	"gitea.dev/models/db"
+
+	"xorm.io/xorm"
+)
+
+// AddContinueOnErrorToActionRunJob adds the ContinueOnError column to ActionRunJob,
+// storing the job-level continue-on-error value from the workflow YAML.
+func AddContinueOnErrorToActionRunJob(x db.EngineMigration) error {
+	type ActionRunJob struct {
+		ContinueOnError bool `xorm:"NOT NULL DEFAULT FALSE"`
+	}
+
+	_, err := x.SyncWithOptions(xorm.SyncOptions{
+		IgnoreDropIndices: true,
+		IgnoreConstrains:  true,
+	}, new(ActionRunJob))
+	return err
+}
diff --git a/modules/actions/jobparser/jobparser.go b/modules/actions/jobparser/jobparser.go
index e7a2b48498..79c7b7b433 100644
--- a/modules/actions/jobparser/jobparser.go
+++ b/modules/actions/jobparser/jobparser.go
@@ -69,6 +69,9 @@ func Parse(content []byte, options ...ParseOption) ([]*SingleWorkflow, error) {
 				runsOn[i] = evaluator.Interpolate(v)
 			}
 			job.RawRunsOn = encodeRunsOn(runsOn)
+			if err := evaluator.EvaluateYamlNode(&job.RawContinueOnError); err != nil {
+				return nil, fmt.Errorf("evaluate continue-on-error for job %q: %w", id, err)
+			}
 			swf := &SingleWorkflow{
 				Name:           workflow.Name,
 				RawOn:          workflow.RawOn,
diff --git a/modules/actions/jobparser/jobparser_test.go b/modules/actions/jobparser/jobparser_test.go
index e74f0644f8..05bb3151b7 100644
--- a/modules/actions/jobparser/jobparser_test.go
+++ b/modules/actions/jobparser/jobparser_test.go
@@ -58,6 +58,11 @@ func TestParse(t *testing.T) {
 			options: nil,
 			wantErr: false,
 		},
+		{
+			name:    "continue_on_error_expr",
+			options: nil,
+			wantErr: false,
+		},
 	}
 	invalidFileTests := []struct {
 		name string
diff --git a/modules/actions/jobparser/model.go b/modules/actions/jobparser/model.go
index c80626e4c0..97a36235d2 100644
--- a/modules/actions/jobparser/model.go
+++ b/modules/actions/jobparser/model.go
@@ -79,23 +79,37 @@ func (w *SingleWorkflow) Marshal() ([]byte, error) {
 }
 
 type Job struct {
-	Name           string                    `yaml:"name,omitempty"`
-	RawNeeds       yaml.Node                 `yaml:"needs,omitempty"`
-	RawRunsOn      yaml.Node                 `yaml:"runs-on,omitempty"`
-	Env            yaml.Node                 `yaml:"env,omitempty"`
-	If             yaml.Node                 `yaml:"if,omitempty"`
-	Steps          []*Step                   `yaml:"steps,omitempty"`
-	TimeoutMinutes string                    `yaml:"timeout-minutes,omitempty"`
-	Services       map[string]*ContainerSpec `yaml:"services,omitempty"`
-	Strategy       Strategy                  `yaml:"strategy,omitempty"`
-	RawContainer   yaml.Node                 `yaml:"container,omitempty"`
-	Defaults       Defaults                  `yaml:"defaults,omitempty"`
-	Outputs        map[string]string         `yaml:"outputs,omitempty"`
-	Uses           string                    `yaml:"uses,omitempty"`
-	With           map[string]any            `yaml:"with,omitempty"`
-	RawSecrets     yaml.Node                 `yaml:"secrets,omitempty"`
-	RawConcurrency *model.RawConcurrency     `yaml:"concurrency,omitempty"`
-	RawPermissions yaml.Node                 `yaml:"permissions,omitempty"`
+	Name               string                    `yaml:"name,omitempty"`
+	RawNeeds           yaml.Node                 `yaml:"needs,omitempty"`
+	RawRunsOn          yaml.Node                 `yaml:"runs-on,omitempty"`
+	Env                yaml.Node                 `yaml:"env,omitempty"`
+	If                 yaml.Node                 `yaml:"if,omitempty"`
+	Steps              []*Step                   `yaml:"steps,omitempty"`
+	TimeoutMinutes     string                    `yaml:"timeout-minutes,omitempty"`
+	RawContinueOnError yaml.Node                 `yaml:"continue-on-error,omitempty"`
+	Services           map[string]*ContainerSpec `yaml:"services,omitempty"`
+	Strategy           Strategy                  `yaml:"strategy,omitempty"`
+	RawContainer       yaml.Node                 `yaml:"container,omitempty"`
+	Defaults           Defaults                  `yaml:"defaults,omitempty"`
+	Outputs            map[string]string         `yaml:"outputs,omitempty"`
+	Uses               string                    `yaml:"uses,omitempty"`
+	With               map[string]any            `yaml:"with,omitempty"`
+	RawSecrets         yaml.Node                 `yaml:"secrets,omitempty"`
+	RawConcurrency     *model.RawConcurrency     `yaml:"concurrency,omitempty"`
+	RawPermissions     yaml.Node                 `yaml:"permissions,omitempty"`
+}
+
+// GetContinueOnError decodes the continue-on-error field (a literal bool or an
+// already-evaluated expression node); it returns false if absent or not decodable as a bool.
+func (j *Job) GetContinueOnError() bool {
+	if j.RawContinueOnError.Kind == 0 {
+		return false
+	}
+	var v bool
+	if err := j.RawContinueOnError.Decode(&v); err != nil {
+		return false
+	}
+	return v
 }
 
 func (j *Job) Clone() *Job {
@@ -103,23 +117,24 @@ func (j *Job) Clone() *Job {
 		return nil
 	}
 	return &Job{
-		Name:           j.Name,
-		RawNeeds:       j.RawNeeds,
-		RawRunsOn:      j.RawRunsOn,
-		Env:            j.Env,
-		If:             j.If,
-		Steps:          j.Steps,
-		TimeoutMinutes: j.TimeoutMinutes,
-		Services:       j.Services,
-		Strategy:       j.Strategy,
-		RawContainer:   j.RawContainer,
-		Defaults:       j.Defaults,
-		Outputs:        j.Outputs,
-		Uses:           j.Uses,
-		With:           j.With,
-		RawSecrets:     j.RawSecrets,
-		RawConcurrency: j.RawConcurrency,
-		RawPermissions: j.RawPermissions,
+		Name:               j.Name,
+		RawNeeds:           j.RawNeeds,
+		RawRunsOn:          j.RawRunsOn,
+		Env:                j.Env,
+		If:                 j.If,
+		Steps:              j.Steps,
+		TimeoutMinutes:     j.TimeoutMinutes,
+		RawContinueOnError: j.RawContinueOnError,
+		Services:           j.Services,
+		Strategy:           j.Strategy,
+		RawContainer:       j.RawContainer,
+		Defaults:           j.Defaults,
+		Outputs:            j.Outputs,
+		Uses:               j.Uses,
+		With:               j.With,
+		RawSecrets:         j.RawSecrets,
+		RawConcurrency:     j.RawConcurrency,
+		RawPermissions:     j.RawPermissions,
 	}
 }
 
diff --git a/modules/actions/jobparser/model_test.go b/modules/actions/jobparser/model_test.go
index 567c160f92..ff85e912af 100644
--- a/modules/actions/jobparser/model_test.go
+++ b/modules/actions/jobparser/model_test.go
@@ -336,6 +336,52 @@ func TestSingleWorkflow_SetJob(t *testing.T) {
 	})
 }
 
+func TestGetContinueOnError(t *testing.T) {
+	tests := []struct {
+		name string
+		yaml string
+		want bool
+	}{
+		{
+			name: "absent",
+			yaml: "name: test\non: push\njobs:\n  job1:\n    runs-on: ubuntu-22.04\n    steps:\n      - run: echo hi\n",
+			want: false,
+		},
+		{
+			name: "static true",
+			yaml: "name: test\non: push\njobs:\n  job1:\n    runs-on: ubuntu-22.04\n    continue-on-error: true\n    steps:\n      - run: echo hi\n",
+			want: true,
+		},
+		{
+			name: "static false",
+			yaml: "name: test\non: push\njobs:\n  job1:\n    runs-on: ubuntu-22.04\n    continue-on-error: false\n    steps:\n      - run: echo hi\n",
+			want: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := Parse([]byte(tt.yaml))
+			require.NoError(t, err)
+			require.Len(t, got, 1)
+			_, job := got[0].Job()
+			assert.Equal(t, tt.want, job.GetContinueOnError())
+		})
+	}
+
+	// Expression case: ${{ matrix.experimental }} must resolve per matrix variant.
+	t.Run("matrix expression", func(t *testing.T) {
+		content := ReadTestdata(t, "continue_on_error_expr.in.yaml")
+		got, err := Parse(content)
+		require.NoError(t, err)
+		require.Len(t, got, 2)
+		// sorted by matrix name: (false) before (true)
+		_, jobFalse := got[0].Job()
+		_, jobTrue := got[1].Job()
+		assert.False(t, jobFalse.GetContinueOnError())
+		assert.True(t, jobTrue.GetContinueOnError())
+	})
+}
+
 func TestParseMappingNode(t *testing.T) {
 	tests := []struct {
 		input   string
diff --git a/modules/actions/jobparser/testdata/continue_on_error_expr.in.yaml b/modules/actions/jobparser/testdata/continue_on_error_expr.in.yaml
new file mode 100644
index 0000000000..5dc55d38eb
--- /dev/null
+++ b/modules/actions/jobparser/testdata/continue_on_error_expr.in.yaml
@@ -0,0 +1,10 @@
+name: test
+jobs:
+  job1:
+    strategy:
+      matrix:
+        experimental: [false, true]
+    runs-on: ubuntu-22.04
+    continue-on-error: ${{ matrix.experimental }}
+    steps:
+      - run: echo hi
diff --git a/modules/actions/jobparser/testdata/continue_on_error_expr.out.yaml b/modules/actions/jobparser/testdata/continue_on_error_expr.out.yaml
new file mode 100644
index 0000000000..0d3afff878
--- /dev/null
+++ b/modules/actions/jobparser/testdata/continue_on_error_expr.out.yaml
@@ -0,0 +1,25 @@
+name: test
+jobs:
+  job1:
+    name: job1 (false)
+    runs-on: ubuntu-22.04
+    steps:
+      - run: echo hi
+    continue-on-error: false
+    strategy:
+      matrix:
+        experimental:
+          - false
+---
+name: test
+jobs:
+  job1:
+    name: job1 (true)
+    runs-on: ubuntu-22.04
+    steps:
+      - run: echo hi
+    continue-on-error: true
+    strategy:
+      matrix:
+        experimental:
+          - true
diff --git a/services/actions/job_emitter.go b/services/actions/job_emitter.go
index 9c9a408db0..1b45eab83f 100644
--- a/services/actions/job_emitter.go
+++ b/services/actions/job_emitter.go
@@ -380,6 +380,11 @@ func (r *jobStatusResolver) resolveCheckNeeds(id int64) (allDone, allSucceed boo
 		if !needStatus.IsDone() {
 			allDone = false
 		}
+		// A failed need with continue-on-error:true is treated as success, matching AggregateJobStatus,
+		// so a downstream job with an implicit `success()` is not skipped.
+		if needJob := r.jobMap[need]; needJob != nil && needJob.ContinueOnError && needStatus == actions_model.StatusFailure {
+			continue
+		}
 		if needStatus.In(actions_model.StatusFailure, actions_model.StatusCancelled, actions_model.StatusSkipped) {
 			allSucceed = false
 		}
diff --git a/services/actions/job_emitter_test.go b/services/actions/job_emitter_test.go
index 892ba6c2c5..6b15ecd346 100644
--- a/services/actions/job_emitter_test.go
+++ b/services/actions/job_emitter_test.go
@@ -131,6 +131,24 @@ jobs:
 			},
 			want: map[int64]actions_model.Status{2: actions_model.StatusSkipped},
 		},
+		{
+			name: "`if` is empty and a failed need has continue-on-error",
+			jobs: actions_model.ActionJobList{
+				{ID: 1, JobID: "job1", Status: actions_model.StatusFailure, ContinueOnError: true, Needs: []string{}},
+				{ID: 2, JobID: "job2", Status: actions_model.StatusBlocked, Needs: []string{"job1"}, WorkflowPayload: []byte(
+					`
+name: test
+on: push
+jobs:
+  job2:
+    runs-on: ubuntu-latest
+    needs: job1
+    steps:
+      - run: echo "should run, job1 failure is masked by continue-on-error"
+`)},
+			},
+			want: map[int64]actions_model.Status{2: actions_model.StatusWaiting},
+		},
 	}
 	assert.NoError(t, unittest.PrepareTestDatabase())
 	ctx := t.Context()
diff --git a/services/actions/rerun.go b/services/actions/rerun.go
index 1ef84608a0..17076a594c 100644
--- a/services/actions/rerun.go
+++ b/services/actions/rerun.go
@@ -506,6 +506,7 @@ func cloneRunJobForAttempt(templateJob *actions_model.ActionRunJob, attempt *act
 		AttemptJobID:           templateJob.AttemptJobID,
 		Needs:                  slices.Clone(templateJob.Needs),
 		RunsOn:                 slices.Clone(templateJob.RunsOn),
+		ContinueOnError:        templateJob.ContinueOnError,
 		Status:                 templateJob.Status,
 		RawConcurrency:         templateJob.RawConcurrency,
 		IsConcurrencyEvaluated: templateJob.IsConcurrencyEvaluated,
diff --git a/services/actions/rerun_test.go b/services/actions/rerun_test.go
index baa678a440..9c0f63e165 100644
--- a/services/actions/rerun_test.go
+++ b/services/actions/rerun_test.go
@@ -80,6 +80,22 @@ func TestGetFailedJobsForRerun(t *testing.T) {
 	})
 }
 
+func TestCloneRunJobForAttempt(t *testing.T) {
+	attempt := &actions_model.ActionRunAttempt{ID: 42, Attempt: 2}
+
+	t.Run("preserves continue-on-error", func(t *testing.T) {
+		template := &actions_model.ActionRunJob{ContinueOnError: true, Status: actions_model.StatusFailure}
+		clone := cloneRunJobForAttempt(template, attempt)
+		assert.True(t, clone.ContinueOnError)
+	})
+
+	t.Run("defaults to false when template has it unset", func(t *testing.T) {
+		template := &actions_model.ActionRunJob{ContinueOnError: false}
+		clone := cloneRunJobForAttempt(template, attempt)
+		assert.False(t, clone.ContinueOnError)
+	})
+}
+
 func TestRerunValidation(t *testing.T) {
 	runningRun := &actions_model.ActionRun{Status: actions_model.StatusRunning}
 
diff --git a/services/actions/reusable_workflow.go b/services/actions/reusable_workflow.go
index 65a6acfbd0..cf77824c4d 100644
--- a/services/actions/reusable_workflow.go
+++ b/services/actions/reusable_workflow.go
@@ -325,6 +325,7 @@ func insertCallerChildren(ctx context.Context, run *actions_model.ActionRun, att
 			AttemptJobID:            attemptJobID,
 			Needs:                   needs,
 			RunsOn:                  parsedChild.RunsOn(),
+			ContinueOnError:         parsedChild.GetContinueOnError(),
 			Status:                  actions_model.StatusBlocked,
 			ParentJobID:             caller.ID,
 			WorkflowSourceRepoID:    sourceRepoID,
diff --git a/services/actions/run.go b/services/actions/run.go
index 82335af861..58e50de0c2 100644
--- a/services/actions/run.go
+++ b/services/actions/run.go
@@ -164,6 +164,7 @@ func InsertRun(ctx context.Context, run *actions_model.ActionRun, content []byte
 				Status:                  util.Iif(shouldBlockJob, actions_model.StatusBlocked, actions_model.StatusWaiting),
 				WorkflowSourceRepoID:    run.RepoID,
 				WorkflowSourceCommitSHA: run.CommitSHA,
+				ContinueOnError:         job.GetContinueOnError(),
 			}
 			// Parse workflow/job permissions (no clamping here)
 			if perms := ExtractJobPermissionsFromWorkflow(v, job); perms != nil {