From 198ef500d2e381f811ee9217bfb63f5ce627431e Mon Sep 17 00:00:00 2001
From: silverwind <me@silverwind.io>
Date: Mon, 4 May 2026 13:10:42 +0200
Subject: [PATCH] Don't unblock run-level-concurrency-blocked runs in the
 resolver (#37461)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #37446.

The job-status resolver in `checkJobsOfCurrentRunAttempt` only
considered `needs` and job-level concurrency when transitioning jobs out
of `Blocked`. When something drove the resolver against a run blocked
solely by workflow-level concurrency — for example, a sibling run in the
same group entering the queue and triggering `EmitJobsIfReadyByRun` —
the run's job silently became `Waiting` while another run still held the
concurrency group, and the runner could pick it up, defeating the
concurrency guarantee.

The fix bails out of the resolver when the run's latest attempt is still
blocked by run-level concurrency. `checkRunConcurrency` re-evaluates
when the holding run finishes.

Covered by a unit test
(`Test_checkJobsOfCurrentRunAttempt_RunLevelConcurrencyKeepsJobsBlocked`
in `services/actions/job_emitter_test.go`) that sets up a Running holder
attempt and a Blocked sibling attempt in the same concurrency group
directly in the DB, calls `checkJobsOfCurrentRunAttempt`, and asserts
the blocked job stays `Blocked`. Fails on master, passes with the fix.

---
This PR was written with the help of Claude Opus 4.7

---------

Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com>
---
 services/actions/job_emitter.go      | 18 ++++++++
 services/actions/job_emitter_test.go | 65 ++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/services/actions/job_emitter.go b/services/actions/job_emitter.go
index 489b36a3a7..b81ec9fe6c 100644
--- a/services/actions/job_emitter.go
+++ b/services/actions/job_emitter.go
@@ -228,6 +228,24 @@ func checkJobsOfCurrentRunAttempt(ctx context.Context, run *actions_model.Action
 	if err != nil {
 		return nil, nil, nil, err
 	}
+	// The resolver below only considers needs and job-level concurrency, so a run blocked
+	// solely by run-level concurrency would have its jobs unblocked here. checkRunConcurrency
+	// re-evaluates when the holding run finishes.
+	if run.Status.IsBlocked() {
+		attempt, has, err := run.GetLatestAttempt(ctx)
+		if err != nil {
+			return nil, nil, nil, fmt.Errorf("GetLatestAttempt: %w", err)
+		}
+		if has {
+			shouldBlock, err := shouldBlockRunByConcurrency(ctx, attempt)
+			if err != nil {
+				return nil, nil, nil, fmt.Errorf("shouldBlockRunByConcurrency: %w", err)
+			}
+			if shouldBlock {
+				return jobs, nil, nil, nil
+			}
+		}
+	}
 	vars, err := actions_model.GetVariablesOfRun(ctx, run)
 	if err != nil {
 		return nil, nil, nil, err
diff --git a/services/actions/job_emitter_test.go b/services/actions/job_emitter_test.go
index 11998e01b2..9a40927e06 100644
--- a/services/actions/job_emitter_test.go
+++ b/services/actions/job_emitter_test.go
@@ -228,3 +228,68 @@ func Test_checkRunConcurrency_NoDuplicateConcurrencyGroupCheck(t *testing.T) {
 		assert.Equal(t, jobBBlocked.ID, jobs[0].ID)
 	}
 }
+
+// Test_checkJobsOfCurrentRunAttempt_RunLevelConcurrencyKeepsJobsBlocked verifies that
+// the resolver does not transition a job out of Blocked while another run still holds
+// the workflow-level concurrency group. Regression for #37446.
+func Test_checkJobsOfCurrentRunAttempt_RunLevelConcurrencyKeepsJobsBlocked(t *testing.T) {
+	assert.NoError(t, unittest.PrepareTestDatabase())
+	ctx := t.Context()
+
+	const group = "test-run-level-concurrency-keeps-blocked"
+
+	// Holder run: Running attempt in the concurrency group.
+	holderRun := &actions_model.ActionRun{
+		RepoID: 4, OwnerID: 1, TriggerUserID: 1,
+		WorkflowID: "test.yml", Index: 9911, Ref: "refs/heads/main",
+		Status: actions_model.StatusRunning,
+	}
+	assert.NoError(t, db.Insert(ctx, holderRun))
+	holderAttempt := &actions_model.ActionRunAttempt{
+		RepoID: 4, RunID: holderRun.ID, Attempt: 1,
+		Status: actions_model.StatusRunning, ConcurrencyGroup: group,
+	}
+	assert.NoError(t, db.Insert(ctx, holderAttempt))
+	_, err := db.Exec(ctx, "UPDATE `action_run` SET latest_attempt_id = ? WHERE id = ?", holderAttempt.ID, holderRun.ID)
+	assert.NoError(t, err)
+
+	// Blocked run: Blocked attempt in the same group, with one Blocked job that has
+	// no needs and no job-level concurrency. Without the run-level guard in
+	// checkJobsOfCurrentRunAttempt, the resolver would transition this job to Waiting.
+	blockedRun := &actions_model.ActionRun{
+		RepoID: 4, OwnerID: 1, TriggerUserID: 1,
+		WorkflowID: "test.yml", Index: 9912, Ref: "refs/heads/main",
+		Status: actions_model.StatusBlocked,
+	}
+	assert.NoError(t, db.Insert(ctx, blockedRun))
+	blockedAttempt := &actions_model.ActionRunAttempt{
+		RepoID: 4, RunID: blockedRun.ID, Attempt: 1,
+		Status: actions_model.StatusBlocked, ConcurrencyGroup: group,
+	}
+	assert.NoError(t, db.Insert(ctx, blockedAttempt))
+	_, err = db.Exec(ctx, "UPDATE `action_run` SET latest_attempt_id = ? WHERE id = ?", blockedAttempt.ID, blockedRun.ID)
+	assert.NoError(t, err)
+	blockedRun.LatestAttemptID = blockedAttempt.ID
+	blockedJob := &actions_model.ActionRunJob{
+		RunID: blockedRun.ID, RunAttemptID: blockedAttempt.ID, AttemptJobID: 1,
+		RepoID: 4, OwnerID: 1, JobID: "job1", Name: "job1",
+		Status: actions_model.StatusBlocked,
+		WorkflowPayload: []byte(`
+name: test
+on: push
+jobs:
+  job1:
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo
+`),
+	}
+	assert.NoError(t, db.Insert(ctx, blockedJob))
+
+	_, updated, _, err := checkJobsOfCurrentRunAttempt(ctx, blockedRun)
+	assert.NoError(t, err)
+	assert.Empty(t, updated)
+
+	refreshed := unittest.AssertExistsAndLoadBean(t, &actions_model.ActionRunJob{ID: blockedJob.ID})
+	assert.Equal(t, actions_model.StatusBlocked, refreshed.Status)
+}