From 198ef500d2e381f811ee9217bfb63f5ce627431e Mon Sep 17 00:00:00 2001 From: silverwind Date: Mon, 4 May 2026 13:10:42 +0200 Subject: [PATCH] Don't unblock run-level-concurrency-blocked runs in the resolver (#37461) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #37446. The job-status resolver in `checkJobsOfCurrentRunAttempt` only considered `needs` and job-level concurrency when transitioning jobs out of `Blocked`. When something drove the resolver against a run blocked solely by workflow-level concurrency — for example, a sibling run in the same group entering the queue and triggering `EmitJobsIfReadyByRun` — the run's job silently became `Waiting` while another run still held the concurrency group, and the runner could pick it up, defeating the concurrency guarantee. The fix bails out of the resolver when the run's latest attempt is still blocked by run-level concurrency. `checkRunConcurrency` re-evaluates when the holding run finishes. Covered by a unit test (`Test_checkJobsOfCurrentRunAttempt_RunLevelConcurrencyKeepsJobsBlocked` in `services/actions/job_emitter_test.go`) that sets up a Running holder attempt and a Blocked sibling attempt in the same concurrency group directly in the DB, calls `checkJobsOfCurrentRunAttempt`, and asserts the blocked job stays `Blocked`. Fails on master, passes with the fix. 
--- This PR was written with the help of Claude Opus 4.7 --------- Co-authored-by: Claude (Opus 4.7) --- services/actions/job_emitter.go | 18 ++++++++ services/actions/job_emitter_test.go | 65 ++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/services/actions/job_emitter.go b/services/actions/job_emitter.go index 489b36a3a7..b81ec9fe6c 100644 --- a/services/actions/job_emitter.go +++ b/services/actions/job_emitter.go @@ -228,6 +228,24 @@ func checkJobsOfCurrentRunAttempt(ctx context.Context, run *actions_model.Action if err != nil { return nil, nil, nil, err } + // The resolver below only considers needs and job-level concurrency, so a run blocked + // solely by run-level concurrency would have its jobs unblocked here. checkRunConcurrency + // re-evaluates when the holding run finishes. + if run.Status.IsBlocked() { + attempt, has, err := run.GetLatestAttempt(ctx) + if err != nil { + return nil, nil, nil, fmt.Errorf("GetLatestAttempt: %w", err) + } + if has { + shouldBlock, err := shouldBlockRunByConcurrency(ctx, attempt) + if err != nil { + return nil, nil, nil, fmt.Errorf("shouldBlockRunByConcurrency: %w", err) + } + if shouldBlock { + return jobs, nil, nil, nil + } + } + } vars, err := actions_model.GetVariablesOfRun(ctx, run) if err != nil { return nil, nil, nil, err diff --git a/services/actions/job_emitter_test.go b/services/actions/job_emitter_test.go index 11998e01b2..9a40927e06 100644 --- a/services/actions/job_emitter_test.go +++ b/services/actions/job_emitter_test.go @@ -228,3 +228,68 @@ func Test_checkRunConcurrency_NoDuplicateConcurrencyGroupCheck(t *testing.T) { assert.Equal(t, jobBBlocked.ID, jobs[0].ID) } } + +// Test_checkJobsOfCurrentRunAttempt_RunLevelConcurrencyKeepsJobsBlocked verifies that +// the resolver does not transition a job out of Blocked while another run still holds +// the workflow-level concurrency group. Regression for #37446. 
+func Test_checkJobsOfCurrentRunAttempt_RunLevelConcurrencyKeepsJobsBlocked(t *testing.T) {
+	assert.NoError(t, unittest.PrepareTestDatabase())
+	ctx := t.Context()
+
+	const group = "test-run-level-concurrency-keeps-blocked" // used as ConcurrencyGroup by both attempts below
+
+	// Holder run: Running attempt in the concurrency group.
+	holderRun := &actions_model.ActionRun{
+		RepoID: 4, OwnerID: 1, TriggerUserID: 1,
+		WorkflowID: "test.yml", Index: 9911, Ref: "refs/heads/main",
+		Status: actions_model.StatusRunning,
+	}
+	assert.NoError(t, db.Insert(ctx, holderRun))
+	holderAttempt := &actions_model.ActionRunAttempt{
+		RepoID: 4, RunID: holderRun.ID, Attempt: 1,
+		Status: actions_model.StatusRunning, ConcurrencyGroup: group,
+	}
+	assert.NoError(t, db.Insert(ctx, holderAttempt))
+	_, err := db.Exec(ctx, "UPDATE `action_run` SET latest_attempt_id = ? WHERE id = ?", holderAttempt.ID, holderRun.ID)
+	assert.NoError(t, err)
+
+	// Blocked run: Blocked attempt in the same group, with one Blocked job that has
+	// no needs and no job-level concurrency. Without the run-level guard in
+	// checkJobsOfCurrentRunAttempt, the resolver would transition this job to Waiting.
+	blockedRun := &actions_model.ActionRun{
+		RepoID: 4, OwnerID: 1, TriggerUserID: 1,
+		WorkflowID: "test.yml", Index: 9912, Ref: "refs/heads/main",
+		Status: actions_model.StatusBlocked,
+	}
+	assert.NoError(t, db.Insert(ctx, blockedRun))
+	blockedAttempt := &actions_model.ActionRunAttempt{
+		RepoID: 4, RunID: blockedRun.ID, Attempt: 1,
+		Status: actions_model.StatusBlocked, ConcurrencyGroup: group,
+	}
+	assert.NoError(t, db.Insert(ctx, blockedAttempt))
+	_, err = db.Exec(ctx, "UPDATE `action_run` SET latest_attempt_id = ? WHERE id = ?", blockedAttempt.ID, blockedRun.ID)
+	assert.NoError(t, err)
+	blockedRun.LatestAttemptID = blockedAttempt.ID // mirror the UPDATE above on the in-memory struct passed to the resolver
+	blockedJob := &actions_model.ActionRunJob{
+		RunID: blockedRun.ID, RunAttemptID: blockedAttempt.ID, AttemptJobID: 1,
+		RepoID: 4, OwnerID: 1, JobID: "job1", Name: "job1",
+		Status: actions_model.StatusBlocked,
+		WorkflowPayload: []byte(`
+name: test
+on: push
+jobs:
+  job1:
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo
+`),
+	}
+	assert.NoError(t, db.Insert(ctx, blockedJob))
+
+	_, updated, _, err := checkJobsOfCurrentRunAttempt(ctx, blockedRun)
+	assert.NoError(t, err)
+	assert.Empty(t, updated) // nothing may be reported as updated while the holder attempt is Running
+
+	refreshed := unittest.AssertExistsAndLoadBean(t, &actions_model.ActionRunJob{ID: blockedJob.ID})
+	assert.Equal(t, actions_model.StatusBlocked, refreshed.Status) // the reloaded job must still be Blocked
+}