From cf0f25b79840100336d729ccd5dc6c7580f90c71 Mon Sep 17 00:00:00 2001 From: Zettat123 Date: Fri, 15 May 2026 02:39:18 -0600 Subject: [PATCH] fix(actions): deadlock between `PrepareRunAndInsert` and `UpdateTaskByState` (#37692) Fix #36234 ## Bug Logs show `PrepareRunAndInsert: InsertRun: Error 1213: Deadlock found`, which `handleWorkflows` silently swallows via `log.Error + continue`, so the triggered run is dropped. ## Root cause The path `UpdateRun -> UpdateRepoRunsNumbers` runs the following SQL inside every status-changing transaction: ```sql UPDATE repository SET num_action_runs = (SELECT count(*) FROM action_run WHERE repo_id = N), num_closed_action_runs = (SELECT count(*) FROM action_run WHERE repo_id = N AND status IN (...)) WHERE id = N; ``` On any DB that treats subqueries inside an UPDATE as locking reads, this statement takes locks in two steps: 1. The outer UPDATE acquires an X lock on `repository[id=N]` 2. The embedded SELECT subqueries are evaluated as locking reads, taking S locks on every `action_run` row matching `repo_id = N` Two such concurrent transactions form a cycle via `repository[N]`: | Tx | Holds | Wants | Blocked by | |---|---|---|---| | A: `PrepareRunAndInsert` (push trigger) | X on inserted `action_run` row R_A; X on `repository[N]` (outer UPDATE already through step 1) | S on `action_run` rows for repo N (subquery, step 2) | B's X lock on R_B | | B: `UpdateTaskByState` (runner callback) | X on `action_run` row R_B (from `UpdateRun`) | X on `repository[N]` (outer UPDATE, step 1) | A's X lock on `repository[N]` | | **Cycle** | A waits for R_B; B waits for `repository[N]` | | deadlock error -> `handleWorkflows` swallows -> run lost | PostgreSQL's MVCC reads do not take these locks and SQLite serializes writers, so the symptom only surfaces on MySQL/MSSQL. ## Fix Split `UpdateRepoRunsNumbers` into small SQLs to avoid locking reads and move it out of DB transactions. --------- Signed-off-by: wxiaoguang Co-authored-by: wxiaoguang Co-authored-by: Nicolas --- models/actions/run.go | 65 ++++++++++++++++--------------------- models/actions/run_test.go | 3 +- services/actions/cleanup.go | 2 ++ services/actions/notify.go | 4 +++ 4 files changed, 35 insertions(+), 39 deletions(-) diff --git a/models/actions/run.go b/models/actions/run.go index a44b0ff343..5d7f51ade3 100644 --- a/models/actions/run.go +++ b/models/actions/run.go @@ -7,7 +7,6 @@ import ( "context" "errors" "fmt" - "slices" "strings" "time" @@ -224,30 +223,34 @@ func (run *ActionRun) IsSchedule() bool { } // UpdateRepoRunsNumbers updates the number of runs and closed runs of a repository. -func UpdateRepoRunsNumbers(ctx context.Context, repo *repo_model.Repository) error { - _, err := db.GetEngine(ctx).ID(repo.ID). - NoAutoTime(). - Cols("num_action_runs", "num_closed_action_runs"). - SetExpr("num_action_runs", - builder.Select("count(*)").From("action_run"). - Where(builder.Eq{"repo_id": repo.ID}), - ). - SetExpr("num_closed_action_runs", - builder.Select("count(*)").From("action_run"). - Where(builder.Eq{ - "repo_id": repo.ID, - }.And( - builder.In("status", - StatusSuccess, - StatusFailure, - StatusCancelled, - StatusSkipped, - ), - ), - ), - ). - Update(repo) - return err +// Callers MUST invoke this from outside any transaction that has X-locked action_run rows for the same repo, otherwise, transaction deadlock +func UpdateRepoRunsNumbers(ctx context.Context, repoID int64) { + if db.InTransaction(ctx) { + setting.PanicInDevOrTesting("UpdateRepoRunsNumbers must not be called inside a transaction") + } + + e := db.GetEngine(ctx) + + numActionRuns, err := e.Where("repo_id = ?", repoID).Count(new(ActionRun)) + if err != nil { + log.Error("UpdateRepoRunsNumbers count num_action_runs for repo %d: %v", repoID, err) + return + } + + numClosedActionRuns, err := e.Where("repo_id = ?", repoID). + In("status", StatusSuccess, StatusFailure, StatusCancelled, StatusSkipped). + Count(new(ActionRun)) + if err != nil { + log.Error("UpdateRepoRunsNumbers count num_closed_action_runs for repo %d: %v", repoID, err) + return + } + + if _, err := e.ID(repoID).Cols("num_action_runs", "num_closed_action_runs").NoAutoTime().Update(&repo_model.Repository{ + NumActionRuns: int(numActionRuns), + NumClosedActionRuns: int(numClosedActionRuns), + }); err != nil { + log.Error("UpdateRepoRunsNumbers update repo %d: %v", repoID, err) + } } // CancelPreviousJobs cancels all previous jobs of the same repository, reference, workflow, and event. @@ -415,18 +418,6 @@ func UpdateRun(ctx context.Context, run *ActionRun, cols ...string) error { // It's impossible that the run is not found, since Gitea never deletes runs. } - if run.Status != 0 || slices.Contains(cols, "status") { - if run.RepoID == 0 { - setting.PanicInDevOrTesting("RepoID should not be 0") - } - if err = run.LoadRepo(ctx); err != nil { - return err - } - if err := UpdateRepoRunsNumbers(ctx, run.Repo); err != nil { - return err - } - } - return nil } diff --git a/models/actions/run_test.go b/models/actions/run_test.go index e82cbe84b5..6c8160beae 100644 --- a/models/actions/run_test.go +++ b/models/actions/run_test.go @@ -29,8 +29,7 @@ func TestUpdateRepoRunsNumbers(t *testing.T) { assert.Equal(t, 2, repo.NumClosedActionRuns) // now update will correct them, only num_actionr_runs and num_closed_action_runs should be updated - err = UpdateRepoRunsNumbers(t.Context(), repo) - assert.NoError(t, err) + UpdateRepoRunsNumbers(t.Context(), repo.ID) repo = unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 4}) assert.Equal(t, 4, repo.NumActionRuns) assert.Equal(t, 3, repo.NumClosedActionRuns) diff --git a/services/actions/cleanup.go b/services/actions/cleanup.go index f223c98125..5f605cd265 100644 --- a/services/actions/cleanup.go +++ b/services/actions/cleanup.go @@ -250,6 +250,8 @@ func DeleteRun(ctx context.Context, run *actions_model.ActionRun) error { return err } + actions_model.UpdateRepoRunsNumbers(ctx, repoID) + // Delete files on storage for _, tas := range tasks { removeTaskLog(ctx, tas) diff --git a/services/actions/notify.go b/services/actions/notify.go index e8b05c9fec..21ca8c7a14 100644 --- a/services/actions/notify.go +++ b/services/actions/notify.go @@ -78,7 +78,11 @@ func NotifyWorkflowRunStatusUpdate(ctx context.Context, run *actions_model.Actio } triggerUser = attempt.TriggerUser } + notify_service.WorkflowRunStatusUpdate(ctx, run.Repo, triggerUser, run) + + // Recomputes the repository's num_action_runs / num_closed_action_runs counters since the run's status changed + actions_model.UpdateRepoRunsNumbers(ctx, run.RepoID) } // NotifyWorkflowJobsStatusUpdate notifies status updates for jobs without task.