Files
gitea/services/actions/notify.go
Zettat123 cf0f25b798 fix(actions): deadlock between PrepareRunAndInsert and UpdateTaskByState (#37692)
Fix #36234

## Bug

Logs show `PrepareRunAndInsert: InsertRun: Error 1213: Deadlock found`,
which `handleWorkflows` silently swallows via `log.Error + continue`, so
the triggered run is dropped.

## Root cause

The path `UpdateRun -> UpdateRepoRunsNumbers` runs the following SQL
inside every status-changing transaction:

```sql
UPDATE repository
SET num_action_runs        = (SELECT count(*) FROM action_run WHERE repo_id = N),
    num_closed_action_runs = (SELECT count(*) FROM action_run WHERE repo_id = N AND status IN (...))
WHERE id = N;
```

On any DB that treats subqueries inside an UPDATE as locking reads, this
statement takes locks in two steps:

1. The outer UPDATE acquires an X lock on `repository[id=N]`
2. The embedded SELECT subqueries are evaluated as locking reads, taking
S locks on every `action_run` row matching `repo_id = N`

Two such concurrent transactions form a cycle via `repository[N]`:

| Tx | Holds | Wants | Blocked by |
|---|---|---|---|
| A: `PrepareRunAndInsert` (push trigger) | X on inserted `action_run`
row R_A; X on `repository[N]` (outer UPDATE already through step 1) | S
on `action_run` rows for repo N (subquery, step 2) | B's X lock on R_B |
| B: `UpdateTaskByState` (runner callback) | X on `action_run` row R_B
(from `UpdateRun`) | X on `repository[N]` (outer UPDATE, step 1) | A's X
lock on `repository[N]` |
| **Cycle** | A waits for R_B; B waits for `repository[N]` | | deadlock
error -> `handleWorkflows` swallows -> run lost |


PostgreSQL's MVCC reads do not take these locks and SQLite serializes
writers, so the symptom only surfaces on MySQL/MSSQL.

## Fix

Split `UpdateRepoRunsNumbers` into small SQLs to avoid locking reads and
move it out of DB transactions.

---------

Signed-off-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: Nicolas <bircni@icloud.com>
2026-05-15 08:39:18 +00:00

149 lines
5.1 KiB
Go

// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"context"
actions_model "code.gitea.io/gitea/models/actions"
"code.gitea.io/gitea/modules/log"
notify_service "code.gitea.io/gitea/services/notify"
)
// NotifyWorkflowJobsAndRunsStatusUpdate notifies status changes for a batch of jobs and the runs they affect.
// Use it when a workflow operation updates multiple jobs and runs.
func NotifyWorkflowJobsAndRunsStatusUpdate(ctx context.Context, jobs []*actions_model.ActionRunJob) {
if len(jobs) == 0 {
return
}
// The input jobs may belong to different runs, so track each affected run.
runs := make(map[int64]*actions_model.ActionRun, len(jobs))
jobsByRunID := make(map[int64][]*actions_model.ActionRunJob)
for _, job := range jobs {
if err := job.LoadAttributes(ctx); err != nil {
log.Error("Failed to load job attributes: %v", err)
continue
}
CreateCommitStatusForRunJobs(ctx, job.Run, job)
if _, ok := runs[job.RunID]; !ok {
runs[job.RunID] = job.Run
}
if _, ok := jobsByRunID[job.RunID]; !ok {
jobsByRunID[job.RunID] = make([]*actions_model.ActionRunJob, 0)
}
jobsByRunID[job.RunID] = append(jobsByRunID[job.RunID], job)
}
for _, run := range runs {
NotifyWorkflowRunStatusUpdate(ctx, run)
}
for _, jobs := range jobsByRunID {
NotifyWorkflowJobsStatusUpdate(ctx, jobs...)
}
}
// NotifyWorkflowRunStatusUpdateWithReload reloads the run before notifying its status update.
// Use it when only repo/run IDs are available or when the in-memory run may be stale after job updates.
func NotifyWorkflowRunStatusUpdateWithReload(ctx context.Context, repoID, runID int64) {
run, err := actions_model.GetRunByRepoAndID(ctx, repoID, runID)
if err != nil {
log.Error("GetRunByRepoAndID: %v", err)
return
}
NotifyWorkflowRunStatusUpdate(ctx, run)
}
// NotifyWorkflowRunStatusUpdate notifies a run status update using the latest attempt trigger user when available.
// Use it for run-level notifications when the caller already has the run model loaded.
func NotifyWorkflowRunStatusUpdate(ctx context.Context, run *actions_model.ActionRun) {
if err := run.LoadAttributes(ctx); err != nil {
log.Error("run.LoadAttributes: %v", err)
return
}
triggerUser := run.TriggerUser
if run.LatestAttemptID > 0 {
attempt, err := actions_model.GetRunAttemptByRepoAndID(ctx, run.RepoID, run.LatestAttemptID)
if err != nil {
log.Error("GetRunAttemptByRepoAndID: %v", err)
return
}
if err := attempt.LoadAttributes(ctx); err != nil {
log.Error("attempt.LoadAttributes: %v", err)
return
}
triggerUser = attempt.TriggerUser
}
notify_service.WorkflowRunStatusUpdate(ctx, run.Repo, triggerUser, run)
// Recomputes the repository's num_action_runs / num_closed_action_runs counters since the run's status changed
actions_model.UpdateRepoRunsNumbers(ctx, run.RepoID)
}
// NotifyWorkflowJobsStatusUpdate notifies status updates for jobs without task.
// Use it for batch or single-job notifications after state changes.
func NotifyWorkflowJobsStatusUpdate(ctx context.Context, jobs ...*actions_model.ActionRunJob) {
jobsByAttempt := make(map[int64][]*actions_model.ActionRunJob)
for _, job := range jobs {
if _, ok := jobsByAttempt[job.RunAttemptID]; !ok {
jobsByAttempt[job.RunAttemptID] = make([]*actions_model.ActionRunJob, 0)
}
jobsByAttempt[job.RunAttemptID] = append(jobsByAttempt[job.RunAttemptID], job)
}
for attemptID, js := range jobsByAttempt {
if attemptID == 0 {
for _, job := range js {
if err := job.LoadAttributes(ctx); err != nil {
log.Error("job.LoadAttributes: %v", err)
continue
}
notify_service.WorkflowJobStatusUpdate(ctx, job.Run.Repo, job.Run.TriggerUser, job, nil)
}
continue
}
attempt, err := actions_model.GetRunAttemptByRepoAndID(ctx, js[0].RepoID, attemptID)
if err != nil {
log.Error("GetRunAttemptByRepoAndID: %v", err)
continue
}
if err := attempt.LoadAttributes(ctx); err != nil {
log.Error("attempt.LoadAttributes: %v", err)
continue
}
for _, job := range js {
notify_service.WorkflowJobStatusUpdate(ctx, attempt.Run.Repo, attempt.TriggerUser, job, nil)
}
}
}
// NotifyWorkflowJobStatusUpdateWithTask notifies a single job status update when a concrete task is available.
// Use it for runner/task lifecycle callbacks so the notification includes the originating task context.
func NotifyWorkflowJobStatusUpdateWithTask(ctx context.Context, job *actions_model.ActionRunJob, task *actions_model.ActionTask) {
if job.RunAttemptID == 0 {
if err := job.LoadAttributes(ctx); err != nil {
log.Error("job.LoadAttributes: %v", err)
return
}
notify_service.WorkflowJobStatusUpdate(ctx, job.Run.Repo, job.Run.TriggerUser, job, task)
return
}
attempt, err := actions_model.GetRunAttemptByRepoAndID(ctx, job.RepoID, job.RunAttemptID)
if err != nil {
log.Error("GetRunAttemptByRepoAndID: %v", err)
return
}
if err := attempt.LoadAttributes(ctx); err != nil {
log.Error("attempt.LoadAttributes: %v", err)
return
}
notify_service.WorkflowJobStatusUpdate(ctx, attempt.Run.Repo, attempt.TriggerUser, job, task)
}