apprt/gtk: fix audio-bell GStreamer thread leak (reuse one MediaFile per surface) (#12815)

## Problem

Every audio bell calls `gtk.MediaFile.newForFilename`, which spins up a
full GStreamer pipeline. The GTK4 GStreamer backend's GL sink starts
`gstglcontext`/`gldisplay-event` threads that are **never joined on
teardown**, so allocating a fresh `MediaFile` per ring leaks a pipeline
and ~4 threads on every bell. The old `notify::ended -> unref` handler
discarded the pipeline but did not (and could not) join those threads.

A long-running instance accumulated **705 threads over ~4h** of normal
use.

## Fix

Cache one `MediaFile` per surface (`priv.bell_media`), rebuilt only when
`bell-audio-path` changes and unref'd on `dispose`. Each bell now
replays the same pipeline via `seek(0)` + `play()` instead of creating a
new one. `seek(0)` is required so an ended stream plays again (cf.
#8957).

## Verification

Confirmed on a real running instance with the fix: GStreamer's global
element counter only ever reached `oggdemux4` over an hour of use (one
pipeline per bell-ringing surface, reused for every subsequent bell) and
the process thread count stayed flat — versus the per-bell growth
before.

## Commits

1. **The fix** — reuse one MediaFile per surface.
2. **Unit regression test** — guards the `bellMediaFile` reuse contract
(same path → same object, changed path → rebuild). Runs in the existing
`test-gtk` CI job; needs no display.
3. **End-to-end CI job** *(kept separate so it can be dropped
independently)* — `test/bell-leak.sh` + a `test-gtk-bell-leak` workflow
job that runs ghostty headless (Xvfb + software GL), rings 120 bells,
and fails if the thread count grows per-bell. It's heavier and more
environment-sensitive (needs Xvfb/Mesa/GStreamer on the runner), so it's
isolated for easy review/removal.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
This commit is contained in:
Jeffrey C. Ollie
2026-05-26 11:20:02 -05:00
committed by GitHub
3 changed files with 202 additions and 20 deletions

View File

@@ -281,4 +281,113 @@ in {
server.wait_for_file("${user.home}/.terminfo/x/xterm-ghostty", timeout=30)
'';
};
# Regression test for the GTK audio-bell GStreamer thread leak. Each audio
# bell used to allocate a fresh gtk.MediaFile (and thus a GStreamer pipeline
# whose GL sink spawns gstglcontext/gldisplay-event threads that are never
# joined), leaking ~4 threads per ring; the fix reuses one MediaFile per
# surface. This rings many bells and asserts the GUI process thread count
# stays bounded. Runs under GNOME on Wayland so it exercises the real path.
bell-leak-check-gnome = mkTestGnome {
name = "bell-leak-check-gnome";
settings = {
# The VM has no GPU, so GNOME and Ghostty render via llvmpipe. Give the
# guest enough cores/RAM that software GL can bring up Ghostty's window
# before the +new-window D-Bus activation times out, and force clean
# software GL so mesa doesn't stall probing for absent hardware.
virtualisation.cores = 4;
virtualisation.memorySize = 4096;
environment.sessionVariables = {
LIBGL_ALWAYS_SOFTWARE = "1";
GALLIUM_DRIVER = "llvmpipe";
};
home-manager.users.ghostty = {
xdg.configFile = {
"ghostty/config".text = ''
bell-features = audio
bell-audio-path = ${pkgs.sound-theme-freedesktop}/share/sounds/freedesktop/stereo/bell.oga
bell-audio-volume = 0
'';
};
};
};
testScript = {nodes, ...}: let
user = nodes.machine.users.users.ghostty;
bus_path = "/run/user/${toString user.uid}/bus";
bus = "DBUS_SESSION_BUS_ADDRESS=unix:path=${bus_path}";
gdbus = "${bus} gdbus";
ghostty = "${bus} ghostty";
su = command: "su - ${user.name} -c '${command}'";
gseval = "call --session -d org.gnome.Shell -o /org/gnome/Shell -m org.gnome.Shell.Eval";
wm_class = su "${gdbus} ${gseval} global.display.focus_window.wm_class";
# Emits N BELs >100ms apart (which clears the bell rate-limit), then holds
# so the window (and its audio pipeline) stays alive while we sample. Run
# by typing its path into the open window; written as a script to avoid
# shell-escaping the BEL byte through the test driver.
ringBells = pkgs.writeShellScript "ring-bells" ''
for _ in $(seq 100); do printf '\a'; sleep 0.12; done
sleep 60
'';
in ''
# Thread count of the ghostty GUI process: the ghostty process with the
# most threads. The CLI also spawns 1-thread launcher/helper stubs (and
# this very command matches the pgrep), but those are filtered by the max.
def ghostty_threads():
out = machine.succeed(
"max=0; "
"for p in $(pgrep -f ghostty); do "
" n=$(ls /proc/$p/task 2>/dev/null | wc -l); "
" [ \"$n\" -gt \"$max\" ] && max=$n; "
"done; "
"echo $max"
).strip()
return int(out)
def window_open():
status, _ = machine.execute("${wm_class} | grep -q 'com.mitchellh.ghostty-debug'")
return status == 0
with subtest("boot and open a keep-alive ghostty window"):
start_all()
machine.wait_for_x()
machine.wait_for_file("${bus_path}")
machine.systemctl("enable app-com.mitchellh.ghostty-debug.service", user="${user.name}")
# Under software GL the +new-window D-Bus activation can exceed its
# client-side timeout even though the window still comes up, so we
# tolerate a failed call and (re)nudge until the window appears.
for _ in range(6):
machine.execute("${su "${ghostty} +new-window"}")
if window_open():
break
machine.sleep(5)
assert window_open(), "ghostty window never appeared"
machine.sleep(2)
with subtest("ring 100 bells and assert the thread count stays bounded"):
baseline = ghostty_threads()
# Ring the bells by running the script inside the focused window (type
# its path + Enter). A separate `ghostty -e` process can't open the
# display from the bare su environment, so we drive the open window.
machine.send_chars("${ringBells}\n")
# 100 bells * 0.12s + settle, within the script's trailing hold so the
# window (and its audio pipeline) is still alive when we sample.
machine.sleep(22)
final = ghostty_threads()
growth = final - baseline
print(f"bell-leak: baseline={baseline} final={final} growth={growth}")
# Pre-fix grows ~4 threads/bell (~+400 over 100 bells); the fix adds
# only one pipeline's worth of threads. 40 sits well clear of both.
assert growth <= 40, (
f"thread count grew by {growth} over 100 bells "
f"(baseline={baseline}, final={final}): audio-bell pipeline leak regressed"
)
'';
};
}

View File

@@ -674,6 +674,12 @@ pub const Surface = extern struct {
// false by a parent widget.
bell_ringing: bool = false,
// The audio bell's MediaFile, reused across bells so we don't leak a
// GStreamer pipeline (and its GL threads) on every ring. Built lazily
// on the first audio bell and rebuilt when `bell-audio-path` changes;
// unref'd on dispose. See ringBell and media.zig.
bell_media: ?*gtk.MediaFile = null,
/// True if this surface is in an error state. This is currently
/// a simple boolean with no additional information on WHAT the
/// error state is, because we don't yet need it or use it. For now,
@@ -1854,6 +1860,11 @@ pub const Surface = extern struct {
priv.config = null;
}
if (priv.bell_media) |v| {
v.unref();
priv.bell_media = null;
}
if (priv.vadj_signal_group) |group| {
group.setTarget(null);
group.as(gobject.Object).unref();
@@ -2486,8 +2497,15 @@ pub const Surface = extern struct {
1.0,
);
const media_file = media.fromFilename(path) orelse break :audio;
media.playMediaFile(media_file, volume, required);
// Reuse one MediaFile per surface (rebuilt only when the path
// changes) so each bell replays the same pipeline instead of
// leaking a fresh one. Assign unconditionally: bellMediaFile frees
// any stale MediaFile and returns the current slot value (possibly
// null if the path is now inaccessible), so priv.bell_media never
// dangles.
priv.bell_media = media.bellMediaFile(priv.bell_media, path, required);
const media_file = priv.bell_media orelse break :audio;
media.playBell(media_file, volume);
}
}

View File

@@ -44,9 +44,38 @@ pub fn fromResource(path: [:0]const u8) ?*gtk.MediaFile {
return gtk.MediaFile.newForResource(path);
}
pub fn playMediaFile(media_file: *gtk.MediaFile, volume: f64, required: bool) void {
// If the audio file is marked as required, we'll emit an error if
// there was a problem playing it. Otherwise there will be silence.
/// Get-or-create a reusable bell MediaFile targeting `path`.
///
/// `current` is the surface's currently-cached MediaFile (or null). If it
/// already targets `path` it is returned unchanged; otherwise it is unref'd and
/// a fresh MediaFile is built for `path`. Returns null (after freeing `current`)
/// if `path` is inaccessible, leaving the caller's slot empty.
///
/// Reusing one MediaFile per surface is what prevents the GStreamer pipeline
/// leak: `gtk.MediaFile.newForFilename` spins up a full pipeline (and, via the
/// GTK4 GStreamer backend's GL sink, gstglcontext/gldisplay-event threads) that
/// is never torn down on the happy path, so allocating one per bell leaked a
/// pipeline + its threads on every ring. See the caller in surface.zig.
pub fn bellMediaFile(
current: ?*gtk.MediaFile,
path: [:0]const u8,
required: bool,
) ?*gtk.MediaFile {
if (current) |media_file| {
if (isForPath(media_file, path)) return media_file;
media_file.unref();
}
const media_file = fromFilename(path) orelse return null;
// If the audio file is marked as required, we'll emit an error if there
// was a problem playing it. Otherwise there will be silence. We connect
// this once, here, because the MediaFile is reused across bells.
//
// NOTE: we intentionally do NOT connect notify::ended to unref. The
// MediaFile is owned by the surface and replayed via `seek(0)` for every
// bell; unref'ing on `ended` is precisely what previously discarded (and
// leaked) a pipeline per ring.
if (required) {
_ = gobject.Object.signals.notify.connect(
media_file,
@@ -57,21 +86,27 @@ pub fn playMediaFile(media_file: *gtk.MediaFile, volume: f64, required: bool) vo
);
}
// Watch for the "ended" signal so that we can clean up after
// ourselves.
_ = gobject.Object.signals.notify.connect(
media_file,
?*anyopaque,
mediaFileEnded,
null,
.{ .detail = "ended" },
);
return media_file;
}
/// (Re)play `media_file` at `volume`. `seek(0)` rewinds first so that a
/// previously-ended stream plays again; without it playback only ever happens
/// once (see #8957). Safe on a freshly-created stream as well.
pub fn playBell(media_file: *gtk.MediaFile, volume: f64) void {
const media_stream = media_file.as(gtk.MediaStream);
media_stream.setVolume(volume);
media_stream.seek(0);
media_stream.play();
}
/// Whether `media_file` was created for `path`.
fn isForPath(media_file: *gtk.MediaFile, path: [:0]const u8) bool {
const file = media_file.getFile() orelse return false;
const cur = file.getPath() orelse return false;
defer glib.free(cur);
return std.mem.eql(u8, std.mem.span(cur), path);
}
fn mediaFileError(
media_file: *gtk.MediaFile,
_: *gobject.ParamSpec,
@@ -93,10 +128,30 @@ fn mediaFileError(
});
}
fn mediaFileEnded(
media_file: *gtk.MediaFile,
_: *gobject.ParamSpec,
_: ?*anyopaque,
) callconv(.c) void {
media_file.unref();
test "bellMediaFile reuses one MediaFile per path" {
// Regression guard for the audio-bell thread leak: each bell must replay a
// single cached MediaFile, not allocate a fresh GStreamer pipeline (which
// leaked gstglcontext/gldisplay-event threads) per ring. We assert the
// reuse contract of bellMediaFile directly; this needs no display and no
// playback (MediaFile is lazy), only that the path comparison drives reuse.
const testing = std.testing;
// The files need not exist: MediaFile only records the path until played.
const path_a: [:0]const u8 = "/tmp/ghostty-bell-test-a.oga";
const path_b: [:0]const u8 = "/tmp/ghostty-bell-test-b.oga";
var current = bellMediaFile(null, path_a, false) orelse return error.SkipZigTest;
const first = current;
try testing.expect(isForPath(current, path_a));
// Same path => identical object (the leak regression is rebuilding here).
current = bellMediaFile(current, path_a, false).?;
try testing.expectEqual(first, current);
// Changed path => rebuilt object targeting the new path (old one freed).
current = bellMediaFile(current, path_b, false) orelse return error.SkipZigTest;
try testing.expect(isForPath(current, path_b));
try testing.expect(!isForPath(current, path_a));
current.unref();
}