From bfe9fa0f8e937a4172d6fabfa209633c9f160698 Mon Sep 17 00:00:00 2001
From: James Fotherby
Date: Mon, 19 Jan 2026 21:59:43 -0800
Subject: [PATCH] fix(socket): remove stale socket files (#37378)

Problem:
Crashed Nvim leaves socket files.
Subsequent starts fail with "address already in use".

Solution:
On bind failure, test socket liveness via connect().
Remove if stale, retry bind().

Fixes #36581
---
 src/nvim/event/socket.c              | 65 ++++++++++++++++++++++++++++
 test/functional/core/server_spec.lua | 45 +++++++++++++++++++
 2 files changed, 110 insertions(+)

diff --git a/src/nvim/event/socket.c b/src/nvim/event/socket.c
index be20a249f7..ffb5bcf559 100644
--- a/src/nvim/event/socket.c
+++ b/src/nvim/event/socket.c
@@ -77,6 +77,37 @@ int socket_watcher_init(Loop *loop, SocketWatcher *watcher, const char *endpoint
   return 0;
 }
 
+/// uv_close() callback: sets the bool that handle->data points to, marking the close as done.
+static void probe_close_cb(uv_handle_t *handle)
+{
+  bool *closed = handle->data;
+  *closed = true;
+}
+
+/// Probe a socket by connecting to it (500ms timeout), then close the probe connection.
+/// @param loop Event loop used for the probe connection and pumped until it is closed
+/// @param addr Socket address to probe
+/// @return true if the connection succeeded (socket is alive), false otherwise
+static bool socket_alive(Loop *loop, const char *addr)
+{
+  RStream stream;
+  const char *error = NULL;
+
+  // Try to connect with a 500ms timeout (fast failure for dead sockets)
+  bool connected = socket_connect(loop, &stream, false, addr, 500, &error);
+  if (!connected) {
+    return false;
+  }
+
+  // Connection succeeded - socket is alive. Close the probe on the loop that owns it.
+  bool closed = false;
+  stream.s.uv.pipe.data = &closed;
+  uv_close((uv_handle_t *)&stream.s.uv.pipe, probe_close_cb);
+  LOOP_PROCESS_EVENTS_UNTIL(loop, NULL, -1, closed);
+
+  return true;
+}
+
 int socket_watcher_start(SocketWatcher *watcher, int backlog, socket_cb cb)
   FUNC_ATTR_NONNULL_ALL
 {
@@ -112,6 +143,40 @@ int socket_watcher_start(SocketWatcher *watcher, int backlog, socket_cb cb)
     uv_freeaddrinfo(watcher->uv.tcp.addrinfo);
   } else {
     result = uv_pipe_bind(&watcher->uv.pipe.handle, watcher->addr);
+
+    // If bind failed with EACCES/EADDRINUSE, check if socket is stale
+    if (result == UV_EACCES || result == UV_EADDRINUSE) {
+      Loop *loop = watcher->stream->loop->data;
+
+      if (!socket_alive(loop, watcher->addr)) {
+        // Socket exists but is dead - remove it
+        ILOG("Removing stale socket: %s", watcher->addr);
+        int rm_result = os_remove(watcher->addr);
+
+        if (rm_result != 0) {
+          WLOG("Failed to remove stale socket %s: %s",
+               watcher->addr, uv_strerror(rm_result));
+        } else {
+          // Close (on the owning loop) and reinit the pipe handle before retrying bind
+          uv_loop_t *uv_loop = watcher->uv.pipe.handle.loop;
+          bool closed = false;
+          watcher->uv.pipe.handle.data = &closed;
+          uv_close((uv_handle_t *)&watcher->uv.pipe.handle, probe_close_cb);
+          LOOP_PROCESS_EVENTS_UNTIL(loop, NULL, -1, closed);
+
+          uv_pipe_init(uv_loop, &watcher->uv.pipe.handle, 0);
+          watcher->stream = (uv_stream_t *)(&watcher->uv.pipe.handle);
+          watcher->stream->data = watcher;
+
+          // Retry bind with fresh handle
+          result = uv_pipe_bind(&watcher->uv.pipe.handle, watcher->addr);
+        }
+      } else {
+        // Socket is alive - this is a real error
+        ELOG("Socket already in use by another Nvim instance: %s", watcher->addr);
+      }
+    }
+
     if (result == 0) {
       result = uv_listen(watcher->stream, backlog, connection_cb);
     }
diff --git a/test/functional/core/server_spec.lua b/test/functional/core/server_spec.lua
index 113209cad5..1aae0cc823 100644
--- a/test/functional/core/server_spec.lua
+++ b/test/functional/core/server_spec.lua
@@ -218,6 +218,51 @@ describe('server', function()
     eq(true, old_servs_num < #new_servs)
     client:close()
   end)
+
+  it('removes stale socket files automatically #26053', function()
+    if is_os('win') then
+      -- Windows named pipes are ephemeral kernel objects that are automatically
+      -- cleaned up when the process terminates. Unix domain sockets persist as
+      -- files on the filesystem and can become stale after crashes.
+      return
+    end
+
+    clear()
+    clear_serverlist()
+    local socket_path = './Xtest-stale-socket'
+
+    -- Create stale socket file (simulate crash)
+    vim.uv.fs_close(vim.uv.fs_open(socket_path, 'w', 438))
+
+    -- serverstart() should detect and remove stale socket
+    eq(socket_path, fn.serverstart(socket_path))
+    fn.serverstop(socket_path)
+
+    -- Same test with --listen flag
+    vim.uv.fs_close(vim.uv.fs_open(socket_path, 'w', 438))
+    clear({ args = { '--listen', socket_path } })
+    eq(socket_path, api.nvim_get_vvar('servername'))
+    fn.serverstop(socket_path)
+  end)
+
+  it('does not remove live sockets #26053', function()
+    if is_os('win') then
+      return
+    end
+
+    clear()
+    local socket_path = './Xtest-live-socket'
+    eq(socket_path, fn.serverstart(socket_path))
+
+    -- Second instance should fail without removing live socket
+    local result = n.exec_lua(function(sock)
+      return vim.system({ vim.v.progpath, '--headless', '--listen', sock }, { text = true }):wait()
+    end, socket_path)
+
+    neq(0, result.code)
+    matches('Failed.*listen', result.stderr)
+    fn.serverstop(socket_path)
+  end)
 end)
 
 describe('startup --listen', function()