fix(socket): remove stale socket files (#37378)

Problem:
Crashed Nvim leaves socket files. Subsequent starts fail with "address already in use".

Solution:
On bind failure, test socket liveness via connect(). Remove if stale, retry bind().

Fixes #36581
This commit is contained in:
James Fotherby
2026-01-19 21:59:43 -08:00
committed by GitHub
parent 0712a9c87b
commit bfe9fa0f8e
2 changed files with 110 additions and 0 deletions

View File

@@ -77,6 +77,37 @@ int socket_watcher_init(Loop *loop, SocketWatcher *watcher, const char *endpoint
return 0;
}
/// libuv close callback used when tearing down a handle synchronously.
///
/// The handle's `data` field must point at a `bool`; it is set to true so the
/// code that called uv_close() can tell that the close has completed.
///
/// @param handle Handle that has just been closed
static void probe_close_cb(uv_handle_t *handle)
{
  *(bool *)handle->data = true;
}
/// Check if a socket is alive by attempting to connect to it.
///
/// A successful probe connection is closed again before returning, so no
/// handle outlives this call.
///
/// @param loop Event loop used for the probe connection and for waiting on its close
/// @param addr Socket address to probe
/// @return true if socket is alive (connection succeeded), false otherwise
static bool socket_alive(Loop *loop, const char *addr)
{
  RStream stream;
  // Out-parameter required by socket_connect(); the message itself is not used here.
  const char *error = NULL;

  // Try to connect with a 500ms timeout (fast failure for dead sockets)
  if (!socket_connect(loop, &stream, false, addr, 500, &error)) {
    return false;
  }

  // Connection succeeded - socket is alive. Close the probe connection properly.
  // `stream` and `closed` live on this stack frame, so block until the close
  // callback has fired before returning.
  bool closed = false;
  stream.s.uv.pipe.data = &closed;
  uv_close((uv_handle_t *)&stream.s.uv.pipe, probe_close_cb);
  // Wait on the loop passed by the caller (presumably the one socket_connect()
  // registered the handle on) instead of hard-coding the global main loop; a
  // mismatch would leave the close callback undelivered and this wait stuck.
  LOOP_PROCESS_EVENTS_UNTIL(loop, NULL, -1, closed);
  return true;
}
int socket_watcher_start(SocketWatcher *watcher, int backlog, socket_cb cb)
FUNC_ATTR_NONNULL_ALL
{
@@ -112,6 +143,40 @@ int socket_watcher_start(SocketWatcher *watcher, int backlog, socket_cb cb)
uv_freeaddrinfo(watcher->uv.tcp.addrinfo);
} else {
result = uv_pipe_bind(&watcher->uv.pipe.handle, watcher->addr);
// If bind failed with EACCES/EADDRINUSE, check if socket is stale
if (result == UV_EACCES || result == UV_EADDRINUSE) {
Loop *loop = watcher->stream->loop->data;
if (!socket_alive(loop, watcher->addr)) {
// Socket exists but is dead - remove it
ILOG("Removing stale socket: %s", watcher->addr);
int rm_result = os_remove(watcher->addr);
if (rm_result != 0) {
WLOG("Failed to remove stale socket %s: %s",
watcher->addr, uv_strerror(rm_result));
} else {
// Close and reinit the pipe handle before retrying bind
uv_loop_t *uv_loop = watcher->uv.pipe.handle.loop;
bool closed = false;
watcher->uv.pipe.handle.data = &closed;
uv_close((uv_handle_t *)&watcher->uv.pipe.handle, probe_close_cb);
LOOP_PROCESS_EVENTS_UNTIL(&main_loop, NULL, -1, closed);
uv_pipe_init(uv_loop, &watcher->uv.pipe.handle, 0);
watcher->stream = (uv_stream_t *)(&watcher->uv.pipe.handle);
watcher->stream->data = watcher;
// Retry bind with fresh handle
result = uv_pipe_bind(&watcher->uv.pipe.handle, watcher->addr);
}
} else {
// Socket is alive - this is a real error
ELOG("Socket already in use by another Nvim instance: %s", watcher->addr);
}
}
if (result == 0) {
result = uv_listen(watcher->stream, backlog, connection_cb);
}

View File

@@ -218,6 +218,51 @@ describe('server', function()
eq(true, old_servs_num < #new_servs)
client:close()
end)
it('removes stale socket files automatically #26053', function()
  -- Unix domain sockets persist as files on the filesystem and can become
  -- stale after crashes. Windows named pipes are ephemeral kernel objects that
  -- are cleaned up automatically when the process terminates, so there is
  -- nothing to exercise there.
  if is_os('win') then
    return
  end

  clear()
  clear_serverlist()

  local stale_path = './Xtest-stale-socket'

  -- Leaves an empty file at the socket path, standing in for what a crashed
  -- instance would leave behind. 438 is decimal for mode 0666.
  local function leave_stale_file()
    local fd = vim.uv.fs_open(stale_path, 'w', 438)
    vim.uv.fs_close(fd)
  end

  -- Case 1: serverstart() should detect the stale file, remove it and bind.
  leave_stale_file()
  eq(stale_path, fn.serverstart(stale_path))
  fn.serverstop(stale_path)

  -- Case 2: the same scenario, but the address is given with --listen.
  leave_stale_file()
  clear({ args = { '--listen', stale_path } })
  eq(stale_path, api.nvim_get_vvar('servername'))
  fn.serverstop(stale_path)
end)
it('does not remove live sockets #26053', function()
  -- Not run on Windows (the stale-socket handling is only tested on Unix).
  if is_os('win') then
    return
  end

  clear()

  local live_path = './Xtest-live-socket'
  local started = fn.serverstart(live_path)
  eq(live_path, started)

  -- A second instance asked to listen on the same address must fail to start
  -- instead of removing the socket that is still in use.
  local proc = n.exec_lua(function(addr)
    local cmd = { vim.v.progpath, '--headless', '--listen', addr }
    return vim.system(cmd, { text = true }):wait()
  end, live_path)

  neq(0, proc.code)
  matches('Failed.*listen', proc.stderr)

  fn.serverstop(live_path)
end)
end)
describe('startup --listen', function()