mirror of
https://github.com/neovim/neovim.git
synced 2025-09-28 14:08:32 +00:00
job-control: mitigate process-kill race
children_kill_cb() is racey. One obvious problem is that process_close_handles() is *queued* by on_process_exit(), so when children_kill_cb() is invoked, the dead process might still be in the `loop->children` list. If the OS already reclaimed the dead PID, Nvim may try to SIGKILL it. Avoid that by checking `proc->status`. Vim doesn't have this problem because it doesn't attempt to kill processes that ignored SIGTERM after a timeout. closes #8269
This commit is contained in:
@@ -5033,10 +5033,9 @@ jobstart({cmd}[, {opts}]) *jobstart()*
|
|||||||
width : (pty only) Width of the terminal screen
|
width : (pty only) Width of the terminal screen
|
||||||
height : (pty only) Height of the terminal screen
|
height : (pty only) Height of the terminal screen
|
||||||
TERM : (pty only) $TERM environment variable
|
TERM : (pty only) $TERM environment variable
|
||||||
detach : (non-pty only) Detach the job process from the
|
detach : (non-pty only) Detach the job process: it will
|
||||||
nvim process. The process will not get killed
|
not be killed when Nvim exits. If the process
|
||||||
when nvim exits. If the process dies before
|
exits before Nvim, "on_exit" will be invoked.
|
||||||
nvim exits, "on_exit" will still be invoked.
|
|
||||||
|
|
||||||
{opts} is passed as |self| dictionary to the callback; the
|
{opts} is passed as |self| dictionary to the callback; the
|
||||||
caller may set other keys to pass application-specific data.
|
caller may set other keys to pass application-specific data.
|
||||||
|
@@ -658,9 +658,9 @@ static void channel_process_exit_cb(Process *proc, int status, void *data)
|
|||||||
terminal_close(chan->term, msg);
|
terminal_close(chan->term, msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if status is -1 the process did not really exit,
|
// If process did not exit, we only closed the handle of a detached process.
|
||||||
// we just closed the handle onto a detached process
|
bool exited = (status >= 0);
|
||||||
if (status >= 0) {
|
if (exited) {
|
||||||
process_channel_event(chan, &chan->on_exit, "exit", NULL, 0, status);
|
process_channel_event(chan, &chan->on_exit, "exit", NULL, 0, status);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -188,8 +188,7 @@ int process_wait(Process *proc, int ms, MultiQueue *events)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (proc->refcount == 1) {
|
if (proc->refcount == 1) {
|
||||||
// Job exited, collect status and manually invoke close_cb to free the job
|
// Job exited, free its resources.
|
||||||
// resources
|
|
||||||
decref(proc);
|
decref(proc);
|
||||||
if (events) {
|
if (events) {
|
||||||
// the decref call created an exit event, process it now
|
// the decref call created an exit event, process it now
|
||||||
@@ -205,7 +204,8 @@ int process_wait(Process *proc, int ms, MultiQueue *events)
|
|||||||
/// Ask a process to terminate and eventually kill if it doesn't respond
|
/// Ask a process to terminate and eventually kill if it doesn't respond
|
||||||
void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL
|
void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL
|
||||||
{
|
{
|
||||||
if (proc->stopped_time) {
|
bool exited = (proc->status >= 0);
|
||||||
|
if (exited || proc->stopped_time) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -228,14 +228,14 @@ void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL
|
|||||||
}
|
}
|
||||||
|
|
||||||
Loop *loop = proc->loop;
|
Loop *loop = proc->loop;
|
||||||
// Start a timer to periodically check if a signal should be send to the job.
|
// Start a timer to periodically check if a signal should be sent to the job.
|
||||||
ILOG("starting job kill timer");
|
ILOG("starting job kill timer");
|
||||||
uv_timer_start(&loop->children_kill_timer, children_kill_cb,
|
uv_timer_start(&loop->children_kill_timer, children_kill_cb,
|
||||||
KILL_TIMEOUT_MS, KILL_TIMEOUT_MS);
|
KILL_TIMEOUT_MS, KILL_TIMEOUT_MS);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterates the process list sending SIGTERM to stopped processes and SIGKILL
|
/// Sends SIGKILL (or SIGTERM for PTY jobs) to processes that didn't terminate
|
||||||
/// to those that didn't die from SIGTERM after a while(exit_timeout is 0).
|
/// after process_stop() requested them.
|
||||||
static void children_kill_cb(uv_timer_t *handle)
|
static void children_kill_cb(uv_timer_t *handle)
|
||||||
{
|
{
|
||||||
Loop *loop = handle->loop->data;
|
Loop *loop = handle->loop->data;
|
||||||
@@ -243,11 +243,11 @@ static void children_kill_cb(uv_timer_t *handle)
|
|||||||
|
|
||||||
kl_iter(WatcherPtr, loop->children, current) {
|
kl_iter(WatcherPtr, loop->children, current) {
|
||||||
Process *proc = (*current)->data;
|
Process *proc = (*current)->data;
|
||||||
if (!proc->stopped_time) {
|
bool exited = (proc->status >= 0);
|
||||||
|
if (exited || !proc->stopped_time) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
uint64_t elapsed = (now - proc->stopped_time) / 1000000 + 1;
|
uint64_t elapsed = (now - proc->stopped_time) / 1000000 + 1;
|
||||||
|
|
||||||
if (elapsed >= KILL_TIMEOUT_MS) {
|
if (elapsed >= KILL_TIMEOUT_MS) {
|
||||||
int sig = proc->type == kProcessTypePty && elapsed < KILL_TIMEOUT_MS * 2
|
int sig = proc->type == kProcessTypePty && elapsed < KILL_TIMEOUT_MS * 2
|
||||||
? SIGTERM
|
? SIGTERM
|
||||||
@@ -380,8 +380,10 @@ static void process_close_handles(void **argv)
|
|||||||
static void on_process_exit(Process *proc)
|
static void on_process_exit(Process *proc)
|
||||||
{
|
{
|
||||||
Loop *loop = proc->loop;
|
Loop *loop = proc->loop;
|
||||||
|
ILOG("exited: pid=%d status=%d stoptime=%" PRId64, proc->pid,
|
||||||
|
proc->status, proc->stopped_time);
|
||||||
if (proc->stopped_time) {
|
if (proc->stopped_time) {
|
||||||
DLOG("stopping process kill timer");
|
ILOG("stopping process kill timer");
|
||||||
uv_timer_stop(&loop->children_kill_timer);
|
uv_timer_stop(&loop->children_kill_timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user