exec: rework the group exit and fix the race with kill
As Roland pointed out, we have a very old problem with exec: de_thread()
sets SIGNAL_GROUP_EXIT, kills the other threads, changes ->group_leader and
only then clears signal->flags. Any signal (even a fatal one) sent in this
window (which is not small) is lost.
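
Roughly, the old sequence was (a simplified sketch using the identifiers
from fs/exec.c and kernel/signal.c, not the literal code):

	/* old de_thread(), heavily simplified */
	zap_other_threads(tsk);		/* also sets SIGNAL_GROUP_EXIT */
	/* ... wait for the other threads to die,
	 * possibly take over ->group_leader ... */
	sig->flags = 0;			/* the flag is only cleared here */

While SIGNAL_GROUP_EXIT is set, the signal code treats the group as already
exiting, so anything sent between zap_other_threads() and the final
"sig->flags = 0" is silently dropped.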
With this patch exec no longer abuses SIGNAL_GROUP_EXIT. The new helper,
signal_group_exit(), should be used to detect an exit_group() or exec() in
progress. It could have more users, but this patch makes only the strictly
necessary changes.
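
For reference, the helper added to include/linux/sched.h below is just:

	static inline int signal_group_exit(const struct signal_struct *sig)
	{
		return	(sig->flags & SIGNAL_GROUP_EXIT) ||
			(sig->group_exit_task != NULL);
	}

so the callers that used to test "sig->flags & SIGNAL_GROUP_EXIT" directly
also notice an exec in progress, which de_thread() now marks by setting
->group_exit_task before zap_other_threads() instead of setting
SIGNAL_GROUP_EXIT.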
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Robin Holt <holt@sgi.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/fs/exec.c b/fs/exec.c
index 966c5c5..be923e4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -760,7 +760,7 @@
*/
read_lock(&tasklist_lock);
spin_lock_irq(lock);
- if (sig->flags & SIGNAL_GROUP_EXIT) {
+ if (signal_group_exit(sig)) {
/*
* Another group action in progress, just
* return so that the signal is processed.
@@ -778,6 +778,7 @@
if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
task_active_pid_ns(tsk)->child_reaper = tsk;
+ sig->group_exit_task = tsk;
zap_other_threads(tsk);
read_unlock(&tasklist_lock);
@@ -802,7 +803,6 @@
}
sig->notify_count = count;
- sig->group_exit_task = tsk;
while (atomic_read(&sig->count) > count) {
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock_irq(lock);
@@ -871,15 +871,10 @@
leader->exit_state = EXIT_DEAD;
write_unlock_irq(&tasklist_lock);
- }
+ }
sig->group_exit_task = NULL;
sig->notify_count = 0;
- /*
- * There may be one thread left which is just exiting,
- * but it's safe to stop telling the group to kill themselves.
- */
- sig->flags = 0;
no_thread_group:
exit_itimers(sig);
@@ -1549,7 +1544,7 @@
int err = -EAGAIN;
spin_lock_irq(&tsk->sighand->siglock);
- if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
+ if (!signal_group_exit(tsk->signal)) {
tsk->signal->group_exit_code = exit_code;
zap_process(tsk);
err = 0;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 680bb03..483ea4e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -555,6 +555,13 @@
#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */
#define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */
+/* If true, all threads except ->group_exit_task have pending SIGKILL */
+static inline int signal_group_exit(const struct signal_struct *sig)
+{
+ return (sig->flags & SIGNAL_GROUP_EXIT) ||
+ (sig->group_exit_task != NULL);
+}
+
/*
* Some day this will be a full-fledged user tracking system..
*/
diff --git a/kernel/exit.c b/kernel/exit.c
index 9e459fe..9d3d0f0 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1083,11 +1083,12 @@
struct signal_struct *const sig = current->signal;
struct sighand_struct *const sighand = current->sighand;
spin_lock_irq(&sighand->siglock);
- if (sig->flags & SIGNAL_GROUP_EXIT)
+ if (signal_group_exit(sig))
/* Another thread got here before we took the lock. */
exit_code = sig->group_exit_code;
else {
sig->group_exit_code = exit_code;
+ sig->flags = SIGNAL_GROUP_EXIT;
zap_other_threads(current);
}
spin_unlock_irq(&sighand->siglock);
diff --git a/kernel/signal.c b/kernel/signal.c
index 1117b28..6a5f97c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -957,7 +957,6 @@
{
struct task_struct *t;
- p->signal->flags = SIGNAL_GROUP_EXIT;
p->signal->group_stop_count = 0;
for (t = next_thread(p); t != p; t = next_thread(t)) {
@@ -1697,7 +1696,8 @@
} else {
struct task_struct *t;
- if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED))
+ if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
+ unlikely(sig->group_exit_task))
return 0;
/*
* There is no group stop already in progress.