[PATCH] pidspace: is_init()
This is an updated version of Eric Biederman's is_init() patch.
(http://lkml.org/lkml/2006/2/6/280). It applies cleanly to 2.6.18-rc3 and
replaces a few more instances of ->pid == 1 with is_init().
Further, is_init() checks pid and thus removes dependency on Eric's other
patches for now.
Eric's original description:
There are a lot of places in the kernel where we test for init
because we give it special properties. Most significantly init
must not die. This results in code all over the kernel test
->pid == 1.
Introduce is_init to capture this case.
With multiple pid spaces for all of the cases affected we are
looking for only the first process on the system, not some other
process that has pid == 1.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: <lxc-devel@lists.sourceforge.net>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 622dabd..8871529 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -193,7 +193,7 @@
/* We ran out of memory, or some other thing happened to us that
made us unable to handle the page fault gracefully. */
out_of_memory:
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index a5b33ff..5e658a8 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -198,7 +198,7 @@
return fault;
}
- if (tsk->pid != 1)
+ if (!is_init(tsk))
goto out;
/*
diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c
index a7c4cc9..a1f6d8a 100644
--- a/arch/arm26/mm/fault.c
+++ b/arch/arm26/mm/fault.c
@@ -185,7 +185,7 @@
}
fault = -3; /* out of memory */
- if (tsk->pid != 1)
+ if (!is_init(tsk))
goto out;
/*
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index efc7e7d..08502fc 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -739,7 +739,7 @@
retval = get_user_pages(current, current->mm,
(unsigned long )to, 1, 1, 0, &pg, NULL);
- if (retval == -ENOMEM && current->pid == 1) {
+ if (retval == -ENOMEM && is_init(current)) {
up_read(¤t->mm->mmap_sem);
blk_congestion_wait(WRITE, HZ/50);
goto survive;
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 50d8617..2581575 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -589,7 +589,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (tsk->pid == 1) {
+ if (is_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index d8b1b4a..59f3ab9 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -280,7 +280,7 @@
out_of_memory:
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index dc18a33..8d5f551 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -299,7 +299,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (tsk->pid == 1) {
+ if (is_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 5e2d87c..911f2ce 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -181,7 +181,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index a4f8c45..8423d85 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -171,7 +171,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (tsk->pid == 1) {
+ if (is_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 77953f4..e8fa506 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -386,7 +386,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 903115d..311ed19 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -337,7 +337,7 @@
err->disposition == RTAS_DISP_NOT_RECOVERED &&
err->target == RTAS_TARGET_MEMORY &&
err->type == RTAS_TYPE_ECC_UNCORR &&
- !(current->pid == 0 || current->pid == 1)) {
+ !(current->pid == 0 || is_init(current))) {
/* Kill off a user process with an ECC error */
printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
current->pid);
diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c
index d7a4330..aafc8e8 100644
--- a/arch/ppc/kernel/traps.c
+++ b/arch/ppc/kernel/traps.c
@@ -119,7 +119,7 @@
* generate the same exception over and over again and we get
* nowhere. Better to kill it and let the kernel panic.
*/
- if (current->pid == 1) {
+ if (is_init(current)) {
__sighandler_t handler;
spin_lock_irq(¤t->sighand->siglock);
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
index bc776be..465f451 100644
--- a/arch/ppc/mm/fault.c
+++ b/arch/ppc/mm/fault.c
@@ -291,7 +291,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index f2b9a84..9c3c19f 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -353,7 +353,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (tsk->pid == 1) {
+ if (is_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 507f289..68663b8 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -149,7 +149,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c
index f08d0ea..8e2f6c2 100644
--- a/arch/sh64/mm/fault.c
+++ b/arch/sh64/mm/fault.c
@@ -277,7 +277,7 @@
show_regs(regs);
#endif
}
- if (tsk->pid == 1) {
+ if (is_init(tsk)) {
panic("INIT had user mode bad_area\n");
}
tsk->thread.address = address;
@@ -319,14 +319,14 @@
* us unable to handle the page fault gracefully.
*/
out_of_memory:
- if (current->pid == 1) {
+ if (is_init(current)) {
panic("INIT out of memory\n");
yield();
goto survive;
}
printk("fault:Out of memory\n");
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 61a23ff..c7b195c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -120,7 +120,7 @@
* us unable to handle the page fault gracefully.
*/
out_of_memory:
- if (current->pid == 1) {
+ if (is_init(current)) {
up_read(&mm->mmap_sem);
yield();
down_read(&mm->mmap_sem);
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 9ba54cc..3751b47 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -244,7 +244,7 @@
int unhandled_signal(struct task_struct *tsk, int sig)
{
- if (tsk->pid == 1)
+ if (is_init(tsk))
return 1;
if (tsk->ptrace & PT_PTRACED)
return 0;
@@ -580,7 +580,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
goto again;
}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index a945a33..dd0dbec 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -144,7 +144,7 @@
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index ee3ca8f..0ad6cb0 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -208,7 +208,7 @@
struct task_struct *p;
for_each_process(p) {
- if (p->mm && p->pid != 1)
+ if (p->mm && !is_init(p))
/* Not swapper, init nor kernel thread */
force_sig(sig, p);
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3696f2f..ed2af86 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1033,6 +1033,16 @@
return p->pids[PIDTYPE_PID].pid != NULL;
}
+/**
+ * is_init - check if a task structure is the first user space
+ * task the kernel created.
+ * @p: Task structure to be checked.
+ */
+static inline int is_init(struct task_struct *tsk)
+{
+ return tsk->pid == 1;
+}
+
extern void free_task(struct task_struct *tsk);
#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
diff --git a/kernel/capability.c b/kernel/capability.c
index c7685ad..edb845a 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -133,7 +133,7 @@
int found = 0;
do_each_thread(g, target) {
- if (target == current || target->pid == 1)
+ if (target == current || is_init(target))
continue;
found = 1;
if (security_capset_check(target, effective, inheritable,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1b32c2c..584bb4e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -240,7 +240,7 @@
* A cpuset can only be deleted if both its 'count' of using tasks
* is zero, and its list of 'children' cpusets is empty. Since all
* tasks in the system use _some_ cpuset, and since there is always at
- * least one task in the system (init, pid == 1), therefore, top_cpuset
+ * least one task in the system (init), therefore, top_cpuset
* always has either children cpusets and/or using tasks. So we don't
* need a special hack to ensure that top_cpuset cannot be deleted.
*
diff --git a/kernel/exit.c b/kernel/exit.c
index 4b6fb05..9961192 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -219,7 +219,7 @@
do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
if (p == ignored_task
|| p->exit_state
- || p->real_parent->pid == 1)
+ || is_init(p->real_parent))
continue;
if (process_group(p->real_parent) != pgrp
&& p->real_parent->signal->session == p->signal->session) {
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 50087ec..37cad75 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -40,7 +40,7 @@
int kexec_should_crash(struct task_struct *p)
{
- if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops)
+ if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops)
return 1;
return 0;
}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 8aad033..4d50e06 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -440,6 +440,7 @@
child = find_task_by_pid(pid);
if (child)
get_task_struct(child);
+
read_unlock(&tasklist_lock);
if (!child)
return ERR_PTR(-ESRCH);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8bfa7d1..9535a38 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1915,7 +1915,7 @@
return -EPERM;
}
- op = (current->pid == 1) ? OP_SET : OP_AND;
+ op = is_init(current) ? OP_SET : OP_AND;
return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
do_proc_dointvec_bset_conv,&op);
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bada3d0..f3dd79c 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -255,7 +255,7 @@
*/
static void __oom_kill_task(struct task_struct *p, const char *message)
{
- if (p->pid == 1) {
+ if (is_init(p)) {
WARN_ON(1);
printk(KERN_WARNING "tried to kill init!\n");
return;
diff --git a/security/commoncap.c b/security/commoncap.c
index f50fc29..5a5ef5c 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -169,7 +169,7 @@
/* For init, we want to retain the capabilities set
* in the init_task struct. Thus we skip the usual
* capability rules */
- if (current->pid != 1) {
+ if (!is_init(current)) {
current->cap_permitted = new_permitted;
current->cap_effective =
cap_intersect (new_permitted, bprm->cap_effective);