audit: rework execve audit

The purpose of audit_bprm() is to log the argv array to a userspace daemon at
the end of the execve system call.  Since user-space hasn't had time to run,
this array is still in pristine state on the process' stack; so no need to
copy it, we can just grab it from there.

In order to minimize the damage to audit_log_*() copy each string into a
temporary kernel buffer first.

Currently the audit code requires that the full argument vector fits in a
single packet.  So currently it does clip the argv size to a (sysctl) limit,
but only when execve auditing is enabled.

If the audit protocol gets extended to allow for multiple packets this check
can be removed.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ollie Wild <aaw@google.com>
Cc: <linux-audit@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ebffdff..72e247e 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1065,6 +1065,13 @@
 resume it  if we have a value of 3 or more percent; consider information about
 the amount of free space valid for 30 seconds
 
+audit_argv_kb
+-------------
+
+The file contains a single value denoting the limit on the argv array size
+for execve (in KiB). This limit is only applied when system call auditing for
+execve is enabled, otherwise the value is ignored.
+
 ctrl-alt-del
 ------------
 
diff --git a/fs/exec.c b/fs/exec.c
index f20561f..2e3f795 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1154,6 +1154,7 @@
 {
 	struct linux_binprm *bprm;
 	struct file *file;
+	unsigned long env_p;
 	int retval;
 	int i;
 
@@ -1208,9 +1209,11 @@
 	if (retval < 0)
 		goto out;
 
+	env_p = bprm->p;
 	retval = copy_strings(bprm->argc, argv, bprm);
 	if (retval < 0)
 		goto out;
+	bprm->argv_len = env_p - bprm->p;
 
 	retval = search_binary_handler(bprm,regs);
 	if (retval >= 0) {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e1a7083..a0b209c 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -40,6 +40,7 @@
 	unsigned interp_flags;
 	unsigned interp_data;
 	unsigned long loader, exec;
+	unsigned long argv_len;
 };
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b7640a5..535586f 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -153,7 +153,7 @@
 	struct audit_aux_data	d;
 	int argc;
 	int envc;
-	char mem[0];
+	struct mm_struct *mm;
 };
 
 struct audit_aux_data_socketcall {
@@ -831,6 +831,55 @@
 	return rc;
 }
 
+static void audit_log_execve_info(struct audit_buffer *ab,
+		struct audit_aux_data_execve *axi)
+{
+	int i;
+	long len, ret;
+	const char __user *p = (const char __user *)axi->mm->arg_start;
+	char *buf;
+
+	if (axi->mm != current->mm)
+		return; /* execve failed, no additional info */
+
+	for (i = 0; i < axi->argc; i++, p += len) {
+		len = strnlen_user(p, MAX_ARG_PAGES*PAGE_SIZE);
+		/*
+		 * We just created this mm, if we can't find the strings
+		 * we just copied into it something is _very_ wrong. Similar
+		 * for strings that are too long, we should not have created
+		 * any.
+		 */
+		if (!len || len > MAX_ARG_STRLEN) {
+			WARN_ON(1);
+			send_sig(SIGKILL, current, 0);
+		}
+
+		buf = kmalloc(len, GFP_KERNEL);
+		if (!buf) {
+			audit_panic("out of memory for argv string\n");
+			break;
+		}
+
+		ret = copy_from_user(buf, p, len);
+		/*
+		 * There is no reason for this copy to be short. We just
+		 * copied them here, and the mm hasn't been exposed to user-
+		 * space yet.
+		 */
+		if (!ret) {
+			WARN_ON(1);
+			send_sig(SIGKILL, current, 0);
+		}
+
+		audit_log_format(ab, "a%d=", i);
+		audit_log_untrustedstring(ab, buf);
+		audit_log_format(ab, "\n");
+
+		kfree(buf);
+	}
+}
+
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
 	int i, call_panic = 0;
@@ -971,13 +1020,7 @@
 
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
-			int i;
-			const char *p;
-			for (i = 0, p = axi->mem; i < axi->argc; i++) {
-				audit_log_format(ab, "a%d=", i);
-				p = audit_log_untrustedstring(ab, p);
-				audit_log_format(ab, "\n");
-			}
+			audit_log_execve_info(ab, axi);
 			break; }
 
 		case AUDIT_SOCKETCALL: {
@@ -1821,32 +1864,31 @@
 	return 0;
 }
 
+int audit_argv_kb = 32;
+
 int audit_bprm(struct linux_binprm *bprm)
 {
 	struct audit_aux_data_execve *ax;
 	struct audit_context *context = current->audit_context;
-	unsigned long p, next;
-	void *to;
 
 	if (likely(!audit_enabled || !context || context->dummy))
 		return 0;
 
-	ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
-				GFP_KERNEL);
+	/*
+	 * Even though the stack code doesn't limit the arg+env size any more,
+	 * the audit code requires that _all_ arguments be logged in a single
+	 * netlink skb. Hence cap it :-(
+	 */
+	if (bprm->argv_len > (audit_argv_kb << 10))
+		return -E2BIG;
+
+	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
 	if (!ax)
 		return -ENOMEM;
 
 	ax->argc = bprm->argc;
 	ax->envc = bprm->envc;
-	for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) {
-		struct page *page = bprm->page[p / PAGE_SIZE];
-		void *kaddr = kmap(page);
-		next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1);
-		memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p);
-		to += next - p;
-		kunmap(page);
-	}
-
+	ax->mm = bprm->mm;
 	ax->d.type = AUDIT_EXECVE;
 	ax->d.next = context->aux;
 	context->aux = (void *)ax;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3ed4912b..8db4176 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -78,6 +78,7 @@
 extern int compat_log;
 extern int maps_protect;
 extern int sysctl_stat_interval;
+extern int audit_argv_kb;
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
@@ -306,6 +307,16 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#ifdef CONFIG_AUDITSYSCALL
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "audit_argv_kb",
+		.data		= &audit_argv_kb,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{
 		.ctl_name	= KERN_CORE_PATTERN,
 		.procname	= "core_pattern",