mailsplit and mailinfo: gracefully handle NUL characters

The function fgets() has a big problem with NUL characters: it reads
them, but nobody will know if the NUL comes from the file stream, or
was appended at the end of the line.

So implement a custom read_line_with_nul() function.

Noticed by Tommy Thorn.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
index 11f154b..f0c4209 100644
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c
@@ -641,7 +641,7 @@
 	}
 }
 
-static int handle_filter(char *line, unsigned linesize);
+static int handle_filter(char *line, unsigned linesize, int linelen);
 
 static int find_boundary(void)
 {
@@ -669,7 +669,7 @@
 					"can't recover\n");
 			exit(1);
 		}
-		handle_filter(newline, sizeof(newline));
+		handle_filter(newline, sizeof(newline), strlen(newline));
 
 		/* skip to the next boundary */
 		if (!find_boundary())
@@ -759,14 +759,14 @@
 	return 0;
 }
 
-static int handle_patch(char *line)
+static int handle_patch(char *line, int len)
 {
-	fputs(line, patchfile);
+	fwrite(line, 1, len, patchfile);
 	patch_lines++;
 	return 0;
 }
 
-static int handle_filter(char *line, unsigned linesize)
+static int handle_filter(char *line, unsigned linesize, int linelen)
 {
 	static int filter = 0;
 
@@ -779,7 +779,7 @@
 			break;
 		filter++;
 	case 1:
-		if (!handle_patch(line))
+		if (!handle_patch(line, linelen))
 			break;
 		filter++;
 	default:
@@ -794,6 +794,7 @@
 	int rc = 0;
 	static char newline[2000];
 	static char *np = newline;
+	int len = strlen(line);
 
 	/* Skip up to the first boundary */
 	if (content_top->boundary) {
@@ -807,7 +808,8 @@
 			/* flush any leftover */
 			if ((transfer_encoding == TE_BASE64)  &&
 			    (np != newline)) {
-				handle_filter(newline, sizeof(newline));
+				handle_filter(newline, sizeof(newline),
+						strlen(newline));
 			}
 			if (!handle_boundary())
 				return;
@@ -824,7 +826,7 @@
 
 			/* binary data most likely doesn't have newlines */
 			if (message_type != TYPE_TEXT) {
-				rc = handle_filter(line, sizeof(newline));
+				rc = handle_filter(line, sizeof(line), len);
 				break;
 			}
 
@@ -841,7 +843,7 @@
 					/* should be sitting on a new line */
 					*(++np) = 0;
 					op++;
-					rc = handle_filter(newline, sizeof(newline));
+					rc = handle_filter(newline, sizeof(newline), np - newline);
 					np = newline;
 				}
 			} while (*op != 0);
@@ -851,12 +853,12 @@
 			break;
 		}
 		default:
-			rc = handle_filter(line, sizeof(newline));
+			rc = handle_filter(line, sizeof(line), len);
 		}
 		if (rc)
 			/* nothing left to filter */
 			break;
-	} while (fgets(line, sizeof(line), fin));
+	} while ((len = read_line_with_nul(line, sizeof(line), fin)));
 
 	return;
 }
diff --git a/builtin-mailsplit.c b/builtin-mailsplit.c
index 46b27cd..e4d977b 100644
--- a/builtin-mailsplit.c
+++ b/builtin-mailsplit.c
@@ -45,6 +45,25 @@
 /* Could be as small as 64, enough to hold a Unix "From " line. */
 static char buf[4096];
 
+/* We cannot use fgets() because our lines can contain NULs */
+int read_line_with_nul(char *buf, int size, FILE *in)
+{
+	int len = 0, c;
+
+	for (;;) {
+		c = getc(in);
+		buf[len++] = c;
+		if (c == EOF || c == '\n' || len + 1 >= size)
+			break;
+	}
+
+	if (c == EOF)
+		len--;
+	buf[len] = '\0';
+
+	return len;
+}
+
 /* Called with the first line (potentially partial)
  * already in buf[] -- normally that should begin with
  * the Unix "From " line.  Write it into the specified
@@ -70,19 +89,19 @@
 	 * "From " and having something that looks like a date format.
 	 */
 	for (;;) {
-		int is_partial = (buf[len-1] != '\n');
+		int is_partial = len && buf[len-1] != '\n';
 
-		if (fputs(buf, output) == EOF)
+		if (fwrite(buf, 1, len, output) != len)
 			die("cannot write output");
 
-		if (fgets(buf, sizeof(buf), mbox) == NULL) {
+		len = read_line_with_nul(buf, sizeof(buf), mbox);
+		if (len == 0) {
 			if (feof(mbox)) {
 				status = 1;
 				break;
 			}
 			die("cannot read mbox");
 		}
-		len = strlen(buf);
 		if (!is_partial && !is_bare && is_from_line(buf, len))
 			break; /* done with one message */
 	}
diff --git a/builtin.h b/builtin.h
index 95126fd..48f1332 100644
--- a/builtin.h
+++ b/builtin.h
@@ -9,6 +9,7 @@
 extern void list_common_cmds_help(void);
 extern void help_unknown_cmd(const char *cmd);
 extern void prune_packed_objects(int);
+extern int read_line_with_nul(char *buf, int size, FILE *file);
 
 extern int cmd_add(int argc, const char **argv, const char *prefix);
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index d6c55c1..5a4610b 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -25,4 +25,13 @@
 		diff ../t5100/info$mail info$mail"
 done
 
+test_expect_success 'respect NULs' '
+
+	git mailsplit -d3 -o. ../t5100/nul &&
+	cmp ../t5100/nul 001 &&
+	(cat 001 | git mailinfo msg patch) &&
+	test 4 = $(wc -l < patch)
+
+'
+
 test_done
diff --git a/t/t5100/nul b/t/t5100/nul
new file mode 100644
index 0000000..3d40691
--- /dev/null
+++ b/t/t5100/nul
Binary files differ