fsck: implement checks for gitattributes

Recently, a vulnerability was reported that can lead to an out-of-bounds
write when reading an unreasonably large gitattributes file. The root
cause of this error are multiple integer overflows in different parts of
the code when there are either too many lines, when paths are too long,
when attribute names are too long, or when there are too many attributes
declared for a pattern.

As all of these are related to size, it seems reasonable to restrict the
size of the gitattributes file via git-fsck(1). This allows us to both
stop distributing known-vulnerable objects via common hosting platforms
that have fsck enabled, and users to protect themselves by enabling the
`fetch.fsckObjects` config.

There are basically two checks:

    1. We verify that size of the gitattributes file is smaller than
       100MB.

    2. We verify that the maximum line length does not exceed 2048
       bytes.

With the preceding commits, both of these conditions would cause us to
either ignore the complete gitattributes file or blob in the first case,
or the specific line in the second case. Now with these consistency
checks added, we also grow the ability to stop distributing such files
in the first place when `receive.fsckObjects` is enabled.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/fsck.c b/fsck.c
index 3a7fb9e..614c776 100644
--- a/fsck.c
+++ b/fsck.c
@@ -2,6 +2,7 @@
 #include "object-store.h"
 #include "repository.h"
 #include "object.h"
+#include "attr.h"
 #include "blob.h"
 #include "tree.h"
 #include "tree-walk.h"
@@ -615,7 +616,10 @@
 		}
 
 		if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) {
-			if (S_ISLNK(mode))
+			if (!S_ISLNK(mode))
+				oidset_insert(&options->gitattributes_found,
+					      entry_oid);
+			else
 				retval += report(options, tree_oid, OBJ_TREE,
 						 FSCK_MSG_GITATTRIBUTES_SYMLINK,
 						 ".gitattributes is a symlink");
@@ -1206,6 +1210,35 @@
 		ret |= data.ret;
 	}
 
+	if (oidset_contains(&options->gitattributes_found, oid)) {
+		const char *ptr;
+
+		oidset_insert(&options->gitattributes_done, oid);
+
+		if (!buf || size > ATTR_MAX_FILE_SIZE) {
+			/*
+			 * A missing buffer here is a sign that the caller found the
+			 * blob too gigantic to load into memory. Let's just consider
+			 * that an error.
+			 */
+			return report(options, oid, OBJ_BLOB,
+					FSCK_MSG_GITATTRIBUTES_LARGE,
+					".gitattributes too large to parse");
+		}
+
+		for (ptr = buf; *ptr; ) {
+			const char *eol = strchrnul(ptr, '\n');
+			if (eol - ptr >= ATTR_MAX_LINE_LENGTH) {
+				ret |= report(options, oid, OBJ_BLOB,
+					      FSCK_MSG_GITATTRIBUTES_LINE_LENGTH,
+					      ".gitattributes has too long lines to parse");
+				break;
+			}
+
+			ptr = *eol ? eol + 1 : eol;
+		}
+	}
+
 	return ret;
 }
 
@@ -1293,6 +1326,9 @@
 	ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done,
 			  FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB,
 			  options, ".gitmodules");
+	ret |= fsck_blobs(&options->gitattributes_found, &options->gitattributes_done,
+			  FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB,
+			  options, ".gitattributes");
 
 	return ret;
 }
diff --git a/fsck.h b/fsck.h
index d07f7a2..cc3379d 100644
--- a/fsck.h
+++ b/fsck.h
@@ -55,6 +55,10 @@
 	FUNC(GITMODULES_URL, ERROR) \
 	FUNC(GITMODULES_PATH, ERROR) \
 	FUNC(GITMODULES_UPDATE, ERROR) \
+	FUNC(GITATTRIBUTES_MISSING, ERROR) \
+	FUNC(GITATTRIBUTES_LARGE, ERROR) \
+	FUNC(GITATTRIBUTES_LINE_LENGTH, ERROR) \
+	FUNC(GITATTRIBUTES_BLOB, ERROR) \
 	/* warnings */ \
 	FUNC(BAD_FILEMODE, WARN) \
 	FUNC(EMPTY_NAME, WARN) \
@@ -129,6 +133,8 @@
 	struct oidset skiplist;
 	struct oidset gitmodules_found;
 	struct oidset gitmodules_done;
+	struct oidset gitattributes_found;
+	struct oidset gitattributes_done;
 	kh_oid_map_t *object_names;
 };
 
@@ -136,18 +142,24 @@
 	.skiplist = OIDSET_INIT, \
 	.gitmodules_found = OIDSET_INIT, \
 	.gitmodules_done = OIDSET_INIT, \
+	.gitattributes_found = OIDSET_INIT, \
+	.gitattributes_done = OIDSET_INIT, \
 	.error_func = fsck_error_function \
 }
 #define FSCK_OPTIONS_STRICT { \
 	.strict = 1, \
 	.gitmodules_found = OIDSET_INIT, \
 	.gitmodules_done = OIDSET_INIT, \
+	.gitattributes_found = OIDSET_INIT, \
+	.gitattributes_done = OIDSET_INIT, \
 	.error_func = fsck_error_function, \
 }
 #define FSCK_OPTIONS_MISSING_GITMODULES { \
 	.strict = 1, \
 	.gitmodules_found = OIDSET_INIT, \
 	.gitmodules_done = OIDSET_INIT, \
+	.gitattributes_found = OIDSET_INIT, \
+	.gitattributes_done = OIDSET_INIT, \
 	.error_func = fsck_error_cb_print_missing_gitmodules, \
 }
 
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 5071ac6..9e0afe1 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -865,4 +865,28 @@
 	test_i18ngrep "bad index file" errors
 '
 
+test_expect_success 'fsck error on gitattributes with excessive line lengths' '
+	blob=$(printf "pattern %02048d" 1 | git hash-object -w --stdin) &&
+	test_when_finished "remove_object $blob" &&
+	tree=$(printf "100644 blob %s\t%s\n" $blob .gitattributes | git mktree) &&
+	test_when_finished "remove_object $tree" &&
+	cat >expected <<-EOF &&
+	error in blob $blob: gitattributesLineLength: .gitattributes has too long lines to parse
+	EOF
+	test_must_fail git fsck --no-dangling >actual 2>&1 &&
+	test_cmp expected actual
+'
+
+test_expect_success 'fsck error on gitattributes with excessive size' '
+	blob=$(test-tool genzeros $((100 * 1024 * 1024 + 1)) | git hash-object -w --stdin) &&
+	test_when_finished "remove_object $blob" &&
+	tree=$(printf "100644 blob %s\t%s\n" $blob .gitattributes | git mktree) &&
+	test_when_finished "remove_object $tree" &&
+	cat >expected <<-EOF &&
+	error in blob $blob: gitattributesLarge: .gitattributes too large to parse
+	EOF
+	test_must_fail git fsck --no-dangling >actual 2>&1 &&
+	test_cmp expected actual
+'
+
 test_done