| From f9eb3aecac9a4a8f1ec86ce131b7085e833f313a Mon Sep 17 00:00:00 2001 |
| From: Jeff King <peff@peff.net> |
| Date: Wed, 2 May 2018 15:44:51 -0400 |
| Subject: fsck: actually fsck blob data |
| |
| commit 7ac4f3a007e2567f9d2492806186aa063f9a08d6 upstream. |
| |
| Because fscking a blob has always been a noop, we didn't |
| bother passing around the blob data. In preparation for |
| content-level checks, let's fix up a few things: |
| |
| 1. The fsck_object() function just returns success for any |
| blob. Let's a noop fsck_blob(), which we can fill in |
| with actual logic later. |
| |
| 2. The fsck_loose() function in builtin/fsck.c |
| just threw away blob content after loading it. Let's |
| hold onto it until after we've called fsck_object(). |
| |
| The easiest way to do this is to just drop the |
| parse_loose_object() helper entirely. Incidentally, |
| this also fixes a memory leak: if we successfully |
| loaded the object data but did not parse it, we would |
| have left the function without freeing it. |
| |
| 3. When fsck_loose() loads the object data, it |
| does so with a custom read_loose_object() helper. This |
| function streams any blobs, regardless of size, under |
| the assumption that we're only checking the sha1. |
| |
| Instead, let's actually load blobs smaller than |
| big_file_threshold, as the normal object-reading |
| code-paths would do. This lets us fsck small files, and |
| a NULL return is an indication that the blob was so big |
| that it needed to be streamed, and we can pass that |
| information along to fsck_blob(). |
| |
| Signed-off-by: Jeff King <peff@peff.net> |
| Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> |
| --- |
| builtin/fsck.c | 51 +++++++++++++++++++++++++------------------------- |
| fsck.c | 8 +++++++- |
| sha1_file.c | 2 +- |
| 3 files changed, 33 insertions(+), 28 deletions(-) |
| |
| diff --git a/builtin/fsck.c b/builtin/fsck.c |
| index 4b91ee95e6..95053ec02f 100644 |
| --- a/builtin/fsck.c |
| +++ b/builtin/fsck.c |
| @@ -318,7 +318,7 @@ static void check_connectivity(void) |
| } |
| } |
| |
| -static int fsck_obj(struct object *obj) |
| +static int fsck_obj(struct object *obj, void *buffer, unsigned long size) |
| { |
| if (obj->flags & SEEN) |
| return 0; |
| @@ -330,7 +330,7 @@ static int fsck_obj(struct object *obj) |
| |
| if (fsck_walk(obj, NULL, &fsck_obj_options)) |
| objerror(obj, "broken links"); |
| - if (fsck_object(obj, NULL, 0, &fsck_obj_options)) |
| + if (fsck_object(obj, buffer, size, &fsck_obj_options)) |
| return -1; |
| |
| if (obj->type == OBJ_TREE) { |
| @@ -376,7 +376,7 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, |
| return error("%s: object corrupt or missing", sha1_to_hex(sha1)); |
| } |
| obj->flags = HAS_OBJ; |
| - return fsck_obj(obj); |
| + return fsck_obj(obj, buffer, size); |
| } |
| |
| static int default_refs; |
| @@ -476,43 +476,42 @@ static void get_default_heads(void) |
| } |
| } |
| |
| -static struct object *parse_loose_object(const unsigned char *sha1, |
| - const char *path) |
| +static int fsck_loose(const unsigned char *sha1, const char *path, void *data) |
| { |
| struct object *obj; |
| - void *contents; |
| enum object_type type; |
| unsigned long size; |
| + void *contents; |
| int eaten; |
| |
| - if (read_loose_object(path, sha1, &type, &size, &contents) < 0) |
| - return NULL; |
| + if (read_loose_object(path, sha1, &type, &size, &contents) < 0) { |
| + errors_found |= ERROR_OBJECT; |
| + error("%s: object corrupt or missing: %s", |
| + sha1_to_hex(sha1), path); |
| + return 0; /* keep checking other objects */ |
| + } |
| |
| if (!contents && type != OBJ_BLOB) |
| die("BUG: read_loose_object streamed a non-blob"); |
| |
| obj = parse_object_buffer(sha1, type, size, contents, &eaten); |
| |
| + if (!obj) { |
| + errors_found |= ERROR_OBJECT; |
| + error("%s: object could not be parsed: %s", |
| + sha1_to_hex(sha1), path); |
| + if (!eaten) |
| + free(contents); |
| + return 0; /* keep checking other objects */ |
| + } |
| + |
| + obj->flags = HAS_OBJ; |
| + if (fsck_obj(obj, contents, size)) |
| + errors_found |= ERROR_OBJECT; |
| + |
| if (!eaten) |
| free(contents); |
| - return obj; |
| -} |
| - |
| -static int fsck_loose(const unsigned char *sha1, const char *path, void *data) |
| -{ |
| - struct object *obj = parse_loose_object(sha1, path); |
| - |
| - if (!obj) { |
| - errors_found |= ERROR_OBJECT; |
| - error("%s: object corrupt or missing: %s", |
| - sha1_to_hex(sha1), path); |
| - return 0; /* keep checking other objects */ |
| - } |
| - |
| - obj->flags = HAS_OBJ; |
| - if (fsck_obj(obj)) |
| - errors_found |= ERROR_OBJECT; |
| - return 0; |
| + return 0; /* keep checking other objects, even if we saw an error */ |
| } |
| |
| static int fsck_cruft(const char *basename, const char *path, void *data) |
| diff --git a/fsck.c b/fsck.c |
| index 1336ead9eb..56546206ae 100644 |
| --- a/fsck.c |
| +++ b/fsck.c |
| @@ -893,6 +893,12 @@ static int fsck_tag(struct tag *tag, const char *data, |
| return fsck_tag_buffer(tag, data, size, options); |
| } |
| |
| +static int fsck_blob(struct blob *blob, const char *buf, |
| + unsigned long size, struct fsck_options *options) |
| +{ |
| + return 0; |
| +} |
| + |
| int fsck_object(struct object *obj, void *data, unsigned long size, |
| struct fsck_options *options) |
| { |
| @@ -900,7 +906,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size, |
| return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck"); |
| |
| if (obj->type == OBJ_BLOB) |
| - return 0; |
| + return fsck_blob((struct blob *)obj, data, size, options); |
| if (obj->type == OBJ_TREE) |
| return fsck_tree((struct tree *) obj, options); |
| if (obj->type == OBJ_COMMIT) |
| diff --git a/sha1_file.c b/sha1_file.c |
| index 0a609a5772..65db803392 100644 |
| --- a/sha1_file.c |
| +++ b/sha1_file.c |
| @@ -3876,7 +3876,7 @@ int read_loose_object(const char *path, |
| goto out; |
| } |
| |
| - if (*type == OBJ_BLOB) { |
| + if (*type == OBJ_BLOB && *size > big_file_threshold) { |
| if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0) |
| goto out; |
| } else { |
| -- |
| 2.17.0.921.gf22659ad46 |
| |