| /* |
| * path-walk.c: implementation for path-based walks of the object graph. |
| */ |
| #include "git-compat-util.h" |
| #include "path-walk.h" |
| #include "blob.h" |
| #include "commit.h" |
| #include "dir.h" |
| #include "hashmap.h" |
| #include "hex.h" |
| #include "list-objects.h" |
| #include "object.h" |
| #include "oid-array.h" |
| #include "prio-queue.h" |
| #include "repository.h" |
| #include "revision.h" |
| #include "string-list.h" |
| #include "strmap.h" |
| #include "tag.h" |
| #include "trace2.h" |
| #include "tree.h" |
| #include "tree-walk.h" |
| |
| static const char *root_path = ""; |
| |
| struct type_and_oid_list { |
| enum object_type type; |
| struct oid_array oids; |
| int maybe_interesting; |
| }; |
| |
| #define TYPE_AND_OID_LIST_INIT { \ |
| .type = OBJ_NONE, \ |
| .oids = OID_ARRAY_INIT \ |
| } |
| |
| struct path_walk_context { |
| /** |
| * Repeats of data in 'struct path_walk_info' for |
| * access with fewer characters. |
| */ |
| struct repository *repo; |
| struct rev_info *revs; |
| struct path_walk_info *info; |
| |
| /** |
| * Map a path to a 'struct type_and_oid_list' |
| * containing the objects discovered at that |
| * path. |
| */ |
| struct strmap paths_to_lists; |
| |
| /** |
| * Store the current list of paths in a priority queue, |
| * using object type as a sorting mechanism, mostly to |
| * make sure blobs are popped off the stack first. No |
| * other sort is made, so within each object type it acts |
| * like a stack and performs a DFS within the trees. |
| * |
| * Use path_stack_pushed to indicate whether a path |
| * was previously added to path_stack. |
| */ |
| struct prio_queue path_stack; |
| struct strset path_stack_pushed; |
| }; |
| |
| static int compare_by_type(const void *one, const void *two, void *cb_data) |
| { |
| struct type_and_oid_list *list1, *list2; |
| const char *str1 = one; |
| const char *str2 = two; |
| struct path_walk_context *ctx = cb_data; |
| |
| list1 = strmap_get(&ctx->paths_to_lists, str1); |
| list2 = strmap_get(&ctx->paths_to_lists, str2); |
| |
| /* |
| * If object types are equal, then use path comparison. |
| */ |
| if (!list1 || !list2 || list1->type == list2->type) |
| return strcmp(str1, str2); |
| |
| /* Prefer tags to be popped off first. */ |
| if (list1->type == OBJ_TAG) |
| return -1; |
| if (list2->type == OBJ_TAG) |
| return 1; |
| |
| /* Prefer blobs to be popped off second. */ |
| if (list1->type == OBJ_BLOB) |
| return -1; |
| if (list2->type == OBJ_BLOB) |
| return 1; |
| |
| return 0; |
| } |
| |
| static void push_to_stack(struct path_walk_context *ctx, |
| const char *path) |
| { |
| if (strset_contains(&ctx->path_stack_pushed, path)) |
| return; |
| |
| strset_add(&ctx->path_stack_pushed, path); |
| prio_queue_put(&ctx->path_stack, xstrdup(path)); |
| } |
| |
| static int add_tree_entries(struct path_walk_context *ctx, |
| const char *base_path, |
| struct object_id *oid) |
| { |
| struct tree_desc desc; |
| struct name_entry entry; |
| struct strbuf path = STRBUF_INIT; |
| size_t base_len; |
| struct tree *tree = lookup_tree(ctx->repo, oid); |
| |
| if (!tree) { |
| error(_("failed to walk children of tree %s: not found"), |
| oid_to_hex(oid)); |
| return -1; |
| } else if (parse_tree_gently(tree, 1)) { |
| error("bad tree object %s", oid_to_hex(oid)); |
| return -1; |
| } |
| |
| strbuf_addstr(&path, base_path); |
| base_len = path.len; |
| |
| init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size); |
| while (tree_entry(&desc, &entry)) { |
| struct type_and_oid_list *list; |
| struct object *o; |
| /* Not actually true, but we will ignore submodules later. */ |
| enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB; |
| |
| /* Skip submodules. */ |
| if (S_ISGITLINK(entry.mode)) |
| continue; |
| |
| /* If the caller doesn't want blobs, then don't bother. */ |
| if (!ctx->info->blobs && type == OBJ_BLOB) |
| continue; |
| |
| if (type == OBJ_TREE) { |
| struct tree *child = lookup_tree(ctx->repo, &entry.oid); |
| o = child ? &child->object : NULL; |
| } else if (type == OBJ_BLOB) { |
| struct blob *child = lookup_blob(ctx->repo, &entry.oid); |
| o = child ? &child->object : NULL; |
| } else { |
| BUG("invalid type for tree entry: %d", type); |
| } |
| |
| if (!o) { |
| error(_("failed to find object %s"), |
| oid_to_hex(&o->oid)); |
| return -1; |
| } |
| |
| /* Skip this object if already seen. */ |
| if (o->flags & SEEN) |
| continue; |
| o->flags |= SEEN; |
| |
| strbuf_setlen(&path, base_len); |
| strbuf_add(&path, entry.path, entry.pathlen); |
| |
| /* |
| * Trees will end with "/" for concatenation and distinction |
| * from blobs at the same path. |
| */ |
| if (type == OBJ_TREE) |
| strbuf_addch(&path, '/'); |
| |
| if (ctx->info->pl) { |
| int dtype; |
| enum pattern_match_result match; |
| match = path_matches_pattern_list(path.buf, path.len, |
| path.buf + base_len, &dtype, |
| ctx->info->pl, |
| ctx->repo->index); |
| |
| if (ctx->info->pl->use_cone_patterns && |
| match == NOT_MATCHED) |
| continue; |
| else if (!ctx->info->pl->use_cone_patterns && |
| type == OBJ_BLOB && |
| match != MATCHED) |
| continue; |
| } |
| |
| if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) { |
| CALLOC_ARRAY(list, 1); |
| list->type = type; |
| strmap_put(&ctx->paths_to_lists, path.buf, list); |
| } |
| push_to_stack(ctx, path.buf); |
| |
| if (!(o->flags & UNINTERESTING)) |
| list->maybe_interesting = 1; |
| |
| oid_array_append(&list->oids, &entry.oid); |
| } |
| |
| free_tree_buffer(tree); |
| strbuf_release(&path); |
| return 0; |
| } |
| |
| /* |
| * For each path in paths_to_explore, walk the trees another level |
| * and add any found blobs to the batch (but only if they exist and |
| * haven't been added yet). |
| */ |
| static int walk_path(struct path_walk_context *ctx, |
| const char *path) |
| { |
| struct type_and_oid_list *list; |
| int ret = 0; |
| |
| list = strmap_get(&ctx->paths_to_lists, path); |
| |
| if (!list) |
| BUG("provided path '%s' that had no associated list", path); |
| |
| if (!list->oids.nr) |
| return 0; |
| |
| if (ctx->info->prune_all_uninteresting) { |
| /* |
| * This is true if all objects were UNINTERESTING |
| * when added to the list. |
| */ |
| if (!list->maybe_interesting) |
| return 0; |
| |
| /* |
| * But it's still possible that the objects were set |
| * as UNINTERESTING after being added. Do a quick check. |
| */ |
| list->maybe_interesting = 0; |
| for (size_t i = 0; |
| !list->maybe_interesting && i < list->oids.nr; |
| i++) { |
| if (list->type == OBJ_TREE) { |
| struct tree *t = lookup_tree(ctx->repo, |
| &list->oids.oid[i]); |
| if (t && !(t->object.flags & UNINTERESTING)) |
| list->maybe_interesting = 1; |
| } else if (list->type == OBJ_BLOB) { |
| struct blob *b = lookup_blob(ctx->repo, |
| &list->oids.oid[i]); |
| if (b && !(b->object.flags & UNINTERESTING)) |
| list->maybe_interesting = 1; |
| } else { |
| /* Tags are always interesting if visited. */ |
| list->maybe_interesting = 1; |
| } |
| } |
| |
| /* We have confirmed that all objects are UNINTERESTING. */ |
| if (!list->maybe_interesting) |
| return 0; |
| } |
| |
| /* Evaluate function pointer on this data, if requested. */ |
| if ((list->type == OBJ_TREE && ctx->info->trees) || |
| (list->type == OBJ_BLOB && ctx->info->blobs) || |
| (list->type == OBJ_TAG && ctx->info->tags)) |
| ret = ctx->info->path_fn(path, &list->oids, list->type, |
| ctx->info->path_fn_data); |
| |
| /* Expand data for children. */ |
| if (list->type == OBJ_TREE) { |
| for (size_t i = 0; i < list->oids.nr; i++) { |
| ret |= add_tree_entries(ctx, |
| path, |
| &list->oids.oid[i]); |
| } |
| } |
| |
| oid_array_clear(&list->oids); |
| strmap_remove(&ctx->paths_to_lists, path, 1); |
| return ret; |
| } |
| |
| static void clear_paths_to_lists(struct strmap *map) |
| { |
| struct hashmap_iter iter; |
| struct strmap_entry *e; |
| |
| hashmap_for_each_entry(&map->map, &iter, e, ent) { |
| struct type_and_oid_list *list = e->value; |
| oid_array_clear(&list->oids); |
| } |
| strmap_clear(map, 1); |
| strmap_init(map); |
| } |
| |
| static struct repository *edge_repo; |
| static struct type_and_oid_list *edge_tree_list; |
| |
| static void show_edge(struct commit *commit) |
| { |
| struct tree *t = repo_get_commit_tree(edge_repo, commit); |
| |
| if (!t) |
| return; |
| |
| if (commit->object.flags & UNINTERESTING) |
| t->object.flags |= UNINTERESTING; |
| |
| if (t->object.flags & SEEN) |
| return; |
| t->object.flags |= SEEN; |
| |
| oid_array_append(&edge_tree_list->oids, &t->object.oid); |
| } |
| |
| static int setup_pending_objects(struct path_walk_info *info, |
| struct path_walk_context *ctx) |
| { |
| struct type_and_oid_list *tags = NULL; |
| struct type_and_oid_list *tagged_blobs = NULL; |
| struct type_and_oid_list *root_tree_list = NULL; |
| |
| if (info->tags) |
| CALLOC_ARRAY(tags, 1); |
| if (info->blobs) |
| CALLOC_ARRAY(tagged_blobs, 1); |
| if (info->trees) |
| root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); |
| |
| /* |
| * Pending objects include: |
| * * Commits at branch tips. |
| * * Annotated tags at tag tips. |
| * * Any kind of object at lightweight tag tips. |
| * * Trees and blobs in the index (with an associated path). |
| */ |
| for (size_t i = 0; i < info->revs->pending.nr; i++) { |
| struct object_array_entry *pending = info->revs->pending.objects + i; |
| struct object *obj = pending->item; |
| |
| /* Commits will be picked up by revision walk. */ |
| if (obj->type == OBJ_COMMIT) |
| continue; |
| |
| /* Navigate annotated tag object chains. */ |
| while (obj->type == OBJ_TAG) { |
| struct tag *tag = lookup_tag(info->revs->repo, &obj->oid); |
| if (!tag) { |
| error(_("failed to find tag %s"), |
| oid_to_hex(&obj->oid)); |
| return -1; |
| } |
| if (tag->object.flags & SEEN) |
| break; |
| tag->object.flags |= SEEN; |
| |
| if (tags) |
| oid_array_append(&tags->oids, &obj->oid); |
| obj = tag->tagged; |
| } |
| |
| if (obj->type == OBJ_TAG) |
| continue; |
| |
| /* We are now at a non-tag object. */ |
| if (obj->flags & SEEN) |
| continue; |
| obj->flags |= SEEN; |
| |
| switch (obj->type) { |
| case OBJ_TREE: |
| if (!info->trees) |
| continue; |
| if (pending->path) { |
| struct type_and_oid_list *list; |
| char *path = *pending->path ? xstrfmt("%s/", pending->path) |
| : xstrdup(""); |
| if (!(list = strmap_get(&ctx->paths_to_lists, path))) { |
| CALLOC_ARRAY(list, 1); |
| list->type = OBJ_TREE; |
| strmap_put(&ctx->paths_to_lists, path, list); |
| } |
| oid_array_append(&list->oids, &obj->oid); |
| free(path); |
| } else { |
| /* assume a root tree, such as a lightweight tag. */ |
| oid_array_append(&root_tree_list->oids, &obj->oid); |
| } |
| break; |
| |
| case OBJ_BLOB: |
| if (!info->blobs) |
| continue; |
| if (pending->path) { |
| struct type_and_oid_list *list; |
| char *path = pending->path; |
| if (!(list = strmap_get(&ctx->paths_to_lists, path))) { |
| CALLOC_ARRAY(list, 1); |
| list->type = OBJ_BLOB; |
| strmap_put(&ctx->paths_to_lists, path, list); |
| } |
| oid_array_append(&list->oids, &obj->oid); |
| } else { |
| /* assume a root tree, such as a lightweight tag. */ |
| oid_array_append(&tagged_blobs->oids, &obj->oid); |
| } |
| break; |
| |
| case OBJ_COMMIT: |
| /* Make sure it is in the object walk */ |
| if (obj != pending->item) |
| add_pending_object(info->revs, obj, ""); |
| break; |
| |
| default: |
| BUG("should not see any other type here"); |
| } |
| } |
| |
| /* |
| * Add tag objects and tagged blobs if they exist. |
| */ |
| if (tagged_blobs) { |
| if (tagged_blobs->oids.nr) { |
| const char *tagged_blob_path = "/tagged-blobs"; |
| tagged_blobs->type = OBJ_BLOB; |
| tagged_blobs->maybe_interesting = 1; |
| strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs); |
| push_to_stack(ctx, tagged_blob_path); |
| } else { |
| oid_array_clear(&tagged_blobs->oids); |
| free(tagged_blobs); |
| } |
| } |
| if (tags) { |
| if (tags->oids.nr) { |
| const char *tag_path = "/tags"; |
| tags->type = OBJ_TAG; |
| tags->maybe_interesting = 1; |
| strmap_put(&ctx->paths_to_lists, tag_path, tags); |
| push_to_stack(ctx, tag_path); |
| } else { |
| oid_array_clear(&tags->oids); |
| free(tags); |
| } |
| } |
| |
| return 0; |
| } |
| |
| /** |
| * Given the configuration of 'info', walk the commits based on 'info->revs' and |
| * call 'info->path_fn' on each discovered path. |
| * |
| * Returns nonzero on an error. |
| */ |
| int walk_objects_by_path(struct path_walk_info *info) |
| { |
| int ret; |
| size_t commits_nr = 0, paths_nr = 0; |
| struct commit *c; |
| struct type_and_oid_list *root_tree_list; |
| struct type_and_oid_list *commit_list; |
| struct path_walk_context ctx = { |
| .repo = info->revs->repo, |
| .revs = info->revs, |
| .info = info, |
| .path_stack = { |
| .compare = compare_by_type, |
| .cb_data = &ctx |
| }, |
| .path_stack_pushed = STRSET_INIT, |
| .paths_to_lists = STRMAP_INIT |
| }; |
| |
| trace2_region_enter("path-walk", "commit-walk", info->revs->repo); |
| |
| CALLOC_ARRAY(commit_list, 1); |
| commit_list->type = OBJ_COMMIT; |
| |
| if (info->tags) |
| info->revs->tag_objects = 1; |
| |
| /* Insert a single list for the root tree into the paths. */ |
| CALLOC_ARRAY(root_tree_list, 1); |
| root_tree_list->type = OBJ_TREE; |
| root_tree_list->maybe_interesting = 1; |
| strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); |
| push_to_stack(&ctx, root_path); |
| |
| /* |
| * Set these values before preparing the walk to catch |
| * lightweight tags pointing to non-commits and indexed objects. |
| */ |
| info->revs->blob_objects = info->blobs; |
| info->revs->tree_objects = info->trees; |
| |
| if (prepare_revision_walk(info->revs)) |
| die(_("failed to setup revision walk")); |
| |
| /* Walk trees to mark them as UNINTERESTING. */ |
| edge_repo = info->revs->repo; |
| edge_tree_list = root_tree_list; |
| mark_edges_uninteresting(info->revs, show_edge, |
| info->prune_all_uninteresting); |
| edge_repo = NULL; |
| edge_tree_list = NULL; |
| |
| info->revs->blob_objects = info->revs->tree_objects = 0; |
| |
| trace2_region_enter("path-walk", "pending-walk", info->revs->repo); |
| ret = setup_pending_objects(info, &ctx); |
| trace2_region_leave("path-walk", "pending-walk", info->revs->repo); |
| |
| if (ret) |
| return ret; |
| |
| while ((c = get_revision(info->revs))) { |
| struct object_id *oid; |
| struct tree *t; |
| commits_nr++; |
| |
| if (info->commits) |
| oid_array_append(&commit_list->oids, |
| &c->object.oid); |
| |
| /* If we only care about commits, then skip trees. */ |
| if (!info->trees && !info->blobs) |
| continue; |
| |
| oid = get_commit_tree_oid(c); |
| t = lookup_tree(info->revs->repo, oid); |
| |
| if (!t) { |
| error("could not find tree %s", oid_to_hex(oid)); |
| return -1; |
| } |
| |
| if (t->object.flags & SEEN) |
| continue; |
| t->object.flags |= SEEN; |
| oid_array_append(&root_tree_list->oids, oid); |
| } |
| |
| trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); |
| trace2_region_leave("path-walk", "commit-walk", info->revs->repo); |
| |
| /* Track all commits. */ |
| if (info->commits && commit_list->oids.nr) |
| ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT, |
| info->path_fn_data); |
| oid_array_clear(&commit_list->oids); |
| free(commit_list); |
| |
| trace2_region_enter("path-walk", "path-walk", info->revs->repo); |
| while (!ret && ctx.path_stack.nr) { |
| char *path = prio_queue_get(&ctx.path_stack); |
| paths_nr++; |
| |
| ret = walk_path(&ctx, path); |
| |
| free(path); |
| } |
| |
| /* Are there paths remaining? Likely they are from indexed objects. */ |
| if (!strmap_empty(&ctx.paths_to_lists)) { |
| struct hashmap_iter iter; |
| struct strmap_entry *entry; |
| |
| strmap_for_each_entry(&ctx.paths_to_lists, &iter, entry) |
| push_to_stack(&ctx, entry->key); |
| |
| while (!ret && ctx.path_stack.nr) { |
| char *path = prio_queue_get(&ctx.path_stack); |
| paths_nr++; |
| |
| ret = walk_path(&ctx, path); |
| |
| free(path); |
| } |
| } |
| |
| trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr); |
| trace2_region_leave("path-walk", "path-walk", info->revs->repo); |
| |
| clear_paths_to_lists(&ctx.paths_to_lists); |
| strset_clear(&ctx.path_stack_pushed); |
| clear_prio_queue(&ctx.path_stack); |
| return ret; |
| } |
| |
| void path_walk_info_init(struct path_walk_info *info) |
| { |
| struct path_walk_info empty = PATH_WALK_INFO_INIT; |
| memcpy(info, &empty, sizeof(empty)); |
| } |
| |
| void path_walk_info_clear(struct path_walk_info *info) |
| { |
| if (info->pl) { |
| clear_pattern_list(info->pl); |
| free(info->pl); |
| } |
| } |