Merge branch 'ew/many-alternate-optim'
Optimization for repositories with many alternate object store.
* ew/many-alternate-optim:
oidtree: a crit-bit tree for odb_loose_cache
oidcpy_with_padding: constify `src' arg
make object_directory.loose_objects_subdir_seen a bitmap
avoid strlen via strbuf_addstr in link_alt_odb_entry
speed up alt_odb_usable() with many alternates
diff --git a/Makefile b/Makefile
index e79534b..c6f6246 100644
--- a/Makefile
+++ b/Makefile
@@ -726,6 +726,7 @@
TEST_BUILTINS_OBJS += test-mktemp.o
TEST_BUILTINS_OBJS += test-oid-array.o
TEST_BUILTINS_OBJS += test-oidmap.o
+TEST_BUILTINS_OBJS += test-oidtree.o
TEST_BUILTINS_OBJS += test-online-cpus.o
TEST_BUILTINS_OBJS += test-parse-options.o
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
@@ -850,6 +851,7 @@
LIB_OBJS += bulk-checkin.o
LIB_OBJS += bundle.o
LIB_OBJS += cache-tree.o
+LIB_OBJS += cbtree.o
LIB_OBJS += chdir-notify.o
LIB_OBJS += checkout.o
LIB_OBJS += chunk-format.o
@@ -945,6 +947,7 @@
LIB_OBJS += oid-array.o
LIB_OBJS += oidmap.o
LIB_OBJS += oidset.o
+LIB_OBJS += oidtree.o
LIB_OBJS += pack-bitmap-write.o
LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-check.o
diff --git a/cbtree.c b/cbtree.c
new file mode 100644
index 0000000..b0c65d8
--- /dev/null
+++ b/cbtree.c
@@ -0,0 +1,167 @@
+/*
+ * crit-bit tree implementation, does no allocations internally
+ * For more information on crit-bit trees: https://cr.yp.to/critbit.html
+ * Based on Adam Langley's adaptation of Dan Bernstein's public domain code
+ * git clone https://github.com/agl/critbit.git
+ */
+#include "cbtree.h"
+
+static struct cb_node *cb_node_of(const void *p)
+{
+ return (struct cb_node *)((uintptr_t)p - 1);
+}
+
+/* locate the best match, does not do a final comparision */
+static struct cb_node *cb_internal_best_match(struct cb_node *p,
+ const uint8_t *k, size_t klen)
+{
+ while (1 & (uintptr_t)p) {
+ struct cb_node *q = cb_node_of(p);
+ uint8_t c = q->byte < klen ? k[q->byte] : 0;
+ size_t direction = (1 + (q->otherbits | c)) >> 8;
+
+ p = q->child[direction];
+ }
+ return p;
+}
+
+/* returns NULL if successful, existing cb_node if duplicate */
+struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen)
+{
+ size_t newbyte, newotherbits;
+ uint8_t c;
+ int newdirection;
+ struct cb_node **wherep, *p;
+
+ assert(!((uintptr_t)node & 1)); /* allocations must be aligned */
+
+ if (!t->root) { /* insert into empty tree */
+ t->root = node;
+ return NULL; /* success */
+ }
+
+ /* see if a node already exists */
+ p = cb_internal_best_match(t->root, node->k, klen);
+
+ /* find first differing byte */
+ for (newbyte = 0; newbyte < klen; newbyte++) {
+ if (p->k[newbyte] != node->k[newbyte])
+ goto different_byte_found;
+ }
+ return p; /* element exists, let user deal with it */
+
+different_byte_found:
+ newotherbits = p->k[newbyte] ^ node->k[newbyte];
+ newotherbits |= newotherbits >> 1;
+ newotherbits |= newotherbits >> 2;
+ newotherbits |= newotherbits >> 4;
+ newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255;
+ c = p->k[newbyte];
+ newdirection = (1 + (newotherbits | c)) >> 8;
+
+ node->byte = newbyte;
+ node->otherbits = newotherbits;
+ node->child[1 - newdirection] = node;
+
+ /* find a place to insert it */
+ wherep = &t->root;
+ for (;;) {
+ struct cb_node *q;
+ size_t direction;
+
+ p = *wherep;
+ if (!(1 & (uintptr_t)p))
+ break;
+ q = cb_node_of(p);
+ if (q->byte > newbyte)
+ break;
+ if (q->byte == newbyte && q->otherbits > newotherbits)
+ break;
+ c = q->byte < klen ? node->k[q->byte] : 0;
+ direction = (1 + (q->otherbits | c)) >> 8;
+ wherep = q->child + direction;
+ }
+
+ node->child[newdirection] = *wherep;
+ *wherep = (struct cb_node *)(1 + (uintptr_t)node);
+
+ return NULL; /* success */
+}
+
+struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen)
+{
+ struct cb_node *p = cb_internal_best_match(t->root, k, klen);
+
+ return p && !memcmp(p->k, k, klen) ? p : NULL;
+}
+
+struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen)
+{
+ struct cb_node **wherep = &t->root;
+ struct cb_node **whereq = NULL;
+ struct cb_node *q = NULL;
+ size_t direction = 0;
+ uint8_t c;
+ struct cb_node *p = t->root;
+
+ if (!p) return NULL; /* empty tree, nothing to delete */
+
+ /* traverse to find best match, keeping link to parent */
+ while (1 & (uintptr_t)p) {
+ whereq = wherep;
+ q = cb_node_of(p);
+ c = q->byte < klen ? k[q->byte] : 0;
+ direction = (1 + (q->otherbits | c)) >> 8;
+ wherep = q->child + direction;
+ p = *wherep;
+ }
+
+ if (memcmp(p->k, k, klen))
+ return NULL; /* no match, nothing unlinked */
+
+ /* found an exact match */
+ if (whereq) /* update parent */
+ *whereq = q->child[1 - direction];
+ else
+ t->root = NULL;
+ return p;
+}
+
+static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg)
+{
+ if (1 & (uintptr_t)p) {
+ struct cb_node *q = cb_node_of(p);
+ enum cb_next n = cb_descend(q->child[0], fn, arg);
+
+ return n == CB_BREAK ? n : cb_descend(q->child[1], fn, arg);
+ } else {
+ return fn(p, arg);
+ }
+}
+
+void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen,
+ cb_iter fn, void *arg)
+{
+ struct cb_node *p = t->root;
+ struct cb_node *top = p;
+ size_t i = 0;
+
+ if (!p) return; /* empty tree */
+
+ /* Walk tree, maintaining top pointer */
+ while (1 & (uintptr_t)p) {
+ struct cb_node *q = cb_node_of(p);
+ uint8_t c = q->byte < klen ? kpfx[q->byte] : 0;
+ size_t direction = (1 + (q->otherbits | c)) >> 8;
+
+ p = q->child[direction];
+ if (q->byte < klen)
+ top = p;
+ }
+
+ for (i = 0; i < klen; i++) {
+ if (p->k[i] != kpfx[i])
+ return; /* "best" match failed */
+ }
+ cb_descend(top, fn, arg);
+}
diff --git a/cbtree.h b/cbtree.h
new file mode 100644
index 0000000..fe45870
--- /dev/null
+++ b/cbtree.h
@@ -0,0 +1,56 @@
+/*
+ * crit-bit tree implementation, does no allocations internally
+ * For more information on crit-bit trees: https://cr.yp.to/critbit.html
+ * Based on Adam Langley's adaptation of Dan Bernstein's public domain code
+ * git clone https://github.com/agl/critbit.git
+ *
+ * This is adapted to store arbitrary data (not just NUL-terminated C strings
+ * and allocates no memory internally. The user needs to allocate
+ * "struct cb_node" and fill cb_node.k[] with arbitrary match data
+ * for memcmp.
+ * If "klen" is variable, then it should be embedded into "c_node.k[]"
+ * Recursion is bound by the maximum value of "klen" used.
+ */
+#ifndef CBTREE_H
+#define CBTREE_H
+
+#include "git-compat-util.h"
+
+struct cb_node;
+struct cb_node {
+ struct cb_node *child[2];
+ /*
+ * n.b. uint32_t for `byte' is excessive for OIDs,
+ * we may consider shorter variants if nothing else gets stored.
+ */
+ uint32_t byte;
+ uint8_t otherbits;
+ uint8_t k[FLEX_ARRAY]; /* arbitrary data */
+};
+
+struct cb_tree {
+ struct cb_node *root;
+};
+
+enum cb_next {
+ CB_CONTINUE = 0,
+ CB_BREAK = 1
+};
+
+#define CBTREE_INIT { .root = NULL }
+
+static inline void cb_init(struct cb_tree *t)
+{
+ t->root = NULL;
+}
+
+struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen);
+struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen);
+struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen);
+
+typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg);
+
+void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen,
+ cb_iter, void *arg);
+
+#endif /* CBTREE_H */
diff --git a/dir.c b/dir.c
index 313e932..0efa67f 100644
--- a/dir.c
+++ b/dir.c
@@ -78,11 +78,21 @@ int fspathcmp(const char *a, const char *b)
return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
}
+int fspatheq(const char *a, const char *b)
+{
+ return !fspathcmp(a, b);
+}
+
int fspathncmp(const char *a, const char *b, size_t count)
{
return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
}
+unsigned int fspathhash(const char *str)
+{
+ return ignore_case ? strihash(str) : strhash(str);
+}
+
int git_fnmatch(const struct pathspec_item *item,
const char *pattern, const char *string,
int prefix)
diff --git a/dir.h b/dir.h
index 8d0ddd8..b3e1a54a 100644
--- a/dir.h
+++ b/dir.h
@@ -489,7 +489,9 @@ int remove_dir_recursively(struct strbuf *path, int flag);
int remove_path(const char *path);
int fspathcmp(const char *a, const char *b);
+int fspatheq(const char *a, const char *b);
int fspathncmp(const char *a, const char *b, size_t count);
+unsigned int fspathhash(const char *str);
/*
* The prefix part of pattern must not contains wildcards.
diff --git a/hash.h b/hash.h
index 9c6df4d..27a1802 100644
--- a/hash.h
+++ b/hash.h
@@ -265,7 +265,7 @@ static inline void oidcpy(struct object_id *dst, const struct object_id *src)
/* Like oidcpy() but zero-pads the unused bytes in dst's hash array. */
static inline void oidcpy_with_padding(struct object_id *dst,
- struct object_id *src)
+ const struct object_id *src)
{
size_t hashsz;
diff --git a/object-file.c b/object-file.c
index ecca5a8..3d27dc8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -517,9 +517,9 @@ const char *loose_object_path(struct repository *r, struct strbuf *buf,
*/
static int alt_odb_usable(struct raw_object_store *o,
struct strbuf *path,
- const char *normalized_objdir)
+ const char *normalized_objdir, khiter_t *pos)
{
- struct object_directory *odb;
+ int r;
/* Detect cases where alternate disappeared */
if (!is_directory(path->buf)) {
@@ -533,14 +533,20 @@ static int alt_odb_usable(struct raw_object_store *o,
* Prevent the common mistake of listing the same
* thing twice, or object directory itself.
*/
- for (odb = o->odb; odb; odb = odb->next) {
- if (!fspathcmp(path->buf, odb->path))
- return 0;
- }
- if (!fspathcmp(path->buf, normalized_objdir))
- return 0;
+ if (!o->odb_by_path) {
+ khiter_t p;
- return 1;
+ o->odb_by_path = kh_init_odb_path_map();
+ assert(!o->odb->next);
+ p = kh_put_odb_path_map(o->odb_by_path, o->odb->path, &r);
+ assert(r == 1); /* never used */
+ kh_value(o->odb_by_path, p) = o->odb;
+ }
+ if (fspatheq(path->buf, normalized_objdir))
+ return 0;
+ *pos = kh_put_odb_path_map(o->odb_by_path, path->buf, &r);
+ /* r: 0 = exists, 1 = never used, 2 = deleted */
+ return r == 0 ? 0 : 1;
}
/*
@@ -561,17 +567,18 @@ static int alt_odb_usable(struct raw_object_store *o,
static void read_info_alternates(struct repository *r,
const char *relative_base,
int depth);
-static int link_alt_odb_entry(struct repository *r, const char *entry,
+static int link_alt_odb_entry(struct repository *r, const struct strbuf *entry,
const char *relative_base, int depth, const char *normalized_objdir)
{
struct object_directory *ent;
struct strbuf pathbuf = STRBUF_INIT;
+ khiter_t pos;
- if (!is_absolute_path(entry) && relative_base) {
+ if (!is_absolute_path(entry->buf) && relative_base) {
strbuf_realpath(&pathbuf, relative_base, 1);
strbuf_addch(&pathbuf, '/');
}
- strbuf_addstr(&pathbuf, entry);
+ strbuf_addbuf(&pathbuf, entry);
if (strbuf_normalize_path(&pathbuf) < 0 && relative_base) {
error(_("unable to normalize alternate object path: %s"),
@@ -587,23 +594,25 @@ static int link_alt_odb_entry(struct repository *r, const char *entry,
while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
strbuf_setlen(&pathbuf, pathbuf.len - 1);
- if (!alt_odb_usable(r->objects, &pathbuf, normalized_objdir)) {
+ if (!alt_odb_usable(r->objects, &pathbuf, normalized_objdir, &pos)) {
strbuf_release(&pathbuf);
return -1;
}
CALLOC_ARRAY(ent, 1);
- ent->path = xstrdup(pathbuf.buf);
+ /* pathbuf.buf is already in r->objects->odb_by_path */
+ ent->path = strbuf_detach(&pathbuf, NULL);
/* add the alternate entry */
*r->objects->odb_tail = ent;
r->objects->odb_tail = &(ent->next);
ent->next = NULL;
+ assert(r->objects->odb_by_path);
+ kh_value(r->objects->odb_by_path, pos) = ent;
/* recursively add alternates */
- read_info_alternates(r, pathbuf.buf, depth + 1);
+ read_info_alternates(r, ent->path, depth + 1);
- strbuf_release(&pathbuf);
return 0;
}
@@ -660,7 +669,7 @@ static void link_alt_odb_entries(struct repository *r, const char *alt,
alt = parse_alt_odb_entry(alt, sep, &entry);
if (!entry.len)
continue;
- link_alt_odb_entry(r, entry.buf,
+ link_alt_odb_entry(r, &entry,
relative_base, depth, objdirbuf.buf);
}
strbuf_release(&entry);
@@ -1178,7 +1187,7 @@ static int quick_has_loose(struct repository *r,
prepare_alt_odb(r);
for (odb = r->objects->odb; odb; odb = odb->next) {
- if (oid_array_lookup(odb_loose_cache(odb, oid), oid) >= 0)
+ if (oidtree_contains(odb_loose_cache(odb, oid), oid))
return 1;
}
return 0;
@@ -2454,39 +2463,45 @@ int for_each_loose_object(each_loose_object_fn cb, void *data,
static int append_loose_object(const struct object_id *oid, const char *path,
void *data)
{
- oid_array_append(data, oid);
+ oidtree_insert(data, oid);
return 0;
}
-struct oid_array *odb_loose_cache(struct object_directory *odb,
+struct oidtree *odb_loose_cache(struct object_directory *odb,
const struct object_id *oid)
{
int subdir_nr = oid->hash[0];
struct strbuf buf = STRBUF_INIT;
+ size_t word_bits = bitsizeof(odb->loose_objects_subdir_seen[0]);
+ size_t word_index = subdir_nr / word_bits;
+ size_t mask = 1 << (subdir_nr % word_bits);
+ uint32_t *bitmap;
if (subdir_nr < 0 ||
- subdir_nr >= ARRAY_SIZE(odb->loose_objects_subdir_seen))
+ subdir_nr >= bitsizeof(odb->loose_objects_subdir_seen))
BUG("subdir_nr out of range");
- if (odb->loose_objects_subdir_seen[subdir_nr])
- return &odb->loose_objects_cache[subdir_nr];
-
+ bitmap = &odb->loose_objects_subdir_seen[word_index];
+ if (*bitmap & mask)
+ return odb->loose_objects_cache;
+ if (!odb->loose_objects_cache) {
+ ALLOC_ARRAY(odb->loose_objects_cache, 1);
+ oidtree_init(odb->loose_objects_cache);
+ }
strbuf_addstr(&buf, odb->path);
for_each_file_in_obj_subdir(subdir_nr, &buf,
append_loose_object,
NULL, NULL,
- &odb->loose_objects_cache[subdir_nr]);
- odb->loose_objects_subdir_seen[subdir_nr] = 1;
+ odb->loose_objects_cache);
+ *bitmap |= mask;
strbuf_release(&buf);
- return &odb->loose_objects_cache[subdir_nr];
+ return odb->loose_objects_cache;
}
void odb_clear_loose_cache(struct object_directory *odb)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(odb->loose_objects_cache); i++)
- oid_array_clear(&odb->loose_objects_cache[i]);
+ oidtree_clear(odb->loose_objects_cache);
+ FREE_AND_NULL(odb->loose_objects_cache);
memset(&odb->loose_objects_subdir_seen, 0,
sizeof(odb->loose_objects_subdir_seen));
}
diff --git a/object-name.c b/object-name.c
index 64202de..3263c19 100644
--- a/object-name.c
+++ b/object-name.c
@@ -87,27 +87,21 @@ static void update_candidates(struct disambiguate_state *ds, const struct object
static int match_hash(unsigned, const unsigned char *, const unsigned char *);
+static enum cb_next match_prefix(const struct object_id *oid, void *arg)
+{
+ struct disambiguate_state *ds = arg;
+ /* no need to call match_hash, oidtree_each did prefix match */
+ update_candidates(ds, oid);
+ return ds->ambiguous ? CB_BREAK : CB_CONTINUE;
+}
+
static void find_short_object_filename(struct disambiguate_state *ds)
{
struct object_directory *odb;
- for (odb = ds->repo->objects->odb; odb && !ds->ambiguous; odb = odb->next) {
- int pos;
- struct oid_array *loose_objects;
-
- loose_objects = odb_loose_cache(odb, &ds->bin_pfx);
- pos = oid_array_lookup(loose_objects, &ds->bin_pfx);
- if (pos < 0)
- pos = -1 - pos;
- while (!ds->ambiguous && pos < loose_objects->nr) {
- const struct object_id *oid;
- oid = loose_objects->oid + pos;
- if (!match_hash(ds->len, ds->bin_pfx.hash, oid->hash))
- break;
- update_candidates(ds, oid);
- pos++;
- }
- }
+ for (odb = ds->repo->objects->odb; odb && !ds->ambiguous; odb = odb->next)
+ oidtree_each(odb_loose_cache(odb, &ds->bin_pfx),
+ &ds->bin_pfx, ds->len, match_prefix, ds);
}
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
diff --git a/object-store.h b/object-store.h
index ec32c23..e679acc 100644
--- a/object-store.h
+++ b/object-store.h
@@ -7,6 +7,9 @@
#include "oid-array.h"
#include "strbuf.h"
#include "thread-utils.h"
+#include "khash.h"
+#include "dir.h"
+#include "oidtree.h"
struct object_directory {
struct object_directory *next;
@@ -20,8 +23,8 @@ struct object_directory {
*
* Be sure to call odb_load_loose_cache() before using.
*/
- char loose_objects_subdir_seen[256];
- struct oid_array loose_objects_cache[256];
+ uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
+ struct oidtree *loose_objects_cache;
/*
* Path to the alternative object store. If this is a relative path,
@@ -30,6 +33,9 @@ struct object_directory {
char *path;
};
+KHASH_INIT(odb_path_map, const char * /* key: odb_path */,
+ struct object_directory *, 1, fspathhash, fspatheq);
+
void prepare_alt_odb(struct repository *r);
char *compute_alternate_path(const char *path, struct strbuf *err);
typedef int alt_odb_fn(struct object_directory *, void *);
@@ -54,7 +60,7 @@ void add_to_alternates_memory(const char *dir);
* Populate and return the loose object cache array corresponding to the
* given object ID.
*/
-struct oid_array *odb_loose_cache(struct object_directory *odb,
+struct oidtree *odb_loose_cache(struct object_directory *odb,
const struct object_id *oid);
/* Empty the loose object cache for the specified object directory. */
@@ -116,6 +122,8 @@ struct raw_object_store {
*/
struct object_directory *odb;
struct object_directory **odb_tail;
+ kh_odb_path_map_t *odb_by_path;
+
int loaded_alternates;
/*
diff --git a/object.c b/object.c
index 1418845..2b3c075 100644
--- a/object.c
+++ b/object.c
@@ -511,6 +511,8 @@ static void free_object_directories(struct raw_object_store *o)
free_object_directory(o->odb);
o->odb = next;
}
+ kh_destroy_odb_path_map(o->odb_by_path);
+ o->odb_by_path = NULL;
}
void raw_object_store_clear(struct raw_object_store *o)
diff --git a/oidtree.c b/oidtree.c
new file mode 100644
index 0000000..7eb0e9b
--- /dev/null
+++ b/oidtree.c
@@ -0,0 +1,104 @@
+/*
+ * A wrapper around cbtree which stores oids
+ * May be used to replace oid-array for prefix (abbreviation) matches
+ */
+#include "oidtree.h"
+#include "alloc.h"
+#include "hash.h"
+
+struct oidtree_node {
+ /* n.k[] is used to store "struct object_id" */
+ struct cb_node n;
+};
+
+struct oidtree_iter_data {
+ oidtree_iter fn;
+ void *arg;
+ size_t *last_nibble_at;
+ int algo;
+ uint8_t last_byte;
+};
+
+void oidtree_init(struct oidtree *ot)
+{
+ cb_init(&ot->tree);
+ mem_pool_init(&ot->mem_pool, 0);
+}
+
+void oidtree_clear(struct oidtree *ot)
+{
+ if (ot) {
+ mem_pool_discard(&ot->mem_pool, 0);
+ oidtree_init(ot);
+ }
+}
+
+void oidtree_insert(struct oidtree *ot, const struct object_id *oid)
+{
+ struct oidtree_node *on;
+
+ if (!oid->algo)
+ BUG("oidtree_insert requires oid->algo");
+
+ on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid));
+ oidcpy_with_padding((struct object_id *)on->n.k, oid);
+
+ /*
+ * n.b. Current callers won't get us duplicates, here. If a
+ * future caller causes duplicates, there'll be a a small leak
+ * that won't be freed until oidtree_clear. Currently it's not
+ * worth maintaining a free list
+ */
+ cb_insert(&ot->tree, &on->n, sizeof(*oid));
+}
+
+
+int oidtree_contains(struct oidtree *ot, const struct object_id *oid)
+{
+ struct object_id k;
+ size_t klen = sizeof(k);
+
+ oidcpy_with_padding(&k, oid);
+
+ if (oid->algo == GIT_HASH_UNKNOWN)
+ klen -= sizeof(oid->algo);
+
+ /* cb_lookup relies on memcmp on the struct, so order matters: */
+ klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) <
+ offsetof(struct object_id, algo));
+
+ return cb_lookup(&ot->tree, (const uint8_t *)&k, klen) ? 1 : 0;
+}
+
+static enum cb_next iter(struct cb_node *n, void *arg)
+{
+ struct oidtree_iter_data *x = arg;
+ const struct object_id *oid = (const struct object_id *)n->k;
+
+ if (x->algo != GIT_HASH_UNKNOWN && x->algo != oid->algo)
+ return CB_CONTINUE;
+
+ if (x->last_nibble_at) {
+ if ((oid->hash[*x->last_nibble_at] ^ x->last_byte) & 0xf0)
+ return CB_CONTINUE;
+ }
+
+ return x->fn(oid, x->arg);
+}
+
+void oidtree_each(struct oidtree *ot, const struct object_id *oid,
+ size_t oidhexsz, oidtree_iter fn, void *arg)
+{
+ size_t klen = oidhexsz / 2;
+ struct oidtree_iter_data x = { 0 };
+ assert(oidhexsz <= GIT_MAX_HEXSZ);
+
+ x.fn = fn;
+ x.arg = arg;
+ x.algo = oid->algo;
+ if (oidhexsz & 1) {
+ x.last_byte = oid->hash[klen];
+ x.last_nibble_at = &klen;
+ }
+ cb_each(&ot->tree, (const uint8_t *)oid, klen, iter, &x);
+}
diff --git a/oidtree.h b/oidtree.h
new file mode 100644
index 0000000..77898f5
--- /dev/null
+++ b/oidtree.h
@@ -0,0 +1,22 @@
+#ifndef OIDTREE_H
+#define OIDTREE_H
+
+#include "cbtree.h"
+#include "hash.h"
+#include "mem-pool.h"
+
+struct oidtree {
+ struct cb_tree tree;
+ struct mem_pool mem_pool;
+};
+
+void oidtree_init(struct oidtree *);
+void oidtree_clear(struct oidtree *);
+void oidtree_insert(struct oidtree *, const struct object_id *);
+int oidtree_contains(struct oidtree *, const struct object_id *);
+
+typedef enum cb_next (*oidtree_iter)(const struct object_id *, void *data);
+void oidtree_each(struct oidtree *, const struct object_id *,
+ size_t oidhexsz, oidtree_iter, void *data);
+
+#endif /* OIDTREE_H */
diff --git a/t/helper/test-oidtree.c b/t/helper/test-oidtree.c
new file mode 100644
index 0000000..180ee28
--- /dev/null
+++ b/t/helper/test-oidtree.c
@@ -0,0 +1,49 @@
+#include "test-tool.h"
+#include "cache.h"
+#include "oidtree.h"
+
+static enum cb_next print_oid(const struct object_id *oid, void *data)
+{
+ puts(oid_to_hex(oid));
+ return CB_CONTINUE;
+}
+
+int cmd__oidtree(int argc, const char **argv)
+{
+ struct oidtree ot;
+ struct strbuf line = STRBUF_INIT;
+ int nongit_ok;
+ int algo = GIT_HASH_UNKNOWN;
+
+ oidtree_init(&ot);
+ setup_git_directory_gently(&nongit_ok);
+
+ while (strbuf_getline(&line, stdin) != EOF) {
+ const char *arg;
+ struct object_id oid;
+
+ if (skip_prefix(line.buf, "insert ", &arg)) {
+ if (get_oid_hex_any(arg, &oid) == GIT_HASH_UNKNOWN)
+ die("insert not a hexadecimal oid: %s", arg);
+ algo = oid.algo;
+ oidtree_insert(&ot, &oid);
+ } else if (skip_prefix(line.buf, "contains ", &arg)) {
+ if (get_oid_hex(arg, &oid))
+ die("contains not a hexadecimal oid: %s", arg);
+ printf("%d\n", oidtree_contains(&ot, &oid));
+ } else if (skip_prefix(line.buf, "each ", &arg)) {
+ char buf[GIT_MAX_HEXSZ + 1] = { '0' };
+ memset(&oid, 0, sizeof(oid));
+ memcpy(buf, arg, strlen(arg));
+ buf[hash_algos[algo].hexsz] = '\0';
+ get_oid_hex_any(buf, &oid);
+ oid.algo = algo;
+ oidtree_each(&ot, &oid, strlen(arg), print_oid, NULL);
+ } else if (!strcmp(line.buf, "clear")) {
+ oidtree_clear(&ot);
+ } else {
+ die("unknown command: %s", line.buf);
+ }
+ }
+ return 0;
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index b21e8f1..490ac02 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -43,6 +43,7 @@ static struct test_cmd cmds[] = {
{ "mktemp", cmd__mktemp },
{ "oid-array", cmd__oid_array },
{ "oidmap", cmd__oidmap },
+ { "oidtree", cmd__oidtree },
{ "online-cpus", cmd__online_cpus },
{ "parse-options", cmd__parse_options },
{ "parse-pathspec-file", cmd__parse_pathspec_file },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index f845ced..f8dc266 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -32,6 +32,7 @@ int cmd__match_trees(int argc, const char **argv);
int cmd__mergesort(int argc, const char **argv);
int cmd__mktemp(int argc, const char **argv);
int cmd__oidmap(int argc, const char **argv);
+int cmd__oidtree(int argc, const char **argv);
int cmd__online_cpus(int argc, const char **argv);
int cmd__parse_options(int argc, const char **argv);
int cmd__parse_pathspec_file(int argc, const char** argv);
diff --git a/t/t0069-oidtree.sh b/t/t0069-oidtree.sh
new file mode 100755
index 0000000..bfb1397
--- /dev/null
+++ b/t/t0069-oidtree.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+test_description='basic tests for the oidtree implementation'
+. ./test-lib.sh
+
+maxhexsz=$(test_oid hexsz)
+echoid () {
+ prefix="${1:+$1 }"
+ shift
+ while test $# -gt 0
+ do
+ shortoid="$1"
+ shift
+ difference=$(($maxhexsz - ${#shortoid}))
+ printf "%s%s%0${difference}d\\n" "$prefix" "$shortoid" "0"
+ done
+}
+
+test_expect_success 'oidtree insert and contains' '
+ cat >expect <<-\EOF &&
+ 0
+ 0
+ 0
+ 1
+ 1
+ 0
+ EOF
+ {
+ echoid insert 444 1 2 3 4 5 a b c d e &&
+ echoid contains 44 441 440 444 4440 4444
+ echo clear
+ } | test-tool oidtree >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'oidtree each' '
+ echoid "" 123 321 321 >expect &&
+ {
+ echoid insert f 9 8 123 321 a b c d e
+ echo each 12300
+ echo each 3211
+ echo each 3210
+ echo each 32100
+ echo clear
+ } | test-tool oidtree >actual &&
+ test_cmp expect actual
+'
+
+test_done