Elijah Newren | bc5c5ec | 2023-05-16 06:33:57 +0000 | [diff] [blame] | 1 | #include "git-compat-util.h" |
Elijah Newren | d1cbe1e | 2023-04-22 20:17:20 +0000 | [diff] [blame] | 2 | #include "hash.h" |
Martin Ågren | bc62692 | 2020-12-31 12:56:23 +0100 | [diff] [blame] | 3 | #include "hash-lookup.h" |
Elijah Newren | 08c46a4 | 2023-05-16 06:33:56 +0000 | [diff] [blame] | 4 | #include "read-cache-ll.h" |
Junio C Hamano | 628522e | 2007-12-29 02:05:47 -0800 | [diff] [blame] | 5 | |
Jeff King | 45ee13b | 2021-01-28 01:19:42 -0500 | [diff] [blame] | 6 | static uint32_t take2(const struct object_id *oid, size_t ofs) |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 7 | { |
Jeff King | 45ee13b | 2021-01-28 01:19:42 -0500 | [diff] [blame] | 8 | return ((oid->hash[ofs] << 8) | oid->hash[ofs + 1]); |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 9 | } |
| 10 | |
| 11 | /* |
| 12 | * Conventional binary search loop looks like this: |
| 13 | * |
| 14 | * do { |
Derrick Stolee | 19716b2 | 2017-10-08 14:29:37 -0400 | [diff] [blame] | 15 | * int mi = lo + (hi - lo) / 2; |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 16 | * int cmp = "entry pointed at by mi" minus "target"; |
| 17 | * if (!cmp) |
| 18 | * return (mi is the wanted one) |
| 19 | * if (cmp > 0) |
| 20 | * hi = mi; "mi is larger than target" |
| 21 | * else |
| 22 | * lo = mi+1; "mi is smaller than target" |
| 23 | * } while (lo < hi); |
| 24 | * |
| 25 | * The invariants are: |
| 26 | * |
| 27 | * - When entering the loop, lo points at a slot that is never |
| 28 | * above the target (it could be at the target), hi points at a |
| 29 | * slot that is guaranteed to be above the target (it can never |
| 30 | * be at the target). |
| 31 | * |
| 32 | * - We find a point 'mi' between lo and hi (mi could be the same |
| 33 | * as lo, but never can be the same as hi), and check if it hits |
| 34 | * the target. There are three cases: |
| 35 | * |
| 36 | * - if it is a hit, we are happy. |
| 37 | * |
| 38 | * - if it is strictly higher than the target, we update hi with |
| 39 | * it. |
| 40 | * |
| 41 | * - if it is strictly lower than the target, we update lo to be |
| 42 | * one slot after it, because we allow lo to be at the target. |
| 43 | * |
| 44 | * When choosing 'mi', we do not have to take the "middle" but |
| 45 | * anywhere in between lo and hi, as long as lo <= mi < hi is |
| 46 | * satisfied. When we somehow know that the distance between the |
| 47 | * target and lo is much shorter than the target and hi, we could |
| 48 | * pick mi that is much closer to lo than the midway. |
| 49 | */ |
| 50 | /* |
| 51 | * The table should contain "nr" elements. |
Jeff King | 45ee13b | 2021-01-28 01:19:42 -0500 | [diff] [blame] | 52 | * The oid of element i (between 0 and nr - 1) should be returned |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 53 | * by "fn(i, table)". |
| 54 | */ |
Jeff King | 8380dcd | 2021-01-28 01:20:23 -0500 | [diff] [blame] | 55 | int oid_pos(const struct object_id *oid, const void *table, size_t nr, |
Jeff King | 45ee13b | 2021-01-28 01:19:42 -0500 | [diff] [blame] | 56 | oid_access_fn fn) |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 57 | { |
| 58 | size_t hi = nr; |
| 59 | size_t lo = 0; |
| 60 | size_t mi = 0; |
| 61 | |
| 62 | if (!nr) |
| 63 | return -1; |
| 64 | |
| 65 | if (nr != 1) { |
| 66 | size_t lov, hiv, miv, ofs; |
| 67 | |
brian m. carlson | e84f357 | 2019-08-18 20:04:14 +0000 | [diff] [blame] | 68 | for (ofs = 0; ofs < the_hash_algo->rawsz - 2; ofs += 2) { |
Jeff King | 45ee13b | 2021-01-28 01:19:42 -0500 | [diff] [blame] | 69 | lov = take2(fn(0, table), ofs); |
| 70 | hiv = take2(fn(nr - 1, table), ofs); |
| 71 | miv = take2(oid, ofs); |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 72 | if (miv < lov) |
| 73 | return -1; |
| 74 | if (hiv < miv) |
Johannes Schindelin | c097b95 | 2019-10-04 08:09:26 -0700 | [diff] [blame] | 75 | return index_pos_to_insert_pos(nr); |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 76 | if (lov != hiv) { |
| 77 | /* |
| 78 | * At this point miv could be equal |
Martin Ågren | 7a7d992 | 2020-12-31 12:56:22 +0100 | [diff] [blame] | 79 | * to hiv (but hash could still be higher); |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 80 | * the invariant of (mi < hi) should be |
| 81 | * kept. |
| 82 | */ |
| 83 | mi = (nr - 1) * (miv - lov) / (hiv - lov); |
| 84 | if (lo <= mi && mi < hi) |
| 85 | break; |
Johannes Schindelin | 033abf9 | 2018-05-02 11:38:39 +0200 | [diff] [blame] | 86 | BUG("assertion failed in binary search"); |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 87 | } |
| 88 | } |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 89 | } |
| 90 | |
| 91 | do { |
| 92 | int cmp; |
Jeff King | 45ee13b | 2021-01-28 01:19:42 -0500 | [diff] [blame] | 93 | cmp = oidcmp(fn(mi, table), oid); |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 94 | if (!cmp) |
| 95 | return mi; |
| 96 | if (cmp > 0) |
| 97 | hi = mi; |
| 98 | else |
| 99 | lo = mi + 1; |
Derrick Stolee | 19716b2 | 2017-10-08 14:29:37 -0400 | [diff] [blame] | 100 | mi = lo + (hi - lo) / 2; |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 101 | } while (lo < hi); |
Johannes Schindelin | c097b95 | 2019-10-04 08:09:26 -0700 | [diff] [blame] | 102 | return index_pos_to_insert_pos(lo); |
Christian Couder | 96beef8 | 2009-04-04 22:59:26 +0200 | [diff] [blame] | 103 | } |
Jonathan Tan | b4e00f7 | 2018-02-13 10:39:39 -0800 | [diff] [blame] | 104 | |
Martin Ågren | bc62692 | 2020-12-31 12:56:23 +0100 | [diff] [blame] | 105 | int bsearch_hash(const unsigned char *hash, const uint32_t *fanout_nbo, |
Jonathan Tan | b4e00f7 | 2018-02-13 10:39:39 -0800 | [diff] [blame] | 106 | const unsigned char *table, size_t stride, uint32_t *result) |
| 107 | { |
| 108 | uint32_t hi, lo; |
| 109 | |
Martin Ågren | bc62692 | 2020-12-31 12:56:23 +0100 | [diff] [blame] | 110 | hi = ntohl(fanout_nbo[*hash]); |
| 111 | lo = ((*hash == 0x0) ? 0 : ntohl(fanout_nbo[*hash - 1])); |
Jonathan Tan | b4e00f7 | 2018-02-13 10:39:39 -0800 | [diff] [blame] | 112 | |
| 113 | while (lo < hi) { |
| 114 | unsigned mi = lo + (hi - lo) / 2; |
Martin Ågren | bc62692 | 2020-12-31 12:56:23 +0100 | [diff] [blame] | 115 | int cmp = hashcmp(table + mi * stride, hash); |
Jonathan Tan | b4e00f7 | 2018-02-13 10:39:39 -0800 | [diff] [blame] | 116 | |
| 117 | if (!cmp) { |
| 118 | if (result) |
| 119 | *result = mi; |
| 120 | return 1; |
| 121 | } |
| 122 | if (cmp > 0) |
| 123 | hi = mi; |
| 124 | else |
| 125 | lo = mi + 1; |
| 126 | } |
| 127 | |
| 128 | if (result) |
| 129 | *result = lo; |
| 130 | return 0; |
| 131 | } |