Derrick Stolee | a340773 | 2018-07-12 15:39:21 -0400 | [diff] [blame] | 1 | #include "cache.h" |
Derrick Stolee | c4d2522 | 2018-07-12 15:39:33 -0400 | [diff] [blame] | 2 | #include "config.h" |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 3 | #include "csum-file.h" |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 4 | #include "dir.h" |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 5 | #include "lockfile.h" |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 6 | #include "packfile.h" |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 7 | #include "object-store.h" |
Derrick Stolee | 3715a63 | 2018-07-12 15:39:34 -0400 | [diff] [blame] | 8 | #include "sha1-lookup.h" |
Derrick Stolee | a340773 | 2018-07-12 15:39:21 -0400 | [diff] [blame] | 9 | #include "midx.h" |
| 10 | |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 11 | #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ |
| 12 | #define MIDX_VERSION 1 |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 13 | #define MIDX_BYTE_FILE_VERSION 4 |
| 14 | #define MIDX_BYTE_HASH_VERSION 5 |
| 15 | #define MIDX_BYTE_NUM_CHUNKS 6 |
| 16 | #define MIDX_BYTE_NUM_PACKS 8 |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 17 | #define MIDX_HASH_VERSION 1 |
| 18 | #define MIDX_HEADER_SIZE 12 |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 19 | #define MIDX_HASH_LEN 20 |
| 20 | #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 21 | |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 22 | #define MIDX_MAX_CHUNKS 5 |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 23 | #define MIDX_CHUNK_ALIGNMENT 4 |
| 24 | #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ |
Derrick Stolee | d7cacf2 | 2018-07-12 15:39:31 -0400 | [diff] [blame] | 25 | #define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ |
Derrick Stolee | 0d5b3a5 | 2018-07-12 15:39:30 -0400 | [diff] [blame] | 26 | #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 27 | #define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */ |
| 28 | #define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */ |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 29 | #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) |
Derrick Stolee | d7cacf2 | 2018-07-12 15:39:31 -0400 | [diff] [blame] | 30 | #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 31 | #define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t)) |
| 32 | #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) |
| 33 | #define MIDX_LARGE_OFFSET_NEEDED 0x80000000 |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 34 | |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 35 | static char *get_midx_filename(const char *object_dir) |
| 36 | { |
| 37 | return xstrfmt("%s/pack/multi-pack-index", object_dir); |
| 38 | } |
| 39 | |
Derrick Stolee | 2cf489a | 2018-08-20 16:51:55 +0000 | [diff] [blame] | 40 | struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local) |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 41 | { |
| 42 | struct multi_pack_index *m = NULL; |
| 43 | int fd; |
| 44 | struct stat st; |
| 45 | size_t midx_size; |
| 46 | void *midx_map = NULL; |
| 47 | uint32_t hash_version; |
| 48 | char *midx_name = get_midx_filename(object_dir); |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 49 | uint32_t i; |
Derrick Stolee | 3227565 | 2018-07-12 15:39:28 -0400 | [diff] [blame] | 50 | const char *cur_pack_name; |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 51 | |
| 52 | fd = git_open(midx_name); |
| 53 | |
| 54 | if (fd < 0) |
| 55 | goto cleanup_fail; |
| 56 | if (fstat(fd, &st)) { |
| 57 | error_errno(_("failed to read %s"), midx_name); |
| 58 | goto cleanup_fail; |
| 59 | } |
| 60 | |
| 61 | midx_size = xsize_t(st.st_size); |
| 62 | |
| 63 | if (midx_size < MIDX_MIN_SIZE) { |
| 64 | error(_("multi-pack-index file %s is too small"), midx_name); |
| 65 | goto cleanup_fail; |
| 66 | } |
| 67 | |
| 68 | FREE_AND_NULL(midx_name); |
| 69 | |
| 70 | midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); |
| 71 | |
| 72 | FLEX_ALLOC_MEM(m, object_dir, object_dir, strlen(object_dir)); |
| 73 | m->fd = fd; |
| 74 | m->data = midx_map; |
| 75 | m->data_len = midx_size; |
Derrick Stolee | 2cf489a | 2018-08-20 16:51:55 +0000 | [diff] [blame] | 76 | m->local = local; |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 77 | |
| 78 | m->signature = get_be32(m->data); |
Derrick Stolee | 53ad040 | 2018-09-13 11:02:15 -0700 | [diff] [blame] | 79 | if (m->signature != MIDX_SIGNATURE) |
| 80 | die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 81 | m->signature, MIDX_SIGNATURE); |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 82 | |
| 83 | m->version = m->data[MIDX_BYTE_FILE_VERSION]; |
Derrick Stolee | 53ad040 | 2018-09-13 11:02:15 -0700 | [diff] [blame] | 84 | if (m->version != MIDX_VERSION) |
| 85 | die(_("multi-pack-index version %d not recognized"), |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 86 | m->version); |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 87 | |
| 88 | hash_version = m->data[MIDX_BYTE_HASH_VERSION]; |
Derrick Stolee | 53ad040 | 2018-09-13 11:02:15 -0700 | [diff] [blame] | 89 | if (hash_version != MIDX_HASH_VERSION) |
| 90 | die(_("hash version %u does not match"), hash_version); |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 91 | m->hash_len = MIDX_HASH_LEN; |
| 92 | |
| 93 | m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS]; |
| 94 | |
| 95 | m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS); |
| 96 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 97 | for (i = 0; i < m->num_chunks; i++) { |
| 98 | uint32_t chunk_id = get_be32(m->data + MIDX_HEADER_SIZE + |
| 99 | MIDX_CHUNKLOOKUP_WIDTH * i); |
| 100 | uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 + |
| 101 | MIDX_CHUNKLOOKUP_WIDTH * i); |
| 102 | |
Derrick Stolee | d3f8e21 | 2018-09-13 11:02:16 -0700 | [diff] [blame] | 103 | if (chunk_offset >= m->data_len) |
| 104 | die(_("invalid chunk offset (too large)")); |
| 105 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 106 | switch (chunk_id) { |
| 107 | case MIDX_CHUNKID_PACKNAMES: |
| 108 | m->chunk_pack_names = m->data + chunk_offset; |
| 109 | break; |
| 110 | |
Derrick Stolee | d7cacf2 | 2018-07-12 15:39:31 -0400 | [diff] [blame] | 111 | case MIDX_CHUNKID_OIDFANOUT: |
| 112 | m->chunk_oid_fanout = (uint32_t *)(m->data + chunk_offset); |
| 113 | break; |
| 114 | |
Derrick Stolee | 0d5b3a5 | 2018-07-12 15:39:30 -0400 | [diff] [blame] | 115 | case MIDX_CHUNKID_OIDLOOKUP: |
| 116 | m->chunk_oid_lookup = m->data + chunk_offset; |
| 117 | break; |
| 118 | |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 119 | case MIDX_CHUNKID_OBJECTOFFSETS: |
| 120 | m->chunk_object_offsets = m->data + chunk_offset; |
| 121 | break; |
| 122 | |
| 123 | case MIDX_CHUNKID_LARGEOFFSETS: |
| 124 | m->chunk_large_offsets = m->data + chunk_offset; |
| 125 | break; |
| 126 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 127 | case 0: |
| 128 | die(_("terminating multi-pack-index chunk id appears earlier than expected")); |
| 129 | break; |
| 130 | |
| 131 | default: |
| 132 | /* |
| 133 | * Do nothing on unrecognized chunks, allowing future |
| 134 | * extensions to add optional chunks. |
| 135 | */ |
| 136 | break; |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | if (!m->chunk_pack_names) |
| 141 | die(_("multi-pack-index missing required pack-name chunk")); |
Derrick Stolee | d7cacf2 | 2018-07-12 15:39:31 -0400 | [diff] [blame] | 142 | if (!m->chunk_oid_fanout) |
| 143 | die(_("multi-pack-index missing required OID fanout chunk")); |
Derrick Stolee | 0d5b3a5 | 2018-07-12 15:39:30 -0400 | [diff] [blame] | 144 | if (!m->chunk_oid_lookup) |
| 145 | die(_("multi-pack-index missing required OID lookup chunk")); |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 146 | if (!m->chunk_object_offsets) |
| 147 | die(_("multi-pack-index missing required object offsets chunk")); |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 148 | |
Derrick Stolee | d7cacf2 | 2018-07-12 15:39:31 -0400 | [diff] [blame] | 149 | m->num_objects = ntohl(m->chunk_oid_fanout[255]); |
| 150 | |
Derrick Stolee | 3227565 | 2018-07-12 15:39:28 -0400 | [diff] [blame] | 151 | m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names)); |
Derrick Stolee | 3715a63 | 2018-07-12 15:39:34 -0400 | [diff] [blame] | 152 | m->packs = xcalloc(m->num_packs, sizeof(*m->packs)); |
Derrick Stolee | 3227565 | 2018-07-12 15:39:28 -0400 | [diff] [blame] | 153 | |
| 154 | cur_pack_name = (const char *)m->chunk_pack_names; |
| 155 | for (i = 0; i < m->num_packs; i++) { |
| 156 | m->pack_names[i] = cur_pack_name; |
| 157 | |
| 158 | cur_pack_name += strlen(cur_pack_name) + 1; |
| 159 | |
Derrick Stolee | 8e72a3c | 2018-09-13 11:02:18 -0700 | [diff] [blame] | 160 | if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) |
| 161 | die(_("multi-pack-index pack names out of order: '%s' before '%s'"), |
Derrick Stolee | 3227565 | 2018-07-12 15:39:28 -0400 | [diff] [blame] | 162 | m->pack_names[i - 1], |
| 163 | m->pack_names[i]); |
Derrick Stolee | 3227565 | 2018-07-12 15:39:28 -0400 | [diff] [blame] | 164 | } |
| 165 | |
Derrick Stolee | 4d80560 | 2018-07-12 15:39:23 -0400 | [diff] [blame] | 166 | return m; |
| 167 | |
| 168 | cleanup_fail: |
| 169 | free(m); |
| 170 | free(midx_name); |
| 171 | if (midx_map) |
| 172 | munmap(midx_map, midx_size); |
| 173 | if (0 <= fd) |
| 174 | close(fd); |
| 175 | return NULL; |
| 176 | } |
| 177 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 178 | static void close_midx(struct multi_pack_index *m) |
| 179 | { |
| 180 | uint32_t i; |
| 181 | munmap((unsigned char *)m->data, m->data_len); |
| 182 | close(m->fd); |
| 183 | m->fd = -1; |
| 184 | |
| 185 | for (i = 0; i < m->num_packs; i++) { |
| 186 | if (m->packs[i]) { |
| 187 | close_pack(m->packs[i]); |
| 188 | free(m->packs); |
| 189 | } |
| 190 | } |
| 191 | FREE_AND_NULL(m->packs); |
| 192 | FREE_AND_NULL(m->pack_names); |
| 193 | } |
| 194 | |
Derrick Stolee | 0bff526 | 2018-08-20 16:52:02 +0000 | [diff] [blame] | 195 | int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) |
Derrick Stolee | 3715a63 | 2018-07-12 15:39:34 -0400 | [diff] [blame] | 196 | { |
| 197 | struct strbuf pack_name = STRBUF_INIT; |
| 198 | |
| 199 | if (pack_int_id >= m->num_packs) |
| 200 | BUG("bad pack-int-id"); |
| 201 | |
| 202 | if (m->packs[pack_int_id]) |
| 203 | return 0; |
| 204 | |
| 205 | strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir, |
| 206 | m->pack_names[pack_int_id]); |
| 207 | |
Derrick Stolee | 2cf489a | 2018-08-20 16:51:55 +0000 | [diff] [blame] | 208 | m->packs[pack_int_id] = add_packed_git(pack_name.buf, pack_name.len, m->local); |
Derrick Stolee | 3715a63 | 2018-07-12 15:39:34 -0400 | [diff] [blame] | 209 | strbuf_release(&pack_name); |
| 210 | return !m->packs[pack_int_id]; |
| 211 | } |
| 212 | |
| 213 | int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result) |
| 214 | { |
| 215 | return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup, |
| 216 | MIDX_HASH_LEN, result); |
| 217 | } |
| 218 | |
Derrick Stolee | 8aac67a | 2018-07-12 15:39:35 -0400 | [diff] [blame] | 219 | struct object_id *nth_midxed_object_oid(struct object_id *oid, |
| 220 | struct multi_pack_index *m, |
| 221 | uint32_t n) |
| 222 | { |
| 223 | if (n >= m->num_objects) |
| 224 | return NULL; |
| 225 | |
| 226 | hashcpy(oid->hash, m->chunk_oid_lookup + m->hash_len * n); |
| 227 | return oid; |
| 228 | } |
| 229 | |
Derrick Stolee | 3715a63 | 2018-07-12 15:39:34 -0400 | [diff] [blame] | 230 | static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) |
| 231 | { |
| 232 | const unsigned char *offset_data; |
| 233 | uint32_t offset32; |
| 234 | |
| 235 | offset_data = m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH; |
| 236 | offset32 = get_be32(offset_data + sizeof(uint32_t)); |
| 237 | |
| 238 | if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) { |
| 239 | if (sizeof(offset32) < sizeof(uint64_t)) |
| 240 | die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); |
| 241 | |
| 242 | offset32 ^= MIDX_LARGE_OFFSET_NEEDED; |
| 243 | return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); |
| 244 | } |
| 245 | |
| 246 | return offset32; |
| 247 | } |
| 248 | |
| 249 | static uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) |
| 250 | { |
| 251 | return get_be32(m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH); |
| 252 | } |
| 253 | |
| 254 | static int nth_midxed_pack_entry(struct multi_pack_index *m, struct pack_entry *e, uint32_t pos) |
| 255 | { |
| 256 | uint32_t pack_int_id; |
| 257 | struct packed_git *p; |
| 258 | |
| 259 | if (pos >= m->num_objects) |
| 260 | return 0; |
| 261 | |
| 262 | pack_int_id = nth_midxed_pack_int_id(m, pos); |
| 263 | |
| 264 | if (prepare_midx_pack(m, pack_int_id)) |
| 265 | die(_("error preparing packfile from multi-pack-index")); |
| 266 | p = m->packs[pack_int_id]; |
| 267 | |
| 268 | /* |
| 269 | * We are about to tell the caller where they can locate the |
| 270 | * requested object. We better make sure the packfile is |
| 271 | * still here and can be accessed before supplying that |
| 272 | * answer, as it may have been deleted since the MIDX was |
| 273 | * loaded! |
| 274 | */ |
| 275 | if (!is_pack_valid(p)) |
| 276 | return 0; |
| 277 | |
Derrick Stolee | c39b02a | 2018-08-20 16:51:57 +0000 | [diff] [blame] | 278 | if (p->num_bad_objects) { |
| 279 | uint32_t i; |
| 280 | struct object_id oid; |
| 281 | nth_midxed_object_oid(&oid, m, pos); |
| 282 | for (i = 0; i < p->num_bad_objects; i++) |
| 283 | if (!hashcmp(oid.hash, |
| 284 | p->bad_object_sha1 + the_hash_algo->rawsz * i)) |
| 285 | return 0; |
| 286 | } |
| 287 | |
Derrick Stolee | 3715a63 | 2018-07-12 15:39:34 -0400 | [diff] [blame] | 288 | e->offset = nth_midxed_offset(m, pos); |
| 289 | e->p = p; |
| 290 | |
| 291 | return 1; |
| 292 | } |
| 293 | |
| 294 | int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m) |
| 295 | { |
| 296 | uint32_t pos; |
| 297 | |
| 298 | if (!bsearch_midx(oid, m, &pos)) |
| 299 | return 0; |
| 300 | |
| 301 | return nth_midxed_pack_entry(m, e, pos); |
| 302 | } |
| 303 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 304 | int midx_contains_pack(struct multi_pack_index *m, const char *idx_name) |
| 305 | { |
| 306 | uint32_t first = 0, last = m->num_packs; |
| 307 | |
| 308 | while (first < last) { |
| 309 | uint32_t mid = first + (last - first) / 2; |
| 310 | const char *current; |
| 311 | int cmp; |
| 312 | |
| 313 | current = m->pack_names[mid]; |
| 314 | cmp = strcmp(idx_name, current); |
| 315 | if (!cmp) |
| 316 | return 1; |
| 317 | if (cmp > 0) { |
| 318 | first = mid + 1; |
| 319 | continue; |
| 320 | } |
| 321 | last = mid; |
| 322 | } |
| 323 | |
| 324 | return 0; |
| 325 | } |
| 326 | |
Derrick Stolee | 2cf489a | 2018-08-20 16:51:55 +0000 | [diff] [blame] | 327 | int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local) |
Derrick Stolee | c4d2522 | 2018-07-12 15:39:33 -0400 | [diff] [blame] | 328 | { |
Derrick Stolee | 29e2016 | 2018-08-20 16:52:00 +0000 | [diff] [blame] | 329 | struct multi_pack_index *m; |
Derrick Stolee | c4d2522 | 2018-07-12 15:39:33 -0400 | [diff] [blame] | 330 | struct multi_pack_index *m_search; |
| 331 | int config_value; |
| 332 | |
| 333 | if (repo_config_get_bool(r, "core.multipackindex", &config_value) || |
| 334 | !config_value) |
| 335 | return 0; |
| 336 | |
Derrick Stolee | 29e2016 | 2018-08-20 16:52:00 +0000 | [diff] [blame] | 337 | for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next) |
Derrick Stolee | c4d2522 | 2018-07-12 15:39:33 -0400 | [diff] [blame] | 338 | if (!strcmp(object_dir, m_search->object_dir)) |
| 339 | return 1; |
| 340 | |
Derrick Stolee | 29e2016 | 2018-08-20 16:52:00 +0000 | [diff] [blame] | 341 | m = load_multi_pack_index(object_dir, local); |
Derrick Stolee | c4d2522 | 2018-07-12 15:39:33 -0400 | [diff] [blame] | 342 | |
Derrick Stolee | 29e2016 | 2018-08-20 16:52:00 +0000 | [diff] [blame] | 343 | if (m) { |
| 344 | m->next = r->objects->multi_pack_index; |
| 345 | r->objects->multi_pack_index = m; |
Derrick Stolee | c4d2522 | 2018-07-12 15:39:33 -0400 | [diff] [blame] | 346 | return 1; |
| 347 | } |
| 348 | |
| 349 | return 0; |
| 350 | } |
| 351 | |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 352 | static size_t write_midx_header(struct hashfile *f, |
| 353 | unsigned char num_chunks, |
| 354 | uint32_t num_packs) |
| 355 | { |
| 356 | unsigned char byte_values[4]; |
| 357 | |
| 358 | hashwrite_be32(f, MIDX_SIGNATURE); |
| 359 | byte_values[0] = MIDX_VERSION; |
| 360 | byte_values[1] = MIDX_HASH_VERSION; |
| 361 | byte_values[2] = num_chunks; |
| 362 | byte_values[3] = 0; /* unused */ |
| 363 | hashwrite(f, byte_values, sizeof(byte_values)); |
| 364 | hashwrite_be32(f, num_packs); |
| 365 | |
| 366 | return MIDX_HEADER_SIZE; |
| 367 | } |
| 368 | |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 369 | struct pack_list { |
| 370 | struct packed_git **list; |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 371 | char **names; |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 372 | uint32_t nr; |
| 373 | uint32_t alloc_list; |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 374 | uint32_t alloc_names; |
| 375 | size_t pack_name_concat_len; |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 376 | struct multi_pack_index *m; |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 377 | }; |
| 378 | |
| 379 | static void add_pack_to_midx(const char *full_path, size_t full_path_len, |
| 380 | const char *file_name, void *data) |
| 381 | { |
| 382 | struct pack_list *packs = (struct pack_list *)data; |
| 383 | |
| 384 | if (ends_with(file_name, ".idx")) { |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 385 | if (packs->m && midx_contains_pack(packs->m, file_name)) |
| 386 | return; |
| 387 | |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 388 | ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list); |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 389 | ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names); |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 390 | |
| 391 | packs->list[packs->nr] = add_packed_git(full_path, |
| 392 | full_path_len, |
| 393 | 0); |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 394 | |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 395 | if (!packs->list[packs->nr]) { |
| 396 | warning(_("failed to add packfile '%s'"), |
| 397 | full_path); |
| 398 | return; |
| 399 | } |
| 400 | |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 401 | if (open_pack_index(packs->list[packs->nr])) { |
| 402 | warning(_("failed to open pack-index '%s'"), |
| 403 | full_path); |
| 404 | close_pack(packs->list[packs->nr]); |
| 405 | FREE_AND_NULL(packs->list[packs->nr]); |
| 406 | return; |
| 407 | } |
| 408 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 409 | packs->names[packs->nr] = xstrdup(file_name); |
| 410 | packs->pack_name_concat_len += strlen(file_name) + 1; |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 411 | packs->nr++; |
| 412 | } |
| 413 | } |
| 414 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 415 | struct pack_pair { |
| 416 | uint32_t pack_int_id; |
| 417 | char *pack_name; |
| 418 | }; |
| 419 | |
| 420 | static int pack_pair_compare(const void *_a, const void *_b) |
| 421 | { |
| 422 | struct pack_pair *a = (struct pack_pair *)_a; |
| 423 | struct pack_pair *b = (struct pack_pair *)_b; |
| 424 | return strcmp(a->pack_name, b->pack_name); |
| 425 | } |
| 426 | |
| 427 | static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm) |
| 428 | { |
| 429 | uint32_t i; |
| 430 | struct pack_pair *pairs; |
| 431 | |
| 432 | ALLOC_ARRAY(pairs, nr_packs); |
| 433 | |
| 434 | for (i = 0; i < nr_packs; i++) { |
| 435 | pairs[i].pack_int_id = i; |
| 436 | pairs[i].pack_name = pack_names[i]; |
| 437 | } |
| 438 | |
| 439 | QSORT(pairs, nr_packs, pack_pair_compare); |
| 440 | |
| 441 | for (i = 0; i < nr_packs; i++) { |
| 442 | pack_names[i] = pairs[i].pack_name; |
| 443 | perm[pairs[i].pack_int_id] = i; |
| 444 | } |
| 445 | |
| 446 | free(pairs); |
| 447 | } |
| 448 | |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 449 | struct pack_midx_entry { |
| 450 | struct object_id oid; |
| 451 | uint32_t pack_int_id; |
| 452 | time_t pack_mtime; |
| 453 | uint64_t offset; |
| 454 | }; |
| 455 | |
| 456 | static int midx_oid_compare(const void *_a, const void *_b) |
| 457 | { |
| 458 | const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a; |
| 459 | const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b; |
| 460 | int cmp = oidcmp(&a->oid, &b->oid); |
| 461 | |
| 462 | if (cmp) |
| 463 | return cmp; |
| 464 | |
| 465 | if (a->pack_mtime > b->pack_mtime) |
| 466 | return -1; |
| 467 | else if (a->pack_mtime < b->pack_mtime) |
| 468 | return 1; |
| 469 | |
| 470 | return a->pack_int_id - b->pack_int_id; |
| 471 | } |
| 472 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 473 | static int nth_midxed_pack_midx_entry(struct multi_pack_index *m, |
| 474 | uint32_t *pack_perm, |
| 475 | struct pack_midx_entry *e, |
| 476 | uint32_t pos) |
| 477 | { |
| 478 | if (pos >= m->num_objects) |
| 479 | return 1; |
| 480 | |
| 481 | nth_midxed_object_oid(&e->oid, m, pos); |
| 482 | e->pack_int_id = pack_perm[nth_midxed_pack_int_id(m, pos)]; |
| 483 | e->offset = nth_midxed_offset(m, pos); |
| 484 | |
| 485 | /* consider objects in midx to be from "old" packs */ |
| 486 | e->pack_mtime = 0; |
| 487 | return 0; |
| 488 | } |
| 489 | |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 490 | static void fill_pack_entry(uint32_t pack_int_id, |
| 491 | struct packed_git *p, |
| 492 | uint32_t cur_object, |
| 493 | struct pack_midx_entry *entry) |
| 494 | { |
| 495 | if (!nth_packed_object_oid(&entry->oid, p, cur_object)) |
| 496 | die(_("failed to locate object %d in packfile"), cur_object); |
| 497 | |
| 498 | entry->pack_int_id = pack_int_id; |
| 499 | entry->pack_mtime = p->mtime; |
| 500 | |
| 501 | entry->offset = nth_packed_object_offset(p, cur_object); |
| 502 | } |
| 503 | |
| 504 | /* |
| 505 | * It is possible to artificially get into a state where there are many |
| 506 | * duplicate copies of objects. That can create high memory pressure if |
| 507 | * we are to create a list of all objects before de-duplication. To reduce |
| 508 | * this memory pressure without a significant performance drop, automatically |
| 509 | * group objects by the first byte of their object id. Use the IDX fanout |
| 510 | * tables to group the data, copy to a local array, then sort. |
| 511 | * |
| 512 | * Copy only the de-duplicated entries (selected by most-recent modified time |
| 513 | * of a packfile containing the object). |
| 514 | */ |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 515 | static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, |
| 516 | struct packed_git **p, |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 517 | uint32_t *perm, |
| 518 | uint32_t nr_packs, |
| 519 | uint32_t *nr_objects) |
| 520 | { |
| 521 | uint32_t cur_fanout, cur_pack, cur_object; |
| 522 | uint32_t alloc_fanout, alloc_objects, total_objects = 0; |
| 523 | struct pack_midx_entry *entries_by_fanout = NULL; |
| 524 | struct pack_midx_entry *deduplicated_entries = NULL; |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 525 | uint32_t start_pack = m ? m->num_packs : 0; |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 526 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 527 | for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 528 | total_objects += p[cur_pack]->num_objects; |
| 529 | |
| 530 | /* |
| 531 | * As we de-duplicate by fanout value, we expect the fanout |
| 532 | * slices to be evenly distributed, with some noise. Hence, |
| 533 | * allocate slightly more than one 256th. |
| 534 | */ |
| 535 | alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16; |
| 536 | |
| 537 | ALLOC_ARRAY(entries_by_fanout, alloc_fanout); |
| 538 | ALLOC_ARRAY(deduplicated_entries, alloc_objects); |
| 539 | *nr_objects = 0; |
| 540 | |
| 541 | for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { |
| 542 | uint32_t nr_fanout = 0; |
| 543 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 544 | if (m) { |
| 545 | uint32_t start = 0, end; |
| 546 | |
| 547 | if (cur_fanout) |
| 548 | start = ntohl(m->chunk_oid_fanout[cur_fanout - 1]); |
| 549 | end = ntohl(m->chunk_oid_fanout[cur_fanout]); |
| 550 | |
| 551 | for (cur_object = start; cur_object < end; cur_object++) { |
| 552 | ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); |
| 553 | nth_midxed_pack_midx_entry(m, perm, |
| 554 | &entries_by_fanout[nr_fanout], |
| 555 | cur_object); |
| 556 | nr_fanout++; |
| 557 | } |
| 558 | } |
| 559 | |
| 560 | for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) { |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 561 | uint32_t start = 0, end; |
| 562 | |
| 563 | if (cur_fanout) |
| 564 | start = get_pack_fanout(p[cur_pack], cur_fanout - 1); |
| 565 | end = get_pack_fanout(p[cur_pack], cur_fanout); |
| 566 | |
| 567 | for (cur_object = start; cur_object < end; cur_object++) { |
| 568 | ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); |
| 569 | fill_pack_entry(perm[cur_pack], p[cur_pack], cur_object, &entries_by_fanout[nr_fanout]); |
| 570 | nr_fanout++; |
| 571 | } |
| 572 | } |
| 573 | |
| 574 | QSORT(entries_by_fanout, nr_fanout, midx_oid_compare); |
| 575 | |
| 576 | /* |
| 577 | * The batch is now sorted by OID and then mtime (descending). |
| 578 | * Take only the first duplicate. |
| 579 | */ |
| 580 | for (cur_object = 0; cur_object < nr_fanout; cur_object++) { |
| 581 | if (cur_object && !oidcmp(&entries_by_fanout[cur_object - 1].oid, |
| 582 | &entries_by_fanout[cur_object].oid)) |
| 583 | continue; |
| 584 | |
| 585 | ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects); |
| 586 | memcpy(&deduplicated_entries[*nr_objects], |
| 587 | &entries_by_fanout[cur_object], |
| 588 | sizeof(struct pack_midx_entry)); |
| 589 | (*nr_objects)++; |
| 590 | } |
| 591 | } |
| 592 | |
| 593 | free(entries_by_fanout); |
| 594 | return deduplicated_entries; |
| 595 | } |
| 596 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 597 | static size_t write_midx_pack_names(struct hashfile *f, |
| 598 | char **pack_names, |
| 599 | uint32_t num_packs) |
| 600 | { |
| 601 | uint32_t i; |
| 602 | unsigned char padding[MIDX_CHUNK_ALIGNMENT]; |
| 603 | size_t written = 0; |
| 604 | |
| 605 | for (i = 0; i < num_packs; i++) { |
| 606 | size_t writelen = strlen(pack_names[i]) + 1; |
| 607 | |
| 608 | if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0) |
| 609 | BUG("incorrect pack-file order: %s before %s", |
| 610 | pack_names[i - 1], |
| 611 | pack_names[i]); |
| 612 | |
| 613 | hashwrite(f, pack_names[i], writelen); |
| 614 | written += writelen; |
| 615 | } |
| 616 | |
| 617 | /* add padding to be aligned */ |
| 618 | i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT); |
| 619 | if (i < MIDX_CHUNK_ALIGNMENT) { |
| 620 | memset(padding, 0, sizeof(padding)); |
| 621 | hashwrite(f, padding, i); |
| 622 | written += i; |
| 623 | } |
| 624 | |
| 625 | return written; |
| 626 | } |
| 627 | |
Derrick Stolee | d7cacf2 | 2018-07-12 15:39:31 -0400 | [diff] [blame] | 628 | static size_t write_midx_oid_fanout(struct hashfile *f, |
| 629 | struct pack_midx_entry *objects, |
| 630 | uint32_t nr_objects) |
| 631 | { |
| 632 | struct pack_midx_entry *list = objects; |
| 633 | struct pack_midx_entry *last = objects + nr_objects; |
| 634 | uint32_t count = 0; |
| 635 | uint32_t i; |
| 636 | |
| 637 | /* |
| 638 | * Write the first-level table (the list is sorted, |
| 639 | * but we use a 256-entry lookup to be able to avoid |
| 640 | * having to do eight extra binary search iterations). |
| 641 | */ |
| 642 | for (i = 0; i < 256; i++) { |
| 643 | struct pack_midx_entry *next = list; |
| 644 | |
| 645 | while (next < last && next->oid.hash[0] == i) { |
| 646 | count++; |
| 647 | next++; |
| 648 | } |
| 649 | |
| 650 | hashwrite_be32(f, count); |
| 651 | list = next; |
| 652 | } |
| 653 | |
| 654 | return MIDX_CHUNK_FANOUT_SIZE; |
| 655 | } |
| 656 | |
Derrick Stolee | 0d5b3a5 | 2018-07-12 15:39:30 -0400 | [diff] [blame] | 657 | static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len, |
| 658 | struct pack_midx_entry *objects, |
| 659 | uint32_t nr_objects) |
| 660 | { |
| 661 | struct pack_midx_entry *list = objects; |
| 662 | uint32_t i; |
| 663 | size_t written = 0; |
| 664 | |
| 665 | for (i = 0; i < nr_objects; i++) { |
| 666 | struct pack_midx_entry *obj = list++; |
| 667 | |
| 668 | if (i < nr_objects - 1) { |
| 669 | struct pack_midx_entry *next = list; |
| 670 | if (oidcmp(&obj->oid, &next->oid) >= 0) |
| 671 | BUG("OIDs not in order: %s >= %s", |
| 672 | oid_to_hex(&obj->oid), |
| 673 | oid_to_hex(&next->oid)); |
| 674 | } |
| 675 | |
| 676 | hashwrite(f, obj->oid.hash, (int)hash_len); |
| 677 | written += hash_len; |
| 678 | } |
| 679 | |
| 680 | return written; |
| 681 | } |
| 682 | |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 683 | static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_needed, |
| 684 | struct pack_midx_entry *objects, uint32_t nr_objects) |
| 685 | { |
| 686 | struct pack_midx_entry *list = objects; |
| 687 | uint32_t i, nr_large_offset = 0; |
| 688 | size_t written = 0; |
| 689 | |
| 690 | for (i = 0; i < nr_objects; i++) { |
| 691 | struct pack_midx_entry *obj = list++; |
| 692 | |
| 693 | hashwrite_be32(f, obj->pack_int_id); |
| 694 | |
| 695 | if (large_offset_needed && obj->offset >> 31) |
| 696 | hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); |
| 697 | else if (!large_offset_needed && obj->offset >> 32) |
| 698 | BUG("object %s requires a large offset (%"PRIx64") but the MIDX is not writing large offsets!", |
| 699 | oid_to_hex(&obj->oid), |
| 700 | obj->offset); |
| 701 | else |
| 702 | hashwrite_be32(f, (uint32_t)obj->offset); |
| 703 | |
| 704 | written += MIDX_CHUNK_OFFSET_WIDTH; |
| 705 | } |
| 706 | |
| 707 | return written; |
| 708 | } |
| 709 | |
| 710 | static size_t write_midx_large_offsets(struct hashfile *f, uint32_t nr_large_offset, |
| 711 | struct pack_midx_entry *objects, uint32_t nr_objects) |
| 712 | { |
| 713 | struct pack_midx_entry *list = objects; |
| 714 | size_t written = 0; |
| 715 | |
| 716 | while (nr_large_offset) { |
| 717 | struct pack_midx_entry *obj = list++; |
| 718 | uint64_t offset = obj->offset; |
| 719 | |
| 720 | if (!(offset >> 31)) |
| 721 | continue; |
| 722 | |
| 723 | hashwrite_be32(f, offset >> 32); |
| 724 | hashwrite_be32(f, offset & 0xffffffffUL); |
| 725 | written += 2 * sizeof(uint32_t); |
| 726 | |
| 727 | nr_large_offset--; |
| 728 | } |
| 729 | |
| 730 | return written; |
| 731 | } |
| 732 | |
Derrick Stolee | a340773 | 2018-07-12 15:39:21 -0400 | [diff] [blame] | 733 | int write_midx_file(const char *object_dir) |
| 734 | { |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 735 | unsigned char cur_chunk, num_chunks = 0; |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 736 | char *midx_name; |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 737 | uint32_t i; |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 738 | struct hashfile *f = NULL; |
| 739 | struct lock_file lk; |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 740 | struct pack_list packs; |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 741 | uint32_t *pack_perm = NULL; |
| 742 | uint64_t written = 0; |
| 743 | uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1]; |
| 744 | uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1]; |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 745 | uint32_t nr_entries, num_large_offsets = 0; |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 746 | struct pack_midx_entry *entries = NULL; |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 747 | int large_offsets_needed = 0; |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 748 | |
| 749 | midx_name = get_midx_filename(object_dir); |
| 750 | if (safe_create_leading_directories(midx_name)) { |
| 751 | UNLEAK(midx_name); |
| 752 | die_errno(_("unable to create leading directories of %s"), |
| 753 | midx_name); |
| 754 | } |
| 755 | |
Derrick Stolee | 2cf489a | 2018-08-20 16:51:55 +0000 | [diff] [blame] | 756 | packs.m = load_multi_pack_index(object_dir, 1); |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 757 | |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 758 | packs.nr = 0; |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 759 | packs.alloc_list = packs.m ? packs.m->num_packs : 16; |
| 760 | packs.alloc_names = packs.alloc_list; |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 761 | packs.list = NULL; |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 762 | packs.names = NULL; |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 763 | packs.pack_name_concat_len = 0; |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 764 | ALLOC_ARRAY(packs.list, packs.alloc_list); |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 765 | ALLOC_ARRAY(packs.names, packs.alloc_names); |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 766 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 767 | if (packs.m) { |
| 768 | for (i = 0; i < packs.m->num_packs; i++) { |
| 769 | ALLOC_GROW(packs.list, packs.nr + 1, packs.alloc_list); |
| 770 | ALLOC_GROW(packs.names, packs.nr + 1, packs.alloc_names); |
| 771 | |
| 772 | packs.list[packs.nr] = NULL; |
| 773 | packs.names[packs.nr] = xstrdup(packs.m->pack_names[i]); |
| 774 | packs.pack_name_concat_len += strlen(packs.names[packs.nr]) + 1; |
| 775 | packs.nr++; |
| 776 | } |
| 777 | } |
| 778 | |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 779 | for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs); |
| 780 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 781 | if (packs.m && packs.nr == packs.m->num_packs) |
| 782 | goto cleanup; |
| 783 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 784 | if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) |
| 785 | packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - |
| 786 | (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); |
| 787 | |
| 788 | ALLOC_ARRAY(pack_perm, packs.nr); |
| 789 | sort_packs_by_name(packs.names, packs.nr, pack_perm); |
| 790 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 791 | entries = get_sorted_entries(packs.m, packs.list, pack_perm, packs.nr, &nr_entries); |
| 792 | |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 793 | for (i = 0; i < nr_entries; i++) { |
| 794 | if (entries[i].offset > 0x7fffffff) |
| 795 | num_large_offsets++; |
| 796 | if (entries[i].offset > 0xffffffff) |
| 797 | large_offsets_needed = 1; |
| 798 | } |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 799 | |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 800 | hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); |
| 801 | f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); |
| 802 | FREE_AND_NULL(midx_name); |
| 803 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 804 | if (packs.m) |
| 805 | close_midx(packs.m); |
| 806 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 807 | cur_chunk = 0; |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 808 | num_chunks = large_offsets_needed ? 5 : 4; |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 809 | |
| 810 | written = write_midx_header(f, num_chunks, packs.nr); |
| 811 | |
| 812 | chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES; |
| 813 | chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH; |
| 814 | |
| 815 | cur_chunk++; |
Derrick Stolee | d7cacf2 | 2018-07-12 15:39:31 -0400 | [diff] [blame] | 816 | chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT; |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 817 | chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len; |
| 818 | |
Derrick Stolee | 0d5b3a5 | 2018-07-12 15:39:30 -0400 | [diff] [blame] | 819 | cur_chunk++; |
Derrick Stolee | d7cacf2 | 2018-07-12 15:39:31 -0400 | [diff] [blame] | 820 | chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP; |
| 821 | chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE; |
| 822 | |
| 823 | cur_chunk++; |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 824 | chunk_ids[cur_chunk] = MIDX_CHUNKID_OBJECTOFFSETS; |
Derrick Stolee | 0d5b3a5 | 2018-07-12 15:39:30 -0400 | [diff] [blame] | 825 | chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN; |
| 826 | |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 827 | cur_chunk++; |
| 828 | chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_CHUNK_OFFSET_WIDTH; |
| 829 | if (large_offsets_needed) { |
| 830 | chunk_ids[cur_chunk] = MIDX_CHUNKID_LARGEOFFSETS; |
| 831 | |
| 832 | cur_chunk++; |
| 833 | chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + |
| 834 | num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH; |
| 835 | } |
| 836 | |
| 837 | chunk_ids[cur_chunk] = 0; |
| 838 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 839 | for (i = 0; i <= num_chunks; i++) { |
| 840 | if (i && chunk_offsets[i] < chunk_offsets[i - 1]) |
| 841 | BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64, |
| 842 | chunk_offsets[i - 1], |
| 843 | chunk_offsets[i]); |
| 844 | |
| 845 | if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT) |
| 846 | BUG("chunk offset %"PRIu64" is not properly aligned", |
| 847 | chunk_offsets[i]); |
| 848 | |
| 849 | hashwrite_be32(f, chunk_ids[i]); |
| 850 | hashwrite_be32(f, chunk_offsets[i] >> 32); |
| 851 | hashwrite_be32(f, chunk_offsets[i]); |
| 852 | |
| 853 | written += MIDX_CHUNKLOOKUP_WIDTH; |
| 854 | } |
| 855 | |
| 856 | for (i = 0; i < num_chunks; i++) { |
| 857 | if (written != chunk_offsets[i]) |
| 858 | BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32, |
| 859 | chunk_offsets[i], |
| 860 | written, |
| 861 | chunk_ids[i]); |
| 862 | |
| 863 | switch (chunk_ids[i]) { |
| 864 | case MIDX_CHUNKID_PACKNAMES: |
| 865 | written += write_midx_pack_names(f, packs.names, packs.nr); |
| 866 | break; |
| 867 | |
Derrick Stolee | d7cacf2 | 2018-07-12 15:39:31 -0400 | [diff] [blame] | 868 | case MIDX_CHUNKID_OIDFANOUT: |
| 869 | written += write_midx_oid_fanout(f, entries, nr_entries); |
| 870 | break; |
| 871 | |
Derrick Stolee | 0d5b3a5 | 2018-07-12 15:39:30 -0400 | [diff] [blame] | 872 | case MIDX_CHUNKID_OIDLOOKUP: |
| 873 | written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries); |
| 874 | break; |
| 875 | |
Derrick Stolee | 662148c | 2018-07-12 15:39:32 -0400 | [diff] [blame] | 876 | case MIDX_CHUNKID_OBJECTOFFSETS: |
| 877 | written += write_midx_object_offsets(f, large_offsets_needed, entries, nr_entries); |
| 878 | break; |
| 879 | |
| 880 | case MIDX_CHUNKID_LARGEOFFSETS: |
| 881 | written += write_midx_large_offsets(f, num_large_offsets, entries, nr_entries); |
| 882 | break; |
| 883 | |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 884 | default: |
| 885 | BUG("trying to write unknown chunk id %"PRIx32, |
| 886 | chunk_ids[i]); |
| 887 | } |
| 888 | } |
| 889 | |
| 890 | if (written != chunk_offsets[num_chunks]) |
| 891 | BUG("incorrect final offset %"PRIu64" != %"PRIu64, |
| 892 | written, |
| 893 | chunk_offsets[num_chunks]); |
Derrick Stolee | fc59e74 | 2018-07-12 15:39:22 -0400 | [diff] [blame] | 894 | |
| 895 | finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM); |
| 896 | commit_lock_file(&lk); |
| 897 | |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 898 | cleanup: |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 899 | for (i = 0; i < packs.nr; i++) { |
| 900 | if (packs.list[i]) { |
| 901 | close_pack(packs.list[i]); |
| 902 | free(packs.list[i]); |
| 903 | } |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 904 | free(packs.names[i]); |
Derrick Stolee | 396f257 | 2018-07-12 15:39:26 -0400 | [diff] [blame] | 905 | } |
| 906 | |
| 907 | free(packs.list); |
Derrick Stolee | 32f3c54 | 2018-07-12 15:39:27 -0400 | [diff] [blame] | 908 | free(packs.names); |
Derrick Stolee | fe1ed56 | 2018-07-12 15:39:29 -0400 | [diff] [blame] | 909 | free(entries); |
Derrick Stolee | a40498a | 2018-07-12 15:39:36 -0400 | [diff] [blame] | 910 | free(pack_perm); |
| 911 | free(midx_name); |
Derrick Stolee | a340773 | 2018-07-12 15:39:21 -0400 | [diff] [blame] | 912 | return 0; |
| 913 | } |
Derrick Stolee | 525e18c | 2018-07-12 15:39:40 -0400 | [diff] [blame] | 914 | |
| 915 | void clear_midx_file(const char *object_dir) |
| 916 | { |
| 917 | char *midx = get_midx_filename(object_dir); |
| 918 | |
| 919 | if (remove_path(midx)) { |
| 920 | UNLEAK(midx); |
| 921 | die(_("failed to clear multi-pack-index at %s"), midx); |
| 922 | } |
| 923 | |
| 924 | free(midx); |
| 925 | } |
Derrick Stolee | 56ee7ff | 2018-09-13 11:02:13 -0700 | [diff] [blame] | 926 | |
| 927 | static int verify_midx_error; |
| 928 | |
Derrick Stolee | d4bf1d8 | 2018-09-13 11:02:19 -0700 | [diff] [blame^] | 929 | static void midx_report(const char *fmt, ...) |
| 930 | { |
| 931 | va_list ap; |
| 932 | verify_midx_error = 1; |
| 933 | va_start(ap, fmt); |
| 934 | vfprintf(stderr, fmt, ap); |
| 935 | fprintf(stderr, "\n"); |
| 936 | va_end(ap); |
| 937 | } |
| 938 | |
Derrick Stolee | 56ee7ff | 2018-09-13 11:02:13 -0700 | [diff] [blame] | 939 | int verify_midx_file(const char *object_dir) |
| 940 | { |
Derrick Stolee | d4bf1d8 | 2018-09-13 11:02:19 -0700 | [diff] [blame^] | 941 | uint32_t i; |
Derrick Stolee | 56ee7ff | 2018-09-13 11:02:13 -0700 | [diff] [blame] | 942 | struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); |
| 943 | verify_midx_error = 0; |
| 944 | |
| 945 | if (!m) |
| 946 | return 0; |
| 947 | |
Derrick Stolee | d4bf1d8 | 2018-09-13 11:02:19 -0700 | [diff] [blame^] | 948 | for (i = 0; i < m->num_packs; i++) { |
| 949 | if (prepare_midx_pack(m, i)) |
| 950 | midx_report("failed to load pack in position %d", i); |
| 951 | } |
| 952 | |
Derrick Stolee | 56ee7ff | 2018-09-13 11:02:13 -0700 | [diff] [blame] | 953 | return verify_midx_error; |
| 954 | } |