blob: bc4797196f02e60f566668867d6896b79ff952e4 [file] [log] [blame]
Elijah Newren5e3f94d2023-04-22 20:17:23 +00001#include "git-compat-util.h"
Derrick Stoleec4d25222018-07-12 15:39:33 -04002#include "config.h"
Derrick Stolee396f2572018-07-12 15:39:26 -04003#include "dir.h"
Elijah Newren41771fa2023-02-24 00:09:27 +00004#include "hex.h"
Derrick Stolee396f2572018-07-12 15:39:26 -04005#include "packfile.h"
Elijah Newren87bed172023-04-11 00:41:53 -07006#include "object-file.h"
Martin Ă…grenbc626922020-12-31 12:56:23 +01007#include "hash-lookup.h"
Derrick Stoleea3407732018-07-12 15:39:21 -04008#include "midx.h"
Derrick Stolee144d7032018-09-13 11:02:26 -07009#include "progress.h"
Jeff Hostetlerd8292232019-03-21 12:36:13 -070010#include "trace2.h"
Derrick Stolee63a8f0e2021-02-18 14:07:33 +000011#include "chunk-format.h"
Taylor Blauc528e172021-08-31 16:52:24 -040012#include "pack-bitmap.h"
Taylor Blaub1e33332023-12-14 17:24:25 -050013#include "pack-revindex.h"
Derrick Stoleea3407732018-07-12 15:39:21 -040014
/*
 * Forward declarations for non-static functions that are defined
 * further down in this file.
 */
int midx_checksum_valid(struct multi_pack_index *m);
void clear_midx_files_ext(const char *object_dir,
			  const char *ext,
			  unsigned char *keep_hash);
int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name);
Derrick Stolee19575c72019-06-10 16:35:25 -070020
Taylor Blau0f533c72021-08-31 16:52:21 -040021const unsigned char *get_midx_checksum(struct multi_pack_index *m)
Taylor Blauf8940812021-03-30 11:04:26 -040022{
23 return m->data + m->data_len - the_hash_algo->rawsz;
24}
25
/*
 * Append the path of the multi-pack-index under `object_dir` to `out`.
 * This is get_midx_filename_ext() with neither a hash nor an extension.
 */
void get_midx_filename(struct strbuf *out, const char *object_dir)
{
	const unsigned char *no_hash = NULL;
	const char *no_ext = NULL;

	get_midx_filename_ext(out, object_dir, no_hash, no_ext);
}
30
/*
 * Append "<object_dir>/pack/multi-pack-index" to `out`. When `ext` is
 * non-NULL, additionally append "-<hex of hash>.<ext>"; `hash` is only
 * consulted in that case.
 */
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
			   const unsigned char *hash, const char *ext)
{
	strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);

	if (!ext)
		return;

	strbuf_addf(out, "-%s.%s", hash_to_hex(hash), ext);
}
38
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +000039static int midx_read_oid_fanout(const unsigned char *chunk_start,
40 size_t chunk_size, void *data)
41{
Jeff King9d78fb02023-11-09 02:12:07 -050042 int i;
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +000043 struct multi_pack_index *m = data;
44 m->chunk_oid_fanout = (uint32_t *)chunk_start;
45
46 if (chunk_size != 4 * 256) {
47 error(_("multi-pack-index OID fanout is of the wrong size"));
48 return 1;
49 }
Jeff King9d78fb02023-11-09 02:12:07 -050050 for (i = 0; i < 255; i++) {
51 uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
52 uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
53
54 if (oid_fanout1 > oid_fanout2) {
55 error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
56 i, oid_fanout1, oid_fanout2, i + 1);
57 return 1;
58 }
59 }
Jeff Kingfc926562023-10-09 17:02:03 -040060 m->num_objects = ntohl(m->chunk_oid_fanout[255]);
61 return 0;
62}
63
64static int midx_read_oid_lookup(const unsigned char *chunk_start,
65 size_t chunk_size, void *data)
66{
67 struct multi_pack_index *m = data;
68 m->chunk_oid_lookup = chunk_start;
69
70 if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
71 error(_("multi-pack-index OID lookup chunk is the wrong size"));
72 return 1;
73 }
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +000074 return 0;
75}
76
Jeff King09248692023-10-09 17:05:27 -040077static int midx_read_object_offsets(const unsigned char *chunk_start,
78 size_t chunk_size, void *data)
79{
80 struct multi_pack_index *m = data;
81 m->chunk_object_offsets = chunk_start;
82
83 if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
84 error(_("multi-pack-index object offset chunk is the wrong size"));
85 return 1;
86 }
87 return 0;
88}
89
Taylor Blau748b88a2024-04-01 17:16:34 -040090#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
91
Derrick Stolee2cf489a2018-08-20 16:51:55 +000092struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local)
Derrick Stolee4d805602018-07-12 15:39:23 -040093{
94 struct multi_pack_index *m = NULL;
95 int fd;
96 struct stat st;
97 size_t midx_size;
98 void *midx_map = NULL;
99 uint32_t hash_version;
Taylor Blau60980ae2021-10-26 17:01:21 -0400100 struct strbuf midx_name = STRBUF_INIT;
Derrick Stolee32f3c542018-07-12 15:39:27 -0400101 uint32_t i;
Derrick Stolee32275652018-07-12 15:39:28 -0400102 const char *cur_pack_name;
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000103 struct chunkfile *cf = NULL;
Derrick Stolee4d805602018-07-12 15:39:23 -0400104
Taylor Blau60980ae2021-10-26 17:01:21 -0400105 get_midx_filename(&midx_name, object_dir);
106
107 fd = git_open(midx_name.buf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400108
109 if (fd < 0)
110 goto cleanup_fail;
111 if (fstat(fd, &st)) {
Taylor Blau60980ae2021-10-26 17:01:21 -0400112 error_errno(_("failed to read %s"), midx_name.buf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400113 goto cleanup_fail;
114 }
115
116 midx_size = xsize_t(st.st_size);
117
118 if (midx_size < MIDX_MIN_SIZE) {
Taylor Blau60980ae2021-10-26 17:01:21 -0400119 error(_("multi-pack-index file %s is too small"), midx_name.buf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400120 goto cleanup_fail;
121 }
122
Taylor Blau60980ae2021-10-26 17:01:21 -0400123 strbuf_release(&midx_name);
Derrick Stolee4d805602018-07-12 15:39:23 -0400124
125 midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
Derrick Stolee6c7ff7c2020-04-24 09:17:16 -0400126 close(fd);
Derrick Stolee4d805602018-07-12 15:39:23 -0400127
Denton Liu577314c2019-04-03 15:00:05 -0700128 FLEX_ALLOC_STR(m, object_dir, object_dir);
Derrick Stolee4d805602018-07-12 15:39:23 -0400129 m->data = midx_map;
130 m->data_len = midx_size;
Derrick Stolee2cf489a2018-08-20 16:51:55 +0000131 m->local = local;
Derrick Stolee4d805602018-07-12 15:39:23 -0400132
133 m->signature = get_be32(m->data);
Derrick Stolee53ad0402018-09-13 11:02:15 -0700134 if (m->signature != MIDX_SIGNATURE)
135 die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
Derrick Stolee4d805602018-07-12 15:39:23 -0400136 m->signature, MIDX_SIGNATURE);
Derrick Stolee4d805602018-07-12 15:39:23 -0400137
138 m->version = m->data[MIDX_BYTE_FILE_VERSION];
Derrick Stolee53ad0402018-09-13 11:02:15 -0700139 if (m->version != MIDX_VERSION)
140 die(_("multi-pack-index version %d not recognized"),
Derrick Stolee4d805602018-07-12 15:39:23 -0400141 m->version);
Derrick Stolee4d805602018-07-12 15:39:23 -0400142
143 hash_version = m->data[MIDX_BYTE_HASH_VERSION];
Taylor Blaud9fef9d2022-05-20 19:17:41 -0400144 if (hash_version != oid_version(the_hash_algo)) {
Derrick Stoleed9607542020-08-17 14:04:48 +0000145 error(_("multi-pack-index hash version %u does not match version %u"),
Taylor Blaud9fef9d2022-05-20 19:17:41 -0400146 hash_version, oid_version(the_hash_algo));
Derrick Stoleed9607542020-08-17 14:04:48 +0000147 goto cleanup_fail;
148 }
brian m. carlsonaaa95df2019-08-18 20:04:27 +0000149 m->hash_len = the_hash_algo->rawsz;
Derrick Stolee4d805602018-07-12 15:39:23 -0400150
151 m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
152
153 m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
154
Taylor Blaub1e33332023-12-14 17:24:25 -0500155 m->preferred_pack_idx = -1;
156
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000157 cf = init_chunkfile(NULL);
Derrick Stolee32f3c542018-07-12 15:39:27 -0400158
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000159 if (read_table_of_contents(cf, m->data, midx_size,
Jeff Kingc9b9fef2023-10-09 17:05:23 -0400160 MIDX_HEADER_SIZE, m->num_chunks,
161 MIDX_CHUNK_ALIGNMENT))
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000162 goto cleanup_fail;
Derrick Stoleed3f8e212018-09-13 11:02:16 -0700163
Jeff King72a9a082023-10-09 17:05:14 -0400164 if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
Jeff Kinge3c96002023-10-09 16:59:19 -0400165 die(_("multi-pack-index required pack-name chunk missing or corrupted"));
166 if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
167 die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
Jeff Kingfc926562023-10-09 17:02:03 -0400168 if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
Jeff Kinge3c96002023-10-09 16:59:19 -0400169 die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
Jeff King09248692023-10-09 17:05:27 -0400170 if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
Jeff Kinge3c96002023-10-09 16:59:19 -0400171 die(_("multi-pack-index required object offsets chunk missing or corrupted"));
Derrick Stolee32f3c542018-07-12 15:39:27 -0400172
Jeff King2abd56e2023-10-09 17:05:30 -0400173 pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
174 &m->chunk_large_offsets_len);
Patrick Steinhardt795006f2024-04-15 08:41:25 +0200175 if (git_env_bool("GIT_TEST_MIDX_READ_BTMP", 1))
176 pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
177 (const unsigned char **)&m->chunk_bitmapped_packs,
178 &m->chunk_bitmapped_packs_len);
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000179
Taylor Blau7f514b72022-01-25 17:41:17 -0500180 if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
Jeff Kingc0fe9b22023-10-09 17:05:33 -0400181 pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
182 &m->chunk_revindex_len);
Taylor Blau7f514b72022-01-25 17:41:17 -0500183
René Scharfeca56dad2021-03-13 17:17:22 +0100184 CALLOC_ARRAY(m->pack_names, m->num_packs);
185 CALLOC_ARRAY(m->packs, m->num_packs);
Derrick Stolee32275652018-07-12 15:39:28 -0400186
187 cur_pack_name = (const char *)m->chunk_pack_names;
188 for (i = 0; i < m->num_packs; i++) {
Jeff King72a9a082023-10-09 17:05:14 -0400189 const char *end;
190 size_t avail = m->chunk_pack_names_len -
191 (cur_pack_name - (const char *)m->chunk_pack_names);
192
Derrick Stolee32275652018-07-12 15:39:28 -0400193 m->pack_names[i] = cur_pack_name;
194
Jeff King72a9a082023-10-09 17:05:14 -0400195 end = memchr(cur_pack_name, '\0', avail);
196 if (!end)
197 die(_("multi-pack-index pack-name chunk is too short"));
198 cur_pack_name = end + 1;
Derrick Stolee32275652018-07-12 15:39:28 -0400199
Derrick Stolee8e72a3c2018-09-13 11:02:18 -0700200 if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
201 die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
Derrick Stolee32275652018-07-12 15:39:28 -0400202 m->pack_names[i - 1],
203 m->pack_names[i]);
Derrick Stolee32275652018-07-12 15:39:28 -0400204 }
205
Jeff Hostetlerd8292232019-03-21 12:36:13 -0700206 trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
207 trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
208
Taylor Blau692305e2021-10-20 23:39:47 -0400209 free_chunkfile(cf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400210 return m;
211
212cleanup_fail:
213 free(m);
Taylor Blau60980ae2021-10-26 17:01:21 -0400214 strbuf_release(&midx_name);
Taylor Blau692305e2021-10-20 23:39:47 -0400215 free_chunkfile(cf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400216 if (midx_map)
217 munmap(midx_map, midx_size);
218 if (0 <= fd)
219 close(fd);
220 return NULL;
221}
222
Derrick Stolee1dcd9f22018-10-12 10:34:19 -0700223void close_midx(struct multi_pack_index *m)
Derrick Stoleea40498a2018-07-12 15:39:36 -0400224{
225 uint32_t i;
Derrick Stolee1dcd9f22018-10-12 10:34:19 -0700226
227 if (!m)
228 return;
229
Taylor Blau9bb6c2e2021-08-31 16:52:07 -0400230 close_midx(m->next);
231
Derrick Stoleea40498a2018-07-12 15:39:36 -0400232 munmap((unsigned char *)m->data, m->data_len);
Derrick Stoleea40498a2018-07-12 15:39:36 -0400233
234 for (i = 0; i < m->num_packs; i++) {
Derrick Stoleeaf96fe32019-04-29 09:18:56 -0700235 if (m->packs[i])
236 m->packs[i]->multi_pack_index = 0;
Derrick Stoleea40498a2018-07-12 15:39:36 -0400237 }
238 FREE_AND_NULL(m->packs);
239 FREE_AND_NULL(m->pack_names);
Taylor Blau9bb6c2e2021-08-31 16:52:07 -0400240 free(m);
Derrick Stoleea40498a2018-07-12 15:39:36 -0400241}
242
Derrick Stolee64404a22019-04-29 09:18:55 -0700243int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id)
Derrick Stolee3715a632018-07-12 15:39:34 -0400244{
245 struct strbuf pack_name = STRBUF_INIT;
Derrick Stoleeaf96fe32019-04-29 09:18:56 -0700246 struct packed_git *p;
Derrick Stolee3715a632018-07-12 15:39:34 -0400247
248 if (pack_int_id >= m->num_packs)
Jean-Noël Avilad355e462018-11-28 22:43:09 +0100249 die(_("bad pack-int-id: %u (%u total packs)"),
Derrick Stoleecc6af732018-09-13 11:02:25 -0700250 pack_int_id, m->num_packs);
Derrick Stolee3715a632018-07-12 15:39:34 -0400251
252 if (m->packs[pack_int_id])
253 return 0;
254
255 strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
256 m->pack_names[pack_int_id]);
257
Derrick Stoleeaf96fe32019-04-29 09:18:56 -0700258 p = add_packed_git(pack_name.buf, pack_name.len, m->local);
Derrick Stolee3715a632018-07-12 15:39:34 -0400259 strbuf_release(&pack_name);
Derrick Stoleeaf96fe32019-04-29 09:18:56 -0700260
261 if (!p)
262 return 1;
263
264 p->multi_pack_index = 1;
265 m->packs[pack_int_id] = p;
266 install_packed_git(r, p);
267 list_add_tail(&p->mru, &r->objects->packed_git_mru);
268
269 return 0;
Derrick Stolee3715a632018-07-12 15:39:34 -0400270}
271
Taylor Blau748b88a2024-04-01 17:16:34 -0400272#define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
273
Taylor Blau5f5ccd92023-12-14 17:23:51 -0500274int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
275 struct bitmapped_pack *bp, uint32_t pack_int_id)
276{
277 if (!m->chunk_bitmapped_packs)
278 return error(_("MIDX does not contain the BTMP chunk"));
279
280 if (prepare_midx_pack(r, m, pack_int_id))
281 return error(_("could not load bitmapped pack %"PRIu32), pack_int_id);
282
283 bp->p = m->packs[pack_int_id];
284 bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs +
285 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * pack_int_id);
286 bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs +
287 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * pack_int_id +
288 sizeof(uint32_t));
289 bp->pack_int_id = pack_int_id;
290
291 return 0;
292}
293
Derrick Stolee3715a632018-07-12 15:39:34 -0400294int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result)
295{
296 return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup,
brian m. carlsonaaa95df2019-08-18 20:04:27 +0000297 the_hash_algo->rawsz, result);
Derrick Stolee3715a632018-07-12 15:39:34 -0400298}
299
Derrick Stolee8aac67a2018-07-12 15:39:35 -0400300struct object_id *nth_midxed_object_oid(struct object_id *oid,
301 struct multi_pack_index *m,
302 uint32_t n)
303{
304 if (n >= m->num_objects)
305 return NULL;
306
Taylor Blauc2b24ed2023-07-12 19:37:38 -0400307 oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n));
Derrick Stolee8aac67a2018-07-12 15:39:35 -0400308 return oid;
309}
310
Taylor Blau62f2c1b2021-03-30 11:04:20 -0400311off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
Derrick Stolee3715a632018-07-12 15:39:34 -0400312{
313 const unsigned char *offset_data;
314 uint32_t offset32;
315
Derrick Stolee329fac32021-02-18 14:07:37 +0000316 offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
Derrick Stolee3715a632018-07-12 15:39:34 -0400317 offset32 = get_be32(offset_data + sizeof(uint32_t));
318
319 if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
Derrick Stoleed8ac9ee2018-09-13 11:02:23 -0700320 if (sizeof(off_t) < sizeof(uint64_t))
Derrick Stolee3715a632018-07-12 15:39:34 -0400321 die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
322
323 offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
Jeff King2abd56e2023-10-09 17:05:30 -0400324 if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
325 die(_("multi-pack-index large offset out of bounds"));
326 return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
Derrick Stolee3715a632018-07-12 15:39:34 -0400327 }
328
329 return offset32;
330}
331
Taylor Blau62f2c1b2021-03-30 11:04:20 -0400332uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
Derrick Stolee3715a632018-07-12 15:39:34 -0400333{
Derrick Stolee329fac32021-02-18 14:07:37 +0000334 return get_be32(m->chunk_object_offsets +
335 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
Derrick Stolee3715a632018-07-12 15:39:34 -0400336}
337
Taylor Blaua8437f32022-10-12 18:01:48 -0400338int fill_midx_entry(struct repository *r,
René Scharfe893b5632021-09-11 22:39:31 +0200339 const struct object_id *oid,
340 struct pack_entry *e,
341 struct multi_pack_index *m)
Derrick Stolee3715a632018-07-12 15:39:34 -0400342{
René Scharfe893b5632021-09-11 22:39:31 +0200343 uint32_t pos;
Derrick Stolee3715a632018-07-12 15:39:34 -0400344 uint32_t pack_int_id;
345 struct packed_git *p;
346
René Scharfe893b5632021-09-11 22:39:31 +0200347 if (!bsearch_midx(oid, m, &pos))
348 return 0;
349
Derrick Stolee3715a632018-07-12 15:39:34 -0400350 if (pos >= m->num_objects)
351 return 0;
352
353 pack_int_id = nth_midxed_pack_int_id(m, pos);
354
Derrick Stolee64404a22019-04-29 09:18:55 -0700355 if (prepare_midx_pack(r, m, pack_int_id))
Taylor Blau506ec2f2020-11-25 12:17:33 -0500356 return 0;
Derrick Stolee3715a632018-07-12 15:39:34 -0400357 p = m->packs[pack_int_id];
358
359 /*
360 * We are about to tell the caller where they can locate the
361 * requested object. We better make sure the packfile is
362 * still here and can be accessed before supplying that
363 * answer, as it may have been deleted since the MIDX was
364 * loaded!
365 */
366 if (!is_pack_valid(p))
367 return 0;
368
René Scharfe09ef6612021-09-11 22:43:26 +0200369 if (oidset_size(&p->bad_objects) &&
370 oidset_contains(&p->bad_objects, oid))
371 return 0;
Derrick Stoleec39b02a2018-08-20 16:51:57 +0000372
Derrick Stolee3715a632018-07-12 15:39:34 -0400373 e->offset = nth_midxed_offset(m, pos);
374 e->p = p;
375
376 return 1;
377}
378
/*
 * Compare `idx_or_pack_name` ("foo.idx" or "foo.pack") against the index
 * name `idx_name` ("foo.idx").
 *
 * Returns 0 when the names are identical, or when they diverge exactly
 * at an "idx" tail in `idx_name` and a "pack" tail in
 * `idx_or_pack_name`. Otherwise returns the strcmp() ordering of the
 * two names.
 */
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
			 const char *idx_name)
{
	size_t common = 0;
	const char *given_tail, *idx_tail;

	/* Measure the leading part on which both names agree. */
	while (idx_name[common] &&
	       idx_name[common] == idx_or_pack_name[common])
		common++;

	idx_tail = idx_name + common;
	given_tail = idx_or_pack_name + common;

	/*
	 * A shared "pack-1234." prefix followed by "idx" on one side and
	 * "pack" on the other counts as a match. Two "idx" tails never get
	 * here as such: identical names are consumed entirely by the loop
	 * above and yield 0 from the final strcmp().
	 *
	 * Strictly speaking "fooidx" and "foopack" match as well, but such
	 * names are not expected to occur.
	 */
	if (strcmp(idx_tail, "idx") == 0 && strcmp(given_tail, "pack") == 0)
		return 0;

	/*
	 * Comparing the tails orders on the first differing character,
	 * which is the same ordering a plain strcmp() of the full names
	 * gives. That keeps this usable for binary-searching a list sorted
	 * with strcmp().
	 */
	return strcmp(given_tail, idx_tail);
}
410
Taylor Blau307d75b2023-12-14 17:23:54 -0500411int midx_locate_pack(struct multi_pack_index *m, const char *idx_or_pack_name,
412 uint32_t *pos)
Derrick Stoleea40498a2018-07-12 15:39:36 -0400413{
414 uint32_t first = 0, last = m->num_packs;
415
416 while (first < last) {
417 uint32_t mid = first + (last - first) / 2;
418 const char *current;
419 int cmp;
420
421 current = m->pack_names[mid];
Jeff King013fd7a2019-04-05 14:06:04 -0400422 cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
Taylor Blau307d75b2023-12-14 17:23:54 -0500423 if (!cmp) {
424 if (pos)
425 *pos = mid;
Derrick Stoleea40498a2018-07-12 15:39:36 -0400426 return 1;
Taylor Blau307d75b2023-12-14 17:23:54 -0500427 }
Derrick Stoleea40498a2018-07-12 15:39:36 -0400428 if (cmp > 0) {
429 first = mid + 1;
430 continue;
431 }
432 last = mid;
433 }
434
435 return 0;
436}
437
/*
 * Return 1 if `idx_or_pack_name` is among the packs named by `m`, and
 * 0 otherwise; the position of the match is not reported.
 */
int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
{
	uint32_t *ignore_pos = NULL;

	return midx_locate_pack(m, idx_or_pack_name, ignore_pos);
}
442
Taylor Blaub1e33332023-12-14 17:24:25 -0500443int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
444{
445 if (m->preferred_pack_idx == -1) {
446 if (load_midx_revindex(m) < 0) {
447 m->preferred_pack_idx = -2;
448 return -1;
449 }
450
451 m->preferred_pack_idx =
452 nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0));
453 } else if (m->preferred_pack_idx == -2)
454 return -1; /* no revindex */
455
456 *pack_int_id = m->preferred_pack_idx;
457 return 0;
458}
459
Derrick Stolee2cf489a2018-08-20 16:51:55 +0000460int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
Derrick Stoleec4d25222018-07-12 15:39:33 -0400461{
Derrick Stolee29e20162018-08-20 16:52:00 +0000462 struct multi_pack_index *m;
Derrick Stoleec4d25222018-07-12 15:39:33 -0400463 struct multi_pack_index *m_search;
Derrick Stoleec4d25222018-07-12 15:39:33 -0400464
Derrick Stolee18e449f2020-09-25 12:33:34 +0000465 prepare_repo_settings(r);
466 if (!r->settings.core_multi_pack_index)
Derrick Stoleec4d25222018-07-12 15:39:33 -0400467 return 0;
468
Derrick Stolee29e20162018-08-20 16:52:00 +0000469 for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
Derrick Stoleec4d25222018-07-12 15:39:33 -0400470 if (!strcmp(object_dir, m_search->object_dir))
471 return 1;
472
Derrick Stolee29e20162018-08-20 16:52:00 +0000473 m = load_multi_pack_index(object_dir, local);
Derrick Stoleec4d25222018-07-12 15:39:33 -0400474
Derrick Stolee29e20162018-08-20 16:52:00 +0000475 if (m) {
Taylor Blau59552fb2020-08-28 16:22:13 -0400476 struct multi_pack_index *mp = r->objects->multi_pack_index;
477 if (mp) {
478 m->next = mp->next;
479 mp->next = m;
480 } else
481 r->objects->multi_pack_index = m;
Derrick Stoleec4d25222018-07-12 15:39:33 -0400482 return 1;
483 }
484
485 return 0;
486}
487
Taylor Blau748b88a2024-04-01 17:16:34 -0400488int midx_checksum_valid(struct multi_pack_index *m)
Taylor Blauec1e28e2021-06-23 14:39:12 -0400489{
490 return hashfile_checksum_valid(m->data, m->data_len);
491}
492
Taylor Blau38ff7ca2021-03-30 11:04:32 -0400493struct clear_midx_data {
494 char *keep;
495 const char *ext;
496};
497
Jeff Kingbe252d32023-02-24 01:39:24 -0500498static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
Taylor Blau38ff7ca2021-03-30 11:04:32 -0400499 const char *file_name, void *_data)
500{
501 struct clear_midx_data *data = _data;
502
503 if (!(starts_with(file_name, "multi-pack-index-") &&
504 ends_with(file_name, data->ext)))
505 return;
506 if (data->keep && !strcmp(data->keep, file_name))
507 return;
508
509 if (unlink(full_path))
510 die_errno(_("failed to remove %s"), full_path);
511}
512
Taylor Blau748b88a2024-04-01 17:16:34 -0400513void clear_midx_files_ext(const char *object_dir, const char *ext,
514 unsigned char *keep_hash)
Taylor Blau38ff7ca2021-03-30 11:04:32 -0400515{
516 struct clear_midx_data data;
517 memset(&data, 0, sizeof(struct clear_midx_data));
518
519 if (keep_hash)
520 data.keep = xstrfmt("multi-pack-index-%s%s",
521 hash_to_hex(keep_hash), ext);
522 data.ext = ext;
523
Taylor Blau426c00e2021-08-31 16:51:55 -0400524 for_each_file_in_pack_dir(object_dir,
Taylor Blau38ff7ca2021-03-30 11:04:32 -0400525 clear_midx_file_ext,
526 &data);
527
528 free(data.keep);
Derrick Stoleea3407732018-07-12 15:39:21 -0400529}
Derrick Stolee525e18c2018-07-12 15:39:40 -0400530
Derrick Stolee1dcd9f22018-10-12 10:34:19 -0700531void clear_midx_file(struct repository *r)
Derrick Stolee525e18c2018-07-12 15:39:40 -0400532{
Taylor Blau60980ae2021-10-26 17:01:21 -0400533 struct strbuf midx = STRBUF_INIT;
534
535 get_midx_filename(&midx, r->objects->odb->path);
Derrick Stolee1dcd9f22018-10-12 10:34:19 -0700536
537 if (r->objects && r->objects->multi_pack_index) {
538 close_midx(r->objects->multi_pack_index);
539 r->objects->multi_pack_index = NULL;
540 }
Derrick Stolee525e18c2018-07-12 15:39:40 -0400541
Taylor Blau60980ae2021-10-26 17:01:21 -0400542 if (remove_path(midx.buf))
543 die(_("failed to clear multi-pack-index at %s"), midx.buf);
Derrick Stolee525e18c2018-07-12 15:39:40 -0400544
Taylor Blauc528e172021-08-31 16:52:24 -0400545 clear_midx_files_ext(r->objects->odb->path, ".bitmap", NULL);
Taylor Blau426c00e2021-08-31 16:51:55 -0400546 clear_midx_files_ext(r->objects->odb->path, ".rev", NULL);
Taylor Blau38ff7ca2021-03-30 11:04:32 -0400547
Taylor Blau60980ae2021-10-26 17:01:21 -0400548 strbuf_release(&midx);
Derrick Stolee525e18c2018-07-12 15:39:40 -0400549}
Derrick Stolee56ee7ff2018-09-13 11:02:13 -0700550
/* Set to 1 by midx_report(); reset and returned by verify_midx_file(). */
static int verify_midx_error;

/*
 * Print a printf-style message followed by a newline to stderr and
 * flag that verification has found a problem.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fputc('\n', stderr);
}
563
/* Pairs an object's position in the MIDX with the id of its pack. */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * Sort comparator ordering entries by descending pack_int_id.
 *
 * Returns a positive value when b's pack_int_id is larger than a's, a
 * negative value when it is smaller, and 0 when they are equal. The ids
 * are compared explicitly instead of being subtracted: a difference of
 * two uint32_t values converted to int yields the wrong sign once the
 * ids are more than INT_MAX apart.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
577
/*
 * For performance reasons, do not call display_progress() on every
 * item: only counts that are a multiple of SPARSE_PROGRESS_INTERVAL are
 * reported. The interval itself was chosen arbitrarily.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t sparse_n_ = (n); \
		if (!(sparse_n_ & (SPARSE_PROGRESS_INTERVAL - 1))) \
			display_progress(progress, sparse_n_); \
	} while (0)
589
William Bakerefbc3ae2019-10-21 18:39:58 +0000590int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags)
Derrick Stolee56ee7ff2018-09-13 11:02:13 -0700591{
Jeff Hostetler5ae18df2019-03-21 12:36:15 -0700592 struct pair_pos_vs_id *pairs = NULL;
Derrick Stoleed4bf1d82018-09-13 11:02:19 -0700593 uint32_t i;
William Bakerad600962019-10-21 18:40:01 +0000594 struct progress *progress = NULL;
Derrick Stolee56ee7ff2018-09-13 11:02:13 -0700595 struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
596 verify_midx_error = 0;
597
Derrick Stoleed9607542020-08-17 14:04:48 +0000598 if (!m) {
599 int result = 0;
600 struct stat sb;
Taylor Blau60980ae2021-10-26 17:01:21 -0400601 struct strbuf filename = STRBUF_INIT;
602
603 get_midx_filename(&filename, object_dir);
604
605 if (!stat(filename.buf, &sb)) {
Derrick Stoleed9607542020-08-17 14:04:48 +0000606 error(_("multi-pack-index file exists, but failed to parse"));
607 result = 1;
608 }
Taylor Blau60980ae2021-10-26 17:01:21 -0400609 strbuf_release(&filename);
Derrick Stoleed9607542020-08-17 14:04:48 +0000610 return result;
611 }
Derrick Stolee56ee7ff2018-09-13 11:02:13 -0700612
Taylor Blauf89ecf72021-06-23 14:39:15 -0400613 if (!midx_checksum_valid(m))
614 midx_report(_("incorrect checksum"));
615
William Bakerad600962019-10-21 18:40:01 +0000616 if (flags & MIDX_PROGRESS)
Derrick Stoleeefdd2f02020-09-25 12:33:35 +0000617 progress = start_delayed_progress(_("Looking for referenced packfiles"),
William Bakerad600962019-10-21 18:40:01 +0000618 m->num_packs);
Derrick Stoleed4bf1d82018-09-13 11:02:19 -0700619 for (i = 0; i < m->num_packs; i++) {
Derrick Stolee64404a22019-04-29 09:18:55 -0700620 if (prepare_midx_pack(r, m, i))
Derrick Stoleed4bf1d82018-09-13 11:02:19 -0700621 midx_report("failed to load pack in position %d", i);
Jeff Hostetler430efb82019-03-21 12:36:14 -0700622
623 display_progress(progress, i + 1);
Derrick Stoleed4bf1d82018-09-13 11:02:19 -0700624 }
Jeff Hostetler430efb82019-03-21 12:36:14 -0700625 stop_progress(&progress);
Derrick Stoleed4bf1d82018-09-13 11:02:19 -0700626
Damien Robert796d61c2020-03-28 23:18:22 +0100627 if (m->num_objects == 0) {
628 midx_report(_("the midx contains no oid"));
629 /*
630 * Remaining tests assume that we have objects, so we can
631 * return here.
632 */
Taylor Blau492cb392021-10-26 17:01:08 -0400633 goto cleanup;
Damien Robert796d61c2020-03-28 23:18:22 +0100634 }
635
William Bakerad600962019-10-21 18:40:01 +0000636 if (flags & MIDX_PROGRESS)
637 progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"),
638 m->num_objects - 1);
Derrick Stolee55c56482018-09-13 11:02:22 -0700639 for (i = 0; i < m->num_objects - 1; i++) {
640 struct object_id oid1, oid2;
641
642 nth_midxed_object_oid(&oid1, m, i);
643 nth_midxed_object_oid(&oid2, m, i + 1);
644
645 if (oidcmp(&oid1, &oid2) >= 0)
646 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
647 i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
Derrick Stolee55c56482018-09-13 11:02:22 -0700648
Jeff Hostetler430efb82019-03-21 12:36:14 -0700649 midx_display_sparse_progress(progress, i + 1);
650 }
651 stop_progress(&progress);
652
Jeff Hostetler5ae18df2019-03-21 12:36:15 -0700653 /*
654 * Create an array mapping each object to its packfile id. Sort it
655 * to group the objects by packfile. Use this permutation to visit
656 * each of the objects and only require 1 packfile to be open at a
657 * time.
658 */
659 ALLOC_ARRAY(pairs, m->num_objects);
660 for (i = 0; i < m->num_objects; i++) {
661 pairs[i].pos = i;
662 pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
663 }
664
William Bakerad600962019-10-21 18:40:01 +0000665 if (flags & MIDX_PROGRESS)
666 progress = start_sparse_progress(_("Sorting objects by packfile"),
667 m->num_objects);
Jeff Hostetler5ae18df2019-03-21 12:36:15 -0700668 display_progress(progress, 0); /* TODO: Measure QSORT() progress */
669 QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
670 stop_progress(&progress);
671
William Bakerad600962019-10-21 18:40:01 +0000672 if (flags & MIDX_PROGRESS)
673 progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
Derrick Stoleecc6af732018-09-13 11:02:25 -0700674 for (i = 0; i < m->num_objects; i++) {
675 struct object_id oid;
676 struct pack_entry e;
677 off_t m_offset, p_offset;
678
Jeff Hostetler5ae18df2019-03-21 12:36:15 -0700679 if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
680 m->packs[pairs[i-1].pack_int_id])
681 {
682 close_pack_fd(m->packs[pairs[i-1].pack_int_id]);
683 close_pack_index(m->packs[pairs[i-1].pack_int_id]);
684 }
685
686 nth_midxed_object_oid(&oid, m, pairs[i].pos);
687
Derrick Stolee64404a22019-04-29 09:18:55 -0700688 if (!fill_midx_entry(r, &oid, &e, m)) {
Derrick Stoleecc6af732018-09-13 11:02:25 -0700689 midx_report(_("failed to load pack entry for oid[%d] = %s"),
Jeff Hostetler5ae18df2019-03-21 12:36:15 -0700690 pairs[i].pos, oid_to_hex(&oid));
Derrick Stoleecc6af732018-09-13 11:02:25 -0700691 continue;
692 }
693
694 if (open_pack_index(e.p)) {
695 midx_report(_("failed to load pack-index for packfile %s"),
696 e.p->pack_name);
697 break;
698 }
699
700 m_offset = e.offset;
701 p_offset = find_pack_entry_one(oid.hash, e.p);
702
703 if (m_offset != p_offset)
704 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
Jeff Hostetler5ae18df2019-03-21 12:36:15 -0700705 pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
Derrick Stolee144d7032018-09-13 11:02:26 -0700706
Jeff Hostetler430efb82019-03-21 12:36:14 -0700707 midx_display_sparse_progress(progress, i + 1);
Derrick Stoleecc6af732018-09-13 11:02:25 -0700708 }
Derrick Stolee144d7032018-09-13 11:02:26 -0700709 stop_progress(&progress);
Derrick Stoleecc6af732018-09-13 11:02:25 -0700710
Taylor Blau492cb392021-10-26 17:01:08 -0400711cleanup:
Jeff Hostetler5ae18df2019-03-21 12:36:15 -0700712 free(pairs);
Taylor Blau492cb392021-10-26 17:01:08 -0400713 close_midx(m);
Jeff Hostetler5ae18df2019-03-21 12:36:15 -0700714
Derrick Stolee56ee7ff2018-09-13 11:02:13 -0700715 return verify_midx_error;
716}