blob: beaf0c0de4472c8c2fcf20902aa0aaf1ed3e4dab [file] [log] [blame]
Elijah Newren5e3f94d2023-04-22 20:17:23 +00001#include "git-compat-util.h"
Elijah Newren0b027f62023-03-21 06:25:58 +00002#include "abspath.h"
Derrick Stoleec4d25222018-07-12 15:39:33 -04003#include "config.h"
Derrick Stoleefc59e742018-07-12 15:39:22 -04004#include "csum-file.h"
Derrick Stolee396f2572018-07-12 15:39:26 -04005#include "dir.h"
Elijah Newrenf394e092023-03-21 06:25:54 +00006#include "gettext.h"
Elijah Newren41771fa2023-02-24 00:09:27 +00007#include "hex.h"
Derrick Stoleefc59e742018-07-12 15:39:22 -04008#include "lockfile.h"
Derrick Stolee396f2572018-07-12 15:39:26 -04009#include "packfile.h"
Elijah Newren87bed172023-04-11 00:41:53 -070010#include "object-file.h"
Elijah Newrena034e912023-05-16 06:34:06 +000011#include "object-store-ll.h"
Martin Ågrenbc626922020-12-31 12:56:23 +010012#include "hash-lookup.h"
Derrick Stoleea3407732018-07-12 15:39:21 -040013#include "midx.h"
Derrick Stolee144d7032018-09-13 11:02:26 -070014#include "progress.h"
Jeff Hostetlerd8292232019-03-21 12:36:13 -070015#include "trace2.h"
Derrick Stoleece1e4a12019-06-10 16:35:27 -070016#include "run-command.h"
Derrick Stolee18e449f2020-09-25 12:33:34 +000017#include "repository.h"
Derrick Stolee63a8f0e2021-02-18 14:07:33 +000018#include "chunk-format.h"
Taylor Blau38ff7ca2021-03-30 11:04:32 -040019#include "pack.h"
Taylor Blauc528e172021-08-31 16:52:24 -040020#include "pack-bitmap.h"
21#include "refs.h"
22#include "revision.h"
23#include "list-objects.h"
Derrick Stoleea3407732018-07-12 15:39:21 -040024
/* Header: magic number, format version, and byte offsets of each field. */
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
#define MIDX_VERSION 1
#define MIDX_BYTE_FILE_VERSION 4
#define MIDX_BYTE_HASH_VERSION 5
#define MIDX_BYTE_NUM_CHUNKS 6
#define MIDX_BYTE_NUM_PACKS 8
#define MIDX_HEADER_SIZE 12
/* Smallest acceptable file: a header followed by one raw hash. */
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)

/* Chunk identifiers (four ASCII bytes each) and per-chunk entry widths. */
#define MIDX_CHUNK_ALIGNMENT 4
#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
#define MIDX_CHUNKID_BITMAPPEDPACKS 0x42544d50 /* "BTMP" */
#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
#define MIDX_CHUNKID_REVINDEX 0x52494458 /* "RIDX" */
#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
#define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t))
/* High bit of a 32-bit offset: the remaining bits index the large-offset chunk. */
#define MIDX_LARGE_OFFSET_NEEDED 0x80000000

#define PACK_EXPIRED UINT_MAX
49
Taylor Blau0f533c72021-08-31 16:52:21 -040050const unsigned char *get_midx_checksum(struct multi_pack_index *m)
Taylor Blauf8940812021-03-30 11:04:26 -040051{
52 return m->data + m->data_len - the_hash_algo->rawsz;
53}
54
/*
 * Append the path of the multi-pack-index belonging to 'object_dir'
 * ("<object_dir>/pack/multi-pack-index") to 'out'.
 */
void get_midx_filename(struct strbuf *out, const char *object_dir)
{
	strbuf_addf(out,
		    "%s/pack/multi-pack-index",
		    object_dir);
}
59
Taylor Blau60980ae2021-10-26 17:01:21 -040060void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m)
Taylor Blauf8940812021-03-30 11:04:26 -040061{
Taylor Blau60980ae2021-10-26 17:01:21 -040062 get_midx_filename(out, m->object_dir);
63 strbuf_addf(out, "-%s.rev", hash_to_hex(get_midx_checksum(m)));
Taylor Blauf8940812021-03-30 11:04:26 -040064}
65
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +000066static int midx_read_oid_fanout(const unsigned char *chunk_start,
67 size_t chunk_size, void *data)
68{
Jeff King9d78fb02023-11-09 02:12:07 -050069 int i;
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +000070 struct multi_pack_index *m = data;
71 m->chunk_oid_fanout = (uint32_t *)chunk_start;
72
73 if (chunk_size != 4 * 256) {
74 error(_("multi-pack-index OID fanout is of the wrong size"));
75 return 1;
76 }
Jeff King9d78fb02023-11-09 02:12:07 -050077 for (i = 0; i < 255; i++) {
78 uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
79 uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i+1]);
80
81 if (oid_fanout1 > oid_fanout2) {
82 error(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
83 i, oid_fanout1, oid_fanout2, i + 1);
84 return 1;
85 }
86 }
Jeff Kingfc926562023-10-09 17:02:03 -040087 m->num_objects = ntohl(m->chunk_oid_fanout[255]);
88 return 0;
89}
90
91static int midx_read_oid_lookup(const unsigned char *chunk_start,
92 size_t chunk_size, void *data)
93{
94 struct multi_pack_index *m = data;
95 m->chunk_oid_lookup = chunk_start;
96
97 if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
98 error(_("multi-pack-index OID lookup chunk is the wrong size"));
99 return 1;
100 }
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000101 return 0;
102}
103
Jeff King09248692023-10-09 17:05:27 -0400104static int midx_read_object_offsets(const unsigned char *chunk_start,
105 size_t chunk_size, void *data)
106{
107 struct multi_pack_index *m = data;
108 m->chunk_object_offsets = chunk_start;
109
110 if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
111 error(_("multi-pack-index object offset chunk is the wrong size"));
112 return 1;
113 }
114 return 0;
115}
116
Derrick Stolee2cf489a2018-08-20 16:51:55 +0000117struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local)
Derrick Stolee4d805602018-07-12 15:39:23 -0400118{
119 struct multi_pack_index *m = NULL;
120 int fd;
121 struct stat st;
122 size_t midx_size;
123 void *midx_map = NULL;
124 uint32_t hash_version;
Taylor Blau60980ae2021-10-26 17:01:21 -0400125 struct strbuf midx_name = STRBUF_INIT;
Derrick Stolee32f3c542018-07-12 15:39:27 -0400126 uint32_t i;
Derrick Stolee32275652018-07-12 15:39:28 -0400127 const char *cur_pack_name;
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000128 struct chunkfile *cf = NULL;
Derrick Stolee4d805602018-07-12 15:39:23 -0400129
Taylor Blau60980ae2021-10-26 17:01:21 -0400130 get_midx_filename(&midx_name, object_dir);
131
132 fd = git_open(midx_name.buf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400133
134 if (fd < 0)
135 goto cleanup_fail;
136 if (fstat(fd, &st)) {
Taylor Blau60980ae2021-10-26 17:01:21 -0400137 error_errno(_("failed to read %s"), midx_name.buf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400138 goto cleanup_fail;
139 }
140
141 midx_size = xsize_t(st.st_size);
142
143 if (midx_size < MIDX_MIN_SIZE) {
Taylor Blau60980ae2021-10-26 17:01:21 -0400144 error(_("multi-pack-index file %s is too small"), midx_name.buf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400145 goto cleanup_fail;
146 }
147
Taylor Blau60980ae2021-10-26 17:01:21 -0400148 strbuf_release(&midx_name);
Derrick Stolee4d805602018-07-12 15:39:23 -0400149
150 midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
Derrick Stolee6c7ff7c2020-04-24 09:17:16 -0400151 close(fd);
Derrick Stolee4d805602018-07-12 15:39:23 -0400152
Denton Liu577314c2019-04-03 15:00:05 -0700153 FLEX_ALLOC_STR(m, object_dir, object_dir);
Derrick Stolee4d805602018-07-12 15:39:23 -0400154 m->data = midx_map;
155 m->data_len = midx_size;
Derrick Stolee2cf489a2018-08-20 16:51:55 +0000156 m->local = local;
Derrick Stolee4d805602018-07-12 15:39:23 -0400157
158 m->signature = get_be32(m->data);
Derrick Stolee53ad0402018-09-13 11:02:15 -0700159 if (m->signature != MIDX_SIGNATURE)
160 die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
Derrick Stolee4d805602018-07-12 15:39:23 -0400161 m->signature, MIDX_SIGNATURE);
Derrick Stolee4d805602018-07-12 15:39:23 -0400162
163 m->version = m->data[MIDX_BYTE_FILE_VERSION];
Derrick Stolee53ad0402018-09-13 11:02:15 -0700164 if (m->version != MIDX_VERSION)
165 die(_("multi-pack-index version %d not recognized"),
Derrick Stolee4d805602018-07-12 15:39:23 -0400166 m->version);
Derrick Stolee4d805602018-07-12 15:39:23 -0400167
168 hash_version = m->data[MIDX_BYTE_HASH_VERSION];
Taylor Blaud9fef9d2022-05-20 19:17:41 -0400169 if (hash_version != oid_version(the_hash_algo)) {
Derrick Stoleed9607542020-08-17 14:04:48 +0000170 error(_("multi-pack-index hash version %u does not match version %u"),
Taylor Blaud9fef9d2022-05-20 19:17:41 -0400171 hash_version, oid_version(the_hash_algo));
Derrick Stoleed9607542020-08-17 14:04:48 +0000172 goto cleanup_fail;
173 }
brian m. carlsonaaa95df2019-08-18 20:04:27 +0000174 m->hash_len = the_hash_algo->rawsz;
Derrick Stolee4d805602018-07-12 15:39:23 -0400175
176 m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
177
178 m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
179
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000180 cf = init_chunkfile(NULL);
Derrick Stolee32f3c542018-07-12 15:39:27 -0400181
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000182 if (read_table_of_contents(cf, m->data, midx_size,
Jeff Kingc9b9fef2023-10-09 17:05:23 -0400183 MIDX_HEADER_SIZE, m->num_chunks,
184 MIDX_CHUNK_ALIGNMENT))
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000185 goto cleanup_fail;
Derrick Stoleed3f8e212018-09-13 11:02:16 -0700186
Jeff King72a9a082023-10-09 17:05:14 -0400187 if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
Jeff Kinge3c96002023-10-09 16:59:19 -0400188 die(_("multi-pack-index required pack-name chunk missing or corrupted"));
189 if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
190 die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
Jeff Kingfc926562023-10-09 17:02:03 -0400191 if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
Jeff Kinge3c96002023-10-09 16:59:19 -0400192 die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
Jeff King09248692023-10-09 17:05:27 -0400193 if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
Jeff Kinge3c96002023-10-09 16:59:19 -0400194 die(_("multi-pack-index required object offsets chunk missing or corrupted"));
Derrick Stolee32f3c542018-07-12 15:39:27 -0400195
Jeff King2abd56e2023-10-09 17:05:30 -0400196 pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
197 &m->chunk_large_offsets_len);
Taylor Blau5f5ccd92023-12-14 17:23:51 -0500198 pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
199 (const unsigned char **)&m->chunk_bitmapped_packs,
200 &m->chunk_bitmapped_packs_len);
Derrick Stolee6ab3b8b2021-02-18 14:07:36 +0000201
Taylor Blau7f514b72022-01-25 17:41:17 -0500202 if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
Jeff Kingc0fe9b22023-10-09 17:05:33 -0400203 pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
204 &m->chunk_revindex_len);
Taylor Blau7f514b72022-01-25 17:41:17 -0500205
René Scharfeca56dad2021-03-13 17:17:22 +0100206 CALLOC_ARRAY(m->pack_names, m->num_packs);
207 CALLOC_ARRAY(m->packs, m->num_packs);
Derrick Stolee32275652018-07-12 15:39:28 -0400208
209 cur_pack_name = (const char *)m->chunk_pack_names;
210 for (i = 0; i < m->num_packs; i++) {
Jeff King72a9a082023-10-09 17:05:14 -0400211 const char *end;
212 size_t avail = m->chunk_pack_names_len -
213 (cur_pack_name - (const char *)m->chunk_pack_names);
214
Derrick Stolee32275652018-07-12 15:39:28 -0400215 m->pack_names[i] = cur_pack_name;
216
Jeff King72a9a082023-10-09 17:05:14 -0400217 end = memchr(cur_pack_name, '\0', avail);
218 if (!end)
219 die(_("multi-pack-index pack-name chunk is too short"));
220 cur_pack_name = end + 1;
Derrick Stolee32275652018-07-12 15:39:28 -0400221
Derrick Stolee8e72a3c2018-09-13 11:02:18 -0700222 if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
223 die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
Derrick Stolee32275652018-07-12 15:39:28 -0400224 m->pack_names[i - 1],
225 m->pack_names[i]);
Derrick Stolee32275652018-07-12 15:39:28 -0400226 }
227
Jeff Hostetlerd8292232019-03-21 12:36:13 -0700228 trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
229 trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
230
Taylor Blau692305e2021-10-20 23:39:47 -0400231 free_chunkfile(cf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400232 return m;
233
234cleanup_fail:
235 free(m);
Taylor Blau60980ae2021-10-26 17:01:21 -0400236 strbuf_release(&midx_name);
Taylor Blau692305e2021-10-20 23:39:47 -0400237 free_chunkfile(cf);
Derrick Stolee4d805602018-07-12 15:39:23 -0400238 if (midx_map)
239 munmap(midx_map, midx_size);
240 if (0 <= fd)
241 close(fd);
242 return NULL;
243}
244
Derrick Stolee1dcd9f22018-10-12 10:34:19 -0700245void close_midx(struct multi_pack_index *m)
Derrick Stoleea40498a2018-07-12 15:39:36 -0400246{
247 uint32_t i;
Derrick Stolee1dcd9f22018-10-12 10:34:19 -0700248
249 if (!m)
250 return;
251
Taylor Blau9bb6c2e2021-08-31 16:52:07 -0400252 close_midx(m->next);
253
Derrick Stoleea40498a2018-07-12 15:39:36 -0400254 munmap((unsigned char *)m->data, m->data_len);
Derrick Stoleea40498a2018-07-12 15:39:36 -0400255
256 for (i = 0; i < m->num_packs; i++) {
Derrick Stoleeaf96fe32019-04-29 09:18:56 -0700257 if (m->packs[i])
258 m->packs[i]->multi_pack_index = 0;
Derrick Stoleea40498a2018-07-12 15:39:36 -0400259 }
260 FREE_AND_NULL(m->packs);
261 FREE_AND_NULL(m->pack_names);
Taylor Blau9bb6c2e2021-08-31 16:52:07 -0400262 free(m);
Derrick Stoleea40498a2018-07-12 15:39:36 -0400263}
264
Derrick Stolee64404a22019-04-29 09:18:55 -0700265int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id)
Derrick Stolee3715a632018-07-12 15:39:34 -0400266{
267 struct strbuf pack_name = STRBUF_INIT;
Derrick Stoleeaf96fe32019-04-29 09:18:56 -0700268 struct packed_git *p;
Derrick Stolee3715a632018-07-12 15:39:34 -0400269
270 if (pack_int_id >= m->num_packs)
Jean-Noël Avilad355e462018-11-28 22:43:09 +0100271 die(_("bad pack-int-id: %u (%u total packs)"),
Derrick Stoleecc6af732018-09-13 11:02:25 -0700272 pack_int_id, m->num_packs);
Derrick Stolee3715a632018-07-12 15:39:34 -0400273
274 if (m->packs[pack_int_id])
275 return 0;
276
277 strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
278 m->pack_names[pack_int_id]);
279
Derrick Stoleeaf96fe32019-04-29 09:18:56 -0700280 p = add_packed_git(pack_name.buf, pack_name.len, m->local);
Derrick Stolee3715a632018-07-12 15:39:34 -0400281 strbuf_release(&pack_name);
Derrick Stoleeaf96fe32019-04-29 09:18:56 -0700282
283 if (!p)
284 return 1;
285
286 p->multi_pack_index = 1;
287 m->packs[pack_int_id] = p;
288 install_packed_git(r, p);
289 list_add_tail(&p->mru, &r->objects->packed_git_mru);
290
291 return 0;
Derrick Stolee3715a632018-07-12 15:39:34 -0400292}
293
Taylor Blau5f5ccd92023-12-14 17:23:51 -0500294int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
295 struct bitmapped_pack *bp, uint32_t pack_int_id)
296{
297 if (!m->chunk_bitmapped_packs)
298 return error(_("MIDX does not contain the BTMP chunk"));
299
300 if (prepare_midx_pack(r, m, pack_int_id))
301 return error(_("could not load bitmapped pack %"PRIu32), pack_int_id);
302
303 bp->p = m->packs[pack_int_id];
304 bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs +
305 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * pack_int_id);
306 bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs +
307 MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * pack_int_id +
308 sizeof(uint32_t));
309 bp->pack_int_id = pack_int_id;
310
311 return 0;
312}
313
Derrick Stolee3715a632018-07-12 15:39:34 -0400314int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result)
315{
316 return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup,
brian m. carlsonaaa95df2019-08-18 20:04:27 +0000317 the_hash_algo->rawsz, result);
Derrick Stolee3715a632018-07-12 15:39:34 -0400318}
319
Derrick Stolee8aac67a2018-07-12 15:39:35 -0400320struct object_id *nth_midxed_object_oid(struct object_id *oid,
321 struct multi_pack_index *m,
322 uint32_t n)
323{
324 if (n >= m->num_objects)
325 return NULL;
326
Taylor Blauc2b24ed2023-07-12 19:37:38 -0400327 oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n));
Derrick Stolee8aac67a2018-07-12 15:39:35 -0400328 return oid;
329}
330
Taylor Blau62f2c1b2021-03-30 11:04:20 -0400331off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
Derrick Stolee3715a632018-07-12 15:39:34 -0400332{
333 const unsigned char *offset_data;
334 uint32_t offset32;
335
Derrick Stolee329fac32021-02-18 14:07:37 +0000336 offset_data = m->chunk_object_offsets + (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH;
Derrick Stolee3715a632018-07-12 15:39:34 -0400337 offset32 = get_be32(offset_data + sizeof(uint32_t));
338
339 if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
Derrick Stoleed8ac9ee2018-09-13 11:02:23 -0700340 if (sizeof(off_t) < sizeof(uint64_t))
Derrick Stolee3715a632018-07-12 15:39:34 -0400341 die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
342
343 offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
Jeff King2abd56e2023-10-09 17:05:30 -0400344 if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
345 die(_("multi-pack-index large offset out of bounds"));
346 return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
Derrick Stolee3715a632018-07-12 15:39:34 -0400347 }
348
349 return offset32;
350}
351
Taylor Blau62f2c1b2021-03-30 11:04:20 -0400352uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
Derrick Stolee3715a632018-07-12 15:39:34 -0400353{
Derrick Stolee329fac32021-02-18 14:07:37 +0000354 return get_be32(m->chunk_object_offsets +
355 (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH);
Derrick Stolee3715a632018-07-12 15:39:34 -0400356}
357
Taylor Blaua8437f32022-10-12 18:01:48 -0400358int fill_midx_entry(struct repository *r,
René Scharfe893b5632021-09-11 22:39:31 +0200359 const struct object_id *oid,
360 struct pack_entry *e,
361 struct multi_pack_index *m)
Derrick Stolee3715a632018-07-12 15:39:34 -0400362{
René Scharfe893b5632021-09-11 22:39:31 +0200363 uint32_t pos;
Derrick Stolee3715a632018-07-12 15:39:34 -0400364 uint32_t pack_int_id;
365 struct packed_git *p;
366
René Scharfe893b5632021-09-11 22:39:31 +0200367 if (!bsearch_midx(oid, m, &pos))
368 return 0;
369
Derrick Stolee3715a632018-07-12 15:39:34 -0400370 if (pos >= m->num_objects)
371 return 0;
372
373 pack_int_id = nth_midxed_pack_int_id(m, pos);
374
Derrick Stolee64404a22019-04-29 09:18:55 -0700375 if (prepare_midx_pack(r, m, pack_int_id))
Taylor Blau506ec2f2020-11-25 12:17:33 -0500376 return 0;
Derrick Stolee3715a632018-07-12 15:39:34 -0400377 p = m->packs[pack_int_id];
378
379 /*
380 * We are about to tell the caller where they can locate the
381 * requested object. We better make sure the packfile is
382 * still here and can be accessed before supplying that
383 * answer, as it may have been deleted since the MIDX was
384 * loaded!
385 */
386 if (!is_pack_valid(p))
387 return 0;
388
René Scharfe09ef6612021-09-11 22:43:26 +0200389 if (oidset_size(&p->bad_objects) &&
390 oidset_contains(&p->bad_objects, oid))
391 return 0;
Derrick Stoleec39b02a2018-08-20 16:51:57 +0000392
Derrick Stolee3715a632018-07-12 15:39:34 -0400393 e->offset = nth_midxed_offset(m, pos);
394 e->p = p;
395
396 return 1;
397}
398
/*
 * Compare 'idx_or_pack_name' ("foo.idx" or "foo.pack") against the index
 * name 'idx_name' ("foo.idx"), treating the ".pack" spelling of the same
 * pack as equal.  Otherwise the result orders like strcmp().
 */
static int cmp_idx_or_pack_name(const char *idx_or_pack_name,
				const char *idx_name)
{
	size_t common = 0;

	/* Measure the prefix that both names share. */
	while (idx_name[common] &&
	       idx_name[common] == idx_or_pack_name[common])
		common++;

	/*
	 * A shared prefix of "pack-1234." followed by "idx" on one side and
	 * "pack" on the other names the same pack, so report a match.
	 * "idx" against "idx" needs no special case: that is a complete
	 * match, this test is false, and the strcmp() below returns 0.
	 *
	 * Technically this also matches "fooidx" against "foopack", but
	 * such names never occur.
	 */
	if (!strcmp(idx_name + common, "idx") &&
	    !strcmp(idx_or_pack_name + common, "pack"))
		return 0;

	/*
	 * Comparing the tails orders by the first differing character,
	 * exactly as a strcmp() of the full names would, so this function
	 * can drive a binary search over a plainly sorted list.
	 */
	return strcmp(idx_or_pack_name + common, idx_name + common);
}
430
Taylor Blau307d75b2023-12-14 17:23:54 -0500431int midx_locate_pack(struct multi_pack_index *m, const char *idx_or_pack_name,
432 uint32_t *pos)
Derrick Stoleea40498a2018-07-12 15:39:36 -0400433{
434 uint32_t first = 0, last = m->num_packs;
435
436 while (first < last) {
437 uint32_t mid = first + (last - first) / 2;
438 const char *current;
439 int cmp;
440
441 current = m->pack_names[mid];
Jeff King013fd7a2019-04-05 14:06:04 -0400442 cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
Taylor Blau307d75b2023-12-14 17:23:54 -0500443 if (!cmp) {
444 if (pos)
445 *pos = mid;
Derrick Stoleea40498a2018-07-12 15:39:36 -0400446 return 1;
Taylor Blau307d75b2023-12-14 17:23:54 -0500447 }
Derrick Stoleea40498a2018-07-12 15:39:36 -0400448 if (cmp > 0) {
449 first = mid + 1;
450 continue;
451 }
452 last = mid;
453 }
454
455 return 0;
456}
457
/*
 * Return 1 if 'm' lists the pack named 'idx_or_pack_name' (".idx" or
 * ".pack" spelling), 0 otherwise.
 */
int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
{
	int found = midx_locate_pack(m, idx_or_pack_name, NULL);

	return found;
}
462
Derrick Stolee2cf489a2018-08-20 16:51:55 +0000463int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
Derrick Stoleec4d25222018-07-12 15:39:33 -0400464{
Derrick Stolee29e20162018-08-20 16:52:00 +0000465 struct multi_pack_index *m;
Derrick Stoleec4d25222018-07-12 15:39:33 -0400466 struct multi_pack_index *m_search;
Derrick Stoleec4d25222018-07-12 15:39:33 -0400467
Derrick Stolee18e449f2020-09-25 12:33:34 +0000468 prepare_repo_settings(r);
469 if (!r->settings.core_multi_pack_index)
Derrick Stoleec4d25222018-07-12 15:39:33 -0400470 return 0;
471
Derrick Stolee29e20162018-08-20 16:52:00 +0000472 for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
Derrick Stoleec4d25222018-07-12 15:39:33 -0400473 if (!strcmp(object_dir, m_search->object_dir))
474 return 1;
475
Derrick Stolee29e20162018-08-20 16:52:00 +0000476 m = load_multi_pack_index(object_dir, local);
Derrick Stoleec4d25222018-07-12 15:39:33 -0400477
Derrick Stolee29e20162018-08-20 16:52:00 +0000478 if (m) {
Taylor Blau59552fb2020-08-28 16:22:13 -0400479 struct multi_pack_index *mp = r->objects->multi_pack_index;
480 if (mp) {
481 m->next = mp->next;
482 mp->next = m;
483 } else
484 r->objects->multi_pack_index = m;
Derrick Stoleec4d25222018-07-12 15:39:33 -0400485 return 1;
486 }
487
488 return 0;
489}
490
Derrick Stoleefc59e742018-07-12 15:39:22 -0400491static size_t write_midx_header(struct hashfile *f,
492 unsigned char num_chunks,
493 uint32_t num_packs)
494{
Derrick Stoleefc59e742018-07-12 15:39:22 -0400495 hashwrite_be32(f, MIDX_SIGNATURE);
René Scharfe014f1442020-09-06 10:59:02 +0200496 hashwrite_u8(f, MIDX_VERSION);
Taylor Blaud9fef9d2022-05-20 19:17:41 -0400497 hashwrite_u8(f, oid_version(the_hash_algo));
René Scharfe014f1442020-09-06 10:59:02 +0200498 hashwrite_u8(f, num_chunks);
499 hashwrite_u8(f, 0); /* unused */
Derrick Stoleefc59e742018-07-12 15:39:22 -0400500 hashwrite_be32(f, num_packs);
501
502 return MIDX_HEADER_SIZE;
503}
504
/* Value of pack_info.bitmap_pos before a position has been assigned. */
#define BITMAP_POS_UNKNOWN (~((uint32_t)0))

/* Per-pack bookkeeping used while writing a multi-pack-index. */
struct pack_info {
	/* position of this pack at the time it was recorded */
	uint32_t orig_pack_int_id;
	/* heap-allocated copy of the pack's file name */
	char *pack_name;
	struct packed_git *p;

	/* BITMAP_POS_UNKNOWN until set */
	uint32_t bitmap_pos;
	uint32_t bitmap_nr;

	unsigned expired : 1;
};
517
Taylor Blaufba68182023-12-14 17:23:48 -0500518static void fill_pack_info(struct pack_info *info,
519 struct packed_git *p, const char *pack_name,
520 uint32_t orig_pack_int_id)
521{
522 memset(info, 0, sizeof(struct pack_info));
523
524 info->orig_pack_int_id = orig_pack_int_id;
525 info->pack_name = xstrdup(pack_name);
526 info->p = p;
Taylor Blau5f5ccd92023-12-14 17:23:51 -0500527 info->bitmap_pos = BITMAP_POS_UNKNOWN;
Taylor Blaufba68182023-12-14 17:23:48 -0500528}
529
Derrick Stoleed01bf2e2019-06-10 16:35:24 -0700530static int pack_info_compare(const void *_a, const void *_b)
531{
532 struct pack_info *a = (struct pack_info *)_a;
533 struct pack_info *b = (struct pack_info *)_b;
534 return strcmp(a->pack_name, b->pack_name);
535}
536
Taylor Blau9218c6a2021-03-30 11:04:11 -0400537static int idx_or_pack_name_cmp(const void *_va, const void *_vb)
538{
539 const char *pack_name = _va;
540 const struct pack_info *compar = _vb;
541
542 return cmp_idx_or_pack_name(pack_name, compar->pack_name);
543}
544
/* State shared by the steps that write a multi-pack-index. */
struct write_midx_context {
	/* growable array of packs: 'nr' in use, 'alloc' allocated */
	struct pack_info *info;
	size_t nr;
	size_t alloc;
	/* existing MIDX being reused, if any */
	struct multi_pack_index *m;
	struct progress *progress;
	/* number of ".idx" paths examined so far, reported to 'progress' */
	unsigned pack_paths_checked;

	struct pack_midx_entry *entries;
	size_t entries_nr;

	uint32_t *pack_perm;
	uint32_t *pack_order;
	unsigned large_offsets_needed:1;
	uint32_t num_large_offsets;

	int preferred_pack_idx;

	/* when set, only packs named in this list are added */
	struct string_list *to_include;
};
565
566static void add_pack_to_midx(const char *full_path, size_t full_path_len,
567 const char *file_name, void *data)
568{
Derrick Stolee577dc492021-02-18 14:07:26 +0000569 struct write_midx_context *ctx = data;
Taylor Blaufba68182023-12-14 17:23:48 -0500570 struct packed_git *p;
Derrick Stolee396f2572018-07-12 15:39:26 -0400571
572 if (ends_with(file_name, ".idx")) {
Derrick Stolee577dc492021-02-18 14:07:26 +0000573 display_progress(ctx->progress, ++ctx->pack_paths_checked);
Taylor Blau56d863e2021-09-28 21:55:01 -0400574 /*
575 * Note that at most one of ctx->m and ctx->to_include are set,
576 * so we are testing midx_contains_pack() and
577 * string_list_has_string() independently (guarded by the
578 * appropriate NULL checks).
579 *
580 * We could support passing to_include while reusing an existing
581 * MIDX, but don't currently since the reuse process drags
582 * forward all packs from an existing MIDX (without checking
583 * whether or not they appear in the to_include list).
584 *
585 * If we added support for that, these next two conditional
586 * should be performed independently (likely checking
587 * to_include before the existing MIDX).
588 */
Derrick Stolee577dc492021-02-18 14:07:26 +0000589 if (ctx->m && midx_contains_pack(ctx->m, file_name))
Derrick Stoleea40498a2018-07-12 15:39:36 -0400590 return;
Taylor Blau56d863e2021-09-28 21:55:01 -0400591 else if (ctx->to_include &&
592 !string_list_has_string(ctx->to_include, file_name))
593 return;
Derrick Stoleea40498a2018-07-12 15:39:36 -0400594
Derrick Stolee577dc492021-02-18 14:07:26 +0000595 ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
Derrick Stolee396f2572018-07-12 15:39:26 -0400596
Taylor Blaufba68182023-12-14 17:23:48 -0500597 p = add_packed_git(full_path, full_path_len, 0);
598 if (!p) {
Derrick Stolee396f2572018-07-12 15:39:26 -0400599 warning(_("failed to add packfile '%s'"),
600 full_path);
601 return;
602 }
603
Taylor Blaufba68182023-12-14 17:23:48 -0500604 if (open_pack_index(p)) {
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400605 warning(_("failed to open pack-index '%s'"),
606 full_path);
Taylor Blaufba68182023-12-14 17:23:48 -0500607 close_pack(p);
608 free(p);
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400609 return;
610 }
611
Taylor Blaufba68182023-12-14 17:23:48 -0500612 fill_pack_info(&ctx->info[ctx->nr], p, file_name, ctx->nr);
Derrick Stolee577dc492021-02-18 14:07:26 +0000613 ctx->nr++;
Derrick Stolee396f2572018-07-12 15:39:26 -0400614 }
615}
616
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400617struct pack_midx_entry {
618 struct object_id oid;
619 uint32_t pack_int_id;
620 time_t pack_mtime;
621 uint64_t offset;
Taylor Blau9218c6a2021-03-30 11:04:11 -0400622 unsigned preferred : 1;
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400623};
624
625static int midx_oid_compare(const void *_a, const void *_b)
626{
627 const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a;
628 const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b;
629 int cmp = oidcmp(&a->oid, &b->oid);
630
631 if (cmp)
632 return cmp;
633
Taylor Blau9218c6a2021-03-30 11:04:11 -0400634 /* Sort objects in a preferred pack first when multiple copies exist. */
635 if (a->preferred > b->preferred)
636 return -1;
637 if (a->preferred < b->preferred)
638 return 1;
639
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400640 if (a->pack_mtime > b->pack_mtime)
641 return -1;
642 else if (a->pack_mtime < b->pack_mtime)
643 return 1;
644
645 return a->pack_int_id - b->pack_int_id;
646}
647
Derrick Stoleea40498a2018-07-12 15:39:36 -0400648static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
Derrick Stoleea40498a2018-07-12 15:39:36 -0400649 struct pack_midx_entry *e,
650 uint32_t pos)
651{
652 if (pos >= m->num_objects)
653 return 1;
654
655 nth_midxed_object_oid(&e->oid, m, pos);
Derrick Stoleed01bf2e2019-06-10 16:35:24 -0700656 e->pack_int_id = nth_midxed_pack_int_id(m, pos);
Derrick Stoleea40498a2018-07-12 15:39:36 -0400657 e->offset = nth_midxed_offset(m, pos);
658
659 /* consider objects in midx to be from "old" packs */
660 e->pack_mtime = 0;
661 return 0;
662}
663
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400664static void fill_pack_entry(uint32_t pack_int_id,
665 struct packed_git *p,
666 uint32_t cur_object,
Taylor Blau9218c6a2021-03-30 11:04:11 -0400667 struct pack_midx_entry *entry,
668 int preferred)
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400669{
Jeff King07636712020-02-23 23:27:36 -0500670 if (nth_packed_object_id(&entry->oid, p, cur_object) < 0)
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400671 die(_("failed to locate object %d in packfile"), cur_object);
672
673 entry->pack_int_id = pack_int_id;
674 entry->pack_mtime = p->mtime;
675
676 entry->offset = nth_packed_object_offset(p, cur_object);
Taylor Blau9218c6a2021-03-30 11:04:11 -0400677 entry->preferred = !!preferred;
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400678}
679
/* Growable array of entries: 'nr' in use, 'alloc' allocated. */
struct midx_fanout {
	struct pack_midx_entry *entries;
	size_t nr;
	size_t alloc;
};
684
Taylor Blaue6c71f22023-07-12 19:37:36 -0400685static void midx_fanout_grow(struct midx_fanout *fanout, size_t nr)
Taylor Blau989d9cb2022-08-22 15:50:38 -0400686{
Taylor Blaue6c71f22023-07-12 19:37:36 -0400687 if (nr < fanout->nr)
688 BUG("negative growth in midx_fanout_grow() (%"PRIuMAX" < %"PRIuMAX")",
689 (uintmax_t)nr, (uintmax_t)fanout->nr);
Taylor Blau989d9cb2022-08-22 15:50:38 -0400690 ALLOC_GROW(fanout->entries, nr, fanout->alloc);
691}
692
693static void midx_fanout_sort(struct midx_fanout *fanout)
694{
695 QSORT(fanout->entries, fanout->nr, midx_oid_compare);
696}
697
Taylor Blau852c5302022-08-22 15:50:41 -0400698static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout,
699 struct multi_pack_index *m,
Taylor Blau99e4d082022-08-22 15:50:49 -0400700 uint32_t cur_fanout,
701 int preferred_pack)
Taylor Blau852c5302022-08-22 15:50:41 -0400702{
703 uint32_t start = 0, end;
704 uint32_t cur_object;
705
706 if (cur_fanout)
707 start = ntohl(m->chunk_oid_fanout[cur_fanout - 1]);
708 end = ntohl(m->chunk_oid_fanout[cur_fanout]);
709
710 for (cur_object = start; cur_object < end; cur_object++) {
Taylor Blau99e4d082022-08-22 15:50:49 -0400711 if ((preferred_pack > -1) &&
712 (preferred_pack == nth_midxed_pack_int_id(m, cur_object))) {
713 /*
714 * Objects from preferred packs are added
715 * separately.
716 */
717 continue;
718 }
719
Taylor Blau852c5302022-08-22 15:50:41 -0400720 midx_fanout_grow(fanout, fanout->nr + 1);
721 nth_midxed_pack_midx_entry(m,
722 &fanout->entries[fanout->nr],
723 cur_object);
Taylor Blaucdf517b2022-08-22 15:50:46 -0400724 fanout->entries[fanout->nr].preferred = 0;
Taylor Blau852c5302022-08-22 15:50:41 -0400725 fanout->nr++;
726 }
727}
728
Taylor Blau1d6f4c62022-08-22 15:50:43 -0400729static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
730 struct pack_info *info,
731 uint32_t cur_pack,
732 int preferred,
733 uint32_t cur_fanout)
734{
735 struct packed_git *pack = info[cur_pack].p;
736 uint32_t start = 0, end;
737 uint32_t cur_object;
738
739 if (cur_fanout)
740 start = get_pack_fanout(pack, cur_fanout - 1);
741 end = get_pack_fanout(pack, cur_fanout);
742
743 for (cur_object = start; cur_object < end; cur_object++) {
744 midx_fanout_grow(fanout, fanout->nr + 1);
745 fill_pack_entry(cur_pack,
746 info[cur_pack].p,
747 cur_object,
748 &fanout->entries[fanout->nr],
749 preferred);
750 fanout->nr++;
751 }
752}
753
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400754/*
755 * It is possible to artificially get into a state where there are many
756 * duplicate copies of objects. That can create high memory pressure if
757 * we are to create a list of all objects before de-duplication. To reduce
758 * this memory pressure without a significant performance drop, automatically
759 * group objects by the first byte of their object id. Use the IDX fanout
760 * tables to group the data, copy to a local array, then sort.
761 *
762 * Copy only the de-duplicated entries (selected by most-recent modified time
763 * of a packfile containing the object).
764 */
Derrick Stoleea40498a2018-07-12 15:39:36 -0400765static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
Derrick Stoleed01bf2e2019-06-10 16:35:24 -0700766 struct pack_info *info,
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400767 uint32_t nr_packs,
Taylor Blaucc381272023-07-12 19:37:44 -0400768 size_t *nr_objects,
Taylor Blau9218c6a2021-03-30 11:04:11 -0400769 int preferred_pack)
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400770{
771 uint32_t cur_fanout, cur_pack, cur_object;
Taylor Blaucc381272023-07-12 19:37:44 -0400772 size_t alloc_objects, total_objects = 0;
Taylor Blau989d9cb2022-08-22 15:50:38 -0400773 struct midx_fanout fanout = { 0 };
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400774 struct pack_midx_entry *deduplicated_entries = NULL;
Derrick Stoleea40498a2018-07-12 15:39:36 -0400775 uint32_t start_pack = m ? m->num_packs : 0;
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400776
Derrick Stoleea40498a2018-07-12 15:39:36 -0400777 for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
Taylor Blaucc381272023-07-12 19:37:44 -0400778 total_objects = st_add(total_objects,
779 info[cur_pack].p->num_objects);
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400780
781 /*
782 * As we de-duplicate by fanout value, we expect the fanout
783 * slices to be evenly distributed, with some noise. Hence,
784 * allocate slightly more than one 256th.
785 */
Taylor Blau989d9cb2022-08-22 15:50:38 -0400786 alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400787
Taylor Blau989d9cb2022-08-22 15:50:38 -0400788 ALLOC_ARRAY(fanout.entries, fanout.alloc);
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400789 ALLOC_ARRAY(deduplicated_entries, alloc_objects);
790 *nr_objects = 0;
791
792 for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
Taylor Blau989d9cb2022-08-22 15:50:38 -0400793 fanout.nr = 0;
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400794
Taylor Blau852c5302022-08-22 15:50:41 -0400795 if (m)
Taylor Blau99e4d082022-08-22 15:50:49 -0400796 midx_fanout_add_midx_fanout(&fanout, m, cur_fanout,
797 preferred_pack);
Derrick Stoleea40498a2018-07-12 15:39:36 -0400798
799 for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
Taylor Blau9218c6a2021-03-30 11:04:11 -0400800 int preferred = cur_pack == preferred_pack;
Taylor Blau1d6f4c62022-08-22 15:50:43 -0400801 midx_fanout_add_pack_fanout(&fanout,
802 info, cur_pack,
803 preferred, cur_fanout);
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400804 }
805
Taylor Blaucdf517b2022-08-22 15:50:46 -0400806 if (-1 < preferred_pack && preferred_pack < start_pack)
807 midx_fanout_add_pack_fanout(&fanout, info,
808 preferred_pack, 1,
809 cur_fanout);
810
Taylor Blau989d9cb2022-08-22 15:50:38 -0400811 midx_fanout_sort(&fanout);
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400812
813 /*
814 * The batch is now sorted by OID and then mtime (descending).
815 * Take only the first duplicate.
816 */
Taylor Blau989d9cb2022-08-22 15:50:38 -0400817 for (cur_object = 0; cur_object < fanout.nr; cur_object++) {
818 if (cur_object && oideq(&fanout.entries[cur_object - 1].oid,
819 &fanout.entries[cur_object].oid))
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400820 continue;
821
Taylor Blaucc381272023-07-12 19:37:44 -0400822 ALLOC_GROW(deduplicated_entries, st_add(*nr_objects, 1),
823 alloc_objects);
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400824 memcpy(&deduplicated_entries[*nr_objects],
Taylor Blau989d9cb2022-08-22 15:50:38 -0400825 &fanout.entries[cur_object],
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400826 sizeof(struct pack_midx_entry));
827 (*nr_objects)++;
828 }
829 }
830
Taylor Blau989d9cb2022-08-22 15:50:38 -0400831 free(fanout.entries);
Derrick Stoleefe1ed562018-07-12 15:39:29 -0400832 return deduplicated_entries;
833}
834
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000835static int write_midx_pack_names(struct hashfile *f, void *data)
Derrick Stolee32f3c542018-07-12 15:39:27 -0400836{
Derrick Stoleeb4d94142021-02-18 14:07:27 +0000837 struct write_midx_context *ctx = data;
Derrick Stolee32f3c542018-07-12 15:39:27 -0400838 uint32_t i;
839 unsigned char padding[MIDX_CHUNK_ALIGNMENT];
840 size_t written = 0;
841
Derrick Stoleeb4d94142021-02-18 14:07:27 +0000842 for (i = 0; i < ctx->nr; i++) {
Derrick Stolee19575c72019-06-10 16:35:25 -0700843 size_t writelen;
844
Derrick Stoleeb4d94142021-02-18 14:07:27 +0000845 if (ctx->info[i].expired)
Derrick Stolee19575c72019-06-10 16:35:25 -0700846 continue;
Derrick Stolee32f3c542018-07-12 15:39:27 -0400847
Derrick Stoleeb4d94142021-02-18 14:07:27 +0000848 if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0)
Derrick Stolee32f3c542018-07-12 15:39:27 -0400849 BUG("incorrect pack-file order: %s before %s",
Derrick Stoleeb4d94142021-02-18 14:07:27 +0000850 ctx->info[i - 1].pack_name,
851 ctx->info[i].pack_name);
Derrick Stolee32f3c542018-07-12 15:39:27 -0400852
Derrick Stoleeb4d94142021-02-18 14:07:27 +0000853 writelen = strlen(ctx->info[i].pack_name) + 1;
854 hashwrite(f, ctx->info[i].pack_name, writelen);
Derrick Stolee32f3c542018-07-12 15:39:27 -0400855 written += writelen;
856 }
857
858 /* add padding to be aligned */
859 i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT);
860 if (i < MIDX_CHUNK_ALIGNMENT) {
861 memset(padding, 0, sizeof(padding));
862 hashwrite(f, padding, i);
Derrick Stolee32f3c542018-07-12 15:39:27 -0400863 }
864
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000865 return 0;
Derrick Stolee32f3c542018-07-12 15:39:27 -0400866}
867
Taylor Blau5f5ccd92023-12-14 17:23:51 -0500868static int write_midx_bitmapped_packs(struct hashfile *f, void *data)
869{
870 struct write_midx_context *ctx = data;
871 size_t i;
872
873 for (i = 0; i < ctx->nr; i++) {
874 struct pack_info *pack = &ctx->info[i];
875 if (pack->expired)
876 continue;
877
878 if (pack->bitmap_pos == BITMAP_POS_UNKNOWN && pack->bitmap_nr)
879 BUG("pack '%s' has no bitmap position, but has %d bitmapped object(s)",
880 pack->pack_name, pack->bitmap_nr);
881
882 hashwrite_be32(f, pack->bitmap_pos);
883 hashwrite_be32(f, pack->bitmap_nr);
884 }
885 return 0;
886}
887
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000888static int write_midx_oid_fanout(struct hashfile *f,
889 void *data)
Derrick Stoleed7cacf22018-07-12 15:39:31 -0400890{
Derrick Stolee31bda9a2021-02-18 14:07:28 +0000891 struct write_midx_context *ctx = data;
892 struct pack_midx_entry *list = ctx->entries;
893 struct pack_midx_entry *last = ctx->entries + ctx->entries_nr;
Derrick Stoleed7cacf22018-07-12 15:39:31 -0400894 uint32_t count = 0;
895 uint32_t i;
896
897 /*
898 * Write the first-level table (the list is sorted,
899 * but we use a 256-entry lookup to be able to avoid
900 * having to do eight extra binary search iterations).
901 */
902 for (i = 0; i < 256; i++) {
903 struct pack_midx_entry *next = list;
904
905 while (next < last && next->oid.hash[0] == i) {
906 count++;
907 next++;
908 }
909
910 hashwrite_be32(f, count);
911 list = next;
912 }
913
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000914 return 0;
Derrick Stoleed7cacf22018-07-12 15:39:31 -0400915}
916
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000917static int write_midx_oid_lookup(struct hashfile *f,
918 void *data)
Derrick Stolee0d5b3a52018-07-12 15:39:30 -0400919{
Derrick Stolee31bda9a2021-02-18 14:07:28 +0000920 struct write_midx_context *ctx = data;
921 unsigned char hash_len = the_hash_algo->rawsz;
922 struct pack_midx_entry *list = ctx->entries;
Derrick Stolee0d5b3a52018-07-12 15:39:30 -0400923 uint32_t i;
Derrick Stolee0d5b3a52018-07-12 15:39:30 -0400924
Derrick Stolee31bda9a2021-02-18 14:07:28 +0000925 for (i = 0; i < ctx->entries_nr; i++) {
Derrick Stolee0d5b3a52018-07-12 15:39:30 -0400926 struct pack_midx_entry *obj = list++;
927
Derrick Stolee31bda9a2021-02-18 14:07:28 +0000928 if (i < ctx->entries_nr - 1) {
Derrick Stolee0d5b3a52018-07-12 15:39:30 -0400929 struct pack_midx_entry *next = list;
930 if (oidcmp(&obj->oid, &next->oid) >= 0)
931 BUG("OIDs not in order: %s >= %s",
932 oid_to_hex(&obj->oid),
933 oid_to_hex(&next->oid));
934 }
935
936 hashwrite(f, obj->oid.hash, (int)hash_len);
Derrick Stolee0d5b3a52018-07-12 15:39:30 -0400937 }
938
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000939 return 0;
Derrick Stolee0d5b3a52018-07-12 15:39:30 -0400940}
941
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000942static int write_midx_object_offsets(struct hashfile *f,
943 void *data)
Derrick Stolee662148c2018-07-12 15:39:32 -0400944{
Derrick Stolee7a3ada12021-02-18 14:07:29 +0000945 struct write_midx_context *ctx = data;
946 struct pack_midx_entry *list = ctx->entries;
Derrick Stolee662148c2018-07-12 15:39:32 -0400947 uint32_t i, nr_large_offset = 0;
Derrick Stolee662148c2018-07-12 15:39:32 -0400948
Derrick Stolee7a3ada12021-02-18 14:07:29 +0000949 for (i = 0; i < ctx->entries_nr; i++) {
Derrick Stolee662148c2018-07-12 15:39:32 -0400950 struct pack_midx_entry *obj = list++;
951
Derrick Stolee7a3ada12021-02-18 14:07:29 +0000952 if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED)
Derrick Stolee19575c72019-06-10 16:35:25 -0700953 BUG("object %s is in an expired pack with int-id %d",
954 oid_to_hex(&obj->oid),
955 obj->pack_int_id);
956
Derrick Stolee7a3ada12021-02-18 14:07:29 +0000957 hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]);
Derrick Stolee662148c2018-07-12 15:39:32 -0400958
Derrick Stolee7a3ada12021-02-18 14:07:29 +0000959 if (ctx->large_offsets_needed && obj->offset >> 31)
Derrick Stolee662148c2018-07-12 15:39:32 -0400960 hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
Derrick Stolee7a3ada12021-02-18 14:07:29 +0000961 else if (!ctx->large_offsets_needed && obj->offset >> 32)
Derrick Stolee662148c2018-07-12 15:39:32 -0400962 BUG("object %s requires a large offset (%"PRIx64") but the MIDX is not writing large offsets!",
963 oid_to_hex(&obj->oid),
964 obj->offset);
965 else
966 hashwrite_be32(f, (uint32_t)obj->offset);
Derrick Stolee662148c2018-07-12 15:39:32 -0400967 }
968
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000969 return 0;
Derrick Stolee662148c2018-07-12 15:39:32 -0400970}
971
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000972static int write_midx_large_offsets(struct hashfile *f,
973 void *data)
Derrick Stolee662148c2018-07-12 15:39:32 -0400974{
Derrick Stolee980f5252021-02-18 14:07:30 +0000975 struct write_midx_context *ctx = data;
976 struct pack_midx_entry *list = ctx->entries;
977 struct pack_midx_entry *end = ctx->entries + ctx->entries_nr;
Derrick Stolee980f5252021-02-18 14:07:30 +0000978 uint32_t nr_large_offset = ctx->num_large_offsets;
Derrick Stolee662148c2018-07-12 15:39:32 -0400979
980 while (nr_large_offset) {
Jeff King61b0fcb2018-11-03 22:27:46 -0400981 struct pack_midx_entry *obj;
982 uint64_t offset;
983
984 if (list >= end)
985 BUG("too many large-offset objects");
986
987 obj = list++;
988 offset = obj->offset;
Derrick Stolee662148c2018-07-12 15:39:32 -0400989
990 if (!(offset >> 31))
991 continue;
992
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000993 hashwrite_be64(f, offset);
Derrick Stolee662148c2018-07-12 15:39:32 -0400994
995 nr_large_offset--;
996 }
997
Derrick Stolee0ccd7132021-02-18 14:07:31 +0000998 return 0;
Derrick Stolee662148c2018-07-12 15:39:32 -0400999}
1000
Taylor Blau95e83832022-01-25 17:41:03 -05001001static int write_midx_revindex(struct hashfile *f,
1002 void *data)
1003{
1004 struct write_midx_context *ctx = data;
1005 uint32_t i;
1006
1007 for (i = 0; i < ctx->entries_nr; i++)
1008 hashwrite_be32(f, ctx->pack_order[i]);
1009
1010 return 0;
1011}
1012
/*
 * Sort record built by midx_pack_order(), one per MIDX entry, and ordered
 * by midx_pack_order_cmp().
 */
struct midx_pack_order_data {
	/* index of the entry within ctx->entries */
	uint32_t nr;
	/*
	 * permuted pack id of the entry; midx_pack_order() additionally sets
	 * bit 31 for entries that are not marked preferred
	 */
	uint32_t pack;
	/* offset of the entry, copied from the MIDX entry */
	off_t offset;
};
1018
1019static int midx_pack_order_cmp(const void *va, const void *vb)
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001020{
Jeff King30077522021-03-30 11:04:36 -04001021 const struct midx_pack_order_data *a = va, *b = vb;
1022 if (a->pack < b->pack)
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001023 return -1;
Jeff King30077522021-03-30 11:04:36 -04001024 else if (a->pack > b->pack)
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001025 return 1;
Jeff King30077522021-03-30 11:04:36 -04001026 else if (a->offset < b->offset)
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001027 return -1;
Jeff King30077522021-03-30 11:04:36 -04001028 else if (a->offset > b->offset)
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001029 return 1;
Jeff King30077522021-03-30 11:04:36 -04001030 else
1031 return 0;
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001032}
1033
1034static uint32_t *midx_pack_order(struct write_midx_context *ctx)
1035{
Jeff King30077522021-03-30 11:04:36 -04001036 struct midx_pack_order_data *data;
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001037 uint32_t *pack_order;
1038 uint32_t i;
1039
Taylor Blau2dcff522022-10-12 18:01:55 -04001040 trace2_region_enter("midx", "midx_pack_order", the_repository);
1041
Jeff King30077522021-03-30 11:04:36 -04001042 ALLOC_ARRAY(data, ctx->entries_nr);
1043 for (i = 0; i < ctx->entries_nr; i++) {
1044 struct pack_midx_entry *e = &ctx->entries[i];
1045 data[i].nr = i;
1046 data[i].pack = ctx->pack_perm[e->pack_int_id];
1047 if (!e->preferred)
1048 data[i].pack |= (1U << 31);
1049 data[i].offset = e->offset;
1050 }
1051
1052 QSORT(data, ctx->entries_nr, midx_pack_order_cmp);
1053
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001054 ALLOC_ARRAY(pack_order, ctx->entries_nr);
Taylor Blau5f5ccd92023-12-14 17:23:51 -05001055 for (i = 0; i < ctx->entries_nr; i++) {
1056 struct pack_midx_entry *e = &ctx->entries[data[i].nr];
1057 struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]];
1058 if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
1059 pack->bitmap_pos = i;
1060 pack->bitmap_nr++;
Jeff King30077522021-03-30 11:04:36 -04001061 pack_order[i] = data[i].nr;
Taylor Blau5f5ccd92023-12-14 17:23:51 -05001062 }
1063 for (i = 0; i < ctx->nr; i++) {
1064 struct pack_info *pack = &ctx->info[ctx->pack_perm[i]];
1065 if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
1066 pack->bitmap_pos = 0;
1067 }
Jeff King30077522021-03-30 11:04:36 -04001068 free(data);
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001069
Taylor Blau2dcff522022-10-12 18:01:55 -04001070 trace2_region_leave("midx", "midx_pack_order", the_repository);
1071
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001072 return pack_order;
1073}
1074
1075static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash,
1076 struct write_midx_context *ctx)
1077{
1078 struct strbuf buf = STRBUF_INIT;
1079 const char *tmp_file;
1080
Taylor Blau2dcff522022-10-12 18:01:55 -04001081 trace2_region_enter("midx", "write_midx_reverse_index", the_repository);
1082
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001083 strbuf_addf(&buf, "%s-%s.rev", midx_name, hash_to_hex(midx_hash));
1084
1085 tmp_file = write_rev_file_order(NULL, ctx->pack_order, ctx->entries_nr,
1086 midx_hash, WRITE_REV);
1087
1088 if (finalize_object_file(tmp_file, buf.buf))
1089 die(_("cannot store reverse index file"));
1090
1091 strbuf_release(&buf);
Taylor Blau2dcff522022-10-12 18:01:55 -04001092
1093 trace2_region_leave("midx", "write_midx_reverse_index", the_repository);
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001094}
1095
/*
 * Forward declaration: used by write_midx_internal() before its definition,
 * which is presumably further down in this file.
 */
static void clear_midx_files_ext(const char *object_dir, const char *ext,
				 unsigned char *keep_hash);
1098
/*
 * Return the result of hashfile_checksum_valid() for the m->data_len bytes
 * at m->data, i.e. for the multi-pack-index 'm' as loaded in memory.
 */
static int midx_checksum_valid(struct multi_pack_index *m)
{
	return hashfile_checksum_valid(m->data, m->data_len);
}
1103
Taylor Blauc528e172021-08-31 16:52:24 -04001104static void prepare_midx_packing_data(struct packing_data *pdata,
1105 struct write_midx_context *ctx)
1106{
1107 uint32_t i;
1108
Taylor Blau2dcff522022-10-12 18:01:55 -04001109 trace2_region_enter("midx", "prepare_midx_packing_data", the_repository);
1110
Taylor Blauc528e172021-08-31 16:52:24 -04001111 memset(pdata, 0, sizeof(struct packing_data));
1112 prepare_packing_data(the_repository, pdata);
1113
1114 for (i = 0; i < ctx->entries_nr; i++) {
1115 struct pack_midx_entry *from = &ctx->entries[ctx->pack_order[i]];
1116 struct object_entry *to = packlist_alloc(pdata, &from->oid);
1117
1118 oe_set_in_pack(pdata, to,
1119 ctx->info[ctx->pack_perm[from->pack_int_id]].p);
1120 }
Taylor Blau2dcff522022-10-12 18:01:55 -04001121
1122 trace2_region_leave("midx", "prepare_midx_packing_data", the_repository);
Taylor Blauc528e172021-08-31 16:52:24 -04001123}
1124
1125static int add_ref_to_pending(const char *refname,
1126 const struct object_id *oid,
1127 int flag, void *cb_data)
1128{
1129 struct rev_info *revs = (struct rev_info*)cb_data;
Taylor Blau1dc4f1e2022-10-12 18:01:52 -04001130 struct object_id peeled;
Taylor Blauc528e172021-08-31 16:52:24 -04001131 struct object *object;
1132
1133 if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) {
1134 warning("symbolic ref is dangling: %s", refname);
1135 return 0;
1136 }
1137
Taylor Blau1dc4f1e2022-10-12 18:01:52 -04001138 if (!peel_iterated_oid(oid, &peeled))
1139 oid = &peeled;
1140
Taylor Blauc528e172021-08-31 16:52:24 -04001141 object = parse_object_or_die(oid, refname);
1142 if (object->type != OBJ_COMMIT)
1143 return 0;
1144
1145 add_pending_object(revs, object, "");
1146 if (bitmap_is_preferred_refname(revs->repo, refname))
1147 object->flags |= NEEDS_BITMAP;
1148 return 0;
1149}
1150
/*
 * State shared with bitmap_show_commit() while collecting the commits
 * found during the revision walk.
 */
struct bitmap_commit_cb {
	/* growable array of collected commits */
	struct commit **commits;
	size_t commits_nr;
	size_t commits_alloc;

	/* context whose entries are searched for each commit */
	struct write_midx_context *ctx;
};
1157
1158static const struct object_id *bitmap_oid_access(size_t index,
1159 const void *_entries)
1160{
1161 const struct pack_midx_entry *entries = _entries;
1162 return &entries[index].oid;
1163}
1164
1165static void bitmap_show_commit(struct commit *commit, void *_data)
1166{
1167 struct bitmap_commit_cb *data = _data;
1168 int pos = oid_pos(&commit->object.oid, data->ctx->entries,
1169 data->ctx->entries_nr,
1170 bitmap_oid_access);
1171 if (pos < 0)
1172 return;
1173
1174 ALLOC_GROW(data->commits, data->commits_nr + 1, data->commits_alloc);
1175 data->commits[data->commits_nr++] = commit;
1176}
1177
Taylor Blau08944d12021-09-28 21:55:07 -04001178static int read_refs_snapshot(const char *refs_snapshot,
1179 struct rev_info *revs)
1180{
1181 struct strbuf buf = STRBUF_INIT;
1182 struct object_id oid;
1183 FILE *f = xfopen(refs_snapshot, "r");
1184
1185 while (strbuf_getline(&buf, f) != EOF) {
1186 struct object *object;
1187 int preferred = 0;
1188 char *hex = buf.buf;
1189 const char *end = NULL;
1190
1191 if (buf.len && *buf.buf == '+') {
1192 preferred = 1;
1193 hex = &buf.buf[1];
1194 }
1195
1196 if (parse_oid_hex(hex, &oid, &end) < 0)
1197 die(_("could not parse line: %s"), buf.buf);
1198 if (*end)
1199 die(_("malformed line: %s"), buf.buf);
1200
1201 object = parse_object_or_die(&oid, NULL);
1202 if (preferred)
1203 object->flags |= NEEDS_BITMAP;
1204
1205 add_pending_object(revs, object, "");
1206 }
1207
1208 fclose(f);
1209 strbuf_release(&buf);
1210 return 0;
1211}
1212
Taylor Blauc528e172021-08-31 16:52:24 -04001213static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr_p,
Taylor Blau08944d12021-09-28 21:55:07 -04001214 const char *refs_snapshot,
Taylor Blauc528e172021-08-31 16:52:24 -04001215 struct write_midx_context *ctx)
1216{
1217 struct rev_info revs;
1218 struct bitmap_commit_cb cb = {0};
1219
Taylor Blau2dcff522022-10-12 18:01:55 -04001220 trace2_region_enter("midx", "find_commits_for_midx_bitmap",
1221 the_repository);
1222
Taylor Blauc528e172021-08-31 16:52:24 -04001223 cb.ctx = ctx;
1224
1225 repo_init_revisions(the_repository, &revs, NULL);
Taylor Blau08944d12021-09-28 21:55:07 -04001226 if (refs_snapshot) {
1227 read_refs_snapshot(refs_snapshot, &revs);
1228 } else {
1229 setup_revisions(0, NULL, &revs, NULL);
1230 for_each_ref(add_ref_to_pending, &revs);
1231 }
Taylor Blauc528e172021-08-31 16:52:24 -04001232
1233 /*
1234 * Skipping promisor objects here is intentional, since it only excludes
1235 * them from the list of reachable commits that we want to select from
1236 * when computing the selection of MIDX'd commits to receive bitmaps.
1237 *
1238 * Reachability bitmaps do require that their objects be closed under
1239 * reachability, but fetching any objects missing from promisors at this
1240 * point is too late. But, if one of those objects can be reached from
1241 * an another object that is included in the bitmap, then we will
1242 * complain later that we don't have reachability closure (and fail
1243 * appropriately).
1244 */
1245 fetch_if_missing = 0;
1246 revs.exclude_promisor_objects = 1;
1247
1248 if (prepare_revision_walk(&revs))
1249 die(_("revision walk setup failed"));
1250
1251 traverse_commit_list(&revs, bitmap_show_commit, NULL, &cb);
1252 if (indexed_commits_nr_p)
1253 *indexed_commits_nr_p = cb.commits_nr;
1254
Ævar Arnfjörð Bjarmason2108fe42022-04-13 22:01:36 +02001255 release_revisions(&revs);
Taylor Blau2dcff522022-10-12 18:01:55 -04001256
1257 trace2_region_leave("midx", "find_commits_for_midx_bitmap",
1258 the_repository);
1259
Taylor Blauc528e172021-08-31 16:52:24 -04001260 return cb.commits;
1261}
1262
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001263static int write_midx_bitmap(const char *midx_name,
1264 const unsigned char *midx_hash,
1265 struct packing_data *pdata,
1266 struct commit **commits,
1267 uint32_t commits_nr,
1268 uint32_t *pack_order,
Taylor Blauc528e172021-08-31 16:52:24 -04001269 unsigned flags)
1270{
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001271 int ret, i;
Taylor Blaucaca3c92021-09-14 18:06:06 -04001272 uint16_t options = 0;
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001273 struct pack_idx_entry **index;
1274 char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name,
1275 hash_to_hex(midx_hash));
Taylor Blaueb572772022-02-09 14:26:47 -05001276
Taylor Blau2dcff522022-10-12 18:01:55 -04001277 trace2_region_enter("midx", "write_midx_bitmap", the_repository);
1278
Taylor Blaucaca3c92021-09-14 18:06:06 -04001279 if (flags & MIDX_WRITE_BITMAP_HASH_CACHE)
1280 options |= BITMAP_OPT_HASH_CACHE;
1281
Abhradeep Chakraborty76f14b72022-08-14 16:55:09 +00001282 if (flags & MIDX_WRITE_BITMAP_LOOKUP_TABLE)
1283 options |= BITMAP_OPT_LOOKUP_TABLE;
1284
Taylor Blauc528e172021-08-31 16:52:24 -04001285 /*
1286 * Build the MIDX-order index based on pdata.objects (which is already
1287 * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of
1288 * this order).
1289 */
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001290 ALLOC_ARRAY(index, pdata->nr_objects);
1291 for (i = 0; i < pdata->nr_objects; i++)
1292 index[i] = &pdata->objects[i].idx;
Taylor Blauc528e172021-08-31 16:52:24 -04001293
1294 bitmap_writer_show_progress(flags & MIDX_PROGRESS);
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001295 bitmap_writer_build_type_index(pdata, index, pdata->nr_objects);
Taylor Blauc528e172021-08-31 16:52:24 -04001296
1297 /*
1298 * bitmap_writer_finish expects objects in lex order, but pack_order
1299 * gives us exactly that. use it directly instead of re-sorting the
1300 * array.
1301 *
1302 * This changes the order of objects in 'index' between
1303 * bitmap_writer_build_type_index and bitmap_writer_finish.
1304 *
1305 * The same re-ordering takes place in the single-pack bitmap code via
1306 * write_idx_file(), which is called by finish_tmp_packfile(), which
1307 * happens between bitmap_writer_build_type_index() and
1308 * bitmap_writer_finish().
1309 */
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001310 for (i = 0; i < pdata->nr_objects; i++)
1311 index[pack_order[i]] = &pdata->objects[i].idx;
Taylor Blauc528e172021-08-31 16:52:24 -04001312
1313 bitmap_writer_select_commits(commits, commits_nr, -1);
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001314 ret = bitmap_writer_build(pdata);
Taylor Blauc528e172021-08-31 16:52:24 -04001315 if (ret < 0)
1316 goto cleanup;
1317
1318 bitmap_writer_set_checksum(midx_hash);
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001319 bitmap_writer_finish(index, pdata->nr_objects, bitmap_name, options);
Taylor Blauc528e172021-08-31 16:52:24 -04001320
1321cleanup:
1322 free(index);
1323 free(bitmap_name);
Taylor Blau2dcff522022-10-12 18:01:55 -04001324
1325 trace2_region_leave("midx", "write_midx_bitmap", the_repository);
1326
Taylor Blauc528e172021-08-31 16:52:24 -04001327 return ret;
1328}
1329
Taylor Blau504131a2021-10-08 17:46:29 -04001330static struct multi_pack_index *lookup_multi_pack_index(struct repository *r,
1331 const char *object_dir)
1332{
Derrick Stoleeeafcc6d2022-04-25 18:27:12 +00001333 struct multi_pack_index *result = NULL;
Taylor Blau504131a2021-10-08 17:46:29 -04001334 struct multi_pack_index *cur;
Derrick Stoleeeafcc6d2022-04-25 18:27:12 +00001335 char *obj_dir_real = real_pathdup(object_dir, 1);
1336 struct strbuf cur_path_real = STRBUF_INIT;
Taylor Blau504131a2021-10-08 17:46:29 -04001337
1338 /* Ensure the given object_dir is local, or a known alternate. */
Derrick Stoleeeafcc6d2022-04-25 18:27:12 +00001339 find_odb(r, obj_dir_real);
Taylor Blau504131a2021-10-08 17:46:29 -04001340
1341 for (cur = get_multi_pack_index(r); cur; cur = cur->next) {
Derrick Stoleeeafcc6d2022-04-25 18:27:12 +00001342 strbuf_realpath(&cur_path_real, cur->object_dir, 1);
1343 if (!strcmp(obj_dir_real, cur_path_real.buf)) {
1344 result = cur;
1345 goto cleanup;
1346 }
Taylor Blau504131a2021-10-08 17:46:29 -04001347 }
1348
Derrick Stoleeeafcc6d2022-04-25 18:27:12 +00001349cleanup:
1350 free(obj_dir_real);
1351 strbuf_release(&cur_path_real);
1352 return result;
Taylor Blau504131a2021-10-08 17:46:29 -04001353}
1354
Taylor Blauf57a7392021-09-01 16:34:01 -04001355static int write_midx_internal(const char *object_dir,
Taylor Blau56d863e2021-09-28 21:55:01 -04001356 struct string_list *packs_to_include,
Taylor Blau9218c6a2021-03-30 11:04:11 -04001357 struct string_list *packs_to_drop,
1358 const char *preferred_pack_name,
Taylor Blau08944d12021-09-28 21:55:07 -04001359 const char *refs_snapshot,
Taylor Blau9218c6a2021-03-30 11:04:11 -04001360 unsigned flags)
Derrick Stoleea3407732018-07-12 15:39:21 -04001361{
Taylor Blau60980ae2021-10-26 17:01:21 -04001362 struct strbuf midx_name = STRBUF_INIT;
Taylor Blau9f191612021-03-30 11:04:17 -04001363 unsigned char midx_hash[GIT_MAX_RAWSZ];
Derrick Stolee396f2572018-07-12 15:39:26 -04001364 uint32_t i;
Derrick Stoleefc59e742018-07-12 15:39:22 -04001365 struct hashfile *f = NULL;
1366 struct lock_file lk;
Derrick Stolee577dc492021-02-18 14:07:26 +00001367 struct write_midx_context ctx = { 0 };
Taylor Blau5f5ccd92023-12-14 17:23:51 -05001368 int bitmapped_packs_concat_len = 0;
Derrick Stoleedba61752019-06-10 16:35:24 -07001369 int pack_name_concat_len = 0;
Derrick Stolee19575c72019-06-10 16:35:25 -07001370 int dropped_packs = 0;
1371 int result = 0;
Derrick Stolee63a8f0e2021-02-18 14:07:33 +00001372 struct chunkfile *cf;
Derrick Stoleefc59e742018-07-12 15:39:22 -04001373
Taylor Blau2dcff522022-10-12 18:01:55 -04001374 trace2_region_enter("midx", "write_midx_internal", the_repository);
1375
Taylor Blau60980ae2021-10-26 17:01:21 -04001376 get_midx_filename(&midx_name, object_dir);
1377 if (safe_create_leading_directories(midx_name.buf))
Derrick Stoleefc59e742018-07-12 15:39:22 -04001378 die_errno(_("unable to create leading directories of %s"),
Taylor Blau60980ae2021-10-26 17:01:21 -04001379 midx_name.buf);
Derrick Stoleefc59e742018-07-12 15:39:22 -04001380
Taylor Blau56d863e2021-09-28 21:55:01 -04001381 if (!packs_to_include) {
1382 /*
1383 * Only reference an existing MIDX when not filtering which
1384 * packs to include, since all packs and objects are copied
1385 * blindly from an existing MIDX if one is present.
1386 */
Taylor Blau504131a2021-10-08 17:46:29 -04001387 ctx.m = lookup_multi_pack_index(the_repository, object_dir);
Taylor Blauf57a7392021-09-01 16:34:01 -04001388 }
Derrick Stoleea40498a2018-07-12 15:39:36 -04001389
Taylor Blauec1e28e2021-06-23 14:39:12 -04001390 if (ctx.m && !midx_checksum_valid(ctx.m)) {
1391 warning(_("ignoring existing multi-pack-index; checksum mismatch"));
1392 ctx.m = NULL;
1393 }
1394
Derrick Stolee577dc492021-02-18 14:07:26 +00001395 ctx.nr = 0;
1396 ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
1397 ctx.info = NULL;
1398 ALLOC_ARRAY(ctx.info, ctx.alloc);
Derrick Stolee396f2572018-07-12 15:39:26 -04001399
Derrick Stolee577dc492021-02-18 14:07:26 +00001400 if (ctx.m) {
1401 for (i = 0; i < ctx.m->num_packs; i++) {
1402 ALLOC_GROW(ctx.info, ctx.nr + 1, ctx.alloc);
Derrick Stoleea40498a2018-07-12 15:39:36 -04001403
Taylor Blau5d3cd092021-08-31 16:52:02 -04001404 if (flags & MIDX_WRITE_REV_INDEX) {
1405 /*
1406 * If generating a reverse index, need to have
1407 * packed_git's loaded to compare their
1408 * mtimes and object count.
1409 */
1410 if (prepare_midx_pack(the_repository, ctx.m, i)) {
1411 error(_("could not load pack"));
1412 result = 1;
1413 goto cleanup;
1414 }
1415
1416 if (open_pack_index(ctx.m->packs[i]))
1417 die(_("could not open index for %s"),
1418 ctx.m->packs[i]->pack_name);
Taylor Blau5d3cd092021-08-31 16:52:02 -04001419 }
1420
Taylor Blaufba68182023-12-14 17:23:48 -05001421 fill_pack_info(&ctx.info[ctx.nr++], ctx.m->packs[i],
1422 ctx.m->pack_names[i], i);
Derrick Stoleea40498a2018-07-12 15:39:36 -04001423 }
1424 }
1425
Derrick Stolee577dc492021-02-18 14:07:26 +00001426 ctx.pack_paths_checked = 0;
William Baker840cef02019-10-21 18:39:59 +00001427 if (flags & MIDX_PROGRESS)
Derrick Stolee577dc492021-02-18 14:07:26 +00001428 ctx.progress = start_delayed_progress(_("Adding packfiles to multi-pack-index"), 0);
William Baker840cef02019-10-21 18:39:59 +00001429 else
Derrick Stolee577dc492021-02-18 14:07:26 +00001430 ctx.progress = NULL;
William Baker840cef02019-10-21 18:39:59 +00001431
Taylor Blau56d863e2021-09-28 21:55:01 -04001432 ctx.to_include = packs_to_include;
1433
Derrick Stolee577dc492021-02-18 14:07:26 +00001434 for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
1435 stop_progress(&ctx.progress);
Derrick Stolee396f2572018-07-12 15:39:26 -04001436
Taylor Blau56d863e2021-09-28 21:55:01 -04001437 if ((ctx.m && ctx.nr == ctx.m->num_packs) &&
1438 !(packs_to_include || packs_to_drop)) {
Taylor Blauc528e172021-08-31 16:52:24 -04001439 struct bitmap_index *bitmap_git;
1440 int bitmap_exists;
1441 int want_bitmap = flags & MIDX_WRITE_BITMAP;
1442
Jeff Kingbfbb60d2021-09-09 15:56:58 -04001443 bitmap_git = prepare_midx_bitmap_git(ctx.m);
Taylor Blauc528e172021-08-31 16:52:24 -04001444 bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
1445 free_bitmap_index(bitmap_git);
1446
1447 if (bitmap_exists || !want_bitmap) {
1448 /*
1449 * The correct MIDX already exists, and so does a
1450 * corresponding bitmap (or one wasn't requested).
1451 */
1452 if (!want_bitmap)
1453 clear_midx_files_ext(object_dir, ".bitmap",
1454 NULL);
1455 goto cleanup;
1456 }
1457 }
Derrick Stoleea40498a2018-07-12 15:39:36 -04001458
Taylor Blau9218c6a2021-03-30 11:04:11 -04001459 if (preferred_pack_name) {
Patrick Steinhardtceb96a12023-04-14 08:01:31 +02001460 ctx.preferred_pack_idx = -1;
1461
Taylor Blau9218c6a2021-03-30 11:04:11 -04001462 for (i = 0; i < ctx.nr; i++) {
1463 if (!cmp_idx_or_pack_name(preferred_pack_name,
1464 ctx.info[i].pack_name)) {
1465 ctx.preferred_pack_idx = i;
1466 break;
1467 }
1468 }
Taylor Blau177c0d62021-08-31 16:52:04 -04001469
Patrick Steinhardtceb96a12023-04-14 08:01:31 +02001470 if (ctx.preferred_pack_idx == -1)
Taylor Blau177c0d62021-08-31 16:52:04 -04001471 warning(_("unknown preferred pack: '%s'"),
1472 preferred_pack_name);
Taylor Blauc528e172021-08-31 16:52:24 -04001473 } else if (ctx.nr &&
1474 (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) {
Taylor Blau177c0d62021-08-31 16:52:04 -04001475 struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p;
1476 ctx.preferred_pack_idx = 0;
1477
1478 if (packs_to_drop && packs_to_drop->nr)
1479 BUG("cannot write a MIDX bitmap during expiration");
1480
1481 /*
1482 * set a preferred pack when writing a bitmap to ensure that
1483 * the pack from which the first object is selected in pseudo
1484 * pack-order has all of its objects selected from that pack
1485 * (and not another pack containing a duplicate)
1486 */
1487 for (i = 1; i < ctx.nr; i++) {
1488 struct packed_git *p = ctx.info[i].p;
1489
1490 if (!oldest->num_objects || p->mtime < oldest->mtime) {
1491 oldest = p;
1492 ctx.preferred_pack_idx = i;
1493 }
1494 }
1495
1496 if (!oldest->num_objects) {
1497 /*
1498 * If all packs are empty; unset the preferred index.
1499 * This is acceptable since there will be no duplicate
1500 * objects to resolve, so the preferred value doesn't
1501 * matter.
1502 */
1503 ctx.preferred_pack_idx = -1;
1504 }
1505 } else {
1506 /*
1507 * otherwise don't mark any pack as preferred to avoid
1508 * interfering with expiration logic below
1509 */
1510 ctx.preferred_pack_idx = -1;
Taylor Blau9218c6a2021-03-30 11:04:11 -04001511 }
1512
Taylor Blau5d3cd092021-08-31 16:52:02 -04001513 if (ctx.preferred_pack_idx > -1) {
1514 struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p;
1515 if (!preferred->num_objects) {
1516 error(_("cannot select preferred pack %s with no objects"),
1517 preferred->pack_name);
1518 result = 1;
1519 goto cleanup;
1520 }
1521 }
1522
Taylor Blau9218c6a2021-03-30 11:04:11 -04001523 ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
1524 ctx.preferred_pack_idx);
Derrick Stoleea40498a2018-07-12 15:39:36 -04001525
Derrick Stolee7a3ada12021-02-18 14:07:29 +00001526 ctx.large_offsets_needed = 0;
Derrick Stolee31bda9a2021-02-18 14:07:28 +00001527 for (i = 0; i < ctx.entries_nr; i++) {
1528 if (ctx.entries[i].offset > 0x7fffffff)
Derrick Stolee980f5252021-02-18 14:07:30 +00001529 ctx.num_large_offsets++;
Derrick Stolee31bda9a2021-02-18 14:07:28 +00001530 if (ctx.entries[i].offset > 0xffffffff)
Derrick Stolee7a3ada12021-02-18 14:07:29 +00001531 ctx.large_offsets_needed = 1;
Derrick Stolee662148c2018-07-12 15:39:32 -04001532 }
Derrick Stoleefe1ed562018-07-12 15:39:29 -04001533
Derrick Stolee577dc492021-02-18 14:07:26 +00001534 QSORT(ctx.info, ctx.nr, pack_info_compare);
Derrick Stoleed01bf2e2019-06-10 16:35:24 -07001535
Derrick Stolee19575c72019-06-10 16:35:25 -07001536 if (packs_to_drop && packs_to_drop->nr) {
1537 int drop_index = 0;
1538 int missing_drops = 0;
1539
Derrick Stolee577dc492021-02-18 14:07:26 +00001540 for (i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) {
1541 int cmp = strcmp(ctx.info[i].pack_name,
Derrick Stolee19575c72019-06-10 16:35:25 -07001542 packs_to_drop->items[drop_index].string);
1543
1544 if (!cmp) {
1545 drop_index++;
Derrick Stolee577dc492021-02-18 14:07:26 +00001546 ctx.info[i].expired = 1;
Derrick Stolee19575c72019-06-10 16:35:25 -07001547 } else if (cmp > 0) {
1548 error(_("did not see pack-file %s to drop"),
1549 packs_to_drop->items[drop_index].string);
1550 drop_index++;
1551 missing_drops++;
1552 i--;
1553 } else {
Derrick Stolee577dc492021-02-18 14:07:26 +00001554 ctx.info[i].expired = 0;
Derrick Stolee19575c72019-06-10 16:35:25 -07001555 }
1556 }
1557
1558 if (missing_drops) {
1559 result = 1;
1560 goto cleanup;
1561 }
1562 }
1563
Derrick Stoleed01bf2e2019-06-10 16:35:24 -07001564 /*
1565 * pack_perm stores a permutation between pack-int-ids from the
1566 * previous multi-pack-index to the new one we are writing:
1567 *
1568 * pack_perm[old_id] = new_id
1569 */
Derrick Stolee7a3ada12021-02-18 14:07:29 +00001570 ALLOC_ARRAY(ctx.pack_perm, ctx.nr);
Derrick Stolee577dc492021-02-18 14:07:26 +00001571 for (i = 0; i < ctx.nr; i++) {
1572 if (ctx.info[i].expired) {
Derrick Stolee19575c72019-06-10 16:35:25 -07001573 dropped_packs++;
Derrick Stolee7a3ada12021-02-18 14:07:29 +00001574 ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED;
Derrick Stolee19575c72019-06-10 16:35:25 -07001575 } else {
Derrick Stolee7a3ada12021-02-18 14:07:29 +00001576 ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs;
Derrick Stolee19575c72019-06-10 16:35:25 -07001577 }
Derrick Stoleed01bf2e2019-06-10 16:35:24 -07001578 }
1579
Derrick Stolee577dc492021-02-18 14:07:26 +00001580 for (i = 0; i < ctx.nr; i++) {
Taylor Blau5f5ccd92023-12-14 17:23:51 -05001581 if (ctx.info[i].expired)
1582 continue;
1583 pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
1584 bitmapped_packs_concat_len += 2 * sizeof(uint32_t);
Derrick Stolee19575c72019-06-10 16:35:25 -07001585 }
Derrick Stoleedba61752019-06-10 16:35:24 -07001586
Taylor Blau9218c6a2021-03-30 11:04:11 -04001587 /* Check that the preferred pack wasn't expired (if given). */
1588 if (preferred_pack_name) {
1589 struct pack_info *preferred = bsearch(preferred_pack_name,
1590 ctx.info, ctx.nr,
1591 sizeof(*ctx.info),
1592 idx_or_pack_name_cmp);
Taylor Blau177c0d62021-08-31 16:52:04 -04001593 if (preferred) {
Taylor Blau9218c6a2021-03-30 11:04:11 -04001594 uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
1595 if (perm == PACK_EXPIRED)
1596 warning(_("preferred pack '%s' is expired"),
1597 preferred_pack_name);
1598 }
1599 }
1600
Derrick Stoleedba61752019-06-10 16:35:24 -07001601 if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
1602 pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
1603 (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
1604
Taylor Blau60980ae2021-10-26 17:01:21 -04001605 hold_lock_file_for_update(&lk, midx_name.buf, LOCK_DIE_ON_ERROR);
Martin Ågrenacd71602021-01-05 20:23:48 +01001606 f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk));
Derrick Stoleefc59e742018-07-12 15:39:22 -04001607
Derrick Stolee577dc492021-02-18 14:07:26 +00001608 if (ctx.nr - dropped_packs == 0) {
Damien Robert796d61c2020-03-28 23:18:22 +01001609 error(_("no pack files to index."));
1610 result = 1;
1611 goto cleanup;
1612 }
1613
Taylor Blaueb572772022-02-09 14:26:47 -05001614 if (!ctx.entries_nr) {
1615 if (flags & MIDX_WRITE_BITMAP)
1616 warning(_("refusing to write multi-pack .bitmap without any objects"));
1617 flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP);
1618 }
1619
Derrick Stolee63a8f0e2021-02-18 14:07:33 +00001620 cf = init_chunkfile(f);
Derrick Stolee32f3c542018-07-12 15:39:27 -04001621
Derrick Stolee63a8f0e2021-02-18 14:07:33 +00001622 add_chunk(cf, MIDX_CHUNKID_PACKNAMES, pack_name_concat_len,
1623 write_midx_pack_names);
1624 add_chunk(cf, MIDX_CHUNKID_OIDFANOUT, MIDX_CHUNK_FANOUT_SIZE,
1625 write_midx_oid_fanout);
1626 add_chunk(cf, MIDX_CHUNKID_OIDLOOKUP,
Taylor Blau2bc764c2023-07-12 19:37:46 -04001627 st_mult(ctx.entries_nr, the_hash_algo->rawsz),
Derrick Stolee63a8f0e2021-02-18 14:07:33 +00001628 write_midx_oid_lookup);
1629 add_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS,
Taylor Blau2bc764c2023-07-12 19:37:46 -04001630 st_mult(ctx.entries_nr, MIDX_CHUNK_OFFSET_WIDTH),
Derrick Stolee63a8f0e2021-02-18 14:07:33 +00001631 write_midx_object_offsets);
Derrick Stolee32f3c542018-07-12 15:39:27 -04001632
Derrick Stolee63a8f0e2021-02-18 14:07:33 +00001633 if (ctx.large_offsets_needed)
1634 add_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS,
Taylor Blau2bc764c2023-07-12 19:37:46 -04001635 st_mult(ctx.num_large_offsets,
1636 MIDX_CHUNK_LARGE_OFFSET_WIDTH),
Derrick Stolee63a8f0e2021-02-18 14:07:33 +00001637 write_midx_large_offsets);
Derrick Stolee32f3c542018-07-12 15:39:27 -04001638
Taylor Blau95e83832022-01-25 17:41:03 -05001639 if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) {
1640 ctx.pack_order = midx_pack_order(&ctx);
1641 add_chunk(cf, MIDX_CHUNKID_REVINDEX,
Taylor Blau2bc764c2023-07-12 19:37:46 -04001642 st_mult(ctx.entries_nr, sizeof(uint32_t)),
Taylor Blau95e83832022-01-25 17:41:03 -05001643 write_midx_revindex);
Taylor Blau5f5ccd92023-12-14 17:23:51 -05001644 add_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS,
1645 bitmapped_packs_concat_len,
1646 write_midx_bitmapped_packs);
Taylor Blau95e83832022-01-25 17:41:03 -05001647 }
1648
Derrick Stolee63a8f0e2021-02-18 14:07:33 +00001649 write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
1650 write_chunkfile(cf, &ctx);
Derrick Stoleefc59e742018-07-12 15:39:22 -04001651
Neeraj Singh020406e2022-03-10 22:43:21 +00001652 finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA,
1653 CSUM_FSYNC | CSUM_HASH_IN_STREAM);
Derrick Stolee63a8f0e2021-02-18 14:07:33 +00001654 free_chunkfile(cf);
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001655
Taylor Blau7f514b72022-01-25 17:41:17 -05001656 if (flags & MIDX_WRITE_REV_INDEX &&
1657 git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
Taylor Blau60980ae2021-10-26 17:01:21 -04001658 write_midx_reverse_index(midx_name.buf, midx_hash, &ctx);
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001659
Taylor Blauc528e172021-08-31 16:52:24 -04001660 if (flags & MIDX_WRITE_BITMAP) {
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001661 struct packing_data pdata;
1662 struct commit **commits;
1663 uint32_t commits_nr;
1664
1665 if (!ctx.entries_nr)
1666 BUG("cannot write a bitmap without any objects");
1667
1668 prepare_midx_packing_data(&pdata, &ctx);
1669
1670 commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, &ctx);
1671
Derrick Stolee068fa542022-07-19 15:26:06 +00001672 /*
1673 * The previous steps translated the information from
1674 * 'entries' into information suitable for constructing
1675 * bitmaps. We no longer need that array, so clear it to
1676 * reduce memory pressure.
1677 */
1678 FREE_AND_NULL(ctx.entries);
1679 ctx.entries_nr = 0;
1680
Derrick Stolee90b2bb72022-07-19 15:26:05 +00001681 if (write_midx_bitmap(midx_name.buf, midx_hash, &pdata,
1682 commits, commits_nr, ctx.pack_order,
Jeff King51d1b692022-07-26 18:05:03 -04001683 flags) < 0) {
Taylor Blauc528e172021-08-31 16:52:24 -04001684 error(_("could not write multi-pack bitmap"));
1685 result = 1;
Taylor Blau66f0c712023-12-14 17:23:39 -05001686 clear_packing_data(&pdata);
1687 free(commits);
Taylor Blauc528e172021-08-31 16:52:24 -04001688 goto cleanup;
1689 }
Taylor Blau66f0c712023-12-14 17:23:39 -05001690
1691 clear_packing_data(&pdata);
1692 free(commits);
Taylor Blauc528e172021-08-31 16:52:24 -04001693 }
Derrick Stolee068fa542022-07-19 15:26:06 +00001694 /*
1695 * NOTE: Do not use ctx.entries beyond this point, since it might
1696 * have been freed in the previous if block.
1697 */
Taylor Blauc528e172021-08-31 16:52:24 -04001698
1699 if (ctx.m)
1700 close_object_store(the_repository->objects);
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001701
Taylor Blauae22e842021-10-08 17:46:38 -04001702 if (commit_lock_file(&lk) < 0)
1703 die_errno(_("could not write multi-pack-index"));
Derrick Stoleefc59e742018-07-12 15:39:22 -04001704
Taylor Blauc528e172021-08-31 16:52:24 -04001705 clear_midx_files_ext(object_dir, ".bitmap", midx_hash);
Taylor Blauf5909d32021-08-31 16:51:59 -04001706 clear_midx_files_ext(object_dir, ".rev", midx_hash);
1707
Derrick Stoleea40498a2018-07-12 15:39:36 -04001708cleanup:
Derrick Stolee577dc492021-02-18 14:07:26 +00001709 for (i = 0; i < ctx.nr; i++) {
1710 if (ctx.info[i].p) {
1711 close_pack(ctx.info[i].p);
1712 free(ctx.info[i].p);
Derrick Stolee396f2572018-07-12 15:39:26 -04001713 }
Derrick Stolee577dc492021-02-18 14:07:26 +00001714 free(ctx.info[i].pack_name);
Derrick Stolee396f2572018-07-12 15:39:26 -04001715 }
1716
Derrick Stolee577dc492021-02-18 14:07:26 +00001717 free(ctx.info);
Derrick Stolee31bda9a2021-02-18 14:07:28 +00001718 free(ctx.entries);
Derrick Stolee7a3ada12021-02-18 14:07:29 +00001719 free(ctx.pack_perm);
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001720 free(ctx.pack_order);
Taylor Blau60980ae2021-10-26 17:01:21 -04001721 strbuf_release(&midx_name);
Taylor Blauc528e172021-08-31 16:52:24 -04001722
Taylor Blau2dcff522022-10-12 18:01:55 -04001723 trace2_region_leave("midx", "write_midx_internal", the_repository);
1724
Derrick Stolee19575c72019-06-10 16:35:25 -07001725 return result;
1726}
1727
Taylor Blau9218c6a2021-03-30 11:04:11 -04001728int write_midx_file(const char *object_dir,
1729 const char *preferred_pack_name,
Taylor Blau08944d12021-09-28 21:55:07 -04001730 const char *refs_snapshot,
Taylor Blau9218c6a2021-03-30 11:04:11 -04001731 unsigned flags)
Derrick Stolee19575c72019-06-10 16:35:25 -07001732{
Taylor Blau56d863e2021-09-28 21:55:01 -04001733 return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name,
Taylor Blau08944d12021-09-28 21:55:07 -04001734 refs_snapshot, flags);
Taylor Blau56d863e2021-09-28 21:55:01 -04001735}
1736
1737int write_midx_file_only(const char *object_dir,
1738 struct string_list *packs_to_include,
1739 const char *preferred_pack_name,
Taylor Blau08944d12021-09-28 21:55:07 -04001740 const char *refs_snapshot,
Taylor Blau56d863e2021-09-28 21:55:01 -04001741 unsigned flags)
1742{
1743 return write_midx_internal(object_dir, packs_to_include, NULL,
Taylor Blau08944d12021-09-28 21:55:07 -04001744 preferred_pack_name, refs_snapshot, flags);
Derrick Stoleea3407732018-07-12 15:39:21 -04001745}
Derrick Stolee525e18c2018-07-12 15:39:40 -04001746
/*
 * Callback state for clear_midx_file_ext(), which removes
 * "multi-pack-index-*" files carrying a given extension.
 */
struct clear_midx_data {
	/* File name that must survive the sweep; NULL spares nothing. */
	char *keep;
	/* Suffix a file name has to end with to be considered for removal. */
	const char *ext;
};
1751
Jeff Kingbe252d32023-02-24 01:39:24 -05001752static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUSED,
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001753 const char *file_name, void *_data)
1754{
1755 struct clear_midx_data *data = _data;
1756
1757 if (!(starts_with(file_name, "multi-pack-index-") &&
1758 ends_with(file_name, data->ext)))
1759 return;
1760 if (data->keep && !strcmp(data->keep, file_name))
1761 return;
1762
1763 if (unlink(full_path))
1764 die_errno(_("failed to remove %s"), full_path);
1765}
1766
Taylor Blau426c00e2021-08-31 16:51:55 -04001767static void clear_midx_files_ext(const char *object_dir, const char *ext,
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001768 unsigned char *keep_hash)
1769{
1770 struct clear_midx_data data;
1771 memset(&data, 0, sizeof(struct clear_midx_data));
1772
1773 if (keep_hash)
1774 data.keep = xstrfmt("multi-pack-index-%s%s",
1775 hash_to_hex(keep_hash), ext);
1776 data.ext = ext;
1777
Taylor Blau426c00e2021-08-31 16:51:55 -04001778 for_each_file_in_pack_dir(object_dir,
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001779 clear_midx_file_ext,
1780 &data);
1781
1782 free(data.keep);
Derrick Stoleea3407732018-07-12 15:39:21 -04001783}
Derrick Stolee525e18c2018-07-12 15:39:40 -04001784
Derrick Stolee1dcd9f22018-10-12 10:34:19 -07001785void clear_midx_file(struct repository *r)
Derrick Stolee525e18c2018-07-12 15:39:40 -04001786{
Taylor Blau60980ae2021-10-26 17:01:21 -04001787 struct strbuf midx = STRBUF_INIT;
1788
1789 get_midx_filename(&midx, r->objects->odb->path);
Derrick Stolee1dcd9f22018-10-12 10:34:19 -07001790
1791 if (r->objects && r->objects->multi_pack_index) {
1792 close_midx(r->objects->multi_pack_index);
1793 r->objects->multi_pack_index = NULL;
1794 }
Derrick Stolee525e18c2018-07-12 15:39:40 -04001795
Taylor Blau60980ae2021-10-26 17:01:21 -04001796 if (remove_path(midx.buf))
1797 die(_("failed to clear multi-pack-index at %s"), midx.buf);
Derrick Stolee525e18c2018-07-12 15:39:40 -04001798
Taylor Blauc528e172021-08-31 16:52:24 -04001799 clear_midx_files_ext(r->objects->odb->path, ".bitmap", NULL);
Taylor Blau426c00e2021-08-31 16:51:55 -04001800 clear_midx_files_ext(r->objects->odb->path, ".rev", NULL);
Taylor Blau38ff7ca2021-03-30 11:04:32 -04001801
Taylor Blau60980ae2021-10-26 17:01:21 -04001802 strbuf_release(&midx);
Derrick Stolee525e18c2018-07-12 15:39:40 -04001803}
Derrick Stolee56ee7ff2018-09-13 11:02:13 -07001804
/* Set to 1 by midx_report(); never cleared by it. */
static int verify_midx_error;

/*
 * Record that verification found a problem and print the printf-style
 * message, followed by a newline, on stderr.
 */
__attribute__((format (printf, 1, 2)))
static void midx_report(const char *fmt, ...)
{
	va_list args;

	verify_midx_error = 1;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	fputc('\n', stderr);
}
1817
/*
 * Associates an object's position in the MIDX (`pos`) with the id of the
 * pack that holds it (`pack_int_id`).
 */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort()-style comparator ordering pairs by *descending* pack_int_id.
 *
 * The ids are compared explicitly rather than subtracted: the difference
 * of two uint32_t values does not fit in an int once the ids are more
 * than INT_MAX apart, which would flip the sign of the result and yield
 * an inconsistent ordering.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
1831
/*
 * display_progress() is only worth calling every so often; forward the
 * value to it only when it is a multiple of SPARSE_PROGRESS_INTERVAL.
 * The interval itself was picked arbitrarily.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t sparse_n_ = (n); \
		if (!(sparse_n_ % SPARSE_PROGRESS_INTERVAL)) \
			display_progress(progress, sparse_n_); \
	} while (0)
1843
William Bakerefbc3ae2019-10-21 18:39:58 +00001844int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags)
Derrick Stolee56ee7ff2018-09-13 11:02:13 -07001845{
Jeff Hostetler5ae18df2019-03-21 12:36:15 -07001846 struct pair_pos_vs_id *pairs = NULL;
Derrick Stoleed4bf1d82018-09-13 11:02:19 -07001847 uint32_t i;
William Bakerad600962019-10-21 18:40:01 +00001848 struct progress *progress = NULL;
Derrick Stolee56ee7ff2018-09-13 11:02:13 -07001849 struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
1850 verify_midx_error = 0;
1851
Derrick Stoleed9607542020-08-17 14:04:48 +00001852 if (!m) {
1853 int result = 0;
1854 struct stat sb;
Taylor Blau60980ae2021-10-26 17:01:21 -04001855 struct strbuf filename = STRBUF_INIT;
1856
1857 get_midx_filename(&filename, object_dir);
1858
1859 if (!stat(filename.buf, &sb)) {
Derrick Stoleed9607542020-08-17 14:04:48 +00001860 error(_("multi-pack-index file exists, but failed to parse"));
1861 result = 1;
1862 }
Taylor Blau60980ae2021-10-26 17:01:21 -04001863 strbuf_release(&filename);
Derrick Stoleed9607542020-08-17 14:04:48 +00001864 return result;
1865 }
Derrick Stolee56ee7ff2018-09-13 11:02:13 -07001866
Taylor Blauf89ecf72021-06-23 14:39:15 -04001867 if (!midx_checksum_valid(m))
1868 midx_report(_("incorrect checksum"));
1869
William Bakerad600962019-10-21 18:40:01 +00001870 if (flags & MIDX_PROGRESS)
Derrick Stoleeefdd2f02020-09-25 12:33:35 +00001871 progress = start_delayed_progress(_("Looking for referenced packfiles"),
William Bakerad600962019-10-21 18:40:01 +00001872 m->num_packs);
Derrick Stoleed4bf1d82018-09-13 11:02:19 -07001873 for (i = 0; i < m->num_packs; i++) {
Derrick Stolee64404a22019-04-29 09:18:55 -07001874 if (prepare_midx_pack(r, m, i))
Derrick Stoleed4bf1d82018-09-13 11:02:19 -07001875 midx_report("failed to load pack in position %d", i);
Jeff Hostetler430efb82019-03-21 12:36:14 -07001876
1877 display_progress(progress, i + 1);
Derrick Stoleed4bf1d82018-09-13 11:02:19 -07001878 }
Jeff Hostetler430efb82019-03-21 12:36:14 -07001879 stop_progress(&progress);
Derrick Stoleed4bf1d82018-09-13 11:02:19 -07001880
Damien Robert796d61c2020-03-28 23:18:22 +01001881 if (m->num_objects == 0) {
1882 midx_report(_("the midx contains no oid"));
1883 /*
1884 * Remaining tests assume that we have objects, so we can
1885 * return here.
1886 */
Taylor Blau492cb392021-10-26 17:01:08 -04001887 goto cleanup;
Damien Robert796d61c2020-03-28 23:18:22 +01001888 }
1889
William Bakerad600962019-10-21 18:40:01 +00001890 if (flags & MIDX_PROGRESS)
1891 progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"),
1892 m->num_objects - 1);
Derrick Stolee55c56482018-09-13 11:02:22 -07001893 for (i = 0; i < m->num_objects - 1; i++) {
1894 struct object_id oid1, oid2;
1895
1896 nth_midxed_object_oid(&oid1, m, i);
1897 nth_midxed_object_oid(&oid2, m, i + 1);
1898
1899 if (oidcmp(&oid1, &oid2) >= 0)
1900 midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
1901 i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
Derrick Stolee55c56482018-09-13 11:02:22 -07001902
Jeff Hostetler430efb82019-03-21 12:36:14 -07001903 midx_display_sparse_progress(progress, i + 1);
1904 }
1905 stop_progress(&progress);
1906
Jeff Hostetler5ae18df2019-03-21 12:36:15 -07001907 /*
1908 * Create an array mapping each object to its packfile id. Sort it
1909 * to group the objects by packfile. Use this permutation to visit
1910 * each of the objects and only require 1 packfile to be open at a
1911 * time.
1912 */
1913 ALLOC_ARRAY(pairs, m->num_objects);
1914 for (i = 0; i < m->num_objects; i++) {
1915 pairs[i].pos = i;
1916 pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
1917 }
1918
William Bakerad600962019-10-21 18:40:01 +00001919 if (flags & MIDX_PROGRESS)
1920 progress = start_sparse_progress(_("Sorting objects by packfile"),
1921 m->num_objects);
Jeff Hostetler5ae18df2019-03-21 12:36:15 -07001922 display_progress(progress, 0); /* TODO: Measure QSORT() progress */
1923 QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
1924 stop_progress(&progress);
1925
William Bakerad600962019-10-21 18:40:01 +00001926 if (flags & MIDX_PROGRESS)
1927 progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
Derrick Stoleecc6af732018-09-13 11:02:25 -07001928 for (i = 0; i < m->num_objects; i++) {
1929 struct object_id oid;
1930 struct pack_entry e;
1931 off_t m_offset, p_offset;
1932
Jeff Hostetler5ae18df2019-03-21 12:36:15 -07001933 if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
1934 m->packs[pairs[i-1].pack_int_id])
1935 {
1936 close_pack_fd(m->packs[pairs[i-1].pack_int_id]);
1937 close_pack_index(m->packs[pairs[i-1].pack_int_id]);
1938 }
1939
1940 nth_midxed_object_oid(&oid, m, pairs[i].pos);
1941
Derrick Stolee64404a22019-04-29 09:18:55 -07001942 if (!fill_midx_entry(r, &oid, &e, m)) {
Derrick Stoleecc6af732018-09-13 11:02:25 -07001943 midx_report(_("failed to load pack entry for oid[%d] = %s"),
Jeff Hostetler5ae18df2019-03-21 12:36:15 -07001944 pairs[i].pos, oid_to_hex(&oid));
Derrick Stoleecc6af732018-09-13 11:02:25 -07001945 continue;
1946 }
1947
1948 if (open_pack_index(e.p)) {
1949 midx_report(_("failed to load pack-index for packfile %s"),
1950 e.p->pack_name);
1951 break;
1952 }
1953
1954 m_offset = e.offset;
1955 p_offset = find_pack_entry_one(oid.hash, e.p);
1956
1957 if (m_offset != p_offset)
1958 midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
Jeff Hostetler5ae18df2019-03-21 12:36:15 -07001959 pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
Derrick Stolee144d7032018-09-13 11:02:26 -07001960
Jeff Hostetler430efb82019-03-21 12:36:14 -07001961 midx_display_sparse_progress(progress, i + 1);
Derrick Stoleecc6af732018-09-13 11:02:25 -07001962 }
Derrick Stolee144d7032018-09-13 11:02:26 -07001963 stop_progress(&progress);
Derrick Stoleecc6af732018-09-13 11:02:25 -07001964
Taylor Blau492cb392021-10-26 17:01:08 -04001965cleanup:
Jeff Hostetler5ae18df2019-03-21 12:36:15 -07001966 free(pairs);
Taylor Blau492cb392021-10-26 17:01:08 -04001967 close_midx(m);
Jeff Hostetler5ae18df2019-03-21 12:36:15 -07001968
Derrick Stolee56ee7ff2018-09-13 11:02:13 -07001969 return verify_midx_error;
1970}
Derrick Stoleecff97112019-06-10 16:35:23 -07001971
William Bakerefbc3ae2019-10-21 18:39:58 +00001972int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags)
Derrick Stoleecff97112019-06-10 16:35:23 -07001973{
Derrick Stolee19575c72019-06-10 16:35:25 -07001974 uint32_t i, *count, result = 0;
1975 struct string_list packs_to_drop = STRING_LIST_INIT_DUP;
Taylor Blau98926e02021-10-08 17:46:32 -04001976 struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
William Baker8dc18f82019-10-21 18:40:00 +00001977 struct progress *progress = NULL;
Derrick Stolee19575c72019-06-10 16:35:25 -07001978
1979 if (!m)
1980 return 0;
1981
René Scharfeca56dad2021-03-13 17:17:22 +01001982 CALLOC_ARRAY(count, m->num_packs);
William Baker8dc18f82019-10-21 18:40:00 +00001983
1984 if (flags & MIDX_PROGRESS)
Derrick Stoleeefdd2f02020-09-25 12:33:35 +00001985 progress = start_delayed_progress(_("Counting referenced objects"),
William Baker8dc18f82019-10-21 18:40:00 +00001986 m->num_objects);
Derrick Stolee19575c72019-06-10 16:35:25 -07001987 for (i = 0; i < m->num_objects; i++) {
1988 int pack_int_id = nth_midxed_pack_int_id(m, i);
1989 count[pack_int_id]++;
William Baker8dc18f82019-10-21 18:40:00 +00001990 display_progress(progress, i + 1);
Derrick Stolee19575c72019-06-10 16:35:25 -07001991 }
William Baker8dc18f82019-10-21 18:40:00 +00001992 stop_progress(&progress);
Derrick Stolee19575c72019-06-10 16:35:25 -07001993
William Baker8dc18f82019-10-21 18:40:00 +00001994 if (flags & MIDX_PROGRESS)
Derrick Stoleeefdd2f02020-09-25 12:33:35 +00001995 progress = start_delayed_progress(_("Finding and deleting unreferenced packfiles"),
William Baker8dc18f82019-10-21 18:40:00 +00001996 m->num_packs);
Derrick Stolee19575c72019-06-10 16:35:25 -07001997 for (i = 0; i < m->num_packs; i++) {
1998 char *pack_name;
William Baker8dc18f82019-10-21 18:40:00 +00001999 display_progress(progress, i + 1);
Derrick Stolee19575c72019-06-10 16:35:25 -07002000
2001 if (count[i])
2002 continue;
2003
2004 if (prepare_midx_pack(r, m, i))
2005 continue;
2006
Taylor Blau757d4572022-09-19 21:55:45 -04002007 if (m->packs[i]->pack_keep || m->packs[i]->is_cruft)
Derrick Stolee19575c72019-06-10 16:35:25 -07002008 continue;
2009
2010 pack_name = xstrdup(m->packs[i]->pack_name);
2011 close_pack(m->packs[i]);
2012
2013 string_list_insert(&packs_to_drop, m->pack_names[i]);
2014 unlink_pack_path(pack_name, 0);
2015 free(pack_name);
2016 }
William Baker8dc18f82019-10-21 18:40:00 +00002017 stop_progress(&progress);
Derrick Stolee19575c72019-06-10 16:35:25 -07002018
2019 free(count);
2020
Taylor Blau98926e02021-10-08 17:46:32 -04002021 if (packs_to_drop.nr)
Taylor Blau08944d12021-09-28 21:55:07 -04002022 result = write_midx_internal(object_dir, NULL, &packs_to_drop, NULL, NULL, flags);
Derrick Stolee19575c72019-06-10 16:35:25 -07002023
2024 string_list_clear(&packs_to_drop, 0);
Taylor Blau98926e02021-10-08 17:46:32 -04002025
Derrick Stolee19575c72019-06-10 16:35:25 -07002026 return result;
Derrick Stoleecff97112019-06-10 16:35:23 -07002027}
Derrick Stolee2af890b2019-06-10 16:35:26 -07002028
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002029struct repack_info {
2030 timestamp_t mtime;
2031 uint32_t referenced_objects;
2032 uint32_t pack_int_id;
2033};
2034
2035static int compare_by_mtime(const void *a_, const void *b_)
2036{
2037 const struct repack_info *a, *b;
2038
2039 a = (const struct repack_info *)a_;
2040 b = (const struct repack_info *)b_;
2041
2042 if (a->mtime < b->mtime)
2043 return -1;
2044 if (a->mtime > b->mtime)
2045 return 1;
2046 return 0;
2047}
2048
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002049static int fill_included_packs_all(struct repository *r,
2050 struct multi_pack_index *m,
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002051 unsigned char *include_pack)
2052{
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002053 uint32_t i, count = 0;
2054 int pack_kept_objects = 0;
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002055
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002056 repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
2057
2058 for (i = 0; i < m->num_packs; i++) {
2059 if (prepare_midx_pack(r, m, i))
2060 continue;
2061 if (!pack_kept_objects && m->packs[i]->pack_keep)
2062 continue;
Taylor Blaud9f77212022-09-19 21:55:48 -04002063 if (m->packs[i]->is_cruft)
2064 continue;
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002065
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002066 include_pack[i] = 1;
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002067 count++;
2068 }
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002069
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002070 return count < 2;
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002071}
2072
2073static int fill_included_packs_batch(struct repository *r,
2074 struct multi_pack_index *m,
2075 unsigned char *include_pack,
2076 size_t batch_size)
2077{
2078 uint32_t i, packs_to_repack;
2079 size_t total_size;
Taylor Blaucb6c48c2022-09-19 21:55:50 -04002080 struct repack_info *pack_info;
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002081 int pack_kept_objects = 0;
2082
Taylor Blaucb6c48c2022-09-19 21:55:50 -04002083 CALLOC_ARRAY(pack_info, m->num_packs);
2084
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002085 repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002086
2087 for (i = 0; i < m->num_packs; i++) {
2088 pack_info[i].pack_int_id = i;
2089
2090 if (prepare_midx_pack(r, m, i))
2091 continue;
2092
2093 pack_info[i].mtime = m->packs[i]->mtime;
2094 }
2095
Taylor Blau0a8e5612022-09-19 21:55:53 -04002096 for (i = 0; i < m->num_objects; i++) {
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002097 uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
2098 pack_info[pack_int_id].referenced_objects++;
2099 }
2100
2101 QSORT(pack_info, m->num_packs, compare_by_mtime);
2102
2103 total_size = 0;
2104 packs_to_repack = 0;
2105 for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
2106 int pack_int_id = pack_info[i].pack_int_id;
2107 struct packed_git *p = m->packs[pack_int_id];
2108 size_t expected_size;
2109
2110 if (!p)
2111 continue;
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002112 if (!pack_kept_objects && p->pack_keep)
2113 continue;
Taylor Blaub62ad562022-09-19 21:55:56 -04002114 if (p->is_cruft)
2115 continue;
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002116 if (open_pack_index(p) || !p->num_objects)
2117 continue;
2118
Taylor Blaud67609b2023-07-12 19:37:49 -04002119 expected_size = st_mult(p->pack_size,
2120 pack_info[i].referenced_objects);
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002121 expected_size /= p->num_objects;
2122
2123 if (expected_size >= batch_size)
2124 continue;
2125
2126 packs_to_repack++;
2127 total_size += expected_size;
2128 include_pack[pack_int_id] = 1;
2129 }
2130
2131 free(pack_info);
2132
Derrick Stolee1eb22c72020-08-11 15:30:18 +00002133 if (packs_to_repack < 2)
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002134 return 1;
2135
2136 return 0;
2137}
2138
William Bakerefbc3ae2019-10-21 18:39:58 +00002139int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags)
Derrick Stolee2af890b2019-06-10 16:35:26 -07002140{
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002141 int result = 0;
2142 uint32_t i;
2143 unsigned char *include_pack;
2144 struct child_process cmd = CHILD_PROCESS_INIT;
René Scharfe6af3b002020-08-12 18:52:54 +02002145 FILE *cmd_in;
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002146 struct strbuf base_name = STRBUF_INIT;
Taylor Blauc0f1f9d2021-10-08 17:46:35 -04002147 struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002148
Son Luong Ngoce11d86d2020-05-10 16:07:33 +00002149 /*
2150 * When updating the default for these configuration
2151 * variables in builtin/repack.c, these must be adjusted
2152 * to match.
2153 */
2154 int delta_base_offset = 1;
2155 int use_delta_islands = 0;
2156
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002157 if (!m)
2158 return 0;
2159
René Scharfeca56dad2021-03-13 17:17:22 +01002160 CALLOC_ARRAY(include_pack, m->num_packs);
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002161
2162 if (batch_size) {
2163 if (fill_included_packs_batch(r, m, include_pack, batch_size))
2164 goto cleanup;
Derrick Stolee3ce4ca02020-05-10 16:07:34 +00002165 } else if (fill_included_packs_all(r, m, include_pack))
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002166 goto cleanup;
2167
Son Luong Ngoce11d86d2020-05-10 16:07:33 +00002168 repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
2169 repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);
2170
Jeff Kingc972bf42020-07-28 16:25:12 -04002171 strvec_push(&cmd.args, "pack-objects");
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002172
2173 strbuf_addstr(&base_name, object_dir);
2174 strbuf_addstr(&base_name, "/pack/pack");
Jeff Kingc972bf42020-07-28 16:25:12 -04002175 strvec_push(&cmd.args, base_name.buf);
William Baker64d80e72019-10-21 18:40:02 +00002176
Son Luong Ngoce11d86d2020-05-10 16:07:33 +00002177 if (delta_base_offset)
Jeff Kingc972bf42020-07-28 16:25:12 -04002178 strvec_push(&cmd.args, "--delta-base-offset");
Son Luong Ngoce11d86d2020-05-10 16:07:33 +00002179 if (use_delta_islands)
Jeff Kingc972bf42020-07-28 16:25:12 -04002180 strvec_push(&cmd.args, "--delta-islands");
Son Luong Ngoce11d86d2020-05-10 16:07:33 +00002181
William Baker64d80e72019-10-21 18:40:02 +00002182 if (flags & MIDX_PROGRESS)
Jeff Kingc972bf42020-07-28 16:25:12 -04002183 strvec_push(&cmd.args, "--progress");
William Baker64d80e72019-10-21 18:40:02 +00002184 else
Jeff Kingc972bf42020-07-28 16:25:12 -04002185 strvec_push(&cmd.args, "-q");
William Baker64d80e72019-10-21 18:40:02 +00002186
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002187 strbuf_release(&base_name);
2188
2189 cmd.git_cmd = 1;
2190 cmd.in = cmd.out = -1;
2191
2192 if (start_command(&cmd)) {
2193 error(_("could not start pack-objects"));
2194 result = 1;
2195 goto cleanup;
2196 }
2197
René Scharfe6af3b002020-08-12 18:52:54 +02002198 cmd_in = xfdopen(cmd.in, "w");
2199
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002200 for (i = 0; i < m->num_objects; i++) {
2201 struct object_id oid;
2202 uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
2203
2204 if (!include_pack[pack_int_id])
2205 continue;
2206
2207 nth_midxed_object_oid(&oid, m, i);
René Scharfe6af3b002020-08-12 18:52:54 +02002208 fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002209 }
René Scharfe6af3b002020-08-12 18:52:54 +02002210 fclose(cmd_in);
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002211
2212 if (finish_command(&cmd)) {
2213 error(_("could not finish pack-objects"));
2214 result = 1;
2215 goto cleanup;
2216 }
2217
Taylor Blau08944d12021-09-28 21:55:07 -04002218 result = write_midx_internal(object_dir, NULL, NULL, NULL, NULL, flags);
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002219
2220cleanup:
Derrick Stoleece1e4a12019-06-10 16:35:27 -07002221 free(include_pack);
2222 return result;
Derrick Stolee2af890b2019-06-10 16:35:26 -07002223}