blob: ce8a7d24f3d33e265e7d5d05bd77a3dfcf9c5b1f [file] [log] [blame]
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +00001/*
2Copyright 2020 Google LLC
3
4Use of this source code is governed by a BSD-style
5license that can be found in the LICENSE file or at
6https://developers.google.com/open-source/licenses/bsd
7*/
8
9#include "block.h"
10
11#include "blocksource.h"
12#include "constants.h"
13#include "record.h"
14#include "reftable-error.h"
15#include "system.h"
16#include <zlib.h>
17
18int header_size(int version)
19{
20 switch (version) {
21 case 1:
22 return 24;
23 case 2:
24 return 28;
25 }
26 abort();
27}
28
29int footer_size(int version)
30{
31 switch (version) {
32 case 1:
33 return 68;
34 case 2:
35 return 72;
36 }
37 abort();
38}
39
40static int block_writer_register_restart(struct block_writer *w, int n,
41 int is_restart, struct strbuf *key)
42{
43 int rlen = w->restart_len;
44 if (rlen >= MAX_RESTARTS) {
45 is_restart = 0;
46 }
47
48 if (is_restart) {
49 rlen++;
50 }
51 if (2 + 3 * rlen + n > w->block_size - w->next)
52 return -1;
53 if (is_restart) {
Patrick Steinhardtf6b58c12024-02-06 07:35:23 +010054 REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap);
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +000055 w->restarts[w->restart_len++] = w->next;
56 }
57
58 w->next += n;
59
60 strbuf_reset(&w->last_key);
61 strbuf_addbuf(&w->last_key, key);
62 w->entries++;
63 return 0;
64}
65
66void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
67 uint32_t block_size, uint32_t header_off, int hash_size)
68{
69 bw->buf = buf;
70 bw->hash_size = hash_size;
71 bw->block_size = block_size;
72 bw->header_off = header_off;
73 bw->buf[header_off] = typ;
74 bw->next = header_off + 4;
75 bw->restart_interval = 16;
76 bw->entries = 0;
77 bw->restart_len = 0;
78 bw->last_key.len = 0;
79}
80
81uint8_t block_writer_type(struct block_writer *bw)
82{
83 return bw->buf[bw->header_off];
84}
85
Han-Wen Nienhuys45c2fcc2022-02-21 18:46:07 +000086/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
87 success. Returns REFTABLE_API_ERROR if attempting to write a record with
88 empty key. */
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +000089int block_writer_add(struct block_writer *w, struct reftable_record *rec)
90{
91 struct strbuf empty = STRBUF_INIT;
92 struct strbuf last =
93 w->entries % w->restart_interval == 0 ? empty : w->last_key;
94 struct string_view out = {
95 .buf = w->buf + w->next,
96 .len = w->block_size - w->next,
97 };
98
99 struct string_view start = out;
100
101 int is_restart = 0;
102 struct strbuf key = STRBUF_INIT;
103 int n = 0;
Han-Wen Nienhuys45c2fcc2022-02-21 18:46:07 +0000104 int err = -1;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000105
106 reftable_record_key(rec, &key);
Han-Wen Nienhuys45c2fcc2022-02-21 18:46:07 +0000107 if (!key.len) {
108 err = REFTABLE_API_ERROR;
109 goto done;
110 }
111
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000112 n = reftable_encode_key(&is_restart, out, last, key,
113 reftable_record_val_type(rec));
114 if (n < 0)
115 goto done;
116 string_view_consume(&out, n);
117
118 n = reftable_record_encode(rec, out, w->hash_size);
119 if (n < 0)
120 goto done;
121 string_view_consume(&out, n);
122
Han-Wen Nienhuys45c2fcc2022-02-21 18:46:07 +0000123 err = block_writer_register_restart(w, start.len - out.len, is_restart,
124 &key);
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000125done:
126 strbuf_release(&key);
Han-Wen Nienhuys45c2fcc2022-02-21 18:46:07 +0000127 return err;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000128}
129
130int block_writer_finish(struct block_writer *w)
131{
132 int i;
133 for (i = 0; i < w->restart_len; i++) {
134 put_be24(w->buf + w->next, w->restarts[i]);
135 w->next += 3;
136 }
137
138 put_be16(w->buf + w->next, w->restart_len);
139 w->next += 2;
140 put_be24(w->buf + 1 + w->header_off, w->next);
141
142 if (block_writer_type(w) == BLOCK_TYPE_LOG) {
143 int block_header_skip = 4 + w->header_off;
144 uLongf src_len = w->next - block_header_skip;
145 uLongf dest_cap = src_len * 1.001 + 12;
Patrick Steinhardtb4ff12c2024-02-06 07:35:27 +0100146 uint8_t *compressed;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000147
Patrick Steinhardtb4ff12c2024-02-06 07:35:27 +0100148 REFTABLE_ALLOC_ARRAY(compressed, dest_cap);
149
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000150 while (1) {
151 uLongf out_dest_len = dest_cap;
152 int zresult = compress2(compressed, &out_dest_len,
153 w->buf + block_header_skip,
154 src_len, 9);
155 if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) {
156 dest_cap *= 2;
157 compressed =
158 reftable_realloc(compressed, dest_cap);
159 if (compressed)
160 continue;
161 }
162
163 if (Z_OK != zresult) {
164 reftable_free(compressed);
165 return REFTABLE_ZLIB_ERROR;
166 }
167
168 memcpy(w->buf + block_header_skip, compressed,
169 out_dest_len);
170 w->next = out_dest_len + block_header_skip;
171 reftable_free(compressed);
172 break;
173 }
174 }
175 return w->next;
176}
177
178uint8_t block_reader_type(struct block_reader *r)
179{
180 return r->block.data[r->header_off];
181}
182
183int block_reader_init(struct block_reader *br, struct reftable_block *block,
184 uint32_t header_off, uint32_t table_block_size,
185 int hash_size)
186{
187 uint32_t full_block_size = table_block_size;
188 uint8_t typ = block->data[header_off];
189 uint32_t sz = get_be24(block->data + header_off + 1);
Han-Wen Nienhuys24d4d382022-01-20 15:12:01 +0000190 int err = 0;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000191 uint16_t restart_count = 0;
192 uint32_t restart_start = 0;
193 uint8_t *restart_bytes = NULL;
Han-Wen Nienhuys24d4d382022-01-20 15:12:01 +0000194 uint8_t *uncompressed = NULL;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000195
Han-Wen Nienhuys24d4d382022-01-20 15:12:01 +0000196 if (!reftable_is_block_type(typ)) {
197 err = REFTABLE_FORMAT_ERROR;
198 goto done;
199 }
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000200
201 if (typ == BLOCK_TYPE_LOG) {
202 int block_header_skip = 4 + header_off;
203 uLongf dst_len = sz - block_header_skip; /* total size of dest
204 buffer. */
205 uLongf src_len = block->len - block_header_skip;
Patrick Steinhardtb4ff12c2024-02-06 07:35:27 +0100206
207 /* Log blocks specify the *uncompressed* size in their header. */
208 REFTABLE_ALLOC_ARRAY(uncompressed, sz);
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000209
210 /* Copy over the block header verbatim. It's not compressed. */
211 memcpy(uncompressed, block->data, block_header_skip);
212
213 /* Uncompress */
214 if (Z_OK !=
215 uncompress2(uncompressed + block_header_skip, &dst_len,
216 block->data + block_header_skip, &src_len)) {
Han-Wen Nienhuys24d4d382022-01-20 15:12:01 +0000217 err = REFTABLE_ZLIB_ERROR;
218 goto done;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000219 }
220
Han-Wen Nienhuys24d4d382022-01-20 15:12:01 +0000221 if (dst_len + block_header_skip != sz) {
222 err = REFTABLE_FORMAT_ERROR;
223 goto done;
224 }
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000225
226 /* We're done with the input data. */
227 reftable_block_done(block);
228 block->data = uncompressed;
Han-Wen Nienhuys24d4d382022-01-20 15:12:01 +0000229 uncompressed = NULL;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000230 block->len = sz;
231 block->source = malloc_block_source();
232 full_block_size = src_len + block_header_skip;
233 } else if (full_block_size == 0) {
234 full_block_size = sz;
235 } else if (sz < full_block_size && sz < block->len &&
236 block->data[sz] != 0) {
237 /* If the block is smaller than the full block size, it is
238 padded (data followed by '\0') or the next block is
239 unaligned. */
240 full_block_size = sz;
241 }
242
243 restart_count = get_be16(block->data + sz - 2);
244 restart_start = sz - 2 - 3 * restart_count;
245 restart_bytes = block->data + restart_start;
246
247 /* transfer ownership. */
248 br->block = *block;
249 block->data = NULL;
250 block->len = 0;
251
252 br->hash_size = hash_size;
253 br->block_len = restart_start;
254 br->full_block_size = full_block_size;
255 br->header_off = header_off;
256 br->restart_count = restart_count;
257 br->restart_bytes = restart_bytes;
258
Han-Wen Nienhuys24d4d382022-01-20 15:12:01 +0000259done:
260 reftable_free(uncompressed);
261 return err;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000262}
263
264static uint32_t block_reader_restart_offset(struct block_reader *br, int i)
265{
266 return get_be24(br->restart_bytes + 3 * i);
267}
268
269void block_reader_start(struct block_reader *br, struct block_iter *it)
270{
271 it->br = br;
272 strbuf_reset(&it->last_key);
273 it->next_off = br->header_off + 4;
274}
275
276struct restart_find_args {
277 int error;
278 struct strbuf key;
279 struct block_reader *r;
280};
281
282static int restart_key_less(size_t idx, void *args)
283{
284 struct restart_find_args *a = args;
285 uint32_t off = block_reader_restart_offset(a->r, idx);
286 struct string_view in = {
287 .buf = a->r->block.data + off,
288 .len = a->r->block_len - off,
289 };
290
291 /* the restart key is verbatim in the block, so this could avoid the
292 alloc for decoding the key */
293 struct strbuf rkey = STRBUF_INIT;
294 struct strbuf last_key = STRBUF_INIT;
295 uint8_t unused_extra;
296 int n = reftable_decode_key(&rkey, &unused_extra, last_key, in);
297 int result;
298 if (n < 0) {
299 a->error = 1;
300 return -1;
301 }
302
303 result = strbuf_cmp(&a->key, &rkey);
304 strbuf_release(&rkey);
305 return result;
306}
307
308void block_iter_copy_from(struct block_iter *dest, struct block_iter *src)
309{
310 dest->br = src->br;
311 dest->next_off = src->next_off;
312 strbuf_reset(&dest->last_key);
313 strbuf_addbuf(&dest->last_key, &src->last_key);
314}
315
316int block_iter_next(struct block_iter *it, struct reftable_record *rec)
317{
318 struct string_view in = {
319 .buf = it->br->block.data + it->next_off,
320 .len = it->br->block_len - it->next_off,
321 };
322 struct string_view start = in;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000323 uint8_t extra = 0;
324 int n = 0;
325
326 if (it->next_off >= it->br->block_len)
327 return 1;
328
Patrick Steinhardtc0cadb02023-12-11 10:08:12 +0100329 n = reftable_decode_key(&it->key, &extra, it->last_key, in);
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000330 if (n < 0)
331 return -1;
332
Patrick Steinhardtc0cadb02023-12-11 10:08:12 +0100333 if (!it->key.len)
Han-Wen Nienhuys45c2fcc2022-02-21 18:46:07 +0000334 return REFTABLE_FORMAT_ERROR;
335
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000336 string_view_consume(&in, n);
Patrick Steinhardtc0cadb02023-12-11 10:08:12 +0100337 n = reftable_record_decode(rec, it->key, extra, in, it->br->hash_size);
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000338 if (n < 0)
339 return -1;
340 string_view_consume(&in, n);
341
342 strbuf_reset(&it->last_key);
Patrick Steinhardtc0cadb02023-12-11 10:08:12 +0100343 strbuf_addbuf(&it->last_key, &it->key);
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000344 it->next_off += start.len - in.len;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000345 return 0;
346}
347
348int block_reader_first_key(struct block_reader *br, struct strbuf *key)
349{
350 struct strbuf empty = STRBUF_INIT;
351 int off = br->header_off + 4;
352 struct string_view in = {
353 .buf = br->block.data + off,
354 .len = br->block_len - off,
355 };
356
357 uint8_t extra = 0;
358 int n = reftable_decode_key(key, &extra, empty, in);
359 if (n < 0)
360 return n;
Han-Wen Nienhuys45c2fcc2022-02-21 18:46:07 +0000361 if (!key->len)
362 return REFTABLE_FORMAT_ERROR;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000363
364 return 0;
365}
366
367int block_iter_seek(struct block_iter *it, struct strbuf *want)
368{
369 return block_reader_seek(it->br, it, want);
370}
371
372void block_iter_close(struct block_iter *it)
373{
374 strbuf_release(&it->last_key);
Patrick Steinhardtc0cadb02023-12-11 10:08:12 +0100375 strbuf_release(&it->key);
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000376}
377
378int block_reader_seek(struct block_reader *br, struct block_iter *it,
379 struct strbuf *want)
380{
381 struct restart_find_args args = {
382 .key = *want,
383 .r = br,
384 };
Patrick Steinhardta8305bc2023-12-11 10:08:07 +0100385 struct block_iter next = BLOCK_ITER_INIT;
Patrick Steinhardt3ddef472024-02-06 07:35:59 +0100386 struct reftable_record rec;
387 int err = 0, i;
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000388
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000389 if (args.error) {
390 err = REFTABLE_FORMAT_ERROR;
391 goto done;
392 }
393
Patrick Steinhardt3ddef472024-02-06 07:35:59 +0100394 i = binsearch(br->restart_count, &restart_key_less, &args);
395 if (i > 0)
396 it->next_off = block_reader_restart_offset(br, i - 1);
397 else
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000398 it->next_off = br->header_off + 4;
Patrick Steinhardt3ddef472024-02-06 07:35:59 +0100399 it->br = br;
400
401 reftable_record_init(&rec, block_reader_type(br));
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000402
403 /* We're looking for the last entry less/equal than the wanted key, so
404 we have to go one entry too far and then back up.
405 */
406 while (1) {
407 block_iter_copy_from(&next, it);
408 err = block_iter_next(&next, &rec);
409 if (err < 0)
410 goto done;
411
Patrick Steinhardtc0cadb02023-12-11 10:08:12 +0100412 reftable_record_key(&rec, &it->key);
413 if (err > 0 || strbuf_cmp(&it->key, want) >= 0) {
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000414 err = 0;
415 goto done;
416 }
417
418 block_iter_copy_from(it, &next);
419 }
420
421done:
Patrick Steinhardtc0cadb02023-12-11 10:08:12 +0100422 block_iter_close(&next);
Han-Wen Nienhuys66c0dab2022-01-20 15:12:13 +0000423 reftable_record_release(&rec);
Han-Wen Nienhuyse581fd72021-10-07 20:25:04 +0000424
425 return err;
426}
427
428void block_writer_release(struct block_writer *bw)
429{
430 FREE_AND_NULL(bw->restarts);
431 strbuf_release(&bw->last_key);
432 /* the block is not owned. */
433}
434
435void reftable_block_done(struct reftable_block *blockp)
436{
437 struct reftable_block_source source = blockp->source;
438 if (blockp && source.ops)
439 source.ops->return_block(source.arg, blockp);
440 blockp->data = NULL;
441 blockp->len = 0;
442 blockp->source.ops = NULL;
443 blockp->source.arg = NULL;
444}