| /* |
| Copyright 2020 Google LLC |
| |
| Use of this source code is governed by a BSD-style |
| license that can be found in the LICENSE file or at |
| https://developers.google.com/open-source/licenses/bsd |
| */ |
| |
| #include "stack.h" |
| |
| #include "../write-or-die.h" |
| #include "system.h" |
| #include "constants.h" |
| #include "merged.h" |
| #include "reader.h" |
| #include "reftable-error.h" |
| #include "reftable-record.h" |
| #include "reftable-merged.h" |
| #include "writer.h" |
| #include "tempfile.h" |
| |
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions further down in this file.
 */
static int stack_try_add(struct reftable_stack *st,
			 int (*write_table)(struct reftable_writer *wr, void *arg),
			 void *arg);

static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first,
			       size_t last,
			       struct reftable_log_expiry_config *config);

static void reftable_addition_close(struct reftable_addition *add);

static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open);
| |
| static int stack_filename(struct reftable_buf *dest, struct reftable_stack *st, |
| const char *name) |
| { |
| int err; |
| reftable_buf_reset(dest); |
| if ((err = reftable_buf_addstr(dest, st->reftable_dir)) < 0 || |
| (err = reftable_buf_addstr(dest, "/")) < 0 || |
| (err = reftable_buf_addstr(dest, name)) < 0) |
| return err; |
| return 0; |
| } |
| |
/*
 * Write callback handed to the reftable writer. `arg` points at the file
 * descriptor that the `sz` bytes starting at `data` are written to.
 */
static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz)
{
	int fd = *(int *)arg;

	return write_in_full(fd, data, sz);
}
| |
| static int reftable_fd_flush(void *arg) |
| { |
| int *fdp = (int *)arg; |
| |
| return fsync_component(FSYNC_COMPONENT_REFERENCE, *fdp); |
| } |
| |
| int reftable_new_stack(struct reftable_stack **dest, const char *dir, |
| const struct reftable_write_options *_opts) |
| { |
| struct reftable_buf list_file_name = REFTABLE_BUF_INIT; |
| struct reftable_write_options opts = { 0 }; |
| struct reftable_stack *p; |
| int err; |
| |
| p = reftable_calloc(1, sizeof(*p)); |
| if (!p) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto out; |
| } |
| |
| if (_opts) |
| opts = *_opts; |
| if (opts.hash_id == 0) |
| opts.hash_id = GIT_SHA1_FORMAT_ID; |
| |
| *dest = NULL; |
| |
| reftable_buf_reset(&list_file_name); |
| if ((err = reftable_buf_addstr(&list_file_name, dir)) < 0 || |
| (err = reftable_buf_addstr(&list_file_name, "/tables.list")) < 0) |
| goto out; |
| |
| p->list_file = reftable_buf_detach(&list_file_name); |
| p->list_fd = -1; |
| p->opts = opts; |
| p->reftable_dir = reftable_strdup(dir); |
| if (!p->reftable_dir) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto out; |
| } |
| |
| err = reftable_stack_reload_maybe_reuse(p, 1); |
| if (err < 0) |
| goto out; |
| |
| *dest = p; |
| err = 0; |
| |
| out: |
| if (err < 0) |
| reftable_stack_destroy(p); |
| return err; |
| } |
| |
| static int fd_read_lines(int fd, char ***namesp) |
| { |
| off_t size = lseek(fd, 0, SEEK_END); |
| char *buf = NULL; |
| int err = 0; |
| if (size < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| err = lseek(fd, 0, SEEK_SET); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| REFTABLE_ALLOC_ARRAY(buf, size + 1); |
| if (!buf) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto done; |
| } |
| |
| if (read_in_full(fd, buf, size) != size) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| buf[size] = 0; |
| |
| *namesp = parse_names(buf, size); |
| if (!*namesp) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto done; |
| } |
| |
| done: |
| reftable_free(buf); |
| return err; |
| } |
| |
| int read_lines(const char *filename, char ***namesp) |
| { |
| int fd = open(filename, O_RDONLY); |
| int err = 0; |
| if (fd < 0) { |
| if (errno == ENOENT) { |
| REFTABLE_CALLOC_ARRAY(*namesp, 1); |
| if (!*namesp) |
| return REFTABLE_OUT_OF_MEMORY_ERROR; |
| return 0; |
| } |
| |
| return REFTABLE_IO_ERROR; |
| } |
| err = fd_read_lines(fd, namesp); |
| close(fd); |
| return err; |
| } |
| |
| int reftable_stack_init_ref_iterator(struct reftable_stack *st, |
| struct reftable_iterator *it) |
| { |
| return merged_table_init_iter(reftable_stack_merged_table(st), |
| it, BLOCK_TYPE_REF); |
| } |
| |
| int reftable_stack_init_log_iterator(struct reftable_stack *st, |
| struct reftable_iterator *it) |
| { |
| return merged_table_init_iter(reftable_stack_merged_table(st), |
| it, BLOCK_TYPE_LOG); |
| } |
| |
| struct reftable_merged_table * |
| reftable_stack_merged_table(struct reftable_stack *st) |
| { |
| return st->merged; |
| } |
| |
/*
 * Return 1 when the NULL-terminated array `names` contains a string equal to
 * `name`, 0 otherwise.
 */
static int has_name(char **names, const char *name)
{
	for (char **cur = names; *cur; cur++)
		if (strcmp(*cur, name) == 0)
			return 1;
	return 0;
}
| |
| /* Close and free the stack */ |
| void reftable_stack_destroy(struct reftable_stack *st) |
| { |
| char **names = NULL; |
| int err = 0; |
| |
| if (!st) |
| return; |
| |
| if (st->merged) { |
| reftable_merged_table_free(st->merged); |
| st->merged = NULL; |
| } |
| |
| err = read_lines(st->list_file, &names); |
| if (err < 0) { |
| REFTABLE_FREE_AND_NULL(names); |
| } |
| |
| if (st->readers) { |
| int i = 0; |
| struct reftable_buf filename = REFTABLE_BUF_INIT; |
| for (i = 0; i < st->readers_len; i++) { |
| const char *name = reader_name(st->readers[i]); |
| int try_unlinking = 1; |
| |
| reftable_buf_reset(&filename); |
| if (names && !has_name(names, name)) { |
| if (stack_filename(&filename, st, name) < 0) |
| try_unlinking = 0; |
| } |
| reftable_reader_decref(st->readers[i]); |
| |
| if (try_unlinking && filename.len) { |
| /* On Windows, can only unlink after closing. */ |
| unlink(filename.buf); |
| } |
| } |
| reftable_buf_release(&filename); |
| st->readers_len = 0; |
| REFTABLE_FREE_AND_NULL(st->readers); |
| } |
| |
| if (st->list_fd >= 0) { |
| close(st->list_fd); |
| st->list_fd = -1; |
| } |
| |
| REFTABLE_FREE_AND_NULL(st->list_file); |
| REFTABLE_FREE_AND_NULL(st->reftable_dir); |
| reftable_free(st); |
| free_names(names); |
| } |
| |
| static struct reftable_reader **stack_copy_readers(struct reftable_stack *st, |
| size_t cur_len) |
| { |
| struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur)); |
| if (!cur) |
| return NULL; |
| for (size_t i = 0; i < cur_len; i++) |
| cur[i] = st->readers[i]; |
| return cur; |
| } |
| |
| static int reftable_stack_reload_once(struct reftable_stack *st, |
| const char **names, |
| int reuse_open) |
| { |
| size_t cur_len = !st->merged ? 0 : st->merged->readers_len; |
| struct reftable_reader **cur; |
| struct reftable_reader **reused = NULL; |
| struct reftable_reader **new_readers; |
| size_t reused_len = 0, reused_alloc = 0, names_len; |
| size_t new_readers_len = 0; |
| struct reftable_merged_table *new_merged = NULL; |
| struct reftable_buf table_path = REFTABLE_BUF_INIT; |
| int err = 0; |
| size_t i; |
| |
| cur = stack_copy_readers(st, cur_len); |
| if (!cur) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto done; |
| } |
| |
| names_len = names_length(names); |
| |
| new_readers = reftable_calloc(names_len, sizeof(*new_readers)); |
| if (!new_readers) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto done; |
| } |
| |
| while (*names) { |
| struct reftable_reader *rd = NULL; |
| const char *name = *names++; |
| |
| /* this is linear; we assume compaction keeps the number of |
| tables under control so this is not quadratic. */ |
| for (i = 0; reuse_open && i < cur_len; i++) { |
| if (cur[i] && 0 == strcmp(cur[i]->name, name)) { |
| rd = cur[i]; |
| cur[i] = NULL; |
| |
| /* |
| * When reloading the stack fails, we end up |
| * releasing all new readers. This also |
| * includes the reused readers, even though |
| * they are still in used by the old stack. We |
| * thus need to keep them alive here, which we |
| * do by bumping their refcount. |
| */ |
| REFTABLE_ALLOC_GROW(reused, reused_len + 1, reused_alloc); |
| if (!reused) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto done; |
| } |
| reused[reused_len++] = rd; |
| reftable_reader_incref(rd); |
| break; |
| } |
| } |
| |
| if (!rd) { |
| struct reftable_block_source src = { NULL }; |
| |
| err = stack_filename(&table_path, st, name); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_block_source_from_file(&src, |
| table_path.buf); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_reader_new(&rd, &src, name); |
| if (err < 0) |
| goto done; |
| } |
| |
| new_readers[new_readers_len] = rd; |
| new_readers_len++; |
| } |
| |
| /* success! */ |
| err = reftable_merged_table_new(&new_merged, new_readers, |
| new_readers_len, st->opts.hash_id); |
| if (err < 0) |
| goto done; |
| |
| /* |
| * Close the old, non-reused readers and proactively try to unlink |
| * them. This is done for systems like Windows, where the underlying |
| * file of such an open reader wouldn't have been possible to be |
| * unlinked by the compacting process. |
| */ |
| for (i = 0; i < cur_len; i++) { |
| if (cur[i]) { |
| const char *name = reader_name(cur[i]); |
| |
| err = stack_filename(&table_path, st, name); |
| if (err < 0) |
| goto done; |
| |
| reftable_reader_decref(cur[i]); |
| unlink(table_path.buf); |
| } |
| } |
| |
| /* Update the stack to point to the new tables. */ |
| if (st->merged) |
| reftable_merged_table_free(st->merged); |
| new_merged->suppress_deletions = 1; |
| st->merged = new_merged; |
| |
| if (st->readers) |
| reftable_free(st->readers); |
| st->readers = new_readers; |
| st->readers_len = new_readers_len; |
| new_readers = NULL; |
| new_readers_len = 0; |
| |
| /* |
| * Decrement the refcount of reused readers again. This only needs to |
| * happen on the successful case, because on the unsuccessful one we |
| * decrement their refcount via `new_readers`. |
| */ |
| for (i = 0; i < reused_len; i++) |
| reftable_reader_decref(reused[i]); |
| |
| done: |
| for (i = 0; i < new_readers_len; i++) |
| reftable_reader_decref(new_readers[i]); |
| reftable_free(new_readers); |
| reftable_free(reused); |
| reftable_free(cur); |
| reftable_buf_release(&table_path); |
| return err; |
| } |
| |
/*
 * Compare two points in time.
 *
 * Returns a negative value when `a` is before `b`, a positive value when `a`
 * is after `b` and zero when both are equal. The result is derived from
 * comparisons rather than from a subtraction, so a large difference cannot be
 * truncated or change sign when it is narrowed to `int`.
 */
static int tv_cmp(struct timeval *a, struct timeval *b)
{
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec < b->tv_sec ? -1 : 1;
	if (a->tv_usec != b->tv_usec)
		return a->tv_usec < b->tv_usec ? -1 : 1;
	return 0;
}
| |
| static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, |
| int reuse_open) |
| { |
| char **names = NULL, **names_after = NULL; |
| struct timeval deadline; |
| int64_t delay = 0; |
| int tries = 0, err; |
| int fd = -1; |
| |
| err = gettimeofday(&deadline, NULL); |
| if (err < 0) |
| goto out; |
| deadline.tv_sec += 3; |
| |
| while (1) { |
| struct timeval now; |
| |
| err = gettimeofday(&now, NULL); |
| if (err < 0) |
| goto out; |
| |
| /* |
| * Only look at deadlines after the first few times. This |
| * simplifies debugging in GDB. |
| */ |
| tries++; |
| if (tries > 3 && tv_cmp(&now, &deadline) >= 0) |
| goto out; |
| |
| fd = open(st->list_file, O_RDONLY); |
| if (fd < 0) { |
| if (errno != ENOENT) { |
| err = REFTABLE_IO_ERROR; |
| goto out; |
| } |
| |
| REFTABLE_CALLOC_ARRAY(names, 1); |
| if (!names) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto out; |
| } |
| } else { |
| err = fd_read_lines(fd, &names); |
| if (err < 0) |
| goto out; |
| } |
| |
| err = reftable_stack_reload_once(st, (const char **) names, reuse_open); |
| if (!err) |
| break; |
| if (err != REFTABLE_NOT_EXIST_ERROR) |
| goto out; |
| |
| /* |
| * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent |
| * writer. Check if there was one by checking if the name list |
| * changed. |
| */ |
| err = read_lines(st->list_file, &names_after); |
| if (err < 0) |
| goto out; |
| if (names_equal((const char **) names_after, |
| (const char **) names)) { |
| err = REFTABLE_NOT_EXIST_ERROR; |
| goto out; |
| } |
| |
| free_names(names); |
| names = NULL; |
| free_names(names_after); |
| names_after = NULL; |
| close(fd); |
| fd = -1; |
| |
| delay = delay + (delay * rand()) / RAND_MAX + 1; |
| sleep_millisec(delay); |
| } |
| |
| out: |
| /* |
| * Invalidate the stat cache. It is sufficient to only close the file |
| * descriptor and keep the cached stat info because we never use the |
| * latter when the former is negative. |
| */ |
| if (st->list_fd >= 0) { |
| close(st->list_fd); |
| st->list_fd = -1; |
| } |
| |
| /* |
| * Cache stat information in case it provides a useful signal to us. |
| * According to POSIX, "The st_ino and st_dev fields taken together |
| * uniquely identify the file within the system." That being said, |
| * Windows is not POSIX compliant and we do not have these fields |
| * available. So the information we have there is insufficient to |
| * determine whether two file descriptors point to the same file. |
| * |
| * While we could fall back to using other signals like the file's |
| * mtime, those are not sufficient to avoid races. We thus refrain from |
| * using the stat cache on such systems and fall back to the secondary |
| * caching mechanism, which is to check whether contents of the file |
| * have changed. |
| * |
| * On other systems which are POSIX compliant we must keep the file |
| * descriptor open. This is to avoid a race condition where two |
| * processes access the reftable stack at the same point in time: |
| * |
| * 1. A reads the reftable stack and caches its stat info. |
| * |
| * 2. B updates the stack, appending a new table to "tables.list". |
| * This will both use a new inode and result in a different file |
| * size, thus invalidating A's cache in theory. |
| * |
| * 3. B decides to auto-compact the stack and merges two tables. The |
| * file size now matches what A has cached again. Furthermore, the |
| * filesystem may decide to recycle the inode number of the file |
| * we have replaced in (2) because it is not in use anymore. |
| * |
| * 4. A reloads the reftable stack. Neither the inode number nor the |
| * file size changed. If the timestamps did not change either then |
| * we think the cached copy of our stack is up-to-date. |
| * |
| * By keeping the file descriptor open the inode number cannot be |
| * recycled, mitigating the race. |
| */ |
| if (!err && fd >= 0 && !fstat(fd, &st->list_st) && |
| st->list_st.st_dev && st->list_st.st_ino) { |
| st->list_fd = fd; |
| fd = -1; |
| } |
| |
| if (fd >= 0) |
| close(fd); |
| free_names(names); |
| free_names(names_after); |
| return err; |
| } |
| |
| /* -1 = error |
| 0 = up to date |
| 1 = changed. */ |
| static int stack_uptodate(struct reftable_stack *st) |
| { |
| char **names = NULL; |
| int err; |
| int i = 0; |
| |
| /* |
| * When we have cached stat information available then we use it to |
| * verify whether the file has been rewritten. |
| * |
| * Note that we explicitly do not want to use `stat_validity_check()` |
| * and friends here because they may end up not comparing the `st_dev` |
| * and `st_ino` fields. These functions thus cannot guarantee that we |
| * indeed still have the same file. |
| */ |
| if (st->list_fd >= 0) { |
| struct stat list_st; |
| |
| if (stat(st->list_file, &list_st) < 0) { |
| /* |
| * It's fine for "tables.list" to not exist. In that |
| * case, we have to refresh when the loaded stack has |
| * any readers. |
| */ |
| if (errno == ENOENT) |
| return !!st->readers_len; |
| return REFTABLE_IO_ERROR; |
| } |
| |
| /* |
| * When "tables.list" refers to the same file we can assume |
| * that it didn't change. This is because we always use |
| * rename(3P) to update the file and never write to it |
| * directly. |
| */ |
| if (st->list_st.st_dev == list_st.st_dev && |
| st->list_st.st_ino == list_st.st_ino) |
| return 0; |
| } |
| |
| err = read_lines(st->list_file, &names); |
| if (err < 0) |
| return err; |
| |
| for (i = 0; i < st->readers_len; i++) { |
| if (!names[i]) { |
| err = 1; |
| goto done; |
| } |
| |
| if (strcmp(st->readers[i]->name, names[i])) { |
| err = 1; |
| goto done; |
| } |
| } |
| |
| if (names[st->merged->readers_len]) { |
| err = 1; |
| goto done; |
| } |
| |
| done: |
| free_names(names); |
| return err; |
| } |
| |
/*
 * Reload the stack when stack_uptodate() reports that it has changed.
 * Returns the result of stack_uptodate() when no reload is needed (0) or
 * when the check failed (negative), otherwise the result of the reload.
 */
int reftable_stack_reload(struct reftable_stack *st)
{
	int status = stack_uptodate(st);

	if (status <= 0)
		return status;
	return reftable_stack_reload_maybe_reuse(st, 1);
}
| |
| int reftable_stack_add(struct reftable_stack *st, |
| int (*write)(struct reftable_writer *wr, void *arg), |
| void *arg) |
| { |
| int err = stack_try_add(st, write, arg); |
| if (err < 0) { |
| if (err == REFTABLE_OUTDATED_ERROR) { |
| /* Ignore error return, we want to propagate |
| REFTABLE_OUTDATED_ERROR. |
| */ |
| reftable_stack_reload(st); |
| } |
| return err; |
| } |
| |
| return 0; |
| } |
| |
/*
 * Store a table name of the form "0x<min>-0x<max>-<random>" in `dest`, where
 * `min` and `max` are printed as zero-padded hexadecimal numbers and the
 * suffix is taken from git_rand(). Returns the result of
 * reftable_buf_addstr().
 */
static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max)
{
	uint32_t suffix = (uint32_t)git_rand();
	char name[100];

	snprintf(name, sizeof(name), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x",
		 min, max, suffix);

	reftable_buf_reset(dest);
	return reftable_buf_addstr(dest, name);
}
| |
| struct reftable_addition { |
| struct lock_file tables_list_lock; |
| struct reftable_stack *stack; |
| |
| char **new_tables; |
| size_t new_tables_len, new_tables_cap; |
| uint64_t next_update_index; |
| }; |
| |
| #define REFTABLE_ADDITION_INIT {0} |
| |
| static int reftable_stack_init_addition(struct reftable_addition *add, |
| struct reftable_stack *st, |
| unsigned int flags) |
| { |
| struct reftable_buf lock_file_name = REFTABLE_BUF_INIT; |
| int err; |
| |
| add->stack = st; |
| |
| err = hold_lock_file_for_update_timeout(&add->tables_list_lock, |
| st->list_file, |
| LOCK_NO_DEREF, |
| st->opts.lock_timeout_ms); |
| if (err < 0) { |
| if (errno == EEXIST) { |
| err = REFTABLE_LOCK_ERROR; |
| } else { |
| err = REFTABLE_IO_ERROR; |
| } |
| goto done; |
| } |
| if (st->opts.default_permissions) { |
| if (chmod(get_lock_file_path(&add->tables_list_lock), |
| st->opts.default_permissions) < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| } |
| |
| err = stack_uptodate(st); |
| if (err < 0) |
| goto done; |
| if (err > 0 && flags & REFTABLE_STACK_NEW_ADDITION_RELOAD) { |
| err = reftable_stack_reload_maybe_reuse(add->stack, 1); |
| if (err) |
| goto done; |
| } |
| if (err > 0) { |
| err = REFTABLE_OUTDATED_ERROR; |
| goto done; |
| } |
| |
| add->next_update_index = reftable_stack_next_update_index(st); |
| done: |
| if (err) |
| reftable_addition_close(add); |
| reftable_buf_release(&lock_file_name); |
| return err; |
| } |
| |
| static void reftable_addition_close(struct reftable_addition *add) |
| { |
| struct reftable_buf nm = REFTABLE_BUF_INIT; |
| size_t i; |
| |
| for (i = 0; i < add->new_tables_len; i++) { |
| if (!stack_filename(&nm, add->stack, add->new_tables[i])) |
| unlink(nm.buf); |
| reftable_free(add->new_tables[i]); |
| add->new_tables[i] = NULL; |
| } |
| reftable_free(add->new_tables); |
| add->new_tables = NULL; |
| add->new_tables_len = 0; |
| add->new_tables_cap = 0; |
| |
| rollback_lock_file(&add->tables_list_lock); |
| reftable_buf_release(&nm); |
| } |
| |
/* Close the addition and free it. Passing NULL is a no-op. */
void reftable_addition_destroy(struct reftable_addition *add)
{
	if (add) {
		reftable_addition_close(add);
		reftable_free(add);
	}
}
| |
| int reftable_addition_commit(struct reftable_addition *add) |
| { |
| struct reftable_buf table_list = REFTABLE_BUF_INIT; |
| int lock_file_fd = get_lock_file_fd(&add->tables_list_lock); |
| int err = 0; |
| size_t i; |
| |
| if (add->new_tables_len == 0) |
| goto done; |
| |
| for (i = 0; i < add->stack->merged->readers_len; i++) { |
| if ((err = reftable_buf_addstr(&table_list, add->stack->readers[i]->name)) < 0 || |
| (err = reftable_buf_addstr(&table_list, "\n")) < 0) |
| goto done; |
| } |
| for (i = 0; i < add->new_tables_len; i++) { |
| if ((err = reftable_buf_addstr(&table_list, add->new_tables[i])) < 0 || |
| (err = reftable_buf_addstr(&table_list, "\n")) < 0) |
| goto done; |
| } |
| |
| err = write_in_full(lock_file_fd, table_list.buf, table_list.len); |
| reftable_buf_release(&table_list); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| err = fsync_component(FSYNC_COMPONENT_REFERENCE, lock_file_fd); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| err = commit_lock_file(&add->tables_list_lock); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| /* success, no more state to clean up. */ |
| for (i = 0; i < add->new_tables_len; i++) |
| reftable_free(add->new_tables[i]); |
| reftable_free(add->new_tables); |
| add->new_tables = NULL; |
| add->new_tables_len = 0; |
| add->new_tables_cap = 0; |
| |
| err = reftable_stack_reload_maybe_reuse(add->stack, 1); |
| if (err) |
| goto done; |
| |
| if (!add->stack->opts.disable_auto_compact) { |
| /* |
| * Auto-compact the stack to keep the number of tables in |
| * control. It is possible that a concurrent writer is already |
| * trying to compact parts of the stack, which would lead to a |
| * `REFTABLE_LOCK_ERROR` because parts of the stack are locked |
| * already. This is a benign error though, so we ignore it. |
| */ |
| err = reftable_stack_auto_compact(add->stack); |
| if (err < 0 && err != REFTABLE_LOCK_ERROR) |
| goto done; |
| err = 0; |
| } |
| |
| done: |
| reftable_addition_close(add); |
| return err; |
| } |
| |
| int reftable_stack_new_addition(struct reftable_addition **dest, |
| struct reftable_stack *st, |
| unsigned int flags) |
| { |
| int err = 0; |
| struct reftable_addition empty = REFTABLE_ADDITION_INIT; |
| |
| REFTABLE_CALLOC_ARRAY(*dest, 1); |
| if (!*dest) |
| return REFTABLE_OUT_OF_MEMORY_ERROR; |
| |
| **dest = empty; |
| err = reftable_stack_init_addition(*dest, st, flags); |
| if (err) { |
| reftable_free(*dest); |
| *dest = NULL; |
| } |
| return err; |
| } |
| |
| static int stack_try_add(struct reftable_stack *st, |
| int (*write_table)(struct reftable_writer *wr, |
| void *arg), |
| void *arg) |
| { |
| struct reftable_addition add = REFTABLE_ADDITION_INIT; |
| int err = reftable_stack_init_addition(&add, st, 0); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_addition_add(&add, write_table, arg); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_addition_commit(&add); |
| done: |
| reftable_addition_close(&add); |
| return err; |
| } |
| |
| int reftable_addition_add(struct reftable_addition *add, |
| int (*write_table)(struct reftable_writer *wr, |
| void *arg), |
| void *arg) |
| { |
| struct reftable_buf temp_tab_file_name = REFTABLE_BUF_INIT; |
| struct reftable_buf tab_file_name = REFTABLE_BUF_INIT; |
| struct reftable_buf next_name = REFTABLE_BUF_INIT; |
| struct reftable_writer *wr = NULL; |
| struct tempfile *tab_file = NULL; |
| int err = 0; |
| int tab_fd; |
| |
| reftable_buf_reset(&next_name); |
| |
| err = format_name(&next_name, add->next_update_index, add->next_update_index); |
| if (err < 0) |
| goto done; |
| |
| err = stack_filename(&temp_tab_file_name, add->stack, next_name.buf); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_buf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); |
| if (err < 0) |
| goto done; |
| |
| tab_file = mks_tempfile(temp_tab_file_name.buf); |
| if (!tab_file) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| if (add->stack->opts.default_permissions) { |
| if (chmod(get_tempfile_path(tab_file), |
| add->stack->opts.default_permissions)) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| } |
| tab_fd = get_tempfile_fd(tab_file); |
| |
| err = reftable_writer_new(&wr, reftable_fd_write, reftable_fd_flush, |
| &tab_fd, &add->stack->opts); |
| if (err < 0) |
| goto done; |
| |
| err = write_table(wr, arg); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_writer_close(wr); |
| if (err == REFTABLE_EMPTY_TABLE_ERROR) { |
| err = 0; |
| goto done; |
| } |
| if (err < 0) |
| goto done; |
| |
| err = close_tempfile_gently(tab_file); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| if (wr->min_update_index < add->next_update_index) { |
| err = REFTABLE_API_ERROR; |
| goto done; |
| } |
| |
| err = format_name(&next_name, wr->min_update_index, wr->max_update_index); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_buf_addstr(&next_name, ".ref"); |
| if (err < 0) |
| goto done; |
| |
| err = stack_filename(&tab_file_name, add->stack, next_name.buf); |
| if (err < 0) |
| goto done; |
| |
| /* |
| On windows, this relies on rand() picking a unique destination name. |
| Maybe we should do retry loop as well? |
| */ |
| err = rename_tempfile(&tab_file, tab_file_name.buf); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| REFTABLE_ALLOC_GROW(add->new_tables, add->new_tables_len + 1, |
| add->new_tables_cap); |
| if (!add->new_tables) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto done; |
| } |
| add->new_tables[add->new_tables_len++] = reftable_buf_detach(&next_name); |
| |
| done: |
| delete_tempfile(&tab_file); |
| reftable_buf_release(&temp_tab_file_name); |
| reftable_buf_release(&tab_file_name); |
| reftable_buf_release(&next_name); |
| reftable_writer_free(wr); |
| return err; |
| } |
| |
| uint64_t reftable_stack_next_update_index(struct reftable_stack *st) |
| { |
| int sz = st->merged->readers_len; |
| if (sz > 0) |
| return reftable_reader_max_update_index(st->readers[sz - 1]) + |
| 1; |
| return 1; |
| } |
| |
| static int stack_compact_locked(struct reftable_stack *st, |
| size_t first, size_t last, |
| struct reftable_log_expiry_config *config, |
| struct tempfile **tab_file_out) |
| { |
| struct reftable_buf next_name = REFTABLE_BUF_INIT; |
| struct reftable_buf tab_file_path = REFTABLE_BUF_INIT; |
| struct reftable_writer *wr = NULL; |
| struct tempfile *tab_file; |
| int tab_fd, err = 0; |
| |
| err = format_name(&next_name, reftable_reader_min_update_index(st->readers[first]), |
| reftable_reader_max_update_index(st->readers[last])); |
| if (err < 0) |
| goto done; |
| |
| err = stack_filename(&tab_file_path, st, next_name.buf); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_buf_addstr(&tab_file_path, ".temp.XXXXXX"); |
| if (err < 0) |
| goto done; |
| |
| tab_file = mks_tempfile(tab_file_path.buf); |
| if (!tab_file) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| tab_fd = get_tempfile_fd(tab_file); |
| |
| if (st->opts.default_permissions && |
| chmod(get_tempfile_path(tab_file), st->opts.default_permissions) < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| err = reftable_writer_new(&wr, reftable_fd_write, reftable_fd_flush, |
| &tab_fd, &st->opts); |
| if (err < 0) |
| goto done; |
| |
| err = stack_write_compact(st, wr, first, last, config); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_writer_close(wr); |
| if (err < 0) |
| goto done; |
| |
| err = close_tempfile_gently(tab_file); |
| if (err < 0) |
| goto done; |
| |
| *tab_file_out = tab_file; |
| tab_file = NULL; |
| |
| done: |
| delete_tempfile(&tab_file); |
| reftable_writer_free(wr); |
| reftable_buf_release(&next_name); |
| reftable_buf_release(&tab_file_path); |
| return err; |
| } |
| |
| static int stack_write_compact(struct reftable_stack *st, |
| struct reftable_writer *wr, |
| size_t first, size_t last, |
| struct reftable_log_expiry_config *config) |
| { |
| struct reftable_merged_table *mt = NULL; |
| struct reftable_iterator it = { NULL }; |
| struct reftable_ref_record ref = { NULL }; |
| struct reftable_log_record log = { NULL }; |
| size_t subtabs_len = last - first + 1; |
| uint64_t entries = 0; |
| int err = 0; |
| |
| for (size_t i = first; i <= last; i++) |
| st->stats.bytes += st->readers[i]->size; |
| reftable_writer_set_limits(wr, st->readers[first]->min_update_index, |
| st->readers[last]->max_update_index); |
| |
| err = reftable_merged_table_new(&mt, st->readers + first, subtabs_len, |
| st->opts.hash_id); |
| if (err < 0) |
| goto done; |
| |
| err = merged_table_init_iter(mt, &it, BLOCK_TYPE_REF); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_iterator_seek_ref(&it, ""); |
| if (err < 0) |
| goto done; |
| |
| while (1) { |
| err = reftable_iterator_next_ref(&it, &ref); |
| if (err > 0) { |
| err = 0; |
| break; |
| } |
| if (err < 0) |
| goto done; |
| |
| if (first == 0 && reftable_ref_record_is_deletion(&ref)) { |
| continue; |
| } |
| |
| err = reftable_writer_add_ref(wr, &ref); |
| if (err < 0) |
| goto done; |
| entries++; |
| } |
| reftable_iterator_destroy(&it); |
| |
| err = merged_table_init_iter(mt, &it, BLOCK_TYPE_LOG); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_iterator_seek_log(&it, ""); |
| if (err < 0) |
| goto done; |
| |
| while (1) { |
| err = reftable_iterator_next_log(&it, &log); |
| if (err > 0) { |
| err = 0; |
| break; |
| } |
| if (err < 0) |
| goto done; |
| if (first == 0 && reftable_log_record_is_deletion(&log)) { |
| continue; |
| } |
| |
| if (config && config->min_update_index > 0 && |
| log.update_index < config->min_update_index) { |
| continue; |
| } |
| |
| if (config && config->time > 0 && |
| log.value.update.time < config->time) { |
| continue; |
| } |
| |
| err = reftable_writer_add_log(wr, &log); |
| if (err < 0) |
| goto done; |
| entries++; |
| } |
| |
| done: |
| reftable_iterator_destroy(&it); |
| if (mt) |
| reftable_merged_table_free(mt); |
| reftable_ref_record_release(&ref); |
| reftable_log_record_release(&log); |
| st->stats.entries_written += entries; |
| return err; |
| } |
| |
/* Flags that modify the behavior of stack_compact_range(). */
enum stack_compact_range_flags {
	/*
	 * Compact on a best-effort basis: when not every table in the
	 * requested range can be locked, compact the slice of tables that
	 * could be locked instead of giving up.
	 */
	STACK_COMPACT_RANGE_BEST_EFFORT = 1 << 0,
};
| |
| /* |
| * Compact all tables in the range `[first, last]` into a single new table. |
| * |
| * This function returns `0` on success or a code `< 0` on failure. When the |
| * stack or any of the tables in the specified range are already locked then |
| * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that |
| * callers can either ignore, or they may choose to retry compaction after some |
| * amount of time. |
| */ |
| static int stack_compact_range(struct reftable_stack *st, |
| size_t first, size_t last, |
| struct reftable_log_expiry_config *expiry, |
| unsigned int flags) |
| { |
| struct reftable_buf tables_list_buf = REFTABLE_BUF_INIT; |
| struct reftable_buf new_table_name = REFTABLE_BUF_INIT; |
| struct reftable_buf new_table_path = REFTABLE_BUF_INIT; |
| struct reftable_buf table_name = REFTABLE_BUF_INIT; |
| struct lock_file tables_list_lock = LOCK_INIT; |
| struct lock_file *table_locks = NULL; |
| struct tempfile *new_table = NULL; |
| int is_empty_table = 0, err = 0; |
| size_t first_to_replace, last_to_replace; |
| size_t i, nlocks = 0; |
| char **names = NULL; |
| |
| if (first > last || (!expiry && first == last)) { |
| err = 0; |
| goto done; |
| } |
| |
| st->stats.attempts++; |
| |
| /* |
| * Hold the lock so that we can read "tables.list" and lock all tables |
| * which are part of the user-specified range. |
| */ |
| err = hold_lock_file_for_update_timeout(&tables_list_lock, |
| st->list_file, |
| LOCK_NO_DEREF, |
| st->opts.lock_timeout_ms); |
| if (err < 0) { |
| if (errno == EEXIST) |
| err = REFTABLE_LOCK_ERROR; |
| else |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| err = stack_uptodate(st); |
| if (err) |
| goto done; |
| |
| /* |
| * Lock all tables in the user-provided range. This is the slice of our |
| * stack which we'll compact. |
| * |
| * Note that we lock tables in reverse order from last to first. The |
| * intent behind this is to allow a newer process to perform best |
| * effort compaction of tables that it has added in the case where an |
| * older process is still busy compacting tables which are preexisting |
| * from the point of view of the newer process. |
| */ |
| REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1); |
| if (!table_locks) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto done; |
| } |
| |
| for (i = last + 1; i > first; i--) { |
| err = stack_filename(&table_name, st, reader_name(st->readers[i - 1])); |
| if (err < 0) |
| goto done; |
| |
| err = hold_lock_file_for_update(&table_locks[nlocks], |
| table_name.buf, LOCK_NO_DEREF); |
| if (err < 0) { |
| /* |
| * When the table is locked already we may do a |
| * best-effort compaction and compact only the tables |
| * that we have managed to lock so far. This of course |
| * requires that we have been able to lock at least two |
| * tables, otherwise there would be nothing to compact. |
| * In that case, we return a lock error to our caller. |
| */ |
| if (errno == EEXIST && last - (i - 1) >= 2 && |
| flags & STACK_COMPACT_RANGE_BEST_EFFORT) { |
| err = 0; |
| /* |
| * The subtraction is to offset the index, the |
| * addition is to only compact up to the table |
| * of the preceding iteration. They obviously |
| * cancel each other out, but that may be |
| * non-obvious when it was omitted. |
| */ |
| first = (i - 1) + 1; |
| break; |
| } else if (errno == EEXIST) { |
| err = REFTABLE_LOCK_ERROR; |
| goto done; |
| } else { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| } |
| |
| /* |
| * We need to close the lockfiles as we might otherwise easily |
| * run into file descriptor exhaustion when we compress a lot |
| * of tables. |
| */ |
| err = close_lock_file_gently(&table_locks[nlocks++]); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| } |
| |
| /* |
| * We have locked all tables in our range and can thus release the |
| * "tables.list" lock while compacting the locked tables. This allows |
| * concurrent updates to the stack to proceed. |
| */ |
| err = rollback_lock_file(&tables_list_lock); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| /* |
| * Compact the now-locked tables into a new table. Note that compacting |
| * these tables may end up with an empty new table in case tombstones |
| * end up cancelling out all refs in that range. |
| */ |
| err = stack_compact_locked(st, first, last, expiry, &new_table); |
| if (err < 0) { |
| if (err != REFTABLE_EMPTY_TABLE_ERROR) |
| goto done; |
| is_empty_table = 1; |
| } |
| |
| /* |
| * Now that we have written the new, compacted table we need to re-lock |
| * "tables.list". We'll then replace the compacted range of tables with |
| * the new table. |
| */ |
| err = hold_lock_file_for_update_timeout(&tables_list_lock, |
| st->list_file, |
| LOCK_NO_DEREF, |
| st->opts.lock_timeout_ms); |
| if (err < 0) { |
| if (errno == EEXIST) |
| err = REFTABLE_LOCK_ERROR; |
| else |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| if (st->opts.default_permissions) { |
| if (chmod(get_lock_file_path(&tables_list_lock), |
| st->opts.default_permissions) < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| } |
| |
| /* |
| * As we have unlocked the stack while compacting our slice of tables |
| * it may have happened that a concurrently running process has updated |
| * the stack while we were compacting. In that case, we need to check |
| * whether the tables that we have just compacted still exist in the |
| * stack in the exact same order as we have compacted them. |
| * |
| * If they do exist, then it is fine to continue and replace those |
| * tables with our compacted version. If they don't, then we need to |
| * abort. |
| */ |
| err = stack_uptodate(st); |
| if (err < 0) |
| goto done; |
| if (err > 0) { |
| ssize_t new_offset = -1; |
| int fd; |
| |
| fd = open(st->list_file, O_RDONLY); |
| if (fd < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| |
| err = fd_read_lines(fd, &names); |
| close(fd); |
| if (err < 0) |
| goto done; |
| |
| /* |
| * Search for the offset of the first table that we have |
| * compacted in the updated "tables.list" file. |
| */ |
| for (size_t i = 0; names[i]; i++) { |
| if (strcmp(names[i], st->readers[first]->name)) |
| continue; |
| |
| /* |
| * We have found the first entry. Verify that all the |
| * subsequent tables we have compacted still exist in |
| * the modified stack in the exact same order as we |
| * have compacted them. |
| */ |
| for (size_t j = 1; j < last - first + 1; j++) { |
| const char *old = first + j < st->merged->readers_len ? |
| st->readers[first + j]->name : NULL; |
| const char *new = names[i + j]; |
| |
| /* |
| * If some entries are missing or in case the tables |
| * have changed then we need to bail out. Again, this |
| * shouldn't ever happen because we have locked the |
| * tables we are compacting. |
| */ |
| if (!old || !new || strcmp(old, new)) { |
| err = REFTABLE_OUTDATED_ERROR; |
| goto done; |
| } |
| } |
| |
| new_offset = i; |
| break; |
| } |
| |
| /* |
| * In case we didn't find our compacted tables in the stack we |
| * need to bail out. In theory, this should have never happened |
| * because we locked the tables we are compacting. |
| */ |
| if (new_offset < 0) { |
| err = REFTABLE_OUTDATED_ERROR; |
| goto done; |
| } |
| |
| /* |
| * We have found the new range that we want to replace, so |
| * let's update the range of tables that we want to replace. |
| */ |
| first_to_replace = new_offset; |
| last_to_replace = last + (new_offset - first); |
| } else { |
| /* |
| * `fd_read_lines()` uses a `NULL` sentinel to indicate that |
| * the array is at its end. As we use `free_names()` to free |
| * the array, we need to include this sentinel value here and |
| * thus have to allocate `readers_len + 1` many entries. |
| */ |
| REFTABLE_CALLOC_ARRAY(names, st->merged->readers_len + 1); |
| if (!names) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto done; |
| } |
| |
| for (size_t i = 0; i < st->merged->readers_len; i++) { |
| names[i] = reftable_strdup(st->readers[i]->name); |
| if (!names[i]) { |
| err = REFTABLE_OUT_OF_MEMORY_ERROR; |
| goto done; |
| } |
| } |
| first_to_replace = first; |
| last_to_replace = last; |
| } |
| |
| /* |
| * If the resulting compacted table is not empty, then we need to move |
| * it into place now. |
| */ |
| if (!is_empty_table) { |
| err = format_name(&new_table_name, st->readers[first]->min_update_index, |
| st->readers[last]->max_update_index); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_buf_addstr(&new_table_name, ".ref"); |
| if (err < 0) |
| goto done; |
| |
| err = stack_filename(&new_table_path, st, new_table_name.buf); |
| if (err < 0) |
| goto done; |
| |
| err = rename_tempfile(&new_table, new_table_path.buf); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| goto done; |
| } |
| } |
| |
| /* |
| * Write the new "tables.list" contents with the compacted table we |
| * have just written. In case the compacted table became empty we |
| * simply skip writing it. |
| */ |
| for (i = 0; i < first_to_replace; i++) { |
| if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 || |
| (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0) |
| goto done; |
| } |
| if (!is_empty_table) { |
| if ((err = reftable_buf_addstr(&tables_list_buf, new_table_name.buf)) < 0 || |
| (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0) |
| goto done; |
| } |
| for (i = last_to_replace + 1; names[i]; i++) { |
| if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 || |
| (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0) |
| goto done; |
| } |
| |
| err = write_in_full(get_lock_file_fd(&tables_list_lock), |
| tables_list_buf.buf, tables_list_buf.len); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| unlink(new_table_path.buf); |
| goto done; |
| } |
| |
| err = fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&tables_list_lock)); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| unlink(new_table_path.buf); |
| goto done; |
| } |
| |
| err = commit_lock_file(&tables_list_lock); |
| if (err < 0) { |
| err = REFTABLE_IO_ERROR; |
| unlink(new_table_path.buf); |
| goto done; |
| } |
| |
| /* |
| * Reload the stack before deleting the compacted tables. We can only |
| * delete the files after we closed them on Windows, so this needs to |
| * happen first. |
| */ |
| err = reftable_stack_reload_maybe_reuse(st, first < last); |
| if (err < 0) |
| goto done; |
| |
| /* |
| * Delete the old tables. They may still be in use by concurrent |
| * readers, so it is expected that unlinking tables may fail. |
| */ |
| for (i = 0; i < nlocks; i++) { |
| struct lock_file *table_lock = &table_locks[i]; |
| char *table_path = get_locked_file_path(table_lock); |
| unlink(table_path); |
| reftable_free(table_path); |
| } |
| |
| done: |
| rollback_lock_file(&tables_list_lock); |
| for (i = 0; table_locks && i < nlocks; i++) |
| rollback_lock_file(&table_locks[i]); |
| reftable_free(table_locks); |
| |
| delete_tempfile(&new_table); |
| reftable_buf_release(&new_table_name); |
| reftable_buf_release(&new_table_path); |
| reftable_buf_release(&tables_list_buf); |
| reftable_buf_release(&table_name); |
| free_names(names); |
| |
| if (err == REFTABLE_LOCK_ERROR) |
| st->stats.failures++; |
| |
| return err; |
| } |
| |
| int reftable_stack_compact_all(struct reftable_stack *st, |
| struct reftable_log_expiry_config *config) |
| { |
| size_t last = st->merged->readers_len ? st->merged->readers_len - 1 : 0; |
| return stack_compact_range(st, 0, last, config, 0); |
| } |
| |
| static int segment_size(struct segment *s) |
| { |
| return s->end - s->start; |
| } |
| |
| struct segment suggest_compaction_segment(uint64_t *sizes, size_t n, |
| uint8_t factor) |
| { |
| struct segment seg = { 0 }; |
| uint64_t bytes; |
| size_t i; |
| |
| if (!factor) |
| factor = DEFAULT_GEOMETRIC_FACTOR; |
| |
| /* |
| * If there are no tables or only a single one then we don't have to |
| * compact anything. The sequence is geometric by definition already. |
| */ |
| if (n <= 1) |
| return seg; |
| |
| /* |
| * Find the ending table of the compaction segment needed to restore the |
| * geometric sequence. Note that the segment end is exclusive. |
| * |
| * To do so, we iterate backwards starting from the most recent table |
| * until a valid segment end is found. If the preceding table is smaller |
| * than the current table multiplied by the geometric factor (2), the |
| * compaction segment end has been identified. |
| * |
| * Tables after the ending point are not added to the byte count because |
| * they are already valid members of the geometric sequence. Due to the |
| * properties of a geometric sequence, it is not possible for the sum of |
| * these tables to exceed the value of the ending point table. |
| * |
| * Example table size sequence requiring no compaction: |
| * 64, 32, 16, 8, 4, 2, 1 |
| * |
| * Example table size sequence where compaction segment end is set to |
| * the last table. Since the segment end is exclusive, the last table is |
| * excluded during subsequent compaction and the table with size 3 is |
| * the final table included: |
| * 64, 32, 16, 8, 4, 3, 1 |
| */ |
| for (i = n - 1; i > 0; i--) { |
| if (sizes[i - 1] < sizes[i] * factor) { |
| seg.end = i + 1; |
| bytes = sizes[i]; |
| break; |
| } |
| } |
| |
| /* |
| * Find the starting table of the compaction segment by iterating |
| * through the remaining tables and keeping track of the accumulated |
| * size of all tables seen from the segment end table. The previous |
| * table is compared to the accumulated size because the tables from the |
| * segment end are merged backwards recursively. |
| * |
| * Note that we keep iterating even after we have found the first |
| * starting point. This is because there may be tables in the stack |
| * preceding that first starting point which violate the geometric |
| * sequence. |
| * |
| * Example compaction segment start set to table with size 32: |
| * 128, 32, 16, 8, 4, 3, 1 |
| */ |
| for (; i > 0; i--) { |
| uint64_t curr = bytes; |
| bytes += sizes[i - 1]; |
| |
| if (sizes[i - 1] < curr * factor) { |
| seg.start = i - 1; |
| seg.bytes = bytes; |
| } |
| } |
| |
| return seg; |
| } |
| |
| static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st) |
| { |
| int version = (st->opts.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2; |
| int overhead = header_size(version) - 1; |
| uint64_t *sizes; |
| |
| REFTABLE_CALLOC_ARRAY(sizes, st->merged->readers_len); |
| if (!sizes) |
| return NULL; |
| |
| for (size_t i = 0; i < st->merged->readers_len; i++) |
| sizes[i] = st->readers[i]->size - overhead; |
| |
| return sizes; |
| } |
| |
| int reftable_stack_auto_compact(struct reftable_stack *st) |
| { |
| struct segment seg; |
| uint64_t *sizes; |
| |
| sizes = stack_table_sizes_for_compaction(st); |
| if (!sizes) |
| return REFTABLE_OUT_OF_MEMORY_ERROR; |
| |
| seg = suggest_compaction_segment(sizes, st->merged->readers_len, |
| st->opts.auto_compaction_factor); |
| reftable_free(sizes); |
| |
| if (segment_size(&seg) > 0) |
| return stack_compact_range(st, seg.start, seg.end - 1, |
| NULL, STACK_COMPACT_RANGE_BEST_EFFORT); |
| |
| return 0; |
| } |
| |
| struct reftable_compaction_stats * |
| reftable_stack_compaction_stats(struct reftable_stack *st) |
| { |
| return &st->stats; |
| } |
| |
| int reftable_stack_read_ref(struct reftable_stack *st, const char *refname, |
| struct reftable_ref_record *ref) |
| { |
| struct reftable_iterator it = { 0 }; |
| int ret; |
| |
| ret = reftable_merged_table_init_ref_iterator(st->merged, &it); |
| if (ret) |
| goto out; |
| |
| ret = reftable_iterator_seek_ref(&it, refname); |
| if (ret) |
| goto out; |
| |
| ret = reftable_iterator_next_ref(&it, ref); |
| if (ret) |
| goto out; |
| |
| if (strcmp(ref->refname, refname) || |
| reftable_ref_record_is_deletion(ref)) { |
| reftable_ref_record_release(ref); |
| ret = 1; |
| goto out; |
| } |
| |
| out: |
| reftable_iterator_destroy(&it); |
| return ret; |
| } |
| |
| int reftable_stack_read_log(struct reftable_stack *st, const char *refname, |
| struct reftable_log_record *log) |
| { |
| struct reftable_iterator it = {0}; |
| int err; |
| |
| err = reftable_stack_init_log_iterator(st, &it); |
| if (err) |
| goto done; |
| |
| err = reftable_iterator_seek_log(&it, refname); |
| if (err) |
| goto done; |
| |
| err = reftable_iterator_next_log(&it, log); |
| if (err) |
| goto done; |
| |
| if (strcmp(log->refname, refname) || |
| reftable_log_record_is_deletion(log)) { |
| err = 1; |
| goto done; |
| } |
| |
| done: |
| if (err) { |
| reftable_log_record_release(log); |
| } |
| reftable_iterator_destroy(&it); |
| return err; |
| } |
| |
/*
 * Tell whether `s` looks like a table file name, i.e. whether the part
 * starting at its last '.' is exactly ".ref". Returns 1 if so, 0 otherwise.
 */
static int is_table_name(const char *s)
{
	const char *ext = strrchr(s, '.');

	if (!ext)
		return 0;
	return strcmp(ext, ".ref") == 0;
}
| |
| static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max, |
| const char *name) |
| { |
| int err = 0; |
| uint64_t update_idx = 0; |
| struct reftable_block_source src = { NULL }; |
| struct reftable_reader *rd = NULL; |
| struct reftable_buf table_path = REFTABLE_BUF_INIT; |
| |
| err = stack_filename(&table_path, st, name); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_block_source_from_file(&src, table_path.buf); |
| if (err < 0) |
| goto done; |
| |
| err = reftable_reader_new(&rd, &src, name); |
| if (err < 0) |
| goto done; |
| |
| update_idx = reftable_reader_max_update_index(rd); |
| reftable_reader_decref(rd); |
| |
| if (update_idx <= max) { |
| unlink(table_path.buf); |
| } |
| done: |
| reftable_buf_release(&table_path); |
| } |
| |
| static int reftable_stack_clean_locked(struct reftable_stack *st) |
| { |
| uint64_t max = reftable_merged_table_max_update_index( |
| reftable_stack_merged_table(st)); |
| DIR *dir = opendir(st->reftable_dir); |
| struct dirent *d = NULL; |
| if (!dir) { |
| return REFTABLE_IO_ERROR; |
| } |
| |
| while ((d = readdir(dir))) { |
| int i = 0; |
| int found = 0; |
| if (!is_table_name(d->d_name)) |
| continue; |
| |
| for (i = 0; !found && i < st->readers_len; i++) { |
| found = !strcmp(reader_name(st->readers[i]), d->d_name); |
| } |
| if (found) |
| continue; |
| |
| remove_maybe_stale_table(st, max, d->d_name); |
| } |
| |
| closedir(dir); |
| return 0; |
| } |
| |
/*
 * Remove table files in the stack's directory that are no longer referenced.
 *
 * An addition is created first and kept until the end, the stack is reloaded
 * and then the directory is cleaned. Returns the first negative result from
 * those steps, or else the result of `reftable_stack_clean_locked()`.
 */
int reftable_stack_clean(struct reftable_stack *st)
{
	struct reftable_addition *addition = NULL;
	int rc;

	rc = reftable_stack_new_addition(&addition, st, 0);
	if (rc >= 0)
		rc = reftable_stack_reload(st);
	if (rc >= 0)
		rc = reftable_stack_clean_locked(st);

	/* Destroyed on every path, including when creating it has failed. */
	reftable_addition_destroy(addition);
	return rc;
}