blob: adde6dfe21254f7022051fd6d3c19b722b34f54e [file] [log] [blame]
Garima Singhf52207a2020-03-30 00:31:24 +00001#ifndef BLOOM_H
2#define BLOOM_H
3
Garima Singhed591fe2020-03-30 00:31:26 +00004struct commit;
5struct repository;
6
Garima Singhf1294ea2020-03-30 00:31:25 +00007struct bloom_filter_settings {
8 /*
9 * The version of the hashing technique being used.
10 * We currently only support version = 1 which is
11 * the seeded murmur3 hashing technique implemented
12 * in bloom.c.
13 */
14 uint32_t hash_version;
15
16 /*
17 * The number of times a path is hashed, i.e. the
18 * number of bit positions tht cumulatively
19 * determine whether a path is present in the
20 * Bloom filter.
21 */
22 uint32_t num_hashes;
23
24 /*
25 * The minimum number of bits per entry in the Bloom
26 * filter. If the filter contains 'n' entries, then
27 * filter size is the minimum number of 8-bit words
28 * that contain n*b bits.
29 */
30 uint32_t bits_per_entry;
Taylor Blau97ffa4f2020-09-17 09:34:42 -040031
32 /*
33 * The maximum number of changed paths per commit
34 * before declaring a Bloom filter to be too-large.
35 *
36 * Not written to the commit-graph file.
37 */
38 uint32_t max_changed_paths;
Garima Singhf1294ea2020-03-30 00:31:25 +000039};
40
Taylor Blau97ffa4f2020-09-17 09:34:42 -040041#define DEFAULT_BLOOM_MAX_CHANGES 512
42#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES }
Garima Singhf1294ea2020-03-30 00:31:25 +000043#define BITS_PER_WORD 8
Garima Singh1217c032020-04-06 16:59:50 +000044#define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t)
Garima Singhf1294ea2020-03-30 00:31:25 +000045
46/*
47 * A bloom_filter struct represents a data segment to
48 * use when testing hash values. The 'len' member
49 * dictates how many entries are stored in
50 * 'data'.
51 */
52struct bloom_filter {
53 unsigned char *data;
54 size_t len;
55};
56
57/*
58 * A bloom_key represents the k hash values for a
59 * given string. These can be precomputed and
60 * stored in a bloom_key for re-use when testing
61 * against a bloom_filter. The number of hashes is
62 * given by the Bloom filter settings and is the same
63 * for all Bloom filters and keys interacting with
64 * the loaded version of the commit graph file and
65 * the Bloom data chunks.
66 */
67struct bloom_key {
68 uint32_t *hashes;
69};
70
Garima Singhf52207a2020-03-30 00:31:24 +000071/*
72 * Calculate the murmur3 32-bit hash value for the given data
73 * using the given seed.
74 * Produces a uniformly distributed hash value.
75 * Not considered to be cryptographically secure.
76 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
77 */
78uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
79
Garima Singhf1294ea2020-03-30 00:31:25 +000080void fill_bloom_key(const char *data,
81 size_t len,
82 struct bloom_key *key,
83 const struct bloom_filter_settings *settings);
Derrick Stoleef32dde82020-05-11 11:56:19 +000084void clear_bloom_key(struct bloom_key *key);
Garima Singhf1294ea2020-03-30 00:31:25 +000085
86void add_key_to_filter(const struct bloom_key *key,
Derrick Stoleeeb591e42020-05-01 15:30:18 +000087 struct bloom_filter *filter,
88 const struct bloom_filter_settings *settings);
Garima Singhf1294ea2020-03-30 00:31:25 +000089
Garima Singhed591fe2020-03-30 00:31:26 +000090void init_bloom_filters(void);
91
Taylor Blau312cff52020-09-16 14:07:32 -040092enum bloom_filter_computed {
93 BLOOM_NOT_COMPUTED = (1 << 0),
94 BLOOM_COMPUTED = (1 << 1),
95 BLOOM_TRUNC_LARGE = (1 << 2),
Taylor Blau59f0d502020-09-17 22:59:44 -040096 BLOOM_TRUNC_EMPTY = (1 << 3),
Taylor Blau312cff52020-09-16 14:07:32 -040097};
98
99struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
100 struct commit *c,
101 int compute_if_not_present,
Taylor Blau9a7a9ed2020-09-16 14:07:46 -0400102 const struct bloom_filter_settings *settings,
Taylor Blau312cff52020-09-16 14:07:32 -0400103 enum bloom_filter_computed *computed);
104
105#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \
Taylor Blau9a7a9ed2020-09-16 14:07:46 -0400106 (r), (c), 0, NULL, NULL)
Garima Singhed591fe2020-03-30 00:31:26 +0000107
Garima Singha56b9462020-04-06 16:59:52 +0000108int bloom_filter_contains(const struct bloom_filter *filter,
109 const struct bloom_key *key,
110 const struct bloom_filter_settings *settings);
111
Đoàn Trần Công Danh066b70a2020-05-08 00:51:02 +0100112#endif