Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 1 | /* |
| 2 | * name-hash.c |
| 3 | * |
| 4 | * Hashing names in the index state |
| 5 | * |
| 6 | * Copyright (C) 2008 Linus Torvalds |
| 7 | */ |
| 8 | #define NO_THE_INDEX_COMPATIBILITY_MACROS |
| 9 | #include "cache.h" |
| 10 | |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 11 | struct dir_entry { |
Karsten Blees | e05881a | 2013-11-14 20:20:58 +0100 | [diff] [blame] | 12 | struct hashmap_entry ent; |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 13 | struct dir_entry *parent; |
| 14 | struct cache_entry *ce; |
| 15 | int nr; |
| 16 | unsigned int namelen; |
| 17 | }; |
| 18 | |
Karsten Blees | e05881a | 2013-11-14 20:20:58 +0100 | [diff] [blame] | 19 | static int dir_entry_cmp(const struct dir_entry *e1, |
| 20 | const struct dir_entry *e2, const char *name) |
| 21 | { |
| 22 | return e1->namelen != e2->namelen || strncasecmp(e1->ce->name, |
| 23 | name ? name : e2->ce->name, e1->namelen); |
| 24 | } |
| 25 | |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 26 | static struct dir_entry *find_dir_entry(struct index_state *istate, |
| 27 | const char *name, unsigned int namelen) |
| 28 | { |
Karsten Blees | e05881a | 2013-11-14 20:20:58 +0100 | [diff] [blame] | 29 | struct dir_entry key; |
| 30 | hashmap_entry_init(&key, memihash(name, namelen)); |
| 31 | key.namelen = namelen; |
| 32 | return hashmap_get(&istate->dir_hash, &key, name); |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 33 | } |
| 34 | |
| 35 | static struct dir_entry *hash_dir_entry(struct index_state *istate, |
| 36 | struct cache_entry *ce, int namelen) |
Joshua Jensen | 5102c61 | 2010-10-03 09:56:43 +0000 | [diff] [blame] | 37 | { |
| 38 | /* |
| 39 | * Throw each directory component in the hash for quick lookup |
Eric Sunshine | d28eec2 | 2013-09-17 03:06:16 -0400 | [diff] [blame] | 40 | * during a git status. Directory components are stored without their |
Joshua Jensen | 5102c61 | 2010-10-03 09:56:43 +0000 | [diff] [blame] | 41 | * closing slash. Despite submodules being a directory, they never |
Eric Sunshine | d28eec2 | 2013-09-17 03:06:16 -0400 | [diff] [blame] | 42 | * reach this point, because they are stored |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 43 | * in index_state.name_hash (as ordinary cache_entries). |
Joshua Jensen | 5102c61 | 2010-10-03 09:56:43 +0000 | [diff] [blame] | 44 | * |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 45 | * Note that the cache_entry stored with the dir_entry merely |
| 46 | * supplies the name of the directory (up to dir_entry.namelen). We |
| 47 | * track the number of 'active' files in a directory in dir_entry.nr, |
| 48 | * so we can tell if the directory is still relevant, e.g. for git |
| 49 | * status. However, if cache_entries are removed, we cannot pinpoint |
| 50 | * an exact cache_entry that's still active. It is very possible that |
| 51 | * multiple dir_entries point to the same cache_entry. |
Joshua Jensen | 5102c61 | 2010-10-03 09:56:43 +0000 | [diff] [blame] | 52 | */ |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 53 | struct dir_entry *dir; |
Joshua Jensen | 5102c61 | 2010-10-03 09:56:43 +0000 | [diff] [blame] | 54 | |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 55 | /* get length of parent directory */ |
| 56 | while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1])) |
| 57 | namelen--; |
| 58 | if (namelen <= 0) |
| 59 | return NULL; |
Eric Sunshine | d28eec2 | 2013-09-17 03:06:16 -0400 | [diff] [blame] | 60 | namelen--; |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 61 | |
| 62 | /* lookup existing entry for that directory */ |
| 63 | dir = find_dir_entry(istate, ce->name, namelen); |
| 64 | if (!dir) { |
| 65 | /* not found, create it and add to hash table */ |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 66 | dir = xcalloc(1, sizeof(struct dir_entry)); |
Karsten Blees | e05881a | 2013-11-14 20:20:58 +0100 | [diff] [blame] | 67 | hashmap_entry_init(dir, memihash(ce->name, namelen)); |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 68 | dir->namelen = namelen; |
| 69 | dir->ce = ce; |
Karsten Blees | e05881a | 2013-11-14 20:20:58 +0100 | [diff] [blame] | 70 | hashmap_add(&istate->dir_hash, dir); |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 71 | |
| 72 | /* recursively add missing parent directories */ |
Eric Sunshine | d28eec2 | 2013-09-17 03:06:16 -0400 | [diff] [blame] | 73 | dir->parent = hash_dir_entry(istate, ce, namelen); |
Joshua Jensen | 5102c61 | 2010-10-03 09:56:43 +0000 | [diff] [blame] | 74 | } |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 75 | return dir; |
| 76 | } |
| 77 | |
| 78 | static void add_dir_entry(struct index_state *istate, struct cache_entry *ce) |
| 79 | { |
| 80 | /* Add reference to the directory entry (and parents if 0). */ |
| 81 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); |
| 82 | while (dir && !(dir->nr++)) |
| 83 | dir = dir->parent; |
| 84 | } |
| 85 | |
| 86 | static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce) |
| 87 | { |
| 88 | /* |
Karsten Blees | 1c8cca1 | 2013-11-14 20:21:26 +0100 | [diff] [blame] | 89 | * Release reference to the directory entry. If 0, remove and continue |
| 90 | * with parent directory. |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 91 | */ |
| 92 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); |
Karsten Blees | 1c8cca1 | 2013-11-14 20:21:26 +0100 | [diff] [blame] | 93 | while (dir && !(--dir->nr)) { |
| 94 | struct dir_entry *parent = dir->parent; |
| 95 | hashmap_remove(&istate->dir_hash, dir, NULL); |
| 96 | free(dir); |
| 97 | dir = parent; |
| 98 | } |
Joshua Jensen | 5102c61 | 2010-10-03 09:56:43 +0000 | [diff] [blame] | 99 | } |
| 100 | |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 101 | static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) |
| 102 | { |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 103 | if (ce->ce_flags & CE_HASHED) |
| 104 | return; |
| 105 | ce->ce_flags |= CE_HASHED; |
Karsten Blees | 8b01378 | 2013-11-14 20:21:58 +0100 | [diff] [blame] | 106 | hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce))); |
| 107 | hashmap_add(&istate->name_hash, ce); |
Joshua Jensen | 5102c61 | 2010-10-03 09:56:43 +0000 | [diff] [blame] | 108 | |
Karsten Blees | 419a597 | 2013-11-14 20:22:27 +0100 | [diff] [blame] | 109 | if (ignore_case) |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 110 | add_dir_entry(istate, ce); |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 111 | } |
| 112 | |
Karsten Blees | 419a597 | 2013-11-14 20:22:27 +0100 | [diff] [blame] | 113 | static int cache_entry_cmp(const struct cache_entry *ce1, |
| 114 | const struct cache_entry *ce2, const void *remove) |
| 115 | { |
| 116 | /* |
| 117 | * For remove_name_hash, find the exact entry (pointer equality); for |
Eric Sunshine | 7b359ea | 2014-01-02 16:57:12 -0500 | [diff] [blame] | 118 | * index_file_exists, find all entries with matching hash code and |
Karsten Blees | 419a597 | 2013-11-14 20:22:27 +0100 | [diff] [blame] | 119 | * decide whether the entry matches in same_name. |
| 120 | */ |
| 121 | return remove ? !(ce1 == ce2) : 0; |
| 122 | } |
| 123 | |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 124 | static void lazy_init_name_hash(struct index_state *istate) |
| 125 | { |
| 126 | int nr; |
| 127 | |
| 128 | if (istate->name_hash_initialized) |
| 129 | return; |
Karsten Blees | 419a597 | 2013-11-14 20:22:27 +0100 | [diff] [blame] | 130 | hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp, |
| 131 | istate->cache_nr); |
Karsten Blees | e05881a | 2013-11-14 20:20:58 +0100 | [diff] [blame] | 132 | hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0); |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 133 | for (nr = 0; nr < istate->cache_nr; nr++) |
| 134 | hash_index_entry(istate, istate->cache[nr]); |
| 135 | istate->name_hash_initialized = 1; |
| 136 | } |
| 137 | |
| 138 | void add_name_hash(struct index_state *istate, struct cache_entry *ce) |
| 139 | { |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 140 | if (istate->name_hash_initialized) |
| 141 | hash_index_entry(istate, ce); |
| 142 | } |
| 143 | |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 144 | void remove_name_hash(struct index_state *istate, struct cache_entry *ce) |
| 145 | { |
Karsten Blees | 419a597 | 2013-11-14 20:22:27 +0100 | [diff] [blame] | 146 | if (!istate->name_hash_initialized || !(ce->ce_flags & CE_HASHED)) |
| 147 | return; |
| 148 | ce->ce_flags &= ~CE_HASHED; |
| 149 | hashmap_remove(&istate->name_hash, ce, ce); |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 150 | |
Karsten Blees | 419a597 | 2013-11-14 20:22:27 +0100 | [diff] [blame] | 151 | if (ignore_case) |
| 152 | remove_dir_entry(istate, ce); |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 153 | } |
| 154 | |
Linus Torvalds | cd2fef5 | 2008-03-21 15:55:19 -0700 | [diff] [blame] | 155 | static int slow_same_name(const char *name1, int len1, const char *name2, int len2) |
| 156 | { |
| 157 | if (len1 != len2) |
| 158 | return 0; |
| 159 | |
| 160 | while (len1) { |
| 161 | unsigned char c1 = *name1++; |
| 162 | unsigned char c2 = *name2++; |
| 163 | len1--; |
| 164 | if (c1 != c2) { |
| 165 | c1 = toupper(c1); |
| 166 | c2 = toupper(c2); |
| 167 | if (c1 != c2) |
| 168 | return 0; |
| 169 | } |
| 170 | } |
| 171 | return 1; |
| 172 | } |
| 173 | |
| 174 | static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase) |
| 175 | { |
| 176 | int len = ce_namelen(ce); |
| 177 | |
| 178 | /* |
| 179 | * Always do exact compare, even if we want a case-ignoring comparison; |
| 180 | * we do the quick exact one first, because it will be the common case. |
| 181 | */ |
Jeremiah Mahler | be99ec9 | 2014-06-19 19:06:43 -0700 | [diff] [blame] | 182 | if (len == namelen && !memcmp(name, ce->name, len)) |
Linus Torvalds | cd2fef5 | 2008-03-21 15:55:19 -0700 | [diff] [blame] | 183 | return 1; |
| 184 | |
Joshua Jensen | 5102c61 | 2010-10-03 09:56:43 +0000 | [diff] [blame] | 185 | if (!icase) |
| 186 | return 0; |
| 187 | |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 188 | return slow_same_name(name, namelen, ce->name, len); |
Linus Torvalds | cd2fef5 | 2008-03-21 15:55:19 -0700 | [diff] [blame] | 189 | } |
| 190 | |
Eric Sunshine | db5360f | 2013-09-17 03:06:14 -0400 | [diff] [blame] | 191 | struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen) |
| 192 | { |
| 193 | struct cache_entry *ce; |
| 194 | struct dir_entry *dir; |
| 195 | |
| 196 | lazy_init_name_hash(istate); |
| 197 | dir = find_dir_entry(istate, name, namelen); |
| 198 | if (dir && dir->nr) |
| 199 | return dir->ce; |
| 200 | |
| 201 | /* |
| 202 | * It might be a submodule. Unlike plain directories, which are stored |
| 203 | * in the dir-hash, submodules are stored in the name-hash, so check |
| 204 | * there, as well. |
| 205 | */ |
Eric Sunshine | d28eec2 | 2013-09-17 03:06:16 -0400 | [diff] [blame] | 206 | ce = index_file_exists(istate, name, namelen, 1); |
Eric Sunshine | db5360f | 2013-09-17 03:06:14 -0400 | [diff] [blame] | 207 | if (ce && S_ISGITLINK(ce->ce_mode)) |
| 208 | return ce; |
| 209 | |
| 210 | return NULL; |
| 211 | } |
| 212 | |
| 213 | struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase) |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 214 | { |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 215 | struct cache_entry *ce; |
| 216 | |
| 217 | lazy_init_name_hash(istate); |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 218 | |
Karsten Blees | ab73a9d | 2014-07-03 00:22:11 +0200 | [diff] [blame] | 219 | ce = hashmap_get_from_hash(&istate->name_hash, |
| 220 | memihash(name, namelen), NULL); |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 221 | while (ce) { |
Karsten Blees | 419a597 | 2013-11-14 20:22:27 +0100 | [diff] [blame] | 222 | if (same_name(ce, name, namelen, icase)) |
| 223 | return ce; |
Karsten Blees | 8b01378 | 2013-11-14 20:21:58 +0100 | [diff] [blame] | 224 | ce = hashmap_get_next(&istate->name_hash, ce); |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 225 | } |
Linus Torvalds | df292c7 | 2008-03-21 15:53:00 -0700 | [diff] [blame] | 226 | return NULL; |
Linus Torvalds | 96872bc | 2008-03-21 13:16:24 -0700 | [diff] [blame] | 227 | } |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 228 | |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 229 | void free_name_hash(struct index_state *istate) |
| 230 | { |
| 231 | if (!istate->name_hash_initialized) |
| 232 | return; |
| 233 | istate->name_hash_initialized = 0; |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 234 | |
Karsten Blees | 8b01378 | 2013-11-14 20:21:58 +0100 | [diff] [blame] | 235 | hashmap_free(&istate->name_hash, 0); |
Karsten Blees | e05881a | 2013-11-14 20:20:58 +0100 | [diff] [blame] | 236 | hashmap_free(&istate->dir_hash, 1); |
Karsten Blees | 2092678 | 2013-02-28 00:57:48 +0100 | [diff] [blame] | 237 | } |