Blame - name-hash.c - jrn/git

blob: 163849831c9f11316ce97c649b77c32cf2eed276 [file] [log] [blame]

Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	1	/*
				2	* name-hash.c
				3	*
				4	* Hashing names in the index state
				5	*
				6	* Copyright (C) 2008 Linus Torvalds
				7	*/
				8	#define NO_THE_INDEX_COMPATIBILITY_MACROS
				9	#include "cache.h"
				10
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	11	struct dir_entry {
Karsten Blees	e05881a	2013-11-14 20:20:58 +0100	[diff] [blame]	12	struct hashmap_entry ent;
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	13	struct dir_entry *parent;
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	14	int nr;
				15	unsigned int namelen;
David Turner	41284eb	2015-10-21 13:54:11 -0400	[diff] [blame]	16	char name[FLEX_ARRAY];
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	17	};
				18
Stefan Beller	7663cdc	2017-06-30 12:14:05 -0700	[diff] [blame]	19	static int dir_entry_cmp(const void *unused_cmp_data,
Stefan Beller	56a14ea	2017-06-30 17:28:37 -0700	[diff] [blame]	20	const void *entry,
				21	const void *entry_or_key,
				22	const void *keydata)
Karsten Blees	e05881a	2013-11-14 20:20:58 +0100	[diff] [blame]	23	{
Stefan Beller	56a14ea	2017-06-30 17:28:37 -0700	[diff] [blame]	24	const struct dir_entry *e1 = entry;
				25	const struct dir_entry *e2 = entry_or_key;
				26	const char *name = keydata;
				27
David Turner	41284eb	2015-10-21 13:54:11 -0400	[diff] [blame]	28	return e1->namelen != e2->namelen \|\| strncasecmp(e1->name,
				29	name ? name : e2->name, e1->namelen);
Karsten Blees	e05881a	2013-11-14 20:20:58 +0100	[diff] [blame]	30	}
				31
Jeff Hostetler	846df80	2017-03-23 13:47:03 +0000	[diff] [blame]	32	static struct dir_entry find_dir_entry__hash(struct index_state istate,
				33	const char *name, unsigned int namelen, unsigned int hash)
				34	{
				35	struct dir_entry key;
				36	hashmap_entry_init(&key, hash);
				37	key.namelen = namelen;
				38	return hashmap_get(&istate->dir_hash, &key, name);
				39	}
				40
/*
 * Look up the directory "name" (of length "namelen") in "istate->dir_hash",
 * hashing the name with memihash() first.  Returns the entry or NULL.
 */
static struct dir_entry *find_dir_entry(struct index_state *istate,
		const char *name, unsigned int namelen)
{
	unsigned int hash = memihash(name, namelen);

	return find_dir_entry__hash(istate, name, namelen, hash);
}
				46
				47	static struct dir_entry hash_dir_entry(struct index_state istate,
				48	struct cache_entry *ce, int namelen)
Joshua Jensen	5102c61	2010-10-03 09:56:43 +0000	[diff] [blame]	49	{
				50	/*
				51	* Throw each directory component in the hash for quick lookup
Eric Sunshine	d28eec2	2013-09-17 03:06:16 -0400	[diff] [blame]	52	* during a git status. Directory components are stored without their
Joshua Jensen	5102c61	2010-10-03 09:56:43 +0000	[diff] [blame]	53	* closing slash. Despite submodules being a directory, they never
Eric Sunshine	d28eec2	2013-09-17 03:06:16 -0400	[diff] [blame]	54	* reach this point, because they are stored
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	55	* in index_state.name_hash (as ordinary cache_entries).
Joshua Jensen	5102c61	2010-10-03 09:56:43 +0000	[diff] [blame]	56	*/
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	57	struct dir_entry *dir;
Joshua Jensen	5102c61	2010-10-03 09:56:43 +0000	[diff] [blame]	58
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	59	/* get length of parent directory */
				60	while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1]))
				61	namelen--;
				62	if (namelen <= 0)
				63	return NULL;
Eric Sunshine	d28eec2	2013-09-17 03:06:16 -0400	[diff] [blame]	64	namelen--;
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	65
				66	/* lookup existing entry for that directory */
				67	dir = find_dir_entry(istate, ce->name, namelen);
				68	if (!dir) {
				69	/* not found, create it and add to hash table */
Jeff King	96ffc06	2016-02-22 17:44:32 -0500	[diff] [blame]	70	FLEX_ALLOC_MEM(dir, name, ce->name, namelen);
Karsten Blees	e05881a	2013-11-14 20:20:58 +0100	[diff] [blame]	71	hashmap_entry_init(dir, memihash(ce->name, namelen));
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	72	dir->namelen = namelen;
Karsten Blees	e05881a	2013-11-14 20:20:58 +0100	[diff] [blame]	73	hashmap_add(&istate->dir_hash, dir);
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	74
				75	/* recursively add missing parent directories */
Eric Sunshine	d28eec2	2013-09-17 03:06:16 -0400	[diff] [blame]	76	dir->parent = hash_dir_entry(istate, ce, namelen);
Joshua Jensen	5102c61	2010-10-03 09:56:43 +0000	[diff] [blame]	77	}
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	78	return dir;
				79	}
				80
				81	static void add_dir_entry(struct index_state istate, struct cache_entry ce)
				82	{
				83	/* Add reference to the directory entry (and parents if 0). */
				84	struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
				85	while (dir && !(dir->nr++))
				86	dir = dir->parent;
				87	}
				88
				89	static void remove_dir_entry(struct index_state istate, struct cache_entry ce)
				90	{
				91	/*
Karsten Blees	1c8cca1	2013-11-14 20:21:26 +0100	[diff] [blame]	92	* Release reference to the directory entry. If 0, remove and continue
				93	* with parent directory.
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	94	*/
				95	struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
Karsten Blees	1c8cca1	2013-11-14 20:21:26 +0100	[diff] [blame]	96	while (dir && !(--dir->nr)) {
				97	struct dir_entry *parent = dir->parent;
				98	hashmap_remove(&istate->dir_hash, dir, NULL);
				99	free(dir);
				100	dir = parent;
				101	}
Joshua Jensen	5102c61	2010-10-03 09:56:43 +0000	[diff] [blame]	102	}
				103
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	104	static void hash_index_entry(struct index_state istate, struct cache_entry ce)
				105	{
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	106	if (ce->ce_flags & CE_HASHED)
				107	return;
				108	ce->ce_flags \|= CE_HASHED;
Karsten Blees	8b01378	2013-11-14 20:21:58 +0100	[diff] [blame]	109	hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce)));
				110	hashmap_add(&istate->name_hash, ce);
Joshua Jensen	5102c61	2010-10-03 09:56:43 +0000	[diff] [blame]	111
Karsten Blees	419a597	2013-11-14 20:22:27 +0100	[diff] [blame]	112	if (ignore_case)
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	113	add_dir_entry(istate, ce);
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	114	}
				115
/*
 * hashmap comparison callback for "istate->name_hash".
 *
 * With a non-NULL "remove" (the remove_name_hash() case) two entries are
 * equal only when they are the very same object.  Otherwise every entry
 * with a matching hash code compares equal (0), and index_file_exists()
 * decides via same_name() whether it is really the one it wants.
 */
static int cache_entry_cmp(const void *unused_cmp_data,
			   const void *entry,
			   const void *entry_or_key,
			   const void *remove)
{
	const struct cache_entry *stored = entry;
	const struct cache_entry *wanted = entry_or_key;

	if (!remove)
		return 0;
	return stored != wanted;
}
				130
/* non-zero: lookup_lazy_params() may pick the threaded code path */
static int lazy_try_threaded = 1;
/* number of "dir" threads chosen by lookup_lazy_params(); 0 means unthreaded */
static int lazy_nr_dir_threads;
				133
				134	#ifdef NO_PTHREADS
				135
				136	static inline int lookup_lazy_params(struct index_state *istate)
				137	{
				138	return 0;
				139	}
				140
				141	static inline void threaded_lazy_init_name_hash(
				142	struct index_state *istate)
				143	{
				144	}
				145
				146	#else
				147
				148	#include "thread-utils.h"
				149
				150	/*
				151	* Set a minimum number of cache_entries that we will handle per
				152	* thread and use that to decide how many threads to run (upto
				153	* the number on the system).
				154	*
				155	* For guidance setting the lower per-thread bound, see:
				156	* t/helper/test-lazy-init-name-hash --analyze
				157	*/
				158	#define LAZY_THREAD_COST (2000)
				159
				160	/*
				161	* We use n mutexes to guard n partitions of the "istate->dir_hash"
				162	* hashtable. Since "find" and "insert" operations will hash to a
				163	* particular bucket and modify/search a single chain, we can say
				164	* that "all chains mod n" are guarded by the same mutex -- rather
				165	* than having a single mutex to guard the entire table. (This does
				166	* require that we disable "rehashing" on the hashtable.)
				167	*
				168	* So, a larger value here decreases the probability of a collision
				169	* and the time that each thread must wait for the mutex.
				170	*/
				171	#define LAZY_MAX_MUTEX (32)
				172
				173	static pthread_mutex_t *lazy_dir_mutex_array;
				174
				175	/*
				176	* An array of lazy_entry items is used by the n threads in
				177	* the directory parse (first) phase to (lock-free) store the
				178	* intermediate results. These values are then referenced by
				179	* the 2 threads in the second phase.
				180	*/
				181	struct lazy_entry {
				182	struct dir_entry *dir;
				183	unsigned int hash_dir;
				184	unsigned int hash_name;
				185	};
				186
				187	/*
				188	* Decide if we want to use threads (if available) to load
				189	* the hash tables. We set "lazy_nr_dir_threads" to zero when
				190	* it is not worth it.
				191	*/
				192	static int lookup_lazy_params(struct index_state *istate)
				193	{
				194	int nr_cpus;
				195
				196	lazy_nr_dir_threads = 0;
				197
				198	if (!lazy_try_threaded)
				199	return 0;
				200
				201	/*
				202	* If we are respecting case, just use the original
				203	* code to build the "istate->name_hash". We don't
				204	* need the complexity here.
				205	*/
				206	if (!ignore_case)
				207	return 0;
				208
				209	nr_cpus = online_cpus();
				210	if (nr_cpus < 2)
				211	return 0;
				212
				213	if (istate->cache_nr < 2 * LAZY_THREAD_COST)
				214	return 0;
				215
				216	if (istate->cache_nr < nr_cpus * LAZY_THREAD_COST)
				217	nr_cpus = istate->cache_nr / LAZY_THREAD_COST;
				218	lazy_nr_dir_threads = nr_cpus;
				219	return lazy_nr_dir_threads;
				220	}
				221
				222	/*
				223	* Initialize n mutexes for use when searching and inserting
				224	* into "istate->dir_hash". All "dir" threads are trying
				225	* to insert partial pathnames into the hash as they iterate
				226	* over their portions of the index, so lock contention is
				227	* high.
				228	*
				229	* However, the hashmap is going to put items into bucket
				230	* chains based on their hash values. Use that to create n
				231	* mutexes and lock on mutex[bucket(hash) % n]. This will
				232	* decrease the collision rate by (hopefully) by a factor of n.
				233	*/
				234	static void init_dir_mutex(void)
				235	{
				236	int j;
				237
				238	lazy_dir_mutex_array = xcalloc(LAZY_MAX_MUTEX, sizeof(pthread_mutex_t));
				239
				240	for (j = 0; j < LAZY_MAX_MUTEX; j++)
				241	init_recursive_mutex(&lazy_dir_mutex_array[j]);
				242	}
				243
				244	static void cleanup_dir_mutex(void)
				245	{
				246	int j;
				247
				248	for (j = 0; j < LAZY_MAX_MUTEX; j++)
				249	pthread_mutex_destroy(&lazy_dir_mutex_array[j]);
				250
				251	free(lazy_dir_mutex_array);
				252	}
				253
				254	static void lock_dir_mutex(int j)
				255	{
				256	pthread_mutex_lock(&lazy_dir_mutex_array[j]);
				257	}
				258
				259	static void unlock_dir_mutex(int j)
				260	{
				261	pthread_mutex_unlock(&lazy_dir_mutex_array[j]);
				262	}
				263
				264	static inline int compute_dir_lock_nr(
				265	const struct hashmap *map,
				266	unsigned int hash)
				267	{
				268	return hashmap_bucket(map, hash) % LAZY_MAX_MUTEX;
				269	}
				270
				271	static struct dir_entry *hash_dir_entry_with_parent_and_prefix(
				272	struct index_state *istate,
				273	struct dir_entry *parent,
				274	struct strbuf *prefix)
				275	{
				276	struct dir_entry *dir;
				277	unsigned int hash;
				278	int lock_nr;
				279
				280	/*
				281	* Either we have a parent directory and path with slash(es)
				282	* or the directory is an immediate child of the root directory.
				283	*/
				284	assert((parent != NULL) ^ (strchr(prefix->buf, '/') == NULL));
				285
				286	if (parent)
				287	hash = memihash_cont(parent->ent.hash,
				288	prefix->buf + parent->namelen,
				289	prefix->len - parent->namelen);
				290	else
				291	hash = memihash(prefix->buf, prefix->len);
				292
				293	lock_nr = compute_dir_lock_nr(&istate->dir_hash, hash);
				294	lock_dir_mutex(lock_nr);
				295
				296	dir = find_dir_entry__hash(istate, prefix->buf, prefix->len, hash);
				297	if (!dir) {
				298	FLEX_ALLOC_MEM(dir, name, prefix->buf, prefix->len);
				299	hashmap_entry_init(dir, hash);
				300	dir->namelen = prefix->len;
				301	dir->parent = parent;
				302	hashmap_add(&istate->dir_hash, dir);
				303
				304	if (parent) {
				305	unlock_dir_mutex(lock_nr);
				306
				307	/* All I really need here is an InterlockedIncrement(&(parent->nr)) */
				308	lock_nr = compute_dir_lock_nr(&istate->dir_hash, parent->ent.hash);
				309	lock_dir_mutex(lock_nr);
				310	parent->nr++;
				311	}
				312	}
				313
				314	unlock_dir_mutex(lock_nr);
				315
				316	return dir;
				317	}
				318
				319	/*
				320	* handle_range_1() and handle_range_dir() are derived from
				321	* clear_ce_flags_1() and clear_ce_flags_dir() in unpack-trees.c
				322	* and handle the iteration over the entire array of index entries.
				323	* They use recursion for adjacent entries in the same parent
				324	* directory.
				325	*/
				326	static int handle_range_1(
				327	struct index_state *istate,
				328	int k_start,
				329	int k_end,
				330	struct dir_entry *parent,
				331	struct strbuf *prefix,
				332	struct lazy_entry *lazy_entries);
				333
				334	static int handle_range_dir(
				335	struct index_state *istate,
				336	int k_start,
				337	int k_end,
				338	struct dir_entry *parent,
				339	struct strbuf *prefix,
				340	struct lazy_entry *lazy_entries,
				341	struct dir_entry **dir_new_out)
				342	{
				343	int rc, k;
				344	int input_prefix_len = prefix->len;
				345	struct dir_entry *dir_new;
				346
				347	dir_new = hash_dir_entry_with_parent_and_prefix(istate, parent, prefix);
				348
				349	strbuf_addch(prefix, '/');
				350
				351	/*
				352	* Scan forward in the index array for index entries having the same
				353	* path prefix (that are also in this directory).
				354	*/
Kevin Willford	2a1bd45	2017-03-31 17:32:14 +0000	[diff] [blame]	355	if (k_start + 1 >= k_end)
				356	k = k_end;
				357	else if (strncmp(istate->cache[k_start + 1]->name, prefix->buf, prefix->len) > 0)
Jeff Hostetler	846df80	2017-03-23 13:47:03 +0000	[diff] [blame]	358	k = k_start + 1;
				359	else if (strncmp(istate->cache[k_end - 1]->name, prefix->buf, prefix->len) == 0)
				360	k = k_end;
				361	else {
				362	int begin = k_start;
				363	int end = k_end;
				364	while (begin < end) {
				365	int mid = (begin + end) >> 1;
				366	int cmp = strncmp(istate->cache[mid]->name, prefix->buf, prefix->len);
				367	if (cmp == 0) /* mid has same prefix; look in second part */
				368	begin = mid + 1;
				369	else if (cmp > 0) /* mid is past group; look in first part */
				370	end = mid;
				371	else
				372	die("cache entry out of order");
				373	}
				374	k = begin;
				375	}
				376
				377	/*
				378	* Recurse and process what we can of this subset [k_start, k).
				379	*/
				380	rc = handle_range_1(istate, k_start, k, dir_new, prefix, lazy_entries);
				381
				382	strbuf_setlen(prefix, input_prefix_len);
				383
				384	*dir_new_out = dir_new;
				385	return rc;
				386	}
				387
				388	static int handle_range_1(
				389	struct index_state *istate,
				390	int k_start,
				391	int k_end,
				392	struct dir_entry *parent,
				393	struct strbuf *prefix,
				394	struct lazy_entry *lazy_entries)
				395	{
				396	int input_prefix_len = prefix->len;
				397	int k = k_start;
				398
				399	while (k < k_end) {
				400	struct cache_entry *ce_k = istate->cache[k];
				401	const char name, slash;
				402
				403	if (prefix->len && strncmp(ce_k->name, prefix->buf, prefix->len))
				404	break;
				405
				406	name = ce_k->name + prefix->len;
				407	slash = strchr(name, '/');
				408
				409	if (slash) {
				410	int len = slash - name;
				411	int processed;
				412	struct dir_entry *dir_new;
				413
				414	strbuf_add(prefix, name, len);
				415	processed = handle_range_dir(istate, k, k_end, parent, prefix, lazy_entries, &dir_new);
				416	if (processed) {
				417	k += processed;
				418	strbuf_setlen(prefix, input_prefix_len);
				419	continue;
				420	}
				421
				422	strbuf_addch(prefix, '/');
				423	processed = handle_range_1(istate, k, k_end, dir_new, prefix, lazy_entries);
				424	k += processed;
				425	strbuf_setlen(prefix, input_prefix_len);
				426	continue;
				427	}
				428
				429	/*
				430	* It is too expensive to take a lock to insert "ce_k"
				431	* into "istate->name_hash" and increment the ref-count
				432	* on the "parent" dir. So we defer actually updating
				433	* permanent data structures until phase 2 (where we
				434	* can change the locking requirements) and simply
				435	* accumulate our current results into the lazy_entries
				436	* data array).
				437	*
				438	* We do not need to lock the lazy_entries array because
				439	* we have exclusive access to the cells in the range
				440	* [k_start,k_end) that this thread was given.
				441	*/
				442	lazy_entries[k].dir = parent;
				443	if (parent) {
				444	lazy_entries[k].hash_name = memihash_cont(
				445	parent->ent.hash,
				446	ce_k->name + parent->namelen,
				447	ce_namelen(ce_k) - parent->namelen);
				448	lazy_entries[k].hash_dir = parent->ent.hash;
				449	} else {
				450	lazy_entries[k].hash_name = memihash(ce_k->name, ce_namelen(ce_k));
				451	}
				452
				453	k++;
				454	}
				455
				456	return k - k_start;
				457	}
				458
				459	struct lazy_dir_thread_data {
				460	pthread_t pthread;
				461	struct index_state *istate;
				462	struct lazy_entry *lazy_entries;
				463	int k_start;
				464	int k_end;
				465	};
				466
				467	static void lazy_dir_thread_proc(void _data)
				468	{
				469	struct lazy_dir_thread_data *d = _data;
				470	struct strbuf prefix = STRBUF_INIT;
				471	handle_range_1(d->istate, d->k_start, d->k_end, NULL, &prefix, d->lazy_entries);
				472	strbuf_release(&prefix);
				473	return NULL;
				474	}
				475
				476	struct lazy_name_thread_data {
				477	pthread_t pthread;
				478	struct index_state *istate;
				479	struct lazy_entry *lazy_entries;
				480	};
				481
				482	static void lazy_name_thread_proc(void _data)
				483	{
				484	struct lazy_name_thread_data *d = _data;
				485	int k;
				486
				487	for (k = 0; k < d->istate->cache_nr; k++) {
				488	struct cache_entry *ce_k = d->istate->cache[k];
				489	ce_k->ce_flags \|= CE_HASHED;
				490	hashmap_entry_init(ce_k, d->lazy_entries[k].hash_name);
				491	hashmap_add(&d->istate->name_hash, ce_k);
				492	}
				493
				494	return NULL;
				495	}
				496
				497	static inline void lazy_update_dir_ref_counts(
				498	struct index_state *istate,
				499	struct lazy_entry *lazy_entries)
				500	{
				501	int k;
				502
				503	for (k = 0; k < istate->cache_nr; k++) {
				504	if (lazy_entries[k].dir)
				505	lazy_entries[k].dir->nr++;
				506	}
				507	}
				508
				509	static void threaded_lazy_init_name_hash(
				510	struct index_state *istate)
				511	{
				512	int nr_each;
				513	int k_start;
				514	int t;
				515	struct lazy_entry *lazy_entries;
				516	struct lazy_dir_thread_data *td_dir;
				517	struct lazy_name_thread_data *td_name;
				518
				519	k_start = 0;
				520	nr_each = DIV_ROUND_UP(istate->cache_nr, lazy_nr_dir_threads);
				521
				522	lazy_entries = xcalloc(istate->cache_nr, sizeof(struct lazy_entry));
				523	td_dir = xcalloc(lazy_nr_dir_threads, sizeof(struct lazy_dir_thread_data));
				524	td_name = xcalloc(1, sizeof(struct lazy_name_thread_data));
				525
				526	init_dir_mutex();
				527
				528	/*
				529	* Phase 1:
				530	* Build "istate->dir_hash" using n "dir" threads (and a read-only index).
				531	*/
				532	for (t = 0; t < lazy_nr_dir_threads; t++) {
				533	struct lazy_dir_thread_data *td_dir_t = td_dir + t;
				534	td_dir_t->istate = istate;
				535	td_dir_t->lazy_entries = lazy_entries;
				536	td_dir_t->k_start = k_start;
				537	k_start += nr_each;
				538	if (k_start > istate->cache_nr)
				539	k_start = istate->cache_nr;
				540	td_dir_t->k_end = k_start;
				541	if (pthread_create(&td_dir_t->pthread, NULL, lazy_dir_thread_proc, td_dir_t))
				542	die("unable to create lazy_dir_thread");
				543	}
				544	for (t = 0; t < lazy_nr_dir_threads; t++) {
				545	struct lazy_dir_thread_data *td_dir_t = td_dir + t;
				546	if (pthread_join(td_dir_t->pthread, NULL))
				547	die("unable to join lazy_dir_thread");
				548	}
				549
				550	/*
				551	* Phase 2:
				552	* Iterate over all index entries and add them to the "istate->name_hash"
				553	* using a single "name" background thread.
				554	* (Testing showed it wasn't worth running more than 1 thread for this.)
				555	*
				556	* Meanwhile, finish updating the parent directory ref-counts for each
				557	* index entry using the current thread. (This step is very fast and
				558	* doesn't need threading.)
				559	*/
				560	td_name->istate = istate;
				561	td_name->lazy_entries = lazy_entries;
				562	if (pthread_create(&td_name->pthread, NULL, lazy_name_thread_proc, td_name))
				563	die("unable to create lazy_name_thread");
				564
				565	lazy_update_dir_ref_counts(istate, lazy_entries);
				566
				567	if (pthread_join(td_name->pthread, NULL))
				568	die("unable to join lazy_name_thread");
				569
				570	cleanup_dir_mutex();
				571
				572	free(td_name);
				573	free(td_dir);
				574	free(lazy_entries);
				575	}
				576
				577	#endif
				578
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	579	static void lazy_init_name_hash(struct index_state *istate)
				580	{
Nguyễn Thái Ngọc Duy	ca54d9b	2018-01-27 19:27:56 +0700	[diff] [blame]	581	uint64_t start = getnanotime();
				582
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	583	if (istate->name_hash_initialized)
				584	return;
Stefan Beller	56a14ea	2017-06-30 17:28:37 -0700	[diff] [blame]	585	hashmap_init(&istate->name_hash, cache_entry_cmp, NULL, istate->cache_nr);
				586	hashmap_init(&istate->dir_hash, dir_entry_cmp, NULL, istate->cache_nr);
Jeff Hostetler	846df80	2017-03-23 13:47:03 +0000	[diff] [blame]	587
				588	if (lookup_lazy_params(istate)) {
Jeff Hostetler	8b604d1	2017-09-06 15:43:48 +0000	[diff] [blame]	589	/*
				590	* Disable item counting and automatic rehashing because
				591	* we do per-chain (mod n) locking rather than whole hashmap
				592	* locking and we need to prevent the table-size from changing
				593	* and bucket items from being redistributed.
				594	*/
				595	hashmap_disable_item_counting(&istate->dir_hash);
Jeff Hostetler	846df80	2017-03-23 13:47:03 +0000	[diff] [blame]	596	threaded_lazy_init_name_hash(istate);
Jeff Hostetler	8b604d1	2017-09-06 15:43:48 +0000	[diff] [blame]	597	hashmap_enable_item_counting(&istate->dir_hash);
Jeff Hostetler	846df80	2017-03-23 13:47:03 +0000	[diff] [blame]	598	} else {
				599	int nr;
				600	for (nr = 0; nr < istate->cache_nr; nr++)
				601	hash_index_entry(istate, istate->cache[nr]);
				602	}
				603
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	604	istate->name_hash_initialized = 1;
Nguyễn Thái Ngọc Duy	ca54d9b	2018-01-27 19:27:56 +0700	[diff] [blame]	605	trace_performance_since(start, "initialize name hash");
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	606	}
				607
Jeff Hostetler	846df80	2017-03-23 13:47:03 +0000	[diff] [blame]	608	/*
				609	* A test routine for t/helper/ sources.
				610	*
				611	* Returns the number of threads used or 0 when
				612	* the non-threaded code path was used.
				613	*
				614	* Requesting threading WILL NOT override guards
				615	* in lookup_lazy_params().
				616	*/
				617	int test_lazy_init_name_hash(struct index_state *istate, int try_threaded)
				618	{
				619	lazy_nr_dir_threads = 0;
				620	lazy_try_threaded = try_threaded;
				621
				622	lazy_init_name_hash(istate);
				623
				624	return lazy_nr_dir_threads;
				625	}
				626
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	627	void add_name_hash(struct index_state istate, struct cache_entry ce)
				628	{
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	629	if (istate->name_hash_initialized)
				630	hash_index_entry(istate, ce);
				631	}
				632
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	633	void remove_name_hash(struct index_state istate, struct cache_entry ce)
				634	{
Karsten Blees	419a597	2013-11-14 20:22:27 +0100	[diff] [blame]	635	if (!istate->name_hash_initialized \|\| !(ce->ce_flags & CE_HASHED))
				636	return;
				637	ce->ce_flags &= ~CE_HASHED;
				638	hashmap_remove(&istate->name_hash, ce, ce);
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	639
Karsten Blees	419a597	2013-11-14 20:22:27 +0100	[diff] [blame]	640	if (ignore_case)
				641	remove_dir_entry(istate, ce);
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	642	}
				643
/*
 * Compare two names of the given lengths, ignoring case.
 *
 * Returns 1 when the lengths are equal and every byte pair matches either
 * exactly or after toupper(); returns 0 otherwise.
 */
static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
{
	if (len1 != len2)
		return 0;

	while (len1) {
		/* unsigned char keeps the toupper() argument in its valid range */
		unsigned char c1 = *name1++;
		unsigned char c2 = *name2++;
		len1--;
		if (c1 != c2) {
			c1 = toupper(c1);
			c2 = toupper(c2);
			if (c1 != c2)
				return 0;
		}
	}
	return 1;
}
				662
				663	static int same_name(const struct cache_entry ce, const char name, int namelen, int icase)
				664	{
				665	int len = ce_namelen(ce);
				666
				667	/*
				668	* Always do exact compare, even if we want a case-ignoring comparison;
				669	* we do the quick exact one first, because it will be the common case.
				670	*/
Jeremiah Mahler	be99ec9	2014-06-19 19:06:43 -0700	[diff] [blame]	671	if (len == namelen && !memcmp(name, ce->name, len))
Linus Torvalds	cd2fef5	2008-03-21 15:55:19 -0700	[diff] [blame]	672	return 1;
				673
Joshua Jensen	5102c61	2010-10-03 09:56:43 +0000	[diff] [blame]	674	if (!icase)
				675	return 0;
				676
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	677	return slow_same_name(name, namelen, ce->name, len);
Linus Torvalds	cd2fef5	2008-03-21 15:55:19 -0700	[diff] [blame]	678	}
				679
David Turner	41284eb	2015-10-21 13:54:11 -0400	[diff] [blame]	680	int index_dir_exists(struct index_state istate, const char name, int namelen)
Eric Sunshine	db5360f	2013-09-17 03:06:14 -0400	[diff] [blame]	681	{
Eric Sunshine	db5360f	2013-09-17 03:06:14 -0400	[diff] [blame]	682	struct dir_entry *dir;
				683
				684	lazy_init_name_hash(istate);
				685	dir = find_dir_entry(istate, name, namelen);
David Turner	41284eb	2015-10-21 13:54:11 -0400	[diff] [blame]	686	return dir && dir->nr;
				687	}
Eric Sunshine	db5360f	2013-09-17 03:06:14 -0400	[diff] [blame]	688
David Turner	41284eb	2015-10-21 13:54:11 -0400	[diff] [blame]	689	void adjust_dirname_case(struct index_state istate, char name)
				690	{
				691	const char *startPtr = name;
				692	const char *ptr = startPtr;
Eric Sunshine	db5360f	2013-09-17 03:06:14 -0400	[diff] [blame]	693
David Turner	41284eb	2015-10-21 13:54:11 -0400	[diff] [blame]	694	lazy_init_name_hash(istate);
				695	while (*ptr) {
				696	while (ptr && ptr != '/')
				697	ptr++;
				698
				699	if (*ptr == '/') {
				700	struct dir_entry *dir;
				701
Ben Peart	c95525e	2018-02-08 14:23:33 -0500	[diff] [blame]	702	dir = find_dir_entry(istate, name, ptr - name);
David Turner	41284eb	2015-10-21 13:54:11 -0400	[diff] [blame]	703	if (dir) {
				704	memcpy((void *)startPtr, dir->name + (startPtr - name), ptr - startPtr);
Ben Peart	c95525e	2018-02-08 14:23:33 -0500	[diff] [blame]	705	startPtr = ptr + 1;
David Turner	41284eb	2015-10-21 13:54:11 -0400	[diff] [blame]	706	}
Ben Peart	c95525e	2018-02-08 14:23:33 -0500	[diff] [blame]	707	ptr++;
David Turner	41284eb	2015-10-21 13:54:11 -0400	[diff] [blame]	708	}
				709	}
Eric Sunshine	db5360f	2013-09-17 03:06:14 -0400	[diff] [blame]	710	}
				711
				712	struct cache_entry index_file_exists(struct index_state istate, const char *name, int namelen, int icase)
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	713	{
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	714	struct cache_entry *ce;
				715
				716	lazy_init_name_hash(istate);
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	717
Karsten Blees	ab73a9d	2014-07-03 00:22:11 +0200	[diff] [blame]	718	ce = hashmap_get_from_hash(&istate->name_hash,
				719	memihash(name, namelen), NULL);
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	720	while (ce) {
Karsten Blees	419a597	2013-11-14 20:22:27 +0100	[diff] [blame]	721	if (same_name(ce, name, namelen, icase))
				722	return ce;
Karsten Blees	8b01378	2013-11-14 20:21:58 +0100	[diff] [blame]	723	ce = hashmap_get_next(&istate->name_hash, ce);
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	724	}
Linus Torvalds	df292c7	2008-03-21 15:53:00 -0700	[diff] [blame]	725	return NULL;
Linus Torvalds	96872bc	2008-03-21 13:16:24 -0700	[diff] [blame]	726	}
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	727
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	728	void free_name_hash(struct index_state *istate)
				729	{
				730	if (!istate->name_hash_initialized)
				731	return;
				732	istate->name_hash_initialized = 0;
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	733
Karsten Blees	8b01378	2013-11-14 20:21:58 +0100	[diff] [blame]	734	hashmap_free(&istate->name_hash, 0);
Karsten Blees	e05881a	2013-11-14 20:20:58 +0100	[diff] [blame]	735	hashmap_free(&istate->dir_hash, 1);
Karsten Blees	2092678	2013-02-28 00:57:48 +0100	[diff] [blame]	736	}