untracked cache: record/validate dir mtime and reuse cached output

The main readdir loop in read_directory_recursive() is replaced with a
new one that checks if cached results of a directory are still valid.

If a file is added or removed from the index, the containing directory
is invalidated (but not its subdirs). If a directory's mtime is changed,
the same happens. If a .gitignore is updated, the containing directory
and all subdirs are invalidated recursively. If dir_struct#flags or
other conditions change, the cache is ignored.

If a directory is invalidated, we opendir/readdir/closedir and run the
exclude machinery on that directory listing as usual. If untracked
cache is also enabled, we'll update the cache along the way. If a
directory is validated, we simply pull the untracked listing out from
the cache. The cache also records the list of direct subdirs that we
have to recurse in. Fully excluded directories are seen as "untracked
files".

In the best case when no dirs are invalidated, read_directory()
becomes a series of

  stat(dir), open(.gitignore), fstat(), read(), close() and optionally
  hash_sha1_file()

For comparison, standard read_directory() is a sequence of

  opendir(), readdir(), open(.gitignore), fstat(), read(), close(), the
  expensive last_exclude_matching() and closedir().

We already try not to open(.gitignore) if we know it does not exist,
so open/fstat/read/close sequence does not apply to every
directory. The sequence could be reduced further, as noted in
prep_exclude() in another patch. So in theory, the entire best-case
read_directory sequence could be reduced to a series of stat() and
nothing else.

This is not a silver bullet approach. When you compile a C file, for
example, the old .o file is removed and a new one with the same name
created, effectively invalidating the containing directory's cache
(but not its subdirectories). If your build process touches every
directory, this cache adds extra overhead for nothing, so it's a good
idea to separate generated files from tracked files. Editors may use
the same strategy for saving files. And of course you're out of luck
running your repo on an unsupported filesystem and/or operating system.

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 files changed
tree: 6526f8ed8ff6554d01cd1b2e54c99f2b5456eea8
  1. block-sha1/
  2. builtin/
  3. compat/
  4. contrib/
  5. Documentation/
  6. ewah/
  7. git-gui/
  8. gitk-git/
  9. gitweb/
  10. mergetools/
  11. perl/
  12. po/
  13. ppc/
  14. t/
  15. templates/
  16. vcs-svn/
  17. xdiff/
  18. .gitattributes
  19. .gitignore
  20. .mailmap
  21. abspath.c
  22. aclocal.m4
  23. advice.c
  24. advice.h
  25. alias.c
  26. alloc.c
  27. archive-tar.c
  28. archive-zip.c
  29. archive.c
  30. archive.h
  31. argv-array.c
  32. argv-array.h
  33. attr.c
  34. attr.h
  35. base85.c
  36. bisect.c
  37. bisect.h
  38. blob.c
  39. blob.h
  40. branch.c
  41. branch.h
  42. builtin.h
  43. bulk-checkin.c
  44. bulk-checkin.h
  45. bundle.c
  46. bundle.h
  47. cache-tree.c
  48. cache-tree.h
  49. cache.h
  50. check-builtins.sh
  51. check-racy.c
  52. check_bindir
  53. color.c
  54. color.h
  55. column.c
  56. column.h
  57. combine-diff.c
  58. command-list.txt
  59. commit-slab.h
  60. commit.c
  61. commit.h
  62. config.c
  63. config.mak.in
  64. config.mak.uname
  65. configure.ac
  66. connect.c
  67. connect.h
  68. connected.c
  69. connected.h
  70. convert.c
  71. convert.h
  72. copy.c
  73. COPYING
  74. credential-cache--daemon.c
  75. credential-cache.c
  76. credential-store.c
  77. credential.c
  78. credential.h
  79. csum-file.c
  80. csum-file.h
  81. ctype.c
  82. daemon.c
  83. date.c
  84. decorate.c
  85. decorate.h
  86. delta.h
  87. diff-delta.c
  88. diff-lib.c
  89. diff-no-index.c
  90. diff.c
  91. diff.h
  92. diffcore-break.c
  93. diffcore-delta.c
  94. diffcore-order.c
  95. diffcore-pickaxe.c
  96. diffcore-rename.c
  97. diffcore.h
  98. dir.c
  99. dir.h
  100. editor.c
  101. entry.c
  102. environment.c
  103. exec_cmd.c
  104. exec_cmd.h
  105. fast-import.c
  106. fetch-pack.c
  107. fetch-pack.h
  108. fmt-merge-msg.h
  109. fsck.c
  110. fsck.h
  111. generate-cmdlist.sh
  112. gettext.c
  113. gettext.h
  114. git-add--interactive.perl
  115. git-am.sh
  116. git-archimport.perl
  117. git-bisect.sh
  118. git-compat-util.h
  119. git-cvsexportcommit.perl
  120. git-cvsimport.perl
  121. git-cvsserver.perl
  122. git-difftool--helper.sh
  123. git-difftool.perl
  124. git-filter-branch.sh
  125. git-instaweb.sh
  126. git-merge-octopus.sh
  127. git-merge-one-file.sh
  128. git-merge-resolve.sh
  129. git-mergetool--lib.sh
  130. git-mergetool.sh
  131. git-p4.py
  132. git-parse-remote.sh
  133. git-pull.sh
  134. git-quiltimport.sh
  135. git-rebase--am.sh
  136. git-rebase--interactive.sh
  137. git-rebase--merge.sh
  138. git-rebase.sh
  139. git-relink.perl
  140. git-remote-testgit.sh
  141. git-request-pull.sh
  142. git-send-email.perl
  143. git-sh-i18n.sh
  144. git-sh-setup.sh
  145. git-stash.sh
  146. git-submodule.sh
  147. git-svn.perl
  148. GIT-VERSION-GEN
  149. git-web--browse.sh
  150. git.c
  151. git.rc
  152. git.spec.in
  153. gpg-interface.c
  154. gpg-interface.h
  155. graph.c
  156. graph.h
  157. grep.c
  158. grep.h
  159. hashmap.c
  160. hashmap.h
  161. help.c
  162. help.h
  163. hex.c
  164. http-backend.c
  165. http-fetch.c
  166. http-push.c
  167. http-walker.c
  168. http.c
  169. http.h
  170. ident.c
  171. imap-send.c
  172. INSTALL
  173. khash.h
  174. kwset.c
  175. kwset.h
  176. levenshtein.c
  177. levenshtein.h
  178. LGPL-2.1
  179. line-log.c
  180. line-log.h
  181. line-range.c
  182. line-range.h
  183. list-objects.c
  184. list-objects.h
  185. ll-merge.c
  186. ll-merge.h
  187. lockfile.c
  188. lockfile.h
  189. log-tree.c
  190. log-tree.h
  191. mailmap.c
  192. mailmap.h
  193. Makefile
  194. match-trees.c
  195. merge-blobs.c
  196. merge-blobs.h
  197. merge-recursive.c
  198. merge-recursive.h
  199. merge.c
  200. mergesort.c
  201. mergesort.h
  202. name-hash.c
  203. notes-cache.c
  204. notes-cache.h
  205. notes-merge.c
  206. notes-merge.h
  207. notes-utils.c
  208. notes-utils.h
  209. notes.c
  210. notes.h
  211. object.c
  212. object.h
  213. pack-bitmap-write.c
  214. pack-bitmap.c
  215. pack-bitmap.h
  216. pack-check.c
  217. pack-objects.c
  218. pack-objects.h
  219. pack-revindex.c
  220. pack-revindex.h
  221. pack-write.c
  222. pack.h
  223. pager.c
  224. parse-options-cb.c
  225. parse-options.c
  226. parse-options.h
  227. patch-delta.c
  228. patch-ids.c
  229. patch-ids.h
  230. path.c
  231. pathspec.c
  232. pathspec.h
  233. pkt-line.c
  234. pkt-line.h
  235. preload-index.c
  236. pretty.c
  237. prio-queue.c
  238. prio-queue.h
  239. progress.c
  240. progress.h
  241. prompt.c
  242. prompt.h
  243. quote.c
  244. quote.h
  245. reachable.c
  246. reachable.h
  247. read-cache.c
  248. README
  249. reflog-walk.c
  250. reflog-walk.h
  251. refs.c
  252. refs.h
  253. RelNotes
  254. remote-curl.c
  255. remote-testsvn.c
  256. remote.c
  257. remote.h
  258. replace_object.c
  259. rerere.c
  260. rerere.h
  261. resolve-undo.c
  262. resolve-undo.h
  263. revision.c
  264. revision.h
  265. run-command.c
  266. run-command.h
  267. send-pack.c
  268. send-pack.h
  269. sequencer.c
  270. sequencer.h
  271. server-info.c
  272. setup.c
  273. sh-i18n--envsubst.c
  274. sha1-array.c
  275. sha1-array.h
  276. sha1-lookup.c
  277. sha1-lookup.h
  278. sha1_file.c
  279. sha1_name.c
  280. shallow.c
  281. shell.c
  282. shortlog.h
  283. show-index.c
  284. sideband.c
  285. sideband.h
  286. sigchain.c
  287. sigchain.h
  288. split-index.c
  289. split-index.h
  290. strbuf.c
  291. strbuf.h
  292. streaming.c
  293. streaming.h
  294. string-list.c
  295. string-list.h
  296. submodule.c
  297. submodule.h
  298. symlinks.c
  299. tag.c
  300. tag.h
  301. tar.h
  302. test-chmtime.c
  303. test-config.c
  304. test-ctype.c
  305. test-date.c
  306. test-delta.c
  307. test-dump-cache-tree.c
  308. test-dump-split-index.c
  309. test-genrandom.c
  310. test-hashmap.c
  311. test-index-version.c
  312. test-line-buffer.c
  313. test-match-trees.c
  314. test-mergesort.c
  315. test-mktemp.c
  316. test-parse-options.c
  317. test-path-utils.c
  318. test-prio-queue.c
  319. test-read-cache.c
  320. test-regex.c
  321. test-revision-walking.c
  322. test-run-command.c
  323. test-scrap-cache-tree.c
  324. test-sha1-array.c
  325. test-sha1.c
  326. test-sha1.sh
  327. test-sigchain.c
  328. test-string-list.c
  329. test-subprocess.c
  330. test-svn-fe.c
  331. test-urlmatch-normalization.c
  332. test-wildmatch.c
  333. thread-utils.c
  334. thread-utils.h
  335. trace.c
  336. trace.h
  337. trailer.c
  338. trailer.h
  339. transport-helper.c
  340. transport.c
  341. transport.h
  342. tree-diff.c
  343. tree-walk.c
  344. tree-walk.h
  345. tree.c
  346. tree.h
  347. unicode_width.h
  348. unimplemented.sh
  349. unix-socket.c
  350. unix-socket.h
  351. unpack-trees.c
  352. unpack-trees.h
  353. update_unicode.sh
  354. upload-pack.c
  355. url.c
  356. url.h
  357. urlmatch.c
  358. urlmatch.h
  359. usage.c
  360. userdiff.c
  361. userdiff.h
  362. utf8.c
  363. utf8.h
  364. varint.c
  365. varint.h
  366. version.c
  367. version.h
  368. versioncmp.c
  369. walker.c
  370. walker.h
  371. wildmatch.c
  372. wildmatch.h
  373. wrap-for-bin.sh
  374. wrapper.c
  375. write_or_die.c
  376. ws.c
  377. wt-status.c
  378. wt-status.h
  379. xdiff-interface.c
  380. xdiff-interface.h
  381. zlib.c