Do linear-time/space rename logic for exact renames

This implements a smarter rename detector for exact renames, which
rather than doing a pairwise comparison (time O(m*n)) will just hash the
files into a hash-table (size O(n+m)), and only do pairwise comparisons
to renames that have the same hash (time O(n+m) except for unrealistic
hash collissions, which we just cull aggressively).

Admittedly the exact rename case is not nearly as interesting as the
generic case, but it's an important case none-the-less. A similar general
approach should work for the generic case too, but even then you do need
to handle the exact renames/copies separately (to avoid the inevitable
added cost factor that comes from the _size_ of the file), so this is
worth doing.

In the expectation that we will indeed do the same hashing trick for the
general rename case, this code uses a generic hash-table implementation
that can be used for other things too.  In fact, we might be able to
consolidate some of our existing hash tables with the new generic code
in hash.[ch].

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
4 files changed
tree: 3ee3d48a47f89d7c41c5a9e4430f0db6507f4d9e
  1. arm/
  2. compat/
  3. contrib/
  4. Documentation/
  5. git-gui/
  6. gitweb/
  7. mozilla-sha1/
  8. perl/
  9. ppc/
  10. t/
  11. templates/
  12. xdiff/
  13. .gitignore
  14. .mailmap
  15. alloc.c
  16. archive-tar.c
  17. archive-zip.c
  18. archive.h
  19. attr.c
  20. attr.h
  21. base85.c
  22. blob.c
  23. blob.h
  24. builtin-add.c
  25. builtin-annotate.c
  26. builtin-apply.c
  27. builtin-archive.c
  28. builtin-blame.c
  29. builtin-branch.c
  30. builtin-bundle.c
  31. builtin-cat-file.c
  32. builtin-check-attr.c
  33. builtin-check-ref-format.c
  34. builtin-checkout-index.c
  35. builtin-commit-tree.c
  36. builtin-config.c
  37. builtin-count-objects.c
  38. builtin-describe.c
  39. builtin-diff-files.c
  40. builtin-diff-index.c
  41. builtin-diff-tree.c
  42. builtin-diff.c
  43. builtin-fetch--tool.c
  44. builtin-fetch-pack.c
  45. builtin-fetch.c
  46. builtin-fmt-merge-msg.c
  47. builtin-for-each-ref.c
  48. builtin-fsck.c
  49. builtin-gc.c
  50. builtin-grep.c
  51. builtin-http-fetch.c
  52. builtin-init-db.c
  53. builtin-log.c
  54. builtin-ls-files.c
  55. builtin-ls-tree.c
  56. builtin-mailinfo.c
  57. builtin-mailsplit.c
  58. builtin-merge-base.c
  59. builtin-merge-file.c
  60. builtin-mv.c
  61. builtin-name-rev.c
  62. builtin-pack-objects.c
  63. builtin-pack-refs.c
  64. builtin-prune-packed.c
  65. builtin-prune.c
  66. builtin-push.c
  67. builtin-read-tree.c
  68. builtin-reflog.c
  69. builtin-rerere.c
  70. builtin-reset.c
  71. builtin-rev-list.c
  72. builtin-rev-parse.c
  73. builtin-revert.c
  74. builtin-rm.c
  75. builtin-runstatus.c
  76. builtin-shortlog.c
  77. builtin-show-branch.c
  78. builtin-show-ref.c
  79. builtin-stripspace.c
  80. builtin-symbolic-ref.c
  81. builtin-tag.c
  82. builtin-tar-tree.c
  83. builtin-unpack-objects.c
  84. builtin-update-index.c
  85. builtin-update-ref.c
  86. builtin-upload-archive.c
  87. builtin-verify-pack.c
  88. builtin-verify-tag.c
  89. builtin-write-tree.c
  90. builtin.h
  91. bundle.c
  92. bundle.h
  93. cache-tree.c
  94. cache-tree.h
  95. cache.h
  96. check-builtins.sh
  97. check-racy.c
  98. color.c
  99. color.h
  100. combine-diff.c
  101. commit.c
  102. commit.h
  103. config.c
  104. config.mak.in
  105. configure.ac
  106. connect.c
  107. convert.c
  108. copy.c
  109. COPYING
  110. csum-file.c
  111. csum-file.h
  112. ctype.c
  113. daemon.c
  114. date.c
  115. decorate.c
  116. decorate.h
  117. delta.h
  118. diff-delta.c
  119. diff-lib.c
  120. diff.c
  121. diff.h
  122. diffcore-break.c
  123. diffcore-delta.c
  124. diffcore-order.c
  125. diffcore-pickaxe.c
  126. diffcore-rename.c
  127. diffcore.h
  128. dir.c
  129. dir.h
  130. dump-cache-tree.c
  131. entry.c
  132. environment.c
  133. exec_cmd.c
  134. exec_cmd.h
  135. fast-import.c
  136. fetch-pack.h
  137. fixup-builtins
  138. generate-cmdlist.sh
  139. git-add--interactive.perl
  140. git-am.sh
  141. git-archimport.perl
  142. git-bisect.sh
  143. git-checkout.sh
  144. git-clean.sh
  145. git-clone.sh
  146. git-commit.sh
  147. git-compat-util.h
  148. git-cvsexportcommit.perl
  149. git-cvsimport.perl
  150. git-cvsserver.perl
  151. git-filter-branch.sh
  152. git-instaweb.sh
  153. git-lost-found.sh
  154. git-ls-remote.sh
  155. git-merge-octopus.sh
  156. git-merge-one-file.sh
  157. git-merge-ours.sh
  158. git-merge-resolve.sh
  159. git-merge-stupid.sh
  160. git-merge.sh
  161. git-mergetool.sh
  162. git-parse-remote.sh
  163. git-pull.sh
  164. git-quiltimport.sh
  165. git-rebase--interactive.sh
  166. git-rebase.sh
  167. git-relink.perl
  168. git-remote.perl
  169. git-repack.sh
  170. git-request-pull.sh
  171. git-send-email.perl
  172. git-sh-setup.sh
  173. git-stash.sh
  174. git-submodule.sh
  175. git-svn.perl
  176. git-svnimport.perl
  177. GIT-VERSION-GEN
  178. git.c
  179. git.spec.in
  180. gitk
  181. grep.c
  182. grep.h
  183. hash-object.c
  184. hash.c
  185. hash.h
  186. help.c
  187. http-push.c
  188. http-walker.c
  189. http.c
  190. http.h
  191. ident.c
  192. imap-send.c
  193. index-pack.c
  194. INSTALL
  195. interpolate.c
  196. interpolate.h
  197. list-objects.c
  198. list-objects.h
  199. lockfile.c
  200. log-tree.c
  201. log-tree.h
  202. mailmap.c
  203. mailmap.h
  204. Makefile
  205. match-trees.c
  206. merge-file.c
  207. merge-index.c
  208. merge-recursive.c
  209. merge-tree.c
  210. mktag.c
  211. mktree.c
  212. object-refs.c
  213. object.c
  214. object.h
  215. pack-check.c
  216. pack-redundant.c
  217. pack-write.c
  218. pack.h
  219. pager.c
  220. patch-delta.c
  221. patch-id.c
  222. patch-ids.c
  223. patch-ids.h
  224. path-list.c
  225. path-list.h
  226. path.c
  227. peek-remote.c
  228. pkt-line.c
  229. pkt-line.h
  230. progress.c
  231. progress.h
  232. quote.c
  233. quote.h
  234. reachable.c
  235. reachable.h
  236. read-cache.c
  237. README
  238. receive-pack.c
  239. reflog-walk.c
  240. reflog-walk.h
  241. refs.c
  242. refs.h
  243. remote.c
  244. remote.h
  245. revision.c
  246. revision.h
  247. run-command.c
  248. run-command.h
  249. send-pack.c
  250. server-info.c
  251. setup.c
  252. sha1_file.c
  253. sha1_name.c
  254. shallow.c
  255. shell.c
  256. show-index.c
  257. sideband.c
  258. sideband.h
  259. strbuf.c
  260. strbuf.h
  261. symlinks.c
  262. tag.c
  263. tag.h
  264. tar.h
  265. test-absolute-path.c
  266. test-chmtime.c
  267. test-date.c
  268. test-delta.c
  269. test-genrandom.c
  270. test-match-trees.c
  271. test-sha1.c
  272. test-sha1.sh
  273. trace.c
  274. transport.c
  275. transport.h
  276. tree-diff.c
  277. tree-walk.c
  278. tree-walk.h
  279. tree.c
  280. tree.h
  281. unpack-file.c
  282. unpack-trees.c
  283. unpack-trees.h
  284. update-server-info.c
  285. upload-pack.c
  286. usage.c
  287. utf8.c
  288. utf8.h
  289. var.c
  290. walker.c
  291. walker.h
  292. write_or_die.c
  293. wt-status.c
  294. wt-status.h
  295. xdiff-interface.c
  296. xdiff-interface.h