Tolerate zlib deflation with window size < 32KB

Git currently reports loose objects as 'corrupt' if they've been
deflated using a window size less than 32KB, because the
experimental_loose_object() function doesn't recognise the header
byte as a zlib header. This patch makes the function tolerant of
all valid window sizes (15-bit to 8-bit) - but doesn't sacrifice
its accuracy in distinguishing the standard loose-object format
from the experimental (now abandoned) format.

On memory-constrained systems zlib may use a much smaller window
size - working on Agit, I found that Android uses a 4KB window,
giving a header byte of 0x48, not 0x78. Consequently all loose
objects generated appear 'corrupt', which is why Agit is a read-only
Git client at this time - I don't want my client to generate Git
repos that other clients treat as broken :(

This patch makes Git tolerant of different deflate settings - it
might appear that it changes experimental_loose_object() to the point
where it could incorrectly identify the experimental format as the
standard one, but the two criteria (bitmask & checksum) can only
give a false result for an experimental object where both of the
following are true:

1) object size is exactly 8 bytes when uncompressed (bitmask)
2) ([single-byte in-pack git type&size header] * 256
   + [1st byte of the following zlib header]) % 31 = 0 (checksum)

As it happens, for all possible combinations of valid object type
(1-4) and window bits (0-7), the only time when the checksum will be
divisible by 31 is for 0x1838 - i.e. object type *1*, a Commit - which,
due to the fields all Commit objects must contain, could never be as
small as 8 bytes in size.

Given this, the combination of the two criteria (bitmask & checksum)
always correctly determines the buffer format, and is more tolerant
than the previous version.

The alternative to this patch is simply removing support for the
experimental format, which I am also totally cool with.

References:

Android uses a 4KB window for deflation:
http://android.git.kernel.org/?p=platform/libcore.git;a=blob;f=luni/src/main/native/java_util_zip_Deflater.cpp;h=c0b2feff196e63a7b85d97cf9ae5bb2583409c28;hb=refs/heads/gingerbread#l53

Code snippet searching for false positives with the zlib checksum:
https://gist.github.com/1118177

Signed-off-by: Roberto Tyley <roberto.tyley@guardian.co.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
19 files changed
tree: 3d7b763261b6aeb1e01cbb2b4a8bdc144c9f029a
  1. block-sha1/
  2. builtin/
  3. compat/
  4. contrib/
  5. Documentation/
  6. git-gui/
  7. git_remote_helpers/
  8. gitk-git/
  9. gitweb/
  10. perl/
  11. po/
  12. ppc/
  13. t/
  14. templates/
  15. vcs-svn/
  16. xdiff/
  17. .gitattributes
  18. .gitignore
  19. .mailmap
  20. abspath.c
  21. aclocal.m4
  22. advice.c
  23. advice.h
  24. alias.c
  25. alloc.c
  26. archive-tar.c
  27. archive-zip.c
  28. archive.c
  29. archive.h
  30. attr.c
  31. attr.h
  32. base85.c
  33. bisect.c
  34. bisect.h
  35. blob.c
  36. blob.h
  37. branch.c
  38. branch.h
  39. builtin.h
  40. bundle.c
  41. bundle.h
  42. cache-tree.c
  43. cache-tree.h
  44. cache.h
  45. check-builtins.sh
  46. check-racy.c
  47. check_bindir
  48. color.c
  49. color.h
  50. combine-diff.c
  51. command-list.txt
  52. commit.c
  53. commit.h
  54. config.c
  55. config.mak.in
  56. configure.ac
  57. connect.c
  58. convert.c
  59. copy.c
  60. COPYING
  61. csum-file.c
  62. csum-file.h
  63. ctype.c
  64. daemon.c
  65. date.c
  66. decorate.c
  67. decorate.h
  68. delta.h
  69. diff-delta.c
  70. diff-lib.c
  71. diff-no-index.c
  72. diff.c
  73. diff.h
  74. diffcore-break.c
  75. diffcore-delta.c
  76. diffcore-order.c
  77. diffcore-pickaxe.c
  78. diffcore-rename.c
  79. diffcore.h
  80. dir.c
  81. dir.h
  82. editor.c
  83. entry.c
  84. environment.c
  85. exec_cmd.c
  86. exec_cmd.h
  87. fast-import.c
  88. fetch-pack.h
  89. fixup-builtins
  90. fsck.c
  91. fsck.h
  92. generate-cmdlist.sh
  93. gettext.c
  94. gettext.h
  95. git-add--interactive.perl
  96. git-am.sh
  97. git-archimport.perl
  98. git-bisect.sh
  99. git-compat-util.h
  100. git-cvsexportcommit.perl
  101. git-cvsimport.perl
  102. git-cvsserver.perl
  103. git-difftool--helper.sh
  104. git-difftool.perl
  105. git-filter-branch.sh
  106. git-instaweb.sh
  107. git-lost-found.sh
  108. git-merge-octopus.sh
  109. git-merge-one-file.sh
  110. git-merge-resolve.sh
  111. git-mergetool--lib.sh
  112. git-mergetool.sh
  113. git-parse-remote.sh
  114. git-pull.sh
  115. git-quiltimport.sh
  116. git-rebase--am.sh
  117. git-rebase--interactive.sh
  118. git-rebase--merge.sh
  119. git-rebase.sh
  120. git-relink.perl
  121. git-remote-testgit.py
  122. git-repack.sh
  123. git-request-pull.sh
  124. git-send-email.perl
  125. git-sh-i18n.sh
  126. git-sh-setup.sh
  127. git-stash.sh
  128. git-submodule.sh
  129. git-svn.perl
  130. GIT-VERSION-GEN
  131. git-web--browse.sh
  132. git.c
  133. git.spec.in
  134. graph.c
  135. graph.h
  136. grep.c
  137. grep.h
  138. hash.c
  139. hash.h
  140. help.c
  141. help.h
  142. hex.c
  143. http-backend.c
  144. http-fetch.c
  145. http-push.c
  146. http-walker.c
  147. http.c
  148. http.h
  149. ident.c
  150. imap-send.c
  151. INSTALL
  152. levenshtein.c
  153. levenshtein.h
  154. LGPL-2.1
  155. list-objects.c
  156. list-objects.h
  157. ll-merge.c
  158. ll-merge.h
  159. lockfile.c
  160. log-tree.c
  161. log-tree.h
  162. mailmap.c
  163. mailmap.h
  164. Makefile
  165. match-trees.c
  166. merge-file.c
  167. merge-file.h
  168. merge-recursive.c
  169. merge-recursive.h
  170. name-hash.c
  171. notes-cache.c
  172. notes-cache.h
  173. notes-merge.c
  174. notes-merge.h
  175. notes.c
  176. notes.h
  177. object.c
  178. object.h
  179. pack-check.c
  180. pack-refs.c
  181. pack-refs.h
  182. pack-revindex.c
  183. pack-revindex.h
  184. pack-write.c
  185. pack.h
  186. pager.c
  187. parse-options.c
  188. parse-options.h
  189. patch-delta.c
  190. patch-ids.c
  191. patch-ids.h
  192. path.c
  193. pkt-line.c
  194. pkt-line.h
  195. preload-index.c
  196. pretty.c
  197. progress.c
  198. progress.h
  199. quote.c
  200. quote.h
  201. reachable.c
  202. reachable.h
  203. read-cache.c
  204. README
  205. reflog-walk.c
  206. reflog-walk.h
  207. refs.c
  208. refs.h
  209. remote-curl.c
  210. remote.c
  211. remote.h
  212. replace_object.c
  213. rerere.c
  214. rerere.h
  215. resolve-undo.c
  216. resolve-undo.h
  217. revision.c
  218. revision.h
  219. run-command.c
  220. run-command.h
  221. send-pack.h
  222. server-info.c
  223. setup.c
  224. sh-i18n--envsubst.c
  225. sha1-array.c
  226. sha1-array.h
  227. sha1-lookup.c
  228. sha1-lookup.h
  229. sha1_file.c
  230. sha1_name.c
  231. shallow.c
  232. shell.c
  233. shortlog.h
  234. show-index.c
  235. sideband.c
  236. sideband.h
  237. sigchain.c
  238. sigchain.h
  239. strbuf.c
  240. strbuf.h
  241. string-list.c
  242. string-list.h
  243. submodule.c
  244. submodule.h
  245. symlinks.c
  246. tag.c
  247. tag.h
  248. tar.h
  249. test-chmtime.c
  250. test-ctype.c
  251. test-date.c
  252. test-delta.c
  253. test-dump-cache-tree.c
  254. test-genrandom.c
  255. test-index-version.c
  256. test-line-buffer.c
  257. test-match-trees.c
  258. test-mktemp.c
  259. test-obj-pool.c
  260. test-parse-options.c
  261. test-path-utils.c
  262. test-run-command.c
  263. test-sha1.c
  264. test-sha1.sh
  265. test-sigchain.c
  266. test-string-pool.c
  267. test-subprocess.c
  268. test-svn-fe.c
  269. test-treap.c
  270. thread-utils.c
  271. thread-utils.h
  272. trace.c
  273. transport-helper.c
  274. transport.c
  275. transport.h
  276. tree-diff.c
  277. tree-walk.c
  278. tree-walk.h
  279. tree.c
  280. tree.h
  281. unimplemented.sh
  282. unpack-trees.c
  283. unpack-trees.h
  284. upload-pack.c
  285. url.c
  286. url.h
  287. usage.c
  288. userdiff.c
  289. userdiff.h
  290. utf8.c
  291. utf8.h
  292. walker.c
  293. walker.h
  294. wrap-for-bin.sh
  295. wrapper.c
  296. write_or_die.c
  297. ws.c
  298. wt-status.c
  299. wt-status.h
  300. xdiff-interface.c
  301. xdiff-interface.h
  302. zlib.c