blob: 93385e12baa0d90ae475bd02500edf5ddcce320c [file] [log] [blame]
Nicolas Pitrea310d432005-05-19 10:27:14 -04001/*
2 * diff-delta.c: generate a delta between two buffers
3 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04004 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5 * http://www.xmailserver.org/xdiff-lib.html
Nicolas Pitrea310d432005-05-19 10:27:14 -04006 *
Nicolas Pitre03aa8ff2009-09-14 02:41:16 -04007 * Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007
Nicolas Pitrea310d432005-05-19 10:27:14 -04008 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04009 * This code is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
Nicolas Pitrea310d432005-05-19 10:27:14 -040012 */
13
Florian Forster63f17562006-06-18 17:18:04 +020014#include "git-compat-util.h"
Junio C Hamano85023572006-12-19 14:34:12 -080015#include "delta.h"
Nicolas Pitrea310d432005-05-19 10:27:14 -040016
Nicolas Pitre08abe662006-04-24 23:07:47 -040017/* maximum hash entry list for the same hash bucket */
18#define HASH_LIMIT 64
Nicolas Pitrea310d432005-05-19 10:27:14 -040019
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -040020#define RABIN_SHIFT 23
21#define RABIN_WINDOW 16
22
23static const unsigned int T[256] = {
24 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67};
68
69static const unsigned int U[256] = {
70 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400114
Nicolas Pitre08abe662006-04-24 23:07:47 -0400115struct index_entry {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400116 const unsigned char *ptr;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500117 unsigned int val;
David Kastrupd2100862007-09-08 23:17:44 +0200118};
119
120struct unpacked_index_entry {
121 struct index_entry entry;
122 struct unpacked_index_entry *next;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500123};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400124
Nicolas Pitre08abe662006-04-24 23:07:47 -0400125struct delta_index {
Brian Downing11779e72007-07-12 07:55:48 -0500126 unsigned long memsize;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400127 const void *src_buf;
128 unsigned long src_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400129 unsigned int hash_mask;
Florian Forster63f17562006-06-18 17:18:04 +0200130 struct index_entry *hash[FLEX_ARRAY];
Nicolas Pitre08abe662006-04-24 23:07:47 -0400131};
132
133struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400134{
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400135 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400136 const unsigned char *data, *buffer = buf;
137 struct delta_index *index;
David Kastrupd2100862007-09-08 23:17:44 +0200138 struct unpacked_index_entry *entry, **hash;
139 struct index_entry *packed_entry, **packed_hash;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500140 void *mem;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400141 unsigned long memsize;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400142
Nicolas Pitre08abe662006-04-24 23:07:47 -0400143 if (!buf || !bufsize)
144 return NULL;
145
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400146 /* Determine index hash size. Note that indexing skips the
Pavel Roskin82e5a822006-07-10 01:50:18 -0400147 first byte to allow for optimizing the Rabin's polynomial
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400148 initialization in create_delta(). */
Nicolas Pitre506049c2010-08-21 01:00:13 -0400149 entries = (bufsize - 1) / RABIN_WINDOW;
150 if (bufsize >= 0xffffffffUL) {
151 /*
152 * Current delta format can't encode offsets into
153 * reference buffer with more than 32 bits.
154 */
155 entries = 0xfffffffeU / RABIN_WINDOW;
156 }
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500157 hsize = entries / 4;
Pierre Habouzitb05faa22006-08-23 11:17:55 +0200158 for (i = 4; (1u << i) < hsize && i < 31; i++);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500159 hsize = 1 << i;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400160 hmask = hsize - 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400161
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500162 /* allocate lookup index */
David Kastrupd2100862007-09-08 23:17:44 +0200163 memsize = sizeof(*hash) * hsize +
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400164 sizeof(*entry) * entries;
165 mem = malloc(memsize);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500166 if (!mem)
167 return NULL;
168 hash = mem;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400169 mem = hash + hsize;
170 entry = mem;
171
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500172 memset(hash, 0, hsize * sizeof(*hash));
173
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500174 /* allocate an array to count hash entries */
175 hash_count = calloc(hsize, sizeof(*hash_count));
176 if (!hash_count) {
David Kastrupd2100862007-09-08 23:17:44 +0200177 free(hash);
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500178 return NULL;
179 }
180
181 /* then populate the index */
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400182 prev_val = ~0;
183 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
184 data >= buffer;
185 data -= RABIN_WINDOW) {
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400186 unsigned int val = 0;
187 for (i = 1; i <= RABIN_WINDOW; i++)
188 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400189 if (val == prev_val) {
190 /* keep the lowest of consecutive identical blocks */
David Kastrupd2100862007-09-08 23:17:44 +0200191 entry[-1].entry.ptr = data + RABIN_WINDOW;
192 --entries;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400193 } else {
194 prev_val = val;
195 i = val & hmask;
David Kastrupd2100862007-09-08 23:17:44 +0200196 entry->entry.ptr = data + RABIN_WINDOW;
197 entry->entry.val = val;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400198 entry->next = hash[i];
199 hash[i] = entry++;
200 hash_count[i]++;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400201 }
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400202 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400203
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500204 /*
205 * Determine a limit on the number of entries in the same hash
Pavel Roskin82e5a822006-07-10 01:50:18 -0400206 * bucket. This guards us against pathological data sets causing
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500207 * really bad hash distribution with most entries in the same hash
208 * bucket that would bring us to O(m*n) computing costs (m and n
209 * corresponding to reference and target buffer sizes).
210 *
Nicolas Pitre08abe662006-04-24 23:07:47 -0400211 * Make sure none of the hash buckets has more entries than
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500212 * we're willing to test. Otherwise we cull the entry list
213 * uniformly to still preserve a good repartition across
214 * the reference buffer.
215 */
216 for (i = 0; i < hsize; i++) {
David Kastrup02e665c2007-09-08 23:25:55 +0200217 int acc;
218
219 if (hash_count[i] <= HASH_LIMIT)
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500220 continue;
David Kastrup02e665c2007-09-08 23:25:55 +0200221
David Kastrup02e665c2007-09-08 23:25:55 +0200222 /* We leave exactly HASH_LIMIT entries in the bucket */
Nicolas Pitrece85b052007-12-18 10:15:39 -0500223 entries -= hash_count[i] - HASH_LIMIT;
David Kastrup02e665c2007-09-08 23:25:55 +0200224
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500225 entry = hash[i];
David Kastrup02e665c2007-09-08 23:25:55 +0200226 acc = 0;
Nicolas Pitrece85b052007-12-18 10:15:39 -0500227
228 /*
229 * Assume that this loop is gone through exactly
230 * HASH_LIMIT times and is entered and left with
231 * acc==0. So the first statement in the loop
232 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
233 * to the accumulator, and the inner loop consequently
234 * is run (hash_count[i]-HASH_LIMIT) times, removing
235 * one element from the list each time. Since acc
236 * balances out to 0 at the final run, the inner loop
237 * body can't be left with entry==NULL. So we indeed
238 * encounter entry==NULL in the outer loop only.
239 */
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500240 do {
David Kastrup02e665c2007-09-08 23:25:55 +0200241 acc += hash_count[i] - HASH_LIMIT;
242 if (acc > 0) {
243 struct unpacked_index_entry *keep = entry;
244 do {
245 entry = entry->next;
246 acc -= HASH_LIMIT;
247 } while (acc > 0);
248 keep->next = entry->next;
249 }
250 entry = entry->next;
251 } while (entry);
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500252 }
253 free(hash_count);
254
Nicolas Pitrece85b052007-12-18 10:15:39 -0500255 /*
256 * Now create the packed index in array form
257 * rather than linked lists.
258 */
David Kastrupd2100862007-09-08 23:17:44 +0200259 memsize = sizeof(*index)
260 + sizeof(*packed_hash) * (hsize+1)
261 + sizeof(*packed_entry) * entries;
David Kastrupd2100862007-09-08 23:17:44 +0200262 mem = malloc(memsize);
David Kastrupd2100862007-09-08 23:17:44 +0200263 if (!mem) {
264 free(hash);
265 return NULL;
266 }
267
268 index = mem;
269 index->memsize = memsize;
270 index->src_buf = buf;
271 index->src_size = bufsize;
272 index->hash_mask = hmask;
273
Pierre Habouzitf9c5a802007-12-18 02:39:57 +0100274 mem = index->hash;
David Kastrupd2100862007-09-08 23:17:44 +0200275 packed_hash = mem;
276 mem = packed_hash + (hsize+1);
277 packed_entry = mem;
278
David Kastrupd2100862007-09-08 23:17:44 +0200279 for (i = 0; i < hsize; i++) {
Nicolas Pitrece85b052007-12-18 10:15:39 -0500280 /*
281 * Coalesce all entries belonging to one linked list
282 * into consecutive array entries.
283 */
David Kastrupd2100862007-09-08 23:17:44 +0200284 packed_hash[i] = packed_entry;
285 for (entry = hash[i]; entry; entry = entry->next)
286 *packed_entry++ = entry->entry;
287 }
288
Nicolas Pitrece85b052007-12-18 10:15:39 -0500289 /* Sentinel value to indicate the length of the last hash bucket */
David Kastrupd2100862007-09-08 23:17:44 +0200290 packed_hash[hsize] = packed_entry;
Nicolas Pitrece85b052007-12-18 10:15:39 -0500291
David Kastrupd2100862007-09-08 23:17:44 +0200292 assert(packed_entry - (struct index_entry *)mem == entries);
293 free(hash);
294
Nicolas Pitre08abe662006-04-24 23:07:47 -0400295 return index;
296}
297
298void free_delta_index(struct delta_index *index)
299{
300 free(index);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400301}
302
Brian Downing11779e72007-07-12 07:55:48 -0500303unsigned long sizeof_delta_index(struct delta_index *index)
304{
305 if (index)
306 return index->memsize;
307 else
308 return 0;
309}
310
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400311/*
312 * The maximum size for any opcode sequence, including the initial header
Pavel Roskin82e5a822006-07-10 01:50:18 -0400313 * plus Rabin window plus biggest copy.
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400314 */
315#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500316
Nicolas Pitre08abe662006-04-24 23:07:47 -0400317void *
318create_delta(const struct delta_index *index,
319 const void *trg_buf, unsigned long trg_size,
320 unsigned long *delta_size, unsigned long max_size)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400321{
Nicolas Pitre84336692007-05-25 21:38:58 -0400322 unsigned int i, outpos, outsize, moff, msize, val;
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500323 int inscnt;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500324 const unsigned char *ref_data, *ref_top, *data, *top;
325 unsigned char *out;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400326
Nicolas Pitre08abe662006-04-24 23:07:47 -0400327 if (!trg_buf || !trg_size)
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500328 return NULL;
329
Nicolas Pitrea310d432005-05-19 10:27:14 -0400330 outpos = 0;
331 outsize = 8192;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500332 if (max_size && outsize >= max_size)
333 outsize = max_size + MAX_OP_SIZE + 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400334 out = malloc(outsize);
Nicolas Pitre08abe662006-04-24 23:07:47 -0400335 if (!out)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400336 return NULL;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400337
338 /* store reference buffer size */
Nicolas Pitre08abe662006-04-24 23:07:47 -0400339 i = index->src_size;
340 while (i >= 0x80) {
341 out[outpos++] = i | 0x80;
342 i >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400343 }
Nicolas Pitre08abe662006-04-24 23:07:47 -0400344 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400345
346 /* store target buffer size */
Nicolas Pitre08abe662006-04-24 23:07:47 -0400347 i = trg_size;
348 while (i >= 0x80) {
349 out[outpos++] = i | 0x80;
350 i >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400351 }
Nicolas Pitre08abe662006-04-24 23:07:47 -0400352 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400353
Nicolas Pitre08abe662006-04-24 23:07:47 -0400354 ref_data = index->src_buf;
355 ref_top = ref_data + index->src_size;
356 data = trg_buf;
Florian Forster1d7f1712006-06-18 17:18:09 +0200357 top = (const unsigned char *) trg_buf + trg_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400358
359 outpos++;
360 val = 0;
361 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
362 out[outpos++] = *data;
363 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
364 }
365 inscnt = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400366
Nicolas Pitre84336692007-05-25 21:38:58 -0400367 moff = 0;
368 msize = 0;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500369 while (data < top) {
Nicolas Pitre84336692007-05-25 21:38:58 -0400370 if (msize < 4096) {
371 struct index_entry *entry;
372 val ^= U[data[-RABIN_WINDOW]];
373 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
374 i = val & index->hash_mask;
David Kastrupd2100862007-09-08 23:17:44 +0200375 for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
Nicolas Pitre84336692007-05-25 21:38:58 -0400376 const unsigned char *ref = entry->ptr;
377 const unsigned char *src = data;
378 unsigned int ref_size = ref_top - ref;
379 if (entry->val != val)
380 continue;
381 if (ref_size > top - src)
382 ref_size = top - src;
383 if (ref_size <= msize)
384 break;
385 while (ref_size-- && *src++ == *ref)
386 ref++;
387 if (msize < ref - entry->ptr) {
388 /* this is our best match so far */
389 msize = ref - entry->ptr;
390 moff = entry->ptr - ref_data;
391 if (msize >= 4096) /* good enough */
392 break;
393 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400394 }
395 }
396
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400397 if (msize < 4) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400398 if (!inscnt)
399 outpos++;
400 out[outpos++] = *data++;
401 inscnt++;
402 if (inscnt == 0x7f) {
403 out[outpos - inscnt - 1] = inscnt;
404 inscnt = 0;
405 }
Nicolas Pitre84336692007-05-25 21:38:58 -0400406 msize = 0;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400407 } else {
Nicolas Pitre84336692007-05-25 21:38:58 -0400408 unsigned int left;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500409 unsigned char *op;
410
Nicolas Pitrea310d432005-05-19 10:27:14 -0400411 if (inscnt) {
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500412 while (moff && ref_data[moff-1] == data[-1]) {
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500413 /* we can match one byte back */
414 msize++;
415 moff--;
416 data--;
417 outpos--;
418 if (--inscnt)
419 continue;
420 outpos--; /* remove count slot */
421 inscnt--; /* make it -1 */
422 break;
423 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400424 out[outpos - inscnt - 1] = inscnt;
425 inscnt = 0;
426 }
427
Nicolas Pitre84336692007-05-25 21:38:58 -0400428 /* A copy op is currently limited to 64KB (pack v2) */
429 left = (msize < 0x10000) ? 0 : (msize - 0x10000);
430 msize -= left;
431
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500432 op = out + outpos++;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400433 i = 0x80;
434
Nicolas Pitre84336692007-05-25 21:38:58 -0400435 if (moff & 0x000000ff)
436 out[outpos++] = moff >> 0, i |= 0x01;
437 if (moff & 0x0000ff00)
438 out[outpos++] = moff >> 8, i |= 0x02;
439 if (moff & 0x00ff0000)
440 out[outpos++] = moff >> 16, i |= 0x04;
441 if (moff & 0xff000000)
442 out[outpos++] = moff >> 24, i |= 0x08;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400443
Nicolas Pitre84336692007-05-25 21:38:58 -0400444 if (msize & 0x00ff)
445 out[outpos++] = msize >> 0, i |= 0x10;
446 if (msize & 0xff00)
447 out[outpos++] = msize >> 8, i |= 0x20;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400448
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500449 *op = i;
Nicolas Pitre84336692007-05-25 21:38:58 -0400450
451 data += msize;
452 moff += msize;
453 msize = left;
454
455 if (msize < 4096) {
456 int j;
457 val = 0;
458 for (j = -RABIN_WINDOW; j < 0; j++)
459 val = ((val << 8) | data[j])
460 ^ T[val >> RABIN_SHIFT];
461 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400462 }
463
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500464 if (outpos >= outsize - MAX_OP_SIZE) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400465 void *tmp = out;
466 outsize = outsize * 3 / 2;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500467 if (max_size && outsize >= max_size)
468 outsize = max_size + MAX_OP_SIZE + 1;
469 if (max_size && outpos > max_size)
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400470 break;
Martin Koeglerb75c6c62007-05-29 21:08:35 +0200471 out = realloc(out, outsize);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400472 if (!out) {
473 free(tmp);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400474 return NULL;
475 }
476 }
477 }
478
479 if (inscnt)
480 out[outpos - inscnt - 1] = inscnt;
481
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400482 if (max_size && outpos > max_size) {
483 free(out);
484 return NULL;
485 }
486
Nicolas Pitrea310d432005-05-19 10:27:14 -0400487 *delta_size = outpos;
488 return out;
489}