blob: 3af583536f2ee9f2d1f2fd699148ff16166dc932 [file] [log] [blame]
Nicolas Pitrea310d432005-05-19 10:27:14 -04001/*
2 * diff-delta.c: generate a delta between two buffers
3 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04004 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5 * http://www.xmailserver.org/xdiff-lib.html
Nicolas Pitrea310d432005-05-19 10:27:14 -04006 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04007 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
Nicolas Pitrea310d432005-05-19 10:27:14 -04008 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04009 * This code is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
Nicolas Pitrea310d432005-05-19 10:27:14 -040012 */
13
Florian Forster63f17562006-06-18 17:18:04 +020014#include "git-compat-util.h"
Junio C Hamano85023572006-12-19 14:34:12 -080015#include "delta.h"
Nicolas Pitrea310d432005-05-19 10:27:14 -040016
Nicolas Pitre08abe662006-04-24 23:07:47 -040017/* maximum hash entry list for the same hash bucket */
18#define HASH_LIMIT 64
Nicolas Pitrea310d432005-05-19 10:27:14 -040019
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -040020#define RABIN_SHIFT 23
21#define RABIN_WINDOW 16
22
23static const unsigned int T[256] = {
24 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67};
68
69static const unsigned int U[256] = {
70 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400114
Nicolas Pitre08abe662006-04-24 23:07:47 -0400115struct index_entry {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400116 const unsigned char *ptr;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500117 unsigned int val;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400118 struct index_entry *next;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500119};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400120
Nicolas Pitre08abe662006-04-24 23:07:47 -0400121struct delta_index {
Brian Downing11779e72007-07-12 07:55:48 -0500122 unsigned long memsize;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400123 const void *src_buf;
124 unsigned long src_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400125 unsigned int hash_mask;
Florian Forster63f17562006-06-18 17:18:04 +0200126 struct index_entry *hash[FLEX_ARRAY];
Nicolas Pitre08abe662006-04-24 23:07:47 -0400127};
128
129struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400130{
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400131 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400132 const unsigned char *data, *buffer = buf;
133 struct delta_index *index;
134 struct index_entry *entry, **hash;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500135 void *mem;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400136 unsigned long memsize;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400137
Nicolas Pitre08abe662006-04-24 23:07:47 -0400138 if (!buf || !bufsize)
139 return NULL;
140
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400141 /* Determine index hash size. Note that indexing skips the
Pavel Roskin82e5a822006-07-10 01:50:18 -0400142 first byte to allow for optimizing the Rabin's polynomial
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400143 initialization in create_delta(). */
144 entries = (bufsize - 1) / RABIN_WINDOW;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500145 hsize = entries / 4;
Pierre Habouzitb05faa22006-08-23 11:17:55 +0200146 for (i = 4; (1u << i) < hsize && i < 31; i++);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500147 hsize = 1 << i;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400148 hmask = hsize - 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400149
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500150 /* allocate lookup index */
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400151 memsize = sizeof(*index) +
152 sizeof(*hash) * hsize +
153 sizeof(*entry) * entries;
154 mem = malloc(memsize);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500155 if (!mem)
156 return NULL;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400157 index = mem;
158 mem = index + 1;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500159 hash = mem;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400160 mem = hash + hsize;
161 entry = mem;
162
Brian Downing11779e72007-07-12 07:55:48 -0500163 index->memsize = memsize;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400164 index->src_buf = buf;
165 index->src_size = bufsize;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400166 index->hash_mask = hmask;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500167 memset(hash, 0, hsize * sizeof(*hash));
168
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500169 /* allocate an array to count hash entries */
170 hash_count = calloc(hsize, sizeof(*hash_count));
171 if (!hash_count) {
Nicolas Pitre08abe662006-04-24 23:07:47 -0400172 free(index);
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500173 return NULL;
174 }
175
176 /* then populate the index */
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400177 prev_val = ~0;
178 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
179 data >= buffer;
180 data -= RABIN_WINDOW) {
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400181 unsigned int val = 0;
182 for (i = 1; i <= RABIN_WINDOW; i++)
183 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400184 if (val == prev_val) {
185 /* keep the lowest of consecutive identical blocks */
186 entry[-1].ptr = data + RABIN_WINDOW;
187 } else {
188 prev_val = val;
189 i = val & hmask;
190 entry->ptr = data + RABIN_WINDOW;
191 entry->val = val;
192 entry->next = hash[i];
193 hash[i] = entry++;
194 hash_count[i]++;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400195 }
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400196 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400197
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500198 /*
199 * Determine a limit on the number of entries in the same hash
Pavel Roskin82e5a822006-07-10 01:50:18 -0400200 * bucket. This guards us against pathological data sets causing
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500201 * really bad hash distribution with most entries in the same hash
202 * bucket that would bring us to O(m*n) computing costs (m and n
203 * corresponding to reference and target buffer sizes).
204 *
Nicolas Pitre08abe662006-04-24 23:07:47 -0400205 * Make sure none of the hash buckets has more entries than
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500206 * we're willing to test. Otherwise we cull the entry list
207 * uniformly to still preserve a good repartition across
208 * the reference buffer.
209 */
210 for (i = 0; i < hsize; i++) {
Nicolas Pitre08abe662006-04-24 23:07:47 -0400211 if (hash_count[i] < HASH_LIMIT)
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500212 continue;
213 entry = hash[i];
214 do {
Nicolas Pitre08abe662006-04-24 23:07:47 -0400215 struct index_entry *keep = entry;
216 int skip = hash_count[i] / HASH_LIMIT / 2;
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500217 do {
218 entry = entry->next;
219 } while(--skip && entry);
220 keep->next = entry;
221 } while(entry);
222 }
223 free(hash_count);
224
Nicolas Pitre08abe662006-04-24 23:07:47 -0400225 return index;
226}
227
228void free_delta_index(struct delta_index *index)
229{
230 free(index);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400231}
232
Brian Downing11779e72007-07-12 07:55:48 -0500233unsigned long sizeof_delta_index(struct delta_index *index)
234{
235 if (index)
236 return index->memsize;
237 else
238 return 0;
239}
240
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400241/*
242 * The maximum size for any opcode sequence, including the initial header
Pavel Roskin82e5a822006-07-10 01:50:18 -0400243 * plus Rabin window plus biggest copy.
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400244 */
245#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500246
Nicolas Pitre08abe662006-04-24 23:07:47 -0400247void *
248create_delta(const struct delta_index *index,
249 const void *trg_buf, unsigned long trg_size,
250 unsigned long *delta_size, unsigned long max_size)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400251{
Nicolas Pitre84336692007-05-25 21:38:58 -0400252 unsigned int i, outpos, outsize, moff, msize, val;
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500253 int inscnt;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500254 const unsigned char *ref_data, *ref_top, *data, *top;
255 unsigned char *out;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400256
Nicolas Pitre08abe662006-04-24 23:07:47 -0400257 if (!trg_buf || !trg_size)
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500258 return NULL;
259
Nicolas Pitrea310d432005-05-19 10:27:14 -0400260 outpos = 0;
261 outsize = 8192;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500262 if (max_size && outsize >= max_size)
263 outsize = max_size + MAX_OP_SIZE + 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400264 out = malloc(outsize);
Nicolas Pitre08abe662006-04-24 23:07:47 -0400265 if (!out)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400266 return NULL;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400267
268 /* store reference buffer size */
Nicolas Pitre08abe662006-04-24 23:07:47 -0400269 i = index->src_size;
270 while (i >= 0x80) {
271 out[outpos++] = i | 0x80;
272 i >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400273 }
Nicolas Pitre08abe662006-04-24 23:07:47 -0400274 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400275
276 /* store target buffer size */
Nicolas Pitre08abe662006-04-24 23:07:47 -0400277 i = trg_size;
278 while (i >= 0x80) {
279 out[outpos++] = i | 0x80;
280 i >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400281 }
Nicolas Pitre08abe662006-04-24 23:07:47 -0400282 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400283
Nicolas Pitre08abe662006-04-24 23:07:47 -0400284 ref_data = index->src_buf;
285 ref_top = ref_data + index->src_size;
286 data = trg_buf;
Florian Forster1d7f1712006-06-18 17:18:09 +0200287 top = (const unsigned char *) trg_buf + trg_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400288
289 outpos++;
290 val = 0;
291 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
292 out[outpos++] = *data;
293 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
294 }
295 inscnt = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400296
Nicolas Pitre84336692007-05-25 21:38:58 -0400297 moff = 0;
298 msize = 0;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500299 while (data < top) {
Nicolas Pitre84336692007-05-25 21:38:58 -0400300 if (msize < 4096) {
301 struct index_entry *entry;
302 val ^= U[data[-RABIN_WINDOW]];
303 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
304 i = val & index->hash_mask;
305 for (entry = index->hash[i]; entry; entry = entry->next) {
306 const unsigned char *ref = entry->ptr;
307 const unsigned char *src = data;
308 unsigned int ref_size = ref_top - ref;
309 if (entry->val != val)
310 continue;
311 if (ref_size > top - src)
312 ref_size = top - src;
313 if (ref_size <= msize)
314 break;
315 while (ref_size-- && *src++ == *ref)
316 ref++;
317 if (msize < ref - entry->ptr) {
318 /* this is our best match so far */
319 msize = ref - entry->ptr;
320 moff = entry->ptr - ref_data;
321 if (msize >= 4096) /* good enough */
322 break;
323 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400324 }
325 }
326
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400327 if (msize < 4) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400328 if (!inscnt)
329 outpos++;
330 out[outpos++] = *data++;
331 inscnt++;
332 if (inscnt == 0x7f) {
333 out[outpos - inscnt - 1] = inscnt;
334 inscnt = 0;
335 }
Nicolas Pitre84336692007-05-25 21:38:58 -0400336 msize = 0;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400337 } else {
Nicolas Pitre84336692007-05-25 21:38:58 -0400338 unsigned int left;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500339 unsigned char *op;
340
Nicolas Pitrea310d432005-05-19 10:27:14 -0400341 if (inscnt) {
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500342 while (moff && ref_data[moff-1] == data[-1]) {
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500343 /* we can match one byte back */
344 msize++;
345 moff--;
346 data--;
347 outpos--;
348 if (--inscnt)
349 continue;
350 outpos--; /* remove count slot */
351 inscnt--; /* make it -1 */
352 break;
353 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400354 out[outpos - inscnt - 1] = inscnt;
355 inscnt = 0;
356 }
357
Nicolas Pitre84336692007-05-25 21:38:58 -0400358 /* A copy op is currently limited to 64KB (pack v2) */
359 left = (msize < 0x10000) ? 0 : (msize - 0x10000);
360 msize -= left;
361
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500362 op = out + outpos++;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400363 i = 0x80;
364
Nicolas Pitre84336692007-05-25 21:38:58 -0400365 if (moff & 0x000000ff)
366 out[outpos++] = moff >> 0, i |= 0x01;
367 if (moff & 0x0000ff00)
368 out[outpos++] = moff >> 8, i |= 0x02;
369 if (moff & 0x00ff0000)
370 out[outpos++] = moff >> 16, i |= 0x04;
371 if (moff & 0xff000000)
372 out[outpos++] = moff >> 24, i |= 0x08;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400373
Nicolas Pitre84336692007-05-25 21:38:58 -0400374 if (msize & 0x00ff)
375 out[outpos++] = msize >> 0, i |= 0x10;
376 if (msize & 0xff00)
377 out[outpos++] = msize >> 8, i |= 0x20;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400378
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500379 *op = i;
Nicolas Pitre84336692007-05-25 21:38:58 -0400380
381 data += msize;
382 moff += msize;
383 msize = left;
384
385 if (msize < 4096) {
386 int j;
387 val = 0;
388 for (j = -RABIN_WINDOW; j < 0; j++)
389 val = ((val << 8) | data[j])
390 ^ T[val >> RABIN_SHIFT];
391 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400392 }
393
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500394 if (outpos >= outsize - MAX_OP_SIZE) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400395 void *tmp = out;
396 outsize = outsize * 3 / 2;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500397 if (max_size && outsize >= max_size)
398 outsize = max_size + MAX_OP_SIZE + 1;
399 if (max_size && outpos > max_size)
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400400 break;
Martin Koeglerb75c6c62007-05-29 21:08:35 +0200401 out = realloc(out, outsize);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400402 if (!out) {
403 free(tmp);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400404 return NULL;
405 }
406 }
407 }
408
409 if (inscnt)
410 out[outpos - inscnt - 1] = inscnt;
411
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400412 if (max_size && outpos > max_size) {
413 free(out);
414 return NULL;
415 }
416
Nicolas Pitrea310d432005-05-19 10:27:14 -0400417 *delta_size = outpos;
418 return out;
419}