blob: 9f998d0a73e0127d3a68a7caecb3727569149871 [file] [log] [blame]
Nicolas Pitrea310d432005-05-19 10:27:14 -04001/*
2 * diff-delta.c: generate a delta between two buffers
3 *
4 * Many parts of this file have been lifted from LibXDiff version 0.10.
5 * http://www.xmailserver.org/xdiff-lib.html
6 *
7 * LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
8 * Copyright (C) 2003 Davide Libenzi
9 *
10 * Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
11 *
12 * This file is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * Use of this within git automatically means that the LGPL
18 * licensing gets turned into GPLv2 within this project.
19 */
20
Florian Forster63f17562006-06-18 17:18:04 +020021#include "git-compat-util.h"
Junio C Hamano85023572006-12-19 14:34:12 -080022#include "delta.h"
Nicolas Pitrea310d432005-05-19 10:27:14 -040023
Nicolas Pitre08abe662006-04-24 23:07:47 -040024/* maximum hash entry list for the same hash bucket */
25#define HASH_LIMIT 64
Nicolas Pitrea310d432005-05-19 10:27:14 -040026
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -040027#define RABIN_SHIFT 23
28#define RABIN_WINDOW 16
29
30static const unsigned int T[256] = {
31 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
32 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
33 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
34 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
35 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
36 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
37 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
38 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
39 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
40 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
41 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
42 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
43 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
44 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
45 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
46 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
47 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
48 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
49 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
50 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
51 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
52 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
53 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
54 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
55 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
56 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
57 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
58 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
59 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
60 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
61 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
62 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
63 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
64 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
65 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
66 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
67 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
68 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
69 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
70 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
71 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
72 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
73 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
74};
75
76static const unsigned int U[256] = {
77 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
78 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
79 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
80 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
81 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
82 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
83 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
84 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
85 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
86 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
87 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
88 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
89 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
90 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
91 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
92 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
93 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
94 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
95 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
96 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
97 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
98 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
99 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
100 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
101 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
102 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
103 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
104 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
105 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
106 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
107 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
108 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
109 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
110 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
111 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
112 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
113 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
114 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
115 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
116 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
117 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
118 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
119 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
120};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400121
Nicolas Pitre08abe662006-04-24 23:07:47 -0400122struct index_entry {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400123 const unsigned char *ptr;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500124 unsigned int val;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400125 struct index_entry *next;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500126};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400127
Nicolas Pitre08abe662006-04-24 23:07:47 -0400128struct delta_index {
129 const void *src_buf;
130 unsigned long src_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400131 unsigned int hash_mask;
Florian Forster63f17562006-06-18 17:18:04 +0200132 struct index_entry *hash[FLEX_ARRAY];
Nicolas Pitre08abe662006-04-24 23:07:47 -0400133};
134
135struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400136{
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400137 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400138 const unsigned char *data, *buffer = buf;
139 struct delta_index *index;
140 struct index_entry *entry, **hash;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500141 void *mem;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400142 unsigned long memsize;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400143
Nicolas Pitre08abe662006-04-24 23:07:47 -0400144 if (!buf || !bufsize)
145 return NULL;
146
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400147 /* Determine index hash size. Note that indexing skips the
Pavel Roskin82e5a822006-07-10 01:50:18 -0400148 first byte to allow for optimizing the Rabin's polynomial
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400149 initialization in create_delta(). */
150 entries = (bufsize - 1) / RABIN_WINDOW;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500151 hsize = entries / 4;
Pierre Habouzitb05faa22006-08-23 11:17:55 +0200152 for (i = 4; (1u << i) < hsize && i < 31; i++);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500153 hsize = 1 << i;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400154 hmask = hsize - 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400155
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500156 /* allocate lookup index */
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400157 memsize = sizeof(*index) +
158 sizeof(*hash) * hsize +
159 sizeof(*entry) * entries;
160 mem = malloc(memsize);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500161 if (!mem)
162 return NULL;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400163 index = mem;
164 mem = index + 1;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500165 hash = mem;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400166 mem = hash + hsize;
167 entry = mem;
168
169 index->src_buf = buf;
170 index->src_size = bufsize;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400171 index->hash_mask = hmask;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500172 memset(hash, 0, hsize * sizeof(*hash));
173
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500174 /* allocate an array to count hash entries */
175 hash_count = calloc(hsize, sizeof(*hash_count));
176 if (!hash_count) {
Nicolas Pitre08abe662006-04-24 23:07:47 -0400177 free(index);
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500178 return NULL;
179 }
180
181 /* then populate the index */
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400182 prev_val = ~0;
183 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
184 data >= buffer;
185 data -= RABIN_WINDOW) {
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400186 unsigned int val = 0;
187 for (i = 1; i <= RABIN_WINDOW; i++)
188 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400189 if (val == prev_val) {
190 /* keep the lowest of consecutive identical blocks */
191 entry[-1].ptr = data + RABIN_WINDOW;
192 } else {
193 prev_val = val;
194 i = val & hmask;
195 entry->ptr = data + RABIN_WINDOW;
196 entry->val = val;
197 entry->next = hash[i];
198 hash[i] = entry++;
199 hash_count[i]++;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400200 }
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400201 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400202
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500203 /*
204 * Determine a limit on the number of entries in the same hash
Pavel Roskin82e5a822006-07-10 01:50:18 -0400205 * bucket. This guards us against pathological data sets causing
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500206 * really bad hash distribution with most entries in the same hash
207 * bucket that would bring us to O(m*n) computing costs (m and n
208 * corresponding to reference and target buffer sizes).
209 *
Nicolas Pitre08abe662006-04-24 23:07:47 -0400210 * Make sure none of the hash buckets has more entries than
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500211 * we're willing to test. Otherwise we cull the entry list
212 * uniformly to still preserve a good repartition across
213 * the reference buffer.
214 */
215 for (i = 0; i < hsize; i++) {
Nicolas Pitre08abe662006-04-24 23:07:47 -0400216 if (hash_count[i] < HASH_LIMIT)
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500217 continue;
218 entry = hash[i];
219 do {
Nicolas Pitre08abe662006-04-24 23:07:47 -0400220 struct index_entry *keep = entry;
221 int skip = hash_count[i] / HASH_LIMIT / 2;
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500222 do {
223 entry = entry->next;
224 } while(--skip && entry);
225 keep->next = entry;
226 } while(entry);
227 }
228 free(hash_count);
229
Nicolas Pitre08abe662006-04-24 23:07:47 -0400230 return index;
231}
232
233void free_delta_index(struct delta_index *index)
234{
235 free(index);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400236}
237
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400238/*
239 * The maximum size for any opcode sequence, including the initial header
Pavel Roskin82e5a822006-07-10 01:50:18 -0400240 * plus Rabin window plus biggest copy.
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400241 */
242#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500243
Nicolas Pitre08abe662006-04-24 23:07:47 -0400244void *
245create_delta(const struct delta_index *index,
246 const void *trg_buf, unsigned long trg_size,
247 unsigned long *delta_size, unsigned long max_size)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400248{
Nicolas Pitre2d08e5d2006-05-02 23:46:51 -0400249 unsigned int i, outpos, outsize, val;
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500250 int inscnt;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500251 const unsigned char *ref_data, *ref_top, *data, *top;
252 unsigned char *out;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400253
Nicolas Pitre08abe662006-04-24 23:07:47 -0400254 if (!trg_buf || !trg_size)
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500255 return NULL;
256
Nicolas Pitrea310d432005-05-19 10:27:14 -0400257 outpos = 0;
258 outsize = 8192;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500259 if (max_size && outsize >= max_size)
260 outsize = max_size + MAX_OP_SIZE + 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400261 out = malloc(outsize);
Nicolas Pitre08abe662006-04-24 23:07:47 -0400262 if (!out)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400263 return NULL;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400264
265 /* store reference buffer size */
Nicolas Pitre08abe662006-04-24 23:07:47 -0400266 i = index->src_size;
267 while (i >= 0x80) {
268 out[outpos++] = i | 0x80;
269 i >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400270 }
Nicolas Pitre08abe662006-04-24 23:07:47 -0400271 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400272
273 /* store target buffer size */
Nicolas Pitre08abe662006-04-24 23:07:47 -0400274 i = trg_size;
275 while (i >= 0x80) {
276 out[outpos++] = i | 0x80;
277 i >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400278 }
Nicolas Pitre08abe662006-04-24 23:07:47 -0400279 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400280
Nicolas Pitre08abe662006-04-24 23:07:47 -0400281 ref_data = index->src_buf;
282 ref_top = ref_data + index->src_size;
283 data = trg_buf;
Florian Forster1d7f1712006-06-18 17:18:09 +0200284 top = (const unsigned char *) trg_buf + trg_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400285
286 outpos++;
287 val = 0;
288 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
289 out[outpos++] = *data;
290 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
291 }
292 inscnt = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400293
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500294 while (data < top) {
295 unsigned int moff = 0, msize = 0;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400296 struct index_entry *entry;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400297 val ^= U[data[-RABIN_WINDOW]];
298 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
Nicolas Pitre2d08e5d2006-05-02 23:46:51 -0400299 i = val & index->hash_mask;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400300 for (entry = index->hash[i]; entry; entry = entry->next) {
301 const unsigned char *ref = entry->ptr;
302 const unsigned char *src = data;
303 unsigned int ref_size = ref_top - ref;
304 if (entry->val != val)
305 continue;
306 if (ref_size > top - src)
307 ref_size = top - src;
Junio C Hamano29f049a2006-10-14 23:37:41 -0700308 if (ref_size > 0x10000)
309 ref_size = 0x10000;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400310 if (ref_size <= msize)
311 break;
312 while (ref_size-- && *src++ == *ref)
313 ref++;
314 if (msize < ref - entry->ptr) {
315 /* this is our best match so far */
316 msize = ref - entry->ptr;
317 moff = entry->ptr - ref_data;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400318 }
319 }
320
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400321 if (msize < 4) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400322 if (!inscnt)
323 outpos++;
324 out[outpos++] = *data++;
325 inscnt++;
326 if (inscnt == 0x7f) {
327 out[outpos - inscnt - 1] = inscnt;
328 inscnt = 0;
329 }
330 } else {
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500331 unsigned char *op;
332
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400333 if (msize >= RABIN_WINDOW) {
334 const unsigned char *sk;
335 sk = data + msize - RABIN_WINDOW;
336 val = 0;
337 for (i = 0; i < RABIN_WINDOW; i++)
338 val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT];
339 } else {
340 const unsigned char *sk = data + 1;
341 for (i = 1; i < msize; i++) {
342 val ^= U[sk[-RABIN_WINDOW]];
343 val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT];
344 }
345 }
346
Nicolas Pitrea310d432005-05-19 10:27:14 -0400347 if (inscnt) {
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500348 while (moff && ref_data[moff-1] == data[-1]) {
349 if (msize == 0x10000)
350 break;
351 /* we can match one byte back */
352 msize++;
353 moff--;
354 data--;
355 outpos--;
356 if (--inscnt)
357 continue;
358 outpos--; /* remove count slot */
359 inscnt--; /* make it -1 */
360 break;
361 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400362 out[outpos - inscnt - 1] = inscnt;
363 inscnt = 0;
364 }
365
366 data += msize;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500367 op = out + outpos++;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400368 i = 0x80;
369
370 if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
371 moff >>= 8;
372 if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
373 moff >>= 8;
374 if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
375 moff >>= 8;
376 if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
377
378 if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
379 msize >>= 8;
380 if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
381
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500382 *op = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400383 }
384
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500385 if (outpos >= outsize - MAX_OP_SIZE) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400386 void *tmp = out;
387 outsize = outsize * 3 / 2;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500388 if (max_size && outsize >= max_size)
389 outsize = max_size + MAX_OP_SIZE + 1;
390 if (max_size && outpos > max_size)
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400391 break;
Jonas Fonseca83572c12006-08-26 16:16:18 +0200392 out = xrealloc(out, outsize);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400393 if (!out) {
394 free(tmp);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400395 return NULL;
396 }
397 }
398 }
399
400 if (inscnt)
401 out[outpos - inscnt - 1] = inscnt;
402
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400403 if (max_size && outpos > max_size) {
404 free(out);
405 return NULL;
406 }
407
Nicolas Pitrea310d432005-05-19 10:27:14 -0400408 *delta_size = outpos;
409 return out;
410}