blob: a4e28df714b4834e5efe42fa3abb647711913d71 [file] [log] [blame]
Nicolas Pitrea310d432005-05-19 10:27:14 -04001/*
2 * diff-delta.c: generate a delta between two buffers
3 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04004 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5 * http://www.xmailserver.org/xdiff-lib.html
Nicolas Pitrea310d432005-05-19 10:27:14 -04006 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04007 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
Nicolas Pitrea310d432005-05-19 10:27:14 -04008 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04009 * This code is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
Nicolas Pitrea310d432005-05-19 10:27:14 -040012 */
13
Florian Forster63f17562006-06-18 17:18:04 +020014#include "git-compat-util.h"
Junio C Hamano85023572006-12-19 14:34:12 -080015#include "delta.h"
Nicolas Pitrea310d432005-05-19 10:27:14 -040016
Nicolas Pitre08abe662006-04-24 23:07:47 -040017/* maximum hash entry list for the same hash bucket */
18#define HASH_LIMIT 64
Nicolas Pitrea310d432005-05-19 10:27:14 -040019
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -040020#define RABIN_SHIFT 23
21#define RABIN_WINDOW 16
22
23static const unsigned int T[256] = {
24 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67};
68
69static const unsigned int U[256] = {
70 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400114
Nicolas Pitre08abe662006-04-24 23:07:47 -0400115struct index_entry {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400116 const unsigned char *ptr;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500117 unsigned int val;
David Kastrupd2100862007-09-08 23:17:44 +0200118};
119
120struct unpacked_index_entry {
121 struct index_entry entry;
122 struct unpacked_index_entry *next;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500123};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400124
Nicolas Pitre08abe662006-04-24 23:07:47 -0400125struct delta_index {
Brian Downing11779e72007-07-12 07:55:48 -0500126 unsigned long memsize;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400127 const void *src_buf;
128 unsigned long src_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400129 unsigned int hash_mask;
Florian Forster63f17562006-06-18 17:18:04 +0200130 struct index_entry *hash[FLEX_ARRAY];
Nicolas Pitre08abe662006-04-24 23:07:47 -0400131};
132
133struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400134{
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400135 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400136 const unsigned char *data, *buffer = buf;
137 struct delta_index *index;
David Kastrupd2100862007-09-08 23:17:44 +0200138 struct unpacked_index_entry *entry, **hash;
139 struct index_entry *packed_entry, **packed_hash;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500140 void *mem;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400141 unsigned long memsize;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400142
Nicolas Pitre08abe662006-04-24 23:07:47 -0400143 if (!buf || !bufsize)
144 return NULL;
145
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400146 /* Determine index hash size. Note that indexing skips the
Pavel Roskin82e5a822006-07-10 01:50:18 -0400147 first byte to allow for optimizing the Rabin's polynomial
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400148 initialization in create_delta(). */
149 entries = (bufsize - 1) / RABIN_WINDOW;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500150 hsize = entries / 4;
Pierre Habouzitb05faa22006-08-23 11:17:55 +0200151 for (i = 4; (1u << i) < hsize && i < 31; i++);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500152 hsize = 1 << i;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400153 hmask = hsize - 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400154
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500155 /* allocate lookup index */
David Kastrupd2100862007-09-08 23:17:44 +0200156 memsize = sizeof(*hash) * hsize +
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400157 sizeof(*entry) * entries;
158 mem = malloc(memsize);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500159 if (!mem)
160 return NULL;
161 hash = mem;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400162 mem = hash + hsize;
163 entry = mem;
164
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500165 memset(hash, 0, hsize * sizeof(*hash));
166
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500167 /* allocate an array to count hash entries */
168 hash_count = calloc(hsize, sizeof(*hash_count));
169 if (!hash_count) {
David Kastrupd2100862007-09-08 23:17:44 +0200170 free(hash);
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500171 return NULL;
172 }
173
174 /* then populate the index */
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400175 prev_val = ~0;
176 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
177 data >= buffer;
178 data -= RABIN_WINDOW) {
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400179 unsigned int val = 0;
180 for (i = 1; i <= RABIN_WINDOW; i++)
181 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400182 if (val == prev_val) {
183 /* keep the lowest of consecutive identical blocks */
David Kastrupd2100862007-09-08 23:17:44 +0200184 entry[-1].entry.ptr = data + RABIN_WINDOW;
185 --entries;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400186 } else {
187 prev_val = val;
188 i = val & hmask;
David Kastrupd2100862007-09-08 23:17:44 +0200189 entry->entry.ptr = data + RABIN_WINDOW;
190 entry->entry.val = val;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400191 entry->next = hash[i];
192 hash[i] = entry++;
193 hash_count[i]++;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400194 }
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400195 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400196
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500197 /*
198 * Determine a limit on the number of entries in the same hash
Pavel Roskin82e5a822006-07-10 01:50:18 -0400199 * bucket. This guards us against pathological data sets causing
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500200 * really bad hash distribution with most entries in the same hash
201 * bucket that would bring us to O(m*n) computing costs (m and n
202 * corresponding to reference and target buffer sizes).
203 *
Nicolas Pitre08abe662006-04-24 23:07:47 -0400204 * Make sure none of the hash buckets has more entries than
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500205 * we're willing to test. Otherwise we cull the entry list
206 * uniformly to still preserve a good repartition across
207 * the reference buffer.
208 */
209 for (i = 0; i < hsize; i++) {
David Kastrup02e665c2007-09-08 23:25:55 +0200210 int acc;
211
212 if (hash_count[i] <= HASH_LIMIT)
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500213 continue;
David Kastrup02e665c2007-09-08 23:25:55 +0200214
David Kastrup02e665c2007-09-08 23:25:55 +0200215 /* We leave exactly HASH_LIMIT entries in the bucket */
Nicolas Pitrece85b052007-12-18 10:15:39 -0500216 entries -= hash_count[i] - HASH_LIMIT;
David Kastrup02e665c2007-09-08 23:25:55 +0200217
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500218 entry = hash[i];
David Kastrup02e665c2007-09-08 23:25:55 +0200219 acc = 0;
Nicolas Pitrece85b052007-12-18 10:15:39 -0500220
221 /*
222 * Assume that this loop is gone through exactly
223 * HASH_LIMIT times and is entered and left with
224 * acc==0. So the first statement in the loop
225 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
226 * to the accumulator, and the inner loop consequently
227 * is run (hash_count[i]-HASH_LIMIT) times, removing
228 * one element from the list each time. Since acc
229 * balances out to 0 at the final run, the inner loop
230 * body can't be left with entry==NULL. So we indeed
231 * encounter entry==NULL in the outer loop only.
232 */
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500233 do {
David Kastrup02e665c2007-09-08 23:25:55 +0200234 acc += hash_count[i] - HASH_LIMIT;
235 if (acc > 0) {
236 struct unpacked_index_entry *keep = entry;
237 do {
238 entry = entry->next;
239 acc -= HASH_LIMIT;
240 } while (acc > 0);
241 keep->next = entry->next;
242 }
243 entry = entry->next;
244 } while (entry);
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500245 }
246 free(hash_count);
247
Nicolas Pitrece85b052007-12-18 10:15:39 -0500248 /*
249 * Now create the packed index in array form
250 * rather than linked lists.
251 */
David Kastrupd2100862007-09-08 23:17:44 +0200252 memsize = sizeof(*index)
253 + sizeof(*packed_hash) * (hsize+1)
254 + sizeof(*packed_entry) * entries;
David Kastrupd2100862007-09-08 23:17:44 +0200255 mem = malloc(memsize);
David Kastrupd2100862007-09-08 23:17:44 +0200256 if (!mem) {
257 free(hash);
258 return NULL;
259 }
260
261 index = mem;
262 index->memsize = memsize;
263 index->src_buf = buf;
264 index->src_size = bufsize;
265 index->hash_mask = hmask;
266
Pierre Habouzitf9c5a802007-12-18 02:39:57 +0100267 mem = index->hash;
David Kastrupd2100862007-09-08 23:17:44 +0200268 packed_hash = mem;
269 mem = packed_hash + (hsize+1);
270 packed_entry = mem;
271
David Kastrupd2100862007-09-08 23:17:44 +0200272 for (i = 0; i < hsize; i++) {
Nicolas Pitrece85b052007-12-18 10:15:39 -0500273 /*
274 * Coalesce all entries belonging to one linked list
275 * into consecutive array entries.
276 */
David Kastrupd2100862007-09-08 23:17:44 +0200277 packed_hash[i] = packed_entry;
278 for (entry = hash[i]; entry; entry = entry->next)
279 *packed_entry++ = entry->entry;
280 }
281
Nicolas Pitrece85b052007-12-18 10:15:39 -0500282 /* Sentinel value to indicate the length of the last hash bucket */
David Kastrupd2100862007-09-08 23:17:44 +0200283 packed_hash[hsize] = packed_entry;
Nicolas Pitrece85b052007-12-18 10:15:39 -0500284
David Kastrupd2100862007-09-08 23:17:44 +0200285 assert(packed_entry - (struct index_entry *)mem == entries);
286 free(hash);
287
Nicolas Pitre08abe662006-04-24 23:07:47 -0400288 return index;
289}
290
291void free_delta_index(struct delta_index *index)
292{
293 free(index);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400294}
295
Brian Downing11779e72007-07-12 07:55:48 -0500296unsigned long sizeof_delta_index(struct delta_index *index)
297{
298 if (index)
299 return index->memsize;
300 else
301 return 0;
302}
303
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400304/*
305 * The maximum size for any opcode sequence, including the initial header
Pavel Roskin82e5a822006-07-10 01:50:18 -0400306 * plus Rabin window plus biggest copy.
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400307 */
308#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500309
Nicolas Pitre08abe662006-04-24 23:07:47 -0400310void *
311create_delta(const struct delta_index *index,
312 const void *trg_buf, unsigned long trg_size,
313 unsigned long *delta_size, unsigned long max_size)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400314{
Nicolas Pitre84336692007-05-25 21:38:58 -0400315 unsigned int i, outpos, outsize, moff, msize, val;
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500316 int inscnt;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500317 const unsigned char *ref_data, *ref_top, *data, *top;
318 unsigned char *out;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400319
Nicolas Pitre08abe662006-04-24 23:07:47 -0400320 if (!trg_buf || !trg_size)
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500321 return NULL;
322
Nicolas Pitrea310d432005-05-19 10:27:14 -0400323 outpos = 0;
324 outsize = 8192;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500325 if (max_size && outsize >= max_size)
326 outsize = max_size + MAX_OP_SIZE + 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400327 out = malloc(outsize);
Nicolas Pitre08abe662006-04-24 23:07:47 -0400328 if (!out)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400329 return NULL;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400330
331 /* store reference buffer size */
Nicolas Pitre08abe662006-04-24 23:07:47 -0400332 i = index->src_size;
333 while (i >= 0x80) {
334 out[outpos++] = i | 0x80;
335 i >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400336 }
Nicolas Pitre08abe662006-04-24 23:07:47 -0400337 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400338
339 /* store target buffer size */
Nicolas Pitre08abe662006-04-24 23:07:47 -0400340 i = trg_size;
341 while (i >= 0x80) {
342 out[outpos++] = i | 0x80;
343 i >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400344 }
Nicolas Pitre08abe662006-04-24 23:07:47 -0400345 out[outpos++] = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400346
Nicolas Pitre08abe662006-04-24 23:07:47 -0400347 ref_data = index->src_buf;
348 ref_top = ref_data + index->src_size;
349 data = trg_buf;
Florian Forster1d7f1712006-06-18 17:18:09 +0200350 top = (const unsigned char *) trg_buf + trg_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400351
352 outpos++;
353 val = 0;
354 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
355 out[outpos++] = *data;
356 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
357 }
358 inscnt = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400359
Nicolas Pitre84336692007-05-25 21:38:58 -0400360 moff = 0;
361 msize = 0;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500362 while (data < top) {
Nicolas Pitre84336692007-05-25 21:38:58 -0400363 if (msize < 4096) {
364 struct index_entry *entry;
365 val ^= U[data[-RABIN_WINDOW]];
366 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
367 i = val & index->hash_mask;
David Kastrupd2100862007-09-08 23:17:44 +0200368 for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
Nicolas Pitre84336692007-05-25 21:38:58 -0400369 const unsigned char *ref = entry->ptr;
370 const unsigned char *src = data;
371 unsigned int ref_size = ref_top - ref;
372 if (entry->val != val)
373 continue;
374 if (ref_size > top - src)
375 ref_size = top - src;
376 if (ref_size <= msize)
377 break;
378 while (ref_size-- && *src++ == *ref)
379 ref++;
380 if (msize < ref - entry->ptr) {
381 /* this is our best match so far */
382 msize = ref - entry->ptr;
383 moff = entry->ptr - ref_data;
384 if (msize >= 4096) /* good enough */
385 break;
386 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400387 }
388 }
389
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400390 if (msize < 4) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400391 if (!inscnt)
392 outpos++;
393 out[outpos++] = *data++;
394 inscnt++;
395 if (inscnt == 0x7f) {
396 out[outpos - inscnt - 1] = inscnt;
397 inscnt = 0;
398 }
Nicolas Pitre84336692007-05-25 21:38:58 -0400399 msize = 0;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400400 } else {
Nicolas Pitre84336692007-05-25 21:38:58 -0400401 unsigned int left;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500402 unsigned char *op;
403
Nicolas Pitrea310d432005-05-19 10:27:14 -0400404 if (inscnt) {
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500405 while (moff && ref_data[moff-1] == data[-1]) {
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500406 /* we can match one byte back */
407 msize++;
408 moff--;
409 data--;
410 outpos--;
411 if (--inscnt)
412 continue;
413 outpos--; /* remove count slot */
414 inscnt--; /* make it -1 */
415 break;
416 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400417 out[outpos - inscnt - 1] = inscnt;
418 inscnt = 0;
419 }
420
Nicolas Pitre84336692007-05-25 21:38:58 -0400421 /* A copy op is currently limited to 64KB (pack v2) */
422 left = (msize < 0x10000) ? 0 : (msize - 0x10000);
423 msize -= left;
424
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500425 op = out + outpos++;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400426 i = 0x80;
427
Nicolas Pitre84336692007-05-25 21:38:58 -0400428 if (moff & 0x000000ff)
429 out[outpos++] = moff >> 0, i |= 0x01;
430 if (moff & 0x0000ff00)
431 out[outpos++] = moff >> 8, i |= 0x02;
432 if (moff & 0x00ff0000)
433 out[outpos++] = moff >> 16, i |= 0x04;
434 if (moff & 0xff000000)
435 out[outpos++] = moff >> 24, i |= 0x08;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400436
Nicolas Pitre84336692007-05-25 21:38:58 -0400437 if (msize & 0x00ff)
438 out[outpos++] = msize >> 0, i |= 0x10;
439 if (msize & 0xff00)
440 out[outpos++] = msize >> 8, i |= 0x20;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400441
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500442 *op = i;
Nicolas Pitre84336692007-05-25 21:38:58 -0400443
444 data += msize;
445 moff += msize;
446 msize = left;
447
448 if (msize < 4096) {
449 int j;
450 val = 0;
451 for (j = -RABIN_WINDOW; j < 0; j++)
452 val = ((val << 8) | data[j])
453 ^ T[val >> RABIN_SHIFT];
454 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400455 }
456
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500457 if (outpos >= outsize - MAX_OP_SIZE) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400458 void *tmp = out;
459 outsize = outsize * 3 / 2;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500460 if (max_size && outsize >= max_size)
461 outsize = max_size + MAX_OP_SIZE + 1;
462 if (max_size && outpos > max_size)
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400463 break;
Martin Koeglerb75c6c62007-05-29 21:08:35 +0200464 out = realloc(out, outsize);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400465 if (!out) {
466 free(tmp);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400467 return NULL;
468 }
469 }
470 }
471
472 if (inscnt)
473 out[outpos - inscnt - 1] = inscnt;
474
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400475 if (max_size && outpos > max_size) {
476 free(out);
477 return NULL;
478 }
479
Nicolas Pitrea310d432005-05-19 10:27:14 -0400480 *delta_size = outpos;
481 return out;
482}