blob: e49643353bf56807b3d4ac4011784b5d8dd6f7a4 [file] [log] [blame]
Nicolas Pitrea310d432005-05-19 10:27:14 -04001/*
2 * diff-delta.c: generate a delta between two buffers
3 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04004 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5 * http://www.xmailserver.org/xdiff-lib.html
Nicolas Pitrea310d432005-05-19 10:27:14 -04006 *
Nicolas Pitre03aa8ff2009-09-14 02:41:16 -04007 * Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007
Nicolas Pitrea310d432005-05-19 10:27:14 -04008 *
Nicolas Pitre366b53c2007-05-25 22:16:27 -04009 * This code is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
Nicolas Pitrea310d432005-05-19 10:27:14 -040012 */
13
Florian Forster63f17562006-06-18 17:18:04 +020014#include "git-compat-util.h"
Junio C Hamano85023572006-12-19 14:34:12 -080015#include "delta.h"
Nicolas Pitrea310d432005-05-19 10:27:14 -040016
Nicolas Pitre08abe662006-04-24 23:07:47 -040017/* maximum hash entry list for the same hash bucket */
18#define HASH_LIMIT 64
Nicolas Pitrea310d432005-05-19 10:27:14 -040019
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -040020#define RABIN_SHIFT 23
21#define RABIN_WINDOW 16
22
23static const unsigned int T[256] = {
24 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67};
68
69static const unsigned int U[256] = {
70 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400114
Nicolas Pitre08abe662006-04-24 23:07:47 -0400115struct index_entry {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400116 const unsigned char *ptr;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500117 unsigned int val;
David Kastrupd2100862007-09-08 23:17:44 +0200118};
119
120struct unpacked_index_entry {
121 struct index_entry entry;
122 struct unpacked_index_entry *next;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500123};
Nicolas Pitrea310d432005-05-19 10:27:14 -0400124
Nicolas Pitre08abe662006-04-24 23:07:47 -0400125struct delta_index {
Brian Downing11779e72007-07-12 07:55:48 -0500126 unsigned long memsize;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400127 const void *src_buf;
128 unsigned long src_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400129 unsigned int hash_mask;
Florian Forster63f17562006-06-18 17:18:04 +0200130 struct index_entry *hash[FLEX_ARRAY];
Nicolas Pitre08abe662006-04-24 23:07:47 -0400131};
132
133struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400134{
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400135 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400136 const unsigned char *data, *buffer = buf;
137 struct delta_index *index;
David Kastrupd2100862007-09-08 23:17:44 +0200138 struct unpacked_index_entry *entry, **hash;
139 struct index_entry *packed_entry, **packed_hash;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500140 void *mem;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400141 unsigned long memsize;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400142
Nicolas Pitre08abe662006-04-24 23:07:47 -0400143 if (!buf || !bufsize)
144 return NULL;
145
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400146 /* Determine index hash size. Note that indexing skips the
Pavel Roskin82e5a822006-07-10 01:50:18 -0400147 first byte to allow for optimizing the Rabin's polynomial
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400148 initialization in create_delta(). */
Nicolas Pitre506049c2010-08-21 01:00:13 -0400149 entries = (bufsize - 1) / RABIN_WINDOW;
150 if (bufsize >= 0xffffffffUL) {
151 /*
152 * Current delta format can't encode offsets into
153 * reference buffer with more than 32 bits.
154 */
155 entries = 0xfffffffeU / RABIN_WINDOW;
156 }
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500157 hsize = entries / 4;
Stefan Bellerf7466e92013-08-16 23:22:37 +0200158 for (i = 4; (1u << i) < hsize; i++);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500159 hsize = 1 << i;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400160 hmask = hsize - 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400161
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500162 /* allocate lookup index */
David Kastrupd2100862007-09-08 23:17:44 +0200163 memsize = sizeof(*hash) * hsize +
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400164 sizeof(*entry) * entries;
165 mem = malloc(memsize);
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500166 if (!mem)
167 return NULL;
168 hash = mem;
Nicolas Pitre08abe662006-04-24 23:07:47 -0400169 mem = hash + hsize;
170 entry = mem;
171
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500172 memset(hash, 0, hsize * sizeof(*hash));
173
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500174 /* allocate an array to count hash entries */
175 hash_count = calloc(hsize, sizeof(*hash_count));
176 if (!hash_count) {
David Kastrupd2100862007-09-08 23:17:44 +0200177 free(hash);
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500178 return NULL;
179 }
180
181 /* then populate the index */
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400182 prev_val = ~0;
183 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
184 data >= buffer;
185 data -= RABIN_WINDOW) {
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400186 unsigned int val = 0;
187 for (i = 1; i <= RABIN_WINDOW; i++)
188 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400189 if (val == prev_val) {
190 /* keep the lowest of consecutive identical blocks */
David Kastrupd2100862007-09-08 23:17:44 +0200191 entry[-1].entry.ptr = data + RABIN_WINDOW;
192 --entries;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400193 } else {
194 prev_val = val;
195 i = val & hmask;
David Kastrupd2100862007-09-08 23:17:44 +0200196 entry->entry.ptr = data + RABIN_WINDOW;
197 entry->entry.val = val;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400198 entry->next = hash[i];
199 hash[i] = entry++;
200 hash_count[i]++;
Nicolas Pitre06a9f922006-05-02 23:31:00 -0400201 }
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400202 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400203
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500204 /*
205 * Determine a limit on the number of entries in the same hash
Pavel Roskin82e5a822006-07-10 01:50:18 -0400206 * bucket. This guards us against pathological data sets causing
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500207 * really bad hash distribution with most entries in the same hash
208 * bucket that would bring us to O(m*n) computing costs (m and n
209 * corresponding to reference and target buffer sizes).
210 *
Nicolas Pitre08abe662006-04-24 23:07:47 -0400211 * Make sure none of the hash buckets has more entries than
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500212 * we're willing to test. Otherwise we cull the entry list
213 * uniformly to still preserve a good repartition across
214 * the reference buffer.
215 */
216 for (i = 0; i < hsize; i++) {
David Kastrup02e665c2007-09-08 23:25:55 +0200217 int acc;
218
219 if (hash_count[i] <= HASH_LIMIT)
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500220 continue;
David Kastrup02e665c2007-09-08 23:25:55 +0200221
David Kastrup02e665c2007-09-08 23:25:55 +0200222 /* We leave exactly HASH_LIMIT entries in the bucket */
Nicolas Pitrece85b052007-12-18 10:15:39 -0500223 entries -= hash_count[i] - HASH_LIMIT;
David Kastrup02e665c2007-09-08 23:25:55 +0200224
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500225 entry = hash[i];
David Kastrup02e665c2007-09-08 23:25:55 +0200226 acc = 0;
Nicolas Pitrece85b052007-12-18 10:15:39 -0500227
228 /*
229 * Assume that this loop is gone through exactly
230 * HASH_LIMIT times and is entered and left with
231 * acc==0. So the first statement in the loop
232 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
233 * to the accumulator, and the inner loop consequently
234 * is run (hash_count[i]-HASH_LIMIT) times, removing
235 * one element from the list each time. Since acc
236 * balances out to 0 at the final run, the inner loop
237 * body can't be left with entry==NULL. So we indeed
238 * encounter entry==NULL in the outer loop only.
239 */
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500240 do {
David Kastrup02e665c2007-09-08 23:25:55 +0200241 acc += hash_count[i] - HASH_LIMIT;
242 if (acc > 0) {
243 struct unpacked_index_entry *keep = entry;
244 do {
245 entry = entry->next;
246 acc -= HASH_LIMIT;
247 } while (acc > 0);
248 keep->next = entry->next;
249 }
250 entry = entry->next;
251 } while (entry);
Nicolas Pitrec13c6bf2006-03-08 14:32:50 -0500252 }
253 free(hash_count);
254
Nicolas Pitrece85b052007-12-18 10:15:39 -0500255 /*
256 * Now create the packed index in array form
257 * rather than linked lists.
258 */
David Kastrupd2100862007-09-08 23:17:44 +0200259 memsize = sizeof(*index)
260 + sizeof(*packed_hash) * (hsize+1)
261 + sizeof(*packed_entry) * entries;
David Kastrupd2100862007-09-08 23:17:44 +0200262 mem = malloc(memsize);
David Kastrupd2100862007-09-08 23:17:44 +0200263 if (!mem) {
264 free(hash);
265 return NULL;
266 }
267
268 index = mem;
269 index->memsize = memsize;
270 index->src_buf = buf;
271 index->src_size = bufsize;
272 index->hash_mask = hmask;
273
Pierre Habouzitf9c5a802007-12-18 02:39:57 +0100274 mem = index->hash;
David Kastrupd2100862007-09-08 23:17:44 +0200275 packed_hash = mem;
276 mem = packed_hash + (hsize+1);
277 packed_entry = mem;
278
David Kastrupd2100862007-09-08 23:17:44 +0200279 for (i = 0; i < hsize; i++) {
Nicolas Pitrece85b052007-12-18 10:15:39 -0500280 /*
281 * Coalesce all entries belonging to one linked list
282 * into consecutive array entries.
283 */
David Kastrupd2100862007-09-08 23:17:44 +0200284 packed_hash[i] = packed_entry;
285 for (entry = hash[i]; entry; entry = entry->next)
286 *packed_entry++ = entry->entry;
287 }
288
Nicolas Pitrece85b052007-12-18 10:15:39 -0500289 /* Sentinel value to indicate the length of the last hash bucket */
David Kastrupd2100862007-09-08 23:17:44 +0200290 packed_hash[hsize] = packed_entry;
Nicolas Pitrece85b052007-12-18 10:15:39 -0500291
David Kastrupd2100862007-09-08 23:17:44 +0200292 assert(packed_entry - (struct index_entry *)mem == entries);
293 free(hash);
294
Nicolas Pitre08abe662006-04-24 23:07:47 -0400295 return index;
296}
297
298void free_delta_index(struct delta_index *index)
299{
300 free(index);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400301}
302
Brian Downing11779e72007-07-12 07:55:48 -0500303unsigned long sizeof_delta_index(struct delta_index *index)
304{
305 if (index)
306 return index->memsize;
307 else
308 return 0;
309}
310
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400311/*
312 * The maximum size for any opcode sequence, including the initial header
Pavel Roskin82e5a822006-07-10 01:50:18 -0400313 * plus Rabin window plus biggest copy.
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400314 */
315#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500316
Nicolas Pitre08abe662006-04-24 23:07:47 -0400317void *
318create_delta(const struct delta_index *index,
319 const void *trg_buf, unsigned long trg_size,
320 unsigned long *delta_size, unsigned long max_size)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400321{
Martin Koegler3f0a67a2017-08-10 09:01:01 +0200322 unsigned int i, val;
323 off_t outpos, moff;
324 size_t l, outsize, msize;
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500325 int inscnt;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500326 const unsigned char *ref_data, *ref_top, *data, *top;
327 unsigned char *out;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400328
Nicolas Pitre08abe662006-04-24 23:07:47 -0400329 if (!trg_buf || !trg_size)
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500330 return NULL;
331
Nicolas Pitrea310d432005-05-19 10:27:14 -0400332 outpos = 0;
333 outsize = 8192;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500334 if (max_size && outsize >= max_size)
335 outsize = max_size + MAX_OP_SIZE + 1;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400336 out = malloc(outsize);
Nicolas Pitre08abe662006-04-24 23:07:47 -0400337 if (!out)
Nicolas Pitrea310d432005-05-19 10:27:14 -0400338 return NULL;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400339
340 /* store reference buffer size */
Martin Koegler3f0a67a2017-08-10 09:01:01 +0200341 l = index->src_size;
342 while (l >= 0x80) {
343 out[outpos++] = l | 0x80;
344 l >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400345 }
Martin Koegler3f0a67a2017-08-10 09:01:01 +0200346 out[outpos++] = l;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400347
348 /* store target buffer size */
Martin Koegler3f0a67a2017-08-10 09:01:01 +0200349 l = trg_size;
350 while (l >= 0x80) {
351 out[outpos++] = l | 0x80;
352 l >>= 7;
Nicolas Pitre69a2d422005-06-29 00:27:45 -0400353 }
Martin Koegler3f0a67a2017-08-10 09:01:01 +0200354 out[outpos++] = l;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400355
Nicolas Pitre08abe662006-04-24 23:07:47 -0400356 ref_data = index->src_buf;
357 ref_top = ref_data + index->src_size;
358 data = trg_buf;
Florian Forster1d7f1712006-06-18 17:18:09 +0200359 top = (const unsigned char *) trg_buf + trg_size;
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400360
361 outpos++;
362 val = 0;
363 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
364 out[outpos++] = *data;
365 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
366 }
367 inscnt = i;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400368
Nicolas Pitre84336692007-05-25 21:38:58 -0400369 moff = 0;
370 msize = 0;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500371 while (data < top) {
Nicolas Pitre84336692007-05-25 21:38:58 -0400372 if (msize < 4096) {
373 struct index_entry *entry;
374 val ^= U[data[-RABIN_WINDOW]];
375 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
376 i = val & index->hash_mask;
David Kastrupd2100862007-09-08 23:17:44 +0200377 for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
Nicolas Pitre84336692007-05-25 21:38:58 -0400378 const unsigned char *ref = entry->ptr;
379 const unsigned char *src = data;
380 unsigned int ref_size = ref_top - ref;
381 if (entry->val != val)
382 continue;
383 if (ref_size > top - src)
384 ref_size = top - src;
385 if (ref_size <= msize)
386 break;
387 while (ref_size-- && *src++ == *ref)
388 ref++;
389 if (msize < ref - entry->ptr) {
390 /* this is our best match so far */
391 msize = ref - entry->ptr;
392 moff = entry->ptr - ref_data;
393 if (msize >= 4096) /* good enough */
394 break;
395 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400396 }
397 }
398
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400399 if (msize < 4) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400400 if (!inscnt)
401 outpos++;
402 out[outpos++] = *data++;
403 inscnt++;
404 if (inscnt == 0x7f) {
405 out[outpos - inscnt - 1] = inscnt;
406 inscnt = 0;
407 }
Nicolas Pitre84336692007-05-25 21:38:58 -0400408 msize = 0;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400409 } else {
Nicolas Pitre84336692007-05-25 21:38:58 -0400410 unsigned int left;
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500411 unsigned char *op;
412
Nicolas Pitrea310d432005-05-19 10:27:14 -0400413 if (inscnt) {
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500414 while (moff && ref_data[moff-1] == data[-1]) {
Nicolas Pitre5a1fb2c2006-03-17 22:45:07 -0500415 /* we can match one byte back */
416 msize++;
417 moff--;
418 data--;
419 outpos--;
420 if (--inscnt)
421 continue;
422 outpos--; /* remove count slot */
423 inscnt--; /* make it -1 */
424 break;
425 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400426 out[outpos - inscnt - 1] = inscnt;
427 inscnt = 0;
428 }
429
Nicolas Pitre84336692007-05-25 21:38:58 -0400430 /* A copy op is currently limited to 64KB (pack v2) */
431 left = (msize < 0x10000) ? 0 : (msize - 0x10000);
432 msize -= left;
433
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500434 op = out + outpos++;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400435 i = 0x80;
436
Nicolas Pitre84336692007-05-25 21:38:58 -0400437 if (moff & 0x000000ff)
438 out[outpos++] = moff >> 0, i |= 0x01;
439 if (moff & 0x0000ff00)
440 out[outpos++] = moff >> 8, i |= 0x02;
441 if (moff & 0x00ff0000)
442 out[outpos++] = moff >> 16, i |= 0x04;
443 if (moff & 0xff000000)
444 out[outpos++] = moff >> 24, i |= 0x08;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400445
Nicolas Pitre84336692007-05-25 21:38:58 -0400446 if (msize & 0x00ff)
447 out[outpos++] = msize >> 0, i |= 0x10;
448 if (msize & 0xff00)
449 out[outpos++] = msize >> 8, i |= 0x20;
Nicolas Pitrea310d432005-05-19 10:27:14 -0400450
Nicolas Pitre8e1454b2006-02-21 20:43:17 -0500451 *op = i;
Nicolas Pitre84336692007-05-25 21:38:58 -0400452
453 data += msize;
454 moff += msize;
455 msize = left;
456
Martin Koeglerfed1ef92017-08-10 20:13:09 +0200457 if (moff > 0xffffffff)
458 msize = 0;
459
Nicolas Pitre84336692007-05-25 21:38:58 -0400460 if (msize < 4096) {
461 int j;
462 val = 0;
463 for (j = -RABIN_WINDOW; j < 0; j++)
464 val = ((val << 8) | data[j])
465 ^ T[val >> RABIN_SHIFT];
466 }
Nicolas Pitrea310d432005-05-19 10:27:14 -0400467 }
468
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500469 if (outpos >= outsize - MAX_OP_SIZE) {
Nicolas Pitrea310d432005-05-19 10:27:14 -0400470 void *tmp = out;
471 outsize = outsize * 3 / 2;
Nicolas Pitrefe474b52006-02-21 20:41:41 -0500472 if (max_size && outsize >= max_size)
473 outsize = max_size + MAX_OP_SIZE + 1;
474 if (max_size && outpos > max_size)
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400475 break;
Martin Koeglerb75c6c62007-05-29 21:08:35 +0200476 out = realloc(out, outsize);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400477 if (!out) {
478 free(tmp);
Nicolas Pitrea310d432005-05-19 10:27:14 -0400479 return NULL;
480 }
481 }
482 }
483
484 if (inscnt)
485 out[outpos - inscnt - 1] = inscnt;
486
Nicolas Pitre3dc5a9e2006-04-29 00:58:05 -0400487 if (max_size && outpos > max_size) {
488 free(out);
489 return NULL;
490 }
491
Nicolas Pitrea310d432005-05-19 10:27:14 -0400492 *delta_size = outpos;
493 return out;
494}