1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package org.waarp.compress.zstdsafe;
35
36 import static org.waarp.compress.zstdsafe.Constants.*;
37 import static org.waarp.compress.zstdsafe.UnsafeUtil.*;
38
39 class DoubleFastBlockCompressor implements BlockCompressor {
40 private static final int MIN_MATCH = 3;
41 private static final int SEARCH_STRENGTH = 8;
42 private static final int REP_MOVE = Constants.REPEATED_OFFSET_COUNT - 1;
43
44 public int compressBlock(final byte[] inputBase, final int inputAddress,
45 final int inputSize, final SequenceStore output,
46 final BlockCompressionState state,
47 final RepeatedOffsets offsets,
48 final CompressionParameters parameters) {
49 final int matchSearchLength = Math.max(parameters.getSearchLength(), 4);
50
51
52
53
54
55 final int baseAddress = state.getBaseAddress();
56 final int windowBaseAddress = baseAddress + state.getWindowBaseOffset();
57
58 final int[] longHashTable = state.hashTable;
59 final int longHashBits = parameters.getHashLog();
60
61 final int[] shortHashTable = state.chainTable;
62 final int shortHashBits = parameters.getChainLog();
63
64 final int inputEnd = inputAddress + inputSize;
65 final int inputLimit = inputEnd -
66 SIZE_OF_LONG;
67
68 int input = inputAddress;
69 int anchor = inputAddress;
70
71 int offset1 = offsets.getOffset0();
72 int offset2 = offsets.getOffset1();
73
74 int savedOffset = 0;
75
76 if (input - windowBaseAddress == 0) {
77 input++;
78 }
79 final int maxRep = input - windowBaseAddress;
80
81 if (offset2 > maxRep) {
82 savedOffset = offset2;
83 offset2 = 0;
84 }
85
86 if (offset1 > maxRep) {
87 savedOffset = offset1;
88 offset1 = 0;
89 }
90
91 while (input <
92 inputLimit) {
93 final int shortHash =
94 hash(inputBase, input, shortHashBits, matchSearchLength);
95 int shortMatchAddress = baseAddress + shortHashTable[shortHash];
96
97 final int longHash = hash8(getLong(inputBase, input), longHashBits);
98 int longMatchAddress = baseAddress + longHashTable[longHash];
99
100
101 final int current = input - baseAddress;
102 longHashTable[longHash] = current;
103 shortHashTable[shortHash] = current;
104
105 int matchLength;
106 final int offset;
107
108 if (offset1 > 0 && getInt(inputBase, input + 1 - offset1) ==
109 getInt(inputBase, input + 1)) {
110
111 matchLength = count(inputBase, input + 1 + SIZE_OF_INT, inputEnd,
112 input + 1 + SIZE_OF_INT - offset1) + SIZE_OF_INT;
113 input++;
114 output.storeSequence(inputBase, anchor, input - anchor, 0,
115 matchLength - MIN_MATCH);
116 } else {
117
118 if (longMatchAddress > windowBaseAddress &&
119 getLong(inputBase, longMatchAddress) == getLong(inputBase, input)) {
120 matchLength = count(inputBase, input + SIZE_OF_LONG, inputEnd,
121 longMatchAddress + SIZE_OF_LONG) + SIZE_OF_LONG;
122 offset = input - longMatchAddress;
123 while (input > anchor && longMatchAddress > windowBaseAddress &&
124 inputBase[input - 1] == inputBase[longMatchAddress - 1]) {
125 input--;
126 longMatchAddress--;
127 matchLength++;
128 }
129 } else {
130
131 if (shortMatchAddress > windowBaseAddress &&
132 getInt(inputBase, shortMatchAddress) ==
133 getInt(inputBase, input)) {
134 final int nextOffsetHash =
135 hash8(getLong(inputBase, input + 1), longHashBits);
136 int nextOffsetMatchAddress =
137 baseAddress + longHashTable[nextOffsetHash];
138 longHashTable[nextOffsetHash] = current + 1;
139
140
141 if (nextOffsetMatchAddress > windowBaseAddress &&
142 getLong(inputBase, nextOffsetMatchAddress) ==
143 getLong(inputBase, input + 1)) {
144 matchLength = count(inputBase, input + 1 + SIZE_OF_LONG, inputEnd,
145 nextOffsetMatchAddress + SIZE_OF_LONG) +
146 SIZE_OF_LONG;
147 input++;
148 offset = input - nextOffsetMatchAddress;
149 while (input > anchor &&
150 nextOffsetMatchAddress > windowBaseAddress &&
151 inputBase[input - 1] ==
152 inputBase[nextOffsetMatchAddress - 1]) {
153 input--;
154 nextOffsetMatchAddress--;
155 matchLength++;
156 }
157 } else {
158
159 matchLength = count(inputBase, input + SIZE_OF_INT, inputEnd,
160 shortMatchAddress + SIZE_OF_INT) +
161 SIZE_OF_INT;
162 offset = input - shortMatchAddress;
163 while (input > anchor && shortMatchAddress > windowBaseAddress &&
164 inputBase[input - 1] == inputBase[shortMatchAddress - 1]) {
165 input--;
166 shortMatchAddress--;
167 matchLength++;
168 }
169 }
170 } else {
171 input += ((input - anchor) >> SEARCH_STRENGTH) + 1;
172 continue;
173 }
174 }
175
176 offset2 = offset1;
177 offset1 = offset;
178
179 output.storeSequence(inputBase, anchor, input - anchor,
180 offset + REP_MOVE, matchLength - MIN_MATCH);
181 }
182
183 input += matchLength;
184 anchor = input;
185
186 if (input <= inputLimit) {
187
188 longHashTable[hash8(getLong(inputBase, baseAddress + current + 2),
189 longHashBits)] = current + 2;
190 shortHashTable[hash(inputBase, baseAddress + current + 2, shortHashBits,
191 matchSearchLength)] = current + 2;
192
193 longHashTable[hash8(getLong(inputBase, input - 2), longHashBits)] =
194 input - 2 - baseAddress;
195 shortHashTable[hash(inputBase, input - 2, shortHashBits,
196 matchSearchLength)] = input - 2 - baseAddress;
197
198 while (input <= inputLimit && offset2 > 0 &&
199 getInt(inputBase, input) == getInt(inputBase, input - offset2)) {
200 final int repetitionLength =
201 count(inputBase, input + SIZE_OF_INT, inputEnd,
202 input + SIZE_OF_INT - offset2) + SIZE_OF_INT;
203
204
205 final int temp = offset2;
206 offset2 = offset1;
207 offset1 = temp;
208
209 shortHashTable[hash(inputBase, input, shortHashBits,
210 matchSearchLength)] = input - baseAddress;
211 longHashTable[hash8(getLong(inputBase, input), longHashBits)] =
212 input - baseAddress;
213
214 output.storeSequence(inputBase, anchor, 0, 0,
215 repetitionLength - MIN_MATCH);
216
217 input += repetitionLength;
218 anchor = input;
219 }
220 }
221 }
222
223
224 offsets.saveOffset0(offset1 != 0? offset1 : savedOffset);
225 offsets.saveOffset1(offset2 != 0? offset2 : savedOffset);
226
227
228 return inputEnd - anchor;
229 }
230
231
232
233
234
235
236 public static int count(final byte[] inputBase, final int inputAddress,
237 final int inputLimit, final int matchAddress) {
238 int input = inputAddress;
239 int match = matchAddress;
240
241 final int remaining = inputLimit - inputAddress;
242
243
244 int count = 0;
245 while (count < remaining - (SIZE_OF_LONG - 1)) {
246 final long diff = getLong(inputBase, match) ^ getLong(inputBase, input);
247 if (diff != 0) {
248 return count + (Long.numberOfTrailingZeros(diff) >> 3);
249 }
250
251 count += SIZE_OF_LONG;
252 input += SIZE_OF_LONG;
253 match += SIZE_OF_LONG;
254 }
255
256 while (count < remaining && inputBase[match] == inputBase[input]) {
257 count++;
258 input++;
259 match++;
260 }
261
262 return count;
263 }
264
265 private static int hash(final byte[] inputBase, final int inputAddress,
266 final int bits, final int matchSearchLength) {
267 switch (matchSearchLength) {
268 case 8:
269 return hash8(getLong(inputBase, inputAddress), bits);
270 case 7:
271 return hash7(getLong(inputBase, inputAddress), bits);
272 case 6:
273 return hash6(getLong(inputBase, inputAddress), bits);
274 case 5:
275 return hash5(getLong(inputBase, inputAddress), bits);
276 default:
277 return hash4(getInt(inputBase, inputAddress), bits);
278 }
279 }
280
281 private static final int PRIME_4_BYTES = 0x9E3779B1;
282 private static final long PRIME_5_BYTES = 0xCF1BBCDCBBL;
283 private static final long PRIME_6_BYTES = 0xCF1BBCDCBF9BL;
284 private static final long PRIME_7_BYTES = 0xCF1BBCDCBFA563L;
285 private static final long PRIME_8_BYTES = 0xCF1BBCDCB7A56463L;
286
287 private static int hash4(final int value, final int bits) {
288 return (value * PRIME_4_BYTES) >>> (Integer.SIZE - bits);
289 }
290
291 private static int hash5(final long value, final int bits) {
292 return (int) (((value << (Long.SIZE - 40)) * PRIME_5_BYTES) >>>
293 (Long.SIZE - bits));
294 }
295
296 private static int hash6(final long value, final int bits) {
297 return (int) (((value << (Long.SIZE - 48)) * PRIME_6_BYTES) >>>
298 (Long.SIZE - bits));
299 }
300
301 private static int hash7(final long value, final int bits) {
302 return (int) (((value << (Long.SIZE - 56)) * PRIME_7_BYTES) >>>
303 (Long.SIZE - bits));
304 }
305
306 private static int hash8(final long value, final int bits) {
307 return (int) ((value * PRIME_8_BYTES) >>> (Long.SIZE - bits));
308 }
309 }