1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package org.waarp.compress.zstdunsafe;
35
36 class DoubleFastBlockCompressor implements BlockCompressor {
37 private static final int MIN_MATCH = 3;
38 private static final int SEARCH_STRENGTH = 8;
39 private static final int REP_MOVE = Constants.REPEATED_OFFSET_COUNT - 1;
40
41 public int compressBlock(final Object inputBase, final long inputAddress,
42 final int inputSize, final SequenceStore output,
43 final BlockCompressionState state,
44 final RepeatedOffsets offsets,
45 final CompressionParameters parameters) {
46 final int matchSearchLength = Math.max(parameters.getSearchLength(), 4);
47
48
49
50
51
52 final long baseAddress = state.getBaseAddress();
53 final long windowBaseAddress = baseAddress + state.getWindowBaseOffset();
54
55 final int[] longHashTable = state.hashTable;
56 final int longHashBits = parameters.getHashLog();
57
58 final int[] shortHashTable = state.chainTable;
59 final int shortHashBits = parameters.getChainLog();
60
61 final long inputEnd = inputAddress + inputSize;
62 final long inputLimit = inputEnd -
63 Constants.SIZE_OF_LONG;
64
65 long input = inputAddress;
66 long anchor = inputAddress;
67
68 int offset1 = offsets.getOffset0();
69 int offset2 = offsets.getOffset1();
70
71 int savedOffset = 0;
72
73 if (input - windowBaseAddress == 0) {
74 input++;
75 }
76 final int maxRep = (int) (input - windowBaseAddress);
77
78 if (offset2 > maxRep) {
79 savedOffset = offset2;
80 offset2 = 0;
81 }
82
83 if (offset1 > maxRep) {
84 savedOffset = offset1;
85 offset1 = 0;
86 }
87
88 while (input <
89 inputLimit) {
90 final int shortHash =
91 hash(inputBase, input, shortHashBits, matchSearchLength);
92 long shortMatchAddress = baseAddress + shortHashTable[shortHash];
93
94 final int longHash =
95 hash8(UnsafeUtil.UNSAFE.getLong(inputBase, input), longHashBits);
96 long longMatchAddress = baseAddress + longHashTable[longHash];
97
98
99 final int current = (int) (input - baseAddress);
100 longHashTable[longHash] = current;
101 shortHashTable[shortHash] = current;
102
103 int matchLength;
104 final int offset;
105
106 if (offset1 > 0 &&
107 UnsafeUtil.UNSAFE.getInt(inputBase, input + 1 - offset1) ==
108 UnsafeUtil.UNSAFE.getInt(inputBase, input + 1)) {
109
110 matchLength =
111 count(inputBase, input + 1 + Constants.SIZE_OF_INT, inputEnd,
112 input + 1 + Constants.SIZE_OF_INT - offset1) +
113 Constants.SIZE_OF_INT;
114 input++;
115 output.storeSequence(inputBase, anchor, (int) (input - anchor), 0,
116 matchLength - MIN_MATCH);
117 } else {
118
119 if (longMatchAddress > windowBaseAddress &&
120 UnsafeUtil.UNSAFE.getLong(inputBase, longMatchAddress) ==
121 UnsafeUtil.UNSAFE.getLong(inputBase, input)) {
122 matchLength =
123 count(inputBase, input + Constants.SIZE_OF_LONG, inputEnd,
124 longMatchAddress + Constants.SIZE_OF_LONG) +
125 Constants.SIZE_OF_LONG;
126 offset = (int) (input - longMatchAddress);
127 while (input > anchor && longMatchAddress > windowBaseAddress &&
128 UnsafeUtil.UNSAFE.getByte(inputBase, input - 1) ==
129 UnsafeUtil.UNSAFE.getByte(inputBase, longMatchAddress - 1)) {
130 input--;
131 longMatchAddress--;
132 matchLength++;
133 }
134 } else {
135
136 if (shortMatchAddress > windowBaseAddress &&
137 UnsafeUtil.UNSAFE.getInt(inputBase, shortMatchAddress) ==
138 UnsafeUtil.UNSAFE.getInt(inputBase, input)) {
139 final int nextOffsetHash =
140 hash8(UnsafeUtil.UNSAFE.getLong(inputBase, input + 1),
141 longHashBits);
142 long nextOffsetMatchAddress =
143 baseAddress + longHashTable[nextOffsetHash];
144 longHashTable[nextOffsetHash] = current + 1;
145
146
147 if (nextOffsetMatchAddress > windowBaseAddress &&
148 UnsafeUtil.UNSAFE.getLong(inputBase, nextOffsetMatchAddress) ==
149 UnsafeUtil.UNSAFE.getLong(inputBase, input + 1)) {
150 matchLength =
151 count(inputBase, input + 1 + Constants.SIZE_OF_LONG, inputEnd,
152 nextOffsetMatchAddress + Constants.SIZE_OF_LONG) +
153 Constants.SIZE_OF_LONG;
154 input++;
155 offset = (int) (input - nextOffsetMatchAddress);
156 while (input > anchor &&
157 nextOffsetMatchAddress > windowBaseAddress &&
158 UnsafeUtil.UNSAFE.getByte(inputBase, input - 1) ==
159 UnsafeUtil.UNSAFE.getByte(inputBase,
160 nextOffsetMatchAddress - 1)) {
161 input--;
162 nextOffsetMatchAddress--;
163 matchLength++;
164 }
165 } else {
166
167 matchLength =
168 count(inputBase, input + Constants.SIZE_OF_INT, inputEnd,
169 shortMatchAddress + Constants.SIZE_OF_INT) +
170 Constants.SIZE_OF_INT;
171 offset = (int) (input - shortMatchAddress);
172 while (input > anchor && shortMatchAddress > windowBaseAddress &&
173 UnsafeUtil.UNSAFE.getByte(inputBase, input - 1) ==
174 UnsafeUtil.UNSAFE.getByte(inputBase,
175 shortMatchAddress - 1)) {
176 input--;
177 shortMatchAddress--;
178 matchLength++;
179 }
180 }
181 } else {
182 input += ((input - anchor) >> SEARCH_STRENGTH) + 1;
183 continue;
184 }
185 }
186
187 offset2 = offset1;
188 offset1 = offset;
189
190 output.storeSequence(inputBase, anchor, (int) (input - anchor),
191 offset + REP_MOVE, matchLength - MIN_MATCH);
192 }
193
194 input += matchLength;
195 anchor = input;
196
197 if (input <= inputLimit) {
198
199 longHashTable[hash8(
200 UnsafeUtil.UNSAFE.getLong(inputBase, baseAddress + current + 2),
201 longHashBits)] = current + 2;
202 shortHashTable[hash(inputBase, baseAddress + current + 2, shortHashBits,
203 matchSearchLength)] = current + 2;
204
205 longHashTable[hash8(UnsafeUtil.UNSAFE.getLong(inputBase, input - 2),
206 longHashBits)] = (int) (input - 2 - baseAddress);
207 shortHashTable[hash(inputBase, input - 2, shortHashBits,
208 matchSearchLength)] =
209 (int) (input - 2 - baseAddress);
210
211 while (input <= inputLimit && offset2 > 0 &&
212 UnsafeUtil.UNSAFE.getInt(inputBase, input) ==
213 UnsafeUtil.UNSAFE.getInt(inputBase, input - offset2)) {
214 final int repetitionLength =
215 count(inputBase, input + Constants.SIZE_OF_INT, inputEnd,
216 input + Constants.SIZE_OF_INT - offset2) +
217 Constants.SIZE_OF_INT;
218
219
220 final int temp = offset2;
221 offset2 = offset1;
222 offset1 = temp;
223
224 shortHashTable[hash(inputBase, input, shortHashBits,
225 matchSearchLength)] = (int) (input - baseAddress);
226 longHashTable[hash8(UnsafeUtil.UNSAFE.getLong(inputBase, input),
227 longHashBits)] = (int) (input - baseAddress);
228
229 output.storeSequence(inputBase, anchor, 0, 0,
230 repetitionLength - MIN_MATCH);
231
232 input += repetitionLength;
233 anchor = input;
234 }
235 }
236 }
237
238
239 offsets.saveOffset0(offset1 != 0? offset1 : savedOffset);
240 offsets.saveOffset1(offset2 != 0? offset2 : savedOffset);
241
242
243 return (int) (inputEnd - anchor);
244 }
245
246
247
248
249
250
251 public static int count(final Object inputBase, final long inputAddress,
252 final long inputLimit, final long matchAddress) {
253 long input = inputAddress;
254 long match = matchAddress;
255
256 final int remaining = (int) (inputLimit - inputAddress);
257
258
259 int count = 0;
260 while (count < remaining - (Constants.SIZE_OF_LONG - 1)) {
261 final long diff = UnsafeUtil.UNSAFE.getLong(inputBase, match) ^
262 UnsafeUtil.UNSAFE.getLong(inputBase, input);
263 if (diff != 0) {
264 return count + (Long.numberOfTrailingZeros(diff) >> 3);
265 }
266
267 count += Constants.SIZE_OF_LONG;
268 input += Constants.SIZE_OF_LONG;
269 match += Constants.SIZE_OF_LONG;
270 }
271
272 while (count < remaining && UnsafeUtil.UNSAFE.getByte(inputBase, match) ==
273 UnsafeUtil.UNSAFE.getByte(inputBase, input)) {
274 count++;
275 input++;
276 match++;
277 }
278
279 return count;
280 }
281
282 private static int hash(final Object inputBase, final long inputAddress,
283 final int bits, final int matchSearchLength) {
284 switch (matchSearchLength) {
285 case 8:
286 return hash8(UnsafeUtil.UNSAFE.getLong(inputBase, inputAddress), bits);
287 case 7:
288 return hash7(UnsafeUtil.UNSAFE.getLong(inputBase, inputAddress), bits);
289 case 6:
290 return hash6(UnsafeUtil.UNSAFE.getLong(inputBase, inputAddress), bits);
291 case 5:
292 return hash5(UnsafeUtil.UNSAFE.getLong(inputBase, inputAddress), bits);
293 default:
294 return hash4(UnsafeUtil.UNSAFE.getInt(inputBase, inputAddress), bits);
295 }
296 }
297
298 private static final int PRIME_4_BYTES = 0x9E3779B1;
299 private static final long PRIME_5_BYTES = 0xCF1BBCDCBBL;
300 private static final long PRIME_6_BYTES = 0xCF1BBCDCBF9BL;
301 private static final long PRIME_7_BYTES = 0xCF1BBCDCBFA563L;
302 private static final long PRIME_8_BYTES = 0xCF1BBCDCB7A56463L;
303
304 private static int hash4(final int value, final int bits) {
305 return (value * PRIME_4_BYTES) >>> (Integer.SIZE - bits);
306 }
307
308 private static int hash5(final long value, final int bits) {
309 return (int) (((value << (Long.SIZE - 40)) * PRIME_5_BYTES) >>>
310 (Long.SIZE - bits));
311 }
312
313 private static int hash6(final long value, final int bits) {
314 return (int) (((value << (Long.SIZE - 48)) * PRIME_6_BYTES) >>>
315 (Long.SIZE - bits));
316 }
317
318 private static int hash7(final long value, final int bits) {
319 return (int) (((value << (Long.SIZE - 56)) * PRIME_7_BYTES) >>>
320 (Long.SIZE - bits));
321 }
322
323 private static int hash8(final long value, final int bits) {
324 return (int) ((value * PRIME_8_BYTES) >>> (Long.SIZE - bits));
325 }
326 }