View Javadoc
1   /*
2    * This file is part of Waarp Project (named also Waarp or GG).
3    *
4    *  Copyright (c) 2019, Waarp SAS, and individual contributors by the @author
5    *  tags. See the COPYRIGHT.txt in the distribution for a full listing of
6    * individual contributors.
7    *
8    *  All Waarp Project is free software: you can redistribute it and/or
9    * modify it under the terms of the GNU General Public License as published by
10   * the Free Software Foundation, either version 3 of the License, or (at your
11   * option) any later version.
12   *
13   * Waarp is distributed in the hope that it will be useful, but WITHOUT ANY
14   * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15   * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16   *
17   *  You should have received a copy of the GNU General Public License along with
18   * Waarp . If not, see <http://www.gnu.org/licenses/>.
19   */
20  
21  /*
22   * Licensed under the Apache License, Version 2.0 (the "License");
23   * you may not use this file except in compliance with the License.
24   * You may obtain a copy of the License at
25   *
26   *     http://www.apache.org/licenses/LICENSE-2.0
27   *
28   * Unless required by applicable law or agreed to in writing, software
29   * distributed under the License is distributed on an "AS IS" BASIS,
30   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31   * See the License for the specific language governing permissions and
32   * limitations under the License.
33   */
34  package org.waarp.compress.zstdunsafe;
35  
36  import static org.waarp.compress.zstdunsafe.Constants.*;
37  
38  class CompressionParameters {
39    private static final int MIN_HASH_LOG = 6;
40  
41    public static final int DEFAULT_COMPRESSION_LEVEL = 3;
42    private static final int MAX_COMPRESSION_LEVEL = 22;
43  
44    private final int windowLog;
45    // largest match distance : larger == more compression, more memory needed during decompression
46    private final int chainLog;
47    // fully searched segment : larger == more compression, slower, more memory (useless for fast)
48    private final int hashLog;   // dispatch table : larger == faster, more memory
49    private final int searchLog;
50    // nb of searches : larger == more compression, slower
51    private final int searchLength;
52    // match length searched : larger == faster decompression, sometimes less compression
53    private final int targetLength;
54    // acceptable match size for optimal parser (only) : larger == more compression, slower
55    private final Strategy strategy;
56  
57    private static final CompressionParameters[][]
58        DEFAULT_COMPRESSION_PARAMETERS = new CompressionParameters[][] {
59        {
60            // default
61            new CompressionParameters(19, 12, 13, 1, 6, 1, Strategy.FAST),
62            /* base for negative levels */
63            new CompressionParameters(19, 13, 14, 1, 7, 0, Strategy.FAST),
64            /* level  1 */
65            new CompressionParameters(19, 15, 16, 1, 6, 0, Strategy.FAST),
66            /* level  2 */
67            new CompressionParameters(20, 16, 17, 1, 5, 1, Strategy.DFAST),
68            /* level  3 */
69            new CompressionParameters(20, 18, 18, 1, 5, 1, Strategy.DFAST),
70            /* level  4 */
71            new CompressionParameters(20, 18, 18, 2, 5, 2, Strategy.GREEDY),
72            /* level  5 */
73            new CompressionParameters(21, 18, 19, 2, 5, 4, Strategy.LAZY),
74            /* level  6 */
75            new CompressionParameters(21, 18, 19, 3, 5, 8, Strategy.LAZY2),
76            /* level  7 */
77            new CompressionParameters(21, 19, 19, 3, 5, 16, Strategy.LAZY2),
78            /* level  8 */
79            new CompressionParameters(21, 19, 20, 4, 5, 16, Strategy.LAZY2),
80            /* level  9 */
81            new CompressionParameters(21, 20, 21, 4, 5, 16, Strategy.LAZY2),
82            /* level 10 */
83            new CompressionParameters(21, 21, 22, 4, 5, 16, Strategy.LAZY2),
84            /* level 11 */
85            new CompressionParameters(22, 20, 22, 5, 5, 16, Strategy.LAZY2),
86            /* level 12 */
87            new CompressionParameters(22, 21, 22, 4, 5, 32, Strategy.BTLAZY2),
88            /* level 13 */
89            new CompressionParameters(22, 21, 22, 5, 5, 32, Strategy.BTLAZY2),
90            /* level 14 */
91            new CompressionParameters(22, 22, 22, 6, 5, 32, Strategy.BTLAZY2),
92            /* level 15 */
93            new CompressionParameters(22, 21, 22, 4, 5, 48, Strategy.BTOPT),
94            /* level 16 */
95            new CompressionParameters(23, 22, 22, 4, 4, 64, Strategy.BTOPT),
96            /* level 17 */
97            new CompressionParameters(23, 23, 22, 6, 3, 256, Strategy.BTOPT),
98            /* level 18 */
99            new CompressionParameters(23, 24, 22, 7, 3, 256, Strategy.BTULTRA),
100           /* level 19 */
101           new CompressionParameters(25, 25, 23, 7, 3, 256, Strategy.BTULTRA),
102           /* level 20 */
103           new CompressionParameters(26, 26, 24, 7, 3, 512, Strategy.BTULTRA),
104           /* level 21 */
105           new CompressionParameters(27, 27, 25, 9, 3, 999, Strategy.BTULTRA)
106           /* level 22 */
107       }, {
108       // for size <= 256 KB
109       new CompressionParameters(18, 12, 13, 1, 5, 1, Strategy.FAST),
110       /* base for negative levels */
111       new CompressionParameters(18, 13, 14, 1, 6, 0, Strategy.FAST),
112       /* level  1 */
113       new CompressionParameters(18, 14, 14, 1, 5, 1, Strategy.DFAST),
114       /* level  2 */
115       new CompressionParameters(18, 16, 16, 1, 4, 1, Strategy.DFAST),
116       /* level  3 */
117       new CompressionParameters(18, 16, 17, 2, 5, 2, Strategy.GREEDY),
118       /* level  4.*/
119       new CompressionParameters(18, 18, 18, 3, 5, 2, Strategy.GREEDY),
120       /* level  5.*/
121       new CompressionParameters(18, 18, 19, 3, 5, 4, Strategy.LAZY),
122       /* level  6.*/
123       new CompressionParameters(18, 18, 19, 4, 4, 4, Strategy.LAZY),
124       /* level  7 */
125       new CompressionParameters(18, 18, 19, 4, 4, 8, Strategy.LAZY2),
126       /* level  8 */
127       new CompressionParameters(18, 18, 19, 5, 4, 8, Strategy.LAZY2),
128       /* level  9 */
129       new CompressionParameters(18, 18, 19, 6, 4, 8, Strategy.LAZY2),
130       /* level 10 */
131       new CompressionParameters(18, 18, 19, 5, 4, 16, Strategy.BTLAZY2),
132       /* level 11.*/
133       new CompressionParameters(18, 19, 19, 6, 4, 16, Strategy.BTLAZY2),
134       /* level 12.*/
135       new CompressionParameters(18, 19, 19, 8, 4, 16, Strategy.BTLAZY2),
136       /* level 13 */
137       new CompressionParameters(18, 18, 19, 4, 4, 24, Strategy.BTOPT),
138       /* level 14.*/
139       new CompressionParameters(18, 18, 19, 4, 3, 24, Strategy.BTOPT),
140       /* level 15.*/
141       new CompressionParameters(18, 19, 19, 6, 3, 64, Strategy.BTOPT),
142       /* level 16.*/
143       new CompressionParameters(18, 19, 19, 8, 3, 128, Strategy.BTOPT),
144       /* level 17.*/
145       new CompressionParameters(18, 19, 19, 10, 3, 256, Strategy.BTOPT),
146       /* level 18.*/
147       new CompressionParameters(18, 19, 19, 10, 3, 256, Strategy.BTULTRA),
148       /* level 19.*/
149       new CompressionParameters(18, 19, 19, 11, 3, 512, Strategy.BTULTRA),
150       /* level 20.*/
151       new CompressionParameters(18, 19, 19, 12, 3, 512, Strategy.BTULTRA),
152       /* level 21.*/
153       new CompressionParameters(18, 19, 19, 13, 3, 999, Strategy.BTULTRA)
154       /* level 22.*/
155   }, {
156       // for size <= 128 KB
157       new CompressionParameters(17, 12, 12, 1, 5, 1, Strategy.FAST),
158       /* base for negative levels */
159       new CompressionParameters(17, 12, 13, 1, 6, 0, Strategy.FAST),
160       /* level  1 */
161       new CompressionParameters(17, 13, 15, 1, 5, 0, Strategy.FAST),
162       /* level  2 */
163       new CompressionParameters(17, 15, 16, 2, 5, 1, Strategy.DFAST),
164       /* level  3 */
165       new CompressionParameters(17, 17, 17, 2, 4, 1, Strategy.DFAST),
166       /* level  4 */
167       new CompressionParameters(17, 16, 17, 3, 4, 2, Strategy.GREEDY),
168       /* level  5 */
169       new CompressionParameters(17, 17, 17, 3, 4, 4, Strategy.LAZY),
170       /* level  6 */
171       new CompressionParameters(17, 17, 17, 3, 4, 8, Strategy.LAZY2),
172       /* level  7 */
173       new CompressionParameters(17, 17, 17, 4, 4, 8, Strategy.LAZY2),
174       /* level  8 */
175       new CompressionParameters(17, 17, 17, 5, 4, 8, Strategy.LAZY2),
176       /* level  9 */
177       new CompressionParameters(17, 17, 17, 6, 4, 8, Strategy.LAZY2),
178       /* level 10 */
179       new CompressionParameters(17, 17, 17, 7, 4, 8, Strategy.LAZY2),
180       /* level 11 */
181       new CompressionParameters(17, 18, 17, 6, 4, 16, Strategy.BTLAZY2),
182       /* level 12 */
183       new CompressionParameters(17, 18, 17, 8, 4, 16, Strategy.BTLAZY2),
184       /* level 13.*/
185       new CompressionParameters(17, 18, 17, 4, 4, 32, Strategy.BTOPT),
186       /* level 14.*/
187       new CompressionParameters(17, 18, 17, 6, 3, 64, Strategy.BTOPT),
188       /* level 15.*/
189       new CompressionParameters(17, 18, 17, 7, 3, 128, Strategy.BTOPT),
190       /* level 16.*/
191       new CompressionParameters(17, 18, 17, 7, 3, 256, Strategy.BTOPT),
192       /* level 17.*/
193       new CompressionParameters(17, 18, 17, 8, 3, 256, Strategy.BTOPT),
194       /* level 18.*/
195       new CompressionParameters(17, 18, 17, 8, 3, 256, Strategy.BTULTRA),
196       /* level 19.*/
197       new CompressionParameters(17, 18, 17, 9, 3, 256, Strategy.BTULTRA),
198       /* level 20.*/
199       new CompressionParameters(17, 18, 17, 10, 3, 256, Strategy.BTULTRA),
200       /* level 21.*/
201       new CompressionParameters(17, 18, 17, 11, 3, 512, Strategy.BTULTRA)
202       /* level 22.*/
203   }, {
204       // for size <= 16 KB
205       new CompressionParameters(14, 12, 13, 1, 5, 1, Strategy.FAST),
206       /* base for negative levels */
207       new CompressionParameters(14, 14, 15, 1, 5, 0, Strategy.FAST),
208       /* level  1 */
209       new CompressionParameters(14, 14, 15, 1, 4, 0, Strategy.FAST),
210       /* level  2 */
211       new CompressionParameters(14, 14, 14, 2, 4, 1, Strategy.DFAST),
212       /* level  3.*/
213       new CompressionParameters(14, 14, 14, 4, 4, 2, Strategy.GREEDY),
214       /* level  4.*/
215       new CompressionParameters(14, 14, 14, 3, 4, 4, Strategy.LAZY),
216       /* level  5.*/
217       new CompressionParameters(14, 14, 14, 4, 4, 8, Strategy.LAZY2),
218       /* level  6 */
219       new CompressionParameters(14, 14, 14, 6, 4, 8, Strategy.LAZY2),
220       /* level  7 */
221       new CompressionParameters(14, 14, 14, 8, 4, 8, Strategy.LAZY2),
222       /* level  8.*/
223       new CompressionParameters(14, 15, 14, 5, 4, 8, Strategy.BTLAZY2),
224       /* level  9.*/
225       new CompressionParameters(14, 15, 14, 9, 4, 8, Strategy.BTLAZY2),
226       /* level 10.*/
227       new CompressionParameters(14, 15, 14, 3, 4, 12, Strategy.BTOPT),
228       /* level 11.*/
229       new CompressionParameters(14, 15, 14, 6, 3, 16, Strategy.BTOPT),
230       /* level 12.*/
231       new CompressionParameters(14, 15, 14, 6, 3, 24, Strategy.BTOPT),
232       /* level 13.*/
233       new CompressionParameters(14, 15, 15, 6, 3, 48, Strategy.BTOPT),
234       /* level 14.*/
235       new CompressionParameters(14, 15, 15, 6, 3, 64, Strategy.BTOPT),
236       /* level 15.*/
237       new CompressionParameters(14, 15, 15, 6, 3, 96, Strategy.BTOPT),
238       /* level 16.*/
239       new CompressionParameters(14, 15, 15, 6, 3, 128, Strategy.BTOPT),
240       /* level 17.*/
241       new CompressionParameters(14, 15, 15, 8, 3, 256, Strategy.BTOPT),
242       /* level 18.*/
243       new CompressionParameters(14, 15, 15, 6, 3, 256, Strategy.BTULTRA),
244       /* level 19.*/
245       new CompressionParameters(14, 15, 15, 8, 3, 256, Strategy.BTULTRA),
246       /* level 20.*/
247       new CompressionParameters(14, 15, 15, 9, 3, 256, Strategy.BTULTRA),
248       /* level 21.*/
249       new CompressionParameters(14, 15, 15, 10, 3, 512, Strategy.BTULTRA)
250       /* level 22.*/
251   }
252   };
253 
254   public enum Strategy {
255     // from faster to stronger
256 
257     // YC: fast is a "single probe" strategy : at every position, we attempt to find a match, and give up if we don't find any. similar to lz4.
258     FAST(BlockCompressor.UNSUPPORTED),
259 
260     // YC: double_fast is a 2 attempts strategies. They are not symmetrical by the way. One attempt is "normal" while the second one looks for "long matches". It was
261     // empirically found that this was the best trade off. As can be guessed, it's slower than single-attempt, but find more and better matches, so compresses better.
262     DFAST(new DoubleFastBlockCompressor()),
263 
264     // YC: greedy uses a hash chain strategy. Every position is hashed, and all positions with same hash are chained. The algorithm goes through all candidates. There are
265     // diminishing returns in going deeper and deeper, so after a nb of attempts (which can be selected), it abandons the search. The best (longest) match wins. If there is
266     // one winner, it's immediately encoded.
267     GREEDY(BlockCompressor.UNSUPPORTED),
268 
269     // YC: lazy will do something similar to greedy, but will not encode immediately. It will search again at next position, in case it would find something better.
270     // It's actually fairly common to have a small match at position p hiding a more worthy one at position p+1. This obviously increases the search workload. But the
271     // resulting compressed stream generally contains larger matches, hence compresses better.
272     LAZY(BlockCompressor.UNSUPPORTED),
273 
274     // YC: lazy2 is same as lazy, but deeper. It will search at P, P+1 and then P+2 in case it would find something even better. More workload. Better matches.
275     LAZY2(BlockCompressor.UNSUPPORTED),
276 
277     // YC: btlazy2 is like lazy2, but trades the hash chain for a binary tree. This becomes necessary, as the nb of attempts becomes prohibitively expensive. The binary tree
278     // complexity increases with log of search depth, instead of proportionally with search depth. So searching deeper in history quickly becomes the dominant operation.
279     // btlazy2 cuts into that. But it costs 2x more memory. It's also relatively "slow", even when trying to cut its parameters to make it perform faster. So it's really
280     // a high compression strategy.
281     BTLAZY2(BlockCompressor.UNSUPPORTED),
282 
283     // YC: btopt is, well, a hell of lot more complex.
284     // It will compute and find multiple matches per position, will dynamically compare every path from point P to P+N, reverse the graph to find cheapest path, iterate on
285     // batches of overlapping matches, etc. It's much more expensive. But the compression ratio is also much better.
286     BTOPT(BlockCompressor.UNSUPPORTED),
287 
288     // YC: btultra is about the same, but doesn't cut as many corners (btopt "abandons" more quickly unpromising little gains). Slower, stronger.
289     BTULTRA(BlockCompressor.UNSUPPORTED);
290 
291     private final BlockCompressor compressor;
292 
293     Strategy(final BlockCompressor compressor) {
294       this.compressor = compressor;
295     }
296 
297     public BlockCompressor getCompressor() {
298       return compressor;
299     }
300   }
301 
302   public CompressionParameters(final int windowLog, final int chainLog,
303                                final int hashLog, final int searchLog,
304                                final int searchLength, final int targetLength,
305                                final Strategy strategy) {
306     this.windowLog = windowLog;
307     this.chainLog = chainLog;
308     this.hashLog = hashLog;
309     this.searchLog = searchLog;
310     this.searchLength = searchLength;
311     this.targetLength = targetLength;
312     this.strategy = strategy;
313   }
314 
315   public int getWindowLog() {
316     return windowLog;
317   }
318 
319   public int getSearchLength() {
320     return searchLength;
321   }
322 
323   public int getChainLog() {
324     return chainLog;
325   }
326 
327   public int getHashLog() {
328     return hashLog;
329   }
330 
331   public int getSearchLog() {
332     return searchLog;
333   }
334 
335   public int getTargetLength() {
336     return targetLength;
337   }
338 
339   public Strategy getStrategy() {
340     return strategy;
341   }
342 
343   public static CompressionParameters compute(final int compressionLevel,
344                                               final int inputSize) {
345     final CompressionParameters defaultParameters =
346         getDefaultParameters(compressionLevel, inputSize);
347 
348     int targetLength = defaultParameters.targetLength;
349     int windowLog = defaultParameters.windowLog;
350     int chainLog = defaultParameters.chainLog;
351     int hashLog = defaultParameters.hashLog;
352     final int searchLog = defaultParameters.searchLog;
353     final int searchLength = defaultParameters.searchLength;
354     final Strategy strategy = defaultParameters.strategy;
355 
356     if (compressionLevel < 0) {
357       targetLength = -compressionLevel;   // acceleration factor
358     }
359 
360     // resize windowLog if input is small enough, to use less memory
361     final long maxWindowResize = 1L << (MAX_WINDOW_LOG - 1);
362     if (inputSize < maxWindowResize) {
363       final int hashSizeMin = 1 << MIN_HASH_LOG;
364       final int inputSizeLog = (inputSize < hashSizeMin)? MIN_HASH_LOG :
365           Util.highestBit(inputSize - 1) + 1;
366       if (windowLog > inputSizeLog) {
367         windowLog = inputSizeLog;
368       }
369     }
370 
371     if (hashLog > windowLog + 1) {
372       hashLog = windowLog + 1;
373     }
374 
375     final int cycleLog = Util.cycleLog(chainLog, strategy);
376     if (cycleLog > windowLog) {
377       chainLog -= (cycleLog - windowLog);
378     }
379 
380     if (windowLog < MIN_WINDOW_LOG) {
381       windowLog = MIN_WINDOW_LOG;
382     }
383 
384     return new CompressionParameters(windowLog, chainLog, hashLog, searchLog,
385                                      searchLength, targetLength, strategy);
386   }
387 
388   private static CompressionParameters getDefaultParameters(
389       final int compressionLevel, final long estimatedInputSize) {
390     int table = 0;
391 
392     if (estimatedInputSize != 0) {
393       if (estimatedInputSize <= 16 * 1024) {
394         table = 3;
395       } else if (estimatedInputSize <= 128 * 1024) {
396         table = 2;
397       } else if (estimatedInputSize <= 256 * 1024) {
398         table = 1;
399       }
400     }
401 
402     int row = DEFAULT_COMPRESSION_LEVEL;
403 
404     if (compressionLevel !=
405         0) { // TODO: figure out better way to indicate default compression level
406       row = Math.min(Math.max(0, compressionLevel), MAX_COMPRESSION_LEVEL);
407     }
408 
409     return DEFAULT_COMPRESSION_PARAMETERS[table][row];
410   }
411 }