View Javadoc
1   /*
2    * This file is part of Waarp Project (named also Waarp or GG).
3    *
4    *  Copyright (c) 2019, Waarp SAS, and individual contributors by the @author
5    *  tags. See the COPYRIGHT.txt in the distribution for a full listing of
6    * individual contributors.
7    *
8    *  All Waarp Project is free software: you can redistribute it and/or
9    * modify it under the terms of the GNU General Public License as published by
10   * the Free Software Foundation, either version 3 of the License, or (at your
11   * option) any later version.
12   *
13   * Waarp is distributed in the hope that it will be useful, but WITHOUT ANY
14   * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15   * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16   *
17   *  You should have received a copy of the GNU General Public License along with
18   * Waarp . If not, see <http://www.gnu.org/licenses/>.
19   */
20  
21  /*
22   * Licensed under the Apache License, Version 2.0 (the "License");
23   * you may not use this file except in compliance with the License.
24   * You may obtain a copy of the License at
25   *
26   *     http://www.apache.org/licenses/LICENSE-2.0
27   *
28   * Unless required by applicable law or agreed to in writing, software
29   * distributed under the License is distributed on an "AS IS" BASIS,
30   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31   * See the License for the specific language governing permissions and
32   * limitations under the License.
33   */
34  package org.waarp.compress.zstdsafe;
35  
36  import static org.waarp.compress.zstdsafe.Constants.*;
37  import static org.waarp.compress.zstdsafe.Util.*;
38  
39  class CompressionParameters {
40    private static final int MIN_HASH_LOG = 6;
41  
42    public static final int DEFAULT_COMPRESSION_LEVEL = 3;
43    private static final int MAX_COMPRESSION_LEVEL = 22;
44  
45    private final int windowLog;
46    // largest match distance : larger == more compression, more memory needed during decompression
47    private final int chainLog;
48    // fully searched segment : larger == more compression, slower, more memory (useless for fast)
49    private final int hashLog;   // dispatch table : larger == faster, more memory
50    private final int searchLog;
51    // nb of searches : larger == more compression, slower
52    private final int searchLength;
53    // match length searched : larger == faster decompression, sometimes less compression
54    private final int targetLength;
55    // acceptable match size for optimal parser (only) : larger == more compression, slower
56    private final Strategy strategy;
57  
58    private static final CompressionParameters[][]
59        DEFAULT_COMPRESSION_PARAMETERS = new CompressionParameters[][] {
60        {
61            // default
62            new CompressionParameters(19, 12, 13, 1, 6, 1, Strategy.FAST),
63            /* base for negative levels */
64            new CompressionParameters(19, 13, 14, 1, 7, 0, Strategy.FAST),
65            /* level  1 */
66            new CompressionParameters(19, 15, 16, 1, 6, 0, Strategy.FAST),
67            /* level  2 */
68            new CompressionParameters(20, 16, 17, 1, 5, 1, Strategy.DFAST),
69            /* level  3 */
70            new CompressionParameters(20, 18, 18, 1, 5, 1, Strategy.DFAST),
71            /* level  4 */
72            new CompressionParameters(20, 18, 18, 2, 5, 2, Strategy.GREEDY),
73            /* level  5 */
74            new CompressionParameters(21, 18, 19, 2, 5, 4, Strategy.LAZY),
75            /* level  6 */
76            new CompressionParameters(21, 18, 19, 3, 5, 8, Strategy.LAZY2),
77            /* level  7 */
78            new CompressionParameters(21, 19, 19, 3, 5, 16, Strategy.LAZY2),
79            /* level  8 */
80            new CompressionParameters(21, 19, 20, 4, 5, 16, Strategy.LAZY2),
81            /* level  9 */
82            new CompressionParameters(21, 20, 21, 4, 5, 16, Strategy.LAZY2),
83            /* level 10 */
84            new CompressionParameters(21, 21, 22, 4, 5, 16, Strategy.LAZY2),
85            /* level 11 */
86            new CompressionParameters(22, 20, 22, 5, 5, 16, Strategy.LAZY2),
87            /* level 12 */
88            new CompressionParameters(22, 21, 22, 4, 5, 32, Strategy.BTLAZY2),
89            /* level 13 */
90            new CompressionParameters(22, 21, 22, 5, 5, 32, Strategy.BTLAZY2),
91            /* level 14 */
92            new CompressionParameters(22, 22, 22, 6, 5, 32, Strategy.BTLAZY2),
93            /* level 15 */
94            new CompressionParameters(22, 21, 22, 4, 5, 48, Strategy.BTOPT),
95            /* level 16 */
96            new CompressionParameters(23, 22, 22, 4, 4, 64, Strategy.BTOPT),
97            /* level 17 */
98            new CompressionParameters(23, 23, 22, 6, 3, 256, Strategy.BTOPT),
99            /* level 18 */
100           new CompressionParameters(23, 24, 22, 7, 3, 256, Strategy.BTULTRA),
101           /* level 19 */
102           new CompressionParameters(25, 25, 23, 7, 3, 256, Strategy.BTULTRA),
103           /* level 20 */
104           new CompressionParameters(26, 26, 24, 7, 3, 512, Strategy.BTULTRA),
105           /* level 21 */
106           new CompressionParameters(27, 27, 25, 9, 3, 999, Strategy.BTULTRA)
107           /* level 22 */
108       }, {
109       // for size <= 256 KB
110       new CompressionParameters(18, 12, 13, 1, 5, 1, Strategy.FAST),
111       /* base for negative levels */
112       new CompressionParameters(18, 13, 14, 1, 6, 0, Strategy.FAST),
113       /* level  1 */
114       new CompressionParameters(18, 14, 14, 1, 5, 1, Strategy.DFAST),
115       /* level  2 */
116       new CompressionParameters(18, 16, 16, 1, 4, 1, Strategy.DFAST),
117       /* level  3 */
118       new CompressionParameters(18, 16, 17, 2, 5, 2, Strategy.GREEDY),
119       /* level  4.*/
120       new CompressionParameters(18, 18, 18, 3, 5, 2, Strategy.GREEDY),
121       /* level  5.*/
122       new CompressionParameters(18, 18, 19, 3, 5, 4, Strategy.LAZY),
123       /* level  6.*/
124       new CompressionParameters(18, 18, 19, 4, 4, 4, Strategy.LAZY),
125       /* level  7 */
126       new CompressionParameters(18, 18, 19, 4, 4, 8, Strategy.LAZY2),
127       /* level  8 */
128       new CompressionParameters(18, 18, 19, 5, 4, 8, Strategy.LAZY2),
129       /* level  9 */
130       new CompressionParameters(18, 18, 19, 6, 4, 8, Strategy.LAZY2),
131       /* level 10 */
132       new CompressionParameters(18, 18, 19, 5, 4, 16, Strategy.BTLAZY2),
133       /* level 11.*/
134       new CompressionParameters(18, 19, 19, 6, 4, 16, Strategy.BTLAZY2),
135       /* level 12.*/
136       new CompressionParameters(18, 19, 19, 8, 4, 16, Strategy.BTLAZY2),
137       /* level 13 */
138       new CompressionParameters(18, 18, 19, 4, 4, 24, Strategy.BTOPT),
139       /* level 14.*/
140       new CompressionParameters(18, 18, 19, 4, 3, 24, Strategy.BTOPT),
141       /* level 15.*/
142       new CompressionParameters(18, 19, 19, 6, 3, 64, Strategy.BTOPT),
143       /* level 16.*/
144       new CompressionParameters(18, 19, 19, 8, 3, 128, Strategy.BTOPT),
145       /* level 17.*/
146       new CompressionParameters(18, 19, 19, 10, 3, 256, Strategy.BTOPT),
147       /* level 18.*/
148       new CompressionParameters(18, 19, 19, 10, 3, 256, Strategy.BTULTRA),
149       /* level 19.*/
150       new CompressionParameters(18, 19, 19, 11, 3, 512, Strategy.BTULTRA),
151       /* level 20.*/
152       new CompressionParameters(18, 19, 19, 12, 3, 512, Strategy.BTULTRA),
153       /* level 21.*/
154       new CompressionParameters(18, 19, 19, 13, 3, 999, Strategy.BTULTRA)
155       /* level 22.*/
156   }, {
157       // for size <= 128 KB
158       new CompressionParameters(17, 12, 12, 1, 5, 1, Strategy.FAST),
159       /* base for negative levels */
160       new CompressionParameters(17, 12, 13, 1, 6, 0, Strategy.FAST),
161       /* level  1 */
162       new CompressionParameters(17, 13, 15, 1, 5, 0, Strategy.FAST),
163       /* level  2 */
164       new CompressionParameters(17, 15, 16, 2, 5, 1, Strategy.DFAST),
165       /* level  3 */
166       new CompressionParameters(17, 17, 17, 2, 4, 1, Strategy.DFAST),
167       /* level  4 */
168       new CompressionParameters(17, 16, 17, 3, 4, 2, Strategy.GREEDY),
169       /* level  5 */
170       new CompressionParameters(17, 17, 17, 3, 4, 4, Strategy.LAZY),
171       /* level  6 */
172       new CompressionParameters(17, 17, 17, 3, 4, 8, Strategy.LAZY2),
173       /* level  7 */
174       new CompressionParameters(17, 17, 17, 4, 4, 8, Strategy.LAZY2),
175       /* level  8 */
176       new CompressionParameters(17, 17, 17, 5, 4, 8, Strategy.LAZY2),
177       /* level  9 */
178       new CompressionParameters(17, 17, 17, 6, 4, 8, Strategy.LAZY2),
179       /* level 10 */
180       new CompressionParameters(17, 17, 17, 7, 4, 8, Strategy.LAZY2),
181       /* level 11 */
182       new CompressionParameters(17, 18, 17, 6, 4, 16, Strategy.BTLAZY2),
183       /* level 12 */
184       new CompressionParameters(17, 18, 17, 8, 4, 16, Strategy.BTLAZY2),
185       /* level 13.*/
186       new CompressionParameters(17, 18, 17, 4, 4, 32, Strategy.BTOPT),
187       /* level 14.*/
188       new CompressionParameters(17, 18, 17, 6, 3, 64, Strategy.BTOPT),
189       /* level 15.*/
190       new CompressionParameters(17, 18, 17, 7, 3, 128, Strategy.BTOPT),
191       /* level 16.*/
192       new CompressionParameters(17, 18, 17, 7, 3, 256, Strategy.BTOPT),
193       /* level 17.*/
194       new CompressionParameters(17, 18, 17, 8, 3, 256, Strategy.BTOPT),
195       /* level 18.*/
196       new CompressionParameters(17, 18, 17, 8, 3, 256, Strategy.BTULTRA),
197       /* level 19.*/
198       new CompressionParameters(17, 18, 17, 9, 3, 256, Strategy.BTULTRA),
199       /* level 20.*/
200       new CompressionParameters(17, 18, 17, 10, 3, 256, Strategy.BTULTRA),
201       /* level 21.*/
202       new CompressionParameters(17, 18, 17, 11, 3, 512, Strategy.BTULTRA)
203       /* level 22.*/
204   }, {
205       // for size <= 16 KB
206       new CompressionParameters(14, 12, 13, 1, 5, 1, Strategy.FAST),
207       /* base for negative levels */
208       new CompressionParameters(14, 14, 15, 1, 5, 0, Strategy.FAST),
209       /* level  1 */
210       new CompressionParameters(14, 14, 15, 1, 4, 0, Strategy.FAST),
211       /* level  2 */
212       new CompressionParameters(14, 14, 14, 2, 4, 1, Strategy.DFAST),
213       /* level  3.*/
214       new CompressionParameters(14, 14, 14, 4, 4, 2, Strategy.GREEDY),
215       /* level  4.*/
216       new CompressionParameters(14, 14, 14, 3, 4, 4, Strategy.LAZY),
217       /* level  5.*/
218       new CompressionParameters(14, 14, 14, 4, 4, 8, Strategy.LAZY2),
219       /* level  6 */
220       new CompressionParameters(14, 14, 14, 6, 4, 8, Strategy.LAZY2),
221       /* level  7 */
222       new CompressionParameters(14, 14, 14, 8, 4, 8, Strategy.LAZY2),
223       /* level  8.*/
224       new CompressionParameters(14, 15, 14, 5, 4, 8, Strategy.BTLAZY2),
225       /* level  9.*/
226       new CompressionParameters(14, 15, 14, 9, 4, 8, Strategy.BTLAZY2),
227       /* level 10.*/
228       new CompressionParameters(14, 15, 14, 3, 4, 12, Strategy.BTOPT),
229       /* level 11.*/
230       new CompressionParameters(14, 15, 14, 6, 3, 16, Strategy.BTOPT),
231       /* level 12.*/
232       new CompressionParameters(14, 15, 14, 6, 3, 24, Strategy.BTOPT),
233       /* level 13.*/
234       new CompressionParameters(14, 15, 15, 6, 3, 48, Strategy.BTOPT),
235       /* level 14.*/
236       new CompressionParameters(14, 15, 15, 6, 3, 64, Strategy.BTOPT),
237       /* level 15.*/
238       new CompressionParameters(14, 15, 15, 6, 3, 96, Strategy.BTOPT),
239       /* level 16.*/
240       new CompressionParameters(14, 15, 15, 6, 3, 128, Strategy.BTOPT),
241       /* level 17.*/
242       new CompressionParameters(14, 15, 15, 8, 3, 256, Strategy.BTOPT),
243       /* level 18.*/
244       new CompressionParameters(14, 15, 15, 6, 3, 256, Strategy.BTULTRA),
245       /* level 19.*/
246       new CompressionParameters(14, 15, 15, 8, 3, 256, Strategy.BTULTRA),
247       /* level 20.*/
248       new CompressionParameters(14, 15, 15, 9, 3, 256, Strategy.BTULTRA),
249       /* level 21.*/
250       new CompressionParameters(14, 15, 15, 10, 3, 512, Strategy.BTULTRA)
251       /* level 22.*/
252   }
253   };
254 
255   public enum Strategy {
256     // from faster to stronger
257 
258     // YC: fast is a "single probe" strategy : at every position, we attempt to find a match, and give up if we don't find any. similar to lz4.
259     FAST(BlockCompressor.UNSUPPORTED),
260 
261     // YC: double_fast is a 2 attempts strategies. They are not symmetrical by the way. One attempt is "normal" while the second one looks for "long matches". It was
262     // empirically found that this was the best trade off. As can be guessed, it's slower than single-attempt, but find more and better matches, so compresses better.
263     DFAST(new DoubleFastBlockCompressor()),
264 
265     // YC: greedy uses a hash chain strategy. Every position is hashed, and all positions with same hash are chained. The algorithm goes through all candidates. There are
266     // diminishing returns in going deeper and deeper, so after a nb of attempts (which can be selected), it abandons the search. The best (longest) match wins. If there is
267     // one winner, it's immediately encoded.
268     GREEDY(BlockCompressor.UNSUPPORTED),
269 
270     // YC: lazy will do something similar to greedy, but will not encode immediately. It will search again at next position, in case it would find something better.
271     // It's actually fairly common to have a small match at position p hiding a more worthy one at position p+1. This obviously increases the search workload. But the
272     // resulting compressed stream generally contains larger matches, hence compresses better.
273     LAZY(BlockCompressor.UNSUPPORTED),
274 
275     // YC: lazy2 is same as lazy, but deeper. It will search at P, P+1 and then P+2 in case it would find something even better. More workload. Better matches.
276     LAZY2(BlockCompressor.UNSUPPORTED),
277 
278     // YC: btlazy2 is like lazy2, but trades the hash chain for a binary tree. This becomes necessary, as the nb of attempts becomes prohibitively expensive. The binary tree
279     // complexity increases with log of search depth, instead of proportionally with search depth. So searching deeper in history quickly becomes the dominant operation.
280     // btlazy2 cuts into that. But it costs 2x more memory. It's also relatively "slow", even when trying to cut its parameters to make it perform faster. So it's really
281     // a high compression strategy.
282     BTLAZY2(BlockCompressor.UNSUPPORTED),
283 
284     // YC: btopt is, well, a hell of lot more complex.
285     // It will compute and find multiple matches per position, will dynamically compare every path from point P to P+N, reverse the graph to find cheapest path, iterate on
286     // batches of overlapping matches, etc. It's much more expensive. But the compression ratio is also much better.
287     BTOPT(BlockCompressor.UNSUPPORTED),
288 
289     // YC: btultra is about the same, but doesn't cut as many corners (btopt "abandons" more quickly unpromising little gains). Slower, stronger.
290     BTULTRA(BlockCompressor.UNSUPPORTED);
291 
292     private final BlockCompressor compressor;
293 
294     Strategy(final BlockCompressor compressor) {
295       this.compressor = compressor;
296     }
297 
298     public BlockCompressor getCompressor() {
299       return compressor;
300     }
301   }
302 
303   public CompressionParameters(final int windowLog, final int chainLog,
304                                final int hashLog, final int searchLog,
305                                final int searchLength, final int targetLength,
306                                final Strategy strategy) {
307     this.windowLog = windowLog;
308     this.chainLog = chainLog;
309     this.hashLog = hashLog;
310     this.searchLog = searchLog;
311     this.searchLength = searchLength;
312     this.targetLength = targetLength;
313     this.strategy = strategy;
314   }
315 
316   public int getWindowLog() {
317     return windowLog;
318   }
319 
320   public int getSearchLength() {
321     return searchLength;
322   }
323 
324   public int getChainLog() {
325     return chainLog;
326   }
327 
328   public int getHashLog() {
329     return hashLog;
330   }
331 
332   public int getSearchLog() {
333     return searchLog;
334   }
335 
336   public int getTargetLength() {
337     return targetLength;
338   }
339 
340   public Strategy getStrategy() {
341     return strategy;
342   }
343 
344   public static CompressionParameters compute(final int compressionLevel,
345                                               final int inputSize) {
346     final CompressionParameters defaultParameters =
347         getDefaultParameters(compressionLevel, inputSize);
348 
349     int targetLength = defaultParameters.targetLength;
350     int windowLog = defaultParameters.windowLog;
351     int chainLog = defaultParameters.chainLog;
352     int hashLog = defaultParameters.hashLog;
353     final int searchLog = defaultParameters.searchLog;
354     final int searchLength = defaultParameters.searchLength;
355     final Strategy strategy = defaultParameters.strategy;
356 
357     if (compressionLevel < 0) {
358       targetLength = -compressionLevel;   // acceleration factor
359     }
360 
361     // resize windowLog if input is small enough, to use less memory
362     final int maxWindowResize = 1 << (MAX_WINDOW_LOG - 1);
363     if (inputSize < maxWindowResize) {
364       final int hashSizeMin = 1 << MIN_HASH_LOG;
365       final int inputSizeLog = (inputSize < hashSizeMin)? MIN_HASH_LOG :
366           highestBit(inputSize - 1) + 1;
367       if (windowLog > inputSizeLog) {
368         windowLog = inputSizeLog;
369       }
370     }
371 
372     if (hashLog > windowLog + 1) {
373       hashLog = windowLog + 1;
374     }
375 
376     final int cycleLog = cycleLog(chainLog, strategy);
377     if (cycleLog > windowLog) {
378       chainLog -= (cycleLog - windowLog);
379     }
380 
381     if (windowLog < MIN_WINDOW_LOG) {
382       windowLog = MIN_WINDOW_LOG;
383     }
384 
385     return new CompressionParameters(windowLog, chainLog, hashLog, searchLog,
386                                      searchLength, targetLength, strategy);
387   }
388 
389   private static CompressionParameters getDefaultParameters(
390       final int compressionLevel, final int estimatedInputSize) {
391     int table = 0;
392 
393     if (estimatedInputSize != 0) {
394       if (estimatedInputSize <= 16 * 1024) {
395         table = 3;
396       } else if (estimatedInputSize <= 128 * 1024) {
397         table = 2;
398       } else if (estimatedInputSize <= 256 * 1024) {
399         table = 1;
400       }
401     }
402 
403     int row = DEFAULT_COMPRESSION_LEVEL;
404 
405     if (compressionLevel != 0) {
406       // TODO: figure out better way to indicate default compression level
407       row = Math.min(Math.max(0, compressionLevel), MAX_COMPRESSION_LEVEL);
408     }
409 
410     return DEFAULT_COMPRESSION_PARAMETERS[table][row];
411   }
412 }