View Javadoc
1   /*
2    * This file is part of Waarp Project (named also Waarp or GG).
3    *
4    *  Copyright (c) 2019, Waarp SAS, and individual contributors by the @author
5    *  tags. See the COPYRIGHT.txt in the distribution for a full listing of
6    * individual contributors.
7    *
8    *  All Waarp Project is free software: you can redistribute it and/or
9    * modify it under the terms of the GNU General Public License as published by
10   * the Free Software Foundation, either version 3 of the License, or (at your
11   * option) any later version.
12   *
13   * Waarp is distributed in the hope that it will be useful, but WITHOUT ANY
14   * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15   * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16   *
17   *  You should have received a copy of the GNU General Public License along with
18   * Waarp . If not, see <http://www.gnu.org/licenses/>.
19   */
20  
21  /*
22   * Licensed under the Apache License, Version 2.0 (the "License");
23   * you may not use this file except in compliance with the License.
24   * You may obtain a copy of the License at
25   *
26   *     http://www.apache.org/licenses/LICENSE-2.0
27   *
28   * Unless required by applicable law or agreed to in writing, software
29   * distributed under the License is distributed on an "AS IS" BASIS,
30   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31   * See the License for the specific language governing permissions and
32   * limitations under the License.
33   */
34  package org.waarp.compress.zstdunsafe;
35  
36  class FseTableReader {
37    private final short[] nextSymbol =
38        new short[FiniteStateEntropy.MAX_SYMBOL + 1];
39    private final short[] normalizedCounters =
40        new short[FiniteStateEntropy.MAX_SYMBOL + 1];
41  
42    public int readFseTable(final FiniteStateEntropy.Table table,
43                            final Object inputBase, final long inputAddress,
44                            final long inputLimit, int maxSymbol,
45                            final int maxTableLog) {
46      // read table headers
47      long input = inputAddress;
48      Util.verify(inputLimit - inputAddress >= 4, input,
49                  "Not enough input bytes");
50  
51      int threshold;
52      int symbolNumber = 0;
53      boolean previousIsZero = false;
54  
55      int bitStream = UnsafeUtil.UNSAFE.getInt(inputBase, input);
56  
57      final int tableLog = (bitStream & 0xF) + FiniteStateEntropy.MIN_TABLE_LOG;
58  
59      int numberOfBits = tableLog + 1;
60      bitStream >>>= 4;
61      int bitCount = 4;
62  
63      Util.verify(tableLog <= maxTableLog, input,
64                  "FSE table size exceeds maximum allowed size");
65  
66      int remaining = (1 << tableLog) + 1;
67      threshold = 1 << tableLog;
68  
69      while (remaining > 1 && symbolNumber <= maxSymbol) {
70        if (previousIsZero) {
71          int n0 = symbolNumber;
72          while ((bitStream & 0xFFFF) == 0xFFFF) {
73            n0 += 24;
74            if (input < inputLimit - 5) {
75              input += 2;
76              bitStream =
77                  (UnsafeUtil.UNSAFE.getInt(inputBase, input) >>> bitCount);
78            } else {
79              // end of bit stream
80              bitStream >>>= 16;
81              bitCount += 16;
82            }
83          }
84          while ((bitStream & 3) == 3) {
85            n0 += 3;
86            bitStream >>>= 2;
87            bitCount += 2;
88          }
89          n0 += bitStream & 3;
90          bitCount += 2;
91  
92          Util.verify(n0 <= maxSymbol, input, "Symbol larger than max value");
93  
94          while (symbolNumber < n0) {
95            normalizedCounters[symbolNumber++] = 0;
96          }
97          if ((input <= inputLimit - 7) ||
98              (input + (bitCount >>> 3) <= inputLimit - 4)) {
99            input += bitCount >>> 3;
100           bitCount &= 7;
101           bitStream = UnsafeUtil.UNSAFE.getInt(inputBase, input) >>> bitCount;
102         } else {
103           bitStream >>>= 2;
104         }
105       }
106 
107       final short max = (short) ((2 * threshold - 1) - remaining);
108       short count;
109 
110       if ((bitStream & (threshold - 1)) < max) {
111         count = (short) (bitStream & (threshold - 1));
112         bitCount += numberOfBits - 1;
113       } else {
114         count = (short) (bitStream & (2 * threshold - 1));
115         if (count >= threshold) {
116           count -= max;
117         }
118         bitCount += numberOfBits;
119       }
120       count--;  // extra accuracy
121 
122       remaining -= Math.abs(count);
123       normalizedCounters[symbolNumber++] = count;
124       previousIsZero = count == 0;
125       while (remaining < threshold) {
126         numberOfBits--;
127         threshold >>>= 1;
128       }
129 
130       if ((input <= inputLimit - 7) ||
131           (input + (bitCount >> 3) <= inputLimit - 4)) {
132         input += bitCount >>> 3;
133         bitCount &= 7;
134       } else {
135         bitCount -= (int) (8 * (inputLimit - 4 - input));
136         input = inputLimit - 4;
137       }
138       bitStream =
139           UnsafeUtil.UNSAFE.getInt(inputBase, input) >>> (bitCount & 31);
140     }
141 
142     Util.verify(remaining == 1 && bitCount <= 32, input, "Input is corrupted");
143 
144     maxSymbol = symbolNumber - 1;
145     Util.verify(maxSymbol <= FiniteStateEntropy.MAX_SYMBOL, input,
146                 "Max symbol value too large (too many symbols for FSE)");
147 
148     input += (bitCount + 7) >> 3;
149 
150     // populate decoding table
151     final int symbolCount = maxSymbol + 1;
152     final int tableSize = 1 << tableLog;
153     int highThreshold = tableSize - 1;
154 
155     table.log2Size = tableLog;
156 
157     for (byte symbol = 0; symbol < symbolCount; symbol++) {
158       if (normalizedCounters[symbol] == -1) {
159         table.symbol[highThreshold--] = symbol;
160         nextSymbol[symbol] = 1;
161       } else {
162         nextSymbol[symbol] = normalizedCounters[symbol];
163       }
164     }
165 
166     final int position =
167         FseCompressionTable.spreadSymbols(normalizedCounters, maxSymbol,
168                                           tableSize, highThreshold,
169                                           table.symbol);
170 
171     // position must reach all cells once, otherwise normalizedCounter is incorrect
172     Util.verify(position == 0, input, "Input is corrupted");
173 
174     for (int i = 0; i < tableSize; i++) {
175       final byte symbol = table.symbol[i];
176       final short nextState = nextSymbol[symbol]++;
177       table.numberOfBits[i] = (byte) (tableLog - Util.highestBit(nextState));
178       table.newState[i] =
179           (short) ((nextState << table.numberOfBits[i]) - tableSize);
180     }
181 
182     return (int) (input - inputAddress);
183   }
184 
185   public static void initializeRleTable(final FiniteStateEntropy.Table table,
186                                         final byte value) {
187     table.log2Size = 0;
188     table.symbol[0] = value;
189     table.newState[0] = 0;
190     table.numberOfBits[0] = 0;
191   }
192 }