1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075 |
- ///////////////////////////////////////////////////////////////////////////////
- //
- /// \file lzma_decoder.c
- /// \brief LZMA decoder
- ///
- // Authors: Igor Pavlov
- // Lasse Collin
- //
- // This file has been put into the public domain.
- // You can do whatever you want with this file.
- //
- ///////////////////////////////////////////////////////////////////////////////
- #include "lz_decoder.h"
- #include "lzma_common.h"
- #include "lzma_decoder.h"
- #include "range_decoder.h"
- #ifdef HAVE_SMALL
- // Macros for (somewhat) size-optimized code.
- #define seq_4(seq) seq
- #define seq_6(seq) seq
- #define seq_8(seq) seq
- #define seq_len(seq) \
- seq ## _CHOICE, \
- seq ## _CHOICE2, \
- seq ## _BITTREE
- #define len_decode(target, ld, pos_state, seq) \
- do { \
- case seq ## _CHOICE: \
- rc_if_0(ld.choice, seq ## _CHOICE) { \
- rc_update_0(ld.choice); \
- probs = ld.low[pos_state];\
- limit = LEN_LOW_SYMBOLS; \
- target = MATCH_LEN_MIN; \
- } else { \
- rc_update_1(ld.choice); \
- case seq ## _CHOICE2: \
- rc_if_0(ld.choice2, seq ## _CHOICE2) { \
- rc_update_0(ld.choice2); \
- probs = ld.mid[pos_state]; \
- limit = LEN_MID_SYMBOLS; \
- target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
- } else { \
- rc_update_1(ld.choice2); \
- probs = ld.high; \
- limit = LEN_HIGH_SYMBOLS; \
- target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
- + LEN_MID_SYMBOLS; \
- } \
- } \
- symbol = 1; \
- case seq ## _BITTREE: \
- do { \
- rc_bit(probs[symbol], , , seq ## _BITTREE); \
- } while (symbol < limit); \
- target += symbol - limit; \
- } while (0)
- #else // HAVE_SMALL
- // Unrolled versions
- #define seq_4(seq) \
- seq ## 0, \
- seq ## 1, \
- seq ## 2, \
- seq ## 3
- #define seq_6(seq) \
- seq ## 0, \
- seq ## 1, \
- seq ## 2, \
- seq ## 3, \
- seq ## 4, \
- seq ## 5
- #define seq_8(seq) \
- seq ## 0, \
- seq ## 1, \
- seq ## 2, \
- seq ## 3, \
- seq ## 4, \
- seq ## 5, \
- seq ## 6, \
- seq ## 7
- #define seq_len(seq) \
- seq ## _CHOICE, \
- seq ## _LOW0, \
- seq ## _LOW1, \
- seq ## _LOW2, \
- seq ## _CHOICE2, \
- seq ## _MID0, \
- seq ## _MID1, \
- seq ## _MID2, \
- seq ## _HIGH0, \
- seq ## _HIGH1, \
- seq ## _HIGH2, \
- seq ## _HIGH3, \
- seq ## _HIGH4, \
- seq ## _HIGH5, \
- seq ## _HIGH6, \
- seq ## _HIGH7
- #define len_decode(target, ld, pos_state, seq) \
- do { \
- symbol = 1; \
- case seq ## _CHOICE: \
- rc_if_0(ld.choice, seq ## _CHOICE) { \
- rc_update_0(ld.choice); \
- rc_bit_case(ld.low[pos_state][symbol], 0, 0, seq ## _LOW0); \
- rc_bit_case(ld.low[pos_state][symbol], 0, 0, seq ## _LOW1); \
- rc_bit_case(ld.low[pos_state][symbol], 0, 0, seq ## _LOW2); \
- target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
- } else { \
- rc_update_1(ld.choice); \
- case seq ## _CHOICE2: \
- rc_if_0(ld.choice2, seq ## _CHOICE2) { \
- rc_update_0(ld.choice2); \
- rc_bit_case(ld.mid[pos_state][symbol], 0, 0, \
- seq ## _MID0); \
- rc_bit_case(ld.mid[pos_state][symbol], 0, 0, \
- seq ## _MID1); \
- rc_bit_case(ld.mid[pos_state][symbol], 0, 0, \
- seq ## _MID2); \
- target = symbol - LEN_MID_SYMBOLS \
- + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
- } else { \
- rc_update_1(ld.choice2); \
- rc_bit_case(ld.high[symbol], 0, 0, seq ## _HIGH0); \
- rc_bit_case(ld.high[symbol], 0, 0, seq ## _HIGH1); \
- rc_bit_case(ld.high[symbol], 0, 0, seq ## _HIGH2); \
- rc_bit_case(ld.high[symbol], 0, 0, seq ## _HIGH3); \
- rc_bit_case(ld.high[symbol], 0, 0, seq ## _HIGH4); \
- rc_bit_case(ld.high[symbol], 0, 0, seq ## _HIGH5); \
- rc_bit_case(ld.high[symbol], 0, 0, seq ## _HIGH6); \
- rc_bit_case(ld.high[symbol], 0, 0, seq ## _HIGH7); \
- target = symbol - LEN_HIGH_SYMBOLS \
- + MATCH_LEN_MIN \
- + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
- } \
- } \
- } while (0)
- #endif // HAVE_SMALL
- /// Length decoder probabilities; see comments in lzma_common.h.
- typedef struct {
- probability choice;
- probability choice2;
- probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
- probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
- probability high[LEN_HIGH_SYMBOLS];
- } lzma_length_decoder;
- struct lzma_coder_s {
- ///////////////////
- // Probabilities //
- ///////////////////
- /// Literals; see comments in lzma_common.h.
- probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
- /// If 1, it's a match. Otherwise it's a single 8-bit literal.
- probability is_match[STATES][POS_STATES_MAX];
- /// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
- probability is_rep[STATES];
- /// If 0, distance of a repeated match is rep0.
- /// Otherwise check is_rep1.
- probability is_rep0[STATES];
- /// If 0, distance of a repeated match is rep1.
- /// Otherwise check is_rep2.
- probability is_rep1[STATES];
- /// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
- probability is_rep2[STATES];
- /// If 1, the repeated match has length of one byte. Otherwise
- /// the length is decoded from rep_len_decoder.
- probability is_rep0_long[STATES][POS_STATES_MAX];
- /// Probability tree for the highest two bits of the match distance.
- /// There is a separate probability tree for match lengths of
- /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
- probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];
- /// Probability trees for additional bits for match distance when the
- /// distance is in the range [4, 127].
- probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];
- /// Probability tree for the lowest four bits of a match distance
- /// that is equal to or greater than 128.
- probability pos_align[ALIGN_TABLE_SIZE];
- /// Length of a normal match
- lzma_length_decoder match_len_decoder;
- /// Length of a repeated match
- lzma_length_decoder rep_len_decoder;
- ///////////////////
- // Decoder state //
- ///////////////////
- // Range coder
- lzma_range_decoder rc;
- // Types of the most recently seen LZMA symbols
- lzma_lzma_state state;
- uint32_t rep0; ///< Distance of the latest match
- uint32_t rep1; ///< Distance of second latest match
- uint32_t rep2; ///< Distance of third latest match
- uint32_t rep3; ///< Distance of fourth latest match
- uint32_t pos_mask; // (1U << pb) - 1
- uint32_t literal_context_bits;
- uint32_t literal_pos_mask;
- /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
- /// payload marker is expected.
- lzma_vli uncompressed_size;
- ////////////////////////////////
- // State of incomplete symbol //
- ////////////////////////////////
- /// Position where to continue the decoder loop
- enum {
- SEQ_NORMALIZE,
- SEQ_IS_MATCH,
- seq_8(SEQ_LITERAL),
- seq_8(SEQ_LITERAL_MATCHED),
- SEQ_LITERAL_WRITE,
- SEQ_IS_REP,
- seq_len(SEQ_MATCH_LEN),
- seq_6(SEQ_POS_SLOT),
- SEQ_POS_MODEL,
- SEQ_DIRECT,
- seq_4(SEQ_ALIGN),
- SEQ_EOPM,
- SEQ_IS_REP0,
- SEQ_SHORTREP,
- SEQ_IS_REP0_LONG,
- SEQ_IS_REP1,
- SEQ_IS_REP2,
- seq_len(SEQ_REP_LEN),
- SEQ_COPY,
- } sequence;
- /// Base of the current probability tree
- probability *probs;
- /// Symbol being decoded. This is also used as an index variable in
- /// bittree decoders: probs[symbol]
- uint32_t symbol;
- /// Used as a loop termination condition on bittree decoders and
- /// direct bits decoder.
- uint32_t limit;
- /// Matched literal decoder: 0x100 or 0 to help avoiding branches.
- /// Bittree reverse decoders: Offset of the next bit: 1 << offset
- uint32_t offset;
- /// If decoding a literal: match byte.
- /// If decoding a match: length of the match.
- uint32_t len;
- };
- static lzma_ret
- lzma_decode(lzma_coder *LZMA_RESTRICT coder, lzma_dict *LZMA_RESTRICT dictptr,
- const uint8_t *LZMA_RESTRICT in,
- size_t *LZMA_RESTRICT in_pos, size_t in_size)
- {
- ///////////////
- // Variables //
- ///////////////
- // Making local copies of often-used variables improves both
- // speed and readability.
- lzma_dict dict = *dictptr;
- const size_t dict_start = dict.pos;
- // Range decoder
- rc_to_local(coder->rc, *in_pos);
- // State
- uint32_t state = coder->state;
- uint32_t rep0 = coder->rep0;
- uint32_t rep1 = coder->rep1;
- uint32_t rep2 = coder->rep2;
- uint32_t rep3 = coder->rep3;
- const uint32_t pos_mask = coder->pos_mask;
- // These variables are actually needed only if we last time ran
- // out of input in the middle of the decoder loop.
- probability *probs = coder->probs;
- uint32_t symbol = coder->symbol;
- uint32_t limit = coder->limit;
- uint32_t offset = coder->offset;
- uint32_t len = coder->len;
- const uint32_t literal_pos_mask = coder->literal_pos_mask;
- const uint32_t literal_context_bits = coder->literal_context_bits;
- // Temporary variables
- uint32_t pos_state = dict.pos & pos_mask;
- lzma_ret ret = LZMA_OK;
- // If uncompressed size is known, there must be no end of payload
- // marker.
- const bool no_eopm = coder->uncompressed_size
- != LZMA_VLI_UNKNOWN;
- if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
- dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
- ////////////////////
- // Initialization //
- ////////////////////
- if (!rc_read_init(&coder->rc, in, in_pos, in_size))
- return LZMA_OK;
- rc = coder->rc;
- rc_in_pos = *in_pos;
- // The main decoder loop. The "switch" is used to restart the decoder at
- // correct location. Once restarted, the "switch" is no longer used.
- switch (coder->sequence)
- while (true) {
- // Calculate new pos_state. This is skipped on the first loop
- // since we already calculated it when setting up the local
- // variables.
- pos_state = dict.pos & pos_mask;
- case SEQ_NORMALIZE:
- case SEQ_IS_MATCH:
- if (unlikely(no_eopm && dict.pos == dict.limit))
- break;
- rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
- static const lzma_lzma_state next_state[] = {
- STATE_LIT_LIT,
- STATE_LIT_LIT,
- STATE_LIT_LIT,
- STATE_LIT_LIT,
- STATE_MATCH_LIT_LIT,
- STATE_REP_LIT_LIT,
- STATE_SHORTREP_LIT_LIT,
- STATE_MATCH_LIT,
- STATE_REP_LIT,
- STATE_SHORTREP_LIT,
- STATE_MATCH_LIT,
- STATE_REP_LIT
- };
- rc_update_0(coder->is_match[state][pos_state]);
- // It's a literal i.e. a single 8-bit byte.
- probs = literal_subcoder(coder->literal,
- literal_context_bits, literal_pos_mask,
- dict.pos, dict_get(&dict, 0));
- symbol = 1;
- if (is_literal_state(state)) {
- // Decode literal without match byte.
- #ifdef HAVE_SMALL
- case SEQ_LITERAL:
- do {
- rc_bit(probs[symbol], , , SEQ_LITERAL);
- } while (symbol < (1 << 8));
- #else
- rc_bit_case(probs[symbol], 0, 0, SEQ_LITERAL0);
- rc_bit_case(probs[symbol], 0, 0, SEQ_LITERAL1);
- rc_bit_case(probs[symbol], 0, 0, SEQ_LITERAL2);
- rc_bit_case(probs[symbol], 0, 0, SEQ_LITERAL3);
- rc_bit_case(probs[symbol], 0, 0, SEQ_LITERAL4);
- rc_bit_case(probs[symbol], 0, 0, SEQ_LITERAL5);
- rc_bit_case(probs[symbol], 0, 0, SEQ_LITERAL6);
- rc_bit_case(probs[symbol], 0, 0, SEQ_LITERAL7);
- #endif
- } else {
- #ifndef HAVE_SMALL
- uint32_t match_bit;
- uint32_t subcoder_index;
- #endif
- // Decode literal with match byte.
- //
- // We store the byte we compare against
- // ("match byte") to "len" to minimize the
- // number of variables we need to store
- // between decoder calls.
- len = dict_get(&dict, rep0) << 1;
- // The usage of "offset" allows omitting some
- // branches, which should give tiny speed
- // improvement on some CPUs. "offset" gets
- // set to zero if match_bit didn't match.
- offset = 0x100;
- #ifdef HAVE_SMALL
- case SEQ_LITERAL_MATCHED:
- do {
- const uint32_t match_bit
- = len & offset;
- const uint32_t subcoder_index
- = offset + match_bit
- + symbol;
- rc_bit(probs[subcoder_index],
- offset &= ~match_bit,
- offset &= match_bit,
- SEQ_LITERAL_MATCHED);
- // It seems to be faster to do this
- // here instead of putting it to the
- // beginning of the loop and then
- // putting the "case" in the middle
- // of the loop.
- len <<= 1;
- } while (symbol < (1 << 8));
- #else
- // Unroll the loop.
- # define d(seq) \
- case seq: \
- match_bit = len & offset; \
- subcoder_index = offset + match_bit + symbol; \
- rc_bit(probs[subcoder_index], \
- offset &= ~match_bit, \
- offset &= match_bit, \
- seq)
- d(SEQ_LITERAL_MATCHED0);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED1);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED2);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED3);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED4);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED5);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED6);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED7);
- # undef d
- #endif
- }
- //update_literal(state);
- // Use a lookup table to update to literal state,
- // since compared to other state updates, this would
- // need two branches.
- state = next_state[state];
- case SEQ_LITERAL_WRITE:
- if (unlikely(dict_put(&dict, symbol))) {
- coder->sequence = SEQ_LITERAL_WRITE;
- goto out;
- }
- continue;
- }
- // Instead of a new byte we are going to get a byte range
- // (distance and length) which will be repeated from our
- // output history.
- rc_update_1(coder->is_match[state][pos_state]);
- case SEQ_IS_REP:
- rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
- // Not a repeated match
- rc_update_0(coder->is_rep[state]);
- update_match(state);
- // The latest three match distances are kept in
- // memory in case there are repeated matches.
- rep3 = rep2;
- rep2 = rep1;
- rep1 = rep0;
- // Decode the length of the match.
- len_decode(len, coder->match_len_decoder,
- pos_state, SEQ_MATCH_LEN);
- // Prepare to decode the highest two bits of the
- // match distance.
- probs = coder->pos_slot[get_len_to_pos_state(len)];
- symbol = 1;
- #ifdef HAVE_SMALL
- case SEQ_POS_SLOT:
- do {
- rc_bit(probs[symbol], , , SEQ_POS_SLOT);
- } while (symbol < POS_SLOTS);
- #else
- rc_bit_case(probs[symbol], 0, 0, SEQ_POS_SLOT0);
- rc_bit_case(probs[symbol], 0, 0, SEQ_POS_SLOT1);
- rc_bit_case(probs[symbol], 0, 0, SEQ_POS_SLOT2);
- rc_bit_case(probs[symbol], 0, 0, SEQ_POS_SLOT3);
- rc_bit_case(probs[symbol], 0, 0, SEQ_POS_SLOT4);
- rc_bit_case(probs[symbol], 0, 0, SEQ_POS_SLOT5);
- #endif
- // Get rid of the highest bit that was needed for
- // indexing of the probability array.
- symbol -= POS_SLOTS;
- assert(symbol <= 63);
- if (symbol < START_POS_MODEL_INDEX) {
- // Match distances [0, 3] have only two bits.
- rep0 = symbol;
- } else {
- // Decode the lowest [1, 29] bits of
- // the match distance.
- limit = (symbol >> 1) - 1;
- assert(limit >= 1 && limit <= 30);
- rep0 = 2 + (symbol & 1);
- if (symbol < END_POS_MODEL_INDEX) {
- // Prepare to decode the low bits for
- // a distance of [4, 127].
- assert(limit <= 5);
- rep0 <<= limit;
- assert(rep0 <= 96);
- // -1 is fine, because we start
- // decoding at probs[1], not probs[0].
- // NOTE: This violates the C standard,
- // since we are doing pointer
- // arithmetic past the beginning of
- // the array.
- assert((int32_t)(rep0 - symbol - 1)
- >= -1);
- assert((int32_t)(rep0 - symbol - 1)
- <= 82);
- probs = coder->pos_special + rep0
- - symbol - 1;
- symbol = 1;
- offset = 0;
- case SEQ_POS_MODEL:
- #ifdef HAVE_SMALL
- do {
- rc_bit(probs[symbol], ,
- rep0 += 1 << offset,
- SEQ_POS_MODEL);
- } while (++offset < limit);
- #else
- switch (limit) {
- case 5:
- assert(offset == 0);
- rc_bit(probs[symbol], 0,
- rep0 += 1,
- SEQ_POS_MODEL);
- ++offset;
- --limit;
- case 4:
- rc_bit(probs[symbol], 0,
- rep0 += 1 << offset,
- SEQ_POS_MODEL);
- ++offset;
- --limit;
- case 3:
- rc_bit(probs[symbol], 0,
- rep0 += 1 << offset,
- SEQ_POS_MODEL);
- ++offset;
- --limit;
- case 2:
- rc_bit(probs[symbol], 0,
- rep0 += 1 << offset,
- SEQ_POS_MODEL);
- ++offset;
- --limit;
- case 1:
- // We need "symbol" only for
- // indexing the probability
- // array, thus we can use
- // rc_bit_last() here to omit
- // the unneeded updating of
- // "symbol".
- rc_bit_last(probs[symbol], 0,
- rep0 += 1 << offset,
- SEQ_POS_MODEL);
- }
- #endif
- } else {
- // The distance is >= 128. Decode the
- // lower bits without probabilities
- // except the lowest four bits.
- assert(symbol >= 14);
- assert(limit >= 6);
- limit -= ALIGN_BITS;
- assert(limit >= 2);
- case SEQ_DIRECT:
- // Not worth manual unrolling
- do {
- rc_direct(rep0, SEQ_DIRECT);
- } while (--limit > 0);
- // Decode the lowest four bits using
- // probabilities.
- rep0 <<= ALIGN_BITS;
- symbol = 1;
- #ifdef HAVE_SMALL
- offset = 0;
- case SEQ_ALIGN:
- do {
- rc_bit(coder->pos_align[
- symbol], ,
- rep0 += 1 << offset,
- SEQ_ALIGN);
- } while (++offset < ALIGN_BITS);
- #else
- case SEQ_ALIGN0:
- rc_bit(coder->pos_align[symbol], 0,
- rep0 += 1, SEQ_ALIGN0);
- case SEQ_ALIGN1:
- rc_bit(coder->pos_align[symbol], 0,
- rep0 += 2, SEQ_ALIGN1);
- case SEQ_ALIGN2:
- rc_bit(coder->pos_align[symbol], 0,
- rep0 += 4, SEQ_ALIGN2);
- case SEQ_ALIGN3:
- // Like in SEQ_POS_MODEL, we don't
- // need "symbol" for anything else
- // than indexing the probability array.
- rc_bit_last(coder->pos_align[symbol], 0,
- rep0 += 8, SEQ_ALIGN3);
- #endif
- if (rep0 == UINT32_MAX) {
- // End of payload marker was
- // found. It must not be
- // present if uncompressed
- // size is known.
- if (coder->uncompressed_size
- != LZMA_VLI_UNKNOWN) {
- ret = LZMA_DATA_ERROR;
- goto out;
- }
- case SEQ_EOPM:
- // LZMA1 stream with
- // end-of-payload marker.
- rc_normalize(SEQ_EOPM);
- ret = LZMA_STREAM_END;
- goto out;
- }
- }
- }
- // Validate the distance we just decoded.
- if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
- ret = LZMA_DATA_ERROR;
- goto out;
- }
- } else {
- rc_update_1(coder->is_rep[state]);
- // Repeated match
- //
- // The match distance is a value that we have had
- // earlier. The latest four match distances are
- // available as rep0, rep1, rep2 and rep3. We will
- // now decode which of them is the new distance.
- //
- // There cannot be a match if we haven't produced
- // any output, so check that first.
- if (unlikely(!dict_is_distance_valid(&dict, 0))) {
- ret = LZMA_DATA_ERROR;
- goto out;
- }
- case SEQ_IS_REP0:
- rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
- rc_update_0(coder->is_rep0[state]);
- // The distance is rep0.
- case SEQ_IS_REP0_LONG:
- rc_if_0(coder->is_rep0_long[state][pos_state],
- SEQ_IS_REP0_LONG) {
- rc_update_0(coder->is_rep0_long[
- state][pos_state]);
- update_short_rep(state);
- case SEQ_SHORTREP:
- if (unlikely(dict_put(&dict, dict_get(
- &dict, rep0)))) {
- coder->sequence = SEQ_SHORTREP;
- goto out;
- }
- continue;
- }
- // Repeating more than one byte at
- // distance of rep0.
- rc_update_1(coder->is_rep0_long[
- state][pos_state]);
- } else {
- rc_update_1(coder->is_rep0[state]);
- case SEQ_IS_REP1:
- // The distance is rep1, rep2 or rep3. Once
- // we find out which one of these three, it
- // is stored to rep0 and rep1, rep2 and rep3
- // are updated accordingly.
- rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
- uint32_t distance;
- rc_update_0(coder->is_rep1[state]);
- distance = rep1;
- rep1 = rep0;
- rep0 = distance;
- } else {
- rc_update_1(coder->is_rep1[state]);
- case SEQ_IS_REP2:
- rc_if_0(coder->is_rep2[state],
- SEQ_IS_REP2) {
- uint32_t distance;
- rc_update_0(coder->is_rep2[
- state]);
- distance = rep2;
- rep2 = rep1;
- rep1 = rep0;
- rep0 = distance;
- } else {
- uint32_t distance;
- rc_update_1(coder->is_rep2[
- state]);
- distance = rep3;
- rep3 = rep2;
- rep2 = rep1;
- rep1 = rep0;
- rep0 = distance;
- }
- }
- }
- update_long_rep(state);
- // Decode the length of the repeated match.
- len_decode(len, coder->rep_len_decoder,
- pos_state, SEQ_REP_LEN);
- }
- /////////////////////////////////
- // Repeat from history buffer. //
- /////////////////////////////////
- // The length is always between these limits. There is no way
- // to trigger the algorithm to set len outside this range.
- assert(len >= MATCH_LEN_MIN);
- assert(len <= MATCH_LEN_MAX);
- case SEQ_COPY:
- // Repeat len bytes from distance of rep0.
- if (unlikely(dict_repeat(&dict, rep0, &len))) {
- coder->sequence = SEQ_COPY;
- goto out;
- }
- }
- rc_normalize(SEQ_NORMALIZE);
- coder->sequence = SEQ_IS_MATCH;
- out:
- // Save state
- // NOTE: Must not copy dict.limit.
- dictptr->pos = dict.pos;
- dictptr->full = dict.full;
- rc_from_local(coder->rc, *in_pos);
- coder->state = state;
- coder->rep0 = rep0;
- coder->rep1 = rep1;
- coder->rep2 = rep2;
- coder->rep3 = rep3;
- coder->probs = probs;
- coder->symbol = symbol;
- coder->limit = limit;
- coder->offset = offset;
- coder->len = len;
- // Update the remaining amount of uncompressed data if uncompressed
- // size was known.
- if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
- coder->uncompressed_size -= dict.pos - dict_start;
- // Since there cannot be end of payload marker if the
- // uncompressed size was known, we check here if we
- // finished decoding.
- if (coder->uncompressed_size == 0 && ret == LZMA_OK
- && coder->sequence != SEQ_NORMALIZE)
- ret = coder->sequence == SEQ_IS_MATCH
- ? LZMA_STREAM_END : LZMA_DATA_ERROR;
- }
- // We can do an additional check in the range decoder to catch some
- // corrupted files.
- if (ret == LZMA_STREAM_END) {
- if (!rc_is_finished(coder->rc))
- ret = LZMA_DATA_ERROR;
- // Reset the range decoder so that it is ready to reinitialize
- // for a new LZMA2 chunk.
- rc_reset(coder->rc);
- }
- return ret;
- }
- static void
- lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)
- {
- coder->uncompressed_size = uncompressed_size;
- }
- /*
- extern void
- lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
- {
- // This is hack.
- (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size;
- }
- */
- static void
- lzma_decoder_reset(lzma_coder *coder, const void *opt)
- {
- uint32_t i, j, pos_state;
- uint32_t num_pos_states;
- const lzma_options_lzma *options = opt;
- // NOTE: We assume that lc/lp/pb are valid since they were
- // successfully decoded with lzma_lzma_decode_properties().
- // Calculate pos_mask. We don't need pos_bits as is for anything.
- coder->pos_mask = (1U << options->pb) - 1;
- // Initialize the literal decoder.
- literal_init(coder->literal, options->lc, options->lp);
- coder->literal_context_bits = options->lc;
- coder->literal_pos_mask = (1U << options->lp) - 1;
- // State
- coder->state = STATE_LIT_LIT;
- coder->rep0 = 0;
- coder->rep1 = 0;
- coder->rep2 = 0;
- coder->rep3 = 0;
- coder->pos_mask = (1U << options->pb) - 1;
- // Range decoder
- rc_reset(coder->rc);
- // Bit and bittree decoders
- for (i = 0; i < STATES; ++i) {
- for (j = 0; j <= coder->pos_mask; ++j) {
- bit_reset(coder->is_match[i][j]);
- bit_reset(coder->is_rep0_long[i][j]);
- }
- bit_reset(coder->is_rep[i]);
- bit_reset(coder->is_rep0[i]);
- bit_reset(coder->is_rep1[i]);
- bit_reset(coder->is_rep2[i]);
- }
- for (i = 0; i < LEN_TO_POS_STATES; ++i)
- bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);
- for (i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
- bit_reset(coder->pos_special[i]);
- bittree_reset(coder->pos_align, ALIGN_BITS);
- // Len decoders (also bit/bittree)
- num_pos_states = 1U << options->pb;
- bit_reset(coder->match_len_decoder.choice);
- bit_reset(coder->match_len_decoder.choice2);
- bit_reset(coder->rep_len_decoder.choice);
- bit_reset(coder->rep_len_decoder.choice2);
- for (pos_state = 0; pos_state < num_pos_states; ++pos_state) {
- bittree_reset(coder->match_len_decoder.low[pos_state],
- LEN_LOW_BITS);
- bittree_reset(coder->match_len_decoder.mid[pos_state],
- LEN_MID_BITS);
- bittree_reset(coder->rep_len_decoder.low[pos_state],
- LEN_LOW_BITS);
- bittree_reset(coder->rep_len_decoder.mid[pos_state],
- LEN_MID_BITS);
- }
- bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
- bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
- coder->sequence = SEQ_IS_MATCH;
- coder->probs = NULL;
- coder->symbol = 0;
- coder->limit = 0;
- coder->offset = 0;
- coder->len = 0;
- return;
- }
- extern lzma_ret
- lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
- const void *opt, lzma_lz_options *lz_options)
- {
- const lzma_options_lzma *options = opt;
- if (lz->coder == NULL) {
- lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
- if (lz->coder == NULL)
- return LZMA_MEM_ERROR;
- lz->code = &lzma_decode;
- lz->reset = &lzma_decoder_reset;
- lz->set_uncompressed = &lzma_decoder_uncompressed;
- }
- // All dictionary sizes are OK here. LZ decoder will take care of
- // the special cases.
- lz_options->dict_size = options->dict_size;
- lz_options->preset_dict = options->preset_dict;
- lz_options->preset_dict_size = options->preset_dict_size;
- return LZMA_OK;
- }
- /// Allocate and initialize LZMA decoder. This is used only via LZ
- /// initialization (lzma_lzma_decoder_init() passes function pointer to
- /// the LZ initialization).
- static lzma_ret
- lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
- const void *options, lzma_lz_options *lz_options)
- {
- if (!is_lclppb_valid(options))
- return LZMA_PROG_ERROR;
- return_if_error(lzma_lzma_decoder_create(
- lz, allocator, options, lz_options));
- lzma_decoder_reset(lz->coder, options);
- lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
- return LZMA_OK;
- }
- extern lzma_ret
- lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
- const lzma_filter_info *filters)
- {
- // LZMA can only be the last filter in the chain. This is enforced
- // by the raw_decoder initialization.
- assert(filters[1].init == NULL);
- return lzma_lz_decoder_init(next, allocator, filters,
- &lzma_decoder_init);
- }
- extern bool
- lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
- {
- if (byte > (4 * 5 + 4) * 9 + 8)
- return true;
- // See the file format specification to understand this.
- options->pb = byte / (9 * 5);
- byte -= options->pb * 9 * 5;
- options->lp = byte / 9;
- options->lc = byte - options->lp * 9;
- return options->lc + options->lp > LZMA_LCLP_MAX;
- }
- extern uint64_t
- lzma_lzma_decoder_memusage_nocheck(const void *options)
- {
- const lzma_options_lzma *const opt = options;
- return sizeof(lzma_coder) + lzma_lz_decoder_memusage(opt->dict_size);
- }
- extern uint64_t
- lzma_lzma_decoder_memusage(const void *options)
- {
- if (!is_lclppb_valid(options))
- return UINT64_MAX;
- return lzma_lzma_decoder_memusage_nocheck(options);
- }
- extern lzma_ret
- lzma_lzma_props_decode(void **options, lzma_allocator *allocator,
- const uint8_t *props, size_t props_size)
- {
- lzma_options_lzma *opt;
- if (props_size != 5)
- return LZMA_OPTIONS_ERROR;
- opt = lzma_alloc(sizeof(lzma_options_lzma), allocator);
- if (opt == NULL)
- return LZMA_MEM_ERROR;
- if (lzma_lzma_lclppb_decode(opt, props[0]))
- goto error;
- // All dictionary sizes are accepted, including zero. LZ decoder
- // will automatically use a dictionary at least a few KiB even if
- // a smaller dictionary is requested.
- opt->dict_size = unaligned_read32le(props + 1);
- opt->preset_dict = NULL;
- opt->preset_dict_size = 0;
- *options = opt;
- return LZMA_OK;
- error:
- lzma_free(opt, allocator);
- return LZMA_OPTIONS_ERROR;
- }
|