lzma2_decoder.c 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file lzma2_decoder.c
  4. /// \brief LZMA2 decoder
  5. ///
  6. // Authors: Igor Pavlov
  7. // Lasse Collin
  8. //
  9. // This file has been put into the public domain.
  10. // You can do whatever you want with this file.
  11. //
  12. ///////////////////////////////////////////////////////////////////////////////
  13. #include "lzma2_decoder.h"
  14. #include "lz_decoder.h"
  15. #include "lzma_decoder.h"
  16. struct lzma_coder_s {
  17. enum sequence {
  18. SEQ_CONTROL,
  19. SEQ_UNCOMPRESSED_1,
  20. SEQ_UNCOMPRESSED_2,
  21. SEQ_COMPRESSED_0,
  22. SEQ_COMPRESSED_1,
  23. SEQ_PROPERTIES,
  24. SEQ_LZMA,
  25. SEQ_COPY,
  26. } sequence;
  27. /// Sequence after the size fields have been decoded.
  28. enum sequence next_sequence;
  29. /// LZMA decoder
  30. lzma_lz_decoder lzma;
  31. /// Uncompressed size of LZMA chunk
  32. size_t uncompressed_size;
  33. /// Compressed size of the chunk (naturally equals to uncompressed
  34. /// size of uncompressed chunk)
  35. size_t compressed_size;
  36. /// True if properties are needed. This is false before the
  37. /// first LZMA chunk.
  38. bool need_properties;
  39. /// True if dictionary reset is needed. This is false before the
  40. /// first chunk (LZMA or uncompressed).
  41. bool need_dictionary_reset;
  42. lzma_options_lzma options;
  43. };
  44. static lzma_ret
  45. lzma2_decode(lzma_coder *LZMA_RESTRICT coder, lzma_dict *LZMA_RESTRICT dict,
  46. const uint8_t *LZMA_RESTRICT in, size_t *LZMA_RESTRICT in_pos,
  47. size_t in_size)
  48. {
  49. // With SEQ_LZMA it is possible that no new input is needed to do
  50. // some progress. The rest of the sequences assume that there is
  51. // at least one byte of input.
  52. while (*in_pos < in_size || coder->sequence == SEQ_LZMA)
  53. switch (coder->sequence) {
  54. case SEQ_CONTROL: {
  55. const uint32_t control = in[*in_pos];
  56. ++*in_pos;
  57. // End marker
  58. if (control == 0x00)
  59. return LZMA_STREAM_END;
  60. if (control >= 0xE0 || control == 1) {
  61. // Dictionary reset implies that next LZMA chunk has
  62. // to set new properties.
  63. coder->need_properties = true;
  64. coder->need_dictionary_reset = true;
  65. } else if (coder->need_dictionary_reset) {
  66. return LZMA_DATA_ERROR;
  67. }
  68. if (control >= 0x80) {
  69. // LZMA chunk. The highest five bits of the
  70. // uncompressed size are taken from the control byte.
  71. coder->uncompressed_size = (control & 0x1F) << 16;
  72. coder->sequence = SEQ_UNCOMPRESSED_1;
  73. // See if there are new properties or if we need to
  74. // reset the state.
  75. if (control >= 0xC0) {
  76. // When there are new properties, state reset
  77. // is done at SEQ_PROPERTIES.
  78. coder->need_properties = false;
  79. coder->next_sequence = SEQ_PROPERTIES;
  80. } else if (coder->need_properties) {
  81. return LZMA_DATA_ERROR;
  82. } else {
  83. coder->next_sequence = SEQ_LZMA;
  84. // If only state reset is wanted with old
  85. // properties, do the resetting here for
  86. // simplicity.
  87. if (control >= 0xA0)
  88. coder->lzma.reset(coder->lzma.coder,
  89. &coder->options);
  90. }
  91. } else {
  92. // Invalid control values
  93. if (control > 2)
  94. return LZMA_DATA_ERROR;
  95. // It's uncompressed chunk
  96. coder->sequence = SEQ_COMPRESSED_0;
  97. coder->next_sequence = SEQ_COPY;
  98. }
  99. if (coder->need_dictionary_reset) {
  100. // Finish the dictionary reset and let the caller
  101. // flush the dictionary to the actual output buffer.
  102. coder->need_dictionary_reset = false;
  103. dict_reset(dict);
  104. return LZMA_OK;
  105. }
  106. break;
  107. }
  108. case SEQ_UNCOMPRESSED_1:
  109. coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8;
  110. coder->sequence = SEQ_UNCOMPRESSED_2;
  111. break;
  112. case SEQ_UNCOMPRESSED_2:
  113. coder->uncompressed_size += in[(*in_pos)++] + 1;
  114. coder->sequence = SEQ_COMPRESSED_0;
  115. coder->lzma.set_uncompressed(coder->lzma.coder,
  116. coder->uncompressed_size);
  117. break;
  118. case SEQ_COMPRESSED_0:
  119. coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8;
  120. coder->sequence = SEQ_COMPRESSED_1;
  121. break;
  122. case SEQ_COMPRESSED_1:
  123. coder->compressed_size += in[(*in_pos)++] + 1;
  124. coder->sequence = coder->next_sequence;
  125. break;
  126. case SEQ_PROPERTIES:
  127. if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++]))
  128. return LZMA_DATA_ERROR;
  129. coder->lzma.reset(coder->lzma.coder, &coder->options);
  130. coder->sequence = SEQ_LZMA;
  131. break;
  132. case SEQ_LZMA: {
  133. // Store the start offset so that we can update
  134. // coder->compressed_size later.
  135. const size_t in_start = *in_pos;
  136. // Decode from in[] to *dict.
  137. const lzma_ret ret = coder->lzma.code(coder->lzma.coder,
  138. dict, in, in_pos, in_size);
  139. // Validate and update coder->compressed_size.
  140. const size_t in_used = *in_pos - in_start;
  141. if (in_used > coder->compressed_size)
  142. return LZMA_DATA_ERROR;
  143. coder->compressed_size -= in_used;
  144. // Return if we didn't finish the chunk, or an error occurred.
  145. if (ret != LZMA_STREAM_END)
  146. return ret;
  147. // The LZMA decoder must have consumed the whole chunk now.
  148. // We don't need to worry about uncompressed size since it
  149. // is checked by the LZMA decoder.
  150. if (coder->compressed_size != 0)
  151. return LZMA_DATA_ERROR;
  152. coder->sequence = SEQ_CONTROL;
  153. break;
  154. }
  155. case SEQ_COPY: {
  156. // Copy from input to the dictionary as is.
  157. dict_write(dict, in, in_pos, in_size, &coder->compressed_size);
  158. if (coder->compressed_size != 0)
  159. return LZMA_OK;
  160. coder->sequence = SEQ_CONTROL;
  161. break;
  162. }
  163. default:
  164. assert(0);
  165. return LZMA_PROG_ERROR;
  166. }
  167. return LZMA_OK;
  168. }
  169. static void
  170. lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
  171. {
  172. assert(coder->lzma.end == NULL);
  173. lzma_free(coder->lzma.coder, allocator);
  174. lzma_free(coder, allocator);
  175. return;
  176. }
  177. static lzma_ret
  178. lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
  179. const void *opt, lzma_lz_options *lz_options)
  180. {
  181. const lzma_options_lzma *options = opt;
  182. if (lz->coder == NULL) {
  183. lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
  184. if (lz->coder == NULL)
  185. return LZMA_MEM_ERROR;
  186. lz->code = &lzma2_decode;
  187. lz->end = &lzma2_decoder_end;
  188. lz->coder->lzma = LZMA_LZ_DECODER_INIT;
  189. }
  190. lz->coder->sequence = SEQ_CONTROL;
  191. lz->coder->need_properties = true;
  192. lz->coder->need_dictionary_reset = options->preset_dict == NULL
  193. || options->preset_dict_size == 0;
  194. return lzma_lzma_decoder_create(&lz->coder->lzma,
  195. allocator, options, lz_options);
  196. }
  197. extern lzma_ret
  198. lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
  199. const lzma_filter_info *filters)
  200. {
  201. // LZMA2 can only be the last filter in the chain. This is enforced
  202. // by the raw_decoder initialization.
  203. assert(filters[1].init == NULL);
  204. return lzma_lz_decoder_init(next, allocator, filters,
  205. &lzma2_decoder_init);
  206. }
  207. extern uint64_t
  208. lzma_lzma2_decoder_memusage(const void *options)
  209. {
  210. return sizeof(lzma_coder)
  211. + lzma_lzma_decoder_memusage_nocheck(options);
  212. }
  213. extern lzma_ret
  214. lzma_lzma2_props_decode(void **options, lzma_allocator *allocator,
  215. const uint8_t *props, size_t props_size)
  216. {
  217. lzma_options_lzma *opt;
  218. if (props_size != 1)
  219. return LZMA_OPTIONS_ERROR;
  220. // Check that reserved bits are unset.
  221. if (props[0] & 0xC0)
  222. return LZMA_OPTIONS_ERROR;
  223. // Decode the dictionary size.
  224. if (props[0] > 40)
  225. return LZMA_OPTIONS_ERROR;
  226. opt = lzma_alloc(sizeof(lzma_options_lzma), allocator);
  227. if (opt == NULL)
  228. return LZMA_MEM_ERROR;
  229. if (props[0] == 40) {
  230. opt->dict_size = UINT32_MAX;
  231. } else {
  232. opt->dict_size = 2 | (props[0] & 1);
  233. opt->dict_size <<= props[0] / 2 + 11;
  234. }
  235. opt->preset_dict = NULL;
  236. opt->preset_dict_size = 0;
  237. *options = opt;
  238. return LZMA_OK;
  239. }