stream_decoder.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file stream_decoder.c
  4. /// \brief Decodes .xz Streams
  5. //
  6. // Author: Lasse Collin
  7. //
  8. // This file has been put into the public domain.
  9. // You can do whatever you want with this file.
  10. //
  11. ///////////////////////////////////////////////////////////////////////////////
  12. #include "stream_decoder.h"
  13. #include "block_decoder.h"
  14. struct lzma_coder_s {
  15. enum {
  16. SEQ_STREAM_HEADER,
  17. SEQ_BLOCK_HEADER,
  18. SEQ_BLOCK,
  19. SEQ_INDEX,
  20. SEQ_STREAM_FOOTER,
  21. SEQ_STREAM_PADDING,
  22. } sequence;
  23. /// Block or Metadata decoder. This takes little memory and the same
  24. /// data structure can be used to decode every Block Header, so it's
  25. /// a good idea to have a separate lzma_next_coder structure for it.
  26. lzma_next_coder block_decoder;
  27. /// Block options decoded by the Block Header decoder and used by
  28. /// the Block decoder.
  29. lzma_block block_options;
  30. /// Stream Flags from Stream Header
  31. lzma_stream_flags stream_flags;
  32. /// Index is hashed so that it can be compared to the sizes of Blocks
  33. /// with O(1) memory usage.
  34. lzma_index_hash *index_hash;
  35. /// Memory usage limit
  36. uint64_t memlimit;
  37. /// Amount of memory actually needed (only an estimate)
  38. uint64_t memusage;
  39. /// If true, LZMA_NO_CHECK is returned if the Stream has
  40. /// no integrity check.
  41. bool tell_no_check;
  42. /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
  43. /// an integrity check that isn't supported by this liblzma build.
  44. bool tell_unsupported_check;
  45. /// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
  46. bool tell_any_check;
  47. /// If true, we will decode concatenated Streams that possibly have
  48. /// Stream Padding between or after them. LZMA_STREAM_END is returned
  49. /// once the application isn't giving us any new input, and we aren't
  50. /// in the middle of a Stream, and possible Stream Padding is a
  51. /// multiple of four bytes.
  52. bool concatenated;
  53. /// When decoding concatenated Streams, this is true as long as we
  54. /// are decoding the first Stream. This is needed to avoid misleading
  55. /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
  56. /// bytes.
  57. bool first_stream;
  58. /// Write position in buffer[] and position in Stream Padding
  59. size_t pos;
  60. /// Buffer to hold Stream Header, Block Header, and Stream Footer.
  61. /// Block Header has biggest maximum size.
  62. uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
  63. };
  64. static lzma_ret
  65. stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator)
  66. {
  67. // Initialize the Index hash used to verify the Index.
  68. coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
  69. if (coder->index_hash == NULL)
  70. return LZMA_MEM_ERROR;
  71. // Reset the rest of the variables.
  72. coder->sequence = SEQ_STREAM_HEADER;
  73. coder->pos = 0;
  74. return LZMA_OK;
  75. }
  76. static lzma_ret
  77. stream_decode(lzma_coder *coder, lzma_allocator *allocator,
  78. const uint8_t *LZMA_RESTRICT in, size_t *LZMA_RESTRICT in_pos,
  79. size_t in_size, uint8_t *LZMA_RESTRICT out,
  80. size_t *LZMA_RESTRICT out_pos, size_t out_size, lzma_action action)
  81. {
  82. // When decoding the actual Block, it may be able to produce more
  83. // output even if we don't give it any new input.
  84. while (true)
  85. switch (coder->sequence) {
  86. case SEQ_STREAM_HEADER: {
  87. lzma_ret ret;
  88. // Copy the Stream Header to the internal buffer.
  89. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
  90. LZMA_STREAM_HEADER_SIZE);
  91. // Return if we didn't get the whole Stream Header yet.
  92. if (coder->pos < LZMA_STREAM_HEADER_SIZE)
  93. return LZMA_OK;
  94. coder->pos = 0;
  95. // Decode the Stream Header.
  96. ret = lzma_stream_header_decode(
  97. &coder->stream_flags, coder->buffer);
  98. if (ret != LZMA_OK)
  99. return ret == LZMA_FORMAT_ERROR && !coder->first_stream
  100. ? LZMA_DATA_ERROR : ret;
  101. // If we are decoding concatenated Streams, and the later
  102. // Streams have invalid Header Magic Bytes, we give
  103. // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
  104. coder->first_stream = false;
  105. // Copy the type of the Check so that Block Header and Block
  106. // decoders see it.
  107. coder->block_options.check = coder->stream_flags.check;
  108. // Even if we return LZMA_*_CHECK below, we want
  109. // to continue from Block Header decoding.
  110. coder->sequence = SEQ_BLOCK_HEADER;
  111. // Detect if there's no integrity check or if it is
  112. // unsupported if those were requested by the application.
  113. if (coder->tell_no_check && coder->stream_flags.check
  114. == LZMA_CHECK_NONE)
  115. return LZMA_NO_CHECK;
  116. if (coder->tell_unsupported_check
  117. && !lzma_check_is_supported(
  118. coder->stream_flags.check))
  119. return LZMA_UNSUPPORTED_CHECK;
  120. if (coder->tell_any_check)
  121. return LZMA_GET_CHECK;
  122. }
  123. // Fall through
  124. case SEQ_BLOCK_HEADER: {
  125. lzma_filter filters[LZMA_FILTERS_MAX + 1];
  126. uint64_t memusage;
  127. lzma_ret ret;
  128. size_t i;
  129. if (*in_pos >= in_size)
  130. return LZMA_OK;
  131. if (coder->pos == 0) {
  132. // Detect if it's Index.
  133. if (in[*in_pos] == 0x00) {
  134. coder->sequence = SEQ_INDEX;
  135. break;
  136. }
  137. // Calculate the size of the Block Header. Note that
  138. // Block Header decoder wants to see this byte too
  139. // so don't advance *in_pos.
  140. coder->block_options.header_size
  141. = lzma_block_header_size_decode(
  142. in[*in_pos]);
  143. }
  144. // Copy the Block Header to the internal buffer.
  145. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
  146. coder->block_options.header_size);
  147. // Return if we didn't get the whole Block Header yet.
  148. if (coder->pos < coder->block_options.header_size)
  149. return LZMA_OK;
  150. coder->pos = 0;
  151. // Version 0 is currently the only possible version.
  152. coder->block_options.version = 0;
  153. // Set up a buffer to hold the filter chain. Block Header
  154. // decoder will initialize all members of this array so
  155. // we don't need to do it here.
  156. coder->block_options.filters = filters;
  157. // Decode the Block Header.
  158. return_if_error(lzma_block_header_decode(&coder->block_options,
  159. allocator, coder->buffer));
  160. // Check the memory usage limit.
  161. memusage = lzma_raw_decoder_memusage(filters);
  162. if (memusage == UINT64_MAX) {
  163. // One or more unknown Filter IDs.
  164. ret = LZMA_OPTIONS_ERROR;
  165. } else {
  166. // Now we can set coder->memusage since we know that
  167. // the filter chain is valid. We don't want
  168. // lzma_memusage() to return UINT64_MAX in case of
  169. // invalid filter chain.
  170. coder->memusage = memusage;
  171. if (memusage > coder->memlimit) {
  172. // The chain would need too much memory.
  173. ret = LZMA_MEMLIMIT_ERROR;
  174. } else {
  175. // Memory usage is OK.
  176. // Initialize the Block decoder.
  177. ret = lzma_block_decoder_init(
  178. &coder->block_decoder,
  179. allocator,
  180. &coder->block_options);
  181. }
  182. }
  183. // Free the allocated filter options since they are needed
  184. // only to initialize the Block decoder.
  185. for (i = 0; i < LZMA_FILTERS_MAX; ++i)
  186. lzma_free(filters[i].options, allocator);
  187. coder->block_options.filters = NULL;
  188. // Check if memory usage calculation and Block enocoder
  189. // initialization succeeded.
  190. if (ret != LZMA_OK)
  191. return ret;
  192. coder->sequence = SEQ_BLOCK;
  193. }
  194. // Fall through
  195. case SEQ_BLOCK: {
  196. const lzma_ret ret = coder->block_decoder.code(
  197. coder->block_decoder.coder, allocator,
  198. in, in_pos, in_size, out, out_pos, out_size,
  199. action);
  200. if (ret != LZMA_STREAM_END)
  201. return ret;
  202. // Block decoded successfully. Add the new size pair to
  203. // the Index hash.
  204. return_if_error(lzma_index_hash_append(coder->index_hash,
  205. lzma_block_unpadded_size(
  206. &coder->block_options),
  207. coder->block_options.uncompressed_size));
  208. coder->sequence = SEQ_BLOCK_HEADER;
  209. break;
  210. }
  211. case SEQ_INDEX: {
  212. lzma_ret ret;
  213. // If we don't have any input, don't call
  214. // lzma_index_hash_decode() since it would return
  215. // LZMA_BUF_ERROR, which we must not do here.
  216. if (*in_pos >= in_size)
  217. return LZMA_OK;
  218. // Decode the Index and compare it to the hash calculated
  219. // from the sizes of the Blocks (if any).
  220. ret = lzma_index_hash_decode(coder->index_hash,
  221. in, in_pos, in_size);
  222. if (ret != LZMA_STREAM_END)
  223. return ret;
  224. coder->sequence = SEQ_STREAM_FOOTER;
  225. }
  226. // Fall through
  227. case SEQ_STREAM_FOOTER: {
  228. lzma_stream_flags footer_flags;
  229. lzma_ret ret;
  230. // Copy the Stream Footer to the internal buffer.
  231. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
  232. LZMA_STREAM_HEADER_SIZE);
  233. // Return if we didn't get the whole Stream Footer yet.
  234. if (coder->pos < LZMA_STREAM_HEADER_SIZE)
  235. return LZMA_OK;
  236. coder->pos = 0;
  237. // Decode the Stream Footer. The decoder gives
  238. // LZMA_FORMAT_ERROR if the magic bytes don't match,
  239. // so convert that return code to LZMA_DATA_ERROR.
  240. ret = lzma_stream_footer_decode(
  241. &footer_flags, coder->buffer);
  242. if (ret != LZMA_OK)
  243. return ret == LZMA_FORMAT_ERROR
  244. ? LZMA_DATA_ERROR : ret;
  245. // Check that Index Size stored in the Stream Footer matches
  246. // the real size of the Index field.
  247. if (lzma_index_hash_size(coder->index_hash)
  248. != footer_flags.backward_size)
  249. return LZMA_DATA_ERROR;
  250. // Compare that the Stream Flags fields are identical in
  251. // both Stream Header and Stream Footer.
  252. return_if_error(lzma_stream_flags_compare(
  253. &coder->stream_flags, &footer_flags));
  254. if (!coder->concatenated)
  255. return LZMA_STREAM_END;
  256. coder->sequence = SEQ_STREAM_PADDING;
  257. }
  258. // Fall through
  259. case SEQ_STREAM_PADDING:
  260. assert(coder->concatenated);
  261. // Skip over possible Stream Padding.
  262. while (true) {
  263. if (*in_pos >= in_size) {
  264. // Unless LZMA_FINISH was used, we cannot
  265. // know if there's more input coming later.
  266. if (action != LZMA_FINISH)
  267. return LZMA_OK;
  268. // Stream Padding must be a multiple of
  269. // four bytes.
  270. return coder->pos == 0
  271. ? LZMA_STREAM_END
  272. : LZMA_DATA_ERROR;
  273. }
  274. // If the byte is not zero, it probably indicates
  275. // beginning of a new Stream (or the file is corrupt).
  276. if (in[*in_pos] != 0x00)
  277. break;
  278. ++*in_pos;
  279. coder->pos = (coder->pos + 1) & 3;
  280. }
  281. // Stream Padding must be a multiple of four bytes (empty
  282. // Stream Padding is OK).
  283. if (coder->pos != 0) {
  284. ++*in_pos;
  285. return LZMA_DATA_ERROR;
  286. }
  287. // Prepare to decode the next Stream.
  288. return_if_error(stream_decoder_reset(coder, allocator));
  289. break;
  290. default:
  291. assert(0);
  292. return LZMA_PROG_ERROR;
  293. }
  294. // Never reached
  295. }
  296. static void
  297. stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
  298. {
  299. lzma_next_end(&coder->block_decoder, allocator);
  300. lzma_index_hash_end(coder->index_hash, allocator);
  301. lzma_free(coder, allocator);
  302. return;
  303. }
  304. static lzma_check
  305. stream_decoder_get_check(const lzma_coder *coder)
  306. {
  307. return coder->stream_flags.check;
  308. }
  309. static lzma_ret
  310. stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
  311. uint64_t *old_memlimit, uint64_t new_memlimit)
  312. {
  313. *memusage = coder->memusage;
  314. *old_memlimit = coder->memlimit;
  315. if (new_memlimit != 0) {
  316. if (new_memlimit < coder->memusage)
  317. return LZMA_MEMLIMIT_ERROR;
  318. coder->memlimit = new_memlimit;
  319. }
  320. return LZMA_OK;
  321. }
  322. extern lzma_ret
  323. lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
  324. uint64_t memlimit, uint32_t flags)
  325. {
  326. lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
  327. if (memlimit == 0)
  328. return LZMA_PROG_ERROR;
  329. if (flags & ~LZMA_SUPPORTED_FLAGS)
  330. return LZMA_OPTIONS_ERROR;
  331. if (next->coder == NULL) {
  332. next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
  333. if (next->coder == NULL)
  334. return LZMA_MEM_ERROR;
  335. next->code = &stream_decode;
  336. next->end = &stream_decoder_end;
  337. next->get_check = &stream_decoder_get_check;
  338. next->memconfig = &stream_decoder_memconfig;
  339. next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
  340. next->coder->index_hash = NULL;
  341. }
  342. next->coder->memlimit = memlimit;
  343. next->coder->memusage = LZMA_MEMUSAGE_BASE;
  344. next->coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
  345. next->coder->tell_unsupported_check
  346. = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
  347. next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
  348. next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
  349. next->coder->first_stream = true;
  350. return stream_decoder_reset(next->coder, allocator);
  351. }
  352. extern LZMA_API(lzma_ret)
  353. lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
  354. {
  355. lzma_next_strm_init2(lzma_stream_decoder_init, strm, memlimit, flags);
  356. strm->internal->supported_actions[LZMA_RUN] = true;
  357. strm->internal->supported_actions[LZMA_FINISH] = true;
  358. return LZMA_OK;
  359. }