index_decoder.c 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file index_decoder.c
  4. /// \brief Decodes the Index field
  5. //
  6. // Author: Lasse Collin
  7. //
  8. // This file has been put into the public domain.
  9. // You can do whatever you want with this file.
  10. //
  11. ///////////////////////////////////////////////////////////////////////////////
  12. #include "index.h"
  13. #include "check.h"
  14. struct lzma_coder_s {
  15. enum {
  16. SEQ_INDICATOR,
  17. SEQ_COUNT,
  18. SEQ_MEMUSAGE,
  19. SEQ_UNPADDED,
  20. SEQ_UNCOMPRESSED,
  21. SEQ_PADDING_INIT,
  22. SEQ_PADDING,
  23. SEQ_CRC32,
  24. } sequence;
  25. /// Memory usage limit
  26. uint64_t memlimit;
  27. /// Target Index
  28. lzma_index *index;
  29. /// Pointer give by the application, which is set after
  30. /// successful decoding.
  31. lzma_index **index_ptr;
  32. /// Number of Records left to decode.
  33. lzma_vli count;
  34. /// The most recent Unpadded Size field
  35. lzma_vli unpadded_size;
  36. /// The most recent Uncompressed Size field
  37. lzma_vli uncompressed_size;
  38. /// Position in integers
  39. size_t pos;
  40. /// CRC32 of the List of Records field
  41. uint32_t crc32;
  42. };
  43. static lzma_ret
  44. index_decode(lzma_coder *coder, lzma_allocator *allocator,
  45. const uint8_t *LZMA_RESTRICT in, size_t *LZMA_RESTRICT in_pos,
  46. size_t in_size,
  47. uint8_t *LZMA_RESTRICT out lzma_attribute((__unused__)),
  48. size_t *LZMA_RESTRICT out_pos lzma_attribute((__unused__)),
  49. size_t out_size lzma_attribute((__unused__)),
  50. lzma_action action lzma_attribute((__unused__)))
  51. {
  52. // Similar optimization as in index_encoder.c
  53. const size_t in_start = *in_pos;
  54. lzma_ret ret = LZMA_OK;
  55. while (*in_pos < in_size)
  56. switch (coder->sequence) {
  57. case SEQ_INDICATOR:
  58. // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
  59. // LZMA_FORMAT_ERROR, because a typical usage case for Index
  60. // decoder is when parsing the Stream backwards. If seeking
  61. // backward from the Stream Footer gives us something that
  62. // doesn't begin with Index Indicator, the file is considered
  63. // corrupt, not "programming error" or "unrecognized file
  64. // format". One could argue that the application should
  65. // verify the Index Indicator before trying to decode the
  66. // Index, but well, I suppose it is simpler this way.
  67. if (in[(*in_pos)++] != 0x00)
  68. return LZMA_DATA_ERROR;
  69. coder->sequence = SEQ_COUNT;
  70. break;
  71. case SEQ_COUNT:
  72. ret = lzma_vli_decode(&coder->count, &coder->pos,
  73. in, in_pos, in_size);
  74. if (ret != LZMA_STREAM_END)
  75. goto out;
  76. coder->pos = 0;
  77. coder->sequence = SEQ_MEMUSAGE;
  78. // Fall through
  79. case SEQ_MEMUSAGE:
  80. if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
  81. ret = LZMA_MEMLIMIT_ERROR;
  82. goto out;
  83. }
  84. // Tell the Index handling code how many Records this
  85. // Index has to allow it to allocate memory more efficiently.
  86. lzma_index_prealloc(coder->index, coder->count);
  87. ret = LZMA_OK;
  88. coder->sequence = coder->count == 0
  89. ? SEQ_PADDING_INIT : SEQ_UNPADDED;
  90. break;
  91. case SEQ_UNPADDED:
  92. case SEQ_UNCOMPRESSED: {
  93. lzma_vli *size = coder->sequence == SEQ_UNPADDED
  94. ? &coder->unpadded_size
  95. : &coder->uncompressed_size;
  96. ret = lzma_vli_decode(size, &coder->pos,
  97. in, in_pos, in_size);
  98. if (ret != LZMA_STREAM_END)
  99. goto out;
  100. ret = LZMA_OK;
  101. coder->pos = 0;
  102. if (coder->sequence == SEQ_UNPADDED) {
  103. // Validate that encoded Unpadded Size isn't too small
  104. // or too big.
  105. if (coder->unpadded_size < UNPADDED_SIZE_MIN
  106. || coder->unpadded_size
  107. > UNPADDED_SIZE_MAX)
  108. return LZMA_DATA_ERROR;
  109. coder->sequence = SEQ_UNCOMPRESSED;
  110. } else {
  111. // Add the decoded Record to the Index.
  112. return_if_error(lzma_index_append(
  113. coder->index, allocator,
  114. coder->unpadded_size,
  115. coder->uncompressed_size));
  116. // Check if this was the last Record.
  117. coder->sequence = --coder->count == 0
  118. ? SEQ_PADDING_INIT
  119. : SEQ_UNPADDED;
  120. }
  121. break;
  122. }
  123. case SEQ_PADDING_INIT:
  124. coder->pos = lzma_index_padding_size(coder->index);
  125. coder->sequence = SEQ_PADDING;
  126. // Fall through
  127. case SEQ_PADDING:
  128. if (coder->pos > 0) {
  129. --coder->pos;
  130. if (in[(*in_pos)++] != 0x00)
  131. return LZMA_DATA_ERROR;
  132. break;
  133. }
  134. // Finish the CRC32 calculation.
  135. coder->crc32 = lzma_crc32(in + in_start,
  136. *in_pos - in_start, coder->crc32);
  137. coder->sequence = SEQ_CRC32;
  138. // Fall through
  139. case SEQ_CRC32:
  140. do {
  141. if (*in_pos == in_size)
  142. return LZMA_OK;
  143. if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
  144. != in[(*in_pos)++])
  145. return LZMA_DATA_ERROR;
  146. } while (++coder->pos < 4);
  147. // Decoding was successful, now we can let the application
  148. // see the decoded Index.
  149. *coder->index_ptr = coder->index;
  150. // Make index NULL so we don't free it unintentionally.
  151. coder->index = NULL;
  152. return LZMA_STREAM_END;
  153. default:
  154. assert(0);
  155. return LZMA_PROG_ERROR;
  156. }
  157. out:
  158. // Update the CRC32,
  159. coder->crc32 = lzma_crc32(in + in_start,
  160. *in_pos - in_start, coder->crc32);
  161. return ret;
  162. }
  163. static void
  164. index_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
  165. {
  166. lzma_index_end(coder->index, allocator);
  167. lzma_free(coder, allocator);
  168. return;
  169. }
  170. static lzma_ret
  171. index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
  172. uint64_t *old_memlimit, uint64_t new_memlimit)
  173. {
  174. *memusage = lzma_index_memusage(1, coder->count);
  175. *old_memlimit = coder->memlimit;
  176. if (new_memlimit != 0) {
  177. if (new_memlimit < *memusage)
  178. return LZMA_MEMLIMIT_ERROR;
  179. coder->memlimit = new_memlimit;
  180. }
  181. return LZMA_OK;
  182. }
  183. static lzma_ret
  184. index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator,
  185. lzma_index **i, uint64_t memlimit)
  186. {
  187. // Remember the pointer given by the application. We will set it
  188. // to point to the decoded Index only if decoding is successful.
  189. // Before that, keep it NULL so that applications can always safely
  190. // pass it to lzma_index_end() no matter did decoding succeed or not.
  191. coder->index_ptr = i;
  192. *i = NULL;
  193. // We always allocate a new lzma_index.
  194. coder->index = lzma_index_init(allocator);
  195. if (coder->index == NULL)
  196. return LZMA_MEM_ERROR;
  197. // Initialize the rest.
  198. coder->sequence = SEQ_INDICATOR;
  199. coder->memlimit = memlimit;
  200. coder->count = 0; // Needs to be initialized due to _memconfig().
  201. coder->pos = 0;
  202. coder->crc32 = 0;
  203. return LZMA_OK;
  204. }
  205. static lzma_ret
  206. index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
  207. lzma_index **i, uint64_t memlimit)
  208. {
  209. lzma_next_coder_init(&index_decoder_init, next, allocator);
  210. if (i == NULL || memlimit == 0)
  211. return LZMA_PROG_ERROR;
  212. if (next->coder == NULL) {
  213. next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
  214. if (next->coder == NULL)
  215. return LZMA_MEM_ERROR;
  216. next->code = &index_decode;
  217. next->end = &index_decoder_end;
  218. next->memconfig = &index_decoder_memconfig;
  219. next->coder->index = NULL;
  220. } else {
  221. lzma_index_end(next->coder->index, allocator);
  222. }
  223. return index_decoder_reset(next->coder, allocator, i, memlimit);
  224. }
  225. extern LZMA_API(lzma_ret)
  226. lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
  227. {
  228. lzma_next_strm_init2(index_decoder_init, strm, i, memlimit);
  229. strm->internal->supported_actions[LZMA_RUN] = true;
  230. strm->internal->supported_actions[LZMA_FINISH] = true;
  231. return LZMA_OK;
  232. }
  233. extern LZMA_API(lzma_ret)
  234. lzma_index_buffer_decode(
  235. lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator,
  236. const uint8_t *in, size_t *in_pos, size_t in_size)
  237. {
  238. lzma_coder coder;
  239. lzma_ret ret;
  240. // Store the input start position so that we can restore it in case
  241. // of an error.
  242. const size_t in_start = *in_pos;
  243. // Sanity checks
  244. if (i == NULL || memlimit == NULL
  245. || in == NULL || in_pos == NULL || *in_pos > in_size)
  246. return LZMA_PROG_ERROR;
  247. // Initialize the decoder.
  248. return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
  249. // Do the actual decoding.
  250. ret = index_decode(&coder, allocator, in, in_pos, in_size,
  251. NULL, NULL, 0, LZMA_RUN);
  252. if (ret == LZMA_STREAM_END) {
  253. ret = LZMA_OK;
  254. } else {
  255. // Something went wrong, free the Index structure and restore
  256. // the input position.
  257. lzma_index_end(coder.index, allocator);
  258. *in_pos = in_start;
  259. if (ret == LZMA_OK) {
  260. // The input is truncated or otherwise corrupt.
  261. // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
  262. // like lzma_vli_decode() does in single-call mode.
  263. ret = LZMA_DATA_ERROR;
  264. } else if (ret == LZMA_MEMLIMIT_ERROR) {
  265. // Tell the caller how much memory would have
  266. // been needed.
  267. *memlimit = lzma_index_memusage(1, coder.count);
  268. }
  269. }
  270. return ret;
  271. }