lz_encoder.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file lz_encoder.c
  4. /// \brief LZ in window
  5. ///
  6. // Authors: Igor Pavlov
  7. // Lasse Collin
  8. //
  9. // This file has been put into the public domain.
  10. // You can do whatever you want with this file.
  11. //
  12. ///////////////////////////////////////////////////////////////////////////////
  13. #include "lz_encoder.h"
  14. #include "lz_encoder_hash.h"
  15. // See lz_encoder_hash.h. This is a bit hackish but avoids making
  16. // endianness a conditional in makefiles.
  17. #if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)
  18. # include "lz_encoder_hash_table.h"
  19. #endif
/// Internal state of the generic LZ encoder wrapper. One of these is
/// allocated per encoder instance in lzma_lz_encoder_init().
struct lzma_coder_s {
	/// LZ-based encoder e.g. LZMA; provides code()/end() callbacks
	/// that consume data from mf.
	lzma_lz_encoder lz;

	/// History buffer and match finder
	lzma_mf mf;

	/// Next coder in the chain; if its code() is non-NULL it produces
	/// the data that fills mf.buffer (see fill_window()).
	lzma_next_coder next;
};
  28. /// \brief Moves the data in the input window to free space for new data
  29. ///
  30. /// mf->buffer is a sliding input window, which keeps mf->keep_size_before
  31. /// bytes of input history available all the time. Now and then we need to
  32. /// "slide" the buffer to make space for the new data to the end of the
  33. /// buffer. At the same time, data older than keep_size_before is dropped.
  34. ///
  35. static void
  36. move_window(lzma_mf *mf)
  37. {
  38. uint32_t move_offset;
  39. size_t move_size;
  40. // Align the move to a multiple of 16 bytes. Some LZ-based encoders
  41. // like LZMA use the lowest bits of mf->read_pos to know the
  42. // alignment of the uncompressed data. We also get better speed
  43. // for memmove() with aligned buffers.
  44. assert(mf->read_pos > mf->keep_size_before);
  45. move_offset = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);
  46. assert(mf->write_pos > move_offset);
  47. move_size = mf->write_pos - move_offset;
  48. assert(move_offset + move_size <= mf->size);
  49. memmove(mf->buffer, mf->buffer + move_offset, move_size);
  50. mf->offset += move_offset;
  51. mf->read_pos -= move_offset;
  52. mf->read_limit -= move_offset;
  53. mf->write_pos -= move_offset;
  54. return;
  55. }
/// \brief Tries to fill the input window (mf->buffer)
///
/// If we are the last encoder in the chain, our input data is in in[].
/// Otherwise we call the next filter in the chain to process in[] and
/// write its output to mf->buffer.
///
/// This function must not be called once it has returned LZMA_STREAM_END.
///
/// \param coder     Encoder whose match finder window is being filled
/// \param allocator Allocator to pass through to the next filter
/// \param in        Input buffer
/// \param in_pos    Read position in in[]; advanced by the amount consumed
/// \param in_size   Size of in[]
/// \param action    Caller's action (LZMA_RUN, flush, or finish)
///
/// \return LZMA_OK or an error code from the next filter. Note that the
///         internal LZMA_STREAM_END is converted to LZMA_OK here; the end
///         condition is recorded in coder->mf.action/read_limit instead.
static lzma_ret
fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
		size_t *in_pos, size_t in_size, lzma_action action)
{
	size_t write_pos;
	lzma_ret ret;

	assert(coder->mf.read_pos <= coder->mf.write_pos);

	// Move the sliding window if needed.
	if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)
		move_window(&coder->mf);

	// Maybe this is ugly, but lzma_mf uses uint32_t for most things
	// (which I find cleanest), but we need size_t here when filling
	// the history window.
	write_pos = coder->mf.write_pos;
	if (coder->next.code == NULL) {
		// Not using a filter, simply memcpy() as much as possible.
		lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,
				&write_pos, coder->mf.size);

		// With no next filter, end of stream is reached exactly when
		// the caller requested a finish/flush and all input is used.
		ret = action != LZMA_RUN && *in_pos == in_size
				? LZMA_STREAM_END : LZMA_OK;
	} else {
		ret = coder->next.code(coder->next.coder, allocator,
				in, in_pos, in_size,
				coder->mf.buffer, &write_pos,
				coder->mf.size, action);
	}

	coder->mf.write_pos = write_pos;

	// If end of stream has been reached or flushing completed, we allow
	// the encoder to process all the input (that is, read_pos is allowed
	// to reach write_pos). Otherwise we keep keep_size_after bytes
	// available as prebuffer.
	if (ret == LZMA_STREAM_END) {
		assert(*in_pos == in_size);
		ret = LZMA_OK;
		coder->mf.action = action;
		coder->mf.read_limit = coder->mf.write_pos;
	} else if (coder->mf.write_pos > coder->mf.keep_size_after) {
		// This needs to be done conditionally, because if we got
		// only little new input, there may be too little input
		// to do any encoding yet.
		coder->mf.read_limit = coder->mf.write_pos
				- coder->mf.keep_size_after;
	}

	// Restart the match finder after finished LZMA_SYNC_FLUSH.
	if (coder->mf.pending > 0
			&& coder->mf.read_pos < coder->mf.read_limit) {
		// Match finder may update coder->pending and expects it to
		// start from zero, so use a temporary variable.
		const size_t pending = coder->mf.pending;
		coder->mf.pending = 0;

		// Rewind read_pos so that the match finder can hash
		// the pending bytes.
		assert(coder->mf.read_pos >= pending);
		coder->mf.read_pos -= pending;

		// Call the skip function directly instead of using
		// mf_skip(), since we don't want to touch mf->read_ahead.
		coder->mf.skip(&coder->mf, pending);
	}

	return ret;
}
  124. static lzma_ret
  125. lz_encode(lzma_coder *coder, lzma_allocator *allocator,
  126. const uint8_t *LZMA_RESTRICT in, size_t *LZMA_RESTRICT in_pos,
  127. size_t in_size,
  128. uint8_t *LZMA_RESTRICT out, size_t *LZMA_RESTRICT out_pos,
  129. size_t out_size, lzma_action action)
  130. {
  131. while (*out_pos < out_size
  132. && (*in_pos < in_size || action != LZMA_RUN)) {
  133. lzma_ret ret;
  134. // Read more data to coder->mf.buffer if needed.
  135. if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
  136. >= coder->mf.read_limit)
  137. return_if_error(fill_window(coder, allocator,
  138. in, in_pos, in_size, action));
  139. // Encode
  140. ret = coder->lz.code(coder->lz.coder,
  141. &coder->mf, out, out_pos, out_size);
  142. if (ret != LZMA_OK) {
  143. // Setting this to LZMA_RUN for cases when we are
  144. // flushing. It doesn't matter when finishing or if
  145. // an error occurred.
  146. coder->mf.action = LZMA_RUN;
  147. return ret;
  148. }
  149. }
  150. return LZMA_OK;
  151. }
/// \brief Validates lz_options and computes all size fields of *mf
///
/// Sets up window sizes, match finder function pointers, hash mask, and
/// hash/son array element counts. Frees mf->buffer / mf->hash when their
/// required sizes changed, so that lz_encoder_init() can reallocate them.
/// Does not allocate anything itself (also used by
/// lzma_lz_encoder_memusage() with allocator == NULL).
///
/// \return false on success, true if the options are invalid.
static bool
lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
		const lzma_lz_options *lz_options)
{
	bool is_bt;
	uint32_t new_count;
	uint32_t reserve;
	uint32_t old_size;
	uint32_t hash_bytes;
	uint32_t hs;
	uint32_t old_count;

	// For now, the dictionary size is limited to 1.5 GiB. This may grow
	// in the future if needed, but it needs a little more work than just
	// changing this check.
	if (lz_options->dict_size < LZMA_DICT_SIZE_MIN
			|| lz_options->dict_size
				> (UINT32_C(1) << 30) + (UINT32_C(1) << 29)
			|| lz_options->nice_len > lz_options->match_len_max)
		return true;

	mf->keep_size_before = lz_options->before_size + lz_options->dict_size;

	mf->keep_size_after = lz_options->after_size
			+ lz_options->match_len_max;

	// To avoid constant memmove()s, allocate some extra space. Since
	// memmove()s become more expensive when the size of the buffer
	// increases, we reserve more space when a large dictionary is
	// used to make the memmove() calls rarer.
	//
	// This works with dictionaries up to about 3 GiB. If bigger
	// dictionary is wanted, some extra work is needed:
	//   - Several variables in lzma_mf have to be changed from uint32_t
	//     to size_t.
	//   - Memory usage calculation needs something too, e.g. use uint64_t
	//     for mf->size.
	reserve = lz_options->dict_size / 2;
	if (reserve > (UINT32_C(1) << 30))
		reserve /= 2;

	reserve += (lz_options->before_size + lz_options->match_len_max
			+ lz_options->after_size) / 2 + (UINT32_C(1) << 19);

	old_size = mf->size;
	mf->size = mf->keep_size_before + reserve + mf->keep_size_after;

	// Deallocate the old history buffer if it exists but has different
	// size than what is needed now.
	if (mf->buffer != NULL && old_size != mf->size) {
		lzma_free(mf->buffer, allocator);
		mf->buffer = NULL;
	}

	// Match finder options
	mf->match_len_max = lz_options->match_len_max;
	mf->nice_len = lz_options->nice_len;

	// cyclic_size has to stay smaller than 2 Gi. Note that this doesn't
	// mean limiting dictionary size to less than 2 GiB. With a match
	// finder that uses multibyte resolution (hashes start at e.g. every
	// fourth byte), cyclic_size would stay below 2 Gi even when
	// dictionary size is greater than 2 GiB.
	//
	// It would be possible to allow cyclic_size >= 2 Gi, but then we
	// would need to be careful to use 64-bit types in various places
	// (size_t could do since we would need bigger than 32-bit address
	// space anyway). It would also require either zeroing a multigigabyte
	// buffer at initialization (waste of time and RAM) or allow
	// normalization in lz_encoder_mf.c to access uninitialized
	// memory to keep the code simpler. The current way is simple and
	// still allows pretty big dictionaries, so I don't expect these
	// limits to change.
	mf->cyclic_size = lz_options->dict_size + 1;

	// Validate the match finder ID and setup the function pointers.
	switch (lz_options->match_finder) {
#ifdef HAVE_MF_HC3
	case LZMA_MF_HC3:
		mf->find = &lzma_mf_hc3_find;
		mf->skip = &lzma_mf_hc3_skip;
		break;
#endif
#ifdef HAVE_MF_HC4
	case LZMA_MF_HC4:
		mf->find = &lzma_mf_hc4_find;
		mf->skip = &lzma_mf_hc4_skip;
		break;
#endif
#ifdef HAVE_MF_BT2
	case LZMA_MF_BT2:
		mf->find = &lzma_mf_bt2_find;
		mf->skip = &lzma_mf_bt2_skip;
		break;
#endif
#ifdef HAVE_MF_BT3
	case LZMA_MF_BT3:
		mf->find = &lzma_mf_bt3_find;
		mf->skip = &lzma_mf_bt3_skip;
		break;
#endif
#ifdef HAVE_MF_BT4
	case LZMA_MF_BT4:
		mf->find = &lzma_mf_bt4_find;
		mf->skip = &lzma_mf_bt4_skip;
		break;
#endif
	default:
		return true;
	}

	// Calculate the sizes of mf->hash and mf->son and check that
	// nice_len is big enough for the selected match finder.
	// NOTE: The match finder ID encodes the hash byte count in its low
	// nibble and the "binary tree" flag in bit 4 (see the checks below).
	hash_bytes = lz_options->match_finder & 0x0F;
	if (hash_bytes > mf->nice_len)
		return true;

	is_bt = (lz_options->match_finder & 0x10) != 0;

	if (hash_bytes == 2) {
		hs = 0xFFFF;
	} else {
		// Round dictionary size up to the next 2^n - 1 so it can
		// be used as a hash mask.
		hs = lz_options->dict_size - 1;
		hs |= hs >> 1;
		hs |= hs >> 2;
		hs |= hs >> 4;
		hs |= hs >> 8;
		hs >>= 1;
		hs |= 0xFFFF;

		// Cap the hash table size; 3-byte hashes cannot address
		// more than 2^24 slots anyway.
		if (hs > (UINT32_C(1) << 24)) {
			if (hash_bytes == 3)
				hs = (UINT32_C(1) << 24) - 1;
			else
				hs >>= 1;
		}
	}

	mf->hash_mask = hs;

	// hs becomes the total element count of the hash arrays: the main
	// hash plus the smaller 2-byte/3-byte hash tables when used.
	++hs;
	if (hash_bytes > 2)
		hs += HASH_2_SIZE;
	if (hash_bytes > 3)
		hs += HASH_3_SIZE;
	/*
	No match finder uses this at the moment.
	if (mf->hash_bytes > 4)
		hs += HASH_4_SIZE;
	*/

	// If the above code calculating hs is modified, make sure that
	// this assertion stays valid (UINT32_MAX / 5 is not strictly the
	// exact limit). If it doesn't, you need to calculate that
	// hash_size_sum + sons_count cannot overflow.
	assert(hs < UINT32_MAX / 5);

	old_count = mf->hash_size_sum + mf->sons_count;
	mf->hash_size_sum = hs;
	mf->sons_count = mf->cyclic_size;
	if (is_bt)
		mf->sons_count *= 2;

	new_count = mf->hash_size_sum + mf->sons_count;

	// Deallocate the old hash array if it exists and has different size
	// than what is needed now.
	if (old_count != new_count) {
		lzma_free(mf->hash, allocator);
		mf->hash = NULL;
	}

	// Maximum number of match finder cycles
	mf->depth = lz_options->depth;
	if (mf->depth == 0) {
		if (is_bt)
			mf->depth = 16 + mf->nice_len / 2;
		else
			mf->depth = 4 + mf->nice_len / 4;
	}

	return false;
}
/// \brief Allocates buffers (if missing) and resets the match finder state
///
/// Must be called after lz_encoder_prepare() has set the size fields.
/// Allocates mf->buffer and mf->hash lazily (they may survive from an
/// earlier init with identical sizes), resets all positions, and loads the
/// optional preset dictionary.
///
/// \return false on success, true on memory allocation failure.
static bool
lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
		const lzma_lz_options *lz_options)
{
	size_t alloc_count;

	// Allocate the history buffer.
	if (mf->buffer == NULL) {
		mf->buffer = lzma_alloc(mf->size, allocator);
		if (mf->buffer == NULL)
			return true;
	}

	// Use cyclic_size as initial mf->offset. This allows
	// avoiding a few branches in the match finders. The downside is
	// that match finder needs to be normalized more often, which may
	// hurt performance with huge dictionaries.
	mf->offset = mf->cyclic_size;
	mf->read_pos = 0;
	mf->read_ahead = 0;
	mf->read_limit = 0;
	mf->write_pos = 0;
	mf->pending = 0;

	// Allocate match finder's hash array.
	alloc_count = mf->hash_size_sum + mf->sons_count;

#if UINT32_MAX >= SIZE_MAX / 4
	// Check for integer overflow. (Huge dictionaries are not
	// possible on 32-bit CPU.)
	if (alloc_count > SIZE_MAX / sizeof(uint32_t))
		return true;
#endif

	if (mf->hash == NULL) {
		mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t),
				allocator);
		if (mf->hash == NULL)
			return true;
	}

	// mf->son shares the single allocation with mf->hash.
	mf->son = mf->hash + mf->hash_size_sum;
	mf->cyclic_pos = 0;

	// Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we
	// can use memset().
	/*
	for (uint32_t i = 0; i < hash_size_sum; ++i)
		mf->hash[i] = EMPTY_HASH_VALUE;
	*/
	memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t));

	// We don't need to initialize mf->son, but not doing that will
	// make Valgrind complain in normalization (see normalize() in
	// lz_encoder_mf.c).
	//
	// Skipping this initialization is *very* good when big dictionary is
	// used but only small amount of data gets actually compressed: most
	// of the mf->hash won't get actually allocated by the kernel, so
	// we avoid wasting RAM and improve initialization speed a lot.
	//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));

	// Handle preset dictionary.
	if (lz_options->preset_dict != NULL
			&& lz_options->preset_dict_size > 0) {
		// If the preset dictionary is bigger than the actual
		// dictionary, use only the tail.
		mf->write_pos = my_min(lz_options->preset_dict_size, mf->size);
		memcpy(mf->buffer, lz_options->preset_dict
				+ lz_options->preset_dict_size - mf->write_pos,
				mf->write_pos);

		// Hash the preset dictionary contents without producing any
		// output; SYNC_FLUSH lets the skip function read up to
		// write_pos.
		mf->action = LZMA_SYNC_FLUSH;
		mf->skip(mf, mf->write_pos);
	}

	mf->action = LZMA_RUN;

	return false;
}
  383. extern uint64_t
  384. lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
  385. {
  386. // Old buffers must not exist when calling lz_encoder_prepare().
  387. lzma_mf mf = { NULL };
  388. // Setup the size information into mf.
  389. if (lz_encoder_prepare(&mf, NULL, lz_options))
  390. return UINT64_MAX;
  391. // Calculate the memory usage.
  392. return (uint64_t)(mf.hash_size_sum + mf.sons_count)
  393. * sizeof(uint32_t)
  394. + (uint64_t)(mf.size) + sizeof(lzma_coder);
  395. }
  396. static void
  397. lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
  398. {
  399. lzma_next_end(&coder->next, allocator);
  400. lzma_free(coder->mf.hash, allocator);
  401. lzma_free(coder->mf.buffer, allocator);
  402. if (coder->lz.end != NULL)
  403. coder->lz.end(coder->lz.coder, allocator);
  404. else
  405. lzma_free(coder->lz.coder, allocator);
  406. lzma_free(coder, allocator);
  407. return;
  408. }
  409. static lzma_ret
  410. lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
  411. const lzma_filter *filters_null lzma_attribute((__unused__)),
  412. const lzma_filter *reversed_filters)
  413. {
  414. if (coder->lz.options_update == NULL)
  415. return LZMA_PROG_ERROR;
  416. return_if_error(coder->lz.options_update(
  417. coder->lz.coder, reversed_filters));
  418. return lzma_next_filter_update(
  419. &coder->next, allocator, reversed_filters + 1);
  420. }
/// \brief Initializes a generic LZ encoder
///
/// Allocates the base lzma_coder on first use, then lets the LZ-specific
/// lz_init() callback fill in lz_options, prepares and (re)initializes the
/// match finder, and finally initializes the next filter in the chain.
///
/// \param next      Coder structure to initialize
/// \param allocator Custom allocator or NULL for malloc()/free()
/// \param filters   Filter chain; filters[0].options is passed to lz_init()
/// \param lz_init   LZ-specific initializer (e.g. for LZMA) that also
///                  reports the window sizes via lz_options
extern lzma_ret
lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters,
		lzma_ret (*lz_init)(lzma_lz_encoder *lz,
			lzma_allocator *allocator, const void *options,
			lzma_lz_options *lz_options))
{
	lzma_lz_options lz_options;

#ifdef HAVE_SMALL
	// We need that the CRC32 table has been initialized.
	lzma_crc32_init();
#endif

	// Allocate and initialize the base data structure.
	if (next->coder == NULL) {
		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (next->coder == NULL)
			return LZMA_MEM_ERROR;

		next->code = &lz_encode;
		next->end = &lz_encoder_end;
		next->update = &lz_encoder_update;

		// Clear pointers so that lz_encoder_prepare() and
		// lz_encoder_end() can safely inspect them.
		next->coder->lz.coder = NULL;
		next->coder->lz.code = NULL;
		next->coder->lz.end = NULL;

		next->coder->mf.buffer = NULL;
		next->coder->mf.hash = NULL;
		next->coder->mf.hash_size_sum = 0;
		next->coder->mf.sons_count = 0;

		next->coder->next = LZMA_NEXT_CODER_INIT;
	}

	// Initialize the LZ-based encoder.
	return_if_error(lz_init(&next->coder->lz, allocator,
			filters[0].options, &lz_options));

	// Setup the size information into next->coder->mf and deallocate
	// old buffers if they have wrong size.
	if (lz_encoder_prepare(&next->coder->mf, allocator, &lz_options))
		return LZMA_OPTIONS_ERROR;

	// Allocate new buffers if needed, and do the rest of
	// the initialization.
	if (lz_encoder_init(&next->coder->mf, allocator, &lz_options))
		return LZMA_MEM_ERROR;

	// Initialize the next filter in the chain, if any.
	return lzma_next_filter_init(&next->coder->next, allocator,
			filters + 1);
}
  465. extern LZMA_API(lzma_bool)
  466. lzma_mf_is_supported(lzma_match_finder mf)
  467. {
  468. bool ret = false;
  469. #ifdef HAVE_MF_HC3
  470. if (mf == LZMA_MF_HC3)
  471. ret = true;
  472. #endif
  473. #ifdef HAVE_MF_HC4
  474. if (mf == LZMA_MF_HC4)
  475. ret = true;
  476. #endif
  477. #ifdef HAVE_MF_BT2
  478. if (mf == LZMA_MF_BT2)
  479. ret = true;
  480. #endif
  481. #ifdef HAVE_MF_BT3
  482. if (mf == LZMA_MF_BT3)
  483. ret = true;
  484. #endif
  485. #ifdef HAVE_MF_BT4
  486. if (mf == LZMA_MF_BT4)
  487. ret = true;
  488. #endif
  489. return ret;
  490. }