deflate.c 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929
  1. /*
  2. deflate.c - Zip 3
  3. Copyright (c) 1990-2007 Info-ZIP. All rights reserved.
  4. See the accompanying file LICENSE, version 2005-Feb-10 or later
  5. (the contents of which are also included in zip.h) for terms of use.
  6. If, for some reason, all these files are missing, the Info-ZIP license
  7. also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
  8. */
  9. /*
  10. * deflate.c by Jean-loup Gailly.
  11. *
  12. * PURPOSE
  13. *
  14. * Identify new text as repetitions of old text within a fixed-
  15. * length sliding window trailing behind the new text.
  16. *
  17. * DISCUSSION
  18. *
  19. * The "deflation" process depends on being able to identify portions
  20. * of the input text which are identical to earlier input (within a
  21. * sliding window trailing behind the input currently being processed).
  22. *
  23. * The most straightforward technique turns out to be the fastest for
  24. * most input files: try all possible matches and select the longest.
  25. * The key feature of this algorithm is that insertions into the string
  26. * dictionary are very simple and thus fast, and deletions are avoided
  27. * completely. Insertions are performed at each input character, whereas
  28. * string matches are performed only when the previous match ends. So it
  29. * is preferable to spend more time in matches to allow very fast string
  30. * insertions and avoid deletions. The matching algorithm for small
  31. * strings is inspired from that of Rabin & Karp. A brute force approach
  32. * is used to find longer strings when a small match has been found.
  33. * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
  34. * (by Leonid Broukhis).
  35. * A previous version of this file used a more sophisticated algorithm
  36. * (by Fiala and Greene) which is guaranteed to run in linear amortized
  37. * time, but has a larger average cost, uses more memory and is patented.
  38. * However the F&G algorithm may be faster for some highly redundant
  39. * files if the parameter max_chain_length (described below) is too large.
  40. *
  41. * ACKNOWLEDGEMENTS
  42. *
  43. * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
  44. * I found it in 'freeze' written by Leonid Broukhis.
  45. * Thanks to many info-zippers for bug reports and testing.
  46. *
  47. * REFERENCES
  48. *
  49. * APPNOTE.TXT documentation file in PKZIP 1.93a distribution.
  50. *
  51. * A description of the Rabin and Karp algorithm is given in the book
  52. * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
  53. *
  54. * Fiala,E.R., and Greene,D.H.
  55. * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
  56. *
  57. * INTERFACE
  58. *
  59. * void lm_init (int pack_level, ush *flags)
  60. * Initialize the "longest match" routines for a new file
  61. *
  62. * ulg deflate (void)
  63. * Processes a new input file and return its compressed length. Sets
  64. * the compressed length, crc, deflate flags and internal file
  65. * attributes.
  66. */
  67. #define __DEFLATE_C
  68. #include "zip.h"
  69. #ifndef USE_ZLIB
  70. /* ===========================================================================
  71. * Configuration parameters
  72. */
  73. /* Compile with MEDIUM_MEM to reduce the memory requirements or
  74. * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
  75. * entire input file can be held in memory (not possible on 16 bit systems).
  76. * Warning: defining these symbols affects HASH_BITS (see below) and thus
  77. * affects the compression ratio. The compressed output
  78. * is still correct, and might even be smaller in some cases.
  79. */
  80. #ifdef SMALL_MEM
  81. # define HASH_BITS 13 /* Number of bits used to hash strings */
  82. #endif
  83. #ifdef MEDIUM_MEM
  84. # define HASH_BITS 14
  85. #endif
  86. #ifndef HASH_BITS
  87. # define HASH_BITS 15
  88. /* For portability to 16 bit machines, do not use values above 15. */
  89. #endif
  90. #define HASH_SIZE (unsigned)(1<<HASH_BITS)
  91. #define HASH_MASK (HASH_SIZE-1)
  92. #define WMASK (WSIZE-1)
  93. /* HASH_SIZE and WSIZE must be powers of two */
  94. #define NIL 0
  95. /* Tail of hash chains */
  96. #define FAST 4
  97. #define SLOW 2
  98. /* speed options for the general purpose bit flag */
  99. #ifndef TOO_FAR
  100. # define TOO_FAR 4096
  101. #endif
  102. /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
  103. #if (defined(ASMV) && !defined(MSDOS16) && defined(DYN_ALLOC))
  104. error: DYN_ALLOC not yet supported in match.S or match32.asm
  105. #endif
  106. #ifdef MEMORY16
  107. # define MAXSEG_64K
  108. #endif
  109. /* ===========================================================================
  110. * Local data used by the "longest match" routines.
  111. */
  112. #if defined(MMAP) || defined(BIG_MEM)
  113. typedef unsigned Pos; /* must be at least 32 bits */
  114. #else
  115. typedef ush Pos;
  116. #endif
  117. typedef unsigned IPos;
  118. /* A Pos is an index in the character window. We use short instead of int to
  119. * save space in the various tables. IPos is used only for parameter passing.
  120. */
  121. #ifndef DYN_ALLOC
  122. uch window[2L*WSIZE];
  123. /* Sliding window. Input bytes are read into the second half of the window,
  124. * and move to the first half later to keep a dictionary of at least WSIZE
  125. * bytes. With this organization, matches are limited to a distance of
  126. * WSIZE-MAX_MATCH bytes, but this ensures that IO is always
  127. * performed with a length multiple of the block size. Also, it limits
  128. * the window size to 64K, which is quite useful on MSDOS.
  129. * To do: limit the window size to WSIZE+CBSZ if SMALL_MEM (the code would
  130. * be less efficient since the data would have to be copied WSIZE/CBSZ times)
  131. */
  132. Pos prev[WSIZE];
  133. /* Link to older string with same hash index. To limit the size of this
  134. * array to 64K, this link is maintained only for the last 32K strings.
  135. * An index in this array is thus a window index modulo 32K.
  136. */
  137. Pos head[HASH_SIZE];
  138. /* Heads of the hash chains or NIL. If your compiler thinks that
  139. * HASH_SIZE is a dynamic value, recompile with -DDYN_ALLOC.
  140. */
  141. #else
  142. uch far * near window = NULL;
  143. Pos far * near prev = NULL;
  144. Pos far * near head;
  145. #endif
  146. ulg window_size;
  147. /* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
  148. * input file length plus MIN_LOOKAHEAD.
  149. */
  150. long block_start;
  151. /* window position at the beginning of the current output block. Gets
  152. * negative when the window is moved backwards.
  153. */
  154. local int sliding;
  155. /* Set to false when the input file is already in memory */
  156. local unsigned ins_h; /* hash index of string to be inserted */
  157. #define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
  158. /* Number of bits by which ins_h and del_h must be shifted at each
  159. * input step. It must be such that after MIN_MATCH steps, the oldest
  160. * byte no longer takes part in the hash key, that is:
  161. * H_SHIFT * MIN_MATCH >= HASH_BITS
  162. */
  163. unsigned int near prev_length;
  164. /* Length of the best match at previous step. Matches not greater than this
  165. * are discarded. This is used in the lazy match evaluation.
  166. */
  167. unsigned near strstart; /* start of string to insert */
  168. unsigned near match_start; /* start of matching string */
  169. local int eofile; /* flag set at end of input file */
  170. local unsigned lookahead; /* number of valid bytes ahead in window */
  171. unsigned near max_chain_length;
  172. /* To speed up deflation, hash chains are never searched beyond this length.
  173. * A higher limit improves compression ratio but degrades the speed.
  174. */
  175. local unsigned int max_lazy_match;
  176. /* Attempt to find a better match only when the current match is strictly
  177. * smaller than this value. This mechanism is used only for compression
  178. * levels >= 4.
  179. */
  180. #define max_insert_length max_lazy_match
  181. /* Insert new strings in the hash table only if the match length
  182. * is not greater than this length. This saves time but degrades compression.
  183. * max_insert_length is used only for compression levels <= 3.
  184. */
  185. unsigned near good_match;
  186. /* Use a faster search when the previous match is longer than this */
  187. #ifdef FULL_SEARCH
  188. # define nice_match MAX_MATCH
  189. #else
  190. int near nice_match; /* Stop searching when current match exceeds this */
  191. #endif
  192. /* Values for max_lazy_match, good_match, nice_match and max_chain_length,
  193. * depending on the desired pack level (0..9). The values given below have
  194. * been tuned to exclude worst case performance for pathological files.
  195. * Better values may be found for specific files.
  196. */
  197. typedef struct config {
  198. ush good_length; /* reduce lazy search above this match length */
  199. ush max_lazy; /* do not perform lazy search above this match length */
  200. ush nice_length; /* quit search above this match length */
  201. ush max_chain;
  202. } config;
  203. local config configuration_table[10] = {
  204. /* good lazy nice chain */
  205. /* 0 */ {0, 0, 0, 0}, /* store only */
  206. /* 1 */ {4, 4, 8, 4}, /* maximum speed, no lazy matches */
  207. /* 2 */ {4, 5, 16, 8},
  208. /* 3 */ {4, 6, 32, 32},
  209. /* 4 */ {4, 4, 16, 16}, /* lazy matches */
  210. /* 5 */ {8, 16, 32, 32},
  211. /* 6 */ {8, 16, 128, 128},
  212. /* 7 */ {8, 32, 128, 256},
  213. /* 8 */ {32, 128, 258, 1024},
  214. /* 9 */ {32, 258, 258, 4096}}; /* maximum compression */
  215. /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
  216. * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
  217. * meaning.
  218. */
  219. #define EQUAL 0
  220. /* result of memcmp for equal strings */
  221. /* ===========================================================================
  222. * Prototypes for local functions.
  223. */
  224. local void fill_window OF((void));
  225. local uzoff_t deflate_fast OF((void)); /* now use uzoff_t 7/24/04 EG */
  226. int longest_match OF((IPos cur_match));
  227. #if defined(ASMV) && !defined(RISCOS)
  228. void match_init OF((void)); /* asm code initialization */
  229. #endif
  230. #ifdef DEBUG
  231. local void check_match OF((IPos start, IPos match, int length));
  232. #endif
  233. /* ===========================================================================
  234. * Update a hash value with the given input byte
  235. * IN assertion: all calls to to UPDATE_HASH are made with consecutive
  236. * input characters, so that a running hash key can be computed from the
  237. * previous key instead of complete recalculation each time.
  238. */
  239. #define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)
  240. /* ===========================================================================
  241. * Insert string s in the dictionary and set match_head to the previous head
  242. * of the hash chain (the most recent string with same hash key). Return
  243. * the previous length of the hash chain.
  244. * IN assertion: all calls to to INSERT_STRING are made with consecutive
  245. * input characters and the first MIN_MATCH bytes of s are valid
  246. * (except for the last MIN_MATCH-1 bytes of the input file).
  247. */
  248. #define INSERT_STRING(s, match_head) \
  249. (UPDATE_HASH(ins_h, window[(s) + (MIN_MATCH-1)]), \
  250. prev[(s) & WMASK] = match_head = head[ins_h], \
  251. head[ins_h] = (s))
  252. /* ===========================================================================
  253. * Initialize the "longest match" routines for a new file
  254. *
  255. * IN assertion: window_size is > 0 if the input file is already read or
  256. * mmap'ed in the window[] array, 0 otherwise. In the first case,
  257. * window_size is sufficient to contain the whole input file plus
  258. * MIN_LOOKAHEAD bytes (to avoid referencing memory beyond the end
  259. * of window[] when looking for matches towards the end).
  260. */
  261. void lm_init (pack_level, flags)
  262. int pack_level; /* 0: store, 1: best speed, 9: best compression */
  263. ush *flags; /* general purpose bit flag */
  264. {
  265. register unsigned j;
  266. if (pack_level < 1 || pack_level > 9) error("bad pack level");
  267. /* Do not slide the window if the whole input is already in memory
  268. * (window_size > 0)
  269. */
  270. sliding = 0;
  271. if (window_size == 0L) {
  272. sliding = 1;
  273. window_size = (ulg)2L*WSIZE;
  274. }
  275. /* Use dynamic allocation if compiler does not like big static arrays: */
  276. #ifdef DYN_ALLOC
  277. if (window == NULL) {
  278. window = (uch far *) zcalloc(WSIZE, 2*sizeof(uch));
  279. if (window == NULL) ziperr(ZE_MEM, "window allocation");
  280. }
  281. if (prev == NULL) {
  282. prev = (Pos far *) zcalloc(WSIZE, sizeof(Pos));
  283. head = (Pos far *) zcalloc(HASH_SIZE, sizeof(Pos));
  284. if (prev == NULL || head == NULL) {
  285. ziperr(ZE_MEM, "hash table allocation");
  286. }
  287. }
  288. #endif /* DYN_ALLOC */
  289. /* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
  290. * prev[] will be initialized on the fly.
  291. */
  292. head[HASH_SIZE-1] = NIL;
  293. memset((char*)head, NIL, (unsigned)(HASH_SIZE-1)*sizeof(*head));
  294. /* Set the default configuration parameters:
  295. */
  296. max_lazy_match = configuration_table[pack_level].max_lazy;
  297. good_match = configuration_table[pack_level].good_length;
  298. #ifndef FULL_SEARCH
  299. nice_match = configuration_table[pack_level].nice_length;
  300. #endif
  301. max_chain_length = configuration_table[pack_level].max_chain;
  302. if (pack_level <= 2) {
  303. *flags |= FAST;
  304. } else if (pack_level >= 8) {
  305. *flags |= SLOW;
  306. }
  307. /* ??? reduce max_chain_length for binary files */
  308. strstart = 0;
  309. block_start = 0L;
  310. #if defined(ASMV) && !defined(RISCOS)
  311. match_init(); /* initialize the asm code */
  312. #endif
  313. j = WSIZE;
  314. #ifndef MAXSEG_64K
  315. if (sizeof(int) > 2) j <<= 1; /* Can read 64K in one step */
  316. #endif
  317. lookahead = (*read_buf)((char*)window, j);
  318. if (lookahead == 0 || lookahead == (unsigned)EOF) {
  319. eofile = 1, lookahead = 0;
  320. return;
  321. }
  322. eofile = 0;
  323. /* Make sure that we always have enough lookahead. This is important
  324. * if input comes from a device such as a tty.
  325. */
  326. if (lookahead < MIN_LOOKAHEAD) fill_window();
  327. ins_h = 0;
  328. for (j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(ins_h, window[j]);
  329. /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
  330. * not important since only literal bytes will be emitted.
  331. */
  332. }
  333. /* ===========================================================================
  334. * Free the window and hash table
  335. */
  336. void lm_free()
  337. {
  338. #ifdef DYN_ALLOC
  339. if (window != NULL) {
  340. zcfree(window);
  341. window = NULL;
  342. }
  343. if (prev != NULL) {
  344. zcfree(prev);
  345. zcfree(head);
  346. prev = head = NULL;
  347. }
  348. #endif /* DYN_ALLOC */
  349. }
  350. /* ===========================================================================
  351. * Set match_start to the longest match starting at the given string and
  352. * return its length. Matches shorter or equal to prev_length are discarded,
  353. * in which case the result is equal to prev_length and match_start is
  354. * garbage.
  355. * IN assertions: cur_match is the head of the hash chain for the current
  356. * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
  357. */
  358. #ifndef ASMV
  359. /* For 80x86 and 680x0 and ARM, an optimized version is in match.asm or
  360. * match.S. The code is functionally equivalent, so you can use the C version
  361. * if desired.
  362. */
  363. int longest_match(cur_match)
  364. IPos cur_match; /* current match */
  365. {
  366. unsigned chain_length = max_chain_length; /* max hash chain length */
  367. register uch far *scan = window + strstart; /* current string */
  368. register uch far *match; /* matched string */
  369. register int len; /* length of current match */
  370. int best_len = prev_length; /* best match length so far */
  371. IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL;
  372. /* Stop when cur_match becomes <= limit. To simplify the code,
  373. * we prevent matches with the string of window index 0.
  374. */
  375. /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
  376. * It is easy to get rid of this optimization if necessary.
  377. */
  378. #if HASH_BITS < 8 || MAX_MATCH != 258
  379. error: Code too clever
  380. #endif
  381. #ifdef UNALIGNED_OK
  382. /* Compare two bytes at a time. Note: this is not always beneficial.
  383. * Try with and without -DUNALIGNED_OK to check.
  384. */
  385. register uch far *strend = window + strstart + MAX_MATCH - 1;
  386. register ush scan_start = *(ush far *)scan;
  387. register ush scan_end = *(ush far *)(scan+best_len-1);
  388. #else
  389. register uch far *strend = window + strstart + MAX_MATCH;
  390. register uch scan_end1 = scan[best_len-1];
  391. register uch scan_end = scan[best_len];
  392. #endif
  393. /* Do not waste too much time if we already have a good match: */
  394. if (prev_length >= good_match) {
  395. chain_length >>= 2;
  396. }
  397. Assert(strstart <= window_size-MIN_LOOKAHEAD, "insufficient lookahead");
  398. do {
  399. Assert(cur_match < strstart, "no future");
  400. match = window + cur_match;
  401. /* Skip to next match if the match length cannot increase
  402. * or if the match length is less than 2:
  403. */
  404. #if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
  405. /* This code assumes sizeof(unsigned short) == 2. Do not use
  406. * UNALIGNED_OK if your compiler uses a different size.
  407. */
  408. if (*(ush far *)(match+best_len-1) != scan_end ||
  409. *(ush far *)match != scan_start) continue;
  410. /* It is not necessary to compare scan[2] and match[2] since they are
  411. * always equal when the other bytes match, given that the hash keys
  412. * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
  413. * strstart+3, +5, ... up to strstart+257. We check for insufficient
  414. * lookahead only every 4th comparison; the 128th check will be made
  415. * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
  416. * necessary to put more guard bytes at the end of the window, or
  417. * to check more often for insufficient lookahead.
  418. */
  419. scan++, match++;
  420. do {
  421. } while (*(ush far *)(scan+=2) == *(ush far *)(match+=2) &&
  422. *(ush far *)(scan+=2) == *(ush far *)(match+=2) &&
  423. *(ush far *)(scan+=2) == *(ush far *)(match+=2) &&
  424. *(ush far *)(scan+=2) == *(ush far *)(match+=2) &&
  425. scan < strend);
  426. /* The funny "do {}" generates better code on most compilers */
  427. /* Here, scan <= window+strstart+257 */
  428. Assert(scan <= window+(unsigned)(window_size-1), "wild scan");
  429. if (*scan == *match) scan++;
  430. len = (MAX_MATCH - 1) - (int)(strend-scan);
  431. scan = strend - (MAX_MATCH-1);
  432. #else /* UNALIGNED_OK */
  433. if (match[best_len] != scan_end ||
  434. match[best_len-1] != scan_end1 ||
  435. *match != *scan ||
  436. *++match != scan[1]) continue;
  437. /* The check at best_len-1 can be removed because it will be made
  438. * again later. (This heuristic is not always a win.)
  439. * It is not necessary to compare scan[2] and match[2] since they
  440. * are always equal when the other bytes match, given that
  441. * the hash keys are equal and that HASH_BITS >= 8.
  442. */
  443. scan += 2, match++;
  444. /* We check for insufficient lookahead only every 8th comparison;
  445. * the 256th check will be made at strstart+258.
  446. */
  447. do {
  448. } while (*++scan == *++match && *++scan == *++match &&
  449. *++scan == *++match && *++scan == *++match &&
  450. *++scan == *++match && *++scan == *++match &&
  451. *++scan == *++match && *++scan == *++match &&
  452. scan < strend);
  453. Assert(scan <= window+(unsigned)(window_size-1), "wild scan");
  454. len = MAX_MATCH - (int)(strend - scan);
  455. scan = strend - MAX_MATCH;
  456. #endif /* UNALIGNED_OK */
  457. if (len > best_len) {
  458. match_start = cur_match;
  459. best_len = len;
  460. if (len >= nice_match) break;
  461. #ifdef UNALIGNED_OK
  462. scan_end = *(ush far *)(scan+best_len-1);
  463. #else
  464. scan_end1 = scan[best_len-1];
  465. scan_end = scan[best_len];
  466. #endif
  467. }
  468. } while ((cur_match = prev[cur_match & WMASK]) > limit
  469. && --chain_length != 0);
  470. return best_len;
  471. }
  472. #endif /* ASMV */
  473. #ifdef DEBUG
  474. /* ===========================================================================
  475. * Check that the match at match_start is indeed a match.
  476. */
  477. local void check_match(start, match, length)
  478. IPos start, match;
  479. int length;
  480. {
  481. /* check that the match is indeed a match */
  482. if (memcmp((char*)window + match,
  483. (char*)window + start, length) != EQUAL) {
  484. fprintf(mesg,
  485. " start %d, match %d, length %d\n",
  486. start, match, length);
  487. error("invalid match");
  488. }
  489. if (verbose > 1) {
  490. fprintf(mesg,"\\[%d,%d]", start-match, length);
  491. #ifndef WINDLL
  492. do { putc(window[start++], mesg); } while (--length != 0);
  493. #else
  494. do { fprintf(stdout,"%c",window[start++]); } while (--length != 0);
  495. #endif
  496. }
  497. }
  498. #else
  499. # define check_match(start, match, length)
  500. #endif
  501. /* ===========================================================================
  502. * Flush the current block, with given end-of-file flag.
  503. * IN assertion: strstart is set to the end of the current match.
  504. */
  505. #define FLUSH_BLOCK(eof) \
  506. flush_block(block_start >= 0L ? (char*)&window[(unsigned)block_start] : \
  507. (char*)NULL, (ulg)strstart - (ulg)block_start, (eof))
  508. /* ===========================================================================
  509. * Fill the window when the lookahead becomes insufficient.
  510. * Updates strstart and lookahead, and sets eofile if end of input file.
  511. *
  512. * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
  513. * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
  514. * At least one byte has been read, or eofile is set; file reads are
  515. * performed for at least two bytes (required for the translate_eol option).
  516. */
  517. local void fill_window()
  518. {
  519. register unsigned n, m;
  520. unsigned more; /* Amount of free space at the end of the window. */
  521. do {
  522. more = (unsigned)(window_size - (ulg)lookahead - (ulg)strstart);
  523. /* If the window is almost full and there is insufficient lookahead,
  524. * move the upper half to the lower one to make room in the upper half.
  525. */
  526. if (more == (unsigned)EOF) {
  527. /* Very unlikely, but possible on 16 bit machine if strstart == 0
  528. * and lookahead == 1 (input done one byte at time)
  529. */
  530. more--;
  531. /* For MMAP or BIG_MEM, the whole input file is already in memory so
  532. * we must not perform sliding. We must however call (*read_buf)() in
  533. * order to compute the crc, update lookahead and possibly set eofile.
  534. */
  535. } else if (strstart >= WSIZE+MAX_DIST && sliding) {
  536. #ifdef FORCE_METHOD
  537. /* When methods "stored" or "store_block" are requested, the
  538. * current block must be flushed before sliding the window.
  539. */
  540. if (level <= 2) FLUSH_BLOCK(0), block_start = strstart;
  541. #endif
  542. /* By the IN assertion, the window is not empty so we can't confuse
  543. * more == 0 with more == 64K on a 16 bit machine.
  544. */
  545. memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE);
  546. match_start -= WSIZE;
  547. strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
  548. block_start -= (long) WSIZE;
  549. for (n = 0; n < HASH_SIZE; n++) {
  550. m = head[n];
  551. head[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
  552. }
  553. for (n = 0; n < WSIZE; n++) {
  554. m = prev[n];
  555. prev[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
  556. /* If n is not on any hash chain, prev[n] is garbage but
  557. * its value will never be used.
  558. */
  559. }
  560. more += WSIZE;
  561. if (dot_size > 0 && !display_globaldots) {
  562. /* initial space */
  563. if (noisy && dot_count == -1) {
  564. #ifndef WINDLL
  565. putc(' ', mesg);
  566. fflush(mesg);
  567. #else
  568. fprintf(stdout,"%c",' ');
  569. #endif
  570. dot_count++;
  571. }
  572. dot_count++;
  573. if (dot_size <= (dot_count + 1) * WSIZE) dot_count = 0;
  574. }
  575. if ((verbose || noisy) && dot_size && !dot_count) {
  576. #ifndef WINDLL
  577. putc('.', mesg);
  578. fflush(mesg);
  579. #else
  580. fprintf(stdout,"%c",'.');
  581. #endif
  582. mesg_line_started = 1;
  583. }
  584. }
  585. if (eofile) return;
  586. /* If there was no sliding:
  587. * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
  588. * more == window_size - lookahead - strstart
  589. * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
  590. * => more >= window_size - 2*WSIZE + 2
  591. * In the MMAP or BIG_MEM case (not yet supported in gzip),
  592. * window_size == input_size + MIN_LOOKAHEAD &&
  593. * strstart + lookahead <= input_size => more >= MIN_LOOKAHEAD.
  594. * Otherwise, window_size == 2*WSIZE so more >= 2.
  595. * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
  596. */
  597. Assert(more >= 2, "more < 2");
  598. n = (*read_buf)((char*)window+strstart+lookahead, more);
  599. if (n == 0 || n == (unsigned)EOF) {
  600. eofile = 1;
  601. } else {
  602. lookahead += n;
  603. }
  604. } while (lookahead < MIN_LOOKAHEAD && !eofile);
  605. }
  606. /* ===========================================================================
  607. * Processes a new input file and return its compressed length. This
  608. * function does not perform lazy evaluation of matches and inserts
  609. * new strings in the dictionary only for unmatched strings or for short
  610. * matches. It is used only for the fast compression options.
  611. */
  612. local uzoff_t deflate_fast()
  613. {
  614. IPos hash_head = NIL; /* head of the hash chain */
  615. int flush; /* set if current block must be flushed */
  616. unsigned match_length = 0; /* length of best match */
  617. prev_length = MIN_MATCH-1;
  618. while (lookahead != 0) {
  619. /* Insert the string window[strstart .. strstart+2] in the
  620. * dictionary, and set hash_head to the head of the hash chain:
  621. */
  622. #ifndef DEFL_UNDETERM
  623. if (lookahead >= MIN_MATCH)
  624. #endif
  625. INSERT_STRING(strstart, hash_head);
  626. /* Find the longest match, discarding those <= prev_length.
  627. * At this point we have always match_length < MIN_MATCH
  628. */
  629. if (hash_head != NIL && strstart - hash_head <= MAX_DIST) {
  630. /* To simplify the code, we prevent matches with the string
  631. * of window index 0 (in particular we have to avoid a match
  632. * of the string with itself at the start of the input file).
  633. */
  634. #ifndef HUFFMAN_ONLY
  635. # ifndef DEFL_UNDETERM
  636. /* Do not look for matches beyond the end of the input.
  637. * This is necessary to make deflate deterministic.
  638. */
  639. if ((unsigned)nice_match > lookahead) nice_match = (int)lookahead;
  640. # endif
  641. match_length = longest_match (hash_head);
  642. /* longest_match() sets match_start */
  643. if (match_length > lookahead) match_length = lookahead;
  644. #endif
  645. }
  646. if (match_length >= MIN_MATCH) {
  647. check_match(strstart, match_start, match_length);
  648. flush = ct_tally(strstart-match_start, match_length - MIN_MATCH);
  649. lookahead -= match_length;
  650. /* Insert new strings in the hash table only if the match length
  651. * is not too large. This saves time but degrades compression.
  652. */
  653. if (match_length <= max_insert_length
  654. #ifndef DEFL_UNDETERM
  655. && lookahead >= MIN_MATCH
  656. #endif
  657. ) {
  658. match_length--; /* string at strstart already in hash table */
  659. do {
  660. strstart++;
  661. INSERT_STRING(strstart, hash_head);
  662. /* strstart never exceeds WSIZE-MAX_MATCH, so there are
  663. * always MIN_MATCH bytes ahead.
  664. */
  665. #ifdef DEFL_UNDETERM
  666. /* If lookahead < MIN_MATCH these bytes are garbage,
  667. * but it does not matter since the next lookahead bytes
  668. * will be emitted as literals.
  669. */
  670. #endif
  671. } while (--match_length != 0);
  672. strstart++;
  673. } else {
  674. strstart += match_length;
  675. match_length = 0;
  676. ins_h = window[strstart];
  677. UPDATE_HASH(ins_h, window[strstart+1]);
  678. #if MIN_MATCH != 3
  679. Call UPDATE_HASH() MIN_MATCH-3 more times
  680. #endif
  681. }
  682. } else {
  683. /* No match, output a literal byte */
  684. Tracevv((stderr,"%c",window[strstart]));
  685. flush = ct_tally (0, window[strstart]);
  686. lookahead--;
  687. strstart++;
  688. }
  689. if (flush) FLUSH_BLOCK(0), block_start = strstart;
  690. /* Make sure that we always have enough lookahead, except
  691. * at the end of the input file. We need MAX_MATCH bytes
  692. * for the next match, plus MIN_MATCH bytes to insert the
  693. * string following the next match.
  694. */
  695. if (lookahead < MIN_LOOKAHEAD) fill_window();
  696. }
  697. return FLUSH_BLOCK(1); /* eof */
  698. }
  699. /* ===========================================================================
  700. * Same as above, but achieves better compression. We use a lazy
  701. * evaluation for matches: a match is finally adopted only if there is
  702. * no better match at the next window position.
  703. */
  704. uzoff_t deflate()
  705. {
  706. IPos hash_head = NIL; /* head of hash chain */
  707. IPos prev_match; /* previous match */
  708. int flush; /* set if current block must be flushed */
  709. int match_available = 0; /* set if previous match exists */
  710. register unsigned match_length = MIN_MATCH-1; /* length of best match */
  711. #ifdef DEBUG
  712. extern uzoff_t isize; /* byte length of input file, for debug only */
  713. #endif
  714. if (level <= 3) return deflate_fast(); /* optimized for speed */
  715. /* Process the input block. */
  716. while (lookahead != 0) {
  717. /* Insert the string window[strstart .. strstart+2] in the
  718. * dictionary, and set hash_head to the head of the hash chain:
  719. */
  720. #ifndef DEFL_UNDETERM
  721. if (lookahead >= MIN_MATCH)
  722. #endif
  723. INSERT_STRING(strstart, hash_head);
  724. /* Find the longest match, discarding those <= prev_length.
  725. */
  726. prev_length = match_length, prev_match = match_start;
  727. match_length = MIN_MATCH-1;
  728. if (hash_head != NIL && prev_length < max_lazy_match &&
  729. strstart - hash_head <= MAX_DIST) {
  730. /* To simplify the code, we prevent matches with the string
  731. * of window index 0 (in particular we have to avoid a match
  732. * of the string with itself at the start of the input file).
  733. */
  734. #ifndef HUFFMAN_ONLY
  735. # ifndef DEFL_UNDETERM
  736. /* Do not look for matches beyond the end of the input.
  737. * This is necessary to make deflate deterministic.
  738. */
  739. if ((unsigned)nice_match > lookahead) nice_match = (int)lookahead;
  740. # endif
  741. match_length = longest_match (hash_head);
  742. /* longest_match() sets match_start */
  743. if (match_length > lookahead) match_length = lookahead;
  744. #endif
  745. #ifdef FILTERED
  746. /* Ignore matches of length <= 5 */
  747. if (match_length <= 5) {
  748. #else
  749. /* Ignore a length 3 match if it is too distant: */
  750. if (match_length == MIN_MATCH && strstart-match_start > TOO_FAR){
  751. #endif
  752. /* If prev_match is also MIN_MATCH, match_start is garbage
  753. * but we will ignore the current match anyway.
  754. */
  755. match_length = MIN_MATCH-1;
  756. }
  757. }
  758. /* If there was a match at the previous step and the current
  759. * match is not better, output the previous match:
  760. */
  761. if (prev_length >= MIN_MATCH && match_length <= prev_length) {
  762. #ifndef DEFL_UNDETERM
  763. unsigned max_insert = strstart + lookahead - MIN_MATCH;
  764. #endif
  765. check_match(strstart-1, prev_match, prev_length);
  766. flush = ct_tally(strstart-1-prev_match, prev_length - MIN_MATCH);
  767. /* Insert in hash table all strings up to the end of the match.
  768. * strstart-1 and strstart are already inserted.
  769. */
  770. lookahead -= prev_length-1;
  771. prev_length -= 2;
  772. #ifndef DEFL_UNDETERM
  773. do {
  774. if (++strstart <= max_insert) {
  775. INSERT_STRING(strstart, hash_head);
  776. /* strstart never exceeds WSIZE-MAX_MATCH, so there are
  777. * always MIN_MATCH bytes ahead.
  778. */
  779. }
  780. } while (--prev_length != 0);
  781. strstart++;
  782. #else /* DEFL_UNDETERM */
  783. do {
  784. strstart++;
  785. INSERT_STRING(strstart, hash_head);
  786. /* strstart never exceeds WSIZE-MAX_MATCH, so there are
  787. * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
  788. * these bytes are garbage, but it does not matter since the
  789. * next lookahead bytes will always be emitted as literals.
  790. */
  791. } while (--prev_length != 0);
  792. strstart++;
  793. #endif /* ?DEFL_UNDETERM */
  794. match_available = 0;
  795. match_length = MIN_MATCH-1;
  796. if (flush) FLUSH_BLOCK(0), block_start = strstart;
  797. } else if (match_available) {
  798. /* If there was no match at the previous position, output a
  799. * single literal. If there was a match but the current match
  800. * is longer, truncate the previous match to a single literal.
  801. */
  802. Tracevv((stderr,"%c",window[strstart-1]));
  803. if (ct_tally (0, window[strstart-1])) {
  804. FLUSH_BLOCK(0), block_start = strstart;
  805. }
  806. strstart++;
  807. lookahead--;
  808. } else {
  809. /* There is no previous match to compare with, wait for
  810. * the next step to decide.
  811. */
  812. match_available = 1;
  813. strstart++;
  814. lookahead--;
  815. }
  816. Assert(strstart <= isize && lookahead <= isize, "a bit too far");
  817. /* Make sure that we always have enough lookahead, except
  818. * at the end of the input file. We need MAX_MATCH bytes
  819. * for the next match, plus MIN_MATCH bytes to insert the
  820. * string following the next match.
  821. */
  822. if (lookahead < MIN_LOOKAHEAD) fill_window();
  823. }
  824. if (match_available) ct_tally (0, window[strstart-1]);
  825. return FLUSH_BLOCK(1); /* eof */
  826. }
  827. #endif /* !USE_ZLIB */