deflate.a 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053
  1. ;===========================================================================
  2. ; Copyright (c) 1990-1999 Info-ZIP. All rights reserved.
  3. ;
  4. ; See the accompanying file LICENSE, version 1999-Oct-05 or later
  5. ; (the contents of which are also included in zip.h) for terms of use.
  6. ; If, for some reason, both of these files are missing, the Info-ZIP license
  7. ; also may be found at: ftp://ftp.cdrom.com/pub/infozip/license.html
  8. ;===========================================================================
  9. ; This is a 680x0 assembly language translation of the Info-ZIP source file
  10. ; deflate.c, by Paul Kienitz. The function longest_match is based in part
  11. ; on match.a by Carsten Steger, which in turn is partly based on match.s
  12. ; for 386 by Jean-loup Gailly and Kai Uwe Rommel. Mostly, however, this
  13. ; material is based on deflate.c, by Gailly, Rommel, and Igor Mandrichenko.
  14. ; This code is not commented very much; see deflate.c for comments that explain
  15. ; what the functions are doing.
  16. ;
  17. ; The symbols that can be used to select different versions are as follows:
  18. ;
  19. ; CPU020 if defined, use 68020 instructions always.
  20. ;
  21. ; CPUTEST if defined, check at runtime for CPU type. Another symbol
  22. ; specifying the platform-specific test must be used with this.
  23. ; If neither of these is defined, use 68000 instructions only.
  24. ; Runtime test is nonportable; it is different for each OS.
  25. ;
  26. ; AMIGA use Amiga-specific test for 68020, if CPUTEST defined. Also
  27. ; tells it that registers d0/a0/d1/a1 are not preserved by
  28. ; function calls. At present, if AMIGA is not defined, it
  29. ; causes functions to preserve all registers. ALL OF THIS CODE
  30. ; CURRENTLY ASSUMES THAT REGISTERS D2-D7/A2-A6 WILL BE PRESERVED
  31. ; BY ANY FUNCTIONS THAT IT CALLS.
  32. ;
  33. ; DYN_ALLOC should be defined here if it is defined for C source; tells us
  34. ; that big arrays are allocated instead of static.
  35. ;
  36. ; WSIZE must be defined as the same number used for WSIZE in the C
  37. ; source, and must be a power of two <= 32768. As elsewhere,
  38. ; the default value is 32768.
  39. ;
  40. ; INT16 define this if ints are 16 bits; otherwise 32 bit ints assumed.
  41. ;
  42. ; SMALL_MEM define this if it is defined in the C source; otherwise it uses
  43. ; the MEDIUM_MEM model. BIG_MEM and MMAP are *not* supported.
  44. ; The FULL_SEARCH option in deflate.c is also not supported.
  45. ;
  46. ; DEBUG activates some tracing output, as in the C source.
  47. ;
  48. ; QUADLONG this selects a different version of the innermost longest_match
  49. ; loop code for 68020 operations, comparing bytes four at a time
  50. ; instead of two at a time. It seems to be a tiny bit faster on
  51. ; average, but it's slower often enough that one can't generalize.
  52. ;
  53. ; This code currently assumes that function results are returned in D0 for
  54. ; all platforms. It assumes that args to functions are pushed onto the stack,
  55. ; last arg first. It also currently assumes that all C symbols have an
  56. ; underscore prepended when referenced from assembly.
  57. IFND CPU020
  58. IFND CPUTEST
  59. CPU000 equ 1
  60. ENDC
  61. ENDC
  62. ; Use these macros for accessing variables of type int:
  63. IFD INT16
  64. MOVINT MACRO
  65. move.w \1,\2
  66. ENDM
  67. CLRINT MACRO
  68. clr.w \1
  69. ENDM
  70. INTSIZE equ 2
  71. ELSE ; !INT16
  72. MOVINT MACRO
  73. move.l \1,\2
  74. ENDM
  75. CLRINT MACRO
  76. clr.l \1
  77. ENDM
  78. INTSIZE equ 4
  79. ENDC
  80. IFD DYN_ALLOC
  81. BASEPTR MACRO
  82. move.l \1,\2
  83. ENDM
  84. ELSE
  85. BASEPTR MACRO
  86. lea \1,\2
  87. ENDM
  88. ENDC
  89. ; constants we use, many of them adjustable:
  90. MAX_MATCH equ 258
  91. MIN_MATCH equ 3
  92. TOO_FAR equ 4096
  93. IFND WSIZE
  94. WSIZE equ 32768
  95. ENDC
  96. WMASK equ WSIZE-1
  97. MAX_DIST equ WSIZE-MAX_MATCH-MIN_MATCH-1
  98. MIN_LOOKAHEAD equ MAX_MATCH+MIN_MATCH+1
  99. ; IFD BIG_MEM ; NOT supported -- type Pos needs to be 32 bits
  100. ;HASH_BITS equ 15
  101. ; ELSE
  102. IFD SMALL_MEM
  103. HASH_BITS equ 13
  104. ELSE
  105. HASH_BITS equ 14 ; default -- MEDIUM_MEM
  106. ENDC
  107. ; ENDC ; BIG_MEM
  108. HASH_SIZE equ 1<<HASH_BITS
  109. HASH_MASK equ HASH_SIZE-1
  110. H_SHIFT equ (HASH_BITS+MIN_MATCH-1)/MIN_MATCH
  111. B_SLOW equ 1
  112. B_FAST equ 2
  113. ZE_MEM equ 4
  114. EOF equ -1
  115. ; struct config is defined by these offsets:
  116. Good_length equ 0
  117. Max_lazy equ 2
  118. Nice_length equ 4
  119. Max_chain equ 6
  120. Sizeof_config equ 8
  121. ; external functions we call:
  122. xref _ct_tally ; int ct_tally(int, int)
  123. xref _flush_block ; unsigned long F(char *, unsigned long, int)
  124. xref _ziperr ; void ziperr(int, char *)
  125. xref _error ; void error(char *)
  126. xref _calloc ; stdlib function: void *calloc(size_t, size_t)
  127. xref _free ; stdlib function: void free(void *)
  128. IFD DEBUG
  129. xref _fputc ; stdio function: int fputc(int, FILE *)
  130. xref _stderr ; pointer to FILE, which we pass to fputc
  131. ENDC
  132. ; our entry points:
  133. xdef _lm_init ; void lm_init(int level, unsigned short *flags)
  134. xdef _lm_free ; void lm_free(void)
  135. xdef _deflate ; void deflate(void) ...the big one
  136. xdef _fill_window ; this line is just for debugging
  137. ; ============================================================================
  138. ; Here is where we have our global variables.
  139. section deflatevars,data
  140. ; external global variables we reference:
  141. xref _verbose ; signed int
  142. xref _level ; signed int
  143. xref _read_buf ; int (*read_buf)(char *, unsigned int)
  144. ; global variables we make available:
  145. xdef _window
  146. xdef _prev
  147. xdef _head
  148. xdef _window_size
  149. xdef _block_start
  150. xdef _strstart
  151. IFD DYN_ALLOC
  152. _prev: ds.l 1 ; pointer to calloc()'d unsigned short array
  153. _head: ds.l 1 ; pointer to calloc()'d unsigned short array
  154. _window: ds.l 1 ; pointer to calloc()'d unsigned char array
  155. ELSE ; !DYN_ALLOC
  156. _prev: ds.w WSIZE ; array of unsigned short
  157. _head: ds.w HASH_SIZE ; array of unsigned short
  158. _window: ds.b 2*WSIZE ; array of unsigned char
  159. ENDC ; ?DYN_ALLOC
  160. _window_size: ds.l 1 ; unsigned long
  161. _block_start: ds.l 1 ; unsigned long
  162. _strstart: ds.w INTSIZE/2 ; unsigned int
  163. ; Now here are our private variables:
  164. IFD CPUTEST
  165. is020: ds.w 1 ; bool: CPU type is '020 or higher
  166. ENDC
  167. ins_h: ds.w 1 ; unsigned short
  168. sliding: ds.w 1 ; bool: the file is read a piece at a time
  169. eofile: ds.w 1 ; bool: we have read in the end of the file
  170. max_lazy_match: ds.w 1 ; unsigned short
  171. lookahead: ds.w 1 ; unsigned short
  172. ; These are NOT DECLARED AS STATIC in deflate.c, but currently could be:
  173. max_chain_len: ds.w 1 ; unsigned short (unsigned int in deflate.c)
  174. prev_length: ds.w 1 ; unsigned short (unsigned int in deflate.c)
  175. good_match: ds.w 1 ; unsigned short (unsigned int in deflate.c)
  176. nice_match: ds.w 1 ; unsigned short (signed int in deflate.c)
  177. match_start: ds.w 1 ; unsigned short (unsigned int in deflate.c)
  178. ; This array of struct config is a constant and could be in the code section:
  179. config_table: dc.w 0,0,0,0 ; level 0: store uncompressed
  180. dc.w 4,4,8,4 ; level 1: fastest, loosest compression
  181. dc.w 4,5,16,8 ; level 2
  182. dc.w 4,6,32,32 ; level 3: highest to use deflate_fast
  183. dc.w 4,4,16,16 ; level 4: lowest to use lazy matches
  184. dc.w 8,16,32,32 ; level 5
  185. dc.w 8,16,128,128 ; level 6: the default level
  186. dc.w 8,32,128,256 ; level 7
  187. dc.w 32,128,258,1024 ; level 8
  188. dc.w 32,258,258,4096 ; level 9: maximum compression, slow
  189. ;;CAL_SH MACRO ; macro for calling zcalloc()
  190. ;; IFD INT16
  191. ;; move.w #2,-(sp)
  192. ;; move.w #\1,-(sp)
  193. ;; jsr _zcalloc
  194. ;; addq #4,sp
  195. ;; ELSE
  196. ;; pea 2
  197. ;; pea \1
  198. ;; jsr _zcalloc
  199. ;; addq #8,sp
  200. ;; ENDC
  201. ;; ENDM
  202. CAL_SH MACRO ; Okay, we're back to using regular calloc()...
  203. pea 2
  204. pea \1
  205. jsr _calloc
  206. addq #8,sp
  207. ENDM
  208. ; ============================================================================
  209. ; And here we begin our functions. match_init is for internal use only:
  210. section deflate,code
  211. match_init:
  212. IFD CPUTEST ; now check for platform type
  213. IFD AMIGA ; Amiga specific test for '020 CPU:
  214. xref _SysBase
  215. NOLIST
  216. INCLUDE 'exec/execbase.i'
  217. LIST
  218. clr.w is020 ; default value is 68000
  219. move.l _SysBase,a0
  220. btst #AFB_68020,AttnFlags+1(a0)
  221. beq.s cheap
  222. move.w #1,is020
  223. cheap:
  224. ELSE ; !AMIGA
  225. FAIL Write an '020-detector for your system here!
  226. ; On the Macintosh, I believe GetEnvironment() provides the information.
  227. ENDC ; AMIGA
  228. ENDC ; CPUTEST
  229. rts ; match_init consists only of rts if CPUTEST unset
  230. ; ============================================================================
  231. ; Here is longest_match(), the function that the rest of this was built up
  232. ; from, the hottest hot spot in the program and therefore the most heavily
  233. ; optimized. It has two different versions, one for '020 and higher CPUs, and
  234. ; one for 68000/68010. It can test at runtime which version to use if you
  235. ; create a test function in match_init for your platform. Currently such a
  236. ; test is implemented for the Amiga. It can also be assembled to use '000 or
  237. ; '020 code only.
  238. Cur_Match equr d0 ; unsigned int, kept valid as long
  239. Best_Len equr d1 ; unsigned int, kept valid as long
  240. Scan_Start equr d3 ; pair of bytes
  241. Scan_End equr d4 ; pair of bytes
  242. Limit equr d5 ; unsigned int
  243. Chain_Length equr d6 ; unsigned int
  244. Scan_Test equr d7 ; counter, pair of bytes sometimes
  245. Scan equr a0 ; pointer to unsigned char
  246. Match equr a1 ; pointer to unsigned char
  247. Prev_Address equr a2 ; pointer to unsigned short
  248. Scan_Ini equr a3 ; pointer to unsigned char
  249. Match_Ini equr a5 ; pointer to unsigned char
  250. ; Note: "pair of bytes" means the two low order bytes of the register in
  251. ; 68020 code, but means the lowest and third lowest bytes on the 68000.
  252. IFD AMIGA
  253. SAVEREGS reg d3-d7/a2/a3/a5 ; don't protect d0/d1/a0/a1
  254. ELSE ; !AMIGA
  255. SAVEREGS reg d1/d3-d7/a0-a3/a5 ; protect all except d0 return val
  256. ENDC ; ?AMIGA
  257. ; d2, a4, a6 not used... on Amiga, a4 is used by small-data memory model
  258. longest_match:
  259. movem.l SAVEREGS,-(sp)
  260. ; setup steps common to byte and word versions:
  261. IFD INT16
  262. and.l #$0000FFFF,Cur_Match ; upper half must be zero!
  263. ; we use an and.l down here for the sake of ATSIGN/REGARGS.
  264. moveq #0,Limit ; so adding to Scan_Ini works
  265. ENDC
  266. move.w max_chain_len,Chain_Length
  267. move.w prev_length,Best_Len
  268. MOVINT _strstart,Limit
  269. BASEPTR _prev,Prev_Address
  270. BASEPTR _window,Match_Ini
  271. move.l Match_Ini,Scan_Ini
  272. addq #MIN_MATCH,Match_Ini ; optimizes inner loop
  273. add.l Limit,Scan_Ini
  274. sub.w #MAX_DIST,Limit
  275. bhi.s limit_ok
  276. moveq #0,Limit
  277. limit_ok:
  278. cmp.w good_match,Best_Len
  279. blo.s length_ok
  280. lsr.w #2,Chain_Length
  281. length_ok:
  282. subq.w #1,Chain_Length
  283. IFD CPUTEST
  284. tst.w is020 ; can we use '020 stuff today?
  285. bne WORD_match
  286. ENDC
  287. IFND CPU020
  288. ; for 68000 or 68010, use byte operations:
  289. moveq #0,Scan_Start ; clear 2nd & 4th bytes, use 1st & 3rd
  290. moveq #0,Scan_End ; likewise
  291. moveq #0,Scan_Test ; likewise
  292. move.b (Scan_Ini),Scan_Start
  293. swap Scan_Start ; swap is faster than 8 bit shift
  294. move.b 1(Scan_Ini),Scan_Start
  295. move.b -1(Scan_Ini,Best_Len.w),Scan_End
  296. swap Scan_End
  297. move.b 0(Scan_Ini,Best_Len.w),Scan_End
  298. bra.s bdo_scan
  299. blong_loop:
  300. move.b -1(Scan_Ini,Best_Len.w),Scan_End
  301. swap Scan_End
  302. move.b 0(Scan_Ini,Best_Len.w),Scan_End
  303. bshort_loop:
  304. add.w Cur_Match,Cur_Match ; assert value before doubling < 32K
  305. IFNE 32768-WSIZE
  306. and.w #(WMASK*2),Cur_Match
  307. ENDC
  308. move.w (Prev_Address,Cur_Match.l),Cur_Match
  309. cmp.w Limit,Cur_Match
  310. dbls Chain_Length,bdo_scan
  311. bra return
  312. bdo_scan:
  313. move.l Match_Ini,Match
  314. add.l Cur_Match,Match
  315. move.b -MIN_MATCH-1(Match,Best_Len.w),Scan_Test
  316. swap Scan_Test
  317. move.b -MIN_MATCH(Match,Best_Len.w),Scan_Test
  318. cmp.l Scan_Test,Scan_End
  319. bne.s bshort_loop
  320. move.b -MIN_MATCH(Match),Scan_Test
  321. swap Scan_Test
  322. move.b -MIN_MATCH+1(Match),Scan_Test
  323. cmp.l Scan_Test,Scan_Start
  324. bne.s bshort_loop
  325. move.w #(MAX_MATCH-3),Scan_Test
  326. lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
  327. bscan_loop:
  328. cmp.b (Match)+,(Scan)+
  329. dbne Scan_Test,bscan_loop
  330. subq #1,Scan
  331. sub.l Scan_Ini,Scan ; assert difference is 16 bits
  332. cmp.w Best_Len,Scan
  333. bls.s bshort_loop
  334. MOVINT Scan,Best_Len
  335. move.w Cur_Match,match_start
  336. cmp.w nice_match,Best_Len
  337. blo.s blong_loop
  338. IFD CPUTEST
  339. bra return
  340. ENDC
  341. ENDC ; !CPU020
  342. IFND CPU000
  343. MACHINE MC68020
  344. ; for 68020 or higher, use word operations even on odd addresses:
  345. WORD_match:
  346. move.w (Scan_Ini),Scan_Start
  347. move.w -1(Scan_Ini,Best_Len.w),Scan_End
  348. bra.s wdo_scan
  349. wlong_loop:
  350. move.w -1(Scan_Ini,Best_Len.w),Scan_End
  351. wshort_loop:
  352. and.w #WMASK,Cur_Match
  353. move.w (Prev_Address,Cur_Match.w*2),Cur_Match ; '020 addressing mode
  354. cmp.w Limit,Cur_Match
  355. dbls Chain_Length,wdo_scan
  356. bra.s return
  357. wdo_scan:
  358. move.l Match_Ini,Match
  359. add.l Cur_Match,Match
  360. cmp.w -MIN_MATCH-1(Match,Best_Len.w),Scan_End
  361. bne.s wshort_loop
  362. cmp.w -MIN_MATCH(Match),Scan_Start
  363. bne.s wshort_loop
  364. IFD QUADLONG
  365. ; By some measurements, this version of the code is a little tiny bit faster.
  366. ; But on some files it's slower. It probably pays off only when there are
  367. ; long match strings, and costs in the most common case of three-byte matches.
  368. moveq #((MAX_MATCH-MIN_MATCH)/16),Scan_Test ; value = 15
  369. lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
  370. wscan_loop:
  371. cmp.l (Match)+,(Scan)+ ; test four bytes at a time
  372. bne.s odd
  373. cmp.l (Match)+,(Scan)+
  374. bne.s odd
  375. cmp.l (Match)+,(Scan)+
  376. bne.s odd
  377. cmp.l (Match)+,(Scan)+
  378. dbne Scan_Test,wscan_loop ; '020 can cache a bigger loop
  379. odd:
  380. subq #4,Scan
  381. subq #4,Match
  382. cmp.b (Match)+,(Scan)+ ; find good bytes in bad longword
  383. bne.s even
  384. cmp.b (Match)+,(Scan)+
  385. bne.s even
  386. cmp.b (Match)+,(Scan)+
  387. beq.s steven
  388. even: subq #1,Scan
  389. ELSE ; !QUADLONG
  390. moveq #((MAX_MATCH-MIN_MATCH)/2),Scan_Test ; value = 127
  391. lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
  392. wscan_loop:
  393. cmp.w (Match)+,(Scan)+
  394. dbne Scan_Test,wscan_loop
  395. subq #2,Scan
  396. move.b -2(Match),Scan_Test
  397. cmp.b (Scan),Scan_Test
  398. bne.s steven
  399. addq #1,Scan
  400. ENDC ; ?QUADLONG
  401. steven:
  402. sub.l Scan_Ini,Scan ; assert: difference is 16 bits
  403. cmp.w Best_Len,Scan
  404. bls.s wshort_loop
  405. MOVINT Scan,Best_Len
  406. move.w Cur_Match,match_start
  407. cmp.w nice_match,Best_Len
  408. blo.s wlong_loop
  409. MACHINE MC68000
  410. ENDC ; !CPU000
  411. return:
  412. MOVINT Best_Len,d0 ; return value (upper half should be clear)
  413. movem.l (sp)+,SAVEREGS
  414. rts
  415. ; =============================================================================
  416. ; This is the deflate() function itself, our main entry point. It calls
  417. ; longest_match, above, and some outside functions. It is a hot spot, but not
  418. ; as hot as longest_match. It uses no special '020 code.
  419. ; ================== Several macros used in deflate() and later functions:
  420. ; Arg 1 is D-reg that new ins_h value is to be left in,
  421. ; arg 2 is the byte value to be hashed into it, which must not be the same reg
  422. UP_HASH MACRO
  423. move.w ins_h,\1
  424. asl.w #H_SHIFT,\1
  425. eor.b \2,\1
  426. and.w #HASH_MASK,\1 ; ((ins_h << H_SHIFT) ^ c) & HASH_MASK
  427. move.w \1,ins_h ; ins_h = that
  428. ENDM
  429. ; Arg 1 is scratch A, arg 2 is scratch D
  430. IN_STR MACRO
  431. move.l Strst,\2
  432. addq.w #MIN_MATCH-1,\2
  433. move.b (Window,\2.l),\2 ; window[strstart + MIN_MATCH - 1]
  434. UP_HASH Head,\2
  435. add.l Head,Head ; assert upper word is zero before add
  436. BASEPTR _head,\1
  437. add.l Head,\1
  438. move.w (\1),Head ; hash_head = head[ins_h]
  439. move.w Strst,(\1) ; head[ins_h] = strstart
  440. move.l Strst,\2
  441. IFNE WSIZE-32768
  442. and.w #WMASK,\2
  443. ENDC
  444. add.w \2,\2 ; masks implicitly when WSIZE == 32768
  445. move.w Head,(Prev,\2.l) ; prev[str_start & WMASK] = hash_head
  446. ENDM
  447. ; Arg 1 is bool (int) EOF flag, flush_block result is in d0, trashes d1/a0/a1
  448. FLUSH_B MACRO
  449. IFC '\1','#0'
  450. CLRINT -(sp)
  451. ELSE
  452. MOVINT \1,-(sp)
  453. ENDC
  454. move.l _block_start,d0
  455. blt.s nenu\@
  456. move.l Window,a0
  457. add.l d0,a0
  458. bra.s nun\@
  459. nenu\@: sub.l a0,a0 ; if block_start < 0, push NULL
  460. nun\@: sub.l Strst,d0
  461. neg.l d0
  462. move.l d0,-(sp)
  463. move.l a0,-(sp)
  464. jsr _flush_block
  465. lea 8+INTSIZE(sp),sp
  466. ENDM
  467. ; This expands to nothing unless DEBUG is defined.
  468. ; Arg 1 is a byte to be trace-outputted -- if it is d0 it must be a valid int
  469. TRACE_C MACRO
  470. IFD DEBUG
  471. cmp.w #1,_verbose+INTSIZE-2 ; test lower word only
  472. ble.s qui\@
  473. IFNC '\1','d0'
  474. moveq #0,d0
  475. move.b \1,d0
  476. ENDC
  477. move.l _stderr,-(sp)
  478. MOVINT d0,-(sp)
  479. jsr _fputc
  480. addq #4+INTSIZE,sp
  481. qui\@:
  482. ENDC ; DEBUG
  483. ENDM
  484. ; ================== Here are the register vars we use, and deflate() itself:
  485. Window equr a2 ; cached address of window[]
  486. Prev equr a3 ; cached address of prev[]
  487. Strst equr d7 ; strstart cached as a longword
  488. Look equr d6 ; lookahead cached as short
  489. Head equr d5 ; local variable hash_head, short
  490. PrevL equr d4 ; prev_length cached as short
  491. MatchL equr d3 ; local variable match_length, unsigned short
  492. Avail equr d2 ; local variable available_match, bool
  493. PrevM equr a5 ; local variable prev_match, int in an A-reg
  494. IFD AMIGA
  495. DEFREGS reg d2-d7/a2/a3/a5
  496. ELSE
  497. DEFREGS reg d0-d7/a0/a2/a3/a5 ; play it safe, preserve all regs
  498. ENDC
  499. _deflate: ; first, setup steps common to deflate and deflate_fast:
  500. movem.l DEFREGS,-(sp)
  501. IFD INT16
  502. moveq #0,Strst ; make sure strstart is valid as a long
  503. ENDC
  504. moveq #0,Head ; ditto for hash_head
  505. MOVINT _strstart,Strst
  506. move.w lookahead,Look
  507. move.w prev_length,PrevL
  508. BASEPTR _window,Window
  509. BASEPTR _prev,Prev
  510. MOVINT _level,d0
  511. cmp.w #3,d0
  512. ble deflate_fast
  513. moveq #MIN_MATCH-1,MatchL
  514. moveq #0,Avail
  515. look_loop:
  516. tst.w Look
  517. beq last_tally
  518. IN_STR a0,d0
  519. move.w MatchL,PrevL
  520. move.w match_start,PrevM
  521. move.w #MIN_MATCH-1,MatchL
  522. tst.w Head
  523. beq.s no_new_match
  524. cmp.w max_lazy_match,PrevL
  525. bhs.s no_new_match
  526. move.w Strst,d0
  527. sub.w Head,d0
  528. cmp.w #MAX_DIST,d0
  529. bhi.s no_new_match
  530. move.w PrevL,prev_length ; longest_match reads these variables
  531. MOVINT Strst,_strstart
  532. MOVINT Head,d0 ; parm for longest_match
  533. bsr longest_match ; sets match_start
  534. cmp.w Look,d0 ; does length exceed valid data?
  535. bls.s stml
  536. move.w Look,d0
  537. stml: move.w d0,MatchL ; valid length of match
  538. cmp.w #MIN_MATCH,MatchL ; is the match only three bytes?
  539. bne.s no_new_match
  540. move.w match_start,d0
  541. sub.w Strst,d0
  542. cmp.w #-TOO_FAR,d0
  543. bge.s no_new_match
  544. moveq #MIN_MATCH-1,MatchL ; mark the current match as no good
  545. no_new_match:
  546. cmp.w #MIN_MATCH,PrevL
  547. blo literal
  548. cmp.w MatchL,PrevL
  549. blo literal
  550. ; CHECK_MATCH Strst-1,PrevM,PrevL
  551. MOVINT Strst,_strstart ; ct_tally reads this variable
  552. move.l PrevL,d0
  553. subq.w #MIN_MATCH,d0
  554. MOVINT d0,-(sp)
  555. move.l Strst,d0
  556. sub.w PrevM,d0
  557. subq.w #1,d0
  558. MOVINT d0,-(sp)
  559. jsr _ct_tally ; sets d0 true if we have to flush
  560. addq #2*INTSIZE,sp
  561. subq.w #3,PrevL ; convert for dbra (prev_length - 2)
  562. sub.w PrevL,Look
  563. subq.w #2,Look
  564. insertmatch:
  565. addq.w #1,Strst
  566. IN_STR a0,d1 ; don't clobber d0
  567. dbra PrevL,insertmatch
  568. moveq #0,Avail
  569. moveq #0,PrevL ; not needed?
  570. moveq #MIN_MATCH-1,MatchL
  571. addq.w #1,Strst
  572. tst.w d0
  573. beq refill
  574. FLUSH_B #0
  575. move.l Strst,_block_start
  576. bra.s refill
  577. literal:
  578. tst.w Avail
  579. bne.s yeslit
  580. moveq #1,Avail
  581. bra.s skipliteral
  582. yeslit: TRACE_C <-1(Window,Strst.l)>
  583. MOVINT Strst,_strstart ; ct_tally reads this variable
  584. moveq #0,d0
  585. move.b -1(Window,Strst.l),d0
  586. MOVINT d0,-(sp)
  587. CLRINT -(sp)
  588. jsr _ct_tally
  589. addq #2*INTSIZE,sp
  590. tst.w d0
  591. beq.s skipliteral
  592. FLUSH_B #0
  593. move.l Strst,_block_start
  594. skipliteral:
  595. addq.w #1,Strst
  596. subq.w #1,Look
  597. refill:
  598. cmp.w #MIN_LOOKAHEAD,Look
  599. bhs look_loop
  600. bsr fill_window
  601. bra look_loop
  602. last_tally:
  603. tst.w Avail
  604. beq last_flush
  605. MOVINT Strst,_strstart ; ct_tally reads this variable
  606. moveq #0,d0
  607. move.b -1(Window,Strst.l),d0
  608. MOVINT d0,-(sp)
  609. CLRINT -(sp)
  610. jsr _ct_tally
  611. addq #2*INTSIZE,sp
  612. last_flush:
  613. FLUSH_B #1
  614. bra deflate_exit
  615. ; ================== This is another version used for low compression levels:
  616. deflate_fast:
  617. moveq #0,MatchL
  618. moveq #MIN_MATCH-1,PrevL
  619. flook_loop:
  620. tst.w Look
  621. beq flast_flush
  622. IN_STR a0,d0
  623. tst.w Head
  624. beq.s fno_new_match
  625. move.w Strst,d0
  626. sub.w Head,d0
  627. cmp.w #MAX_DIST,d0
  628. bhi.s fno_new_match
  629. move.w PrevL,prev_length ; longest_match reads these variables
  630. MOVINT Strst,_strstart
  631. MOVINT Head,d0 ; parm for longest_match
  632. bsr longest_match ; sets match_start
  633. cmp.w Look,d0 ; does length exceed valid data?
  634. bls.s fstml
  635. move.w Look,d0
  636. fstml: move.w d0,MatchL ; valid length of match
  637. fno_new_match:
  638. cmp.w #MIN_MATCH,MatchL
  639. blo fliteral
  640. ; CHECK_MATCH Strst,match_start,MatchL
  641. MOVINT Strst,_strstart ; ct_tally reads this variable
  642. move.l MatchL,d0
  643. subq.w #MIN_MATCH,d0
  644. MOVINT d0,-(sp)
  645. move.l Strst,d0
  646. sub.w match_start,d0
  647. MOVINT d0,-(sp)
  648. jsr _ct_tally ; sets d0 true if we have to flush
  649. addq #2*INTSIZE,sp
  650. sub.w MatchL,Look
  651. cmp.w max_lazy_match,MatchL
  652. bhi ftoolong
  653. subq.w #2,MatchL
  654. finsertmatch:
  655. addq.w #1,Strst
  656. IN_STR a0,d1 ; preserve d0
  657. dbra MatchL,finsertmatch
  658. moveq #0,MatchL ; not needed?
  659. addq.w #1,Strst
  660. bra.s flushfill
  661. ftoolong:
  662. add.w MatchL,Strst
  663. moveq #0,MatchL
  664. moveq #0,d1 ; preserve d0
  665. move.b (Window,Strst.l),d1
  666. move.w d1,ins_h
  667. ; My assembler objects to passing <1(Window,Strst.l)> directly to UP_HASH...
  668. move.b 1(Window,Strst.l),Avail ; Avail is not used in deflate_fast
  669. UP_HASH d1,Avail ; preserve d0
  670. IFNE MIN_MATCH-3
  671. FAIL needs to UP_HASH another MIN_MATCH-3 times, but with what arg?
  672. ENDC
  673. bra.s flushfill
  674. fliteral:
  675. TRACE_C <(Window,Strst.l)>
  676. MOVINT Strst,_strstart ; ct_tally reads this variable
  677. moveq #0,d0
  678. move.b (Window,Strst.l),d0
  679. MOVINT d0,-(sp)
  680. CLRINT -(sp)
  681. jsr _ct_tally ; d0 set if we need to flush
  682. addq #2*INTSIZE,sp
  683. addq.w #1,Strst
  684. subq.w #1,Look
  685. flushfill:
  686. tst.w d0
  687. beq.s frefill
  688. FLUSH_B #0
  689. move.l Strst,_block_start
  690. frefill:
  691. cmp.w #MIN_LOOKAHEAD,Look
  692. bhs flook_loop
  693. bsr fill_window
  694. bra flook_loop
  695. flast_flush:
  696. FLUSH_B #1 ; sets our return value
  697. deflate_exit:
  698. MOVINT Strst,_strstart ; save back cached values
  699. move.w PrevL,prev_length
  700. move.w Look,lookahead
  701. movem.l (sp)+,DEFREGS
  702. rts
  703. ; =========================================================================
  704. ; void fill_window(void) calls the input function to refill the sliding
  705. ; window that we use to find substring matches in.
  706. More equr Head ; local variable in fill_window
  707. WindTop equr Prev ; local variable used for sliding
  708. SlidIx equr PrevL ; local variable used for sliding
  709. IFD AMIGA
  710. FWREGS reg d2-d5/a2-a6 ; does NOT include Look and Strst
  711. ELSE
  712. FWREGS reg d1-d5/a0-a6 ; likewise
  713. ENDC
  714. ; all registers available to be clobbered by the sliding operation:
  715. ; we exclude More, WindTop, SlidIx, Look, Strst, Window, a4 and a7.
  716. SPAREGS reg d0-d3/a0-a1/a5-a6
  717. SPCOUNT equ 8 ; number of registers in SPAREGS
  718. _fill_window: ; C-callable entry point
  719. movem.l Strst/Look,-(sp)
  720. IFD INT16
  721. moveq #0,Strst ; Strst must be valid as a long
  722. ENDC
  723. MOVINT _strstart,Strst
  724. move.w lookahead,Look
  725. BASEPTR _window,Window
  726. bsr.s fill_window
  727. MOVINT Strst,_strstart
  728. move.w Look,lookahead
  729. movem.l (sp)+,Strst/Look
  730. rts
  731. ; strstart, lookahead, and window must be cached in Strst, Look, and Window:
  732. fill_window: ; asm-callable entry point
  733. movem.l FWREGS,-(sp)
  734. tst.w eofile ; we put this up here for speed
  735. bne fwdone
  736. and.l #$FFFF,Look ; make sure Look is valid as long
  737. fw_refill:
  738. move.l _window_size,More ; <= 64K
  739. sub.l Look,More
  740. sub.l Strst,More ; Strst is already valid as long
  741. cmp.w #EOF,More
  742. bne.s notboundary
  743. subq.w #1,More
  744. bra checkend
  745. notboundary:
  746. tst.w sliding
  747. beq checkend
  748. cmp.w #WSIZE+MAX_DIST,Strst
  749. blo checkend
  750. IFGT 32768-WSIZE
  751. lea WSIZE(Window),WindTop ; WindTop is aligned when Window is
  752. ELSE
  753. move.l Window,WindTop
  754. add.l #WSIZE,WindTop
  755. ENDC
  756. move.l Window,d0
  757. and.w #3,d0
  758. beq.s isaligned
  759. subq.w #1,d0
  760. align: move.b (WindTop)+,(Window)+ ; copy up to a longword boundary
  761. dbra d0,align
  762. isaligned:
  763. ; This is faster than a simple move.l (WindTop)+,(Window)+ / dbra loop:
  764. move.w #(WSIZE-1)/(4*SPCOUNT),SlidIx
  765. slide: movem.l (WindTop)+,SPAREGS ; copy, 32 bytes at a time!
  766. movem.l SPAREGS,(Window) ; a slight overshoot doesn't matter.
  767. lea 4*SPCOUNT(Window),Window ; can't use (aN)+ as movem.l dest
  768. dbra SlidIx,slide
  769. BASEPTR _window,Window ; restore cached value
  770. sub.w #WSIZE,match_start
  771. sub.w #WSIZE,Strst
  772. sub.l #WSIZE,_block_start
  773. add.w #WSIZE,More
  774. BASEPTR _head,a0
  775. move.w #HASH_SIZE-1,d0
  776. fixhead:
  777. move.w (a0),d1
  778. sub.w #WSIZE,d1
  779. bpl.s headok
  780. moveq #0,d1
  781. headok: move.w d1,(a0)+
  782. dbra d0,fixhead
  783. BASEPTR _prev,a0
  784. move.w #WSIZE-1,d0
  785. fixprev:
  786. move.w (a0),d1
  787. sub.w #WSIZE,d1
  788. bpl.s prevok
  789. moveq #0,d1
  790. prevok: move.w d1,(a0)+
  791. dbra d0,fixprev
  792. TRACE_C #'.'
  793. checkend: ; assert eofile is false
  794. MOVINT More,-(sp) ; assert More's upper word is zero
  795. move.l Strst,d0
  796. add.w Look,d0
  797. add.l Window,d0
  798. move.l d0,-(sp)
  799. move.l _read_buf,a0
  800. jsr (a0) ; refill the upper part of the window
  801. addq #4+INTSIZE,sp
  802. tst.w d0
  803. beq.s iseof
  804. cmp.w #EOF,d0
  805. beq.s iseof
  806. add.w d0,Look
  807. cmp.w #MIN_LOOKAHEAD,Look
  808. blo fw_refill ; eofile is still false
  809. bra.s fwdone
  810. iseof: move.w #1,eofile
  811. fwdone: movem.l (sp)+,FWREGS
  812. rts
  813. ; =========================================================================
  814. ; void lm_free(void) frees dynamic arrays in the DYN_ALLOC version.
  815. xdef _lm_free ; the entry point
  816. _lm_free:
  817. IFD DYN_ALLOC
  818. move.l _window,d0
  819. beq.s lf_no_window
  820. move.l d0,-(sp)
  821. jsr _free
  822. addq #4,sp
  823. clr.l _window
  824. lf_no_window:
  825. move.l _prev,d0
  826. beq.s lf_no_prev
  827. move.l d0,-(sp)
  828. jsr _free
  829. move.l _head,(sp) ; reuse the same stack arg slot
  830. jsr _free
  831. addq #4,sp
  832. clr.l _prev
  833. clr.l _head
  834. lf_no_prev:
  835. ENDC
  836. rts
  837. ; ============================================================================
  838. ; void lm_init(int pack_level, unsigned short *flags) allocates dynamic arrays
  839. ; if any, and initializes all variables so that deflate() is ready to go.
  840. xdef _lm_init ; the entry point
  841. Level equr d2
  842. ;Window equr a2 ; as in deflate()
  843. IFD AMIGA
  844. INIREGS reg d2/a2
  845. ELSE
  846. INIREGS reg d0-d2/a0-a1
  847. ENDC
  848. _lm_init:
  849. MOVINT 4(sp),d0
  850. move.l 4+INTSIZE(sp),a0
  851. movem.l INIREGS,-(sp)
  852. move.w d0,Level
  853. cmp.w #1,Level
  854. blt.s levelerr
  855. bgt.s try9
  856. bset.b #B_FAST,1(a0)
  857. try9: cmp.w #9,Level
  858. bgt.s levelerr
  859. blt.s levelok
  860. bset.b #B_SLOW,1(a0)
  861. bra.s levelok
  862. levelerr:
  863. pea level_message
  864. jsr _error ; never returns
  865. levelok:
  866. clr.w sliding
  867. tst.l _window_size
  868. bne.s gotawindowsize
  869. move.w #1,sliding
  870. move.l #2*WSIZE,_window_size
  871. gotawindowsize:
  872. BASEPTR _window,Window
  873. IFD DYN_ALLOC
  874. move.l Window,d0 ; fake tst.l
  875. bne.s gotsomewind
  876. CAL_SH WSIZE
  877. move.l d0,Window
  878. move.l d0,_window
  879. bne.s gotsomewind
  880. pea window_message
  881. MOVINT #ZE_MEM,-(sp)
  882. jsr _ziperr ; never returns
  883. gotsomewind:
  884. tst.l _prev
  885. bne.s gotsomehead
  886. CAL_SH WSIZE
  887. move.l d0,_prev
  888. beq.s nohead
  889. CAL_SH HASH_SIZE
  890. move.l d0,_head
  891. bne.s gotfreshhead ; newly calloc'd memory is zeroed
  892. nohead: pea hash_message
  893. MOVINT #ZE_MEM,-(sp)
  894. jsr _ziperr ; never returns
  895. gotsomehead:
  896. ENDC ; DYN_ALLOC
  897. move.w #(HASH_SIZE/2)-1,d0 ; two shortwords per loop
  898. BASEPTR _head,a0
  899. wipeh: clr.l (a0)+
  900. dbra d0,wipeh
  901. gotfreshhead:
  902. move.l Level,d0
  903. IFEQ Sizeof_config-8
  904. asl.l #3,d0
  905. ELSE
  906. mulu #Sizeof_config,d0
  907. ENDC
  908. lea config_table,a0
  909. add.l d0,a0
  910. move.w Max_lazy(a0),max_lazy_match
  911. move.w Good_length(a0),good_match
  912. move.w Nice_length(a0),nice_match
  913. move.w Max_chain(a0),max_chain_len
  914. CLRINT _strstart
  915. clr.l _block_start
  916. bsr match_init
  917. clr.w eofile
  918. MOVINT #WSIZE,-(sp) ; We read only 32K because lookahead is short
  919. move.l Window,-(sp) ; even when int size is long, as if deflate.c
  920. move.l _read_buf,a0 ; were compiled with MAXSEG_64K defined.
  921. jsr (a0)
  922. addq #4+INTSIZE,sp
  923. move.w d0,lookahead
  924. beq.s noread
  925. cmp.w #EOF,d0
  926. bne.s irefill
  927. noread: move.w #1,eofile
  928. clr.w lookahead
  929. bra.s init_done
  930. irefill:
  931. move.w lookahead,d0
  932. cmp.w #MIN_LOOKAHEAD,d0
  933. bhs.s hashify
  934. bsr _fill_window ; use the C-callable version
  935. hashify:
  936. clr.w ins_h
  937. moveq #MIN_MATCH-2,d0
  938. hash1: move.b (Window)+,d1
  939. UP_HASH Level,d1
  940. dbra d0,hash1
  941. init_done:
  942. movem.l (sp)+,INIREGS
  943. rts
  944. ; strings for error messages:
  945. hash_message dc.b 'hash table allocation',0
  946. window_message dc.b 'window allocation',0
  947. level_message dc.b 'bad pack level',0
  948. end