deflate.s 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076
  1. ;===========================================================================
  2. ; Copyright (c) 1990-1999 Info-ZIP. All rights reserved.
  3. ;
  4. ; See the accompanying file LICENSE, version 1999-Oct-05 or later
  5. ; (the contents of which are also included in zip.h) for terms of use.
  6. ; If, for some reason, both of these files are missing, the Info-ZIP license
  7. ; also may be found at: ftp://ftp.cdrom.com/pub/infozip/license.html
  8. ;===========================================================================
  9. ; This is a 680x0 assembly language translation of the Info-ZIP source file
  10. ; deflate.c, by Paul Kienitz. No rights reserved. The function longest_match
  11. ; is based in part on match.a by Carsten Steger, which in turn is partly based
  12. ; on match.s for 386 by Jean-loup Gailly and Kai Uwe Rommel. Mostly, however,
  13. ; this material is based on deflate.c, by Gailly, Rommel, and Igor Mandrichenko.
  14. ; This code is not commented very much; see deflate.c for comments that explain
  15. ; what the functions are doing.
  16. ;
  17. ; The symbols that can be used to select different versions are as follows:
  18. ;
  19. ; CPU020 if defined, use 68020 instructions always.
  20. ;
  21. ; CPUTEST if defined, check at runtime for CPU type. Another symbol
  22. ; specifying the platform-specific test must be used with this.
  23. ; If neither of these is defined, use 68000 instructions only.
  24. ; Runtime test is nonportable; it is different for each OS.
  25. ;
  26. ; AMIGA use Amiga-specific test for 68020, if CPUTEST defined. Also
  27. ; tells it that registers d0/a0/d1/a1 are not preserved by
  28. ; function calls. At present, if AMIGA is not defined, it
  29. ; causes functions to preserve all registers. ALL OF THIS CODE
  30. ; CURRENTLY ASSUMES THAT REGISTERS D2-D7/A2-A6 WILL BE PRESERVED
  31. ; BY ANY FUNCTIONS THAT IT CALLS.
  32. ;
  33. ; DYN_ALLOC should be defined here if it is defined for C source; tells us
  34. ; that big arrays are allocated instead of static.
  35. ;
  36. ; WSIZE must be defined as the same number used for WSIZE in the C
  37. ; source, and must be a power of two <= 32768. As elsewhere,
  38. ; the default value is 32768.
  39. ;
  40. ; INT16 define this if ints are 16 bits; otherwise 32 bit ints assumed.
  41. ;
  42. ; SMALL_MEM define this if it is defined in the C source; otherwise it uses
  43. ; the MEDIUM_MEM model. BIG_MEM and MMAP are *not* supported.
  44. ; The FULL_SEARCH option in deflate.c is also not supported.
  45. ;
  46. ; DEBUG activates some tracing output, as in the C source.
  47. ;
  48. ; QUADLONG this selects a different version of the innermost longest_match
  49. ; loop code for 68020 operations, comparing bytes four at a time
  50. ; instead of two at a time. It seems to be a tiny bit faster on
  51. ; average, but it's slower often enough that one can't generalize.
  52. ;
  53. ; This code currently assumes that function results are returned in D0 for
  54. ; all platforms. It assumes that args to functions are pushed onto the stack,
  55. ; last arg first. It also currently assumes that all C symbols have an
  56. ; underscore prepended when referenced from assembly.
  57. ;
  58. ; 1999/09/23: for Human68k: Modified by Shimazaki Ryo.
  59. IFNDEF CPU020
  60. IFNDEF CPUTEST
  61. CPU000 equ 1
  62. ENDC
  63. ENDC
  64. ; Use these macros for accessing variables of type int:
  65. IFDEF INT16
  66. MOVINT MACRO _1,_2
  67. move.w _1,_2
  68. ENDM
  69. CLRINT MACRO _1
  70. clr.w _1
  71. ENDM
  72. INTSIZE equ 2
  73. ELSE ; !INT16
  74. MOVINT MACRO _1,_2
  75. move.l _1,_2
  76. ENDM
  77. CLRINT MACRO _1
  78. clr.l _1
  79. ENDM
  80. INTSIZE equ 4
  81. ENDC
  82. IFDEF DYN_ALLOC
  83. BASEPTR MACRO _1,_2
  84. move.l _1,_2
  85. ENDM
  86. ELSE
  87. BASEPTR MACRO _1,_2
  88. lea _1,_2
  89. ENDM
  90. ENDC
  91. ; constants we use, many of them adjustable:
  92. MAX_MATCH equ 258
  93. MIN_MATCH equ 3
  94. TOO_FAR equ 4096
  95. IFNDEF WSIZE
  96. WSIZE equ 32768
  97. ENDC
  98. WMASK equ WSIZE-1
  99. MAX_DIST equ WSIZE-MAX_MATCH-MIN_MATCH-1
  100. MIN_LOOKAHEAD equ MAX_MATCH+MIN_MATCH+1
  101. ; IFD BIG_MEM ; NOT supported -- type Pos needs to be 32 bits
  102. ;HASH_BITS equ 15
  103. ; ELSE
  104. IFDEF SMALL_MEM
  105. HASH_BITS equ 13
  106. ELSE
  107. HASH_BITS equ 14 ; default -- MEDIUM_MEM
  108. ENDC
  109. ; ENDC ; BIG_MEM
  110. HASH_SIZE equ 1<<HASH_BITS
  111. HASH_MASK equ HASH_SIZE-1
  112. H_SHIFT equ (HASH_BITS+MIN_MATCH-1)/MIN_MATCH
  113. B_SLOW equ 1
  114. B_FAST equ 2
  115. ZE_MEM equ 4
  116. EOF equ -1
  117. ; struct config is defined by these offsets:
  118. Good_length equ 0
  119. Max_lazy equ 2
  120. Nice_length equ 4
  121. Max_chain equ 6
  122. Sizeof_config equ 8
  123. ; external functions we call:
  124. xref _ct_tally ; int ct_tally(int, int)
  125. xref _flush_block ; unsigned long F(char *, unsigned long, int)
  126. xref _ziperr ; void ziperr(int, char *)
  127. xref _error ; void error(char *)
  128. xref _calloc ; stdlib function: void *calloc(size_t, size_t)
  129. xref _free ; stdlib function: void free(void *)
  130. IFDEF DEBUG
  131. xref _fputc ; stdio function: int fputc(int, FILE *)
  132. xref _stderr ; pointer to FILE, which we pass to fputc
  133. ENDC
  134. ; our entry points:
  135. xdef _lm_init ; void lm_init(int level, unsigned short *flags)
  136. xdef _lm_free ; void lm_free(void)
  137. xdef _deflate ; void deflate(void) ...the big one
  138. xdef _fill_window ; this line is just for debugging
  139. ; ============================================================================
  140. ; Here is where we have our global variables.
  141. ;;; section deflatevars,data
  142. ; external global variables we reference:
  143. xref _verbose ; signed int
  144. xref _level ; signed int
  145. xref _read_buf ; int (*read_buf)(char *, unsigned int)
  146. ; global variables we make available:
  147. xdef _window
  148. xdef _prev
  149. xdef _head
  150. xdef _window_size
  151. xdef _block_start
  152. xdef _strstart
  153. bss
  154. quad
  155. IFDEF DYN_ALLOC
  156. _prev: ds.l 1 ; pointer to calloc()'d unsigned short array
  157. _head: ds.l 1 ; pointer to calloc()'d unsigned short array
  158. _window: ds.l 1 ; pointer to calloc()'d unsigned char array
  159. ELSE ; !DYN_ALLOC
  160. _prev: ds.w WSIZE ; array of unsigned short
  161. _head: ds.w HASH_SIZE ; array of unsigned short
  162. _window: ds.b 2*WSIZE ; array of unsigned char
  163. ENDC ; ?DYN_ALLOC
  164. text
  165. quad
  166. _window_size: ds.l 1 ; unsigned long
  167. _block_start: ds.l 1 ; unsigned long
  168. _strstart: ds.w INTSIZE/2 ; unsigned int
  169. ; Now here are our private variables:
  170. IFDEF CPUTEST
  171. is020: ds.w 1 ; bool: CPU type is '020 or higher
  172. ENDC
  173. ins_h: ds.w 1 ; unsigned short
  174. sliding: ds.w 1 ; bool: the file is read a piece at a time
  175. eofile: ds.w 1 ; bool: we have read in the end of the file
  176. max_lazy_match: ds.w 1 ; unsigned short
  177. lookahead: ds.w 1 ; unsigned short
  178. ; These are NOT DECLARED AS STATIC in deflate.c, but currently could be:
  179. max_chain_len: ds.w 1 ; unsigned short (unsigned int in deflate.c)
  180. prev_length: ds.w 1 ; unsigned short (unsigned int in deflate.c)
  181. good_match: ds.w 1 ; unsigned short (unsigned int in deflate.c)
  182. nice_match: ds.w 1 ; unsigned short (signed int in deflate.c)
  183. match_start: ds.w 1 ; unsigned short (unsigned int in deflate.c)
  184. ; This array of struct config is a constant and could be in the code section:
  185. config_table: dc.w 0,0,0,0 ; level 0: store uncompressed
  186. dc.w 4,4,8,4 ; level 1: fastest, loosest compression
  187. dc.w 4,5,16,8 ; level 2
  188. dc.w 4,6,32,32 ; level 3: highest to use deflate_fast
  189. dc.w 4,4,16,16 ; level 4: lowest to use lazy matches
  190. dc.w 8,16,32,32 ; level 5
  191. dc.w 8,16,128,128 ; level 6: the default level
  192. dc.w 8,32,128,256 ; level 7
  193. dc.w 32,128,258,1024 ; level 8
  194. dc.w 32,258,258,4096 ; level 9: maximum compression, slow
  195. ;;CAL_SH MACRO ; macro for calling zcalloc()
  196. ;; IFD INT16
  197. ;; move.w #2,-(sp)
  198. ;; move.w #\1,-(sp)
  199. ;; jsr _zcalloc
  200. ;; addq #4,sp
  201. ;; ELSE
  202. ;; pea 2
  203. ;; pea \1
  204. ;; jsr _zcalloc
  205. ;; addq #8,sp
  206. ;; ENDC
  207. ;; ENDM
  208. CAL_SH MACRO _1 ; Okay, we're back to using regular calloc()...
  209. movem.l d2/a2,-(sp)
  210. pea 2
  211. pea _1
  212. jsr _calloc
  213. addq #8,sp
  214. movem.l (sp)+,d2/a2
  215. ENDM
  216. ; ============================================================================
  217. ; And here we begin our functions. match_init is for internal use only:
  218. ;; section deflate,code
  219. match_init:
  220. IFDEF CPUTEST ; now check for platform type
  221. IFDEF AMIGA ; Amiga specific test for '020 CPU:
  222. xref _SysBase
  223. NOLIST
  224. INCLUDE 'exec/execbase.i'
  225. LIST
  226. clr.w is020 ; default value is 68000
  227. move.l _SysBase,a0
  228. btst #AFB_68020,AttnFlags+1(a0)
  229. beq.s cheap
  230. move.w #1,is020
  231. cheap:
  232. ELSE ; !AMIGA
  233. FAIL Write an '020-detector for your system here!
  234. ; On the Macintosh, I believe GetEnvironment() provides the information.
  235. ENDC ; AMIGA
  236. ENDC ; CPUTEST
  237. rts ; match_init consists only of rts if CPUTEST unset
  238. ; ============================================================================
  239. ; Here is longest_match(), the function that the rest of this was built up
  240. ; from, the hottest hot spot in the program and therefore the most heavily
  241. ; optimized. It has two different versions, one for '020 and higher CPUs, and
  242. ; one for 68000/68010. It can test at runtime which version to use if you
  243. ; create a test function in match_init for your platform. Currently such a
  244. ; test is implemented for the Amiga. It can also be assembled to use '000 or
  245. ; '020 code only.
  246. Cur_Match reg d0 ; unsigned int, kept valid as long
  247. Best_Len reg d1 ; unsigned int, kept valid as long
  248. Scan_Start reg d3 ; pair of bytes
  249. Scan_End reg d4 ; pair of bytes
  250. Limit reg d5 ; unsigned int
  251. Chain_Length reg d6 ; unsigned int
  252. Scan_Test reg d7 ; counter, pair of bytes sometimes
  253. Scan reg a0 ; pointer to unsigned char
  254. Match reg a1 ; pointer to unsigned char
  255. Prev_Address reg a2 ; pointer to unsigned short
  256. Scan_Ini reg a3 ; pointer to unsigned char
  257. Match_Ini reg a5 ; pointer to unsigned char
  258. ; Note: "pair of bytes" means the two low order bytes of the register in
  259. ; 68020 code, but means the lowest and third lowest bytes on the 68000.
  260. SAVEREGS reg d3-d7/a2/a3/a5 ; don't protect d0/d1/a0/a1
  261. ; d2, a4, a6 not used... on Amiga, a4 is used by small-data memory model
  262. longest_match:
  263. movem.l SAVEREGS,-(sp)
  264. ; setup steps common to byte and word versions:
  265. IFDEF INT16
  266. and.l #$0000FFFF,Cur_Match ; upper half must be zero!
  267. ; we use an and.l down here for the sake of ATSIGN/REGARGS.
  268. moveq #0,Limit ; so adding to Scan_Ini works
  269. ENDC
  270. move.w (max_chain_len,pc),Chain_Length
  271. move.w (prev_length,pc),Best_Len
  272. MOVINT (_strstart,pc),Limit
  273. BASEPTR _prev,Prev_Address
  274. BASEPTR _window,Match_Ini
  275. move.l Match_Ini,Scan_Ini
  276. addq #MIN_MATCH,Match_Ini ; optimizes inner loop
  277. add.l Limit,Scan_Ini
  278. sub.w #MAX_DIST,Limit
  279. bhi.s limit_ok
  280. moveq #0,Limit
  281. limit_ok:
  282. cmp.w (good_match,pc),Best_Len
  283. blo.s length_ok
  284. lsr.w #2,Chain_Length
  285. length_ok:
  286. subq.w #1,Chain_Length
  287. IFDEF CPUTEST
  288. tst.w is020 ; can we use '020 stuff today?
  289. bne WORD_match
  290. ENDC
  291. IFNDEF CPU020
  292. ; for 68000 or 68010, use byte operations:
  293. moveq #0,Scan_Start ; clear 2nd & 4th bytes, use 1st & 3rd
  294. moveq #0,Scan_End ; likewise
  295. moveq #0,Scan_Test ; likewise
  296. move.b (Scan_Ini),Scan_Start
  297. swap Scan_Start ; swap is faster than 8 bit shift
  298. move.b 1(Scan_Ini),Scan_Start
  299. move.b -1(Scan_Ini,Best_Len.w),Scan_End
  300. swap Scan_End
  301. move.b 0(Scan_Ini,Best_Len.w),Scan_End
  302. bra.s bdo_scan
  303. blong_loop:
  304. move.b -1(Scan_Ini,Best_Len.w),Scan_End
  305. swap Scan_End
  306. move.b 0(Scan_Ini,Best_Len.w),Scan_End
  307. bshort_loop:
  308. add.w Cur_Match,Cur_Match ; assert value before doubling < 32K
  309. IFNE 32768-WSIZE
  310. and.w #(WMASK*2),Cur_Match
  311. ENDC
  312. move.w (Prev_Address,Cur_Match.l),Cur_Match
  313. cmp.w Limit,Cur_Match
  314. dbls Chain_Length,bdo_scan
  315. bra return
  316. bdo_scan:
  317. move.l Match_Ini,Match
  318. add.l Cur_Match,Match
  319. move.b -MIN_MATCH-1(Match,Best_Len.w),Scan_Test
  320. swap Scan_Test
  321. move.b -MIN_MATCH(Match,Best_Len.w),Scan_Test
  322. cmp.l Scan_Test,Scan_End
  323. bne.s bshort_loop
  324. move.b -MIN_MATCH(Match),Scan_Test
  325. swap Scan_Test
  326. move.b -MIN_MATCH+1(Match),Scan_Test
  327. cmp.l Scan_Test,Scan_Start
  328. bne.s bshort_loop
  329. move.w #(MAX_MATCH-3),Scan_Test
  330. lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
  331. bscan_loop:
  332. cmp.b (Match)+,(Scan)+
  333. dbne Scan_Test,bscan_loop
  334. subq #1,Scan
  335. sub.l Scan_Ini,Scan ; assert difference is 16 bits
  336. cmp.w Best_Len,Scan
  337. bls.s bshort_loop
  338. MOVINT Scan,Best_Len
  339. move.w Cur_Match,match_start
  340. cmp.w (nice_match,pc),Best_Len
  341. blo.s blong_loop
  342. IFDEF CPUTEST
  343. bra return
  344. ENDC
  345. ENDC ; !CPU020
  346. IFNDEF CPU000
  347. ;;; MACHINE MC68020
  348. ; for 68020 or higher, use word operations even on odd addresses:
  349. WORD_match:
  350. move.w (Scan_Ini),Scan_Start
  351. move.w -1(Scan_Ini,Best_Len.w),Scan_End
  352. bra.s wdo_scan
  353. wlong_loop:
  354. move.w -1(Scan_Ini,Best_Len.w),Scan_End
  355. wshort_loop:
  356. and.w #WMASK,Cur_Match
  357. move.w (Prev_Address,Cur_Match.w*2),Cur_Match ; '020 addressing mode
  358. cmp.w Limit,Cur_Match
  359. dbls Chain_Length,wdo_scan
  360. bra.s return
  361. wdo_scan:
  362. move.l Match_Ini,Match
  363. add.l Cur_Match,Match
  364. cmp.w -MIN_MATCH-1(Match,Best_Len.w),Scan_End
  365. bne.s wshort_loop
  366. cmp.w -MIN_MATCH(Match),Scan_Start
  367. bne.s wshort_loop
  368. IFDEF QUADLONG
  369. ; By some measurements, this version of the code is a little tiny bit faster.
  370. ; But on some files it's slower. It probably pays off only when there are
  371. ; long match strings, and costs in the most common case of three-byte matches.
  372. moveq #((MAX_MATCH-MIN_MATCH)/16),Scan_Test ; value = 15
  373. lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
  374. wscan_loop:
  375. cmp.l (Match)+,(Scan)+ ; test four bytes at a time
  376. bne.s odd
  377. cmp.l (Match)+,(Scan)+
  378. bne.s odd
  379. cmp.l (Match)+,(Scan)+
  380. bne.s odd
  381. cmp.l (Match)+,(Scan)+
  382. dbne Scan_Test,wscan_loop ; '020 can cache a bigger loop
  383. odd:
  384. subq #4,Scan
  385. subq #4,Match
  386. cmp.b (Match)+,(Scan)+ ; find good bytes in bad longword
  387. bne.s even
  388. cmp.b (Match)+,(Scan)+
  389. bne.s even
  390. cmp.b (Match)+,(Scan)+
  391. beq.s steven
  392. even: subq #1,Scan
  393. ELSE ; !QUADLONG
  394. moveq #((MAX_MATCH-MIN_MATCH)/2),Scan_Test ; value = 127
  395. lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
  396. wscan_loop:
  397. cmp.w (Match)+,(Scan)+
  398. dbne Scan_Test,wscan_loop
  399. subq #2,Scan
  400. move.b -2(Match),Scan_Test
  401. cmp.b (Scan),Scan_Test
  402. bne.s steven
  403. addq #1,Scan
  404. ENDC ; ?QUADLONG
  405. steven:
  406. sub.l Scan_Ini,Scan ; assert: difference is 16 bits
  407. cmp.w Best_Len,Scan
  408. bls.s wshort_loop
  409. MOVINT Scan,Best_Len
  410. move.w Cur_Match,match_start
  411. cmp.w (nice_match,pc),Best_Len
  412. blo.s wlong_loop
  413. ;;; MACHINE MC68000
  414. ENDC ; !CPU000
  415. return:
  416. MOVINT Best_Len,d0 ; return value (upper half should be clear)
  417. movem.l (sp)+,SAVEREGS
  418. rts
  419. ; =============================================================================
  420. ; This is the deflate() function itself, our main entry point. It calls
  421. ; longest_match, above, and some outside functions. It is a hot spot, but not
  422. ; as hot as longest_match. It uses no special '020 code.
  423. ; ================== Several macros used in deflate() and later functions:
  424. ; Arg 1 is D-reg that new ins_h value is to be left in,
  425. ; arg 2 is the byte value to be hashed into it, which must not be the same reg
  426. UP_HASH MACRO _1,_2
  427. move.w (ins_h,pc),_1
  428. asl.w #H_SHIFT,_1
  429. eor.b _2,_1
  430. and.w #HASH_MASK,_1 ; ((ins_h << H_SHIFT) ^ c) & HASH_MASK
  431. move.w _1,ins_h ; ins_h = that
  432. ENDM
  433. ; Arg 1 is scratch A, arg 2 is scratch D
  434. IN_STR MACRO _1,_2
  435. move.l Strst,_2
  436. addq.w #MIN_MATCH-1,_2
  437. move.b (Window,_2.l),_2 ; window[strstart + MIN_MATCH - 1]
  438. UP_HASH Head,_2
  439. add.l Head,Head ; assert upper word is zero before add
  440. BASEPTR _head,_1
  441. add.l Head,_1
  442. move.w (_1),Head ; hash_head = head[ins_h]
  443. move.w Strst,(_1) ; head[ins_h] = strstart
  444. move.l Strst,_2
  445. IFNE WSIZE-32768
  446. and.w #WMASK,_2
  447. ENDC
  448. add.w _2,_2 ; masks implicitly when WSIZE == 32768
  449. move.w Head,(Prev,_2.l) ; prev[str_start & WMASK] = hash_head
  450. ENDM
  451. ; Arg 1 is bool (int) EOF flag, flush_block result is in d0, trashes d1/a0/a1
  452. FLUSH_B MACRO _1
  453. local nenu,nun
  454. movem.l d2/a2,-(sp)
  455. IF _1==0
  456. CLRINT -(sp)
  457. ELSEIF (INTSIZE==4).and.(_1<$8000)
  458. pea (_1).w
  459. ELSE
  460. MOVINT #_1,-(sp)
  461. ENDC
  462. move.l (_block_start,pc),d0
  463. blt.s nenu
  464. move.l Window,a0
  465. add.l d0,a0
  466. bra.s nun
  467. nenu: sub.l a0,a0 ; if block_start < 0, push NULL
  468. nun: sub.l Strst,d0
  469. neg.l d0
  470. move.l d0,-(sp)
  471. move.l a0,-(sp)
  472. jsr _flush_block
  473. lea 8+INTSIZE(sp),sp
  474. movem.l (sp)+,d2/a2
  475. ENDM
  476. ; This expands to nothing unless DEBUG is defined.
  477. ; Arg 1 is a byte to be trace-outputted -- if it is d0 it must be a valid int
  478. TRACE_C MACRO _1
  479. local qui
  480. IFDEF DEBUG
  481. cmp.w #1,_verbose+INTSIZE-2 ; test lower word only
  482. ble.s qui
  483. moveq #0,d0
  484. move.b ea,d0
  485. movem.l d2/a2,-(sp)
  486. move.l _stderr,-(sp)
  487. MOVINT d0,-(sp)
  488. jsr _fputc
  489. addq.l #4+INTSIZE,sp
  490. movem.l (sp)+,d2/a2
  491. qui:
  492. ENDC ; DEBUG
  493. ENDM
  494. ; ================== Here are the register vars we use, and deflate() itself:
  495. Window reg a2 ; cached address of window[]
  496. Prev reg a3 ; cached address of prev[]
  497. Strst reg d7 ; strstart cached as a longword
  498. Look reg d6 ; lookahead cached as short
  499. Head reg d5 ; local variable hash_head, short
  500. PrevL reg d4 ; prev_length cached as short
  501. MatchL reg d3 ; local variable match_length, unsigned short
  502. Avail reg d2 ; local variable available_match, bool
  503. PrevM reg a5 ; local variable prev_match, int in an A-reg
  504. DEFREGS reg d3-d7/a3/a5
  505. _deflate: ; first, setup steps common to deflate and deflate_fast:
  506. movem.l DEFREGS,-(sp)
  507. IFDEF INT16
  508. moveq #0,Strst ; make sure strstart is valid as a long
  509. ENDC
  510. moveq #0,Head ; ditto for hash_head
  511. MOVINT (_strstart,pc),Strst
  512. move.w (lookahead,pc),Look
  513. move.w (prev_length,pc),PrevL
  514. BASEPTR _window,Window
  515. BASEPTR _prev,Prev
  516. MOVINT _level,d0
  517. cmp.w #3,d0
  518. ble deflate_fast
  519. moveq #MIN_MATCH-1,MatchL
  520. moveq #0,Avail
  521. look_loop:
  522. tst.w Look
  523. beq last_tally
  524. IN_STR a0,d0
  525. move.w MatchL,PrevL
  526. move.w (match_start,pc),PrevM
  527. move.w #MIN_MATCH-1,MatchL
  528. tst.w Head
  529. beq.s no_new_match
  530. cmp.w (max_lazy_match,pc),PrevL
  531. bhs.s no_new_match
  532. move.w Strst,d0
  533. sub.w Head,d0
  534. cmp.w #MAX_DIST,d0
  535. bhi.s no_new_match
  536. move.w PrevL,prev_length ; longest_match reads these variables
  537. MOVINT Strst,_strstart
  538. MOVINT Head,d0 ; parm for longest_match
  539. bsr longest_match ; sets match_start
  540. cmp.w Look,d0 ; does length exceed valid data?
  541. bls.s stml
  542. move.w Look,d0
  543. stml: move.w d0,MatchL ; valid length of match
  544. cmp.w #MIN_MATCH,MatchL ; is the match only three bytes?
  545. bne.s no_new_match
  546. move.w (match_start,pc),d0
  547. sub.w Strst,d0
  548. cmp.w #-TOO_FAR,d0
  549. bge.s no_new_match
  550. moveq #MIN_MATCH-1,MatchL ; mark the current match as no good
  551. no_new_match:
  552. cmp.w #MIN_MATCH,PrevL
  553. blo literal
  554. cmp.w MatchL,PrevL
  555. blo literal
  556. ; CHECK_MATCH Strst-1,PrevM,PrevL
  557. MOVINT Strst,_strstart ; ct_tally reads this variable
  558. move.l PrevL,d0
  559. subq.w #MIN_MATCH,d0
  560. movem.l d2/a2,-(sp)
  561. MOVINT d0,-(sp)
  562. move.l Strst,d0
  563. sub.w PrevM,d0
  564. subq.w #1,d0
  565. MOVINT d0,-(sp)
  566. jsr _ct_tally ; sets d0 true if we have to flush
  567. addq #2*INTSIZE,sp
  568. movem.l (sp)+,d2/a2
  569. subq.w #3,PrevL ; convert for dbra (prev_length - 2)
  570. sub.w PrevL,Look
  571. subq.w #2,Look
  572. insertmatch:
  573. addq.w #1,Strst
  574. IN_STR a0,d1 ; don't clobber d0
  575. dbra PrevL,insertmatch
  576. moveq #0,Avail
  577. moveq #0,PrevL ; not needed?
  578. moveq #MIN_MATCH-1,MatchL
  579. addq.w #1,Strst
  580. tst.w d0
  581. beq refill
  582. FLUSH_B 0
  583. move.l Strst,_block_start
  584. bra.s refill
  585. literal:
  586. tst.w Avail
  587. bne.s yeslit
  588. moveq #1,Avail
  589. bra.s skipliteral
  590. yeslit: TRACE_C <-1(Window,Strst.l)>
  591. MOVINT Strst,_strstart ; ct_tally reads this variable
  592. moveq #0,d0
  593. move.b -1(Window,Strst.l),d0
  594. movem.l d2/a2,-(sp)
  595. MOVINT d0,-(sp)
  596. CLRINT -(sp)
  597. jsr _ct_tally
  598. addq #2*INTSIZE,sp
  599. movem.l (sp)+,d2/a2
  600. tst.w d0
  601. beq.s skipliteral
  602. FLUSH_B 0
  603. move.l Strst,_block_start
  604. skipliteral:
  605. addq.w #1,Strst
  606. subq.w #1,Look
  607. refill:
  608. cmp.w #MIN_LOOKAHEAD,Look
  609. bhs look_loop
  610. bsr fill_window
  611. bra look_loop
  612. last_tally:
  613. tst.w Avail
  614. beq last_flush
  615. MOVINT Strst,_strstart ; ct_tally reads this variable
  616. moveq #0,d0
  617. move.b -1(Window,Strst.l),d0
  618. movem.l d2/a2,-(sp)
  619. MOVINT d0,-(sp)
  620. CLRINT -(sp)
  621. jsr _ct_tally
  622. addq #2*INTSIZE,sp
  623. movem.l (sp)+,d2/a2
  624. last_flush:
  625. FLUSH_B 1
  626. bra deflate_exit
  627. ; ================== This is another version used for low compression levels:
  628. deflate_fast:
  629. moveq #0,MatchL
  630. moveq #MIN_MATCH-1,PrevL
  631. flook_loop:
  632. tst.w Look
  633. beq flast_flush
  634. IN_STR a0,d0
  635. tst.w Head
  636. beq.s fno_new_match
  637. move.w Strst,d0
  638. sub.w Head,d0
  639. cmp.w #MAX_DIST,d0
  640. bhi.s fno_new_match
  641. move.w PrevL,prev_length ; longest_match reads these variables
  642. MOVINT Strst,_strstart
  643. MOVINT Head,d0 ; parm for longest_match
  644. bsr longest_match ; sets match_start
  645. cmp.w Look,d0 ; does length exceed valid data?
  646. bls.s fstml
  647. move.w Look,d0
  648. fstml: move.w d0,MatchL ; valid length of match
  649. fno_new_match:
  650. cmp.w #MIN_MATCH,MatchL
  651. blo fliteral
  652. ; CHECK_MATCH Strst,match_start,MatchL
  653. MOVINT Strst,_strstart ; ct_tally reads this variable
  654. move.l MatchL,d0
  655. subq.w #MIN_MATCH,d0
  656. movem.l d2/a2,-(sp)
  657. MOVINT d0,-(sp)
  658. move.l Strst,d0
  659. sub.w (match_start,pc),d0
  660. MOVINT d0,-(sp)
  661. jsr _ct_tally ; sets d0 true if we have to flush
  662. addq #2*INTSIZE,sp
  663. movem.l (sp)+,d2/a2
  664. sub.w MatchL,Look
  665. cmp.w (max_lazy_match,pc),MatchL
  666. bhi ftoolong
  667. subq.w #2,MatchL
  668. finsertmatch:
  669. addq.w #1,Strst
  670. IN_STR a0,d1 ; preserve d0
  671. dbra MatchL,finsertmatch
  672. moveq #0,MatchL ; not needed?
  673. addq.w #1,Strst
  674. bra.s flushfill
  675. ftoolong:
  676. add.w MatchL,Strst
  677. moveq #0,MatchL
  678. moveq #0,d1 ; preserve d0
  679. move.b (Window,Strst.l),d1
  680. move.w d1,ins_h
  681. ; My assembler objects to passing <1(Window,Strst.l)> directly to UP_HASH...
  682. move.b 1(Window,Strst.l),Avail ; Avail is not used in deflate_fast
  683. UP_HASH d1,Avail ; preserve d0
  684. IFNE MIN_MATCH-3
  685. FAIL needs to UP_HASH another MIN_MATCH-3 times, but with what arg?
  686. ENDC
  687. bra.s flushfill
  688. fliteral:
  689. TRACE_C <(Window,Strst.l)>
  690. MOVINT Strst,_strstart ; ct_tally reads this variable
  691. moveq #0,d0
  692. move.b (Window,Strst.l),d0
  693. movem.l d2/a2,-(sp)
  694. MOVINT d0,-(sp)
  695. CLRINT -(sp)
  696. jsr _ct_tally ; d0 set if we need to flush
  697. addq #2*INTSIZE,sp
  698. movem.l (sp)+,d2/a2
  699. addq.w #1,Strst
  700. subq.w #1,Look
  701. flushfill:
  702. tst.w d0
  703. beq.s frefill
  704. FLUSH_B 0
  705. move.l Strst,_block_start
  706. frefill:
  707. cmp.w #MIN_LOOKAHEAD,Look
  708. bhs flook_loop
  709. bsr fill_window
  710. bra flook_loop
  711. flast_flush:
  712. FLUSH_B 1 ; sets our return value
  713. deflate_exit:
  714. MOVINT Strst,_strstart ; save back cached values
  715. move.w PrevL,prev_length
  716. move.w Look,lookahead
  717. movem.l (sp)+,DEFREGS
  718. rts
  719. ; =========================================================================
  720. ; void fill_window(void) calls the input function to refill the sliding
  721. ; window that we use to find substring matches in.
  722. More reg Head ; local variable in fill_window
  723. WindTop reg Prev ; local variable used for sliding
  724. SlidIx reg PrevL ; local variable used for sliding
  725. FWREGS reg d2-d5/a2-a6 ; does NOT include Look and Strst
  726. ; all registers available to be clobbered by the sliding operation:
  727. ; we exclude More, WindTop, SlidIx, Look, Strst, Window, a4 and a7.
  728. SPAREGS reg d0-d3/a0-a1/a5-a6
  729. SPCOUNT equ 8 ; number of registers in SPAREGS
  730. _fill_window: ; C-callable entry point
  731. movem.l Look/Strst,-(sp)
  732. IFDEF INT16
  733. moveq #0,Strst ; Strst must be valid as a long
  734. ENDC
  735. MOVINT (_strstart,pc),Strst
  736. move.w (lookahead,pc),Look
  737. BASEPTR _window,Window
  738. bsr.s fill_window
  739. MOVINT Strst,_strstart
  740. move.w Look,lookahead
  741. movem.l (sp)+,Look/Strst
  742. rts
  743. ; strstart, lookahead, and window must be cached in Strst, Look, and Window:
  744. fill_window: ; asm-callable entry point
  745. movem.l FWREGS,-(sp)
  746. move.w (eofile,pc),d0 ; we put this up here for speed
  747. bne fwdone
  748. and.l #$FFFF,Look ; make sure Look is valid as long
  749. fw_refill:
  750. move.l (_window_size,pc),More ; <= 64K
  751. sub.l Look,More
  752. sub.l Strst,More ; Strst is already valid as long
  753. cmp.w #EOF,More
  754. bne.s notboundary
  755. subq.w #1,More
  756. bra checkend
  757. notboundary:
  758. move.w (sliding,pc),d0
  759. beq checkend
  760. cmp.w #WSIZE+MAX_DIST,Strst
  761. blo checkend
  762. IF (32768-WSIZE)>0
  763. lea WSIZE(Window),WindTop ; WindTop is aligned when Window is
  764. ELSE
  765. move.l Window,WindTop
  766. add.l #WSIZE,WindTop
  767. ENDC
  768. move.l Window,d0
  769. and.w #3,d0
  770. beq.s isaligned
  771. subq.w #1,d0
  772. align: move.b (WindTop)+,(Window)+ ; copy up to a longword boundary
  773. dbra d0,align
  774. isaligned:
  775. ; This is faster than a simple move.l (WindTop)+,(Window)+ / dbra loop:
  776. move.w #(WSIZE-1)/(4*SPCOUNT),SlidIx
  777. slide: movem.l (WindTop)+,SPAREGS ; copy, 32 bytes at a time!
  778. movem.l SPAREGS,(Window) ; a slight overshoot doesn't matter.
  779. lea 4*SPCOUNT(Window),Window ; can't use (aN)+ as movem.l dest
  780. dbra SlidIx,slide
  781. BASEPTR _window,Window ; restore cached value
  782. sub.w #WSIZE,match_start
  783. sub.w #WSIZE,Strst
  784. sub.l #WSIZE,_block_start
  785. add.w #WSIZE,More
  786. BASEPTR _head,a0
  787. move.w #HASH_SIZE-1,d0
  788. fixhead:
  789. move.w (a0),d1
  790. sub.w #WSIZE,d1
  791. bpl.s headok
  792. moveq #0,d1
  793. headok: move.w d1,(a0)+
  794. dbra d0,fixhead
  795. BASEPTR _prev,a0
  796. move.w #WSIZE-1,d0
  797. fixprev:
  798. move.w (a0),d1
  799. sub.w #WSIZE,d1
  800. bpl.s prevok
  801. moveq #0,d1
  802. prevok: move.w d1,(a0)+
  803. dbra d0,fixprev
  804. TRACE_C #'.'
  805. move _verbose+INTSIZE-2,d0
  806. beq checkend
  807. movem.l d2/a2,-(sp)
  808. xref _print_period
  809. jsr _print_period
  810. movem.l (sp)+,d2/a2
  811. checkend: ; assert eofile is false
  812. movem.l d2/a2,-(sp)
  813. MOVINT More,-(sp) ; assert More's upper word is zero
  814. move.l Strst,d0
  815. add.w Look,d0
  816. add.l Window,d0
  817. move.l d0,-(sp)
  818. move.l _read_buf,a0
  819. jsr (a0) ; refill the upper part of the window
  820. addq #4+INTSIZE,sp
  821. movem.l (sp)+,d2/a2
  822. tst.w d0
  823. beq.s iseof
  824. cmp.w #EOF,d0
  825. beq.s iseof
  826. add.w d0,Look
  827. cmp.w #MIN_LOOKAHEAD,Look
  828. blo fw_refill ; eofile is still false
  829. bra.s fwdone
  830. iseof: move.w #1,eofile
  831. fwdone: movem.l (sp)+,FWREGS
  832. rts
  833. ; =========================================================================
  834. ; void lm_free(void) frees dynamic arrays in the DYN_ALLOC version.
  835. ;;; xdef _lm_free ; the entry point
  836. _lm_free:
  837. IFDEF DYN_ALLOC
  838. move.l _window,d0
  839. beq.s lf_no_window
  840. movem.l d2/a2,-(sp)
  841. move.l d0,-(sp)
  842. jsr _free
  843. addq #4,sp
  844. movem.l (sp)+,d2/a2
  845. clr.l _window
  846. lf_no_window:
  847. move.l _prev,d0
  848. beq.s lf_no_prev
  849. movem.l d2/a2,-(sp)
  850. move.l d0,-(sp)
  851. jsr _free
  852. move.l _head,(sp) ; reuse the same stack arg slot
  853. jsr _free
  854. addq #4,sp
  855. movem.l (sp)+,d2/a2
  856. clr.l _prev
  857. clr.l _head
  858. lf_no_prev:
  859. ENDC
  860. rts
  861. ; ============================================================================
  862. ; void lm_init(int pack_level, unsigned short *flags) allocates dynamic arrays
  863. ; if any, and initializes all variables so that deflate() is ready to go.
  864. ;;; xdef _lm_init ; the entry point
  865. Level reg d2
  866. ;Window reg a2 ; as in deflate()
  867. _lm_init:
  868. MOVINT 4(sp),d0
  869. move.l 4+INTSIZE(sp),a0
  870. move.w d0,Level
  871. cmp.w #1,Level
  872. blt.s levelerr
  873. bgt.s try9
  874. bset.b #B_FAST,1(a0)
  875. try9: cmp.w #9,Level
  876. bgt.s levelerr
  877. blt.s levelok
  878. bset.b #B_SLOW,1(a0)
  879. bra.s levelok
  880. levelerr:
  881. pea (level_message,pc)
  882. jsr _error ; never returns
  883. levelok:
  884. clr.w sliding
  885. move.l (_window_size,pc),d0
  886. bne.s gotawindowsize
  887. move.w #1,sliding
  888. move.l #2*WSIZE,_window_size
  889. gotawindowsize:
  890. BASEPTR _window,Window
  891. IFDEF DYN_ALLOC
  892. move.l Window,d0 ; fake tst.l
  893. bne.s gotsomewind
  894. CAL_SH WSIZE
  895. move.l d0,Window
  896. move.l d0,_window
  897. bne.s gotsomewind
  898. pea (window_message,pc)
  899. bra error
  900. gotsomewind:
  901. tst.l _prev
  902. bne.s gotsomehead
  903. CAL_SH WSIZE
  904. move.l d0,_prev
  905. beq.s nohead
  906. CAL_SH HASH_SIZE
  907. move.l d0,_head
  908. bne.s gotfreshhead ; newly calloc'd memory is zeroed
  909. nohead: pea (hash_message,pc)
  910. error: MOVINT #ZE_MEM,-(sp)
  911. jsr _ziperr ; never returns
  912. gotsomehead:
  913. ENDC ; DYN_ALLOC
  914. move.w #(HASH_SIZE/2)-1,d0 ; two shortwords per loop
  915. BASEPTR _head,a0
  916. wipeh: clr.l (a0)+
  917. dbra d0,wipeh
  918. gotfreshhead:
  919. move.l Level,d0
  920. IFEQ Sizeof_config-8
  921. asl.l #3,d0
  922. ELSE
  923. mulu #Sizeof_config,d0
  924. ENDC
  925. lea (config_table,pc),a0
  926. add.l d0,a0
  927. move.w Max_lazy(a0),max_lazy_match
  928. move.w Good_length(a0),good_match
  929. move.w Nice_length(a0),nice_match
  930. move.w Max_chain(a0),max_chain_len
  931. CLRINT _strstart
  932. clr.l _block_start
  933. bsr match_init
  934. clr.w eofile
  935. movem.l d2/a2,-(sp)
  936. MOVINT #WSIZE,-(sp) ; We read only 32K because lookahead is short
  937. move.l Window,-(sp) ; even when int size is long, as if deflate.c
  938. move.l _read_buf,a0 ; were compiled with MAXSEG_64K defined.
  939. jsr (a0)
  940. addq #4+INTSIZE,sp
  941. movem.l (sp)+,d2/a2
  942. move.w d0,lookahead
  943. beq.s noread
  944. cmp.w #EOF,d0
  945. bne.s irefill
  946. noread: move.w #1,eofile
  947. clr.w lookahead
  948. bra.s init_done
  949. irefill:
  950. move.w (lookahead,pc),d0
  951. cmp.w #MIN_LOOKAHEAD,d0
  952. bhs.s hashify
  953. bsr _fill_window ; use the C-callable version
  954. hashify:
  955. clr.w ins_h
  956. moveq #MIN_MATCH-2,d0
  957. hash1: move.b (Window)+,d1
  958. UP_HASH Level,d1
  959. dbra d0,hash1
  960. init_done:
  961. rts
  962. ; strings for error messages:
  963. IFDEF DYN_ALLOC
  964. hash_message dc.b 'hash table allocation',0
  965. window_message dc.b 'window allocation',0
  966. ENDC
  967. level_message dc.b 'bad pack level',0
  968. end