123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304 |
- /*
- Copyright (c) 1990-2007 Info-ZIP. All rights reserved.
- See the accompanying file LICENSE, version 2000-Apr-09 or later
- (the contents of which are also included in zip.h) for terms of use.
- If, for some reason, all these files are missing, the Info-ZIP license
- also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
- */
- /*
- * crc_i386.S, optimized CRC calculation function for Zip and UnZip,
- * created by Paul Kienitz and Christian Spieler. Last revised 07 Jan 2007.
- *
- * GRR 961110: incorporated Scott Field optimizations from win32/crc_i386.asm
- * => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66)
- *
- * SPC 970402: revised for Rodney Brown's optimizations (32-bit-wide
- * aligned reads for most of the data from buffer), can be
- * disabled by defining the macro NO_32_BIT_LOADS
- *
- * SPC 971012: added Rodney Brown's additional tweaks for 32-bit-optimized
- * CPUs (like the Pentium Pro, Pentium II, and probably some
- * Pentium clones). This optimization is controlled by the
- * preprocessor switch "__686" and is disabled by default.
- * (This default is based on the assumption that most users
- * do not yet work on a Pentium Pro or Pentium II machine ...)
- *
- * COS 050116: Enabled the 686 build by default, because there are hardly any
- * pre-686 CPUs in serious use nowadays. (See SPC 970402 above.)
- *
- * SPC 060103: Updated code to incorporate newer optimizations found in zlib.
- *
- * SPC 070107: Added conditional switch to deactivate crc32() compilation.
- *
- * FLAT memory model assumed. Calling interface:
- * - args are pushed onto the stack from right to left,
- * - return value is given in the EAX register,
- * - all other registers (with exception of EFLAGS) are preserved. (With
- * GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving
- * them nevertheless adds only 4 single byte instructions.)
- *
- * This source generates the function
- * ulg crc32(ulg crc, ZCONST uch *buf, extent len).
- *
- * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
- * This results in shorter code at the expense of reduced performance.
- */
- /* This file is NOT used in conjunction with zlib, or when only creation of
- * the basic CRC_32_Table (for other purpose) is requested.
- */
- #if !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)
/* External symbol naming.
 * The code below refers to the C-visible symbols with a leading underline
 * character '_'.  Preprocess with -DNO_UNDERLINE if your C compiler does
 * not prefix external symbols that way; the same mapping is applied when
 * __ELF__ is defined.
 */
#if defined(__ELF__) || defined(NO_UNDERLINE)
#  define _get_crc_table   get_crc_table
#  define _crc32           crc32
#endif
/* Loop-head alignment directive; a definition supplied on the command
 * line (-DALIGNMENT=...) takes precedence over this default.
 * Use 16-byte alignment if your assembler supports it.  Warning: gas
 * takes a log2(x) parameter (".align 4" means 16-byte alignment), whereas
 * on SVR4 the parameter is a number of bytes.
 */
#if !defined(ALIGNMENT)
#  define ALIGNMENT     .align  4,0x90
#endif
- #if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386)
/* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas).
 * Warning: it uses the AT&T syntax: mov source,dest
 * Use of this file is optional.  If you want to use the C version instead,
 * remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string.
 */
                .file   "crc_i386.S"

/* Optimize for Pentium Pro and compatible CPUs by default.  Defining
 * PRE_686 leaves __686 undefined unless it was defined explicitly.
 */
#if !(defined(PRE_686) || defined(__686))
#  define __686
#endif
/* Stack frame selection.
 *
 * The default is to use the standard stack frame entry, because it
 * results in smaller code!  Define NO_STD_STACKFRAME to address the
 * arguments relative to %esp instead.  NO_STD_STACKFRAME takes precedence
 * when both NO_STD_STACKFRAME and USE_STD_STACKFRAME are defined.
 */
#ifdef NO_STD_STACKFRAME
#  undef USE_STD_STACKFRAME
#else
#  ifndef USE_STD_STACKFRAME
#    define USE_STD_STACKFRAME
#  endif
#endif

#ifdef USE_STD_STACKFRAME
   /* %ebp frame: 0(%ebp) = saved %ebp, 4(%ebp) = return address,
    * so the first argument sits at 8(%ebp).
    */
#  define _STD_ENTRY    pushl   %ebp ; movl   %esp,%ebp
#  define arg1          8(%ebp)
#  define arg2          12(%ebp)
#  define arg3          16(%ebp)
#  define _STD_LEAVE    popl    %ebp
#else /* !USE_STD_STACKFRAME */
   /* %esp-relative: the offsets account for the return address plus the
    * five registers that _crc32 pushes on entry (4 + 5*4 = 24).
    */
#  define _STD_ENTRY
#  define arg1          24(%esp)
#  define arg2          28(%esp)
#  define arg3          32(%esp)
#  define _STD_LEAVE
#endif /* ?USE_STD_STACKFRAME */
/*
 * Do_CRC, Do_CRC_byte and Do_CRC_byteof make up the loop body of the
 * CRC32 cruncher.
 *
 *   registers modified:
 *     eax : crc value "c"
 *     esi : pointer to next data byte (or lword) "buf++"
 *   registers read:
 *     edi : pointer to base of crc_table array
 *   scratch registers:
 *     ebx : index into crc_table array
 *           (requires upper three bytes = 0 when __686 is undefined,
 *           because that variant only writes the low byte %bl)
 */
#ifdef __686    /* optimize for Pentium Pro and compatible CPUs */
#define Do_CRC  /* c = (c >> 8) ^ table[c & 0xFF] */\
                movzbl  %al, %ebx                ;/* tmp = c & 0xFF  */\
                shrl    $8, %eax                 ;/* c = (c >> 8)    */\
                xorl    (%edi, %ebx, 4), %eax    ;/* c ^= table[tmp] */
#else           /* !__686 : optimize for 386, 486, Pentium */
#define Do_CRC  /* c = (c >> 8) ^ table[c & 0xFF] */\
                movb    %al, %bl                 ;/* tmp = c & 0xFF  */\
                shrl    $8, %eax                 ;/* c = (c >> 8)    */\
                xorl    (%edi, %ebx, 4), %eax    ;/* c ^= table[tmp] */
#endif          /* ?__686 */
/* Do_CRC_byte:  c = (c >> 8) ^ table[(c ^ *buf++) & 0xFF]
 * Folds the byte at (%esi) into the crc and advances %esi by one.
 */
#define Do_CRC_byte \
                xorb    (%esi), %al      ;/* c ^= *buf */\
                incl    %esi             ;/* buf++     */\
                Do_CRC

/* Do_CRC_byteof(ofs):  c = (c >> 8) ^ table[(c ^ buf[ofs]) & 0xFF]
 * Folds the byte at ofs(%esi) into the crc and leaves %esi untouched.
 * The unrolled loop invokes this with ofs = 0..15 and then advances the
 * pointer once by 16; incrementing %esi here as well would skip every
 * second byte and step the pointer twice as far as intended.
 */
#define Do_CRC_byteof(ofs) \
                xorb    ofs(%esi), %al   ;/* c ^= buf[ofs] */\
                Do_CRC
/*
 * Macros for the unrolled loops.
 *
 *   SavLen               : where the remaining length is parked while %ecx
 *                          serves as loop counter (defined in every
 *                          configuration, because the unrolled loop uses it
 *                          with and without 32-bit loads)
 *   UpdCRC_lword         : run the crc over the four bytes already xor-ed
 *                          into %eax
 *   UpdCRC_lword_sh(n)   : same, and additionally advance %esi by n lwords
 *   Do_CRC_lword         : process one lword at (%esi), advance %esi by 4
 *   Do_CRC_4lword        : process four lwords at (%esi), advance %esi by 16
 */
#ifndef NO_32_BIT_LOADS
#  ifdef IZ_CRCOPTIM_UNFOLDTBL
     /* The edx register is needed in crc calculation, so the length is
      * written back to the third argument's stack slot instead.
      * The table at %edi is indexed at ulg offsets 0, 256, 512 and 768
      * (byte displacements 0, 1024, 2048, 3072).
      */
#    define SavLen arg3
#    define UpdCRC_lword \
                movzbl  %al, %ebx                ;/* tmp = c & 0xFF         */\
                movl    3072(%edi,%ebx,4), %edx  ;/* t  = table[768 + tmp]  */\
                movzbl  %ah, %ebx                ;/* tmp = (c >> 8) & 0xFF  */\
                shrl    $16, %eax                ;/* c >>= 16               */\
                xorl    2048(%edi,%ebx,4), %edx  ;/* t ^= table[512 + tmp]  */\
                movzbl  %al, %ebx                ;/* tmp = c & 0xFF         */\
                shrl    $8,%eax                  ;/* c >>= 8                */\
                xorl    1024(%edi,%ebx,4), %edx  ;/* t ^= table[256 + tmp]  */\
                movl    (%edi,%eax,4), %eax      ;/* c  = table[c]          */\
                xorl    %edx,%eax                ;/* c ^= t                 */
#    define UpdCRC_lword_sh(dwPtrIncr) \
                movzbl  %al, %ebx                ;/* tmp = c & 0xFF         */\
                movl    3072(%edi,%ebx,4), %edx  ;/* t  = table[768 + tmp]  */\
                movzbl  %ah, %ebx                ;/* tmp = (c >> 8) & 0xFF  */\
                shrl    $16, %eax                ;/* c >>= 16               */\
                xorl    2048(%edi,%ebx,4), %edx  ;/* t ^= table[512 + tmp]  */\
                movzbl  %al, %ebx                ;/* tmp = c & 0xFF         */\
                addl    $4*(dwPtrIncr), %esi     ;/* ((ulg *)buf)+=dwPtrIncr */\
                shrl    $8,%eax                  ;/* c >>= 8                */\
                xorl    1024(%edi,%ebx,4), %edx  ;/* t ^= table[256 + tmp]  */\
                movl    (%edi,%eax,4),%eax       ;/* c  = table[c]          */\
                xorl    %edx,%eax                ;/* c ^= t                 */
#  else /* !IZ_CRCOPTIM_UNFOLDTBL */
     /* the edx register is not needed anywhere else */
#    define SavLen %edx
#    define UpdCRC_lword \
                Do_CRC \
                Do_CRC \
                Do_CRC \
                Do_CRC
#    define UpdCRC_lword_sh(dwPtrIncr) \
                Do_CRC \
                Do_CRC \
                addl    $4*(dwPtrIncr), %esi     ;/* ((ulg *)buf)+=dwPtrIncr */\
                Do_CRC \
                Do_CRC
#  endif /* ?IZ_CRCOPTIM_UNFOLDTBL */

#  define Do_CRC_lword \
                xorl    (%esi), %eax             ;/* c ^= *(ulg *)buf       */\
                UpdCRC_lword_sh(1)               /* ... ((ulg *)buf)++      */
#  define Do_CRC_4lword \
                xorl    (%esi), %eax             ;/* c ^= *(ulg *)buf       */\
                UpdCRC_lword \
                xorl    4(%esi), %eax            ;/* c ^= *((ulg *)buf+1)   */\
                UpdCRC_lword \
                xorl    8(%esi), %eax            ;/* c ^= *((ulg *)buf+2)   */\
                UpdCRC_lword \
                xorl    12(%esi), %eax           ;/* c ^= *((ulg *)buf+3)   */\
                UpdCRC_lword_sh(4)               /* ... ((ulg *)buf)+=4     */
#else /* NO_32_BIT_LOADS */
   /* Byte-wise processing only uses eax, ebx, ecx, esi and edi, so edx is
    * free to hold the saved length.
    */
#  define SavLen %edx
#endif /* ?NO_32_BIT_LOADS */
/*-----------------------------------------------------------------------
 * ulg crc32(ulg crc, ZCONST uch *buf, extent len)
 *
 * ABI:   32-bit x86, FLAT memory model; arguments on the stack, pushed
 *        from right to left; result in %eax.
 * In:    arg1 = crc, arg2 = buf, arg3 = len
 * Out:   %eax = ~(crc run over buf[0..len-1], starting from ~crc);
 *        %eax = 0 when buf is NULL
 * Saved: %edi, %esi, %ebx, %edx, %ecx are pushed on entry and popped on
 *        exit (plus %ebp when the standard stack frame is used);
 *        EFLAGS is not preserved.
 * Calls: get_crc_table(), whose return value is used as the table base.
 *
 * Register roles after setup:
 *        eax = running crc value "c" (inverted on entry and on exit)
 *        ecx = loop counter
 *        esi = pointer to next input byte
 *        edi = base of the crc table
 *        ebx = table index scratch
 *        edx = SavLen or lword scratch, depending on configuration
 *-----------------------------------------------------------------------*/
                .text

                .globl  _crc32
#ifdef __ELF__
                .type   _crc32,@function
#endif

_crc32:                         /* ulg crc32(ulg crc, uch *buf, extent len) */
                _STD_ENTRY
                pushl   %edi
                pushl   %esi
                pushl   %ebx
                pushl   %edx
                pushl   %ecx

                movl    arg2, %esi           /* 2nd arg: uch *buf            */
                xorl    %eax, %eax           /* > if (!buf)                  */
                testl   %esi, %esi           /* >   return 0;                */
                jz      .L_fine              /* > else {                     */
                call    _get_crc_table
                movl    %eax, %edi           /* edi = crc table base         */
                movl    arg1, %eax           /* 1st arg: ulg crc             */
#ifndef __686
                xorl    %ebx, %ebx           /* ebx=0; bl usable as dword    */
#endif
                movl    arg3, %ecx           /* 3rd arg: extent len          */
                notl    %eax                 /* >   c = ~crc;                */

                testl   %ecx, %ecx           /* ZF set when len == 0         */
#ifndef NO_UNROLLED_LOOPS
                jz      .L_bail
#  ifndef NO_32_BIT_LOADS
                /* Invariant: ecx > 0 here.  Consume single bytes until buf
                 * is lword-aligned or the input is exhausted.
                 */
.L_align_loop:
                testl   $3, %esi             /* Align buf on lword boundary  */
                jz      .L_aligned_now
                Do_CRC_byte
                decl    %ecx
                jnz     .L_align_loop
.L_aligned_now:
#  endif /* !NO_32_BIT_LOADS */
                movl    %ecx, SavLen         /* save current value of len    */
                shrl    $4, %ecx             /* ecx = len / 16               */
                jz      .L_No_Sixteens
/*  align loop head at start of 486 internal cache line !! */
                ALIGNMENT
.L_Next_Sixteen:
#  ifndef NO_32_BIT_LOADS
                Do_CRC_4lword
#  else   /* NO_32_BIT_LOADS */
                Do_CRC_byteof(0)
                Do_CRC_byteof(1)
                Do_CRC_byteof(2)
                Do_CRC_byteof(3)
                Do_CRC_byteof(4)
                Do_CRC_byteof(5)
                Do_CRC_byteof(6)
                Do_CRC_byteof(7)
                Do_CRC_byteof(8)
                Do_CRC_byteof(9)
                Do_CRC_byteof(10)
                Do_CRC_byteof(11)
                Do_CRC_byteof(12)
                Do_CRC_byteof(13)
                Do_CRC_byteof(14)
                Do_CRC_byteof(15)
                addl    $16,%esi             ;/* buf += 16                   */
#  endif  /* ?NO_32_BIT_LOADS */
                decl    %ecx
                jnz     .L_Next_Sixteen

.L_No_Sixteens:
                movl    SavLen, %ecx
                andl    $15, %ecx            /* ecx = len % 16               */
#  ifndef NO_32_BIT_LOADS
                shrl    $2,%ecx              /* ecx = (len % 16) / 4         */
                jz      .L_No_Fours
.L_Next_Four:
                Do_CRC_lword
                decl    %ecx
                jnz     .L_Next_Four
.L_No_Fours:
                movl    SavLen,%ecx
                andl    $3,%ecx              /* ecx = len % 4                */
#  endif /* !NO_32_BIT_LOADS */
#endif /* !NO_UNROLLED_LOOPS */
                /* ZF comes from the last testl/andl on ecx in every
                 * configuration: set means no bytes are left.
                 */
                jz      .L_bail              /* >   if (len)                 */
/* align loop head at start of 486 internal cache line !! */
                ALIGNMENT
.L_loupe:                                    /* >     do {                   */
                Do_CRC_byte                  /*         c = CRC32(c,*buf++,crctab);*/
                decl    %ecx                 /* >     } while (--len);       */
                jnz     .L_loupe

.L_bail:                                     /* > }                          */
                notl    %eax                 /* > return ~c;                 */
.L_fine:
                popl    %ecx
                popl    %edx
                popl    %ebx
                popl    %esi
                popl    %edi
                _STD_LEAVE
                ret

#ifdef __ELF__
                .size   _crc32, .-_crc32
#endif
#if defined(__ELF__) && defined(__linux__)
/* Tell the linker that this object does not need an executable stack. */
                .section .note.GNU-stack,"",@progbits
#endif
- #else
/* Stop the build with a clear diagnostic on non-i386 targets. */
#  error this asm version is for 386 only
- #endif /* i386 || _i386 || _I386 || __i386 */
- #endif /* !USE_ZLIB && !CRC_TABLE_ONLY */
|