/*
  Copyright (c) 1990-2001 Info-ZIP.  All rights reserved.

  See the accompanying file LICENSE, version 2000-Apr-09 or later
  (the contents of which are also included in zip.h) for terms of use.
  If, for some reason, all these files are missing, the Info-ZIP license
  also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
*/
/*
 * crc_i386.S, optimized CRC calculation function for Zip and UnZip,
 * created by Paul Kienitz and Christian Spieler.  Last revised 24 Dec 98.
 *
 * GRR 961110:  incorporated Scott Field optimizations from win32/crc_i386.asm
 *              => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66)
 *
 * SPC 970402:  revised for Rodney Brown's optimizations (32-bit-wide
 *              aligned reads for most of the data from buffer), can be
 *              disabled by defining the macro NO_32_BIT_LOADS
 *
 * SPC 971012:  added Rodney Brown's additional tweaks for 32-bit-optimized
 *              CPUs (like the Pentium Pro, Pentium II, and probably some
 *              Pentium clones). This optimization is controlled by the
 *              preprocessor switch "__686" and is disabled by default.
 *              (This default is based on the assumption that most users
 *              do not yet work on a Pentium Pro or Pentium II machine ...)
 *
 * FLAT memory model assumed.  Calling interface:
 *   - args are pushed onto the stack from right to left,
 *   - return value is given in the EAX register,
 *   - all other registers (with exception of EFLAGS) are preserved.  (With
 *     GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving
 *     them nevertheless adds only 4 single byte instructions.)
 *
 * This source generates the function
 * ulg crc32(ulg crc, ZCONST uch *buf, extent len).
 *
 * The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
 * This results in shorter code at the expense of reduced performance.
 */

/* This file is NOT used in conjunction with zlib. */
#ifndef USE_ZLIB

/* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
 * external symbols with an underline character '_'.
 */
#if defined(NO_UNDERLINE) || defined(__ELF__)
#  define _crc32            crc32
#  define _get_crc_table    get_crc_table
#endif
/* Use 16-byte alignment if your assembler supports it. Warning: gas
 * uses a log(x) parameter (.align 4 means 16-byte alignment). On SVR4
 * the parameter is a number of bytes.
 */
#ifndef ALIGNMENT
#  define ALIGNMENT .align 4,0x90
#endif

#if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386)

/* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas).
 * Warning: it uses the AT&T syntax: mov source,dest
 * This file is only optional. If you want to use the C version,
 * remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string.
 */

                .file   "crc_i386.S"

/* Stack frame selection:
 *   - NO_STD_STACKFRAME defined : arguments are addressed relative to %esp,
 *                                 no %ebp frame is set up;
 *   - otherwise                 : the standard %ebp frame is used (this is
 *                                 the default, because it results in
 *                                 smaller code).
 * An explicit NO_STD_STACKFRAME takes precedence over USE_STD_STACKFRAME.
 */
#ifdef NO_STD_STACKFRAME
#  undef USE_STD_STACKFRAME
#else
#  ifndef USE_STD_STACKFRAME
#    define USE_STD_STACKFRAME
#  endif
#endif

#ifdef USE_STD_STACKFRAME
   /* Offsets skip the saved %ebp (4 bytes) and the return address (4). */
#  define _STD_ENTRY    pushl   %ebp ; movl   %esp,%ebp
#  define arg1  8(%ebp)
#  define arg2  12(%ebp)
#  define arg3  16(%ebp)
#  define _STD_LEAVE    popl    %ebp
#else /* !USE_STD_STACKFRAME */
   /* Offsets skip the five registers pushed on entry (20 bytes) and the
    * return address (4 bytes).
    */
#  define _STD_ENTRY
#  define arg1  24(%esp)
#  define arg2  28(%esp)
#  define arg3  32(%esp)
#  define _STD_LEAVE
#endif /* ?USE_STD_STACKFRAME */

/*
 * These two (three) macros make up the loop body of the CRC32 cruncher.
 * registers modified:
 *   eax  : crc value "c"
 *   esi  : pointer to next data byte (or lword) "buf++"
 * registers read:
 *   edi  : pointer to base of crc_table array
 * scratch registers:
 *   ebx  : index into crc_table array
 *          (requires upper three bytes = 0 when __686 is undefined)
 */
#ifndef __686   /* optimize for 386, 486, Pentium */
#define Do_CRC  /* c = (c >> 8) ^ table[c & 0xFF] */\
                movb    %al, %bl                ;/* tmp = c & 0xFF  */\
                shrl    $8, %eax                ;/* c = (c >> 8)    */\
                xorl    (%edi, %ebx, 4), %eax   ;/* c ^= table[tmp] */
#else   /* __686 : optimize for Pentium Pro and compatible CPUs */
#define Do_CRC  /* c = (c >> 8) ^ table[c & 0xFF] */\
                movzbl  %al, %ebx               ;/* tmp = c & 0xFF  */\
                shrl    $8, %eax                ;/* c = (c >> 8)    */\
                xorl    (%edi, %ebx, 4), %eax   ;/* c ^= table[tmp] */
#endif  /* ?__686 */

#define Do_CRC_byte     /* c = (c >> 8) ^ table[(c^*buf++)&0xFF] */\
                xorb    (%esi), %al     ;/* c ^= *buf  */\
                incl    %esi            ;/* buf++      */\
                Do_CRC

#ifndef  NO_32_BIT_LOADS
#define Do_CRC_lword \
                xorl    (%esi), %eax    ;/* c ^= *(ulg *)buf */\
                addl    $4, %esi        ;/* ((ulg *)buf)++   */\
                Do_CRC \
                Do_CRC \
                Do_CRC \
                Do_CRC
#endif  /* !NO_32_BIT_LOADS */


                .text

                .globl  _crc32

/*
 * ulg crc32(ulg crc, uch *buf, extent len)
 *
 * Returns 0 when buf is NULL; otherwise complements crc, folds len bytes
 * starting at buf into it through the table returned by get_crc_table(),
 * and returns the complemented result in %eax.
 *
 * Register roles inside the routine:
 *   eax : running crc value "c" (and the return value)
 *   ebx : table index scratch (pre-cleared when __686 is undefined)
 *   ecx : loop counter
 *   edx : copy of the byte count left after alignment (unrolled build only)
 *   esi : read pointer into buf
 *   edi : base address of the crc table
 * edi, esi, ebx, edx and ecx are saved on entry and restored on exit.
 */
_crc32:                         /* ulg crc32(ulg crc, uch *buf, extent len) */
                _STD_ENTRY
                pushl   %edi
                pushl   %esi
                pushl   %ebx
                pushl   %edx
                pushl   %ecx

                movl    arg2, %esi          /* 2nd arg: uch *buf            */
                subl    %eax, %eax          /* > if (!buf)                  */
                testl   %esi, %esi          /* >   return 0;                */
                jz      .L_fine             /* > else {                     */
                call    _get_crc_table
                movl    %eax, %edi          /* edi = crc table base         */
                movl    arg1, %eax          /* 1st arg: ulg crc             */
#ifndef __686
                subl    %ebx, %ebx          /* ebx=0; bl usable as dword    */
#endif
                movl    arg3, %ecx          /* 3rd arg: extent len          */
                notl    %eax                /* >   c = ~crc;                */

                /* The flags set here feed the first "jz .L_bail" that
                 * follows, whichever build variant is selected.
                 */
                testl   %ecx, %ecx
#ifndef  NO_UNROLLED_LOOPS
                jz      .L_bail
#  ifndef  NO_32_BIT_LOADS
                /* Assert now have positive length */
.L_align_loop:
                testl   $3, %esi        /* Align buf on lword boundary */
                jz      .L_aligned_now
                Do_CRC_byte
                decl    %ecx
                jnz     .L_align_loop
.L_aligned_now:
#  endif  /* !NO_32_BIT_LOADS */
                movl    %ecx, %edx          /* save len in edx  */
                shrl    $3, %ecx            /* ecx = len / 8    */
                jz      .L_No_Eights
/*  align loop head at start of 486 internal cache line !! */
                ALIGNMENT
.L_Next_Eight:
#  ifndef NO_32_BIT_LOADS
                /* Do_CRC_lword */
                xorl    (%esi), %eax    ;/* c ^= *(ulg *)buf */
                addl    $4, %esi        ;/* ((ulg *)buf)++   */
                Do_CRC
                Do_CRC
                Do_CRC
                Do_CRC
                /* Do_CRC_lword */
                xorl    (%esi), %eax    ;/* c ^= *(ulg *)buf */
                addl    $4, %esi        ;/* ((ulg *)buf)++   */
                Do_CRC
                Do_CRC
                Do_CRC
                Do_CRC
#  else   /* NO_32_BIT_LOADS */
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
#  endif  /* ?NO_32_BIT_LOADS */
                decl    %ecx
                jnz     .L_Next_Eight

.L_No_Eights:
                movl    %edx, %ecx
                andl    $7, %ecx            /* ecx = len % 8    */
#endif /* !NO_UNROLLED_LOOPS */
                jz      .L_bail             /* > if (len)                   */
/* align loop head at start of 486 internal cache line !! */
                ALIGNMENT
.L_loupe:                                   /* >   do {                     */
                Do_CRC_byte                 /*       c = CRC32(c, *buf++);  */
                decl    %ecx                /* >   } while (--len);         */
                jnz     .L_loupe

.L_bail:                                    /* > }                          */
                notl    %eax                /* > return ~c;                 */
.L_fine:
                popl    %ecx
                popl    %edx
                popl    %ebx
                popl    %esi
                popl    %edi
                _STD_LEAVE
                ret

#else
#  error this asm version is for 386 only
#endif /* i386 || _i386 || _I386 || __i386 */

#endif /* !USE_ZLIB */