/* Pentium optimized __mpn_lshift --
   Copyright (C) 1992, 94, 95, 96, 97, 98, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* mp_limb_t __mpn_lshift (mp_ptr res_ptr, mp_srcptr s_ptr,
			   mp_size_t size, unsigned int cnt)

   Shift the SIZE-limb (32-bit word) vector at S_PTR left by CNT bits
   and store the result at RES_PTR.  The return value (in %eax) is the
   CNT bits shifted out of the most significant limb.

   AT&T syntax, i386/i586 target.  The BP_SYM/ENTER/LEAVE/CHECK_BOUNDS
   macros support glibc's (historical) bounded-pointer build and expand
   to nothing otherwise.  NOTE(review): callers appear expected to pass
   size >= 1 and 0 < cnt < 32 — confirm against the mpn interface.  */

#include "sysdep.h"
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"

/* Offsets of the incoming arguments from %esp, valid after the four
   register pushes in the prologue (hence the +16).  */
#define PARMS LINKAGE+16	/* space for 4 saved regs */
#define RES PARMS		/* mp_ptr res_ptr */
#define S RES+PTR_SIZE		/* mp_srcptr s_ptr */
#define SIZE S+PTR_SIZE		/* mp_size_t size (limb count) */
#define CNT SIZE+4		/* unsigned int cnt (bit count) */

	.text
ENTRY (BP_SYM (__mpn_lshift))
	ENTER

	/* Save the callee-saved registers we use as scratch.  */
	pushl %edi
	pushl %esi
	pushl %ebp
	pushl %ebx

	movl RES(%esp),%edi	/* %edi = res_ptr */
	movl S(%esp),%esi	/* %esi = s_ptr */
	movl SIZE(%esp),%ebx	/* %ebx = size (limbs) */
	movl CNT(%esp),%ecx	/* %ecx = cnt; shifts below use %cl */
#if __BOUNDED_POINTERS__
	shll $2, %ebx		/* convert limbs to bytes */
	CHECK_BOUNDS_BOTH_WIDE (%edi, RES(%esp), %ebx)
	CHECK_BOUNDS_BOTH_WIDE (%esi, S(%esp), %ebx)
	shrl $2, %ebx		/* back to limbs */
#endif

/* We can use faster code for shift-by-1 under certain conditions.
   The special add/adc path below walks UPWARD from the least
   significant limb, which is only safe when the destination does not
   overlap the source from above; otherwise fall through to the
   general shld path, which walks downward.  */
	cmp $1,%ecx
	jne L(normal)
	leal 4(%esi),%eax
	cmpl %edi,%eax
	jnc L(special)		/* jump if s_ptr + 1 >= res_ptr */
	leal (%esi,%ebx,4),%eax
	cmpl %eax,%edi
	jnc L(special)		/* jump if res_ptr >= s_ptr + size */

L(normal):
	/* Point both pointers at the MOST significant limb; the main
	   loop moves downward, which tolerates res_ptr overlapping
	   s_ptr from above.  */
	leal -4(%edi,%ebx,4),%edi
	leal -4(%esi,%ebx,4),%esi

	movl (%esi),%edx	/* %edx = most significant source limb */
	subl $4,%esi
	xorl %eax,%eax
	shldl %cl,%edx,%eax	/* compute carry limb */
	pushl %eax		/* push carry limb onto stack */

	decl %ebx		/* one limb consumed above */
	pushl %ebx		/* save remaining count for L(end) */
	shrl $3,%ebx		/* %ebx = number of 8-limb chunks */
	jz L(end)

	/* Dummy load: touch the destination so its cache line is
	   present before the burst of stores; %eax is dead here.  */
	movl (%edi),%eax	/* fetch destination cache line */

	ALIGN (2)
/* Main loop, unrolled 8 limbs per iteration.  Invariant at the top:
   %edx holds the next (not yet shifted) source limb; each shldl shifts
   one limb, pulling its low bits from the limb below it.  */
L(oop):	movl -28(%edi),%eax	/* fetch destination cache line */
	movl %edx,%ebp

	movl (%esi),%eax
	movl -4(%esi),%edx
	shldl %cl,%eax,%ebp
	shldl %cl,%edx,%eax
	movl %ebp,(%edi)
	movl %eax,-4(%edi)

	movl -8(%esi),%ebp
	movl -12(%esi),%eax
	shldl %cl,%ebp,%edx
	shldl %cl,%eax,%ebp
	movl %edx,-8(%edi)
	movl %ebp,-12(%edi)

	movl -16(%esi),%edx
	movl -20(%esi),%ebp
	shldl %cl,%edx,%eax
	shldl %cl,%ebp,%edx
	movl %eax,-16(%edi)
	movl %edx,-20(%edi)

	movl -24(%esi),%eax
	movl -28(%esi),%edx
	shldl %cl,%eax,%ebp
	shldl %cl,%edx,%eax
	movl %ebp,-24(%edi)
	movl %eax,-28(%edi)

	subl $32,%esi
	subl $32,%edi
	decl %ebx
	jnz L(oop)

L(end):	popl %ebx		/* recover saved limb count */
	andl $7,%ebx		/* limbs left over from the 8x loop */
	jz L(end2)
/* Remainder loop: one limb at a time, still moving downward.  */
L(oop2):
	movl (%esi),%eax
	shldl %cl,%eax,%edx	/* shift %edx, filling from limb below */
	movl %edx,(%edi)
	movl %eax,%edx		/* this limb becomes the next to shift */
	subl $4,%esi
	subl $4,%edi
	decl %ebx
	jnz L(oop2)

L(end2):
	shll %cl,%edx		/* compute least significant limb */
	movl %edx,(%edi)	/* store it */

	popl %eax		/* pop carry limb: the return value */

	popl %ebx
	popl %ebp
	popl %esi
	popl %edi

	LEAVE
	ret

/* We loop from least significant end of the arrays, which is only
   permissible if the source and destination don't overlap, since the
   function is documented to work for overlapping source and destination.

   Shift-by-1 is done as x + x with carry propagated by adc; the carry
   flag must therefore stay live across each whole iteration, which is
   why pointers advance with leal and the counter with decl (neither
   touches CF).  */

L(special):
	movl (%esi),%edx	/* %edx = least significant source limb */
	addl $4,%esi

	decl %ebx		/* one limb consumed above */
	pushl %ebx		/* save remaining count for L(Lend) */
	shrl $3,%ebx		/* %ebx = number of 8-limb chunks */

	addl %edx,%edx		/* shift first limb; CF = bit pushed out */
	/* Test %ebx for zero WITHOUT clobbering CF: inc/dec leave the
	   carry flag intact.  */
	incl %ebx
	decl %ebx
	jz L(Lend)

	/* Dummy load to fetch the destination cache line early.  */
	movl (%edi),%eax	/* fetch destination cache line */

	ALIGN (2)
/* Main loop, unrolled 8 limbs.  Invariant at the top: %edx holds the
   already-shifted previous limb, CF holds its outgoing carry; mov does
   not alter flags, so the adc chain threads through the stores.  */
L(Loop):
	movl 28(%edi),%eax	/* fetch destination cache line */
	movl %edx,%ebp

	movl (%esi),%eax
	movl 4(%esi),%edx
	adcl %eax,%eax		/* eax = 2*eax + CF */
	movl %ebp,(%edi)
	adcl %edx,%edx
	movl %eax,4(%edi)

	movl 8(%esi),%ebp
	movl 12(%esi),%eax
	adcl %ebp,%ebp
	movl %edx,8(%edi)
	adcl %eax,%eax
	movl %ebp,12(%edi)

	movl 16(%esi),%edx
	movl 20(%esi),%ebp
	adcl %edx,%edx
	movl %eax,16(%edi)
	adcl %ebp,%ebp
	movl %edx,20(%edi)

	movl 24(%esi),%eax
	movl 28(%esi),%edx
	adcl %eax,%eax
	movl %ebp,24(%edi)
	adcl %edx,%edx
	movl %eax,28(%edi)

	leal 32(%esi),%esi	/* use leal not to clobber carry */
	leal 32(%edi),%edi
	decl %ebx		/* decl preserves CF too */
	jnz L(Loop)

L(Lend):
	popl %ebx		/* recover limb count; pop keeps flags */
	sbbl %eax,%eax		/* save carry in %eax (0 or -1) */
	andl $7,%ebx		/* leftover limbs; this clobbers CF... */
	jz L(Lend2)
	addl %eax,%eax		/* restore carry from eax */
/* Remainder loop: one limb at a time, carry still threaded.  */
L(Loop2):
	movl %edx,%ebp		/* %ebp = previous shifted limb */
	movl (%esi),%edx
	adcl %edx,%edx
	movl %ebp,(%edi)

	leal 4(%esi),%esi	/* use leal not to clobber carry */
	leal 4(%edi),%edi
	decl %ebx
	jnz L(Loop2)

	jmp L(L1)
L(Lend2):
	addl %eax,%eax		/* restore carry from eax */
L(L1):	movl %edx,(%edi)	/* store last limb */

	/* Materialize the final carry as the return value: for a
	   1-bit shift the carry limb is just 0 or 1.  */
	sbbl %eax,%eax		/* %eax = -CF */
	negl %eax		/* %eax = CF (0 or 1) */

	popl %ebx
	popl %ebp
	popl %esi
	popl %edi

	LEAVE
	ret
END (BP_SYM (__mpn_lshift))