/* Pentium optimized __mpn_rshift --
   Copyright (C) 1992, 94, 95, 96, 97, 98, 2000 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA. */

#include "sysdep.h"
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"

#define PARMS	LINKAGE+16		/* space for 4 saved regs */
#define RES	PARMS
#define S	RES+PTR_SIZE
#define SIZE	S+PTR_SIZE
#define CNT	SIZE+4
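
/* For reference, a C-level sketch of the semantics this routine
   implements.  It is an illustration only, not build source; the name
   mpn_rshift_ref and the use of unsigned int as the 32-bit limb type
   are assumptions of the sketch.  The shift count satisfies
   0 < cnt < 32, and the return value is the low bits shifted out of
   s_ptr[0], placed in the high positions of a limb:

	unsigned int
	mpn_rshift_ref (unsigned int *res_ptr, const unsigned int *s_ptr,
			int size, unsigned int cnt)
	{
	  unsigned int retval = s_ptr[0] << (32 - cnt);
	  int i;

	  for (i = 0; i < size - 1; i++)
	    res_ptr[i] = (s_ptr[i] >> cnt) | (s_ptr[i + 1] << (32 - cnt));
	  res_ptr[size - 1] = s_ptr[size - 1] >> cnt;
	  return retval;
	}
*/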
	.text
ENTRY (BP_SYM (__mpn_rshift))
	ENTER

	pushl	%edi
	pushl	%esi
	pushl	%ebp
	pushl	%ebx

	movl	RES(%esp),%edi
	movl	S(%esp),%esi
	movl	SIZE(%esp),%ebx
	movl	CNT(%esp),%ecx
#if __BOUNDED_POINTERS__
	shll	$2, %ebx		/* convert limbs to bytes */
	CHECK_BOUNDS_BOTH_WIDE (%edi, RES(%esp), %ebx)
	CHECK_BOUNDS_BOTH_WIDE (%esi, S(%esp), %ebx)
	shrl	$2, %ebx
#endif

/* We can use faster code for shift-by-1 under certain conditions.  */
	cmpl	$1,%ecx
	jne	L(normal)
	leal	4(%edi),%eax
	cmpl	%esi,%eax
	jnc	L(special)		/* jump if res_ptr + 1 >= s_ptr */
	leal	(%edi,%ebx,4),%eax
	cmpl	%eax,%esi
	jnc	L(special)		/* jump if s_ptr >= res_ptr + size */

L(normal):
	movl	(%esi),%edx
	addl	$4,%esi
	xorl	%eax,%eax
	shrdl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */

	decl	%ebx
	pushl	%ebx			/* save size-1 for the remainder loop */
	shrl	$3,%ebx			/* number of 8-limb iterations */
	jz	L(end)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	4(%esi),%edx
	shrdl	%cl,%eax,%ebp
	shrdl	%cl,%edx,%eax
	movl	%ebp,(%edi)
	movl	%eax,4(%edi)

	movl	8(%esi),%ebp
	movl	12(%esi),%eax
	shrdl	%cl,%ebp,%edx
	shrdl	%cl,%eax,%ebp
	movl	%edx,8(%edi)
	movl	%ebp,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebp
	shrdl	%cl,%edx,%eax
	shrdl	%cl,%ebp,%edx
	movl	%eax,16(%edi)
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	shrdl	%cl,%eax,%ebp
	shrdl	%cl,%edx,%eax
	movl	%ebp,24(%edi)
	movl	%eax,28(%edi)

	addl	$32,%esi
	addl	$32,%edi
	decl	%ebx
	jnz	L(oop)

L(end):	popl	%ebx
	andl	$7,%ebx			/* remaining limb count */
	jz	L(end2)
L(oop2):
	movl	(%esi),%eax
	shrdl	%cl,%eax,%edx		/* compute result limb */
	movl	%edx,(%edi)
	movl	%eax,%edx
	addl	$4,%esi
	addl	$4,%edi
	decl	%ebx
	jnz	L(oop2)

L(end2):
	shrl	%cl,%edx		/* compute most significant limb */
	movl	%edx,(%edi)		/* store it */

	popl	%eax			/* pop carry limb */

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret

/* The shift-by-1 code below loops from the most significant end of the
   arrays.  That direction is only permissible when the destination
   does not overlap the source from below, since the function is
   documented to work for overlapping source and destination; the
   pointer checks before the jumps to L(special) above ensure this.  */

L(special):
	leal	-4(%edi,%ebx,4),%edi
	leal	-4(%esi,%ebx,4),%esi

	movl	(%esi),%edx
	subl	$4,%esi

	decl	%ebx
	pushl	%ebx			/* save size-1 for the remainder loop */
	shrl	$3,%ebx			/* number of 8-limb iterations */

	shrl	$1,%edx			/* shifted-out bit goes to carry */
	incl	%ebx
	decl	%ebx			/* test %ebx without touching carry */
	jz	L(Lend)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(Loop):
	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	-4(%esi),%edx
	rcrl	$1,%eax
	movl	%ebp,(%edi)
	rcrl	$1,%edx
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebp
	movl	-12(%esi),%eax
	rcrl	$1,%ebp
	movl	%edx,-8(%edi)
	rcrl	$1,%eax
	movl	%ebp,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebp
	rcrl	$1,%edx
	movl	%eax,-16(%edi)
	rcrl	$1,%ebp
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx
	rcrl	$1,%eax
	movl	%ebp,-24(%edi)
	rcrl	$1,%edx
	movl	%eax,-28(%edi)

	leal	-32(%esi),%esi		/* use leal not to clobber carry */
	leal	-32(%edi),%edi
	decl	%ebx
	jnz	L(Loop)

L(Lend):
	popl	%ebx
	sbbl	%eax,%eax		/* save carry in %eax */
	andl	$7,%ebx			/* remaining limb count */
	jz	L(Lend2)
	addl	%eax,%eax		/* restore carry from eax */
L(Loop2):
	movl	%edx,%ebp
	movl	(%esi),%edx
	rcrl	$1,%edx
	movl	%ebp,(%edi)

	leal	-4(%esi),%esi		/* use leal not to clobber carry */
	leal	-4(%edi),%edi
	decl	%ebx
	jnz	L(Loop2)

	jmp	L(L1)
L(Lend2):
	addl	%eax,%eax		/* restore carry from eax */
L(L1):	movl	%edx,(%edi)		/* store last limb */

	movl	$0,%eax			/* movl, not xorl, to keep the carry flag */
	rcrl	$1,%eax			/* rotate shifted-out bit into the msb */

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret
END (BP_SYM (__mpn_rshift))
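
/* A C-level illustration of the L(special) path above.  It is a sketch
   under our own naming, not build source; unsigned int stands in for
   the 32-bit limb type.  A shift by exactly 1 can be carried out as a
   rotate-through-carry chain, walking from the most significant limb
   down so the bit shifted out of each limb becomes the top bit of the
   next result limb:

	unsigned int carry = s_ptr[size - 1] & 1;	(the initial shrl $1)
	int i;

	res_ptr[size - 1] = s_ptr[size - 1] >> 1;
	for (i = size - 2; i >= 0; i--)
	  {
	    unsigned int limb = s_ptr[i];
	    res_ptr[i] = (limb >> 1) | (carry << 31);	(one rcrl $1)
	    carry = limb & 1;
	  }
	return carry << 31;		(the movl $0,%eax; rcrl $1,%eax pair)

   Presumably the point of the special case is that on the Pentium a
   chain of rcrl $1 instructions is cheaper than the shrdl %cl form
   used by the generic loop.  */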