/*
** Copyright 2001, Travis Geiselbrecht. All rights reserved.
** Distributed under the terms of the NewOS License.
*/

#include <asm_defs.h>

#if 1

/* that should be enough for now */

/*
 * void *memcpy(void *dst, const void *src, size_t len)
 * (also exported as __aeabi_memcpy)
 *
 * Target: 32-bit ARM, GNU as syntax, run through the C preprocessor.
 *
 * In:     r0 = dst, r1 = src, r2 = len in bytes
 * Out:    r0 = original dst (saved on entry, reloaded at .L_done)
 * Clobb:  r3, r12, condition flags
 * Saved:  r4, r5, lr on entry; r6, r7 only around the 32 byte copy loop
 *
 * Strategy:
 *   - len == 0 or src == dst: return immediately.
 *   - dst above src with the two ranges overlapping: copy bytewise from the
 *     end towards the start so that source bytes are read before they are
 *     overwritten.
 *   - len < 20, or src and dst not equally aligned modulo 4: bytewise copy.
 *   - otherwise: align dst to 16 bytes, copy 32 bytes per iteration, then
 *     finish with word copies and finally byte copies.
 */

.align 4
FUNCTION(memcpy):
FUNCTION(__aeabi_memcpy):
    // check for zero length copy or the same pointer
    cmp     r2, #0
    cmpne   r1, r0
    bxeq    lr

    // save a few registers for use and the return code (input dst)
    stmfd   sp!, {r0, r4, r5, lr}

    // check for forwards overlap (dst > src, distance < len)
    subs    r3, r0, r1              // r3 = dst - src
    cmpgt   r2, r3                  // only compared when dst - src > 0
    bgt     .L_forwardoverlap

    // check for a short copy len.
    // 20 bytes is enough so that if a 16 byte alignment needs to happen there
    // is at least a wordwise copy worth of work to be done.
    cmp     r2, #(16+4)
    blt     .L_bytewise

    // see if they are similarly aligned on 4 byte boundaries
    eor     r3, r0, r1
    tst     r3, #3
    bne     .L_bytewise             // dissimilarly aligned, nothing we can do (for now)

    // check for 16 byte alignment on dst.
    // this will also catch src being not 4 byte aligned, since it is similarly
    // 4 byte aligned with dst at this point.
    tst     r0, #15
    bne     .L_not16bytealigned

    // check to see if we have at least 32 bytes of data to copy.
    // if not, just revert to wordwise copy
    cmp     r2, #32
    blt     .L_wordwise

.L_bigcopy:
    // copy 32 bytes at a time. src & dst need to be at least 4 byte aligned,
    // and we need at least 32 bytes remaining to copy

    // save r6-r7 for use in the big copy
    stmfd   sp!, {r6-r7}

    // bias len by -32 so the subs inside the loop doubles as the loop test
    sub     r2, r2, #32

.L_bigcopy_loop:
    ldmia   r1!, {r4, r5, r6, r7}
    stmia   r0!, {r4, r5, r6, r7}
    ldmia   r1!, {r4, r5, r6, r7}
    subs    r2, r2, #32
    stmia   r0!, {r4, r5, r6, r7}   // stmia leaves the flags from subs intact
    bge     .L_bigcopy_loop

    // restore r6-r7
    ldmfd   sp!, {r6-r7}

    // undo the bias and see if we are done
    adds    r2, r2, #32
    beq     .L_done

    // less than 4 bytes left?
    cmp     r2, #4
    blt     .L_bytewise

.L_wordwise:
    // copy 4 bytes at a time.
    // src & dst are guaranteed to be word aligned, and at least 4 bytes are
    // left to copy.
    subs    r2, r2, #4              // bias len by -4, same trick as the big copy

.L_wordwise_loop:
    ldr     r3, [r1], #4
    subs    r2, r2, #4
    str     r3, [r0], #4
    bge     .L_wordwise_loop

    // correct the remaining len and test for completion
    adds    r2, r2, #4
    beq     .L_done

.L_bytewise:
    // simple bytewise copy; r2 holds the number of bytes left (at least 1)
    ldrb    r3, [r1], #1
    subs    r2, r2, #1
    strb    r3, [r0], #1
    bgt     .L_bytewise

.L_done:
    // load dst for return and restore r4,r5
//#if ARM_ARCH_LEVEL >= 5
//  ldmfd   sp!, {r0, r4, r5, pc}
//#else
    ldmfd   sp!, {r0, r4, r5, lr}
    bx      lr
//#endif

.L_not16bytealigned:
    // dst is not 16 byte aligned, so we will copy up to 15 bytes to get it
    // aligned. src is guaranteed to be similarly word aligned with dst.

    // set the condition flags based on the alignment.
    // After the negate, bits 31..28 of r12 hold (16 - (dst & 15)), the number
    // of bytes needed to reach the next 16 byte boundary. Written to the flag
    // field this gives N = 8 bytes, Z = 4 bytes, C = 2 bytes, V = 1 byte.
    lsl     r12, r0, #28
    rsb     r12, r12, #0
    msr     CPSR_f, r12             // move into NZCV fields in CPSR

    // move as many bytes as necessary to get the dst aligned
#ifdef __clang__
    ldrbvs  r3, [r1], #1            // V set
    ldrhcs  r4, [r1], #2            // C set
    ldreq   r5, [r1], #4            // Z set

    strbvs  r3, [r0], #1
    strhcs  r4, [r0], #2
    streq   r5, [r0], #4

    ldmiami r1!, {r3-r4}            // N set
    stmiami r0!, {r3-r4}
#else
    ldrvsb  r3, [r1], #1            // V set
    ldrcsh  r4, [r1], #2            // C set
    ldreq   r5, [r1], #4            // Z set

    strvsb  r3, [r0], #1
    strcsh  r4, [r0], #2
    streq   r5, [r0], #4

    ldmmiia r1!, {r3-r4}            // N set
    stmmiia r0!, {r3-r4}
#endif

    // fix the remaining len
    sub     r2, r2, r12, lsr #28

    // test to see what we should do now
    cmp     r2, #32
    bge     .L_bigcopy
    b       .L_wordwise

    // dst lies above src and inside the source range
.L_forwardoverlap:

    // do a bytewise reverse copy for now
    add     r1, r1, r2              // r1 = src + len (one past the last byte)
    add     r0, r0, r2              // r0 = dst + len (one past the last byte)

.L_bytewisereverse:
    // simple bytewise reverse copy.
    // Pre-indexed addressing with writeback steps the pointer down first, so
    // the bytes touched are len-1 down to 0. Post-indexed addressing from
    // src + len / dst + len would access index len and never reach index 0.
    ldrb    r3, [r1, #-1]!
    subs    r2, r2, #1
    strb    r3, [r0, #-1]!          // strb leaves the flags from subs intact
    bgt     .L_bytewisereverse

    b       .L_done

FUNCTION_END(memcpy)
FUNCTION_END(__aeabi_memcpy)
#endif