/*
** Copyright 2001, Travis Geiselbrecht. All rights reserved.
** Distributed under the terms of the NewOS License.
*/

#include <asm_defs.h>

#if 1

/* that should be enough for now */

/*
 * memcpy (32-bit ARM)
 *
 * In:
 *   r0 = dst
 *   r1 = src
 *   r2 = len in bytes
 * Out:
 *   r0 = original dst
 * Clobbers:
 *   r3, r12, condition flags. r4-r7 and lr are saved and restored on the stack.
 *
 * Strategy:
 *   - len == 0 or dst == src: return immediately.
 *   - dst above src and closer than len bytes: the regions overlap such that
 *     a forward copy would overwrite source bytes before they are read, so
 *     copy bytewise from the end towards the start.
 *   - fewer than 20 bytes, or src/dst differ in their low two address bits:
 *     bytewise forward copy.
 *   - otherwise: align dst to 16 bytes, copy 32 byte chunks with ldm/stm,
 *     then whole words, then any trailing bytes.
 *
 * Limitations:
 *   The length checks and loop counters use signed conditions (blt/bge/bgt),
 *   so len must be below 2^31.
 */

.align 4
FUNCTION(memcpy):
	// check for zero length copy or the same pointer
	cmp		r2, #0
	cmpne	r1, r0
	bxeq	lr

	// save a few registers for use and the return code (input dst)
	stmfd	sp!, {r0, r4, r5, lr}

	// check for an overlap that needs a reverse copy (dst > src, distance < len).
	// Addresses and len are unsigned quantities, so unsigned conditions are
	// used: a signed test misses the overlap when the two buffers straddle
	// 0x80000000, because the subtraction overflows.
	subs	r3, r0, r1				// r3 = dst - src, HI if dst > src
	cmphi	r2, r3					// only if dst > src: compare len with the distance
	bhi		.L_forwardoverlap		// dst > src and len > distance

	// check for a short copy len.
	// 20 bytes is enough so that if a 16 byte alignment needs to happen there is at least a
	// wordwise copy worth of work to be done.
	cmp		r2, #(16+4)
	blt		.L_bytewise

	// see if they are similarly aligned on 4 byte boundaries
	eor		r3, r0, r1
	tst		r3, #3
	bne		.L_bytewise		// dissimilarly aligned, nothing we can do (for now)

	// check for 16 byte alignment on dst.
	// this will also catch src being not 4 byte aligned, since it is similarly 4 byte
	// aligned with dst at this point.
	tst		r0, #15
	bne		.L_not16bytealigned

	// check to see if we have at least 32 bytes of data to copy.
	// if not, just revert to wordwise copy
	cmp		r2, #32
	blt		.L_wordwise

.L_bigcopy:
	// copy 32 bytes at a time. src & dst need to be at least 4 byte aligned,
	// and we need at least 32 bytes remaining to copy

	// save r6-r7 for use in the big copy
	stmfd	sp!, {r6-r7}

	sub		r2, r2, #32		// subtract an extra 32 from the len so we can avoid an extra compare

.L_bigcopy_loop:
	// two 16 byte bursts per iteration; the subs sits between the second
	// load and store, and its flags drive the bge below
	ldmia	r1!, {r4, r5, r6, r7}
	stmia	r0!, {r4, r5, r6, r7}
	ldmia	r1!, {r4, r5, r6, r7}
	subs	r2, r2, #32
	stmia	r0!, {r4, r5, r6, r7}
	bge		.L_bigcopy_loop

	// restore r6-r7
	ldmfd	sp!, {r6-r7}

	// undo the extra subtraction and see if we are done (r2 is now 0..31)
	adds	r2, r2, #32
	beq		.L_done

	// less than 4 bytes left?
	cmp		r2, #4
	blt		.L_bytewise

.L_wordwise:
	// copy 4 bytes at a time.
	// src & dst are guaranteed to be word aligned, and at least 4 bytes are left to copy.
	subs	r2, r2, #4		// bias the len by one word so the loop can test the sign

.L_wordwise_loop:
	ldr		r3, [r1], #4
	subs	r2, r2, #4
	str		r3, [r0], #4
	bge		.L_wordwise_loop

	// correct the remaining len (now 0..3) and test for completion
	adds	r2, r2, #4
	beq		.L_done

.L_bytewise:
	// simple bytewise copy, requires at least one byte left to copy
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bgt		.L_bytewise

.L_done:
	// load dst for return and restore r4,r5
//#if ARM_ARCH_LEVEL >= 5
//	ldmfd	sp!, {r0, r4, r5, pc}
//#else
	ldmfd	sp!, {r0, r4, r5, lr}
	bx		lr
//#endif

.L_not16bytealigned:
	// dst is not 16 byte aligned, so we will copy up to 15 bytes to get it aligned.
	// src is guaranteed to be similarly word aligned with dst.

	// set the condition flags based on the alignment.
	// The top nibble of r12 becomes (16 - (dst & 15)) & 15, the number of
	// bytes needed to reach 16 byte alignment. Written to the flags this
	// gives N = 8 bytes, Z = 4 bytes, C = 2 bytes, V = 1 byte.
	lsl		r12, r0, #28
	rsb		r12, r12, #0
	msr		CPSR_f, r12				// move into NZCV fields in CPSR

	// move as many bytes as necessary to get the dst aligned
	ldrvsb	r3, [r1], #1			// V set
	ldrcsh	r4, [r1], #2			// C set
	ldreq	r5, [r1], #4			// Z set

	strvsb	r3, [r0], #1
	strcsh	r4, [r0], #2
	streq	r5, [r0], #4

	ldmmiia	r1!, {r3-r4}			// N set
	stmmiia	r0!, {r3-r4}

	// fix the remaining len
	sub		r2, r2, r12, lsr #28

	// test to see what we should do now.
	// at least 20 bytes were requested and at most 15 were just copied, so
	// at least one full word remains for the wordwise path.
	cmp		r2, #32
	bge		.L_bigcopy
	b		.L_wordwise

	// src and dst overlap with dst > src: a forward copy would overwrite
	// source bytes that have not been read yet
.L_forwardoverlap:

	// do a bytewise reverse copy for now.
	// point both registers one past the end of their buffers; the loop
	// pre-decrements before each access so the bytes touched are exactly
	// [len-1 .. 0].
	add		r1, r1, r2
	add		r0, r0, r2

.L_bytewisereverse:
	// simple bytewise reverse copy
	ldrb	r3, [r1, #-1]!
	subs	r2, r2, #1
	strb	r3, [r0, #-1]!
	bgt		.L_bytewisereverse

	b		.L_done

FUNCTION_END(memcpy)
#endif