/*
** Copyright 2001, Travis Geiselbrecht. All rights reserved.
** Distributed under the terms of the NewOS License.
*/

#include <asm_defs.h>

//#warning M68K: optimize memcpy
#if 1

/* that should be enough for now */

.align 4
FUNCTION(memcpy):
	// check for zero length copy or the same pointer
	cmp		r2, #0
	cmpne	r1, r0
	bxeq	lr

	// save a few registers for use and the return code (input dst)
	stmfd	sp!, {r0, r4, r5, lr}

	// check for forwards overlap (src > dst, distance < len)
	subs	r3, r0, r1
	cmpgt	r2, r3
	bgt		.L_forwardoverlap

	// check for a short copy len.
	// 20 bytes is enough so that if a 16 byte alignment needs to happen there is at least a
	// wordwise copy worth of work to be done.
	cmp		r2, #(16+4)
	blt		.L_bytewise

	// see if they are similarly aligned on 4 byte boundaries
	eor		r3, r0, r1
	tst		r3, #3
	bne		.L_bytewise		// dissimilarly aligned, nothing we can do (for now)

	// check for 16 byte alignment on dst.
	// this will also catch src being not 4 byte aligned, since it is similarly 4 byte
	// aligned with dst at this point.
	tst		r0, #15
	bne		.L_not16bytealigned

	// check to see if we have at least 32 bytes of data to copy.
	// if not, just revert to wordwise copy
	cmp		r2, #32
	blt		.L_wordwise

.L_bigcopy:
	// copy 32 bytes at a time. src & dst need to be at least 4 byte aligned,
	// and we need at least 32 bytes remaining to copy

	// save r6-r7 for use in the big copy
	stmfd	sp!, {r6-r7}

	sub		r2, r2, #32		// subtract an extra 32 from the len so we can avoid an extra compare

.L_bigcopy_loop:
	ldmia	r1!, {r4, r5, r6, r7}
	stmia	r0!, {r4, r5, r6, r7}
	ldmia	r1!, {r4, r5, r6, r7}
	subs	r2, r2, #32
	stmia	r0!, {r4, r5, r6, r7}
	bge		.L_bigcopy_loop

	// restore r6-r7
	ldmfd	sp!, {r6-r7}

	// see if we are done
	adds	r2, r2, #32
	beq		.L_done

	// less than 4 bytes left?
	cmp		r2, #4
	blt		.L_bytewise

.L_wordwise:
	// copy 4 bytes at a time.
	// src & dst are guaranteed to be word aligned, and at least 4 bytes are left to copy.
	subs	r2, r2, #4

.L_wordwise_loop:
	ldr		r3, [r1], #4
	subs	r2, r2, #4
	str		r3, [r0], #4
	bge		.L_wordwise_loop

	// correct the remaining len and test for completion
	adds	r2, r2, #4
	beq		.L_done

.L_bytewise:
	// simple bytewise copy
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bgt		.L_bytewise

.L_done:
	// load dst for return and restore r4, r5
//#if ARM_ARCH_LEVEL >= 5
//	ldmfd	sp!, {r0, r4, r5, pc}
//#else
	ldmfd	sp!, {r0, r4, r5, lr}
	bx		lr
//#endif

.L_not16bytealigned:
	// dst is not 16 byte aligned, so we will copy up to 15 bytes to get it aligned.
	// src is guaranteed to be similarly word aligned with dst.

	// set the condition flags based on the alignment.
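	// Trick: r0 & 15 is how far dst sits past a 16 byte boundary, so negating it
	// (mod 16) in the top nibble of r12 gives the byte count needed to reach
	// alignment. msr copies bits 31..28 into N, Z, C and V, so the conditional
	// loads/stores below move 8 (mi), 4 (eq), 2 (cs) and 1 (vs) bytes respectively.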
	lsl		r12, r0, #28
	rsb		r12, r12, #0
	msr		CPSR_f, r12		// move into NZCV fields in CPSR

	// move as many bytes as necessary to get the dst aligned
	ldrvsb	r3, [r1], #1	// V set
	ldrcsh	r4, [r1], #2	// C set
	ldreq	r5, [r1], #4	// Z set

	strvsb	r3, [r0], #1
	strcsh	r4, [r0], #2
	streq	r5, [r0], #4

	ldmmiia	r1!, {r3-r4}	// N set
	stmmiia	r0!, {r3-r4}

	// fix the remaining len
	sub		r2, r2, r12, lsr #28

	// test to see what we should do now
	cmp		r2, #32
	bge		.L_bigcopy
	b		.L_wordwise

	// src and dest overlap 'forwards' or dst > src
.L_forwardoverlap:

	// do a bytewise reverse copy for now
	add		r1, r1, r2
	add		r0, r0, r2

.L_bytewisereverse:
	// simple bytewise reverse copy, pre-decrementing so the last byte is copied
	// first and the byte at offset 0 is copied last
	ldrb	r3, [r1, #-1]!
	subs	r2, r2, #1
	strb	r3, [r0, #-1]!
	bgt		.L_bytewisereverse

	b		.L_done

FUNCTION_END(memcpy)
#endif