xref: /haiku/src/system/libroot/posix/string/arch/arm/arch_string.S (revision 83b1a68c52ba3e0e8796282759f694b7fdddf06d)
1/*
2** Copyright 2001, Travis Geiselbrecht. All rights reserved.
3** Distributed under the terms of the NewOS License.
4*/
5
6#include <asm_defs.h>
7
8#if 1
9
10/* that should be enough for now */
11
12.align 4
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * In:    r0 = dst, r1 = src, r2 = len
 * Out:   r0 = dst (the original value, reloaded from the stack)
 * Uses:  r1, r2, r3, r12 and the condition flags as scratch;
 *        r4-r7 are pushed and popped around their use.
 *
 * Strategy:
 *  - len == 0 or dst == src: return immediately.
 *  - dst above src with the two ranges overlapping: copy bytewise from the
 *    end downwards, so no source byte is overwritten before it is read.
 *  - fewer than 20 bytes, or src/dst not equally aligned modulo 4: bytewise.
 *  - otherwise: bring dst to a 16 byte boundary, copy 32 bytes per loop
 *    iteration, then finish wordwise and bytewise.
 *
 * Note: the length compares and loop counters use signed conditions
 * (blt/bge/bgt), so len is expected to stay below 2^31.
 */
FUNCTION(memcpy):
	// nothing to do for a zero length copy or when src == dst
	cmp		r2, #0
	cmpne	r1, r0
	bxeq	lr

	// save a few registers for use and the return code (input dst)
	stmfd	sp!, {r0, r4, r5, lr}

	// check for forwards overlap (dst > src, distance < len).
	// addresses are unsigned, so use the unsigned conditions: "hi" after the
	// subs means dst > src, and the conditional compare then tests
	// len > (dst - src). if dst <= src the cmphi is skipped and the flags
	// still read "not hi", so the branch is not taken.
	subs	r3, r0, r1
	cmphi	r2, r3
	bhi		.L_forwardoverlap

	// check for a short copy len.
	// 20 bytes is enough so that if a 16 byte alignment needs to happen there is at least a
	//   wordwise copy worth of work to be done.
	cmp		r2, #(16+4)
	blt		.L_bytewise

	// see if they are similarly aligned on 4 byte boundaries
	eor		r3, r0, r1
	tst		r3, #3
	bne		.L_bytewise		// dissimilarly aligned, nothing we can do (for now)

	// check for 16 byte alignment on dst.
	// this will also catch src being not 4 byte aligned, since it is similarly 4 byte
	//   aligned with dst at this point.
	tst		r0, #15
	bne		.L_not16bytealigned

	// check to see if we have at least 32 bytes of data to copy.
	// if not, just revert to wordwise copy
	cmp		r2, #32
	blt		.L_wordwise

.L_bigcopy:
	// copy 32 bytes at a time. src & dst need to be at least 4 byte aligned,
	// and we need at least 32 bytes remaining to copy

	// save r6-r7 for use in the big copy
	stmfd	sp!, {r6-r7}

	sub		r2, r2, #32		// subtract an extra 32 from the len so we can avoid an extra compare

.L_bigcopy_loop:
	// two 16 byte bursts per iteration; r2 goes negative once fewer than
	// 32 bytes remain after this iteration
	ldmia	r1!, {r4, r5, r6, r7}
	stmia	r0!, {r4, r5, r6, r7}
	ldmia	r1!, {r4, r5, r6, r7}
	subs	r2, r2, #32
	stmia	r0!, {r4, r5, r6, r7}
	bge		.L_bigcopy_loop

	// restore r6-r7
	ldmfd	sp!, {r6-r7}

	// undo the extra subtraction and see if we are done
	adds	r2, r2, #32
	beq		.L_done

	// less than 4 bytes left?
	cmp		r2, #4
	blt		.L_bytewise

.L_wordwise:
	// copy 4 bytes at a time.
	// src & dst are guaranteed to be word aligned, and at least 4 bytes are left to copy.
	// as in the big copy, bias the len by one step to save a compare in the loop.
	subs	r2, r2, #4

.L_wordwise_loop:
	ldr		r3, [r1], #4
	subs	r2, r2, #4
	str		r3, [r0], #4
	bge		.L_wordwise_loop

	// correct the remaining len and test for completion
	adds	r2, r2, #4
	beq		.L_done

.L_bytewise:
	// simple bytewise copy; r2 is at least 1 on every entry to this loop
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bgt		.L_bytewise

.L_done:
	// load dst for return and restore r4,r5
//#if ARM_ARCH_LEVEL >= 5
//	ldmfd	sp!, {r0, r4, r5, pc}
//#else
	ldmfd	sp!, {r0, r4, r5, lr}
	bx		lr
//#endif

.L_not16bytealigned:
	// dst is not 16 byte aligned, so we will copy up to 15 bytes to get it aligned.
	// src is guaranteed to be similarly word aligned with dst.

	// set the condition flags based on the alignment.
	// r12 = -(dst << 28), so its top nibble holds the number of bytes needed
	// to reach the next 16 byte boundary. written to the flags that nibble
	// reads as N = 8 bytes, Z = 4 bytes, C = 2 bytes, V = 1 byte.
	lsl		r12, r0, #28
	rsb		r12, r12, #0
	msr		CPSR_f, r12				// move into NZCV fields in CPSR

	// move as many bytes as necessary to get the dst aligned.
	// smallest unit first, so every wider access is naturally aligned.
	ldrvsb	r3, [r1], #1			// V set
	ldrcsh	r4, [r1], #2			// C set
	ldreq	r5, [r1], #4			// Z set

	strvsb	r3, [r0], #1
	strcsh	r4, [r0], #2
	streq	r5, [r0], #4

	ldmmiia	r1!, {r3-r4}			// N set
	stmmiia	r0!, {r3-r4}

	// fix the remaining len
	sub		r2, r2, r12, lsr #28

	// test to see what we should do now.
	// len was at least 20 and at most 15 bytes were consumed above, so a
	// full word is always left for the wordwise path.
	cmp		r2, #32
	bge		.L_bigcopy
	b		.L_wordwise

	// src and dest overlap 'forwards' (dst > src)
.L_forwardoverlap:

	// do a bytewise reverse copy for now.
	// both pointers are moved to one past the end of their buffers.
	add		r1, r1, r2
	add		r0, r0, r2

.L_bytewisereverse:
	// simple bytewise reverse copy.
	// pre-decrement with writeback, so the first byte moved is
	// src[len - 1] -> dst[len - 1] and the last one is src[0] -> dst[0].
	ldrb	r3, [r1, #-1]!
	subs	r2, r2, #1
	strb	r3, [r0, #-1]!
	bgt		.L_bytewisereverse

	b		.L_done

FUNCTION_END(memcpy)
155#endif
156