xref: /haiku/src/system/libroot/posix/string/arch/arm/arch_string.S (revision 44d19f4d32b8f7e9c01f00294c87ca5cc2e057f7)
1/*
2** Copyright 2001, Travis Geiselbrecht. All rights reserved.
3** Distributed under the terms of the NewOS License.
4*/
5
6#include <asm_defs.h>
7
8#if 1
9
10/* that should be enough for now */
11
/*
 * void* memcpy(void* dst, const void* src, size_t len)
 * Also exported as __aeabi_memcpy.
 *
 * In:    r0 = dst, r1 = src, r2 = len
 * Out:   r0 = dst as passed in
 * Uses:  r3, r12 and the condition flags as scratch. r4-r7 are used as well,
 *        but are saved on the stack and restored before returning.
 *
 * Buffers that overlap with dst above src are copied back to front, one byte
 * at a time, so the source is not overwritten before it has been read.
 *
 * NOTE(review): the length and distance comparisons below use signed
 * conditions (lt/gt/ge), so a len of 2 GiB or more is not handled.
 */
.align 4
FUNCTION(memcpy):
FUNCTION(__aeabi_memcpy):
	// check for zero length copy or the same pointer
	cmp		r2, #0
	cmpne	r1, r0
	bxeq	lr

	// save a few registers for use and the return code (input dst)
	stmfd	sp!, {r0, r4, r5, lr}

	// check for forwards overlap (dst > src, distance < len)
	subs	r3, r0, r1				// r3 = dst - src
	cmpgt	r2, r3					// only compared when dst > src
	bgt		.L_forwardoverlap

	// check for a short copy len.
	// 20 bytes is enough so that if a 16 byte alignment needs to happen there is at least a
	//   wordwise copy worth of work to be done.
	cmp		r2, #(16+4)
	blt		.L_bytewise

	// see if they are similarly aligned on 4 byte boundaries
	eor		r3, r0, r1
	tst		r3, #3
	bne		.L_bytewise		// dissimilarly aligned, nothing we can do (for now)

	// check for 16 byte alignment on dst.
	// this will also catch src being not 4 byte aligned, since it is similarly 4 byte
	//   aligned with dst at this point.
	tst		r0, #15
	bne		.L_not16bytealigned

	// check to see if we have at least 32 bytes of data to copy.
	// if not, just revert to wordwise copy
	cmp		r2, #32
	blt		.L_wordwise

.L_bigcopy:
	// copy 32 bytes at a time. src & dst need to be at least 4 byte aligned,
	// and we need at least 32 bytes remaining to copy

	// save r6-r7 for use in the big copy
	stmfd	sp!, {r6-r7}

	sub		r2, r2, #32		// subtract an extra 32 from the len so we can avoid an extra compare

.L_bigcopy_loop:
	// two 16 byte moves per iteration; r2 stays biased by -32 inside the loop
	ldmia	r1!, {r4, r5, r6, r7}
	stmia	r0!, {r4, r5, r6, r7}
	ldmia	r1!, {r4, r5, r6, r7}
	subs	r2, r2, #32
	stmia	r0!, {r4, r5, r6, r7}	// stmia leaves the flags from subs intact
	bge		.L_bigcopy_loop

	// restore r6-r7
	ldmfd	sp!, {r6-r7}

	// undo the bias and see if we are done
	adds	r2, r2, #32
	beq		.L_done

	// less than 4 bytes left?
	cmp		r2, #4
	blt		.L_bytewise

.L_wordwise:
	// copy 4 bytes at a time.
	// src & dst are guaranteed to be word aligned, and at least 4 bytes are left to copy.
	subs	r2, r2, #4				// bias the len by -4, same trick as the big copy

.L_wordwise_loop:
	ldr		r3, [r1], #4
	subs	r2, r2, #4
	str		r3, [r0], #4			// str leaves the flags from subs intact
	bge		.L_wordwise_loop

	// correct the remaining len and test for completion
	adds	r2, r2, #4
	beq		.L_done

.L_bytewise:
	// simple bytewise copy; every branch to here arrives with len > 0
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bgt		.L_bytewise

.L_done:
	// load dst for return and restore r4,r5
//#if ARM_ARCH_LEVEL >= 5
//	ldmfd	sp!, {r0, r4, r5, pc}
//#else
	ldmfd	sp!, {r0, r4, r5, lr}
	bx		lr
//#endif

.L_not16bytealigned:
	// dst is not 16 byte aligned, so we will copy up to 15 bytes to get it aligned.
	// src is guaranteed to be similarly word aligned with dst.

	// set the condition flags based on the alignment.
	// the top nibble of r12 becomes (16 - (dst & 15)) & 15, the number of bytes
	// still missing to the next 16 byte boundary. moved into the flags that is
	// N = 8 bytes, Z = 4 bytes, C = 2 bytes, V = 1 byte.
	lsl		r12, r0, #28
	rsb		r12, r12, #0
	msr		CPSR_f, r12				// move into NZCV fields in CPSR

	// move as many bytes as necessary to get the dst aligned.
	// none of the loads and stores below change the flags.
	// clang only accepts the unified (UAL) spelling of the conditional mnemonics,
	// the other branch keeps the pre-UAL spelling.
#ifdef __clang__
	ldrbvs	r3, [r1], #1			// V set
	ldrhcs	r4, [r1], #2			// C set
	ldreq	r5, [r1], #4			// Z set

	strbvs	r3, [r0], #1
	strhcs	r4, [r0], #2
	streq	r5, [r0], #4

	ldmiami	r1!, {r3-r4}			// N set
	stmiami	r0!, {r3-r4}
#else
	ldrvsb	r3, [r1], #1			// V set
	ldrcsh	r4, [r1], #2			// C set
	ldreq	r5, [r1], #4			// Z set

	strvsb	r3, [r0], #1
	strcsh	r4, [r0], #2
	streq	r5, [r0], #4

	ldmmiia r1!, {r3-r4}			// N set
	stmmiia r0!, {r3-r4}
#endif

	// fix the remaining len
	sub		r2, r2, r12, lsr #28

	// test to see what we should do now.
	// len was at least 20 and at most 15 bytes were consumed, so there is
	// always at least one full word left for the wordwise copy.
	cmp		r2, #32
	bge		.L_bigcopy
	b		.L_wordwise

	// src and dest overlap 'forwards' (dst > src)
.L_forwardoverlap:

	// do a bytewise reverse copy for now.
	// point both registers one past the last byte of their buffer; the loop
	// below steps back before every access.
	add		r1, r1, r2
	add		r0, r0, r2

.L_bytewisereverse:
	// simple bytewise reverse copy.
	// the pre-indexed addressing with writeback decrements first and then
	// accesses, so the bytes touched are [len - 1] down to [0]. post-indexed
	// addressing would start at [len], one byte past the end, and never
	// reach [0].
	ldrb	r3, [r1, #-1]!
	subs	r2, r2, #1
	strb	r3, [r0, #-1]!
	bgt		.L_bytewisereverse

	b		.L_done

FUNCTION_END(memcpy)
FUNCTION_END(__aeabi_memcpy)
170#endif
171