/* xref: /haiku/src/system/libroot/os/arch/arm/stdatomic.c (revision ed24eb5ff12640d052171c6a7feba37fab8a75d1) */
/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <stdint.h>
#include <sys/cdefs.h>
// __FBSDID("$FreeBSD: head/sys/arm/arm/stdatomic.c 255738 2013-09-20 20:44:32Z zbb $");

#define __SYNC_ATOMICS
#define __strong_reference(sym,aliassym)        \
	extern __typeof (sym) aliassym __attribute__ ((__alias__ (#sym)))
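
/*
 * For illustration, __strong_reference(sym, aliassym) declares aliassym as a
 * strong alias of sym. For example,
 *
 *	__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
 *
 * expands to
 *
 *	extern __typeof (__sync_fetch_and_add_4_c) __sync_fetch_and_add_4
 *	    __attribute__ ((__alias__ ("__sync_fetch_and_add_4_c")));
 *
 * so both symbols resolve to the same definition.
 */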

#include <sys/param.h>
#include <sys/types.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
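
/*
 * A use such as WITHOUT_INTERRUPTS({ ret = *mem; *mem += val; }), as in the
 * macros further down, therefore expands to roughly
 *
 *	do {
 *		register_t regs;
 *
 *		regs = intr_disable();
 *		do { ret = *mem; *mem += val; } while (0);
 *		intr_restore(regs);
 *	} while (0);
 *
 * i.e. the compound statement runs with interrupts masked, which is enough
 * for atomicity on a uniprocessor kernel.
 */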

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif
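
/*
 * Roughly speaking, the three variants of do_sync() above are: a
 * compiler-only barrier (sufficient on a non-SMP kernel, where only
 * interrupt ordering matters), the ARMv7 DMB instruction, and the ARMv6
 * CP15 c7/c10/5 "Data Memory Barrier" operation that DMB later replaced.
 * All three are used below as full barriers, per the comment above.
 */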

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N
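
/*
 * As a concrete example, EMIT_FETCH_OP_N(4, uint32_t, fetch_add, +=) above
 * expands to (modulo whitespace):
 *
 *	uint32_t
 *	__atomic_fetch_add_4(uint32_t *mem, uint32_t val, int model __unused)
 *	{
 *		uint32_t ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			ret = *mem;
 *			*mem += val;
 *		});
 *		return (ret);
 *	}
 *
 * which is what a <stdatomic.h> call such as atomic_fetch_add_explicit() on
 * a 32-bit object resolves to in this configuration.
 */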

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)			\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef	EMIT_ALL_OPS_N
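
/*
 * A sketch of the RAS contract the sequences above rely on (as in the
 * FreeBSD code this file derives from): the two words at ARM_RAS_START hold
 * the start and end address of the currently running atomic sequence.
 * Before the critical load/modify/store, the code stores the addresses of
 * labels 1: and 2: there; afterwards it resets them to 0 and 0xffffffff.
 * If the kernel preempts the thread while its PC lies between the recorded
 * start and end, it rolls the PC back to the start, so the sequence reruns
 * from the beginning instead of exposing a partial update.
 */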

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma	redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma	redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma	redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma	redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma	redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma	redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma	redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma	redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma	redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma	redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma	redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma	redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma	redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma	redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma	redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma	redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma	redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma	redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
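
/*
 * For example, a uint16_t at address 0x8003a rounds down to the word at
 * 0x80038, and the byte offset 0x8003a & 3 == 2 selects its position within
 * that word; the ldrex/strex loops below then operate on the aligned
 * 32-bit word.
 */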

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
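
/*
 * Continuing the example above: for a uint16_t at byte offset 2,
 * put_2(&r, mem, 0x1234) copies the two bytes of 0x1234 (in native byte
 * order) into r.v8[2] and r.v8[3], and get_2(&r, mem) reassembles them.
 * Only the bytes covered by the variable are touched; the rest of the
 * containing word is handled by the masks built in the callers below.
 */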

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
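
/*
 * The mask trick, spelled out: negmask has all bits set except those that
 * belong to the variable, and val32 holds the new value already shifted
 * into its byte position. Inside the ldrex/strex loop the new word is
 * therefore computed as
 *
 *	new32 = (old32 & negmask) | val32;
 *
 * so neighbouring bytes in the same 32-bit word are preserved while the
 * target byte or halfword is replaced atomically.
 */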

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
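
/*
 * The idempotence argument selects the fill value for the padding bytes of
 * val32: 1 pads with 0xff, 0 pads with 0x00. The padding must be the
 * identity element of the operation ("and" with all-one bits and
 * "orr"/"eor" with all-zero bits leave a byte unchanged), so only the byte
 * or halfword actually being updated is affected.
 */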

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */