/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <stdint.h>
#include <sys/cdefs.h>
// __FBSDID("$FreeBSD: head/sys/arm/arm/stdatomic.c 255738 2013-09-20 20:44:32Z zbb $");

#define __SYNC_ATOMICS
#define __strong_reference(sym,aliassym) \
        extern __typeof (sym) aliassym __attribute__ ((__alias__ (#sym)))

#include <sys/param.h>
#include <sys/types.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do { \
        register_t regs; \
\
        regs = intr_disable(); \
        do s while (0); \
        intr_restore(regs); \
} while (0)
#endif /* _KERNEL && !SMP */

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

        __asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

        __asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

        __asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */
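
/*
 * Illustrative sketch only (kept out of the build): on these ARMv6/ARMv7
 * targets the compiler lowers <stdatomic.h> operations straight to
 * ldrex/strex loops, so none of the fallback code below is needed.
 */
#if 0
#include <stdatomic.h>

static atomic_uint example_counter;

static unsigned int
example_increment(void)
{

        /* Compiles to an ldrex/strex retry loop; no library call is emitted. */
        return (atomic_fetch_add_explicit(&example_counter, 1,
            memory_order_relaxed));
}
#endif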

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_LOAD_N(N, uintN_t) \
uintN_t \
__atomic_load_##N(uintN_t *mem, int model __unused) \
{ \
        uintN_t ret; \
\
        WITHOUT_INTERRUPTS({ \
                ret = *mem; \
        }); \
        return (ret); \
}

#define EMIT_STORE_N(N, uintN_t) \
void \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
\
        WITHOUT_INTERRUPTS({ \
                *mem = val; \
        }); \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \
_Bool \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected, \
    uintN_t desired, int success __unused, int failure __unused) \
{ \
        _Bool ret; \
\
        WITHOUT_INTERRUPTS({ \
                if (*mem == *expected) { \
                        *mem = desired; \
                        ret = 1; \
                } else { \
                        *expected = *mem; \
                        ret = 0; \
                } \
        }); \
        return (ret); \
}

#define EMIT_FETCH_OP_N(N, uintN_t, name, op) \
uintN_t \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
        uintN_t ret; \
\
        WITHOUT_INTERRUPTS({ \
                ret = *mem; \
                *mem op val; \
        }); \
        return (ret); \
}

#define EMIT_ALL_OPS_N(N, uintN_t) \
EMIT_LOAD_N(N, uintN_t) \
EMIT_STORE_N(N, uintN_t) \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \
EMIT_FETCH_OP_N(N, uintN_t, exchange, =) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=) \
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define EMIT_LOAD_N(N, uintN_t) \
uintN_t \
__atomic_load_##N(uintN_t *mem, int model __unused) \
{ \
\
        return (*mem); \
}

#define EMIT_STORE_N(N, uintN_t) \
void \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
\
        *mem = val; \
}

#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \
uintN_t \
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
        uint32_t old, temp, ras_start; \
\
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
                /* Set up Restartable Atomic Sequence. */ \
                "1:" \
                "\tadr %2, 1b\n" \
                "\tstr %2, [%5]\n" \
                "\tadr %2, 2f\n" \
                "\tstr %2, [%5, #4]\n" \
\
                "\t"ldr" %0, %4\n" /* Load old value. */ \
                "\t"str" %3, %1\n" /* Store new value. */ \
\
                /* Tear down Restartable Atomic Sequence. */ \
                "2:" \
                "\tmov %2, #0x00000000\n" \
                "\tstr %2, [%5]\n" \
                "\tmov %2, #0xffffffff\n" \
                "\tstr %2, [%5, #4]\n" \
                : "=&r" (old), "=m" (*mem), "=&r" (temp) \
                : "r" (val), "m" (*mem), "r" (ras_start)); \
        return (old); \
}
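
/*
 * A note on the Restartable Atomic Sequences used above and below
 * (descriptive only): the two words at ARM_RAS_START are expected to
 * hold the start and end address of the currently active sequence.  If
 * the thread is preempted while its PC lies in that range, the kernel
 * rewinds the PC to the recorded start address, so the load and store
 * between "1:" and "2:" re-execute from the top and behave atomically
 * with respect to other code on the (single) CPU.  Writing 0 and
 * 0xffffffff afterwards marks the region as unused again.
 */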

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \
_Bool \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected, \
    uintN_t desired, int success __unused, int failure __unused) \
{ \
        uint32_t expected, old, temp, ras_start; \
\
        expected = *pexpected; \
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
                /* Set up Restartable Atomic Sequence. */ \
                "1:" \
                "\tadr %2, 1b\n" \
                "\tstr %2, [%6]\n" \
                "\tadr %2, 2f\n" \
                "\tstr %2, [%6, #4]\n" \
\
                "\t"ldr" %0, %5\n" /* Load old value. */ \
                "\tcmp %0, %3\n" /* Compare to expected value. */ \
                "\t"streq" %4, %1\n" /* Store new value. */ \
\
                /* Tear down Restartable Atomic Sequence. */ \
                "2:" \
                "\tmov %2, #0x00000000\n" \
                "\tstr %2, [%6]\n" \
                "\tmov %2, #0xffffffff\n" \
                "\tstr %2, [%6, #4]\n" \
                : "=&r" (old), "=m" (*mem), "=&r" (temp) \
                : "r" (expected), "r" (desired), "m" (*mem), \
                  "r" (ras_start)); \
        if (old == expected) { \
                return (1); \
        } else { \
                *pexpected = old; \
                return (0); \
        } \
}

#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op) \
uintN_t \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \
{ \
        uint32_t old, temp, ras_start; \
\
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
                /* Set up Restartable Atomic Sequence. */ \
                "1:" \
                "\tadr %2, 1b\n" \
                "\tstr %2, [%5]\n" \
                "\tadr %2, 2f\n" \
                "\tstr %2, [%5, #4]\n" \
\
                "\t"ldr" %0, %4\n" /* Load old value. */ \
                "\t"op" %2, %0, %3\n" /* Calculate new value. */ \
                "\t"str" %2, %1\n" /* Store new value. */ \
\
                /* Tear down Restartable Atomic Sequence. */ \
                "2:" \
                "\tmov %2, #0x00000000\n" \
                "\tstr %2, [%5]\n" \
                "\tmov %2, #0xffffffff\n" \
                "\tstr %2, [%5, #4]\n" \
                : "=&r" (old), "=m" (*mem), "=&r" (temp) \
                : "r" (val), "m" (*mem), "r" (ras_start)); \
        return (old); \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \
EMIT_LOAD_N(N, uintN_t) \
EMIT_STORE_N(N, uintN_t) \
EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add") \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and") \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr") \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub") \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef EMIT_ALL_OPS_N
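
/*
 * A usage sketch (not compiled): with the instantiations above in place,
 * a C11 caller such as the following ends up in __atomic_fetch_add_2(),
 * i.e. in the Restartable Atomic Sequence implementation.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint16_t example_refcount;

static uint16_t
example_grab(void)
{

        /* Lowered by the compiler to a call to __atomic_fetch_add_2(). */
        return (atomic_fetch_add_explicit(&example_refcount, 1,
            memory_order_relaxed));
}
#endif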

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
        uint8_t v8[4];
        uint32_t v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

        return ((uint32_t *)((intptr_t)ptr & ~3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
        size_t offset;
        union {
                uint16_t in;
                uint8_t out[2];
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in = val;
        r->v8[offset] = bytes.out[0];
        r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
        size_t offset;
        union {
                uint8_t in[2];
                uint16_t out;
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in[0] = r->v8[offset];
        bytes.in[1] = r->v8[offset + 1];
        return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \
uintN_t \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t *mem32; \
        reg_t val32, negmask, old; \
        uint32_t temp1, temp2; \
\
        mem32 = round_to_word(mem); \
        val32.v32 = 0x00000000; \
        put_##N(&val32, mem, val); \
        negmask.v32 = 0xffffffff; \
        put_##N(&negmask, mem, 0); \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tldrex %0, %6\n" /* Load old value. */ \
                "\tand %2, %5, %0\n" /* Remove the old value. */ \
                "\torr %2, %2, %4\n" /* Put in the new value. */ \
                "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
                "\tcmp %3, #0\n" /* Did it succeed? */ \
                "\tbne 1b\n" /* Spin if failed. */ \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
                  "=&r" (temp2) \
                : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
uintN_t \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \
    uintN_t desired) \
{ \
        uint32_t *mem32; \
        reg_t expected32, desired32, posmask, old; \
        uint32_t negmask, temp1, temp2; \
\
        mem32 = round_to_word(mem); \
        expected32.v32 = 0x00000000; \
        put_##N(&expected32, mem, expected); \
        desired32.v32 = 0x00000000; \
        put_##N(&desired32, mem, desired); \
        posmask.v32 = 0x00000000; \
        put_##N(&posmask, mem, ~0); \
        negmask = ~posmask.v32; \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tldrex %0, %8\n" /* Load old value. */ \
                "\tand %2, %6, %0\n" /* Isolate the old value. */ \
                "\tcmp %2, %4\n" /* Compare to expected value. */ \
                "\tbne 2f\n" /* Values are unequal. */ \
                "\tand %2, %7, %0\n" /* Remove the old value. */ \
                "\torr %2, %5\n" /* Put in the new value. */ \
                "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
                "\tcmp %3, #0\n" /* Did it succeed? */ \
                "\tbne 1b\n" /* Spin if failed. */ \
                "2:" \
                : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \
                  "=&r" (temp2) \
                : "r" (expected32.v32), "r" (desired32.v32), \
                  "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t *mem32; \
        reg_t val32, posmask, old; \
        uint32_t negmask, temp1, temp2; \
\
        mem32 = round_to_word(mem); \
        val32.v32 = 0x00000000; \
        put_##N(&val32, mem, val); \
        posmask.v32 = 0x00000000; \
        put_##N(&posmask, mem, ~0); \
        negmask = ~posmask.v32; \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tldrex %0, %7\n" /* Load old value. */ \
                "\t"op" %2, %0, %4\n" /* Calculate new value. */ \
                "\tand %2, %5\n" /* Isolate the new value. */ \
                "\tand %3, %6, %0\n" /* Remove the old value. */ \
                "\torr %2, %2, %3\n" /* Put in the new value. */ \
                "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
                "\tcmp %3, #0\n" /* Did it succeed? */ \
                "\tbne 1b\n" /* Spin if failed. */ \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
                  "=&r" (temp2) \
                : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \
                  "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t *mem32; \
        reg_t val32, old; \
        uint32_t temp1, temp2; \
\
        mem32 = round_to_word(mem); \
        val32.v32 = idempotence ? 0xffffffff : 0x00000000; \
        put_##N(&val32, mem, val); \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tldrex %0, %5\n" /* Load old value. */ \
                "\t"op" %2, %4, %0\n" /* Calculate new value. */ \
                "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
                "\tcmp %3, #0\n" /* Did it succeed? */ \
                "\tbne 1b\n" /* Spin if failed. */ \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
                  "=&r" (temp2) \
                : "r" (val32.v32), "m" (*mem32)); \
        return (get_##N(&old, mem)); \
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
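
/*
 * Worked example for the sub-word routines above (little-endian case):
 * for a uint16_t at address 0x1002, round_to_word() yields 0x1000 and
 * the offset within the word is 2, so put_2(&posmask, mem, ~0) sets
 * posmask.v32 to 0xffff0000 and negmask becomes 0x0000ffff.  The
 * ldrex/strex loop then rewrites only byte lanes 2-3 of the containing
 * word while preserving lanes 0-1.
 */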

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %4\n" /* Load old value. */
                "\tstrex %2, %3, %1\n" /* Attempt to store. */
                "\tcmp %2, #0\n" /* Did it succeed? */
                "\tbne 1b\n" /* Spin if failed. */
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (val), "m" (*mem));
        return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %5\n" /* Load old value. */
                "\tcmp %0, %3\n" /* Compare to expected value. */
                "\tbne 2f\n" /* Values are unequal. */
                "\tstrex %2, %4, %1\n" /* Attempt to store. */
                "\tcmp %2, #0\n" /* Did it succeed? */
                "\tbne 1b\n" /* Spin if failed. */
                "2:"
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (expected), "r" (desired), "m" (*mem));
        return (old);
}

#define EMIT_FETCH_AND_OP_4(name, op) \
uint32_t \
__sync_##name##_4##_c(uint32_t *mem, uint32_t val) \
{ \
        uint32_t old, temp1, temp2; \
\
        do_sync(); \
        __asm volatile ( \
                "1:" \
                "\tldrex %0, %5\n" /* Load old value. */ \
                "\t"op" %2, %0, %4\n" /* Calculate new value. */ \
                "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
                "\tcmp %3, #0\n" /* Did it succeed? */ \
                "\tbne 1b\n" /* Spin if failed. */ \
                : "=&r" (old), "=m" (*mem), "=&r" (temp1), \
                  "=&r" (temp2) \
                : "r" (val), "m" (*mem)); \
        return (old); \
}

EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
uintN_t \
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \
    uintN_t desired) \
{ \
        uintN_t ret; \
\
        WITHOUT_INTERRUPTS({ \
                ret = *mem; \
                if (*mem == expected) \
                        *mem = desired; \
        }); \
        return (ret); \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op) \
uintN_t \
__sync_##name##_##N(uintN_t *mem, uintN_t val) \
{ \
        uintN_t ret; \
\
        WITHOUT_INTERRUPTS({ \
                ret = *mem; \
                *mem op val; \
        }); \
        return (ret); \
}

#define EMIT_ALL_OPS_N(N, uintN_t) \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=) \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \
uintN_t \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t old, temp, ras_start; \
\
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
                /* Set up Restartable Atomic Sequence. */ \
                "1:" \
                "\tadr %2, 1b\n" \
                "\tstr %2, [%5]\n" \
                "\tadr %2, 2f\n" \
                "\tstr %2, [%5, #4]\n" \
\
                "\t"ldr" %0, %4\n" /* Load old value. */ \
                "\t"str" %3, %1\n" /* Store new value. */ \
\
                /* Tear down Restartable Atomic Sequence. */ \
                "2:" \
                "\tmov %2, #0x00000000\n" \
                "\tstr %2, [%5]\n" \
                "\tmov %2, #0xffffffff\n" \
                "\tstr %2, [%5, #4]\n" \
                : "=&r" (old), "=m" (*mem), "=&r" (temp) \
                : "r" (val), "m" (*mem), "r" (ras_start)); \
        return (old); \
}

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \
uintN_t \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \
    uintN_t desired) \
{ \
        uint32_t old, temp, ras_start; \
\
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
                /* Set up Restartable Atomic Sequence. */ \
                "1:" \
                "\tadr %2, 1b\n" \
                "\tstr %2, [%6]\n" \
                "\tadr %2, 2f\n" \
                "\tstr %2, [%6, #4]\n" \
\
                "\t"ldr" %0, %5\n" /* Load old value. */ \
                "\tcmp %0, %3\n" /* Compare to expected value. */ \
                "\t"streq" %4, %1\n" /* Store new value. */ \
\
                /* Tear down Restartable Atomic Sequence. */ \
                "2:" \
                "\tmov %2, #0x00000000\n" \
                "\tstr %2, [%6]\n" \
                "\tmov %2, #0xffffffff\n" \
                "\tstr %2, [%6, #4]\n" \
                : "=&r" (old), "=m" (*mem), "=&r" (temp) \
                : "r" (expected), "r" (desired), "m" (*mem), \
                  "r" (ras_start)); \
        return (old); \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
        uint32_t old, temp, ras_start; \
\
        ras_start = ARM_RAS_START; \
        __asm volatile ( \
                /* Set up Restartable Atomic Sequence. */ \
                "1:" \
                "\tadr %2, 1b\n" \
                "\tstr %2, [%5]\n" \
                "\tadr %2, 2f\n" \
                "\tstr %2, [%5, #4]\n" \
\
                "\t"ldr" %0, %4\n" /* Load old value. */ \
                "\t"op" %2, %0, %3\n" /* Calculate new value. */ \
                "\t"str" %2, %1\n" /* Store new value. */ \
\
                /* Tear down Restartable Atomic Sequence. */ \
                "2:" \
                "\tmov %2, #0x00000000\n" \
                "\tstr %2, [%5]\n" \
                "\tmov %2, #0xffffffff\n" \
                "\tstr %2, [%5, #4]\n" \
                : "=&r" (old), "=m" (*mem), "=&r" (temp) \
                : "r" (val), "m" (*mem), "r" (ras_start)); \
        return (old); \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub") \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */
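
/*
 * A final usage sketch (not compiled): code built with a toolchain that
 * lacks native atomics reaches the functions in this file through the
 * legacy builtins, e.g. __sync_fetch_and_or() on a 16-bit object lowers
 * to a call to __sync_fetch_and_or_2 as provided above.
 */
#if 0
#include <stdint.h>

static volatile uint16_t example_flags;

static uint16_t
example_set_flag(uint16_t bit)
{

        /* Resolves to __sync_fetch_and_or_2 from this file. */
        return (__sync_fetch_and_or(&example_flags, bit));
}
#endif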