HIP: Heterogeneous-computing Interface for Portability
amd_hip_atomic.h
1/*
2Copyright (c) 2015 - Present Advanced Micro Devices, Inc. All rights reserved.
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22
23#pragma once
24
25#include "amd_device_functions.h"
26
27#if __has_builtin(__hip_atomic_compare_exchange_strong)
28
// Minimal compile-time type selector: Cond_t<B, T, F>::type is T when B is
// true, F when B is false. Equivalent in spirit to std::conditional —
// presumably kept local so this header does not pull in <type_traits>
// (e.g. for hipRTC builds); TODO confirm.
template<bool B, typename T, typename F> struct Cond_t;

template<typename T, typename F> struct Cond_t<true, T, F> { using type = T; };
template<typename T, typename F> struct Cond_t<false, T, F> { using type = F; };
33
34#if !__HIP_DEVICE_COMPILE__
35//TODO: Remove this after compiler pre-defines the following Macros.
36#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
37#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
38#define __HIP_MEMORY_SCOPE_WORKGROUP 3
39#define __HIP_MEMORY_SCOPE_AGENT 4
40#define __HIP_MEMORY_SCOPE_SYSTEM 5
41#endif
42
43#if !defined(__HIPCC_RTC__)
44#include "amd_hip_unsafe_atomics.h"
45#endif
46
// Atomic expanders
//
// Emulates an arbitrary atomic read-modify-write via a compare-exchange loop.
// Used by the __gfx941__ code paths below: pointers into shared (LDS) memory
// are delegated to the caller-supplied native fallback `f`, everything else
// goes through the CAS loop. Returns the value observed at *p immediately
// before the successful update.
//
// p  - target location; sizeof(T) must be 4 or 8 so it can be aliased by a
//      same-sized unsigned integer for the hardware CAS.
// x  - operand forwarded to `op`.
// op - mutator invoked as op(T& current, T x); updates `current` in place.
// f  - fallback returning T, used when p points into shared memory.
template<
    int mem_order = __ATOMIC_SEQ_CST,
    int mem_scope = __HIP_MEMORY_SCOPE_SYSTEM,
    typename T,
    typename Op,
    typename F>
inline
__attribute__((always_inline, device))
T hip_cas_expander(T* p, T x, Op op, F f) noexcept
{
    // Flat (generic) address-space pointer type for the intrinsic below.
    using FP = __attribute__((address_space(0))) const void*;

    // Direct binding to the LLVM intrinsic that reports whether a flat
    // pointer refers to shared (LDS) memory.
    __device__
    extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");

    if (is_shared_workaround((FP)p))
        return f();  // shared memory: take the native path

    // Alias T as a same-sized unsigned integer so the integer CAS builtin
    // can be used regardless of T.
    using U = typename Cond_t<
        sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;

    auto q = reinterpret_cast<U*>(p);

    U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
    U tmp1;
    do {
        tmp1 = tmp0;

        // Compute the candidate new value from a copy of the current one.
        op(reinterpret_cast<T&>(tmp1), x);
    } while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
                                                   mem_order, mem_scope));
    // On CAS failure tmp0 is refreshed with the observed value, so after the
    // loop it holds the pre-update contents of *p.
    return reinterpret_cast<const T&>(tmp0);
}
82
// CAS-loop emulation specialized for min/max style updates. Stores x into *p
// only while cmp(x, current) holds (e.g. cmp = less-than implements atomicMin)
// and returns the last observed value of *p. Shared-memory pointers are
// delegated to the caller-supplied native fallback `f`, as in
// hip_cas_expander above.
template<
    int mem_order = __ATOMIC_SEQ_CST,
    int mem_scope = __HIP_MEMORY_SCOPE_SYSTEM,
    typename T,
    typename Cmp,
    typename F>
inline
__attribute__((always_inline, device))
T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept
{
    // Flat (generic) address-space pointer type for the intrinsic below.
    using FP = __attribute__((address_space(0))) const void*;

    // Binding to the LLVM intrinsic testing for shared (LDS) memory.
    __device__
    extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");

    if (is_shared_workaround((FP)p))
        return f();

    // Alias T as a same-sized unsigned integer for the hardware CAS.
    using U = typename Cond_t<
        sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;

    auto q = reinterpret_cast<U*>(p);

    // Loop until either x no longer "beats" the current value (nothing to
    // store) or the exchange succeeds. A failed CAS refreshes tmp.
    U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
    while (cmp(x, reinterpret_cast<const T&>(tmp)) &&
           !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
                                                 mem_scope));

    return reinterpret_cast<const T&>(tmp);
}
113
// atomicCAS family: atomically performs
//   old = *address; if (old == compare) *address = val; return old;
// Built on __hip_atomic_compare_exchange_strong, which on failure writes the
// observed value back into `compare`, and on success leaves `compare` equal
// to the (matching) old value — so `compare` is always the value to return.
// Plain overloads use agent (device) scope; *_system overloads use system
// scope. All use relaxed memory ordering.

__device__ inline int atomicCAS(int* address, int compare, int val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__ inline int atomicCAS_system(int* address, int compare, int val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__ inline unsigned int atomicCAS(unsigned int* address, unsigned int compare,
                                         unsigned int val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__ inline unsigned int atomicCAS_system(unsigned int* address, unsigned int compare,
                                                unsigned int val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__ inline unsigned long atomicCAS(unsigned long* address, unsigned long compare,
                                          unsigned long val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__ inline unsigned long atomicCAS_system(unsigned long* address, unsigned long compare,
                                                 unsigned long val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__ inline unsigned long long atomicCAS(unsigned long long* address,
                                               unsigned long long compare,
                                               unsigned long long val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__ inline unsigned long long atomicCAS_system(unsigned long long* address,
                                                      unsigned long long compare,
                                                      unsigned long long val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__ inline float atomicCAS(float* address, float compare, float val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__ inline float atomicCAS_system(float* address, float compare, float val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__ inline double atomicCAS(double* address, double compare, double val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__ inline double atomicCAS_system(double* address, double compare, double val) {
  (void)__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}
211
// atomicAdd family (integer overloads): atomically performs *address += val
// and returns the value held before the addition. Plain overloads operate at
// agent (device) scope; *_system overloads at system scope. Relaxed ordering.

__device__ inline int atomicAdd(int* address, int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline int atomicAdd_system(int* address, int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline unsigned int atomicAdd(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline unsigned int atomicAdd_system(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline unsigned long atomicAdd(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline unsigned long atomicAdd_system(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline unsigned long long atomicAdd(unsigned long long* address,
                                               unsigned long long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline unsigned long long atomicAdd_system(unsigned long long* address,
                                                      unsigned long long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
259
// atomicAdd(float), agent scope: returns the pre-add value. When
// __AMDGCN_UNSAFE_FP_ATOMICS__ is enabled, routes through unsafeAtomicAdd
// (declared in amd_hip_unsafe_atomics.h) instead of the generic builtin.
__device__ inline float atomicAdd(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

// atomicAdd_system(float): system-scope variant; always uses the generic
// builtin (no unsafe fast path).
__device__ inline float atomicAdd_system(float* address, float val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
275
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
// Deprecated fire-and-forget float add: performs the atomic addition via the
// OCKL runtime helper without returning the previous value.
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}
285
// atomicAdd(double), agent scope: returns the pre-add value. When
// __AMDGCN_UNSAFE_FP_ATOMICS__ is enabled, routes through unsafeAtomicAdd
// (declared in amd_hip_unsafe_atomics.h) instead of the generic builtin.
__device__ inline double atomicAdd(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

// atomicAdd_system(double): system-scope variant; always uses the generic
// builtin (no unsafe fast path).
__device__ inline double atomicAdd_system(double* address, double val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
301
// atomicSub family: atomically performs *address -= val, returning the
// pre-subtract value. Implemented as fetch_add of the negated operand; for
// the unsigned overloads, -val is the two's-complement (modular) negation,
// which gives the correct wraparound subtraction. Plain overloads = agent
// scope, *_system = system scope, relaxed ordering throughout.

__device__ inline int atomicSub(int* address, int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline int atomicSub_system(int* address, int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline unsigned int atomicSub(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline unsigned int atomicSub_system(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline unsigned long atomicSub(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline unsigned long atomicSub_system(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline unsigned long long atomicSub(unsigned long long* address,
                                               unsigned long long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline unsigned long long atomicSub_system(unsigned long long* address,
                                                      unsigned long long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

// Float/double agent-scope overloads honor the unsafe-FP-atomics build flag,
// reusing unsafeAtomicAdd with a negated operand.
__device__ inline float atomicSub(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, -val);
#else
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__ inline float atomicSub_system(float* address, float val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline double atomicSub(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, -val);
#else
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__ inline double atomicSub_system(double* address, double val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
381
// atomicExch family: atomically stores val at *address and returns the value
// that was replaced. Plain overloads = agent (device) scope, *_system =
// system scope, relaxed ordering throughout.

__device__ inline int atomicExch(int* address, int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline int atomicExch_system(int* address, int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline unsigned int atomicExch(unsigned int* address, unsigned int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline unsigned int atomicExch_system(unsigned int* address, unsigned int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline unsigned long atomicExch(unsigned long* address, unsigned long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline unsigned long atomicExch_system(unsigned long* address, unsigned long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline unsigned long long atomicExch(unsigned long long* address,
                                                unsigned long long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline unsigned long long atomicExch_system(unsigned long long* address,
                                                       unsigned long long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline float atomicExch(float* address, float val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline float atomicExch_system(float* address, float val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__ inline double atomicExch(double* address, double val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__ inline double atomicExch_system(double* address, double val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
453
// atomicMin (int / unsigned int): atomically stores min(*address, val) and
// returns the previous value. On gfx941 targets the operation is expanded
// through hip_cas_extrema_expander: non-shared pointers use a CAS loop, while
// shared-memory pointers fall back to the native fetch_min (the lambda).
__device__
inline
int atomicMin(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](int x, int y) { return x < y; }, [=]() {
      return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope variant of atomicMin(int).
__device__
inline
int atomicMin_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](int x, int y) { return x < y; }, [=]() {
      return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Unsigned variant; comparison is unsigned less-than.
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
      return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__

}

// System-scope variant of atomicMin(unsigned int).
__device__
inline
unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
      return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
510
// atomicMin (unsigned long, agent scope): atomically stores
// min(*address, val) and returns the previous value.
//
// Fix: the return type was declared `unsigned long long` while the operand
// and the matching atomicMin_system overload use `unsigned long`; the return
// type now matches the operand type (backward compatible — the two types have
// the same width on the supported 64-bit targets, so callers see no change).
__device__
inline
unsigned long atomicMin(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  // gfx941: CAS-loop expansion; shared-memory pointers use the native
  // fetch_min supplied by the lambda.
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long x, unsigned long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
527
// System-scope atomicMin for unsigned long; same gfx941 CAS-loop expansion
// strategy as the agent-scope overloads above.
__device__
inline
unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long x, unsigned long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// atomicMin (unsigned long long, agent scope).
__device__
inline
unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long x, unsigned long long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// atomicMin (unsigned long long, system scope).
__device__
inline
unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long long x, unsigned long long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// atomicMin (signed long long, agent scope); signed less-than comparison.
__device__
inline
long long atomicMin(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](long long x, long long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// atomicMin (signed long long, system scope).
__device__
inline
long long atomicMin_system(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](long long x, long long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
606
// atomicMin(float), agent scope: stores min(*addr, val), returns the old
// value. Unless the unsafe-FP-atomics path is enabled, this is a CAS loop:
// it only attempts the exchange while the current value is greater than val.
// Note: comparisons involving NaN are false, so a NaN at *addr or in val
// leaves memory unchanged.
__device__
inline
float atomicMin(float* addr, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMin(addr, val);
#else
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  // CAS loop on the float directly; a failed exchange refreshes `value`.
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && value > val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  // Fallback without the HIP builtins: CAS on the bit pattern via an
  // unsigned int alias, comparing as float.
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __uint_as_float(value) > val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
  #endif
#endif
}

// System-scope atomicMin(float): seeded with an atomic load of the bit
// pattern, then a CAS loop via atomicCAS_system until val no longer beats
// the current value.
__device__
inline
float atomicMin_system(float* address, float val) {
  unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  float value = __uint_as_float(tmp);

  while (val < value) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}

// atomicMin(double), agent scope: same structure as the float overload,
// with the bit-pattern fallback using unsigned long long.
__device__
inline
double atomicMin(double* addr, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMin(addr, val);
#else
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && value > val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned long long *uaddr = (unsigned long long *)addr;
  unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __longlong_as_double(value) > val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __longlong_as_double(value);
  #endif
#endif
}

// System-scope atomicMin(double): CAS loop via atomicCAS_system.
__device__
inline
double atomicMin_system(double* address, double val) {
  unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  double value = __longlong_as_double(tmp);

  while (val < value) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}
698
// atomicMax family (integer overloads): atomically stores max(*address, val)
// and returns the previous value. Mirrors the atomicMin overloads above: on
// gfx941, hip_cas_extrema_expander runs a CAS loop for non-shared pointers
// (predicate `y < x`, i.e. val beats the current value) and delegates
// shared-memory pointers to the native fetch_max lambda.
__device__
inline
int atomicMax(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](int x, int y) { return y < x; }, [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

// System-scope variant of atomicMax(int).
__device__
inline
int atomicMax_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](int x, int y) { return y < x; }, [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Unsigned int overloads.
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
      return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Unsigned long overloads.
__device__
inline
unsigned long atomicMax(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long x, unsigned long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicMax_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long x, unsigned long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Unsigned long long overloads.
__device__
inline
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long x, unsigned long long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long long x, unsigned long long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

// Signed long long overloads.
__device__
inline
long long atomicMax(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](long long x, long long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
long long atomicMax_system(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](long long x, long long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
850
// atomicMax(float), agent scope: stores max(*addr, val), returns the old
// value. Mirror image of atomicMin(float) above: CAS loop attempts the
// exchange only while the current value is less than val. Comparisons with
// NaN are false, so NaN inputs leave memory unchanged.
__device__
inline
float atomicMax(float* addr, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMax(addr, val);
#else
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  // CAS loop on the float directly; a failed exchange refreshes `value`.
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && value < val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  // Fallback without the HIP builtins: CAS on the bit pattern via an
  // unsigned int alias, comparing as float.
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __uint_as_float(value) < val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
  #endif
#endif
}

// System-scope atomicMax(float): seeded with an atomic load of the bit
// pattern, then a CAS loop via atomicCAS_system while val beats the current
// value.
__device__
inline
float atomicMax_system(float* address, float val) {
  unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  float value = __uint_as_float(tmp);

  while (value < val) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}

// atomicMax(double), agent scope: same structure as the float overload, with
// the bit-pattern fallback using unsigned long long.
__device__
inline
double atomicMax(double* addr, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMax(addr, val);
#else
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && value < val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned long long *uaddr = (unsigned long long *)addr;
  unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __longlong_as_double(value) < val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __longlong_as_double(value);
  #endif
#endif
}

// System-scope atomicMax(double): CAS loop via atomicCAS_system.
__device__
inline
double atomicMax_system(double* address, double val) {
  unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  double value = __longlong_as_double(tmp);

  while (value < val) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}
942
// atomicInc: CUDA-style wrapping increment. Atomically computes
//   *address = (*address >= val) ? 0 : (*address + 1)
// and returns the old value. On gfx941 this is expanded via hip_cas_expander
// (CAS loop for non-shared pointers, native intrinsic for shared memory);
// otherwise the dedicated builtin is used at agent scope.
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
  // Local declaration bound directly to the LLVM atomic-inc intrinsic, used
  // as the shared-memory fallback inside the expander.
  __device__
  extern
  unsigned int __builtin_amdgcn_atomic_inc(
    unsigned int*,
    unsigned int,
    unsigned int,
    unsigned int,
    bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");

  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      // Wrapping increment applied to a copy of the current value.
      [](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
      [=]() {
        return
          __builtin_amdgcn_atomic_inc(address, val, __ATOMIC_RELAXED, 1, false);
      });
#else
  return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__

}

// atomicDec: CUDA-style wrapping decrement. Atomically computes
//   *address = (*address == 0 || *address > val) ? val : (*address - 1)
// and returns the old value. Same expansion strategy as atomicInc.
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
  // Local declaration bound directly to the LLVM atomic-dec intrinsic.
  __device__
  extern
  unsigned int __builtin_amdgcn_atomic_dec(
    unsigned int*,
    unsigned int,
    unsigned int,
    unsigned int,
    bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");

  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      // Wrapping decrement applied to a copy of the current value.
      [](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
      [=]() {
        return
          __builtin_amdgcn_atomic_dec(address, val, __ATOMIC_RELAXED, 1, false);
      });
#else
  return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__

}
998
// Atomically ANDs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
int atomicAnd(int* address, int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](int& x, int y) { x &= y; }, [=]() {
      return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1012
// Atomically ANDs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
int atomicAnd_system(int* address, int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](int& x, int y) { x &= y; }, [=]() {
      return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1026
// Atomically ANDs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
      return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1040
// Atomically ANDs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
      return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1054
// Atomically ANDs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
unsigned long atomicAnd(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
      return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1068
// Atomically ANDs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
unsigned long atomicAnd_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
      return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1082
// Atomically ANDs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address,
    val,
    [](unsigned long long& x, unsigned long long y) { x &= y; },
    [=]() {
      return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1099
// Atomically ANDs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address,
    val,
    [](unsigned long long& x, unsigned long long y) { x &= y; },
    [=]() {
      return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1116
// Atomically ORs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
int atomicOr(int* address, int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](int& x, int y) { x |= y; }, [=]() {
      return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                   __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1130
// Atomically ORs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
int atomicOr_system(int* address, int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](int& x, int y) { x |= y; }, [=]() {
      return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                   __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1144
// Atomically ORs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
      return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                   __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1158
// Atomically ORs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
      return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                   __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1172
// Atomically ORs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
unsigned long atomicOr(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
      return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                   __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1186
// Atomically ORs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
      return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                   __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1200
// Atomically ORs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address,
    val,
    [](unsigned long long& x, unsigned long long y) { x |= y; },
    [=]() {
      return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                   __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1217
// Atomically ORs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address,
    val,
    [](unsigned long long& x, unsigned long long y) { x |= y; },
    [=]() {
      return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                   __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1234
// Atomically XORs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
int atomicXor(int* address, int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](int& x, int y) { x ^= y; }, [=]() {
      return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1248
// Atomically XORs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
int atomicXor_system(int* address, int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](int& x, int y) { x ^= y; }, [=]() {
      return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1262
// Atomically XORs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
      return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1276
// Atomically XORs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
      return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1290
// Atomically XORs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
unsigned long atomicXor(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
      return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1304
// Atomically XORs val into *address and returns the old value
// (system scope, relaxed ordering).
__device__
inline
unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
      return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1318
// Atomically XORs val into *address and returns the old value
// (agent/device scope, relaxed ordering).
__device__
inline
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  // gfx941: use the CAS-based expander instead of the native atomic —
  // NOTE(review): presumably a gfx941 hardware workaround; confirm.
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
    address,
    val,
    [](unsigned long long& x, unsigned long long y) { x ^= y; },
    [=]() {
      return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
1335
// Atomically XORs val into *address and returns the old value
// (system scope, relaxed ordering).
//
// Consistency fix: every other atomic in this section routes gfx941 through
// the CAS-based expander (hip_cas_expander, amd_hip_unsafe_atomics.h); this
// overload previously called the native builtin unconditionally, so gfx941
// missed the workaround applied to all of its siblings.
__device__
inline
unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
    address,
    val,
    [](unsigned long long& x, unsigned long long y) { x ^= y; },
    [=]() {
      return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_SYSTEM);
    });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
1341
1342#else // __hip_atomic_compare_exchange_strong
1343
// Fallback path (compiler lacks the __hip_atomic_* builtins): relaxed CAS via
// the GNU __atomic builtin. On failure __atomic_compare_exchange_n writes the
// observed value into `compare`, so returning it yields the old value in both
// the success and failure cases.
__device__
inline
int atomicCAS(int* address, int compare, int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
// Fallback: relaxed CAS via the GNU __atomic builtin; `compare` receives the
// observed value on failure, so the return is the old value either way.
__device__
inline
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
// Fallback: relaxed 64-bit CAS via the GNU __atomic builtin; `compare`
// receives the observed value on failure, so the return is the old value
// either way.
__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
    __atomic_compare_exchange_n(
        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

    return compare;
}
1375
// Fallback: relaxed fetch-add; returns the previous value.
__device__
inline
int atomicAdd(int* address, int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed fetch-add; returns the previous value.
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed 64-bit fetch-add; returns the previous value.
__device__
inline
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
// Fallback float atomic add; returns the previous value. When
// __AMDGCN_UNSAFE_FP_ATOMICS__ is defined, the unsafe FP path
// (unsafeAtomicAdd, amd_hip_unsafe_atomics.h) is used instead.
__device__
inline
float atomicAdd(float* address, float val)
{
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
1405
// Deprecated no-return float atomic add; forwards to the OCKL helper.
// Prefer atomicAdd (the deprecation attribute is suppressed under hipRTC).
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val)
{
    __ockl_atomic_add_noret_f32(address, val);
}
1415
// Fallback double atomic add; returns the previous value. When
// __AMDGCN_UNSAFE_FP_ATOMICS__ is defined, the unsafe FP path
// (unsafeAtomicAdd, amd_hip_unsafe_atomics.h) is used instead.
__device__
inline
double atomicAdd(double* address, double val)
{
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}
1426
// Fallback: relaxed fetch-subtract; returns the previous value.
__device__
inline
int atomicSub(int* address, int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed fetch-subtract; returns the previous value.
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
1439
// Fallback: relaxed atomic exchange; returns the previous value.
__device__
inline
int atomicExch(int* address, int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed atomic exchange; returns the previous value.
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed 64-bit atomic exchange; returns the previous value.
__device__
inline
unsigned long long atomicExch(unsigned long long* address, unsigned long long val)
{
    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
// Fallback float atomic exchange; returns the previous value. The exchange is
// done on the 32-bit integer representation: bit-cast the new value in, and
// bit-cast the displaced contents back to float.
__device__
inline
float atomicExch(float* address, float val)
{
    unsigned int* const word_ptr = reinterpret_cast<unsigned int*>(address);
    const unsigned int new_bits = __float_as_uint(val);
    const unsigned int old_bits =
        __atomic_exchange_n(word_ptr, new_bits, __ATOMIC_RELAXED);
    return __uint_as_float(old_bits);
}
1467
// Fallback: relaxed fetch-min; returns the previous value.
__device__
inline
int atomicMin(int* address, int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed fetch-min; returns the previous value.
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
// Fallback 64-bit unsigned atomic min built on a CAS retry loop; returns the
// previous value stored at *address.
__device__
inline
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);
    while (val < observed) {
        // Refresh the snapshot first and only attempt the CAS when it is
        // current; atomicCAS returns whatever it observed, so either branch
        // leaves `observed` up to date for the next test.
        const unsigned long long latest =
            __atomic_load_n(address, __ATOMIC_RELAXED);
        observed = (latest != observed) ? latest
                                        : atomicCAS(address, observed, val);
    }
    return observed;
}
// Fallback 64-bit signed atomic min built on a CAS retry loop; returns the
// previous value stored at *address.
__device__ inline long long atomicMin(long long* address, long long val) {
  long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);
  while (val < observed) {
    // Refresh the snapshot first and only CAS against a current value;
    // atomicCAS returns whatever it observed, so either branch leaves
    // `observed` up to date for the next test.
    const long long latest = __atomic_load_n(address, __ATOMIC_RELAXED);
    observed = (latest != observed) ? latest
                                    : atomicCAS(address, observed, val);
  }
  return observed;
}
1510
// Fallback: relaxed fetch-max; returns the previous value.
__device__
inline
int atomicMax(int* address, int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed fetch-max; returns the previous value.
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
// Fallback 64-bit unsigned atomic max built on a CAS retry loop; returns the
// previous value stored at *address.
__device__
inline
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
    unsigned long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);
    while (observed < val) {
        // Refresh the snapshot first and only attempt the CAS when it is
        // current; atomicCAS returns whatever it observed, so either branch
        // leaves `observed` up to date for the next test.
        const unsigned long long latest =
            __atomic_load_n(address, __ATOMIC_RELAXED);
        observed = (latest != observed) ? latest
                                        : atomicCAS(address, observed, val);
    }
    return observed;
}
// Fallback 64-bit signed atomic max built on a CAS retry loop; returns the
// previous value stored at *address.
__device__ inline long long atomicMax(long long* address, long long val) {
  long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);
  while (observed < val) {
    // Refresh the snapshot first and only CAS against a current value;
    // atomicCAS returns whatever it observed, so either branch leaves
    // `observed` up to date for the next test.
    const long long latest = __atomic_load_n(address, __ATOMIC_RELAXED);
    observed = (latest != observed) ? latest
                                    : atomicCAS(address, observed, val);
  }
  return observed;
}
1553
// Fallback wrapping increment at agent scope, relaxed ordering; returns the
// old value. NOTE(review): amdgcn inc semantics (old >= val ? 0 : old + 1)
// per the LLVM AMDGPU intrinsic — confirm against the builtin docs.
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
  return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
}
1560
// Fallback wrapping decrement at agent scope, relaxed ordering; returns the
// old value. NOTE(review): amdgcn dec semantics (old == 0 || old > val ? val
// : old - 1) per the LLVM AMDGPU intrinsic — confirm against the builtin docs.
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
  return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
}
1567
// Fallback: relaxed fetch-AND; returns the previous value.
__device__
inline
int atomicAnd(int* address, int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed fetch-AND; returns the previous value.
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed 64-bit fetch-AND; returns the previous value.
__device__
inline
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
1587
// Fallback: relaxed fetch-OR; returns the previous value.
__device__
inline
int atomicOr(int* address, int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed fetch-OR; returns the previous value.
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed 64-bit fetch-OR; returns the previous value.
__device__
inline
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
1607
// Fallback: relaxed fetch-XOR; returns the previous value.
__device__
inline
int atomicXor(int* address, int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed fetch-XOR; returns the previous value.
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// Fallback: relaxed 64-bit fetch-XOR; returns the previous value.
__device__
inline
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
1627
1628#endif // __hip_atomic_compare_exchange_strong