HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_math_functions.h
1/*
2Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22
23#pragma once
24
25#include "hip_fp16_math_fwd.h"
26#include "amd_hip_vector_types.h"
27#include "math_fwd.h"
28
30
31#if !defined(__HIPCC_RTC__)
32#include <algorithm>
33// assert.h is only for the host version of assert.
34// The device version of assert is implemented in hip/amd_detail/hip_runtime.h.
35// Users should include hip_runtime.h for the device version of assert.
36#if !__HIP_DEVICE_COMPILE__
37#include <assert.h>
38#endif
39#include <limits.h>
40#include <limits>
41#include <stdint.h>
42#endif // !defined(__HIPCC_RTC__)
43
44#if _LIBCPP_VERSION && __HIP__
45namespace std {
46template <>
47struct __numeric_type<_Float16>
48{
49 static _Float16 __test(_Float16);
50
51 typedef _Float16 type;
52 static const bool value = true;
53};
54}
55#endif // _LIBCPP_VERSION
56
57#pragma push_macro("__DEVICE__")
58#pragma push_macro("__RETURN_TYPE")
59
60#define __DEVICE__ static __device__
61#define __RETURN_TYPE bool
62
63#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
64__DEVICE__
65inline
66uint64_t __make_mantissa_base8(const char* tagp)
67{
68 uint64_t r = 0;
69 while (tagp) {
70 char tmp = *tagp;
71
72 if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
73 else return 0;
74
75 ++tagp;
76 }
77
78 return r;
79}
80
81__DEVICE__
82inline
83uint64_t __make_mantissa_base10(const char* tagp)
84{
85 uint64_t r = 0;
86 while (tagp) {
87 char tmp = *tagp;
88
89 if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
90 else return 0;
91
92 ++tagp;
93 }
94
95 return r;
96}
97
98__DEVICE__
99inline
100uint64_t __make_mantissa_base16(const char* tagp)
101{
102 uint64_t r = 0;
103 while (tagp) {
104 char tmp = *tagp;
105
106 if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
107 else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
108 else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
109 else return 0;
110
111 ++tagp;
112 }
113
114 return r;
115}
116
117__DEVICE__
118inline
119uint64_t __make_mantissa(const char* tagp)
120{
121 if (!tagp) return 0u;
122
123 if (*tagp == '0') {
124 ++tagp;
125
126 if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
127 else return __make_mantissa_base8(tagp);
128 }
129
130 return __make_mantissa_base10(tagp);
131}
132#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
133
134// DOT FUNCTIONS
135#if __HIP_CLANG_ONLY__
136__DEVICE__
137inline
138int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
139 return __ockl_sdot2(a.data, b.data, c, saturate);
140}
141__DEVICE__
142inline
143uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
144 return __ockl_udot2(a.data, b.data, c, saturate);
145}
146__DEVICE__
147inline
148int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
149 return __ockl_sdot4(a.data, b.data, c, saturate);
150}
151__DEVICE__
152inline
153uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
154 return __ockl_udot4(a.data, b.data, c, saturate);
155}
156__DEVICE__
157inline
158int amd_mixed_dot(int a, int b, int c, bool saturate) {
159 return __ockl_sdot8(a, b, c, saturate);
160}
161__DEVICE__
162inline
163uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
164 return __ockl_udot8(a, b, c, saturate);
165}
166#endif
167
168#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
169// BEGIN FLOAT
170__DEVICE__
171inline
172float abs(float x) { return __ocml_fabs_f32(x); }
173__DEVICE__
174inline
175float acosf(float x) { return __ocml_acos_f32(x); }
176__DEVICE__
177inline
178float acoshf(float x) { return __ocml_acosh_f32(x); }
179__DEVICE__
180inline
181float asinf(float x) { return __ocml_asin_f32(x); }
182__DEVICE__
183inline
184float asinhf(float x) { return __ocml_asinh_f32(x); }
185__DEVICE__
186inline
187float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
188__DEVICE__
189inline
190float atanf(float x) { return __ocml_atan_f32(x); }
191__DEVICE__
192inline
193float atanhf(float x) { return __ocml_atanh_f32(x); }
194__DEVICE__
195inline
196float cbrtf(float x) { return __ocml_cbrt_f32(x); }
197__DEVICE__
198inline
199float ceilf(float x) { return __ocml_ceil_f32(x); }
200__DEVICE__
201inline
202float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
203__DEVICE__
204inline
205float cosf(float x) { return __ocml_cos_f32(x); }
206__DEVICE__
207inline
208float coshf(float x) { return __ocml_cosh_f32(x); }
209__DEVICE__
210inline
211float cospif(float x) { return __ocml_cospi_f32(x); }
212__DEVICE__
213inline
214float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
215__DEVICE__
216inline
217float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
218__DEVICE__
219inline
220float erfcf(float x) { return __ocml_erfc_f32(x); }
221__DEVICE__
222inline
223float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
224__DEVICE__
225inline
226float erfcxf(float x) { return __ocml_erfcx_f32(x); }
227__DEVICE__
228inline
229float erff(float x) { return __ocml_erf_f32(x); }
230__DEVICE__
231inline
232float erfinvf(float x) { return __ocml_erfinv_f32(x); }
233__DEVICE__
234inline
235float exp10f(float x) { return __ocml_exp10_f32(x); }
236__DEVICE__
237inline
238float exp2f(float x) { return __ocml_exp2_f32(x); }
239__DEVICE__
240inline
241float expf(float x) { return __ocml_exp_f32(x); }
242__DEVICE__
243inline
244float expm1f(float x) { return __ocml_expm1_f32(x); }
245__DEVICE__
246inline
247float fabsf(float x) { return __ocml_fabs_f32(x); }
248__DEVICE__
249inline
250float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
251__DEVICE__
252inline
253float fdividef(float x, float y) { return x / y; }
254__DEVICE__
255inline
256float floorf(float x) { return __ocml_floor_f32(x); }
257__DEVICE__
258inline
259float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
260__DEVICE__
261inline
262float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
263__DEVICE__
264inline
265float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
266__DEVICE__
267inline
268float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
269__DEVICE__
270inline
271float frexpf(float x, int* nptr)
272{
273 int tmp;
274 float r =
275 __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
276 *nptr = tmp;
277
278 return r;
279}
280__DEVICE__
281inline
282float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
283__DEVICE__
284inline
285int ilogbf(float x) { return __ocml_ilogb_f32(x); }
286__DEVICE__
287inline
288__RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
289__DEVICE__
290inline
291__RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
292__DEVICE__
293inline
294__RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
295__DEVICE__
296inline
297float j0f(float x) { return __ocml_j0_f32(x); }
298__DEVICE__
299inline
300float j1f(float x) { return __ocml_j1_f32(x); }
301__DEVICE__
302inline
303float jnf(int n, float x)
304{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
305 // for linear recurrences to get O(log n) steps, but it's unclear if
306 // it'd be beneficial in this case.
307 if (n == 0) return j0f(x);
308 if (n == 1) return j1f(x);
309
310 float x0 = j0f(x);
311 float x1 = j1f(x);
312 for (int i = 1; i < n; ++i) {
313 float x2 = (2 * i) / x * x1 - x0;
314 x0 = x1;
315 x1 = x2;
316 }
317
318 return x1;
319}
320__DEVICE__
321inline
322float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
323__DEVICE__
324inline
325float lgammaf(float x) { return __ocml_lgamma_f32(x); }
326__DEVICE__
327inline
328long long int llrintf(float x) { return __ocml_rint_f32(x); }
329__DEVICE__
330inline
331long long int llroundf(float x) { return __ocml_round_f32(x); }
332__DEVICE__
333inline
334float log10f(float x) { return __ocml_log10_f32(x); }
335__DEVICE__
336inline
337float log1pf(float x) { return __ocml_log1p_f32(x); }
338__DEVICE__
339inline
340float log2f(float x) { return __ocml_log2_f32(x); }
341__DEVICE__
342inline
343float logbf(float x) { return __ocml_logb_f32(x); }
344__DEVICE__
345inline
346float logf(float x) { return __ocml_log_f32(x); }
347__DEVICE__
348inline
349long int lrintf(float x) { return __ocml_rint_f32(x); }
350__DEVICE__
351inline
352long int lroundf(float x) { return __ocml_round_f32(x); }
353__DEVICE__
354inline
355float modff(float x, float* iptr)
356{
357 float tmp;
358 float r =
359 __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
360 *iptr = tmp;
361
362 return r;
363}
364__DEVICE__
365inline
366float nanf(const char* tagp)
367{
368 union {
369 float val;
370 struct ieee_float {
371 uint32_t mantissa : 22;
372 uint32_t quiet : 1;
373 uint32_t exponent : 8;
374 uint32_t sign : 1;
375 } bits;
376
377 static_assert(sizeof(float) == sizeof(ieee_float), "");
378 } tmp;
379
380 tmp.bits.sign = 0u;
381 tmp.bits.exponent = ~0u;
382 tmp.bits.quiet = 1u;
383 tmp.bits.mantissa = __make_mantissa(tagp);
384
385 return tmp.val;
386}
387__DEVICE__
388inline
389float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
390__DEVICE__
391inline
392float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
393__DEVICE__
394inline
395float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
396__DEVICE__
397inline
398float norm4df(float x, float y, float z, float w)
399{
400 return __ocml_len4_f32(x, y, z, w);
401}
402__DEVICE__
403inline
404float normcdff(float x) { return __ocml_ncdf_f32(x); }
405__DEVICE__
406inline
407float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
408__DEVICE__
409inline
410float normf(int dim, const float* a)
411{ // TODO: placeholder until OCML adds support.
412 float r = 0;
413 while (dim--) { r += a[0] * a[0]; ++a; }
414
415 return __ocml_sqrt_f32(r);
416}
417__DEVICE__
418inline
419float powf(float x, float y) { return __ocml_pow_f32(x, y); }
420__DEVICE__
421inline
422float powif(float base, int iexp) { return __ocml_pown_f32(base, iexp); }
423__DEVICE__
424inline
425float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
426__DEVICE__
427inline
428float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
429__DEVICE__
430inline
431float remquof(float x, float y, int* quo)
432{
433 int tmp;
434 float r =
435 __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
436 *quo = tmp;
437
438 return r;
439}
440__DEVICE__
441inline
442float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
443__DEVICE__
444inline
445float rintf(float x) { return __ocml_rint_f32(x); }
446__DEVICE__
447inline
448float rnorm3df(float x, float y, float z)
449{
450 return __ocml_rlen3_f32(x, y, z);
451}
452
453__DEVICE__
454inline
455float rnorm4df(float x, float y, float z, float w)
456{
457 return __ocml_rlen4_f32(x, y, z, w);
458}
459__DEVICE__
460inline
461float rnormf(int dim, const float* a)
462{ // TODO: placeholder until OCML adds support.
463 float r = 0;
464 while (dim--) { r += a[0] * a[0]; ++a; }
465
466 return __ocml_rsqrt_f32(r);
467}
468__DEVICE__
469inline
470float roundf(float x) { return __ocml_round_f32(x); }
471__DEVICE__
472inline
473float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
474__DEVICE__
475inline
476float scalblnf(float x, long int n)
477{
478 return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
479}
480__DEVICE__
481inline
482float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
483__DEVICE__
484inline
485__RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
486__DEVICE__
487inline
488void sincosf(float x, float* sptr, float* cptr)
489{
490 float tmp;
491
492 *sptr =
493 __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
494 *cptr = tmp;
495}
496__DEVICE__
497inline
498void sincospif(float x, float* sptr, float* cptr)
499{
500 float tmp;
501
502 *sptr =
503 __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
504 *cptr = tmp;
505}
506__DEVICE__
507inline
508float sinf(float x) { return __ocml_sin_f32(x); }
509__DEVICE__
510inline
511float sinhf(float x) { return __ocml_sinh_f32(x); }
512__DEVICE__
513inline
514float sinpif(float x) { return __ocml_sinpi_f32(x); }
515__DEVICE__
516inline
517float sqrtf(float x) { return __ocml_sqrt_f32(x); }
518__DEVICE__
519inline
520float tanf(float x) { return __ocml_tan_f32(x); }
521__DEVICE__
522inline
523float tanhf(float x) { return __ocml_tanh_f32(x); }
524__DEVICE__
525inline
526float tgammaf(float x) { return __ocml_tgamma_f32(x); }
527__DEVICE__
528inline
529float truncf(float x) { return __ocml_trunc_f32(x); }
530__DEVICE__
531inline
532float y0f(float x) { return __ocml_y0_f32(x); }
533__DEVICE__
534inline
535float y1f(float x) { return __ocml_y1_f32(x); }
536__DEVICE__
537inline
538float ynf(int n, float x)
539{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
540 // for linear recurrences to get O(log n) steps, but it's unclear if
541 // it'd be beneficial in this case. Placeholder until OCML adds
542 // support.
543 if (n == 0) return y0f(x);
544 if (n == 1) return y1f(x);
545
546 float x0 = y0f(x);
547 float x1 = y1f(x);
548 for (int i = 1; i < n; ++i) {
549 float x2 = (2 * i) / x * x1 - x0;
550 x0 = x1;
551 x1 = x2;
552 }
553
554 return x1;
555}
556
557// BEGIN INTRINSICS
558__DEVICE__
559inline
560float __cosf(float x) { return __ocml_native_cos_f32(x); }
561__DEVICE__
562inline
563float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
564__DEVICE__
565inline
566float __expf(float x) { return __ocml_native_exp_f32(x); }
567#if defined OCML_BASIC_ROUNDED_OPERATIONS
568__DEVICE__
569inline
570float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
571#endif
572__DEVICE__
573inline
574float __fadd_rn(float x, float y) { return x + y; }
575#if defined OCML_BASIC_ROUNDED_OPERATIONS
576__DEVICE__
577inline
578float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
579__DEVICE__
580inline
581float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
582__DEVICE__
583inline
584float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
585#endif
586__DEVICE__
587inline
588float __fdiv_rn(float x, float y) { return x / y; }
589#if defined OCML_BASIC_ROUNDED_OPERATIONS
590__DEVICE__
591inline
592float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
593__DEVICE__
594inline
595float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
596#endif
597__DEVICE__
598inline
599float __fdividef(float x, float y) { return x / y; }
600#if defined OCML_BASIC_ROUNDED_OPERATIONS
601__DEVICE__
602inline
603float __fmaf_rd(float x, float y, float z)
604{
605 return __ocml_fma_rtn_f32(x, y, z);
606}
607#endif
608__DEVICE__
609inline
610float __fmaf_rn(float x, float y, float z)
611{
612 return __ocml_fma_f32(x, y, z);
613}
614#if defined OCML_BASIC_ROUNDED_OPERATIONS
615__DEVICE__
616inline
617float __fmaf_ru(float x, float y, float z)
618{
619 return __ocml_fma_rtp_f32(x, y, z);
620}
621__DEVICE__
622inline
623float __fmaf_rz(float x, float y, float z)
624{
625 return __ocml_fma_rtz_f32(x, y, z);
626}
627__DEVICE__
628inline
629float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
630#endif
631__DEVICE__
632inline
633float __fmul_rn(float x, float y) { return x * y; }
634#if defined OCML_BASIC_ROUNDED_OPERATIONS
635__DEVICE__
636inline
637float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
638__DEVICE__
639inline
640float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
641__DEVICE__
642inline
643float __frcp_rd(float x) { return __builtin_amdgcn_rcpf(x); }
644#endif
645__DEVICE__
646inline
647float __frcp_rn(float x) { return __builtin_amdgcn_rcpf(x); }
648#if defined OCML_BASIC_ROUNDED_OPERATIONS
649__DEVICE__
650inline
651float __frcp_ru(float x) { return __builtin_amdgcn_rcpf(x); }
652__DEVICE__
653inline
654float __frcp_rz(float x) { return __builtin_amdgcn_rcpf(x); }
655#endif
656__DEVICE__
657inline
658float __frsqrt_rn(float x) { return __builtin_amdgcn_rsqf(x); }
659#if defined OCML_BASIC_ROUNDED_OPERATIONS
660__DEVICE__
661inline
662float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
663#endif
664__DEVICE__
665inline
666float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
667#if defined OCML_BASIC_ROUNDED_OPERATIONS
668__DEVICE__
669inline
670float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
671__DEVICE__
672inline
673float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
674__DEVICE__
675inline
676float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
677#endif
678__DEVICE__
679inline
680float __fsub_rn(float x, float y) { return x - y; }
681#if defined OCML_BASIC_ROUNDED_OPERATIONS
682__DEVICE__
683inline
684float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
685__DEVICE__
686inline
687float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
688#endif
689__DEVICE__
690inline
691float __log10f(float x) { return __ocml_native_log10_f32(x); }
692__DEVICE__
693inline
694float __log2f(float x) { return __ocml_native_log2_f32(x); }
695__DEVICE__
696inline
697float __logf(float x) { return __ocml_native_log_f32(x); }
698__DEVICE__
699inline
700float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
701__DEVICE__
702inline
703float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
704__DEVICE__
705inline
706void __sincosf(float x, float* sptr, float* cptr)
707{
708 *sptr = __ocml_native_sin_f32(x);
709 *cptr = __ocml_native_cos_f32(x);
710}
711__DEVICE__
712inline
713float __sinf(float x) { return __ocml_native_sin_f32(x); }
714__DEVICE__
715inline
716float __tanf(float x) { return __ocml_tan_f32(x); }
717// END INTRINSICS
718// END FLOAT
719
720// BEGIN DOUBLE
721__DEVICE__
722inline
723double abs(double x) { return __ocml_fabs_f64(x); }
724__DEVICE__
725inline
726double acos(double x) { return __ocml_acos_f64(x); }
727__DEVICE__
728inline
729double acosh(double x) { return __ocml_acosh_f64(x); }
730__DEVICE__
731inline
732double asin(double x) { return __ocml_asin_f64(x); }
733__DEVICE__
734inline
735double asinh(double x) { return __ocml_asinh_f64(x); }
736__DEVICE__
737inline
738double atan(double x) { return __ocml_atan_f64(x); }
739__DEVICE__
740inline
741double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
742__DEVICE__
743inline
744double atanh(double x) { return __ocml_atanh_f64(x); }
745__DEVICE__
746inline
747double cbrt(double x) { return __ocml_cbrt_f64(x); }
748__DEVICE__
749inline
750double ceil(double x) { return __ocml_ceil_f64(x); }
751__DEVICE__
752inline
753double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
754__DEVICE__
755inline
756double cos(double x) { return __ocml_cos_f64(x); }
757__DEVICE__
758inline
759double cosh(double x) { return __ocml_cosh_f64(x); }
760__DEVICE__
761inline
762double cospi(double x) { return __ocml_cospi_f64(x); }
763__DEVICE__
764inline
765double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
766__DEVICE__
767inline
768double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
769__DEVICE__
770inline
771double erf(double x) { return __ocml_erf_f64(x); }
772__DEVICE__
773inline
774double erfc(double x) { return __ocml_erfc_f64(x); }
775__DEVICE__
776inline
777double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
778__DEVICE__
779inline
780double erfcx(double x) { return __ocml_erfcx_f64(x); }
781__DEVICE__
782inline
783double erfinv(double x) { return __ocml_erfinv_f64(x); }
784__DEVICE__
785inline
786double exp(double x) { return __ocml_exp_f64(x); }
787__DEVICE__
788inline
789double exp10(double x) { return __ocml_exp10_f64(x); }
790__DEVICE__
791inline
792double exp2(double x) { return __ocml_exp2_f64(x); }
793__DEVICE__
794inline
795double expm1(double x) { return __ocml_expm1_f64(x); }
796__DEVICE__
797inline
798double fabs(double x) { return __ocml_fabs_f64(x); }
799__DEVICE__
800inline
801double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
802__DEVICE__
803inline
804double floor(double x) { return __ocml_floor_f64(x); }
805__DEVICE__
806inline
807double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
808__DEVICE__
809inline
810double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
811__DEVICE__
812inline
813double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
814__DEVICE__
815inline
816double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
817__DEVICE__
818inline
819double frexp(double x, int* nptr)
820{
821 int tmp;
822 double r =
823 __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
824 *nptr = tmp;
825
826 return r;
827}
828__DEVICE__
829inline
830double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
831__DEVICE__
832inline
833int ilogb(double x) { return __ocml_ilogb_f64(x); }
834__DEVICE__
835inline
836__RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
837__DEVICE__
838inline
839__RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
840__DEVICE__
841inline
842__RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
843__DEVICE__
844inline
845double j0(double x) { return __ocml_j0_f64(x); }
846__DEVICE__
847inline
848double j1(double x) { return __ocml_j1_f64(x); }
849__DEVICE__
850inline
851double jn(int n, double x)
852{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
853 // for linear recurrences to get O(log n) steps, but it's unclear if
854 // it'd be beneficial in this case. Placeholder until OCML adds
855 // support.
856 if (n == 0) return j0f(x);
857 if (n == 1) return j1f(x);
858
859 double x0 = j0f(x);
860 double x1 = j1f(x);
861 for (int i = 1; i < n; ++i) {
862 double x2 = (2 * i) / x * x1 - x0;
863 x0 = x1;
864 x1 = x2;
865 }
866
867 return x1;
868}
869__DEVICE__
870inline
871double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
872__DEVICE__
873inline
874double lgamma(double x) { return __ocml_lgamma_f64(x); }
875__DEVICE__
876inline
877long long int llrint(double x) { return __ocml_rint_f64(x); }
878__DEVICE__
879inline
880long long int llround(double x) { return __ocml_round_f64(x); }
881__DEVICE__
882inline
883double log(double x) { return __ocml_log_f64(x); }
884__DEVICE__
885inline
886double log10(double x) { return __ocml_log10_f64(x); }
887__DEVICE__
888inline
889double log1p(double x) { return __ocml_log1p_f64(x); }
890__DEVICE__
891inline
892double log2(double x) { return __ocml_log2_f64(x); }
893__DEVICE__
894inline
895double logb(double x) { return __ocml_logb_f64(x); }
896__DEVICE__
897inline
898long int lrint(double x) { return __ocml_rint_f64(x); }
899__DEVICE__
900inline
901long int lround(double x) { return __ocml_round_f64(x); }
902__DEVICE__
903inline
904double modf(double x, double* iptr)
905{
906 double tmp;
907 double r =
908 __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
909 *iptr = tmp;
910
911 return r;
912}
913__DEVICE__
914inline
915double nan(const char* tagp)
916{
917#if !_WIN32
918 union {
919 double val;
920 struct ieee_double {
921 uint64_t mantissa : 51;
922 uint32_t quiet : 1;
923 uint32_t exponent : 11;
924 uint32_t sign : 1;
925 } bits;
926 static_assert(sizeof(double) == sizeof(ieee_double), "");
927 } tmp;
928
929 tmp.bits.sign = 0u;
930 tmp.bits.exponent = ~0u;
931 tmp.bits.quiet = 1u;
932 tmp.bits.mantissa = __make_mantissa(tagp);
933
934 return tmp.val;
935#else
936 static_assert(sizeof(uint64_t)==sizeof(double));
937 uint64_t val = __make_mantissa(tagp);
938 val |= 0xFFF << 51;
939 return *reinterpret_cast<double*>(&val);
940#endif
941}
942__DEVICE__
943inline
944double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
945__DEVICE__
946inline
947double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
948__DEVICE__
949inline
950double norm(int dim, const double* a)
951{ // TODO: placeholder until OCML adds support.
952 double r = 0;
953 while (dim--) { r += a[0] * a[0]; ++a; }
954
955 return __ocml_sqrt_f64(r);
956}
957__DEVICE__
958inline
959double norm3d(double x, double y, double z)
960{
961 return __ocml_len3_f64(x, y, z);
962}
963__DEVICE__
964inline
965double norm4d(double x, double y, double z, double w)
966{
967 return __ocml_len4_f64(x, y, z, w);
968}
969__DEVICE__
970inline
971double normcdf(double x) { return __ocml_ncdf_f64(x); }
972__DEVICE__
973inline
974double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
975__DEVICE__
976inline
977double pow(double x, double y) { return __ocml_pow_f64(x, y); }
978__DEVICE__
979inline
980double powi(double base, int iexp) { return __ocml_pown_f64(base, iexp); }
981__DEVICE__
982inline
983double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
984__DEVICE__
985inline
986double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
987__DEVICE__
988inline
989double remquo(double x, double y, int* quo)
990{
991 int tmp;
992 double r =
993 __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
994 *quo = tmp;
995
996 return r;
997}
998__DEVICE__
999inline
1000double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
1001__DEVICE__
1002inline
1003double rint(double x) { return __ocml_rint_f64(x); }
1004__DEVICE__
1005inline
1006double rnorm(int dim, const double* a)
1007{ // TODO: placeholder until OCML adds support.
1008 double r = 0;
1009 while (dim--) { r += a[0] * a[0]; ++a; }
1010
1011 return __ocml_rsqrt_f64(r);
1012}
1013__DEVICE__
1014inline
1015double rnorm3d(double x, double y, double z)
1016{
1017 return __ocml_rlen3_f64(x, y, z);
1018}
1019__DEVICE__
1020inline
1021double rnorm4d(double x, double y, double z, double w)
1022{
1023 return __ocml_rlen4_f64(x, y, z, w);
1024}
1025__DEVICE__
1026inline
1027double round(double x) { return __ocml_round_f64(x); }
1028__DEVICE__
1029inline
1030double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
1031__DEVICE__
1032inline
1033double scalbln(double x, long int n)
1034{
1035 return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
1036}
1037__DEVICE__
1038inline
1039double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
1040__DEVICE__
1041inline
1042__RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
1043__DEVICE__
1044inline
1045double sin(double x) { return __ocml_sin_f64(x); }
1046__DEVICE__
1047inline
1048void sincos(double x, double* sptr, double* cptr)
1049{
1050 double tmp;
1051 *sptr =
1052 __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
1053 *cptr = tmp;
1054}
1055__DEVICE__
1056inline
1057void sincospi(double x, double* sptr, double* cptr)
1058{
1059 double tmp;
1060 *sptr = __ocml_sincospi_f64(
1061 x, (__attribute__((address_space(5))) double*) &tmp);
1062 *cptr = tmp;
1063}
1064__DEVICE__
1065inline
1066double sinh(double x) { return __ocml_sinh_f64(x); }
1067__DEVICE__
1068inline
1069double sinpi(double x) { return __ocml_sinpi_f64(x); }
1070__DEVICE__
1071inline
1072double sqrt(double x) { return __ocml_sqrt_f64(x); }
1073__DEVICE__
1074inline
1075double tan(double x) { return __ocml_tan_f64(x); }
1076__DEVICE__
1077inline
1078double tanh(double x) { return __ocml_tanh_f64(x); }
1079__DEVICE__
1080inline
1081double tgamma(double x) { return __ocml_tgamma_f64(x); }
1082__DEVICE__
1083inline
1084double trunc(double x) { return __ocml_trunc_f64(x); }
1085__DEVICE__
1086inline
1087double y0(double x) { return __ocml_y0_f64(x); }
1088__DEVICE__
1089inline
1090double y1(double x) { return __ocml_y1_f64(x); }
1091__DEVICE__
1092inline
1093double yn(int n, double x)
1094{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
1095 // for linear recurrences to get O(log n) steps, but it's unclear if
1096 // it'd be beneficial in this case. Placeholder until OCML adds
1097 // support.
1098 if (n == 0) return j0f(x);
1099 if (n == 1) return j1f(x);
1100
1101 double x0 = j0f(x);
1102 double x1 = j1f(x);
1103 for (int i = 1; i < n; ++i) {
1104 double x2 = (2 * i) / x * x1 - x0;
1105 x0 = x1;
1106 x1 = x2;
1107 }
1108
1109 return x1;
1110}
1111
1112// BEGIN INTRINSICS
1113#if defined OCML_BASIC_ROUNDED_OPERATIONS
1114__DEVICE__
1115inline
1116double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
1117#endif
1118__DEVICE__
1119inline
1120double __dadd_rn(double x, double y) { return x + y; }
1121#if defined OCML_BASIC_ROUNDED_OPERATIONS
1122__DEVICE__
1123inline
1124double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
1125__DEVICE__
1126inline
1127double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
1128__DEVICE__
1129inline
1130double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
1131#endif
1132__DEVICE__
1133inline
1134double __ddiv_rn(double x, double y) { return x / y; }
1135#if defined OCML_BASIC_ROUNDED_OPERATIONS
1136__DEVICE__
1137inline
1138double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
1139__DEVICE__
1140inline
1141double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
1142__DEVICE__
1143inline
1144double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
1145#endif
1146__DEVICE__
1147inline
1148double __dmul_rn(double x, double y) { return x * y; }
1149#if defined OCML_BASIC_ROUNDED_OPERATIONS
1150__DEVICE__
1151inline
1152double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
1153__DEVICE__
1154inline
1155double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
1156__DEVICE__
1157inline
1158double __drcp_rd(double x) { return __builtin_amdgcn_rcp(x); }
1159#endif
1160__DEVICE__
1161inline
1162double __drcp_rn(double x) { return __builtin_amdgcn_rcp(x); }
1163#if defined OCML_BASIC_ROUNDED_OPERATIONS
1164__DEVICE__
1165inline
1166double __drcp_ru(double x) { return __builtin_amdgcn_rcp(x); }
1167__DEVICE__
1168inline
1169double __drcp_rz(double x) { return __builtin_amdgcn_rcp(x); }
1170__DEVICE__
1171inline
1172double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
1173#endif
1174__DEVICE__
1175inline
1176double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
1177#if defined OCML_BASIC_ROUNDED_OPERATIONS
1178__DEVICE__
1179inline
1180double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
1181__DEVICE__
1182inline
1183double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
1184__DEVICE__
1185inline
1186double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
1187#endif
1188__DEVICE__
1189inline
1190double __dsub_rn(double x, double y) { return x - y; }
1191#if defined OCML_BASIC_ROUNDED_OPERATIONS
1192__DEVICE__
1193inline
1194double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
1195__DEVICE__
1196inline
1197double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
1198__DEVICE__
1199inline
1200double __fma_rd(double x, double y, double z)
1201{
1202 return __ocml_fma_rtn_f64(x, y, z);
1203}
1204#endif
1205__DEVICE__
1206inline
1207double __fma_rn(double x, double y, double z)
1208{
1209 return __ocml_fma_f64(x, y, z);
1210}
1211#if defined OCML_BASIC_ROUNDED_OPERATIONS
1212__DEVICE__
1213inline
1214double __fma_ru(double x, double y, double z)
1215{
1216 return __ocml_fma_rtp_f64(x, y, z);
1217}
1218__DEVICE__
1219inline
1220double __fma_rz(double x, double y, double z)
1221{
1222 return __ocml_fma_rtz_f64(x, y, z);
1223}
1224#endif
1225// END INTRINSICS
1226// END DOUBLE
1227
1228// BEGIN INTEGER
1229__DEVICE__
1230inline
1231int abs(int x)
1232{
1233 int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
1234 return (x ^ sgn) - sgn;
1235}
1236__DEVICE__
1237inline
1238long labs(long x)
1239{
1240 long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
1241 return (x ^ sgn) - sgn;
1242}
1243__DEVICE__
1244inline
1245long long llabs(long long x)
1246{
1247 long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
1248 return (x ^ sgn) - sgn;
1249}
1250
1251#if defined(__cplusplus)
1252 __DEVICE__
1253 inline
1254 long abs(long x) { return labs(x); }
1255 __DEVICE__
1256 inline
1257 long long abs(long long x) { return llabs(x); }
1258#endif
1259// END INTEGER
1260
1261__DEVICE__
1262inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
1263 return __ocml_fma_f16(x, y, z);
1264}
1265
1266__DEVICE__
1267inline float fma(float x, float y, float z) {
1268 return fmaf(x, y, z);
1269}
1270
1271#pragma push_macro("__DEF_FLOAT_FUN")
1272#pragma push_macro("__DEF_FLOAT_FUN2")
1273#pragma push_macro("__DEF_FLOAT_FUN2I")
1274#pragma push_macro("__HIP_OVERLOAD")
1275#pragma push_macro("__HIP_OVERLOAD2")
1276
1277// __hip_enable_if::type is a type function which returns __T if __B is true.
1278template<bool __B, class __T = void>
1280
1281template <class __T> struct __hip_enable_if<true, __T> {
1282 typedef __T type;
1283};
1284
1285// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
1286// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
1287// floor(double).
1288#define __HIP_OVERLOAD1(__retty, __fn) \
1289 template <typename __T> \
1290 __DEVICE__ \
1291 typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
1292 __retty>::type \
1293 __fn(__T __x) { \
1294 return ::__fn((double)__x); \
1295 }
1296
1297// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
1298// or integer argument to avoid compilation error due to ambibuity. e.g.
1299// max(5.0f, 6.0) is resolved with max(double, double).
1300#define __HIP_OVERLOAD2(__retty, __fn) \
1301 template <typename __T1, typename __T2> \
1302 __DEVICE__ typename __hip_enable_if< \
1303 std::numeric_limits<__T1>::is_specialized && \
1304 std::numeric_limits<__T2>::is_specialized, \
1305 __retty>::type \
1306 __fn(__T1 __x, __T2 __y) { \
1307 return __fn((double)__x, (double)__y); \
1308 }
1309
1310// Define cmath functions with float argument and returns float.
1311#define __DEF_FUN1(retty, func) \
1312__DEVICE__ \
1313inline \
1314float func(float x) \
1315{ \
1316 return func##f(x); \
1317} \
1318__HIP_OVERLOAD1(retty, func)
1319
1320// Define cmath functions with float argument and returns retty.
1321#define __DEF_FUNI(retty, func) \
1322__DEVICE__ \
1323inline \
1324retty func(float x) \
1325{ \
1326 return func##f(x); \
1327} \
1328__HIP_OVERLOAD1(retty, func)
1329
1330// define cmath functions with two float arguments.
1331#define __DEF_FUN2(retty, func) \
1332__DEVICE__ \
1333inline \
1334float func(float x, float y) \
1335{ \
1336 return func##f(x, y); \
1337} \
1338__HIP_OVERLOAD2(retty, func)
1339
1340__DEF_FUN1(double, acos)
1341__DEF_FUN1(double, acosh)
1342__DEF_FUN1(double, asin)
1343__DEF_FUN1(double, asinh)
1344__DEF_FUN1(double, atan)
1345__DEF_FUN2(double, atan2);
1346__DEF_FUN1(double, atanh)
1347__DEF_FUN1(double, cbrt)
1348__DEF_FUN1(double, ceil)
1349__DEF_FUN2(double, copysign);
1350__DEF_FUN1(double, cos)
1351__DEF_FUN1(double, cosh)
1352__DEF_FUN1(double, erf)
1353__DEF_FUN1(double, erfc)
1354__DEF_FUN1(double, exp)
1355__DEF_FUN1(double, exp2)
1356__DEF_FUN1(double, expm1)
1357__DEF_FUN1(double, fabs)
1358__DEF_FUN2(double, fdim);
1359__DEF_FUN1(double, floor)
1360__DEF_FUN2(double, fmax);
1361__DEF_FUN2(double, fmin);
1362__DEF_FUN2(double, fmod);
1363//__HIP_OVERLOAD1(int, fpclassify)
1364__DEF_FUN2(double, hypot);
1365__DEF_FUNI(int, ilogb)
1366__HIP_OVERLOAD1(bool, isfinite)
1367__HIP_OVERLOAD2(bool, isgreater);
1368__HIP_OVERLOAD2(bool, isgreaterequal);
1369__HIP_OVERLOAD1(bool, isinf);
1370__HIP_OVERLOAD2(bool, isless);
1371__HIP_OVERLOAD2(bool, islessequal);
1372__HIP_OVERLOAD2(bool, islessgreater);
1373__HIP_OVERLOAD1(bool, isnan);
1374//__HIP_OVERLOAD1(bool, isnormal)
1375__HIP_OVERLOAD2(bool, isunordered);
1376__DEF_FUN1(double, lgamma)
1377__DEF_FUN1(double, log)
1378__DEF_FUN1(double, log10)
1379__DEF_FUN1(double, log1p)
1380__DEF_FUN1(double, log2)
1381__DEF_FUN1(double, logb)
1382__DEF_FUNI(long long, llrint)
1383__DEF_FUNI(long long, llround)
1384__DEF_FUNI(long, lrint)
1385__DEF_FUNI(long, lround)
1386__DEF_FUN1(double, nearbyint);
1387__DEF_FUN2(double, nextafter);
1388__DEF_FUN2(double, pow);
1389__DEF_FUN2(double, remainder);
1390__DEF_FUN1(double, rint);
1391__DEF_FUN1(double, round);
1392__HIP_OVERLOAD1(bool, signbit)
1393__DEF_FUN1(double, sin)
1394__DEF_FUN1(double, sinh)
1395__DEF_FUN1(double, sqrt)
1396__DEF_FUN1(double, tan)
1397__DEF_FUN1(double, tanh)
1398__DEF_FUN1(double, tgamma)
1399__DEF_FUN1(double, trunc);
1400
1401// define cmath functions with a float and an integer argument.
1402#define __DEF_FLOAT_FUN2I(func) \
1403__DEVICE__ \
1404inline \
1405float func(float x, int y) \
1406{ \
1407 return func##f(x, y); \
1408}
1409__DEF_FLOAT_FUN2I(scalbn)
1410__DEF_FLOAT_FUN2I(ldexp)
1411
1412template<class T>
1413__DEVICE__ inline T min(T arg1, T arg2) {
1414 return (arg1 < arg2) ? arg1 : arg2;
1415}
1416
1417template<class T>
1418__DEVICE__ inline T max(T arg1, T arg2) {
1419 return (arg1 > arg2) ? arg1 : arg2;
1420}
1421
1422__DEVICE__ inline int min(int arg1, int arg2) {
1423 return (arg1 < arg2) ? arg1 : arg2;
1424}
1425__DEVICE__ inline int max(int arg1, int arg2) {
1426 return (arg1 > arg2) ? arg1 : arg2;
1427}
1428
1429__DEVICE__ inline int min(uint32_t arg1, int arg2) {
1430 return (arg1 < arg2) ? arg1 : arg2;
1431}
1432__DEVICE__ inline int max(uint32_t arg1, int arg2) {
1433 return (arg1 > arg2) ? arg1 : arg2;
1434}
1435
1436__DEVICE__
1437inline
1438float max(float x, float y) {
1439 return fmaxf(x, y);
1440}
1441
1442__DEVICE__
1443inline
1444double max(double x, double y) {
1445 return fmax(x, y);
1446}
1447
1448__DEVICE__
1449inline
1450float min(float x, float y) {
1451 return fminf(x, y);
1452}
1453
1454__DEVICE__
1455inline
1456double min(double x, double y) {
1457 return fmin(x, y);
1458}
1459
1460__HIP_OVERLOAD2(double, max)
1461__HIP_OVERLOAD2(double, min)
1462
1463#if !defined(__HIPCC_RTC__)
1464__host__ inline static int min(int arg1, int arg2) {
1465 return std::min(arg1, arg2);
1466}
1467
1468__host__ inline static int max(int arg1, int arg2) {
1469 return std::max(arg1, arg2);
1470}
1471#endif // !defined(__HIPCC_RTC__)
1472
1473__DEVICE__
1474inline float pow(float base, int iexp) {
1475 return powif(base, iexp);
1476}
1477
1478__DEVICE__
1479inline double pow(double base, int iexp) {
1480 return powi(base, iexp);
1481}
1482
1483__DEVICE__
1484inline _Float16 pow(_Float16 base, int iexp) {
1485 return __ocml_pown_f16(base, iexp);
1486}
1487
1488#pragma pop_macro("__DEF_FLOAT_FUN")
1489#pragma pop_macro("__DEF_FLOAT_FUN2")
1490#pragma pop_macro("__DEF_FLOAT_FUN2I")
1491#pragma pop_macro("__HIP_OVERLOAD")
1492#pragma pop_macro("__HIP_OVERLOAD2")
1493
1494#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
1495
1496#pragma pop_macro("__DEVICE__")
1497#pragma pop_macro("__RETURN_TYPE")
1498
1499// For backward compatibility.
1500// There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
1501// defined after including math_functions.h.
1502#include <hip/amd_detail/amd_hip_runtime.h>
#define __host__
Definition host_defines.h:170
Definition amd_hip_vector_types.h:1623
Definition amd_hip_vector_types.h:1660
Definition amd_hip_vector_types.h:1690
Definition amd_hip_vector_types.h:1727
Definition amd_math_functions.h:1279