Mock Version: 4.1 Mock Version: 4.1 Mock Version: 4.1 ENTER ['do_with_status'](['bash', '--login', '-c', '/usr/bin/rpmbuild -bs --noclean --target x86_64 --nodeps /builddir/build/SPECS/nccl-cuda-12.0.spec'], chrootPath='/var/lib/mock/dist-oc9-epol-build-230014-81432/root'env={'TERM': 'vt100', 'SHELL': '/bin/bash', 'HOME': '/builddir', 'HOSTNAME': 'mock', 'PATH': '/usr/bin:/bin:/usr/sbin:/sbin', 'PROMPT_COMMAND': 'printf "\\033]0;\\007"', 'PS1': ' \\s-\\v\\$ ', 'LANG': 'C.UTF-8'}shell=Falselogger=timeout=86400uid=991gid=135user='mockbuild'nspawn_args=[]unshare_net=FalseprintOutput=False) Executing command: ['bash', '--login', '-c', '/usr/bin/rpmbuild -bs --noclean --target x86_64 --nodeps /builddir/build/SPECS/nccl-cuda-12.0.spec'] with env {'TERM': 'vt100', 'SHELL': '/bin/bash', 'HOME': '/builddir', 'HOSTNAME': 'mock', 'PATH': '/usr/bin:/bin:/usr/sbin:/sbin', 'PROMPT_COMMAND': 'printf "\\033]0;\\007"', 'PS1': ' \\s-\\v\\$ ', 'LANG': 'C.UTF-8'} and shell False Building target platforms: x86_64 Building for target x86_64 Wrote: /builddir/build/SRPMS/nccl-cuda-12.0-2.28.9.1-1.oc9.src.rpm Child return code was: 0 ENTER ['do_with_status'](['bash', '--login', '-c', '/usr/bin/rpmbuild -bb --noclean --target x86_64 --nodeps /builddir/build/SPECS/nccl-cuda-12.0.spec'], chrootPath='/var/lib/mock/dist-oc9-epol-build-230014-81432/root'env={'TERM': 'vt100', 'SHELL': '/bin/bash', 'HOME': '/builddir', 'HOSTNAME': 'mock', 'PATH': '/usr/bin:/bin:/usr/sbin:/sbin', 'PROMPT_COMMAND': 'printf "\\033]0;\\007"', 'PS1': ' \\s-\\v\\$ ', 'LANG': 'C.UTF-8'}shell=Falselogger=timeout=86400uid=991gid=135user='mockbuild'nspawn_args=[]unshare_net=FalseprintOutput=False) Executing command: ['bash', '--login', '-c', '/usr/bin/rpmbuild -bb --noclean --target x86_64 --nodeps /builddir/build/SPECS/nccl-cuda-12.0.spec'] with env {'TERM': 'vt100', 'SHELL': '/bin/bash', 'HOME': '/builddir', 'HOSTNAME': 'mock', 'PATH': '/usr/bin:/bin:/usr/sbin:/sbin', 'PROMPT_COMMAND': 'printf "\\033]0;\\007"', 'PS1': ' \\s-\\v\\$ ', 'LANG': 'C.UTF-8'} and shell False Building target platforms: x86_64 Building for target x86_64 Executing(%prep): /bin/sh -e /var/tmp/rpm-tmp.jP6YdT + umask 022 + cd /builddir/build/BUILD + cd /builddir/build/BUILD + rm -rf nccl-2.28.9-1 + /usr/lib/rpm/rpmuncompress -x /builddir/build/SOURCES/v2.28.9-1.tar.gz + STATUS=0 + '[' 0 -ne 0 ']' + cd nccl-2.28.9-1 + /usr/bin/chmod -Rf a+rX,u+w,g-w,o-w . + RPM_EC=0 ++ jobs -p + exit 0 Executing(%build): /bin/sh -e /var/tmp/rpm-tmp.54zIeX + umask 022 + cd /builddir/build/BUILD + cd nccl-2.28.9-1 + export CXX=g++ + CXX=g++ + export CC=gcc + CC=gcc + export CXXFLAGS=-ffat-lto-objects + CXXFLAGS=-ffat-lto-objects + export CUDA_HOME=/usr/local/cuda-12.0 + CUDA_HOME=/usr/local/cuda-12.0 + cmake '-GUnix Makefiles' -S . -B nccl_build '-DCMAKE_CUDA_ARCHITECTURES=70;75;80;86;87;89;90' -DBUILD_SHARED_LIBS=OFF -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_INSTALL_PREFIX=/usr -DINCLUDE_INSTALL_DIR=/usr/include -DLIB_INSTALL_DIR:PATH=/usr/lib64 -DSYSCONF_INSTALL_DIR=/etc -DLIB_SUFFIX=64 -DBUILD_SHARED_LIBS:BOOL=ON -- The CUDA compiler identification is NVIDIA 12.0.140 -- The CXX compiler identification is GNU 12.3.1 -- The C compiler identification is GNU 12.3.1 -- Detecting CUDA compiler ABI info -- Detecting CUDA compiler ABI info - done -- Check for working CUDA compiler: /usr/local/cuda/bin/nvcc - skipped -- Detecting CUDA compile features -- Detecting CUDA compile features - done -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done -- Check for working CXX compiler: /usr/bin/g++ - skipped -- Detecting CXX compile features -- Detecting CXX compile features - done -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done -- Check for working C compiler: /usr/bin/gcc - skipped -- Detecting C compile features -- Detecting C compile features - done -- Found CUDAToolkit: /usr/local/cuda/include (found version "12.0.140") -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success -- Found Threads: TRUE -- Using CUDA_ARCHITECTURES: 70;75;80;86;87;89;90 -- Configuring done (4.1s) -- Generating done (0.1s) CMake Warning: Manually-specified variables were not used by the project: INCLUDE_INSTALL_DIR LIB_INSTALL_DIR LIB_SUFFIX SYSCONF_INSTALL_DIR -- Build files have been written to: /builddir/build/BUILD/nccl-2.28.9-1/nccl_build ++ nproc + cmake --build nccl_build -j32 --verbose /usr/bin/cmake -S/builddir/build/BUILD/nccl-2.28.9-1 -B/builddir/build/BUILD/nccl-2.28.9-1/nccl_build --check-build-system CMakeFiles/Makefile.cmake 0 /usr/bin/cmake -E cmake_progress_start /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/CMakeFiles /builddir/build/BUILD/nccl-2.28.9-1/nccl_build//CMakeFiles/progress.marks /usr/bin/gmake -f CMakeFiles/Makefile2 all gmake[1]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' /usr/bin/gmake -f ext-net/example/CMakeFiles/nccl-net-example.dir/build.make ext-net/example/CMakeFiles/nccl-net-example.dir/depend /usr/bin/gmake -f ext-profiler/example/CMakeFiles/nccl-profiler-example.dir/build.make ext-profiler/example/CMakeFiles/nccl-profiler-example.dir/depend /usr/bin/gmake -f ext-tuner/example/CMakeFiles/nccl-tuner-example.dir/build.make ext-tuner/example/CMakeFiles/nccl-tuner-example.dir/depend /usr/bin/gmake -f src/CMakeFiles/nccl_header.dir/build.make src/CMakeFiles/nccl_header.dir/depend gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /builddir/build/BUILD/nccl-2.28.9-1 /builddir/build/BUILD/nccl-2.28.9-1/ext-net/example /builddir/build/BUILD/nccl-2.28.9-1/nccl_build /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-net/example /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-net/example/CMakeFiles/nccl-net-example.dir/DependInfo.cmake --color= gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /builddir/build/BUILD/nccl-2.28.9-1 /builddir/build/BUILD/nccl-2.28.9-1/ext-profiler/example /builddir/build/BUILD/nccl-2.28.9-1/nccl_build /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-profiler/example /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-profiler/example/CMakeFiles/nccl-profiler-example.dir/DependInfo.cmake --color= gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /builddir/build/BUILD/nccl-2.28.9-1 /builddir/build/BUILD/nccl-2.28.9-1/ext-tuner/example /builddir/build/BUILD/nccl-2.28.9-1/nccl_build /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-tuner/example /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-tuner/example/CMakeFiles/nccl-tuner-example.dir/DependInfo.cmake --color= gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /builddir/build/BUILD/nccl-2.28.9-1 /builddir/build/BUILD/nccl-2.28.9-1/src /builddir/build/BUILD/nccl-2.28.9-1/nccl_build /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/CMakeFiles/nccl_header.dir/DependInfo.cmake --color= gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' /usr/bin/gmake -f ext-net/example/CMakeFiles/nccl-net-example.dir/build.make ext-net/example/CMakeFiles/nccl-net-example.dir/build gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' /usr/bin/gmake -f ext-tuner/example/CMakeFiles/nccl-tuner-example.dir/build.make ext-tuner/example/CMakeFiles/nccl-tuner-example.dir/build gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' /usr/bin/gmake -f src/CMakeFiles/nccl_header.dir/build.make src/CMakeFiles/nccl_header.dir/build /usr/bin/gmake -f ext-profiler/example/CMakeFiles/nccl-profiler-example.dir/build.make ext-profiler/example/CMakeFiles/nccl-profiler-example.dir/build gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [ 0%] Building C object ext-tuner/example/CMakeFiles/nccl-tuner-example.dir/plugin.c.o [ 0%] Building C object ext-net/example/CMakeFiles/nccl-net-example.dir/plugin.c.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-tuner/example && /usr/bin/gcc -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_tuner_example_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/ext-tuner/example/nccl -O3 -DNDEBUG -fPIC -MD -MT ext-tuner/example/CMakeFiles/nccl-tuner-example.dir/plugin.c.o -MF CMakeFiles/nccl-tuner-example.dir/plugin.c.o.d -o CMakeFiles/nccl-tuner-example.dir/plugin.c.o -c /builddir/build/BUILD/nccl-2.28.9-1/ext-tuner/example/plugin.c cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-net/example && /usr/bin/gcc -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_net_example_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/ext-net/example/nccl -O3 -DNDEBUG -fPIC -MD -MT ext-net/example/CMakeFiles/nccl-net-example.dir/plugin.c.o -MF CMakeFiles/nccl-net-example.dir/plugin.c.o.d -o CMakeFiles/nccl-net-example.dir/plugin.c.o -c /builddir/build/BUILD/nccl-2.28.9-1/ext-net/example/plugin.c [ 0%] Generating ../include/nccl.h [ 0%] Building CXX object ext-profiler/example/CMakeFiles/nccl-profiler-example.dir/plugin.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/cmake -E make_directory /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-profiler/example && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_profiler_example_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/ext-profiler/example/nccl -I/usr/local/cuda/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -MD -MT ext-profiler/example/CMakeFiles/nccl-profiler-example.dir/plugin.cc.o -MF CMakeFiles/nccl-profiler-example.dir/plugin.cc.o.d -o CMakeFiles/nccl-profiler-example.dir/plugin.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/ext-profiler/example/plugin.cc [ 1%] Building CXX object ext-profiler/example/CMakeFiles/nccl-profiler-example.dir/print_event.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-profiler/example && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_profiler_example_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/ext-profiler/example/nccl -I/usr/local/cuda/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -MD -MT ext-profiler/example/CMakeFiles/nccl-profiler-example.dir/print_event.cc.o -MF CMakeFiles/nccl-profiler-example.dir/print_event.cc.o.d -o CMakeFiles/nccl-profiler-example.dir/print_event.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/ext-profiler/example/print_event.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && sed -e s/\$\{nccl:Major\}/2/g -e s/\$\{nccl:Minor\}/28/g -e s/\$\{nccl:Patch\}/9/g -e s/\$\{nccl:Suffix\}//g -e s/\$\{nccl:Version\}/22809/g /builddir/build/BUILD/nccl-2.28.9-1/src/nccl.h.in > /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include/nccl.h gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [ 1%] Built target nccl_header /usr/bin/gmake -f src/device/CMakeFiles/nccl_device.dir/build.make src/device/CMakeFiles/nccl_device.dir/depend /usr/bin/gmake -f src/CMakeFiles/ncclras.dir/build.make src/CMakeFiles/ncclras.dir/depend gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /builddir/build/BUILD/nccl-2.28.9-1 /builddir/build/BUILD/nccl-2.28.9-1/src /builddir/build/BUILD/nccl-2.28.9-1/nccl_build /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/CMakeFiles/ncclras.dir/DependInfo.cmake --color= gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' /usr/bin/gmake -f src/CMakeFiles/ncclras.dir/build.make src/CMakeFiles/ncclras.dir/build gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /builddir/build/BUILD/nccl-2.28.9-1 /builddir/build/BUILD/nccl-2.28.9-1/src/device /builddir/build/BUILD/nccl-2.28.9-1/nccl_build /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/CMakeFiles/nccl_device.dir/DependInfo.cmake --color= [ 1%] Building CXX object src/CMakeFiles/ncclras.dir/ras/client.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -I/usr/local/cuda/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/ncclras.dir/ras/client.cc.o -MF CMakeFiles/ncclras.dir/ras/client.cc.o.d -o CMakeFiles/ncclras.dir/ras/client.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/client.cc gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' /usr/bin/gmake -f src/device/CMakeFiles/nccl_device.dir/build.make src/device/CMakeFiles/nccl_device.dir/build gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [ 2%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_bf16.cu.o [ 2%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_gather.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_gather.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_gather.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_gather.cu -o CMakeFiles/nccl_device.dir/gensrc/all_gather.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_bf16.cu.o [ 2%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e5m2.cu.o [ 2%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e4m3.cu.o [ 2%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f16.cu.o [ 2%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f32.cu.o [ 2%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e4m3.cu.o [ 4%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i64.cu.o [ 4%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f64.cu.o [ 4%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u64.cu.o [ 4%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u8.cu.o [ 4%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i32.cu.o [ 4%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f16.cu.o [ 4%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f16.cu.o [ 5%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f32.cu.o [ 5%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_bf16.cu.o [ 5%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f64.cu.o [ 6%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e4m3.cu.o [ 7%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_i32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_i64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i64.cu.o [ 7%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u8.cu.o [ 7%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u32.cu.o [ 7%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_minmax_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_premulsum_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_bf16.cu.o [ 8%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_premulsum_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_premulsum_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_premulsum_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f64.cu.o [ 8%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_premulsum_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e4m3.cu.o [ 8%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e4m3.cu.o [ 8%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_premulsum_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_premulsum_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_premulsum_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_premulsum_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_prod_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_prod_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_prod_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_prod_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_prod_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e4m3.cu.o [ 9%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_prod_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e5m2.cu.o [ 9%] Linking C shared library ../../test/unit/plugins/libnccl-net-example.so cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-net/example && /usr/bin/cmake -E cmake_link_script CMakeFiles/nccl-net-example.dir/link.txt --verbose=1 /usr/bin/gcc -fPIC -O3 -DNDEBUG -shared -Wl,-soname,libnccl-net-example.so -o ../../test/unit/plugins/libnccl-net-example.so "CMakeFiles/nccl-net-example.dir/plugin.c.o" gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [ 9%] Built target nccl-net-example [ 9%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_prod_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u32.cu.o [ 10%] Linking C shared library ../../test/unit/plugins/libnccl-tuner-example.so cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-tuner/example && /usr/bin/cmake -E cmake_link_script CMakeFiles/nccl-tuner-example.dir/link.txt --verbose=1 /usr/bin/gcc -fPIC -O3 -DNDEBUG -shared -Wl,-soname,libnccl-tuner-example.so -o ../../test/unit/plugins/libnccl-tuner-example.so "CMakeFiles/nccl-tuner-example.dir/plugin.c.o" gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [ 10%] Built target nccl-tuner-example [ 10%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_prod_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u64.cu.o [ 11%] Linking CXX executable ../bin/ncclras cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/cmake -E cmake_link_script CMakeFiles/ncclras.dir/link.txt --verbose=1 [ 12%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_prod_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u8.cu.o /usr/bin/g++ -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 CMakeFiles/ncclras.dir/ras/client.cc.o -o ../bin/ncclras -lpthread -lrt -ldl gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [ 12%] Built target ncclras [ 12%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sum_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_bf16.cu.o [ 12%] Linking CXX shared library ../../lib/libnccl-profiler-example.so cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-profiler/example && /usr/bin/cmake -E cmake_link_script CMakeFiles/nccl-profiler-example.dir/link.txt --verbose=1 /usr/bin/g++ -fPIC -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -shared -Wl,-soname,libnccl-profiler-example.so -o ../../lib/libnccl-profiler-example.so "CMakeFiles/nccl-profiler-example.dir/plugin.cc.o" "CMakeFiles/nccl-profiler-example.dir/print_event.cc.o" cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-profiler/example && /usr/bin/cmake -E make_directory /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/test/unit/plugins cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/ext-profiler/example && /usr/bin/cmake -E copy /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/lib/libnccl-profiler-example.so /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/test/unit/plugins gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [ 12%] Built target nccl-profiler-example [ 12%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sum_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f16.cu.o [ 12%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sum_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f32.cu.o [ 13%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sum_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f64.cu.o [ 13%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sum_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e4m3.cu.o [ 13%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sum_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e5m2.cu.o [ 14%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sum_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u32.cu.o [ 14%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sum_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u64.cu.o [ 14%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sum_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u8.cu.o [ 14%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sumpostdiv_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u32.cu.o [ 15%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sumpostdiv_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u64.cu.o [ 15%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/all_reduce_sumpostdiv_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u8.cu.o [ 15%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/broadcast.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/broadcast.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/broadcast.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/broadcast.cu -o CMakeFiles/nccl_device.dir/gensrc/broadcast.cu.o [ 15%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_minmax_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_bf16.cu.o [ 16%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_minmax_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f16.cu.o [ 16%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_minmax_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f32.cu.o [ 16%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_minmax_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f64.cu.o [ 17%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_minmax_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e4m3.cu.o [ 17%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_minmax_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e5m2.cu.o [ 17%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_minmax_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u32.cu.o [ 17%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_minmax_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u64.cu.o [ 18%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_minmax_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u8.cu.o [ 18%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_premulsum_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_bf16.cu.o [ 18%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_premulsum_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f16.cu.o [ 19%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_premulsum_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f32.cu.o [ 19%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_premulsum_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f64.cu.o [ 19%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_premulsum_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e4m3.cu.o [ 19%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_premulsum_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e5m2.cu.o [ 20%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_premulsum_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u32.cu.o [ 20%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_premulsum_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u64.cu.o [ 20%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_premulsum_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u8.cu.o [ 21%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_prod_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_prod_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_prod_bf16.cu.o [ 21%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_prod_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f16.cu.o [ 21%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_prod_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f32.cu.o [ 21%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_prod_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f64.cu.o [ 22%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_prod_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e4m3.cu.o [ 22%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_prod_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e5m2.cu.o [ 22%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_prod_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u32.cu.o [ 23%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_prod_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u64.cu.o [ 23%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_prod_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u8.cu.o [ 23%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_bf16.cu.o [ 23%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f16.cu.o [ 24%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f32.cu.o [ 24%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f64.cu.o [ 24%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e4m3.cu.o [ 24%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e5m2.cu.o [ 25%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_i32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i32.cu.o [ 25%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_i64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i64.cu.o [ 25%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u32.cu.o [ 26%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u64.cu.o [ 26%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_minmax_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u8.cu.o [ 26%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_premulsum_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_bf16.cu.o [ 26%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_premulsum_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f16.cu.o [ 27%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_premulsum_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f32.cu.o [ 27%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_premulsum_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f64.cu.o [ 27%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_premulsum_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e4m3.cu.o [ 28%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_premulsum_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e5m2.cu.o [ 28%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_premulsum_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u32.cu.o [ 28%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_premulsum_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u64.cu.o [ 28%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_premulsum_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u8.cu.o [ 29%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_prod_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_bf16.cu.o [ 29%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_prod_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f16.cu.o [ 29%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_prod_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f32.cu.o [ 30%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_prod_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f64.cu.o [ 30%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_prod_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e4m3.cu.o [ 30%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_prod_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e5m2.cu.o [ 30%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_prod_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u32.cu.o [ 31%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_prod_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u64.cu.o [ 31%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_prod_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u8.cu.o [ 31%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sum_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_bf16.cu.o [ 31%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sum_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f16.cu.o [ 32%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sum_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f32.cu.o [ 32%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sum_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f64.cu.o [ 32%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sum_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e4m3.cu.o [ 33%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sum_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e5m2.cu.o [ 33%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sum_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u32.cu.o [ 33%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sum_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u64.cu.o [ 33%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sum_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u8.cu.o [ 34%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sumpostdiv_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u32.cu.o [ 34%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sumpostdiv_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u64.cu.o [ 34%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_scatter_sumpostdiv_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u8.cu.o [ 35%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sum_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sum_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sum_bf16.cu.o [ 35%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sum_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f16.cu.o [ 35%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sum_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f32.cu.o [ 35%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sum_f64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f64.cu.o [ 36%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sum_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e4m3.cu.o [ 36%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sum_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e5m2.cu.o [ 36%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sum_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u32.cu.o [ 37%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sum_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u64.cu.o [ 37%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sum_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u8.cu.o [ 37%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sumpostdiv_u32.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u32.cu.o [ 37%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u64.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u64.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u64.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sumpostdiv_u64.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u64.cu.o [ 38%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u8.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u8.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u8.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/reduce_sumpostdiv_u8.cu -o CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u8.cu.o [ 38%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/sendrecv.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/sendrecv.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/sendrecv.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/sendrecv.cu -o CMakeFiles/nccl_device.dir/gensrc/sendrecv.cu.o [ 38%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/device_table.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/device_table.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/device_table.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/device_table.cu -o CMakeFiles/nccl_device.dir/gensrc/device_table.cu.o [ 39%] Building CXX object src/device/CMakeFiles/nccl_device.dir/gensrc/host_table.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/usr/local/cuda/include/cccl -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/host_table.cc.o -MF CMakeFiles/nccl_device.dir/gensrc/host_table.cc.o.d -o CMakeFiles/nccl_device.dir/gensrc/host_table.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/host_table.cc [ 39%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_gather.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_gather.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/all_gather.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/all_gather.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/all_gather.cu.o [ 39%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/all_reduce_sum_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f32.cu.o [ 39%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f32.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f32.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f32.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/reduce_scatter_sum_f32.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f32.cu.o [ 40%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/all_reduce_sum_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f16.cu.o [ 40%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/reduce_scatter_sum_f16.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f16.cu.o [ 40%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/all_reduce_sum_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_bf16.cu.o [ 40%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_bf16.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_bf16.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_bf16.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/reduce_scatter_sum_bf16.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_bf16.cu.o [ 41%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/all_reduce_sum_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3.cu.o [ 41%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3_RSxLDMC_AGxSTMC.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3_RSxLDMC_AGxSTMC.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3_RSxLDMC_AGxSTMC.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/all_reduce_sum_f8e4m3_RSxLDMC_AGxSTMC.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3_RSxLDMC_AGxSTMC.cu.o [ 41%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/reduce_scatter_sum_f8e4m3.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3.cu.o [ 42%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3_LDMC.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3_LDMC.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3_LDMC.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/reduce_scatter_sum_f8e4m3_LDMC.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3_LDMC.cu.o [ 42%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/all_reduce_sum_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2.cu.o [ 42%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2_RSxLDMC_AGxSTMC.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2_RSxLDMC_AGxSTMC.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2_RSxLDMC_AGxSTMC.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/all_reduce_sum_f8e5m2_RSxLDMC_AGxSTMC.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2_RSxLDMC_AGxSTMC.cu.o [ 42%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/reduce_scatter_sum_f8e5m2.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2.cu.o [ 43%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2_LDMC.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2_LDMC.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2_LDMC.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/reduce_scatter_sum_f8e5m2_LDMC.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2_LDMC.cu.o [ 43%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/all_reduce.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce.cu.o [ 43%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter.cu.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/reduce_scatter.cu -o CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter.cu.o [ 44%] Building CXX object src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/sym_kernels_host.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/usr/local/cuda/include/cccl -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -MD -MT src/device/CMakeFiles/nccl_device.dir/gensrc/symmetric/sym_kernels_host.cc.o -MF CMakeFiles/nccl_device.dir/gensrc/symmetric/sym_kernels_host.cc.o.d -o CMakeFiles/nccl_device.dir/gensrc/symmetric/sym_kernels_host.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device/gensrc/symmetric/sym_kernels_host.cc [ 44%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/common.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/common.cu.o -MF CMakeFiles/nccl_device.dir/common.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/src/device/common.cu -o CMakeFiles/nccl_device.dir/common.cu.o [ 44%] Building CUDA object src/device/CMakeFiles/nccl_device.dir/onerank.cu.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/device && /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI --options-file CMakeFiles/nccl_device.dir/includes_CUDA.rsp --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -MD -MT src/device/CMakeFiles/nccl_device.dir/onerank.cu.o -MF CMakeFiles/nccl_device.dir/onerank.cu.o.d -x cu -rdc=true -c /builddir/build/BUILD/nccl-2.28.9-1/src/device/onerank.cu -o CMakeFiles/nccl_device.dir/onerank.cu.o gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [ 44%] Built target nccl_device /usr/bin/gmake -f src/CMakeFiles/nccl.dir/build.make src/CMakeFiles/nccl.dir/depend /usr/bin/gmake -f src/CMakeFiles/nccl_static.dir/build.make src/CMakeFiles/nccl_static.dir/depend gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /builddir/build/BUILD/nccl-2.28.9-1 /builddir/build/BUILD/nccl-2.28.9-1/src /builddir/build/BUILD/nccl-2.28.9-1/nccl_build /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/CMakeFiles/nccl.dir/DependInfo.cmake --color= gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /builddir/build/BUILD/nccl-2.28.9-1 /builddir/build/BUILD/nccl-2.28.9-1/src /builddir/build/BUILD/nccl-2.28.9-1/nccl_build /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src/CMakeFiles/nccl_static.dir/DependInfo.cmake --color= gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' /usr/bin/gmake -f src/CMakeFiles/nccl.dir/build.make src/CMakeFiles/nccl.dir/build gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' /usr/bin/gmake -f src/CMakeFiles/nccl_static.dir/build.make src/CMakeFiles/nccl_static.dir/build gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' gmake[2]: Entering directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [ 45%] Building CXX object src/CMakeFiles/nccl.dir/channel.cc.o [ 45%] Building CXX object src/CMakeFiles/nccl.dir/bootstrap.cc.o [ 45%] Building CXX object src/CMakeFiles/nccl.dir/collectives.cc.o [ 45%] Building CXX object src/CMakeFiles/nccl.dir/ce_coll.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/channel.cc.o -MF CMakeFiles/nccl.dir/channel.cc.o.d -o CMakeFiles/nccl.dir/channel.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/channel.cc [ 45%] Building CXX object src/CMakeFiles/nccl.dir/group.cc.o [ 45%] Building CXX object src/CMakeFiles/nccl.dir/init.cc.o [ 46%] Building CXX object src/CMakeFiles/nccl.dir/transport.cc.o [ 46%] Building CXX object src/CMakeFiles/nccl.dir/debug.cc.o [ 46%] Building CXX object src/CMakeFiles/nccl.dir/enqueue.cc.o [ 46%] Building CXX object src/CMakeFiles/nccl_static.dir/channel.cc.o [ 47%] Building CXX object src/CMakeFiles/nccl_static.dir/bootstrap.cc.o [ 47%] Building CXX object src/CMakeFiles/nccl.dir/proxy.cc.o [ 48%] Building CXX object src/CMakeFiles/nccl_static.dir/collectives.cc.o [ 48%] Building CXX object src/CMakeFiles/nccl_static.dir/ce_coll.cc.o [ 49%] Building CXX object src/CMakeFiles/nccl.dir/allocator.cc.o [ 49%] Building CXX object src/CMakeFiles/nccl_static.dir/debug.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/bootstrap.cc.o -MF CMakeFiles/nccl.dir/bootstrap.cc.o.d -o CMakeFiles/nccl.dir/bootstrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/bootstrap.cc [ 49%] Building CXX object src/CMakeFiles/nccl.dir/mnnvl.cc.o [ 49%] Building CXX object src/CMakeFiles/nccl.dir/sym_kernels.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/ce_coll.cc.o -MF CMakeFiles/nccl.dir/ce_coll.cc.o.d -o CMakeFiles/nccl.dir/ce_coll.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ce_coll.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/channel.cc.o -MF CMakeFiles/nccl_static.dir/channel.cc.o.d -o CMakeFiles/nccl_static.dir/channel.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/channel.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/collectives.cc.o -MF CMakeFiles/nccl.dir/collectives.cc.o.d -o CMakeFiles/nccl.dir/collectives.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/collectives.cc [ 49%] Building CXX object src/CMakeFiles/nccl_static.dir/group.cc.o [ 49%] Building CXX object src/CMakeFiles/nccl.dir/dev_runtime.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/debug.cc.o -MF CMakeFiles/nccl.dir/debug.cc.o.d -o CMakeFiles/nccl.dir/debug.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/debug.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/bootstrap.cc.o -MF CMakeFiles/nccl_static.dir/bootstrap.cc.o.d -o CMakeFiles/nccl_static.dir/bootstrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/bootstrap.cc [ 50%] Building CXX object src/CMakeFiles/nccl_static.dir/init.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/enqueue.cc.o -MF CMakeFiles/nccl.dir/enqueue.cc.o.d -o CMakeFiles/nccl.dir/enqueue.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/enqueue.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/ce_coll.cc.o -MF CMakeFiles/nccl_static.dir/ce_coll.cc.o.d -o CMakeFiles/nccl_static.dir/ce_coll.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ce_coll.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/group.cc.o -MF CMakeFiles/nccl.dir/group.cc.o.d -o CMakeFiles/nccl.dir/group.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/group.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/collectives.cc.o -MF CMakeFiles/nccl_static.dir/collectives.cc.o.d -o CMakeFiles/nccl_static.dir/collectives.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/collectives.cc [ 50%] Building CXX object src/CMakeFiles/nccl_static.dir/enqueue.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/init.cc.o -MF CMakeFiles/nccl.dir/init.cc.o.d -o CMakeFiles/nccl.dir/init.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/init.cc [ 51%] Building CXX object src/CMakeFiles/nccl.dir/transport/nvls.cc.o [ 51%] Building CXX object src/CMakeFiles/nccl_static.dir/proxy.cc.o [ 51%] Building CXX object src/CMakeFiles/nccl_static.dir/init_nvtx.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/proxy.cc.o -MF CMakeFiles/nccl.dir/proxy.cc.o.d -o CMakeFiles/nccl.dir/proxy.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/proxy.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/debug.cc.o -MF CMakeFiles/nccl_static.dir/debug.cc.o.d -o CMakeFiles/nccl_static.dir/debug.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/debug.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport.cc.o -MF CMakeFiles/nccl.dir/transport.cc.o.d -o CMakeFiles/nccl.dir/transport.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/group.cc.o -MF CMakeFiles/nccl_static.dir/group.cc.o.d -o CMakeFiles/nccl_static.dir/group.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/group.cc [ 52%] Building CXX object src/CMakeFiles/nccl.dir/transport/profiler.cc.o [ 52%] Building CXX object src/CMakeFiles/nccl_static.dir/transport.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/mnnvl.cc.o -MF CMakeFiles/nccl.dir/mnnvl.cc.o.d -o CMakeFiles/nccl.dir/mnnvl.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/mnnvl.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/enqueue.cc.o -MF CMakeFiles/nccl_static.dir/enqueue.cc.o.d -o CMakeFiles/nccl_static.dir/enqueue.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/enqueue.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/allocator.cc.o -MF CMakeFiles/nccl.dir/allocator.cc.o.d -o CMakeFiles/nccl.dir/allocator.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/allocator.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/init.cc.o -MF CMakeFiles/nccl_static.dir/init.cc.o.d -o CMakeFiles/nccl_static.dir/init.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/init.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/sym_kernels.cc.o -MF CMakeFiles/nccl.dir/sym_kernels.cc.o.d -o CMakeFiles/nccl.dir/sym_kernels.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/sym_kernels.cc [ 53%] Building CXX object src/CMakeFiles/nccl.dir/init_nvtx.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/init_nvtx.cc.o -MF CMakeFiles/nccl_static.dir/init_nvtx.cc.o.d -o CMakeFiles/nccl_static.dir/init_nvtx.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/init_nvtx.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/dev_runtime.cc.o -MF CMakeFiles/nccl.dir/dev_runtime.cc.o.d -o CMakeFiles/nccl.dir/dev_runtime.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/dev_runtime.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/proxy.cc.o -MF CMakeFiles/nccl_static.dir/proxy.cc.o.d -o CMakeFiles/nccl_static.dir/proxy.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/proxy.cc [ 53%] Building CXX object src/CMakeFiles/nccl_static.dir/allocator.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport.cc.o -MF CMakeFiles/nccl_static.dir/transport.cc.o.d -o CMakeFiles/nccl_static.dir/transport.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/init_nvtx.cc.o -MF CMakeFiles/nccl.dir/init_nvtx.cc.o.d -o CMakeFiles/nccl.dir/init_nvtx.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/init_nvtx.cc [ 53%] Building CXX object src/CMakeFiles/nccl_static.dir/mnnvl.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/nvls.cc.o -MF CMakeFiles/nccl.dir/transport/nvls.cc.o.d -o CMakeFiles/nccl.dir/transport/nvls.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/nvls.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/allocator.cc.o -MF CMakeFiles/nccl_static.dir/allocator.cc.o.d -o CMakeFiles/nccl_static.dir/allocator.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/allocator.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/profiler.cc.o -MF CMakeFiles/nccl.dir/transport/profiler.cc.o.d -o CMakeFiles/nccl.dir/transport/profiler.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/profiler.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/mnnvl.cc.o -MF CMakeFiles/nccl_static.dir/mnnvl.cc.o.d -o CMakeFiles/nccl_static.dir/mnnvl.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/mnnvl.cc [ 53%] Building CXX object src/CMakeFiles/nccl.dir/transport/net_socket.cc.o [ 53%] Building CXX object src/CMakeFiles/nccl_static.dir/sym_kernels.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/sym_kernels.cc.o -MF CMakeFiles/nccl_static.dir/sym_kernels.cc.o.d -o CMakeFiles/nccl_static.dir/sym_kernels.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/sym_kernels.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/net_socket.cc.o -MF CMakeFiles/nccl.dir/transport/net_socket.cc.o.d -o CMakeFiles/nccl.dir/transport/net_socket.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/net_socket.cc [ 54%] Building CXX object src/CMakeFiles/nccl_static.dir/dev_runtime.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/dev_runtime.cc.o -MF CMakeFiles/nccl_static.dir/dev_runtime.cc.o.d -o CMakeFiles/nccl_static.dir/dev_runtime.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/dev_runtime.cc [ 54%] Building CXX object src/CMakeFiles/nccl.dir/transport/p2p.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/p2p.cc.o -MF CMakeFiles/nccl.dir/transport/p2p.cc.o.d -o CMakeFiles/nccl.dir/transport/p2p.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/p2p.cc [ 54%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/nvls.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/nvls.cc.o -MF CMakeFiles/nccl_static.dir/transport/nvls.cc.o.d -o CMakeFiles/nccl_static.dir/transport/nvls.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/nvls.cc [ 55%] Building CXX object src/CMakeFiles/nccl.dir/transport/net.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/net.cc.o -MF CMakeFiles/nccl.dir/transport/net.cc.o.d -o CMakeFiles/nccl.dir/transport/net.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/net.cc [ 55%] Building CXX object src/CMakeFiles/nccl.dir/transport/net_ib.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/net_ib.cc.o -MF CMakeFiles/nccl.dir/transport/net_ib.cc.o.d -o CMakeFiles/nccl.dir/transport/net_ib.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/net_ib.cc [ 55%] Building CXX object src/CMakeFiles/nccl.dir/transport/coll_net.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/coll_net.cc.o -MF CMakeFiles/nccl.dir/transport/coll_net.cc.o.d -o CMakeFiles/nccl.dir/transport/coll_net.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/coll_net.cc [ 55%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/profiler.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/profiler.cc.o -MF CMakeFiles/nccl_static.dir/transport/profiler.cc.o.d -o CMakeFiles/nccl_static.dir/transport/profiler.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/profiler.cc [ 56%] Building CXX object src/CMakeFiles/nccl.dir/transport/shm.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/shm.cc.o -MF CMakeFiles/nccl.dir/transport/shm.cc.o.d -o CMakeFiles/nccl.dir/transport/shm.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/shm.cc [ 56%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/net_socket.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/net_socket.cc.o -MF CMakeFiles/nccl_static.dir/transport/net_socket.cc.o.d -o CMakeFiles/nccl_static.dir/transport/net_socket.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/net_socket.cc [ 56%] Building CXX object src/CMakeFiles/nccl.dir/transport/generic.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/generic.cc.o -MF CMakeFiles/nccl.dir/transport/generic.cc.o.d -o CMakeFiles/nccl.dir/transport/generic.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/generic.cc [ 56%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/gin_host_gdaki.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/gin_host_gdaki.cc.o -MF CMakeFiles/nccl.dir/transport/gdaki/gin_host_gdaki.cc.o.d -o CMakeFiles/nccl.dir/transport/gdaki/gin_host_gdaki.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/gin_host_gdaki.cc [ 57%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/p2p.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/p2p.cc.o -MF CMakeFiles/nccl_static.dir/transport/p2p.cc.o.d -o CMakeFiles/nccl_static.dir/transport/p2p.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/p2p.cc [ 57%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/net.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/net.cc.o -MF CMakeFiles/nccl_static.dir/transport/net.cc.o.d -o CMakeFiles/nccl_static.dir/transport/net.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/net.cc [ 57%] Building CXX object src/CMakeFiles/nccl.dir/misc/strongstream.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/strongstream.cc.o -MF CMakeFiles/nccl.dir/misc/strongstream.cc.o.d -o CMakeFiles/nccl.dir/misc/strongstream.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/strongstream.cc [ 58%] Building CXX object src/CMakeFiles/nccl.dir/misc/socket.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/socket.cc.o -MF CMakeFiles/nccl.dir/misc/socket.cc.o.d -o CMakeFiles/nccl.dir/misc/socket.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/socket.cc [ 58%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/net_ib.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/net_ib.cc.o -MF CMakeFiles/nccl_static.dir/transport/net_ib.cc.o.d -o CMakeFiles/nccl_static.dir/transport/net_ib.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/net_ib.cc [ 58%] Building CXX object src/CMakeFiles/nccl.dir/misc/ibvwrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/ibvwrap.cc.o -MF CMakeFiles/nccl.dir/misc/ibvwrap.cc.o.d -o CMakeFiles/nccl.dir/misc/ibvwrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/ibvwrap.cc [ 58%] Building CXX object src/CMakeFiles/nccl.dir/misc/mlx5dvsymbols.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/mlx5dvsymbols.cc.o -MF CMakeFiles/nccl.dir/misc/mlx5dvsymbols.cc.o.d -o CMakeFiles/nccl.dir/misc/mlx5dvsymbols.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/mlx5dvsymbols.cc [ 59%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/coll_net.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/coll_net.cc.o -MF CMakeFiles/nccl_static.dir/transport/coll_net.cc.o.d -o CMakeFiles/nccl_static.dir/transport/coll_net.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/coll_net.cc [ 59%] Building CXX object src/CMakeFiles/nccl.dir/misc/mlx5dvwrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/mlx5dvwrap.cc.o -MF CMakeFiles/nccl.dir/misc/mlx5dvwrap.cc.o.d -o CMakeFiles/nccl.dir/misc/mlx5dvwrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/mlx5dvwrap.cc [ 59%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/shm.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/shm.cc.o -MF CMakeFiles/nccl_static.dir/transport/shm.cc.o.d -o CMakeFiles/nccl_static.dir/transport/shm.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/shm.cc [ 60%] Building CXX object src/CMakeFiles/nccl.dir/misc/cudawrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/cudawrap.cc.o -MF CMakeFiles/nccl.dir/misc/cudawrap.cc.o.d -o CMakeFiles/nccl.dir/misc/cudawrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/cudawrap.cc [ 60%] Building CXX object src/CMakeFiles/nccl.dir/misc/param.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/param.cc.o -MF CMakeFiles/nccl.dir/misc/param.cc.o.d -o CMakeFiles/nccl.dir/misc/param.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/param.cc [ 60%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/generic.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/generic.cc.o -MF CMakeFiles/nccl_static.dir/transport/generic.cc.o.d -o CMakeFiles/nccl_static.dir/transport/generic.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/generic.cc [ 60%] Building CXX object src/CMakeFiles/nccl.dir/misc/ipcsocket.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/ipcsocket.cc.o -MF CMakeFiles/nccl.dir/misc/ipcsocket.cc.o.d -o CMakeFiles/nccl.dir/misc/ipcsocket.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/ipcsocket.cc [ 61%] Building CXX object src/CMakeFiles/nccl.dir/misc/utils.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/utils.cc.o -MF CMakeFiles/nccl.dir/misc/utils.cc.o.d -o CMakeFiles/nccl.dir/misc/utils.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/utils.cc [ 61%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/gin_host_gdaki.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/gin_host_gdaki.cc.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/gin_host_gdaki.cc.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/gin_host_gdaki.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/gin_host_gdaki.cc [ 61%] Building CXX object src/CMakeFiles/nccl.dir/misc/shmutils.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/shmutils.cc.o -MF CMakeFiles/nccl.dir/misc/shmutils.cc.o.d -o CMakeFiles/nccl.dir/misc/shmutils.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/shmutils.cc [ 62%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/strongstream.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/strongstream.cc.o -MF CMakeFiles/nccl_static.dir/misc/strongstream.cc.o.d -o CMakeFiles/nccl_static.dir/misc/strongstream.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/strongstream.cc [ 62%] Building CXX object src/CMakeFiles/nccl.dir/misc/nvmlwrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/nvmlwrap.cc.o -MF CMakeFiles/nccl.dir/misc/nvmlwrap.cc.o.d -o CMakeFiles/nccl.dir/misc/nvmlwrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/nvmlwrap.cc [ 62%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/socket.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/socket.cc.o -MF CMakeFiles/nccl_static.dir/misc/socket.cc.o.d -o CMakeFiles/nccl_static.dir/misc/socket.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/socket.cc [ 62%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/ibvwrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/ibvwrap.cc.o -MF CMakeFiles/nccl_static.dir/misc/ibvwrap.cc.o.d -o CMakeFiles/nccl_static.dir/misc/ibvwrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/ibvwrap.cc [ 62%] Building CXX object src/CMakeFiles/nccl.dir/misc/argcheck.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/argcheck.cc.o -MF CMakeFiles/nccl.dir/misc/argcheck.cc.o.d -o CMakeFiles/nccl.dir/misc/argcheck.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/argcheck.cc [ 63%] Building CXX object src/CMakeFiles/nccl.dir/misc/gdrwrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/gdrwrap.cc.o -MF CMakeFiles/nccl.dir/misc/gdrwrap.cc.o.d -o CMakeFiles/nccl.dir/misc/gdrwrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/gdrwrap.cc [ 64%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/mlx5dvsymbols.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/mlx5dvsymbols.cc.o -MF CMakeFiles/nccl_static.dir/misc/mlx5dvsymbols.cc.o.d -o CMakeFiles/nccl_static.dir/misc/mlx5dvsymbols.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/mlx5dvsymbols.cc [ 64%] Building CXX object src/CMakeFiles/nccl.dir/misc/ibvsymbols.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/misc/ibvsymbols.cc.o -MF CMakeFiles/nccl.dir/misc/ibvsymbols.cc.o.d -o CMakeFiles/nccl.dir/misc/ibvsymbols.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/ibvsymbols.cc [ 64%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/mlx5dvwrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/mlx5dvwrap.cc.o -MF CMakeFiles/nccl_static.dir/misc/mlx5dvwrap.cc.o.d -o CMakeFiles/nccl_static.dir/misc/mlx5dvwrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/mlx5dvwrap.cc [ 64%] Building CXX object src/CMakeFiles/nccl.dir/register/register.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/register/register.cc.o -MF CMakeFiles/nccl.dir/register/register.cc.o.d -o CMakeFiles/nccl.dir/register/register.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/register/register.cc [ 65%] Building CXX object src/CMakeFiles/nccl.dir/register/coll_reg.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/register/coll_reg.cc.o -MF CMakeFiles/nccl.dir/register/coll_reg.cc.o.d -o CMakeFiles/nccl.dir/register/coll_reg.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/register/coll_reg.cc [ 65%] Building CXX object src/CMakeFiles/nccl.dir/register/sendrecv_reg.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/register/sendrecv_reg.cc.o -MF CMakeFiles/nccl.dir/register/sendrecv_reg.cc.o.d -o CMakeFiles/nccl.dir/register/sendrecv_reg.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/register/sendrecv_reg.cc [ 65%] Building CXX object src/CMakeFiles/nccl.dir/graph/topo.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/graph/topo.cc.o -MF CMakeFiles/nccl.dir/graph/topo.cc.o.d -o CMakeFiles/nccl.dir/graph/topo.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/topo.cc [ 65%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/cudawrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/cudawrap.cc.o -MF CMakeFiles/nccl_static.dir/misc/cudawrap.cc.o.d -o CMakeFiles/nccl_static.dir/misc/cudawrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/cudawrap.cc [ 65%] Building CXX object src/CMakeFiles/nccl.dir/graph/tuning.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/graph/tuning.cc.o -MF CMakeFiles/nccl.dir/graph/tuning.cc.o.d -o CMakeFiles/nccl.dir/graph/tuning.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/tuning.cc [ 66%] Building CXX object src/CMakeFiles/nccl.dir/graph/xml.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/graph/xml.cc.o -MF CMakeFiles/nccl.dir/graph/xml.cc.o.d -o CMakeFiles/nccl.dir/graph/xml.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/xml.cc [ 66%] Building CXX object src/CMakeFiles/nccl.dir/graph/search.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/graph/search.cc.o -MF CMakeFiles/nccl.dir/graph/search.cc.o.d -o CMakeFiles/nccl.dir/graph/search.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/search.cc [ 66%] Building CXX object src/CMakeFiles/nccl.dir/graph/paths.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/graph/paths.cc.o -MF CMakeFiles/nccl.dir/graph/paths.cc.o.d -o CMakeFiles/nccl.dir/graph/paths.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/paths.cc [ 66%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/param.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/param.cc.o -MF CMakeFiles/nccl_static.dir/misc/param.cc.o.d -o CMakeFiles/nccl_static.dir/misc/param.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/param.cc [ 67%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/ipcsocket.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/ipcsocket.cc.o -MF CMakeFiles/nccl_static.dir/misc/ipcsocket.cc.o.d -o CMakeFiles/nccl_static.dir/misc/ipcsocket.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/ipcsocket.cc [ 68%] Building CXX object src/CMakeFiles/nccl.dir/graph/connect.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/graph/connect.cc.o -MF CMakeFiles/nccl.dir/graph/connect.cc.o.d -o CMakeFiles/nccl.dir/graph/connect.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/connect.cc [ 68%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/utils.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/utils.cc.o -MF CMakeFiles/nccl_static.dir/misc/utils.cc.o.d -o CMakeFiles/nccl_static.dir/misc/utils.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/utils.cc [ 68%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/shmutils.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/shmutils.cc.o -MF CMakeFiles/nccl_static.dir/misc/shmutils.cc.o.d -o CMakeFiles/nccl_static.dir/misc/shmutils.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/shmutils.cc [ 68%] Building CXX object src/CMakeFiles/nccl.dir/graph/rings.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/graph/rings.cc.o -MF CMakeFiles/nccl.dir/graph/rings.cc.o.d -o CMakeFiles/nccl.dir/graph/rings.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/rings.cc [ 68%] Building CXX object src/CMakeFiles/nccl.dir/graph/trees.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/graph/trees.cc.o -MF CMakeFiles/nccl.dir/graph/trees.cc.o.d -o CMakeFiles/nccl.dir/graph/trees.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/trees.cc [ 69%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/nvmlwrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/nvmlwrap.cc.o -MF CMakeFiles/nccl_static.dir/misc/nvmlwrap.cc.o.d -o CMakeFiles/nccl_static.dir/misc/nvmlwrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/nvmlwrap.cc [ 69%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/argcheck.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/argcheck.cc.o -MF CMakeFiles/nccl_static.dir/misc/argcheck.cc.o.d -o CMakeFiles/nccl_static.dir/misc/argcheck.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/argcheck.cc [ 69%] Building CXX object src/CMakeFiles/nccl.dir/plugin/net.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/net.cc.o -MF CMakeFiles/nccl.dir/plugin/net.cc.o.d -o CMakeFiles/nccl.dir/plugin/net.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net.cc [ 70%] Building CXX object src/CMakeFiles/nccl.dir/plugin/profiler.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/profiler.cc.o -MF CMakeFiles/nccl.dir/plugin/profiler.cc.o.d -o CMakeFiles/nccl.dir/plugin/profiler.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler.cc [ 70%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/gdrwrap.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/gdrwrap.cc.o -MF CMakeFiles/nccl_static.dir/misc/gdrwrap.cc.o.d -o CMakeFiles/nccl_static.dir/misc/gdrwrap.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/gdrwrap.cc [ 70%] Building CXX object src/CMakeFiles/nccl_static.dir/misc/ibvsymbols.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/misc/ibvsymbols.cc.o -MF CMakeFiles/nccl_static.dir/misc/ibvsymbols.cc.o.d -o CMakeFiles/nccl_static.dir/misc/ibvsymbols.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/misc/ibvsymbols.cc [ 70%] Building CXX object src/CMakeFiles/nccl.dir/plugin/plugin_open.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/plugin_open.cc.o -MF CMakeFiles/nccl.dir/plugin/plugin_open.cc.o.d -o CMakeFiles/nccl.dir/plugin/plugin_open.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/plugin_open.cc [ 70%] Building CXX object src/CMakeFiles/nccl.dir/plugin/tuner.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/tuner.cc.o -MF CMakeFiles/nccl.dir/plugin/tuner.cc.o.d -o CMakeFiles/nccl.dir/plugin/tuner.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner.cc [ 71%] Building CXX object src/CMakeFiles/nccl_static.dir/register/register.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/register/register.cc.o -MF CMakeFiles/nccl_static.dir/register/register.cc.o.d -o CMakeFiles/nccl_static.dir/register/register.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/register/register.cc [ 72%] Building CXX object src/CMakeFiles/nccl.dir/plugin/env.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/env.cc.o -MF CMakeFiles/nccl.dir/plugin/env.cc.o.d -o CMakeFiles/nccl.dir/plugin/env.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/env.cc [ 72%] Building CXX object src/CMakeFiles/nccl_static.dir/register/coll_reg.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/register/coll_reg.cc.o -MF CMakeFiles/nccl_static.dir/register/coll_reg.cc.o.d -o CMakeFiles/nccl_static.dir/register/coll_reg.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/register/coll_reg.cc [ 72%] Building CXX object src/CMakeFiles/nccl.dir/plugin/net/net_v9.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/net/net_v9.cc.o -MF CMakeFiles/nccl.dir/plugin/net/net_v9.cc.o.d -o CMakeFiles/nccl.dir/plugin/net/net_v9.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v9.cc [ 72%] Building CXX object src/CMakeFiles/nccl_static.dir/register/sendrecv_reg.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/register/sendrecv_reg.cc.o -MF CMakeFiles/nccl_static.dir/register/sendrecv_reg.cc.o.d -o CMakeFiles/nccl_static.dir/register/sendrecv_reg.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/register/sendrecv_reg.cc [ 72%] Building CXX object src/CMakeFiles/nccl_static.dir/graph/topo.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/graph/topo.cc.o -MF CMakeFiles/nccl_static.dir/graph/topo.cc.o.d -o CMakeFiles/nccl_static.dir/graph/topo.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/topo.cc [ 72%] Building CXX object src/CMakeFiles/nccl.dir/plugin/net/net_v6.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/net/net_v6.cc.o -MF CMakeFiles/nccl.dir/plugin/net/net_v6.cc.o.d -o CMakeFiles/nccl.dir/plugin/net/net_v6.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v6.cc [ 72%] Building CXX object src/CMakeFiles/nccl.dir/plugin/net/net_v7.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/net/net_v7.cc.o -MF CMakeFiles/nccl.dir/plugin/net/net_v7.cc.o.d -o CMakeFiles/nccl.dir/plugin/net/net_v7.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v7.cc [ 73%] Building CXX object src/CMakeFiles/nccl.dir/plugin/net/net_v8.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/net/net_v8.cc.o -MF CMakeFiles/nccl.dir/plugin/net/net_v8.cc.o.d -o CMakeFiles/nccl.dir/plugin/net/net_v8.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v8.cc [ 73%] Building CXX object src/CMakeFiles/nccl.dir/plugin/net/net_v10.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/net/net_v10.cc.o -MF CMakeFiles/nccl.dir/plugin/net/net_v10.cc.o.d -o CMakeFiles/nccl.dir/plugin/net/net_v10.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v10.cc [ 74%] Building CXX object src/CMakeFiles/nccl_static.dir/graph/tuning.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/graph/tuning.cc.o -MF CMakeFiles/nccl_static.dir/graph/tuning.cc.o.d -o CMakeFiles/nccl_static.dir/graph/tuning.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/tuning.cc [ 74%] Building CXX object src/CMakeFiles/nccl.dir/plugin/net/net_v11.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/net/net_v11.cc.o -MF CMakeFiles/nccl.dir/plugin/net/net_v11.cc.o.d -o CMakeFiles/nccl.dir/plugin/net/net_v11.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v11.cc [ 74%] Building CXX object src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v3.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v3.cc.o -MF CMakeFiles/nccl.dir/plugin/profiler/profiler_v3.cc.o.d -o CMakeFiles/nccl.dir/plugin/profiler/profiler_v3.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v3.cc [ 74%] Building CXX object src/CMakeFiles/nccl_static.dir/graph/xml.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/graph/xml.cc.o -MF CMakeFiles/nccl_static.dir/graph/xml.cc.o.d -o CMakeFiles/nccl_static.dir/graph/xml.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/xml.cc [ 74%] Building CXX object src/CMakeFiles/nccl_static.dir/graph/search.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/graph/search.cc.o -MF CMakeFiles/nccl_static.dir/graph/search.cc.o.d -o CMakeFiles/nccl_static.dir/graph/search.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/search.cc [ 75%] Building CXX object src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v4.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v4.cc.o -MF CMakeFiles/nccl.dir/plugin/profiler/profiler_v4.cc.o.d -o CMakeFiles/nccl.dir/plugin/profiler/profiler_v4.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v4.cc [ 76%] Building CXX object src/CMakeFiles/nccl_static.dir/graph/paths.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/graph/paths.cc.o -MF CMakeFiles/nccl_static.dir/graph/paths.cc.o.d -o CMakeFiles/nccl_static.dir/graph/paths.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/paths.cc [ 76%] Building CXX object src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v1.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v1.cc.o -MF CMakeFiles/nccl.dir/plugin/profiler/profiler_v1.cc.o.d -o CMakeFiles/nccl.dir/plugin/profiler/profiler_v1.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v1.cc [ 76%] Building CXX object src/CMakeFiles/nccl_static.dir/graph/connect.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/graph/connect.cc.o -MF CMakeFiles/nccl_static.dir/graph/connect.cc.o.d -o CMakeFiles/nccl_static.dir/graph/connect.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/connect.cc [ 76%] Building CXX object src/CMakeFiles/nccl_static.dir/graph/rings.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/graph/rings.cc.o -MF CMakeFiles/nccl_static.dir/graph/rings.cc.o.d -o CMakeFiles/nccl_static.dir/graph/rings.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/rings.cc [ 76%] Building CXX object src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v2.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v2.cc.o -MF CMakeFiles/nccl.dir/plugin/profiler/profiler_v2.cc.o.d -o CMakeFiles/nccl.dir/plugin/profiler/profiler_v2.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v2.cc [ 76%] Building CXX object src/CMakeFiles/nccl_static.dir/graph/trees.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/graph/trees.cc.o -MF CMakeFiles/nccl_static.dir/graph/trees.cc.o.d -o CMakeFiles/nccl_static.dir/graph/trees.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/graph/trees.cc [ 77%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/net.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/net.cc.o -MF CMakeFiles/nccl_static.dir/plugin/net.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/net.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net.cc [ 77%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/profiler.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/profiler.cc.o -MF CMakeFiles/nccl_static.dir/plugin/profiler.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/profiler.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler.cc [ 77%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/plugin_open.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/plugin_open.cc.o -MF CMakeFiles/nccl_static.dir/plugin/plugin_open.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/plugin_open.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/plugin_open.cc [ 78%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/tuner.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/tuner.cc.o -MF CMakeFiles/nccl_static.dir/plugin/tuner.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/tuner.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner.cc [ 78%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/env.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/env.cc.o -MF CMakeFiles/nccl_static.dir/plugin/env.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/env.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/env.cc [ 79%] Building CXX object src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v5.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/profiler/profiler_v5.cc.o -MF CMakeFiles/nccl.dir/plugin/profiler/profiler_v5.cc.o.d -o CMakeFiles/nccl.dir/plugin/profiler/profiler_v5.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v5.cc [ 79%] Building CXX object src/CMakeFiles/nccl.dir/plugin/tuner/tuner_v2.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/tuner/tuner_v2.cc.o -MF CMakeFiles/nccl.dir/plugin/tuner/tuner_v2.cc.o.d -o CMakeFiles/nccl.dir/plugin/tuner/tuner_v2.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner/tuner_v2.cc [ 79%] Building CXX object src/CMakeFiles/nccl.dir/plugin/tuner/tuner_v3.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/tuner/tuner_v3.cc.o -MF CMakeFiles/nccl.dir/plugin/tuner/tuner_v3.cc.o.d -o CMakeFiles/nccl.dir/plugin/tuner/tuner_v3.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner/tuner_v3.cc [ 79%] Building CXX object src/CMakeFiles/nccl.dir/plugin/tuner/tuner_v4.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/tuner/tuner_v4.cc.o -MF CMakeFiles/nccl.dir/plugin/tuner/tuner_v4.cc.o.d -o CMakeFiles/nccl.dir/plugin/tuner/tuner_v4.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner/tuner_v4.cc [ 80%] Building CXX object src/CMakeFiles/nccl.dir/plugin/tuner/tuner_v5.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/tuner/tuner_v5.cc.o -MF CMakeFiles/nccl.dir/plugin/tuner/tuner_v5.cc.o.d -o CMakeFiles/nccl.dir/plugin/tuner/tuner_v5.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner/tuner_v5.cc [ 80%] Building CXX object src/CMakeFiles/nccl.dir/plugin/env/env_v1.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/plugin/env/env_v1.cc.o -MF CMakeFiles/nccl.dir/plugin/env/env_v1.cc.o.d -o CMakeFiles/nccl.dir/plugin/env/env_v1.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/env/env_v1.cc [ 80%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/net/net_v9.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/net/net_v9.cc.o -MF CMakeFiles/nccl_static.dir/plugin/net/net_v9.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/net/net_v9.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v9.cc [ 80%] Building CXX object src/CMakeFiles/nccl.dir/ras/collectives.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/ras/collectives.cc.o -MF CMakeFiles/nccl.dir/ras/collectives.cc.o.d -o CMakeFiles/nccl.dir/ras/collectives.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/collectives.cc [ 81%] Building CXX object src/CMakeFiles/nccl.dir/ras/rasnet.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/ras/rasnet.cc.o -MF CMakeFiles/nccl.dir/ras/rasnet.cc.o.d -o CMakeFiles/nccl.dir/ras/rasnet.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/rasnet.cc [ 81%] Building CXX object src/CMakeFiles/nccl.dir/ras/peers.cc.o [ 81%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/net/net_v6.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/net/net_v6.cc.o -MF CMakeFiles/nccl_static.dir/plugin/net/net_v6.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/net/net_v6.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v6.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/ras/peers.cc.o -MF CMakeFiles/nccl.dir/ras/peers.cc.o.d -o CMakeFiles/nccl.dir/ras/peers.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/peers.cc [ 81%] Building CXX object src/CMakeFiles/nccl.dir/ras/ras.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/ras/ras.cc.o -MF CMakeFiles/nccl.dir/ras/ras.cc.o.d -o CMakeFiles/nccl.dir/ras/ras.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/ras.cc [ 81%] Building CXX object src/CMakeFiles/nccl.dir/ras/client_support.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/ras/client_support.cc.o -MF CMakeFiles/nccl.dir/ras/client_support.cc.o.d -o CMakeFiles/nccl.dir/ras/client_support.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/client_support.cc [ 82%] Building CXX object src/CMakeFiles/nccl.dir/nccl_device/core.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/nccl_device/core.cc.o -MF CMakeFiles/nccl.dir/nccl_device/core.cc.o.d -o CMakeFiles/nccl.dir/nccl_device/core.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/nccl_device/core.cc [ 83%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/net/net_v7.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/net/net_v7.cc.o -MF CMakeFiles/nccl_static.dir/plugin/net/net_v7.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/net/net_v7.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v7.cc [ 83%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/net/net_v8.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/net/net_v8.cc.o -MF CMakeFiles/nccl_static.dir/plugin/net/net_v8.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/net/net_v8.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v8.cc [ 83%] Building CXX object src/CMakeFiles/nccl.dir/nccl_device/ll_a2a.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/nccl_device/ll_a2a.cc.o -MF CMakeFiles/nccl.dir/nccl_device/ll_a2a.cc.o.d -o CMakeFiles/nccl.dir/nccl_device/ll_a2a.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/nccl_device/ll_a2a.cc [ 83%] Building CXX object src/CMakeFiles/nccl.dir/nccl_device/lsa_barrier.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/nccl_device/lsa_barrier.cc.o -MF CMakeFiles/nccl.dir/nccl_device/lsa_barrier.cc.o.d -o CMakeFiles/nccl.dir/nccl_device/lsa_barrier.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/nccl_device/lsa_barrier.cc [ 84%] Building CXX object src/CMakeFiles/nccl.dir/nccl_device/gin_barrier.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/nccl_device/gin_barrier.cc.o -MF CMakeFiles/nccl.dir/nccl_device/gin_barrier.cc.o.d -o CMakeFiles/nccl.dir/nccl_device/gin_barrier.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/nccl_device/gin_barrier.cc [ 84%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/net/net_v10.cc.o [ 84%] Building CXX object src/CMakeFiles/nccl.dir/gin/gin_host.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/net/net_v10.cc.o -MF CMakeFiles/nccl_static.dir/plugin/net/net_v10.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/net/net_v10.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v10.cc cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/gin/gin_host.cc.o -MF CMakeFiles/nccl.dir/gin/gin_host.cc.o.d -o CMakeFiles/nccl.dir/gin/gin_host.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/gin/gin_host.cc [ 84%] Building CXX object src/CMakeFiles/nccl.dir/scheduler/symmetric_sched.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/scheduler/symmetric_sched.cc.o -MF CMakeFiles/nccl.dir/scheduler/symmetric_sched.cc.o.d -o CMakeFiles/nccl.dir/scheduler/symmetric_sched.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/scheduler/symmetric_sched.cc [ 84%] Building CXX object src/CMakeFiles/nccl.dir/gin/gin_host_proxy.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/gin/gin_host_proxy.cc.o -MF CMakeFiles/nccl.dir/gin/gin_host_proxy.cc.o.d -o CMakeFiles/nccl.dir/gin/gin_host_proxy.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/gin/gin_host_proxy.cc [ 85%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/net/net_v11.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/net/net_v11.cc.o -MF CMakeFiles/nccl_static.dir/plugin/net/net_v11.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/net/net_v11.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/net/net_v11.cc [ 86%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp [ 86%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp [ 86%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp [ 87%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp [ 87%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v3.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v3.cc.o -MF CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v3.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v3.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v3.cc [ 87%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v4.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v4.cc.o -MF CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v4.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v4.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v4.cc [ 87%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp [ 87%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp [ 87%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v1.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v1.cc.o -MF CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v1.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v1.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v1.cc [ 87%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp [ 88%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v2.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v2.cc.o -MF CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v2.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v2.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v2.cc [ 89%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp [ 89%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp [ 89%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v5.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v5.cc.o -MF CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v5.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v5.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/profiler/profiler_v5.cc [ 89%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v2.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v2.cc.o -MF CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v2.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v2.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner/tuner_v2.cc [ 89%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp [ 89%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp [ 90%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp [ 91%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v3.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v3.cc.o -MF CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v3.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v3.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner/tuner_v3.cc [ 91%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v4.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v4.cc.o -MF CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v4.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v4.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner/tuner_v4.cc [ 91%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v5.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v5.cc.o -MF CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v5.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v5.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/tuner/tuner_v5.cc [ 91%] Building CXX object src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -Dnccl_EXPORTS -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o -MF CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o.d -o CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp [ 91%] Building CXX object src/CMakeFiles/nccl_static.dir/plugin/env/env_v1.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/plugin/env/env_v1.cc.o -MF CMakeFiles/nccl_static.dir/plugin/env/env_v1.cc.o.d -o CMakeFiles/nccl_static.dir/plugin/env/env_v1.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/plugin/env/env_v1.cc [ 92%] Building CXX object src/CMakeFiles/nccl_static.dir/ras/collectives.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/ras/collectives.cc.o -MF CMakeFiles/nccl_static.dir/ras/collectives.cc.o.d -o CMakeFiles/nccl_static.dir/ras/collectives.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/collectives.cc [ 92%] Building CXX object src/CMakeFiles/nccl_static.dir/ras/rasnet.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/ras/rasnet.cc.o -MF CMakeFiles/nccl_static.dir/ras/rasnet.cc.o.d -o CMakeFiles/nccl_static.dir/ras/rasnet.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/rasnet.cc [ 92%] Building CXX object src/CMakeFiles/nccl_static.dir/ras/peers.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/ras/peers.cc.o -MF CMakeFiles/nccl_static.dir/ras/peers.cc.o.d -o CMakeFiles/nccl_static.dir/ras/peers.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/peers.cc [ 92%] Building CXX object src/CMakeFiles/nccl_static.dir/ras/ras.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/ras/ras.cc.o -MF CMakeFiles/nccl_static.dir/ras/ras.cc.o.d -o CMakeFiles/nccl_static.dir/ras/ras.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/ras.cc [ 93%] Building CXX object src/CMakeFiles/nccl_static.dir/ras/client_support.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/ras/client_support.cc.o -MF CMakeFiles/nccl_static.dir/ras/client_support.cc.o.d -o CMakeFiles/nccl_static.dir/ras/client_support.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/ras/client_support.cc [ 93%] Building CXX object src/CMakeFiles/nccl_static.dir/nccl_device/core.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/nccl_device/core.cc.o -MF CMakeFiles/nccl_static.dir/nccl_device/core.cc.o.d -o CMakeFiles/nccl_static.dir/nccl_device/core.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/nccl_device/core.cc [ 93%] Building CXX object src/CMakeFiles/nccl_static.dir/nccl_device/ll_a2a.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/nccl_device/ll_a2a.cc.o -MF CMakeFiles/nccl_static.dir/nccl_device/ll_a2a.cc.o.d -o CMakeFiles/nccl_static.dir/nccl_device/ll_a2a.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/nccl_device/ll_a2a.cc [ 94%] Building CXX object src/CMakeFiles/nccl_static.dir/nccl_device/lsa_barrier.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/nccl_device/lsa_barrier.cc.o -MF CMakeFiles/nccl_static.dir/nccl_device/lsa_barrier.cc.o.d -o CMakeFiles/nccl_static.dir/nccl_device/lsa_barrier.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/nccl_device/lsa_barrier.cc [ 94%] Building CXX object src/CMakeFiles/nccl_static.dir/scheduler/symmetric_sched.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/scheduler/symmetric_sched.cc.o -MF CMakeFiles/nccl_static.dir/scheduler/symmetric_sched.cc.o.d -o CMakeFiles/nccl_static.dir/scheduler/symmetric_sched.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/scheduler/symmetric_sched.cc [ 94%] Building CXX object src/CMakeFiles/nccl_static.dir/nccl_device/gin_barrier.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/nccl_device/gin_barrier.cc.o -MF CMakeFiles/nccl_static.dir/nccl_device/gin_barrier.cc.o.d -o CMakeFiles/nccl_static.dir/nccl_device/gin_barrier.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/nccl_device/gin_barrier.cc [ 94%] Building CXX object src/CMakeFiles/nccl_static.dir/gin/gin_host.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/gin/gin_host.cc.o -MF CMakeFiles/nccl_static.dir/gin/gin_host.cc.o.d -o CMakeFiles/nccl_static.dir/gin/gin_host.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/gin/gin_host.cc [ 95%] Building CXX object src/CMakeFiles/nccl_static.dir/gin/gin_host_proxy.cc.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/gin/gin_host_proxy.cc.o -MF CMakeFiles/nccl_static.dir/gin/gin_host_proxy.cc.o.d -o CMakeFiles/nccl_static.dir/gin/gin_host_proxy.cc.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/gin/gin_host_proxy.cc [ 95%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp [ 95%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp [ 96%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp [ 96%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp [ 96%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp [ 96%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp [ 96%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp [ 97%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp [ 97%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp [ 98%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp [ 98%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp [ 98%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp [ 98%] Building CXX object src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/g++ -DCUDA_MAJOR=12 -DCUDA_MINOR=0 -DDOCA_VERBS_USE_CUDA_WRAPPER -DDOCA_VERBS_USE_NET_WRAPPER -DNCCL_GIN_PROXY_ENABLE=1 -DNCCL_MAJOR=2 -DNCCL_MINOR=28 -DNCCL_PATCH=9 -DNCCL_USE_CMAKE -DNCCL_VERSION_CODE=22809 -DPROFAPI -I/builddir/build/BUILD/nccl-2.28.9-1/src/device -I/builddir/build/BUILD/nccl-2.28.9-1/src/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/include/plugin -I/usr/local/cuda/include -I/builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/include -I/usr/local/cuda/include/cccl -I/builddir/build/BUILD/nccl-2.28.9-1/nccl_build/include -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -fPIC -fmacro-prefix-map=/builddir/build/BUILD/nccl-2.28.9-1/src/= -MD -MT src/CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o -MF CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o.d -o CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o -c /builddir/build/BUILD/nccl-2.28.9-1/src/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp [ 98%] Linking CUDA device code CMakeFiles/nccl.dir/cmake_device_link.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/cmake -E cmake_link_script CMakeFiles/nccl.dir/dlink.txt --verbose=1 /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -Xlinker=--version-script=/builddir/build/BUILD/nccl-2.28.9-1/src/libnccl.map -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink --options-file CMakeFiles/nccl.dir/deviceObjects1.rsp -o CMakeFiles/nccl.dir/cmake_device_link.o --options-file CMakeFiles/nccl.dir/deviceLinkLibs.rsp [ 99%] Linking CUDA device code CMakeFiles/nccl_static.dir/cmake_device_link.o cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/cmake -E cmake_link_script CMakeFiles/nccl_static.dir/dlink.txt --verbose=1 /usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC --expt-extended-lambda -Xptxas -maxrregcount=96 -Xfatbin -compress-all -fPIC -O3 --generate-code=arch=compute_70,code=[compute_70,sm_70] --generate-code=arch=compute_75,code=[compute_75,sm_75] --generate-code=arch=compute_80,code=[compute_80,sm_80] --generate-code=arch=compute_86,code=[compute_86,sm_86] --generate-code=arch=compute_87,code=[compute_87,sm_87] --generate-code=arch=compute_89,code=[compute_89,sm_89] --generate-code=arch=compute_90,code=[compute_90,sm_90] -Xcompiler=-fPIC -Wno-deprecated-gpu-targets -shared -dlink --options-file CMakeFiles/nccl_static.dir/deviceObjects1.rsp -o CMakeFiles/nccl_static.dir/cmake_device_link.o --options-file CMakeFiles/nccl_static.dir/deviceLinkLibs.rsp [100%] Linking CXX shared library ../lib/libnccl.so cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/cmake -E cmake_link_script CMakeFiles/nccl.dir/link.txt --verbose=1 /usr/bin/g++ -fPIC -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -ffat-lto-objects -fPIC -fvisibility=hidden -Wall -Wno-unused-function -Wno-sign-compare -Wvla -g -O3 -Wl,--version-script=/builddir/build/BUILD/nccl-2.28.9-1/src/libnccl.map -shared -Wl,-soname,libnccl.so.2 -o ../lib/libnccl.so.2.28.9 CMakeFiles/nccl.dir/bootstrap.cc.o CMakeFiles/nccl.dir/channel.cc.o CMakeFiles/nccl.dir/ce_coll.cc.o CMakeFiles/nccl.dir/collectives.cc.o CMakeFiles/nccl.dir/debug.cc.o CMakeFiles/nccl.dir/enqueue.cc.o CMakeFiles/nccl.dir/group.cc.o CMakeFiles/nccl.dir/init.cc.o CMakeFiles/nccl.dir/init_nvtx.cc.o CMakeFiles/nccl.dir/proxy.cc.o CMakeFiles/nccl.dir/transport.cc.o CMakeFiles/nccl.dir/mnnvl.cc.o CMakeFiles/nccl.dir/allocator.cc.o CMakeFiles/nccl.dir/sym_kernels.cc.o CMakeFiles/nccl.dir/dev_runtime.cc.o CMakeFiles/nccl.dir/transport/nvls.cc.o CMakeFiles/nccl.dir/transport/profiler.cc.o CMakeFiles/nccl.dir/transport/net_socket.cc.o CMakeFiles/nccl.dir/transport/p2p.cc.o CMakeFiles/nccl.dir/transport/net.cc.o CMakeFiles/nccl.dir/transport/net_ib.cc.o CMakeFiles/nccl.dir/transport/coll_net.cc.o CMakeFiles/nccl.dir/transport/shm.cc.o CMakeFiles/nccl.dir/transport/generic.cc.o CMakeFiles/nccl.dir/transport/gdaki/gin_host_gdaki.cc.o CMakeFiles/nccl.dir/misc/strongstream.cc.o CMakeFiles/nccl.dir/misc/socket.cc.o CMakeFiles/nccl.dir/misc/ibvwrap.cc.o CMakeFiles/nccl.dir/misc/mlx5dvsymbols.cc.o CMakeFiles/nccl.dir/misc/mlx5dvwrap.cc.o CMakeFiles/nccl.dir/misc/cudawrap.cc.o CMakeFiles/nccl.dir/misc/param.cc.o CMakeFiles/nccl.dir/misc/ipcsocket.cc.o CMakeFiles/nccl.dir/misc/utils.cc.o CMakeFiles/nccl.dir/misc/shmutils.cc.o CMakeFiles/nccl.dir/misc/nvmlwrap.cc.o CMakeFiles/nccl.dir/misc/argcheck.cc.o CMakeFiles/nccl.dir/misc/gdrwrap.cc.o CMakeFiles/nccl.dir/misc/ibvsymbols.cc.o CMakeFiles/nccl.dir/register/register.cc.o CMakeFiles/nccl.dir/register/coll_reg.cc.o CMakeFiles/nccl.dir/register/sendrecv_reg.cc.o CMakeFiles/nccl.dir/graph/topo.cc.o CMakeFiles/nccl.dir/graph/tuning.cc.o CMakeFiles/nccl.dir/graph/xml.cc.o CMakeFiles/nccl.dir/graph/search.cc.o CMakeFiles/nccl.dir/graph/paths.cc.o CMakeFiles/nccl.dir/graph/connect.cc.o CMakeFiles/nccl.dir/graph/rings.cc.o CMakeFiles/nccl.dir/graph/trees.cc.o CMakeFiles/nccl.dir/plugin/net.cc.o CMakeFiles/nccl.dir/plugin/profiler.cc.o CMakeFiles/nccl.dir/plugin/plugin_open.cc.o CMakeFiles/nccl.dir/plugin/tuner.cc.o CMakeFiles/nccl.dir/plugin/env.cc.o CMakeFiles/nccl.dir/plugin/net/net_v9.cc.o CMakeFiles/nccl.dir/plugin/net/net_v6.cc.o CMakeFiles/nccl.dir/plugin/net/net_v7.cc.o CMakeFiles/nccl.dir/plugin/net/net_v8.cc.o CMakeFiles/nccl.dir/plugin/net/net_v10.cc.o CMakeFiles/nccl.dir/plugin/net/net_v11.cc.o CMakeFiles/nccl.dir/plugin/profiler/profiler_v3.cc.o CMakeFiles/nccl.dir/plugin/profiler/profiler_v4.cc.o CMakeFiles/nccl.dir/plugin/profiler/profiler_v1.cc.o CMakeFiles/nccl.dir/plugin/profiler/profiler_v2.cc.o CMakeFiles/nccl.dir/plugin/profiler/profiler_v5.cc.o CMakeFiles/nccl.dir/plugin/tuner/tuner_v2.cc.o CMakeFiles/nccl.dir/plugin/tuner/tuner_v3.cc.o CMakeFiles/nccl.dir/plugin/tuner/tuner_v4.cc.o CMakeFiles/nccl.dir/plugin/tuner/tuner_v5.cc.o CMakeFiles/nccl.dir/plugin/env/env_v1.cc.o CMakeFiles/nccl.dir/ras/collectives.cc.o CMakeFiles/nccl.dir/ras/rasnet.cc.o CMakeFiles/nccl.dir/ras/peers.cc.o CMakeFiles/nccl.dir/ras/ras.cc.o CMakeFiles/nccl.dir/ras/client_support.cc.o CMakeFiles/nccl.dir/nccl_device/core.cc.o CMakeFiles/nccl.dir/nccl_device/ll_a2a.cc.o CMakeFiles/nccl.dir/nccl_device/lsa_barrier.cc.o CMakeFiles/nccl.dir/nccl_device/gin_barrier.cc.o CMakeFiles/nccl.dir/scheduler/symmetric_sched.cc.o CMakeFiles/nccl.dir/gin/gin_host.cc.o CMakeFiles/nccl.dir/gin/gin_host_proxy.cc.o "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o" "CMakeFiles/nccl.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o" device/CMakeFiles/nccl_device.dir/gensrc/all_gather.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/broadcast.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/sendrecv.cu.o device/CMakeFiles/nccl_device.dir/gensrc/device_table.cu.o device/CMakeFiles/nccl_device.dir/gensrc/host_table.cc.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_gather.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3_RSxLDMC_AGxSTMC.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3_LDMC.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2_RSxLDMC_AGxSTMC.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2_LDMC.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/sym_kernels_host.cc.o device/CMakeFiles/nccl_device.dir/common.cu.o device/CMakeFiles/nccl_device.dir/onerank.cu.o CMakeFiles/nccl.dir/cmake_device_link.o -L/usr/local/cuda/targets/x86_64-linux/lib/stubs -L/usr/local/cuda/targets/x86_64-linux/lib -lpthread -lrt -ldl /usr/lib64/librt.a -lcudadevrt -lcudart_static -lrt -lpthread -ldl cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/cmake -E cmake_symlink_library ../lib/libnccl.so.2.28.9 ../lib/libnccl.so.2 ../lib/libnccl.so gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [100%] Built target nccl [100%] Linking CXX static library ../lib/libnccl_static.a cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/cmake -P CMakeFiles/nccl_static.dir/cmake_clean_target.cmake cd /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/src && /usr/bin/cmake -E cmake_link_script CMakeFiles/nccl_static.dir/link.txt --verbose=1 /usr/bin/ar qc ../lib/libnccl_static.a CMakeFiles/nccl_static.dir/bootstrap.cc.o CMakeFiles/nccl_static.dir/channel.cc.o CMakeFiles/nccl_static.dir/ce_coll.cc.o CMakeFiles/nccl_static.dir/collectives.cc.o CMakeFiles/nccl_static.dir/debug.cc.o CMakeFiles/nccl_static.dir/enqueue.cc.o CMakeFiles/nccl_static.dir/group.cc.o CMakeFiles/nccl_static.dir/init.cc.o CMakeFiles/nccl_static.dir/init_nvtx.cc.o CMakeFiles/nccl_static.dir/proxy.cc.o CMakeFiles/nccl_static.dir/transport.cc.o CMakeFiles/nccl_static.dir/mnnvl.cc.o CMakeFiles/nccl_static.dir/allocator.cc.o CMakeFiles/nccl_static.dir/sym_kernels.cc.o CMakeFiles/nccl_static.dir/dev_runtime.cc.o CMakeFiles/nccl_static.dir/transport/nvls.cc.o CMakeFiles/nccl_static.dir/transport/profiler.cc.o CMakeFiles/nccl_static.dir/transport/net_socket.cc.o CMakeFiles/nccl_static.dir/transport/p2p.cc.o CMakeFiles/nccl_static.dir/transport/net.cc.o CMakeFiles/nccl_static.dir/transport/net_ib.cc.o CMakeFiles/nccl_static.dir/transport/coll_net.cc.o CMakeFiles/nccl_static.dir/transport/shm.cc.o CMakeFiles/nccl_static.dir/transport/generic.cc.o CMakeFiles/nccl_static.dir/transport/gdaki/gin_host_gdaki.cc.o CMakeFiles/nccl_static.dir/misc/strongstream.cc.o CMakeFiles/nccl_static.dir/misc/socket.cc.o CMakeFiles/nccl_static.dir/misc/ibvwrap.cc.o CMakeFiles/nccl_static.dir/misc/mlx5dvsymbols.cc.o CMakeFiles/nccl_static.dir/misc/mlx5dvwrap.cc.o CMakeFiles/nccl_static.dir/misc/cudawrap.cc.o CMakeFiles/nccl_static.dir/misc/param.cc.o CMakeFiles/nccl_static.dir/misc/ipcsocket.cc.o CMakeFiles/nccl_static.dir/misc/utils.cc.o CMakeFiles/nccl_static.dir/misc/shmutils.cc.o CMakeFiles/nccl_static.dir/misc/nvmlwrap.cc.o CMakeFiles/nccl_static.dir/misc/argcheck.cc.o CMakeFiles/nccl_static.dir/misc/gdrwrap.cc.o CMakeFiles/nccl_static.dir/misc/ibvsymbols.cc.o CMakeFiles/nccl_static.dir/register/register.cc.o CMakeFiles/nccl_static.dir/register/coll_reg.cc.o CMakeFiles/nccl_static.dir/register/sendrecv_reg.cc.o CMakeFiles/nccl_static.dir/graph/topo.cc.o CMakeFiles/nccl_static.dir/graph/tuning.cc.o CMakeFiles/nccl_static.dir/graph/xml.cc.o CMakeFiles/nccl_static.dir/graph/search.cc.o CMakeFiles/nccl_static.dir/graph/paths.cc.o CMakeFiles/nccl_static.dir/graph/connect.cc.o CMakeFiles/nccl_static.dir/graph/rings.cc.o CMakeFiles/nccl_static.dir/graph/trees.cc.o CMakeFiles/nccl_static.dir/plugin/net.cc.o CMakeFiles/nccl_static.dir/plugin/profiler.cc.o CMakeFiles/nccl_static.dir/plugin/plugin_open.cc.o CMakeFiles/nccl_static.dir/plugin/tuner.cc.o CMakeFiles/nccl_static.dir/plugin/env.cc.o CMakeFiles/nccl_static.dir/plugin/net/net_v9.cc.o CMakeFiles/nccl_static.dir/plugin/net/net_v6.cc.o CMakeFiles/nccl_static.dir/plugin/net/net_v7.cc.o CMakeFiles/nccl_static.dir/plugin/net/net_v8.cc.o CMakeFiles/nccl_static.dir/plugin/net/net_v10.cc.o CMakeFiles/nccl_static.dir/plugin/net/net_v11.cc.o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v3.cc.o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v4.cc.o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v1.cc.o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v2.cc.o CMakeFiles/nccl_static.dir/plugin/profiler/profiler_v5.cc.o CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v2.cc.o CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v3.cc.o CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v4.cc.o CMakeFiles/nccl_static.dir/plugin/tuner/tuner_v5.cc.o CMakeFiles/nccl_static.dir/plugin/env/env_v1.cc.o CMakeFiles/nccl_static.dir/ras/collectives.cc.o CMakeFiles/nccl_static.dir/ras/rasnet.cc.o CMakeFiles/nccl_static.dir/ras/peers.cc.o CMakeFiles/nccl_static.dir/ras/ras.cc.o CMakeFiles/nccl_static.dir/ras/client_support.cc.o CMakeFiles/nccl_static.dir/nccl_device/core.cc.o CMakeFiles/nccl_static.dir/nccl_device/ll_a2a.cc.o CMakeFiles/nccl_static.dir/nccl_device/lsa_barrier.cc.o CMakeFiles/nccl_static.dir/nccl_device/gin_barrier.cc.o CMakeFiles/nccl_static.dir/scheduler/symmetric_sched.cc.o CMakeFiles/nccl_static.dir/gin/gin_host.cc.o CMakeFiles/nccl_static.dir/gin/gin_host_proxy.cc.o "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_qp.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cq.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_device_attr.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_umem.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_srq.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_uar.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_log.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_high_level.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_cuda_wrapper.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_mlx5dv_wrapper.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_verbs_ibv_wrapper.cpp.o" "CMakeFiles/nccl_static.dir/transport/gdaki/doca-gpunetio/src/doca_gpunetio_gdrcopy.cpp.o" device/CMakeFiles/nccl_device.dir/gensrc/all_gather.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_i64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_minmax_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_premulsum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_prod_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/all_reduce_sumpostdiv_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/broadcast.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_minmax_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_premulsum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_prod_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_i64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_minmax_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_premulsum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_prod_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_scatter_sumpostdiv_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sum_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u64.cu.o device/CMakeFiles/nccl_device.dir/gensrc/reduce_sumpostdiv_u8.cu.o device/CMakeFiles/nccl_device.dir/gensrc/sendrecv.cu.o device/CMakeFiles/nccl_device.dir/gensrc/device_table.cu.o device/CMakeFiles/nccl_device.dir/gensrc/host_table.cc.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_gather.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f32.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_bf16.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e4m3_RSxLDMC_AGxSTMC.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e4m3_LDMC.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce_sum_f8e5m2_RSxLDMC_AGxSTMC.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter_sum_f8e5m2_LDMC.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/all_reduce.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/reduce_scatter.cu.o device/CMakeFiles/nccl_device.dir/gensrc/symmetric/sym_kernels_host.cc.o device/CMakeFiles/nccl_device.dir/common.cu.o device/CMakeFiles/nccl_device.dir/onerank.cu.o /usr/bin/ar q ../lib/libnccl_static.a CMakeFiles/nccl_static.dir/cmake_device_link.o /usr/bin/ranlib ../lib/libnccl_static.a gmake[2]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' [100%] Built target nccl_static gmake[1]: Leaving directory '/builddir/build/BUILD/nccl-2.28.9-1/nccl_build' /usr/bin/cmake -E cmake_progress_start /builddir/build/BUILD/nccl-2.28.9-1/nccl_build/CMakeFiles 0 + RPM_EC=0 ++ jobs -p + exit 0 Executing(%install): /bin/sh -e /var/tmp/rpm-tmp.eBihZC + umask 022 + cd /builddir/build/BUILD + '[' /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64 '!=' / ']' + rm -rf /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64 ++ dirname /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64 + mkdir -p /builddir/build/BUILDROOT + mkdir /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64 + cd nccl-2.28.9-1 + install -d /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/bin /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/lib64 /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/include + pushd nccl_build ~/build/BUILD/nccl-2.28.9-1/nccl_build ~/build/BUILD/nccl-2.28.9-1 + cp -r bin/ncclras /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/bin/ + cp -r lib/libnccl-profiler-example.so lib/libnccl.so lib/libnccl.so.2 lib/libnccl.so.2.28.9 lib/libnccl_static.a lib/pkgconfig /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/lib64/ + cp -r include/nccl.h include/nccl_device include/nccl_device.h /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/include/ + popd ~/build/BUILD/nccl-2.28.9-1 + find /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/lib64/pkgconfig -name '*.pc' -exec sed -i -e 's|\${nccl:Prefix}|/usr|g' -e 's|\${nccl:Major}|2|g' -e 's|\${nccl:Minor}|28|g' -e 's|\${nccl:Patch}|9|g' -e 's|/lib|lib64|g' '{}' ';' + /usr/lib/rpm/check-buildroot + /usr/lib/rpm/OpenCloudOS/brp-ldconfig + /usr/lib/rpm/brp-compress + /usr/lib/rpm/brp-strip /usr/bin/strip + /usr/lib/rpm/brp-strip-comment-note /usr/bin/strip /usr/bin/objdump + /usr/lib/rpm/OpenCloudOS/brp-strip-lto /usr/bin/strip + /usr/lib/rpm/brp-strip-static-archive /usr/bin/strip + /usr/lib/rpm/check-rpaths + /usr/lib/rpm/OpenCloudOS/brp-mangle-shebangs *** WARNING: ./usr/lib64/pkgconfig/nccl.pc is executable but has no shebang, removing executable bit + /usr/lib/rpm/OpenCloudOS/brp-python-bytecompile '' 1 0 + /usr/lib/rpm/OpenCloudOS/brp-python-hardlink Processing files: nccl-cuda-12-0-2.28.9.1-1.oc9.x86_64 Executing(%license): /bin/sh -e /var/tmp/rpm-tmp.UEapx5 + umask 022 + cd /builddir/build/BUILD + cd nccl-2.28.9-1 + LICENSEDIR=/builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/nccl-cuda-12-0 + export LC_ALL=C + LC_ALL=C + export LICENSEDIR + /usr/bin/mkdir -p /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/nccl-cuda-12-0 + cp -pr LICENSE.txt /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/nccl-cuda-12-0 + RPM_EC=0 ++ jobs -p + exit 0 Provides: nccl-cuda-12-0 = 2.28.9.1-1.oc9 nccl-cuda-12-0(x86-64) = 2.28.9.1-1.oc9 Requires(interp): /bin/sh /bin/sh Requires(rpmlib): rpmlib(CompressedFileNames) <= 3.0.4-1 rpmlib(FileDigests) <= 4.6.0-1 rpmlib(PayloadFilesHavePrefix) <= 4.0-1 Requires(post): /bin/sh Requires(postun): /bin/sh Requires: libc.so.6()(64bit) libc.so.6(GLIBC_2.2.5)(64bit) libc.so.6(GLIBC_2.34)(64bit) libc.so.6(GLIBC_2.38)(64bit) libgcc_s.so.1()(64bit) libm.so.6()(64bit) libstdc++.so.6()(64bit) rtld(GNU_HASH) Processing files: libnccl-cuda-12-0-2.28.9.1-1.oc9.x86_64 Executing(%license): /bin/sh -e /var/tmp/rpm-tmp.DzOE83 + umask 022 + cd /builddir/build/BUILD + cd nccl-2.28.9-1 + LICENSEDIR=/builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/libnccl-cuda-12-0 + export LC_ALL=C + LC_ALL=C + export LICENSEDIR + /usr/bin/mkdir -p /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/libnccl-cuda-12-0 + cp -pr LICENSE.txt /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/libnccl-cuda-12-0 + RPM_EC=0 ++ jobs -p + exit 0 Provides: libnccl-cuda-12-0 = 2.28.9.1-1.oc9 libnccl-cuda-12-0(x86-64) = 2.28.9.1-1.oc9 libnccl.so.2()(64bit) Requires(rpmlib): rpmlib(CompressedFileNames) <= 3.0.4-1 rpmlib(FileDigests) <= 4.6.0-1 rpmlib(PayloadFilesHavePrefix) <= 4.0-1 Requires: ld-linux-x86-64.so.2()(64bit) ld-linux-x86-64.so.2(GLIBC_2.3)(64bit) libc.so.6()(64bit) libc.so.6(GLIBC_2.10)(64bit) libc.so.6(GLIBC_2.14)(64bit) libc.so.6(GLIBC_2.16)(64bit) libc.so.6(GLIBC_2.17)(64bit) libc.so.6(GLIBC_2.2.5)(64bit) libc.so.6(GLIBC_2.3)(64bit) libc.so.6(GLIBC_2.3.2)(64bit) libc.so.6(GLIBC_2.3.3)(64bit) libc.so.6(GLIBC_2.3.4)(64bit) libc.so.6(GLIBC_2.33)(64bit) libc.so.6(GLIBC_2.34)(64bit) libc.so.6(GLIBC_2.38)(64bit) libc.so.6(GLIBC_2.6)(64bit) libgcc_s.so.1()(64bit) libgcc_s.so.1(GCC_3.0)(64bit) libgcc_s.so.1(GCC_3.4)(64bit) libm.so.6()(64bit) libm.so.6(GLIBC_2.2.5)(64bit) libm.so.6(GLIBC_2.29)(64bit) libstdc++.so.6()(64bit) libstdc++.so.6(CXXABI_1.3)(64bit) libstdc++.so.6(CXXABI_1.3.9)(64bit) libstdc++.so.6(GLIBCXX_3.4)(64bit) libstdc++.so.6(GLIBCXX_3.4.11)(64bit) libstdc++.so.6(GLIBCXX_3.4.18)(64bit) libstdc++.so.6(GLIBCXX_3.4.19)(64bit) libstdc++.so.6(GLIBCXX_3.4.29)(64bit) libstdc++.so.6(GLIBCXX_3.4.30)(64bit) rtld(GNU_HASH) Processing files: libnccl-devel-cuda-12-0-2.28.9.1-1.oc9.x86_64 Executing(%license): /bin/sh -e /var/tmp/rpm-tmp.hWGnu4 + umask 022 + cd /builddir/build/BUILD + cd nccl-2.28.9-1 + LICENSEDIR=/builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/libnccl-devel-cuda-12-0 + export LC_ALL=C + LC_ALL=C + export LICENSEDIR + /usr/bin/mkdir -p /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/libnccl-devel-cuda-12-0 + cp -pr LICENSE.txt /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/libnccl-devel-cuda-12-0 + RPM_EC=0 ++ jobs -p + exit 0 Provides: libnccl-devel-cuda-12-0 = 2.28.9.1-1.oc9 libnccl-devel-cuda-12-0(x86-64) = 2.28.9.1-1.oc9 libnccl-profiler-example.so()(64bit) pkgconfig(nccl) = 2.28.9 Requires(rpmlib): rpmlib(CompressedFileNames) <= 3.0.4-1 rpmlib(FileDigests) <= 4.6.0-1 rpmlib(PayloadFilesHavePrefix) <= 4.0-1 Requires: /usr/bin/pkg-config ld-linux-x86-64.so.2()(64bit) ld-linux-x86-64.so.2(GLIBC_2.3)(64bit) libc.so.6()(64bit) libc.so.6(GLIBC_2.17)(64bit) libc.so.6(GLIBC_2.2.5)(64bit) libc.so.6(GLIBC_2.38)(64bit) libgcc_s.so.1()(64bit) libm.so.6()(64bit) libnccl.so.2()(64bit) libstdc++.so.6()(64bit) rtld(GNU_HASH) Processing files: libnccl-static-cuda-12-0-2.28.9.1-1.oc9.x86_64 Executing(%license): /bin/sh -e /var/tmp/rpm-tmp.0SwEs6 + umask 022 + cd /builddir/build/BUILD + cd nccl-2.28.9-1 + LICENSEDIR=/builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/libnccl-static-cuda-12-0 + export LC_ALL=C + LC_ALL=C + export LICENSEDIR + /usr/bin/mkdir -p /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/libnccl-static-cuda-12-0 + cp -pr LICENSE.txt /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64/usr/share/licenses/libnccl-static-cuda-12-0 + RPM_EC=0 ++ jobs -p + exit 0 Provides: libnccl-static-cuda-12-0 = 2.28.9.1-1.oc9 libnccl-static-cuda-12-0(x86-64) = 2.28.9.1-1.oc9 Requires(rpmlib): rpmlib(CompressedFileNames) <= 3.0.4-1 rpmlib(FileDigests) <= 4.6.0-1 rpmlib(PayloadFilesHavePrefix) <= 4.0-1 Checking for unpackaged file(s): /usr/lib/rpm/check-files /builddir/build/BUILDROOT/nccl-cuda-12.0-2.28.9.1-1.oc9.x86_64 Wrote: /builddir/build/RPMS/nccl-cuda-12-0-2.28.9.1-1.oc9.x86_64.rpm Wrote: /builddir/build/RPMS/libnccl-devel-cuda-12-0-2.28.9.1-1.oc9.x86_64.rpm Wrote: /builddir/build/RPMS/libnccl-static-cuda-12-0-2.28.9.1-1.oc9.x86_64.rpm Wrote: /builddir/build/RPMS/libnccl-cuda-12-0-2.28.9.1-1.oc9.x86_64.rpm Child return code was: 0