@@ -47,39 +47,53 @@ if(CORENRN_ENABLE_GPU)
47
47
endif ()
48
48
# Short "MAJOR.MINOR" CUDA toolkit version. No whitespace may appear inside the string because the
# value is spliced directly into a compiler option (-gpu=cudaX.Y).
set (CORENRN_CUDA_VERSION_SHORT "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}" )
49
49
endif ()
50
- # -acc enables OpenACC support, -cuda links CUDA libraries and (very importantly!) seems to be
51
- # required to make the NVHPC compiler do the device code linking. Otherwise the explicit CUDA
52
- # device code (.cu files in libcoreneuron) has to be linked in a separate, earlier, step, which
53
- # apparently causes problems with interoperability with OpenACC. Passing -cuda to nvc++ when
54
- # compiling (as opposed to linking) seems to enable CUDA C++ support, which has other consequences
55
- # due to e.g. __CUDACC__ being defined. See https://github.com/BlueBrain/CoreNeuron/issues/607 for
56
- # more information about this. -gpu=cudaX.Y ensures that OpenACC code is compiled with the same
57
- # CUDA version as is used for the explicit CUDA code.
58
- set (NVHPC_ACC_COMP_FLAGS "-acc -Minfo=accel -gpu=cuda${CORENRN_CUDA_VERSION_SHORT} ,lineinfo" )
59
- set (NVHPC_ACC_LINK_FLAGS "-acc -cuda" )
60
- # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA
61
- # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the
62
- # same default compute capabilities as each other, particularly on GPU-less build machines.
63
- foreach (compute_capability ${CMAKE_CUDA_ARCHITECTURES} )
64
- string (APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability} " )
65
- endforeach ()
66
- if (CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD )
67
- # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
68
- # for a region then prefer OpenMP.
69
- add_compile_definitions (CORENEURON_PREFER_OPENMP_OFFLOAD )
70
- string (APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu -Minfo=mp" )
50
+
51
+ # Choose the GPU offload compile/link flags according to the C++ compiler in use. The variables
+ # keep their NVHPC_ prefix for every compiler. The compiler ID is referenced by variable name
+ # (not expanded with ${...}) so that an empty or unusual ID cannot break the if() expression.
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "XLClang" )
52
+ set (NVHPC_ACC_COMP_FLAGS "-qsmp=omp -qoffload -qreport" )
53
+ set (NVHPC_ACC_LINK_FLAGS "-qcuda -lcaliper" )
54
+
55
+ if (CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD )
56
+ # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
57
+ # for a region then prefer OpenMP. The macro name must match the one defined in the NVHPC
58
+ # branch below (CORENEURON_PREFER_OPENMP_OFFLOAD).
+ add_compile_definitions (CORENEURON_PREFER_OPENMP_OFFLOAD )
59
+ endif ()
60
+
61
+ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" )
62
+ set (NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS} " )
63
+ # set() with no value unsets the variable: no extra link flags for Clang.
+ set (NVHPC_ACC_LINK_FLAGS )
64
+ else ()
65
+ # -acc enables OpenACC support, -cuda links CUDA libraries and (very importantly!) seems to be
66
+ # required to make the NVHPC compiler do the device code linking. Otherwise the explicit CUDA
67
+ # device code (.cu files in libcoreneuron) has to be linked in a separate, earlier, step, which
68
+ # apparently causes problems with interoperability with OpenACC. Passing -cuda to nvc++ when
69
+ # compiling (as opposed to linking) seems to enable CUDA C++ support, which has other consequences
70
+ # due to e.g. __CUDACC__ being defined. See https://github.com/BlueBrain/CoreNeuron/issues/607 for
71
+ # more information about this. -gpu=cudaX.Y ensures that OpenACC code is compiled with the same
72
+ # CUDA version as is used for the explicit CUDA code.
73
+ set (NVHPC_ACC_COMP_FLAGS "-acc -Minfo=accel -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo" )
74
+ set (NVHPC_ACC_LINK_FLAGS "-acc -cuda" )
75
+ # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA
76
+ # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the
77
+ # same default compute capabilities as each other, particularly on GPU-less build machines.
78
+ # Each entry is appended as ",ccNN" with no whitespace so it stays part of the single -gpu= option.
+ foreach (compute_capability ${CMAKE_CUDA_ARCHITECTURES} )
79
+ string (APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability}" )
80
+ endforeach ()
81
+ if (CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD )
82
+ # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
83
+ # for a region then prefer OpenMP.
84
+ add_compile_definitions (CORENEURON_PREFER_OPENMP_OFFLOAD )
85
+ string (APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu -Minfo=mp" )
86
+ endif ()
87
+ # avoid PGI adding standard compliant "-A" flags
88
+ # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
89
+ # NOTE(review): only this branch appends the link flags to CMAKE_EXE_LINKER_FLAGS; confirm that
+ # the XLClang link flags set above are consumed elsewhere.
+ string (APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_LINK_FLAGS} " )
90
+ # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is
91
+ # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors.
92
+ # Use of `-Mautoinline` ensures that the necessary functions like `net_receive_kernel` are inlined
93
+ # for OpenACC code generation.
94
+ set (NVHPC_CXX_INLINE_FLAGS "-Mautoinline" )
95
+ # NOTE(review): the next line immediately unsets NVHPC_CXX_INLINE_FLAGS again, so `-Mautoinline`
+ # is effectively disabled here; confirm this is intentional.
+ set (NVHPC_CXX_INLINE_FLAGS )
71
96
endif ()
72
- set (NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS} " )
73
- set (NVHPC_ACC_LINK_FLAGS )
74
- # avoid PGI adding standard compliant "-A" flags
75
- # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
76
- string (APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_LINK_FLAGS} " )
77
- # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is
78
- # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors.
79
- # Use of `-Mautoinline` ensure that the necessary functions like `net_receive_kernel` are inlined
80
- # for OpenACC code generation.
81
- set (NVHPC_CXX_INLINE_FLAGS "-Mautoinline" )
82
- set (NVHPC_CXX_INLINE_FLAGS )
83
97
endif ()
84
98
85
99
# =============================================================================
0 commit comments