#
#     Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
#

cinclude rcfiles/deprecatedrc;

# Target Option
# Flags recording which target-selection switches appeared on the command line.
variable TGT is default(0); # Set to 1 when `-target` is used
variable TGTCUDA is default(0); # Set to 1 when `-cuda` is used
variable TGTCUDAX is default(0); # Set to 1 when `-cuda` is used (NOTE(review): same description as TGTCUDA - confirm what distinguishes the two)
variable GPU is default(0); # Set to 1 when `-gpu` is used
variable TGTNKW is default(1); # -target no-keyword: stays 1 until a `-target` keyword clears it; used to prioritize rcfile error/warning messages
variable GPUNKW is default(1); # -gpu no-keyword: stays 1 until a `-gpu` keyword clears it; used to prioritize rcfile error/warning messages
# Languages
variable LNGACC is default(0); # OpenACC
variable LNGOMP is default(0); # OpenMP
variable LNGSPA is default(0); # Standard Parallel Algorithm
# Devices
# Each device is a distinct bit so device sets can be combined with `|` and tested with `&` inside $expr().
variable TGTGPU is default(0x1); # GPU device
variable TGTCPU is default(0x2); # Multicore device
variable TGTSEQ is default(0x4); # SEQ device (NOTE(review): presumably sequential host execution - confirm)
variable TGTALL is default(0x7); # All device bits: GPU | Multicore | SEQ
# OpenACC
# TGT* variables hold device bits selected through `-target`; TGL* variables hold
# device bits selected through the language specific option.
variable TGLACC is default(0); # Local value to use to set the final value in case the default changed
variable TGTACCGPU is default(0); # GPU value for `-target` suboption
variable TGTACCCPU is default(0); # CPU value for `-target` suboption
variable TGTACCSEQ is default(0); # SEQ value for `-target` suboption
variable TGLACCGPU is default(0); # GPU value for language specific suboption
variable TGLACCCPU is default(0); # CPU value for language specific suboption
variable TGLACCSEQ is default(0); # SEQ value for language specific suboption
variable TGTACC is default($expr($TGTGPU | $TGTSEQ)); # Default is GPU and SEQ (TODO: default will be GPU,MC)
# OpenMP
# TGT* variables hold device bits selected through `-target`; TGL* variables hold
# device bits selected through the language specific option.
variable TGLOMP is default(0); # Local value to use to set the final value in case the default changed
variable TGTOMPGPU is default(0); # GPU value in case a language specifies suboption or `-target` suboption was used
variable TGTOMPCPU is default(0); # CPU value in case a language specifies suboption or `-target` suboption was used
variable TGLOMPGPU is default(0); # GPU value for language specific suboption
variable TGLOMPCPU is default(0); # CPU value for language specific suboption
variable TGTOMP is default($TGTCPU); # Multicore default (TODO: default will be GPU,MC)
# Standard Parallel Algorithm
# TGT* variables hold device bits selected through `-target`; TGL* variables hold
# device bits selected through the language specific option.
variable TGLSPA is default(0); # Local value to use to set the final value in case the default changed
variable TGTSPAGPU is default(0); # GPU value in case a language specifies suboption or `-target` suboption was used
variable TGTSPACPU is default(0); # CPU value in case a language specifies suboption or `-target` suboption was used
variable TGLSPAGPU is default(0); # GPU value for language specific suboption
variable TGLSPACPU is default(0); # CPU value for language specific suboption
variable TGTSPA is default($TGTGPU); # GPU default (TODO: default will be GPU,MC)
variable STDPARGPU is default(0); # -stdpar=gpu
variable STDPARMC is default(0); # -stdpar=multicore
variable STDPARMCXX is default(0); # -stdpar=multicore (NOTE(review): same description as STDPARMC - confirm what distinguishes the two)
variable NOSTDPAR is default(0); # -nostdpar
# Legacy memory flags: the *SET variables record that the flag was passed at all,
# the EXPLICIT* variables record the value that was passed.
variable PINNEDSET is default(0); # is there an explicitly set -gpu=pinned?
variable MANAGEDSET is default(0); # is there an explicitly set -gpu=[no]managed?
variable EXPLICITMANAGED is default(0); # explicit value of -gpu=[no]managed
variable UNIFIEDSET is default(0); # is there an explicitly set -gpu=[no]unified?
variable EXPLICITUNIFIED is default(0); # explicit value of -gpu=[no]unified

# Nonzero when split compilation is requested; the ERROR39 diagnostic in this file
# warns that -gpu=splitcompile is ignored with CUDA older than 12.1.
variable SPLITCOMPILE is default(0);

# When `-nvcchost` is passed to nvc++ this variable is true and the
# `-x cu, -cuda, .cu` processing should be disabled because nvc++ is
# being used as host compiler
variable NVCCHOST is default(0);

# This indicates whether unified memory is supported by the CUDA driver.
# The driver is only queried (through the cudagetum() action) when -stdpar=gpu is
# active; otherwise the value is 0.
variable HASCUDAUNIFIED is  default($if($STDPARGPU, $action(cudagetum()), 0));

# Final memory-mode decisions. These defaults are overridden by the `set`
# statements in the "Memory specific settings" part of this file.
variable NEEDUNIFIED is default(0);
variable NEEDMANAGED is default(0);
variable NEEDPINNED is default(0);

# Is any of -gpu=mem:* options set explicitly?
variable MEMSETEXPLICIT is default(0);
# The last -gpu=mem:* option set
# (compared against the *MEMMODE constants below)
variable SETMEMORYMODE is default(0);
# Corresponds to the unset mem option e.g. -gpu=mem or no -gpu=mem:*
variable UNSETMEMMODE is default(0);
# Corresponds to -gpu=mem:separate
variable SEPARATEMEMMODE is default(1);
# Corresponds to -gpu=mem:managed
variable MANAGEDMEMMODE is default(3);
# Corresponds to -gpu=mem:unified
variable UNIFIEDMEMMODE is default(4);

# Is -gpu=[no]interceptdeallocations option set explicitly?
variable INTERCEPTDEALLOCSETEXPLICIT is default(0);

# Should managed memory allocations be added for unified memory?
variable USEMANALLOCWITHUNIFIEDMEM is default(1);
# Should pinned memory allocations be used with separate memory?
variable USEPINNEDALLOCWITHSEPARATEMEM is default(0);

# True when any of the following holds: -cuda was used (TGTCUDA), ANYCU or ANYCUF
# is set (presumably: a `.cu` / CUDA Fortran input is present - confirm), or an
# enabled language (OpenACC, OpenMP, StdPar) has the GPU bit in its target set.
variable ISCUDARTNEEDED is default($lor(
    $TGTCUDA,
    $ANYCU,
    $ANYCUF,
    $land($LNGACC,$expr($TGTACC & $TGTGPU)),
    $land($LNGOMP,$expr($TGTOMP & $TGTGPU)),
    $land($LNGSPA,$expr($TGTSPA & $TGTGPU))));

# Same structure as ISCUDARTNEEDED with two differences: $TA is an additional
# trigger, and OpenACC / StdPar qualify with any device bit (TGTALL) while
# OpenMP still requires the GPU bit.
variable ISACCLIBNEEDED is default($lor(
    $TGTCUDA,
    $ANYCU,
    $ANYCUF,
    $TA,
    $land($LNGACC,$expr($TGTACC & $TGTALL)),
    $land($LNGOMP,$expr($TGTOMP & $TGTGPU)),
    $land($LNGSPA,$expr($TGTSPA & $TGTALL))));

# NVVM selection.
# USENVVM70 and USEGPUBC default to true only when BOTH a pre-Blackwell (< cc100)
# and a Blackwell+ (>= cc100) compute capability are requested in the same compile.
# The compiler flag and the driver flag are chosen with the same priority:
# nvvm-dev first, then nvvm-next, then nvvm70; nothing is emitted when none is set.
variable USENVVM70 is default($land($ISPREBLACKWELL,$ISBLACKWELLPLUS));
variable USENVVMDEV is default(0);
variable USENVVMNEXT is default(0);
variable ENABLENVVMCOMPILERFLAG is default($if($USENVVMDEV,-x 226 0x08,$if($USENVVMNEXT,-x 226 0x10,$if($USENVVM70,-x 187 0x40000))));
# One of two code generation flags is always emitted, selected by ISPREBLACKWELL.
variable ENABLENVVMGPUCODEGENERATIONFLAG is default($if($ISPREBLACKWELL,-x 187 0x8000000,-x 187 0x10000000));
variable NVVMFLAGS is default($if($USENVVMDEV,-nvvm-dev,$if($USENVVMNEXT,-nvvm-next,$if($USENVVM70,-nvvm70))));    
variable NVVMXBITS is default($ENABLENVVMCOMPILERFLAG $ENABLENVVMGPUCODEGENERATIONFLAG);
# -gpu=math_uniform and -gpu=fastmath; ERROR17 rejects using both together.
variable GPU_MATH_UNIFORM is default(0);
variable GPU_FASTMATH is default(0);
# When set, `-x 226 0x20` is appended to ACCCGFLAGS (convert .gpu to .bc before libNVVM).
variable USEGPUBC is default($land($ISPREBLACKWELL,$ISBLACKWELLPLUS));
# When set, `-x 226 0x100` is appended to ACCCGFLAGS (old libNVVM driver, nvvmd).
variable USEOLDNVVMD is default(0);
variable USENVVMVERIFY is default(0);
variable DEBUGLIBNVVM is default(0);

# Set to 1 when a pre-Blackwell compute capability (< cc100) is requested
# Derived from every entry of COMPUTECAPS; several -gpu=cc* keywords also set it directly.
variable ISPREBLACKWELL is default(0);
set ISPREBLACKWELL=$lor($foreach(cc,$COMPUTECAPS,$if($expr($cc < 100),1)),0);
# Set to 1 when a Blackwell+ compute capability is requested
# (any entry of COMPUTECAPS that is >= 100)
variable ISBLACKWELLPLUS is default(0);
set ISBLACKWELLPLUS=$lor($foreach(cc,$COMPUTECAPS,$if($expr($cc >= 100),1)),0);
# Set to 1 when -gpu=ccall is used
variable CCALL is default(0);
# Set to 1 when -gpu=ccall-major is used
variable CCALLMAJOR is default(0);

# FPIFP Support
# 1 when NEEDFPIFP is set and the FIRST entry of COMPUTECAPS is >= 90, else 0.
# Only the first requested compute capability is inspected; later entries are not checked.
variable SUPPORTSFPIFP is default($if($NEEDFPIFP,$if($expr($first($COMPUTECAPS) >= 90),1,0),0));

# Compute capabilities supported based on the languages we are
# compiling for. Note that the OpenMP CCs are a subset of the STDPAR CCs,
# which are in turn a subset of all the CCs we support (e.g. for OpenACC).
# Therefore the order in which the supported CCs are selected, based on the
# languages we are compiling for, is important:
# OpenMP --> STDPAR --> OpenACC.
#
# For each entry of SYSCAP:
#  - OpenMP targeting GPU: keep the entry only if it is in OMPCOMPUTECAPS;
#  - else C++ StdPar targeting GPU or C++ -cuda, without OpenACC targeting GPU:
#    keep the entry only if it is in SPACOMPUTECAPS;
#  - otherwise the expansion of $COMPUTECAP is emitted.
# NOTE(review): the final fallback emits $COMPUTECAP rather than the loop variable
# $ll; COMPUTECAP is not defined in this part of the file - confirm this is intended.
variable SUPPORTEDSYSCAP is default($foreach(ll,$SYSCAP,
$if($land($LNGOMP,$expr($TGTOMP &
$TGTGPU)),$if($contains($OMPCOMPUTECAPS,$ll),$ll ),
$if($land($or($land($LNGSPA,$expr($TGTSPA &
$TGTGPU),$equal($DRIVERLANG,CPP)),$land($TGTCUDA,$equal($DRIVERLANG,CPP))),$not($land($LNGACC,$expr($TGTACC
& $TGTGPU)))),$if($contains($SPACOMPUTECAPS,$ll),$ll ), $COMPUTECAP
))));

# Major Compute Capabilities
# SYSCAPMAJOR: 35 plus every entry of SYSCAP that is a multiple of 10.
# SYSCAPSMAJOR: SYSCAPMAJOR filtered by language with the same OpenMP --> STDPAR -->
# fallback cascade that SUPPORTEDSYSCAP applies to SYSCAP.
# NOTE(review): the two names differ only by one letter (SYSCAPMAJOR / SYSCAPSMAJOR);
# the final fallback emits $COMPUTECAP rather than $ll - confirm this is intended.
variable SYSCAPMAJOR is default(35 $foreach(ll,$SYSCAP,$if($equal($expr($ll % 10),0),$ll )));
variable SYSCAPSMAJOR is default($foreach(ll,$SYSCAPMAJOR,
                                          $if($land($LNGOMP,$expr($TGTOMP & $TGTGPU)),$if($contains($OMPCOMPUTECAPS,$ll),$ll ),
                                          $if($land($or($land($LNGSPA,$expr($TGTSPA & $TGTGPU),$equal($DRIVERLANG,CPP)),$land($TGTCUDA,$equal($DRIVERLANG,CPP))),$not($land($LNGACC,$expr($TGTACC & $TGTGPU)))),$if($contains($SPACOMPUTECAPS,$ll),$ll ),
                                          $COMPUTECAP ))));

# Set to 1 by the -gpu=ccnative keyword; ERROR27 warns when it is set but no GPU was detected.
variable ISCCNATIVE is default(0);

# Compile `.cu` files
# When ANYCU is set, pass the default capability and toolkit flags to the code generator arguments.
append CGARGS=$if($ANYCU,$DEFAULTCAPFLAG $TOOLKITFLAG);

# Device Link Time Optimization
# ERROR28 rejects it with CUDA older than 11.2.
variable NEEDCUDALTO is default(0);

# Select the target for all parallel programming paradigms used (OpenACC, OpenMP, Standard Languages)
# -target={gpu|gpu,multicore|multicore}
#      gpu                     -acc implies -acc=gpu, -mp implies -mp=gpu, -stdpar implies -stdpar=gpu
#      gpu,multicore           (default) -acc implies -acc=gpu,multicore, -mp implies -mp=gpu,multicore, -stdpar implies -stdpar=gpu,multicore
#      multicore                         -acc implies -acc=multicore, -mp implies -mp=multicore, -stdpar implies -stdpar=multicore

# (ERROR00) -target=... option alone does not have any effect if no language specific option is specified
# Emitted as a warning. It fires only when a -target keyword was given (TGTNKW cleared) and
# none of OpenACC / OpenMP / StdPar is enabled; Fortran with ISCUDALIB set is also exempt.
warning($if($and($TGT,$not($TGTNKW)),$ifn($or($LNGACC,$LNGOMP,$LNGSPA,$land($equal($DRIVERLANG,Fortran),$ISCUDALIB)),The -target option has no effect unless it is combined with a programming model option that supports multiple targets\, such as -acc\, -mp\, or -stdpar)));
# (ERROR01) -gpu=... option alone does not have any effect if no language specific option is specified
# Emitted as a warning. It is skipped when ANYCU or ANYCUF is set, when any enabled language
# targets the GPU, or when -cuda or managed memory is in effect. The message text lists
# -cuda only for Fortran.
warning($if($land($GPU,$not($GPUNKW),$not($ANYCUF),$not($ANYCU)),$ifn($or($land($LNGACC,$expr($TGTACC & $TGTGPU)),$land($LNGOMP,$expr($TGTOMP & $TGTGPU)),$land($LNGSPA,$expr($TGTSPA & $TGTGPU)),$TGTCUDA,$MANAGED),The -gpu option has no effect unless it is combined with a programming model option that enables GPU code generation\, such as -acc\, -mp=gpu\, $ifn($equal($DRIVERLANG,Fortran),or )-stdpar$if($equal($DRIVERLANG,Fortran),\, or -cuda ))));
# (ERROR04) -stdpar option does not currently support compilation for multiple compute capabilities
# This check is currently disabled (commented out).
# fatal($if($and($SETCAPS,$LNGSPA,$expr($TGTSPA & $TGTGPU),$expr($count($COMPUTECAPS)>1),$notequal($DRIVERLANG,Fortran)),The -stdpar option does not currently support compilation for multiple compute capabilities: $COMPUTECAPS));
# (ERROR10) -stdpar (C++) is allowed only with GPU that have a compute capability ">=cc60".
# Fatal when C++ StdPar targets the GPU and either SPACOMPUTECAPS is empty or the user
# explicitly requested cc30, cc35 or cc50.
fatal($if($and($LNGSPA,$TGTSPACXX,$expr($TGTSPA & $TGTGPU)),$if($or($equal($SPACOMPUTECAPS,),$and($SETCAPS,$or($contains($COMPUTECAPS,30),$contains($COMPUTECAPS,35),$contains($COMPUTECAPS,50)))),"The `-stdpar` option is available only on systems with NVIDIA GPUs with compute capability '>= cc60'")));
# (ERROR15) -cuda (C++) is allowed only with GPU that have a compute capability ">=cc60".
# Same capability test as ERROR10, applied to C++ -cuda when OpenACC is not enabled.
fatal($if($and($TGTCUDA,$equal($DRIVERLANG,CPP),$not($LNGACC)),$if($or($equal($SPACOMPUTECAPS,),$and($SETCAPS,$or($contains($COMPUTECAPS,30),$contains($COMPUTECAPS,35),$contains($COMPUTECAPS,50)))),"The `-cuda` option is available only on systems with NVIDIA GPUs with compute capability '>= cc60'")));

# This error is intentionally left in the source but commented out.  This makes
# it easy to turn -stdpar=gpu,multicore on or off until we are confident that
# we can support it.  This comment and this error check should be removed from
# the code once 'nvc++ -stdpar=gpu,multicore' has been out in the wild and
# working well for a couple of releases.
# (ERROR16) C++ -stdpar=gpu,multicore not supported
#fatal($if($and($equal($DRIVERLANG,CPP),$STDPARMC,$STDPARGPU),"The option -stdpar=gpu,multicore is not yet supported by nvc++"));

# (ERROR17) -gpu=fastmath and -gpu=math_uniform are mutually exclusive
# Fatal when both GPU_FASTMATH and GPU_MATH_UNIFORM are set.
fatal($if($and($GPU_FASTMATH,$GPU_MATH_UNIFORM),The options -gpu=fastmath and -gpu=math_uniform are incompatible and cannot be used together));
# In the checks below CUDAXXYY encodes the CUDA version as an integer
# (e.g. 11020 is compared for "CUDA 11.2", 12040 for "CUDA 12.4").
# (ERROR21) -cudalib=cusolvermp is not supported with static linking
error($if($land($NEEDCUSOLVERMP,$notequal($PGISTATICX,)),The option '-cudalib=cusolvermp' is currently not supported with static linking.));
# (ERROR22) -cudalib=cusolvermp is not supported on linux86-64 targets with CUDA older than 11.4 (CUDAXXYY < 11040)
# NOTE(review): an earlier version of this comment mentioned "less than 11.3" and an upper
# bound of 12.0; neither is what the condition enforces - confirm the intended range.
error($if($land($NEEDCUSOLVERMP,$index($TARGET,linux86-64),$expr($CUDAXXYY<11040)),The option '-cudalib=cusolvermp' is not supported with this version of CUDA.));
# (ERROR24) -cudalib=cufftmp is not supported with static linking
error($if($land($NEEDCUFFTMP,$notequal($PGISTATICX,)),The option '-cudalib=cufftmp' is currently not supported with static linking.));
# (ERROR25) -cudalib=cufftmp is not supported on linux86-64 with CUDA less than 11.2 or on linuxpower with CUDA less than 11.4.
error($if($land($NEEDCUFFTMP,$lor($land($index($TARGET,linux86-64),$expr($CUDAXXYY<11020)),$land($index($TARGET,linuxpower),$expr($CUDAXXYY<11040)))),The option '-cudalib=cufftmp' is not supported with this version of CUDA.));
# (ERROR26) -cudalib=cufftmp and -cudalib=cufft cannot both be specified on the command line.
error($if($and($NEEDCUFFTMP, $NEEDCUFFT), The option '-cudalib=cufftmp' is incompatible with the option '-cudalib=cufft'.));
# (ERROR27) Emit warning if no visible supported GPU on the system, and the default architecture will be used.
warning($if($land($equal($DETECTEDCAPS,),$ISCCNATIVE),Cannot find valid GPU for '-gpu=ccnative'\, the default supported compute capability will be used.));
# (ERROR28) LTO is only available starting from CUDA 11.2
error($if($land($NEEDCUDALTO,$expr($CUDAXXYY<11020)),The Device Link Time Optimization (LTO) is not supported with this version of CUDA; CUDA 11.2 or newer is required.));
# (ERROR30) `-gpu=fpifp` is supported starting from CUDA 12.4
# Only checked for C++; Fortran is rejected unconditionally by ERROR31.
error($if($land($NEEDFPIFP,$expr($CUDAXXYY<12040),$equal($DRIVERLANG,CPP)), Device function pointer support is not available with this version of CUDA; CUDA 12.4 or newer is required.));
# (ERROR31) `-gpu=fpifp` is not supported in Fortran
error($if($land($NEEDFPIFP,$equal($DRIVERLANG,Fortran)), Device function pointer support is not currently available in Fortran.));
# (ERROR32) `-gpu=fpifp` is supported only starting from cc90
# Checked for C++ with CUDA 12.0 or newer, so with CUDA 12.0-12.3 this can fire together with ERROR30.
error($if($land($NEEDFPIFP,$expr($CUDAXXYY>=12000),$equal($DRIVERLANG,CPP),$not($SUPPORTSFPIFP)), Device function pointer support is only available with compute capability 90\, please use -gpu=cc90.));
# (ERROR33) -cudalib=nvshmem is supported on Linux_aarch64 only with CUDA 12.2 or newer
error($if($land($index($TARGET,linuxarm64),$NEEDNVSHMEM,$expr($CUDAXXYY<12020)),The option '-cudalib=nvshmem' is not supported with this version of CUDA; CUDA 12.2 or newer is required.));
# (ERROR35) -gpu=redundant is not supported for managed or unified memory (-gpu=managed -gpu=unified)
# The condition tests REDUNDANT only; the message text also names -gpu=autocompare.
error($if($land($lor($NEEDMANAGEDMEMORY, $NEEDUNIFIED),$REDUNDANT), Redundant (-gpu=redundant) and Autocompare (-gpu=autocompare) are not supported with Unified (-gpu=unified) or Managed (-gpu=managed) memory modes.));
# (ERROR36) `-gpu=nvvm-dev` is supported starting from CUDA 12.0
error($if($land($USENVVMDEV,$expr($CUDAXXYY<12000)),Support for '-gpu=nvvm-dev' requires CUDA 12.0 or newer.));
# (ERROR37) retired: it reported that `-gpu=nvvm-next` requires CUDA 12.0, which
# contradicted ERROR40 (CUDA 12.7) and produced two conflicting errors for CUDA
# older than 12.0. ERROR40 covers every case ERROR37 used to catch.
# (ERROR38) -cudalib=cublasmp is not supported with static linking
error($if($land($NEEDCUBLASMP,$notequal($PGISTATICX,)),The option '-cudalib=cublasmp' is currently not supported with static linking.));
# (ERROR39) -gpu=splitcompile is supported starting from CUDA 12.1
# This is a warning only: the option is ignored rather than rejected.
warning($if($land($SPLITCOMPILE,$expr($CUDAXXYY<12010)),Split compilation is supported starting from CUDA 12.1\, -gpu=splitcompile will be ignored));
# (ERROR40) nvvm-next is only available with CUDA 12.7 or newer
error($if($land($USENVVMNEXT,$expr($CUDAXXYY<12070)),Support for '-gpu=nvvm-next' requires CUDA 12.7 or newer.));

# Each statement below appends its flag only when the guarding variable is true.

# If -gpu=math_uniform
append ACCCGFLAGS=$if($GPU_MATH_UNIFORM, -x 189 0x40000000);

# Use old libNVVM driver (nvvmd)
append ACCCGFLAGS=$if($USEOLDNVVMD,-x 226 0x100);

# Convert .gpu to .bc before passing it to libNVVM
append ACCCGFLAGS=$if($USEGPUBC,-x 226 0x20);

# Add convergent attribute to all device functions
# Applied for nvvm-dev, nvvm-next, or when the FIRST entry of COMPUTECAPS is >= 100.
append ACCCGFLAGS=$if($lor($USENVVMDEV,$USENVVMNEXT,$expr($first($COMPUTECAPS)>=100)),-x 219 0x40);

# Set predefined target macros for Stdpar Fortran
append USRDEFDEF=$if($STDPARGPU, -def __NVCOMPILER_STDPAR_GPU);
append USRDEFDEF=$if($STDPARMC, -def __NVCOMPILER_STDPAR_MULTICORE);

################### Memory specific settings
# Separate memory
# The separate-memory macro is defined whenever neither managed nor unified memory is in effect.
append USRDEFDEF=$if($land($not($NEEDMANAGED),$not($NEEDUNIFIED)), -def __NVCOMPILER_GPU_SEPARATE_MEM);

# -gpu=pinned
# NEEDPINNED controls all flags for Pinned Memory. It is set if:
# - The legacy flag -gpu=pinned is passed and no -gpu=mem:* option is passed;
# - The last -gpu=mem:* option is -gpu=mem:separate and the last
#   -gpu=mem:separate:[no]pinnedalloc option is -gpu=mem:separate:pinnedalloc.
set NEEDPINNED=$lor($land($MEMSETEXPLICIT, $equal($SETMEMORYMODE, $SEPARATEMEMMODE), $USEPINNEDALLOCWITHSEPARATEMEM), $land($not($MEMSETEXPLICIT), $PINNEDSET));
# PININIT is NEEDPINNED normalized to 1/0; the memory mode diagnostics test it.
set PININIT=$if($NEEDPINNED, 1, 0);
# The same flag is passed to both F901ARGS and CGARGS.
append F901ARGS=$if($NEEDPINNED, -x 198 0x400000);
append CGARGS=$if($NEEDPINNED, -x 198 0x400000);

# -gpu=[no]managed
# NEEDMANAGED controls all flags for Managed Memory. It is set if:
# - The last explicitly passed -gpu=mem:* flag is -gpu=mem:managed;
# - The last explicitly passed legacy MM flag is -gpu=managed;
# - If Unified Memory is enabled and -gpu=nomanaged is not passed explicitly.
# - When compiling with -stdpar and -gpu=nomanaged,pinned,mem:* are not passed explicitly.
#
# NEEDMANAGEDNEW:      a -gpu=mem:* option was given and the last one is mem:managed.
# NEEDMANAGEDLEGACY:   no -gpu=mem:* option, and legacy -gpu=[no]managed was given with a true value.
# ADDMANAGEDIMPL:      implicit managed allocations are permitted: neither legacy
#                      -gpu=[no]managed nor -gpu=pinned was given, and either no -gpu=mem:*
#                      option was given or the mode is mem:unified with
#                      USEMANALLOCWITHUNIFIEDMEM enabled.
# NEEDMANAGEDIMPLICIT: ADDMANAGEDIMPL holds and either -stdpar=gpu or unified memory is in effect.
set NEEDMANAGEDNEW=$land($MEMSETEXPLICIT, $equal($SETMEMORYMODE, $MANAGEDMEMMODE));
set NEEDMANAGEDLEGACY=$land($not($MEMSETEXPLICIT), $land($MANAGEDSET, $EXPLICITMANAGED));
set ADDMANAGEDIMPL = $land($lor($not($MEMSETEXPLICIT), $land($equal($SETMEMORYMODE, $UNIFIEDMEMMODE), $USEMANALLOCWITHUNIFIEDMEM)), $not($MANAGEDSET), $not($PINNEDSET));
set NEEDMANAGEDIMPLICIT=$land($ADDMANAGEDIMPL, $lor($STDPARGPU, $NEEDUNIFIED));
set NEEDMANAGED=$lor($NEEDMANAGEDNEW, $NEEDMANAGEDLEGACY, $NEEDMANAGEDIMPLICIT);

# With managed memory, the pre-standard include directory is the `include_man` directory.
set DEFACCPRESTDINC=$if($NEEDMANAGED, $COMPBASE/$COMPSYS/$COMPVER/$quote($COMPINCPREFIX)include_man);

# FIXME: FS#34543 - These vars should probably be simply replaced by NEEDMANAGED.
# Each of them expands to 1 when NEEDMANAGED is true and to nothing otherwise.
set MANAGED=$if($NEEDMANAGED, 1);
set NEEDMANAGEDMEMORY=$if($NEEDMANAGED, 1);
set MANINIT=$if($NEEDMANAGED, 1);

# Flags emitted for managed memory. Each DEF*MANPIN variable carries the same value
# that is appended directly to the matching *ARGS variable right after it.

# FIXME: FS#34543 - This variable was used in F901ARGS through F901MANPIN which doesn't work now
# since F901MANPIN is not set at the global scope. Should it be removed?
# `-x 137 0x6000` is added only when FNEEDCUDA is set and unified memory is NOT in effect.
set DEFF901MANPIN=$if($NEEDMANAGED, -x 198 0x100 $if($land($FNEEDCUDA, $not($NEEDUNIFIED)), -x 137 0x6000));
append F901ARGS=$if($NEEDMANAGED, -x 198 0x100 $if($land($FNEEDCUDA, $not($NEEDUNIFIED)), -x 137 0x6000));

# FIXME: FS#34543 - This variable was used in CPP1ARGS through CPP1MANPIN which doesn't work now
# since CPP1MANPIN is not set at the global scope. Should it be removed?
set DEFCPP1MANPIN=$if($NEEDMANAGED, --accel_managed);

append CPP1ARGS=$if($NEEDMANAGED, --accel_managed);

# FIXME: FS#34543 - This variable was used in CGARGS through CGMANPIN which doesn't work now
# since CGMANPIN is not set at the global scope. Should it be removed?
# Note: DEFCGMANPIN is appended to (not set), unlike the two DEF*MANPIN variables above.
append DEFCGMANPIN=$if($NEEDMANAGED, -x 194 0x20000000 -x 198 0x100);

append CGARGS=$if($NEEDMANAGED, -x 194 0x20000000 -x 198 0x100);

#FIXME: FS#34543 - Find out if __PGI_ACC_UMEM macros are still required.
append USRDDEF=$if($NEEDMANAGED, -D__PGI_ACC_UMEM);
append USRDEFDEF=$if($NEEDMANAGED, -def __PGI_ACC_UMEM -def __NVCOMPILER_GPU_MANAGED_MEM);


# OpenMP
# Extra flags when managed memory is in effect AND OpenMP targets the GPU.
# F901ARGS additionally receives `-x 137 0x2000`; CPP2ARGS and F902ARGS do not.
append CPP2ARGS=$if($NEEDMANAGED, $if($and($LNGOMP,$expr($TGTOMP & $TGTGPU)),-x 233 0x10000));
append F901ARGS=$if($NEEDMANAGED, $if($and($LNGOMP,$expr($TGTOMP & $TGTGPU)),-x 233 0x10000 -x 137 0x2000));
append F902ARGS=$if($NEEDMANAGED, $if($and($LNGOMP,$expr($TGTOMP & $TGTGPU)),-x 233 0x10000));


# -gpu=[no]unified
# NEEDUNIFIED controls all flags for UM. It is set if:
# - The last explicitly passed -gpu=mem:* flag is -gpu=mem:unified
# - The last explicitly passed legacy UM flag is -gpu=unified
# - If -stdpar is passed and CUDA driver supports UM and -gpu=nounified|[no]managed|mem:* are not passed explicitly.
#
# NEEDUNIFIEDNEW:      a -gpu=mem:* option was given and the last one is mem:unified.
# NEEDUNIFIEDLEGACY:   no -gpu=mem:* option, and legacy -gpu=[no]unified was given with a true value.
# NEEDUNIFIEDIMPLICIT: none of -gpu=mem:*, [no]unified, [no]managed, pinned was given,
#                      the driver reports UM support and -stdpar=gpu is in effect.
set NEEDUNIFIEDNEW=$land($MEMSETEXPLICIT, $equal($SETMEMORYMODE, $UNIFIEDMEMMODE));
set NEEDUNIFIEDLEGACY=$land($not($MEMSETEXPLICIT), $UNIFIEDSET, $EXPLICITUNIFIED);
set NEEDUNIFIEDIMPLICIT=$land($not($lor($MEMSETEXPLICIT, $UNIFIEDSET, $MANAGEDSET, $PINNEDSET)), $land($HASCUDAUNIFIED, $STDPARGPU));
set NEEDUNIFIED=$lor($NEEDUNIFIEDNEW, $NEEDUNIFIEDLEGACY, $NEEDUNIFIEDIMPLICIT);

append EXTRAINIT=$if($NEEDUNIFIED, -init=unified);

# For UM we set CUDA flags differently to MM (not clear why).
# DEFF901MANPIN is appended to here, not set: the managed-memory section already
# stored its flags in this variable, and unified memory can be active together with
# (implicit) managed memory. This mirrors how F901ARGS and DEFCGMANPIN accumulate
# both contributions.
append DEFF901MANPIN=$if($land($NEEDUNIFIED, $FNEEDCUDA), -x 137 0x4000);
append F901ARGS=$if($land($NEEDUNIFIED, $FNEEDCUDA), -x 137 0x4000);

# FIXME: FS#34543 - This variable was used in CGARGS through CGMANPIN which doesn't work now
# since CGMANPIN is not set at the global scope. Should it be removed?
append DEFCGMANPIN=$if($NEEDUNIFIED, -x 194 0x20000000);

append F901ARGS=$if($NEEDUNIFIED, -x 198 0x80000000);
append CPP1ARGS=$if($NEEDUNIFIED, --accel_unified);
# The -unifiedmem link flag is only added with CUDA 10.0 or newer.
append EXTRAACCLNK=$if($NEEDUNIFIED, $if($expr($CUDAXXYY>=10000), -unifiedmem));

#FIXME: FS#34543 - Find out if __PGI_ACC_UMEM macros are still required.
append USRDDEF=$if($NEEDUNIFIED, -D__PGI_ACC_UMEM);
append USRDEFDEF=$if($NEEDUNIFIED, -def __PGI_ACC_UMEM -def __NVCOMPILER_GPU_UNIFIED_MEM);

append CGARGS=$if($NEEDUNIFIED, $CGMANPIN -x 194 0x20000000 -x 194 0x40000000 -x 198 0x80000000);

# Both managed and unified memory support use features from OpenACC libs such as global variable
# registration, etc. Currently setting this var here is only required for CUDA compilation.
# For other programming models it is set elsewhere.
# Pinned memory also forces it on; any previous value of NEEDACCLIB is preserved.
set NEEDACCLIB=$lor($NEEDUNIFIED, $NEEDMANAGED, $NEEDPINNED, $NEEDACCLIB);

# Indicate whether interception of calls to free should be enabled by default.
# Default is 1 only when -stdpar=gpu and managed memory are both in effect.
set DEFNEEDMANPINMEMORYINTERCEPTION=$if($expr($STDPARGPU & $NEEDMANAGED),1,0);

# Memory mode related diagnostics.
# Pinned memory cannot be combined with unified or managed memory.
error($if($land($NEEDUNIFIED,$PININIT), The -gpu=pinned option may not be used with -gpu=unified));
error($if($land($NEEDMANAGED, $PININIT), The -gpu=pinned option may not be used with -gpu=managed));
#FIXME: FS#34543 - It seems we are providing double diagnostic - should one be removed?
# (This check has the same condition as the previous one and differs only in its message.)
error($if($land($NEEDMANAGED, $PININIT), The -gpu=pinned option may not be used with -Mpstl));
# -gpu=mem was given without a sub-option.
error($if($land($MEMSETEXPLICIT, $equal($SETMEMORYMODE, $UNSETMEMMODE))," The -gpu=mem option requires one of the following sub-options: separate, managed, or unified"));
# The new -gpu=mem:* and -gpu=[no]interceptdeallocations options cannot be mixed with the legacy memory flags.
error($if($land($MEMSETEXPLICIT, $lor($UNIFIEDSET, $MANAGEDSET, $PINNEDSET))," The -gpu=mem:[opts] option can't be combined with the deprecated -gpu=pinned,[no]managed,[no]unified flags"));
error($if($land($INTERCEPTDEALLOCSETEXPLICIT, $lor($UNIFIEDSET, $MANAGEDSET, $PINNEDSET))," The -gpu=[no]interceptdeallocations option can't be combined with the deprecated -gpu=pinned,[no]managed,[no]unified flags"));
# Warn when interception is on but neither managed nor pinned allocations are in effect.
warning($if($land($NEEDMANPINMEMORYINTERCEPTION, $not($lor($NEEDMANAGED, $NEEDPINNED)))," The -gpu=[no]interceptdeallocations option is set without managed or pinned allocations"));
# Deprecation warnings for the legacy memory flags.
warning($if($PINNEDSET," The -gpu=pinned option is deprecated; please use -gpu=mem:separate:pinnedalloc instead"));
warning($if($MANAGEDSET," The -gpu=[no]managed option is deprecated; please use -gpu=mem:managed or -gpu=mem:separate instead"));
warning($if($UNIFIEDSET," The -gpu=[no]unified option is deprecated; please use -gpu=mem:unified, -gpu=mem:managed or -gpu=mem:separate instead"));

# -target={gpu|multicore}[,...]
# Selects the device set for every enabled programming model. The switch itself
# sets TGT=1; each keyword clears TGTNKW, records its own device bit in the
# per-language TGT<lang>{GPU,CPU} variable, and then recomputes TGT<lang>:
#   - if the language specific option already chose devices (TGL<lang> set),
#     the TGL<lang>* bits win;
#   - else if -target is in effect, the accumulated TGT<lang>* bits are used, so
#     `-target=gpu,multicore` ORs both keywords together;
#   - otherwise the keyword's own device is the fallback.
switch -target is
     help(Select the target device for all parallel programming paradigms used (OpenACC, OpenMP, Standard Languages))
     helpgroup(target)
     mustkeyword
     set(TGT=1)

     keyword(
         gpu(
             help(Globally set the target device to an NVIDIA GPU)
             set(TGTNKW=0)

             # OpenACC
             set(TGTACCGPU=$TGTGPU)
             set(TGTACC=$if($TGLACC,$expr($TGLACCCPU | $TGLACCGPU | $TGLACCSEQ),$if($TGT,$expr($TGTACCCPU | $TGTACCGPU | $TGTACCSEQ),$TGTGPU)))

             # OpenMP
             set(TGTOMPGPU=$TGTGPU)
             set(TGTOMP=$if($TGLOMP,$expr($TGLOMPCPU | $TGLOMPGPU),$if($TGT,$expr($TGTOMPCPU | $TGTOMPGPU),$TGTGPU)))

             # StdPar
             set(TGTSPAGPU=$TGTGPU)
             set(TGTSPA=$if($TGLSPA,$expr($TGLSPACPU | $TGLSPAGPU),$if($TGT,$expr($TGTSPACPU | $TGTSPAGPU),$TGTGPU)))
         )
         multicore(
             help(Globally set the target device to the host CPU)
             set(TGTNKW=0)

             # OpenACC
             # The last-resort fallback is the multicore device, matching the OpenMP
             # and StdPar entries of this keyword.
             set(TGTACCCPU=$TGTCPU)
             set(TGTACC=$if($TGLACC,$expr($TGLACCCPU | $TGLACCGPU | $TGLACCSEQ),$if($TGT,$expr($TGTACCCPU | $TGTACCGPU | $TGTACCSEQ),$TGTCPU)))

             # OpenMP
             set(TGTOMPCPU=$TGTCPU)
             set(TGTOMP=$if($TGLOMP,$expr($TGLOMPCPU | $TGLOMPGPU),$if($TGT,$expr($TGTOMPCPU | $TGTOMPGPU),$TGTCPU)))

             # StdPar
             set(TGTSPACPU=$TGTCPU)
             set(TGTSPA=$if($TGLSPA,$expr($TGLSPACPU | $TGLSPAGPU),$if($TGT,$expr($TGTSPACPU | $TGTSPAGPU),$TGTCPU)))
         )
     );

switch -gpu is
     help(Select specific options for GPU code generation)
     helpgroup(target)
     mustkeyword
     set(GPU=1)

     keyword(
         analysis(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 163 2)
             help(Analysis only, no code generation)
             set(IGNORECUDALIB=1)
         )
         autocollapse(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-y 205 1)
             helpname([no]autocollapse)
             help(Automatically collapse tightly nested loops)
         )
         noautocollapse(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 205 1)
             help(Do not automatically collapse tightly nested loops)
         )
         cache(hide
             set(GPUNKW=0)
             set(ACCCACHE=-x 163 0x100)
             help(Automatically use software data cache in OpenACC)
         )
         "ccnative"(
             set(GPUNKW=0)
             help(Detects the visible GPUs on the system and generates codes for them. If no device is available, the compute capability matching NVCC default will be used.)
             set(ISCCNATIVE=1)
         )
         nocache(hide
             set(GPUNKW=0)
             set(ACCCACHE=-y 163 0x100)
             help(Do not use software data cache)
         )
         kepler(hide
             # Hidden alias for compute capability 3.5; cc35 is requested only when
             # SYSCAP lists it, otherwise the error below is reported.
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($contains($SYSCAP,35),35))
             set(SETCAPS=1)
             # cc35 is below cc100: flag it as pre-Blackwell, as the cc35 and cc3x keywords do
             set(ISPREBLACKWELL=1)
             help(Compile for Kepler architecture)
             error($ifn($contains($SYSCAP,35),The -gpu=kepler option is not supported on $TARGET systems))
         )
         "kepler+"(hide
             # Hidden alias requesting cc35 and every newer capability listed below.
             # Each entry is added only when SYSCAP lists it and, from cc60 upward,
             # when the CUDA version (CUDAXXYY) is at least the value paired with it.
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($contains($SYSCAP,35),35)
                 $if($contains($SYSCAP,50),50)
                 $if($and($contains($SYSCAP,60),$expr($CUDAXXYY>=8000)),60)
                 $if($and($contains($SYSCAP,61),$expr($CUDAXXYY>=8000)),61)
                 $if($and($contains($SYSCAP,62),$expr($CUDAXXYY>=10000)),62)
                 $if($and($contains($SYSCAP,70),$expr($CUDAXXYY>=9000)),70)
                 $if($and($contains($SYSCAP,72),$expr($CUDAXXYY>=10000)),72)
                 $if($and($contains($SYSCAP,75),$expr($CUDAXXYY>=10000)),75)
                 $if($and($contains($SYSCAP,80),$expr($CUDAXXYY>=11000)),80)
                 $if($and($contains($SYSCAP,86),$expr($CUDAXXYY>=11010)),86)
                 $if($and($contains($SYSCAP,87),$expr($CUDAXXYY>=11040)),87)
                 $if($and($contains($SYSCAP,90),$expr($CUDAXXYY>=11080)),90)
             )
             set(SETCAPS=1)
             # Every capability in the list is below cc100: flag as pre-Blackwell,
             # as the equivalent "cc3+" keyword does
             set(ISPREBLACKWELL=1)
             help(Compile for Kepler architecture and above)
             error($ifn($contains($SYSCAP,35),The -gpu=kepler+ option is not supported on $TARGET systems))
         )
         cc30(hide error(The -gpu=cc30 option is no longer supported))
         cc35(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=35)
             help(Compile for compute capability 3.5)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             error($ifn($contains($SYSCAP,35),The -gpu=cc35 option is not supported on $TARGET systems))
         )
         cc3x(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($contains($SYSCAP,35),35) )
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for compute capability 3.x)
             error($ifn($contains($SYSCAP,35),The -gpu=cc3x option is not supported on $TARGET systems))
         )
         "cc3+"(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($contains($SYSCAP,35),35)
                 $if($contains($SYSCAP,50),50)
                 $if($and($contains($SYSCAP,60),$expr($CUDAXXYY>=8000)),60)
                 $if($and($contains($SYSCAP,61),$expr($CUDAXXYY>=8000)),61)
                 $if($and($contains($SYSCAP,62),$expr($CUDAXXYY>=10000)),62)
                 $if($and($contains($SYSCAP,70),$expr($CUDAXXYY>=9000)),70)
                 $if($and($contains($SYSCAP,72),$expr($CUDAXXYY>=10000)),72)
                 $if($and($contains($SYSCAP,75),$expr($CUDAXXYY>=10000)),75)
                 $if($and($contains($SYSCAP,80),$expr($CUDAXXYY>=11000)),80)
                 $if($and($contains($SYSCAP,86),$expr($CUDAXXYY>=11010)),86)
                 $if($and($contains($SYSCAP,87),$expr($CUDAXXYY>=11040)),87)
                 $if($and($contains($SYSCAP,90),$expr($CUDAXXYY>=11080)),90)
             )
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for compute capability 3.x and above)
             error($ifn($contains($SYSCAP,35),The -gpu=cc3+ option is not supported on $TARGET systems)))
         maxwell(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=50)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for Maxwell architecture)
             error($ifn($contains($SYSCAP,50),The -gpu=maxwell option is not supported on $TARGET systems))
         )
         "maxwell+"(hide
             # Hidden alias requesting cc50 and every newer capability listed below.
             # Each entry is added only when SYSCAP lists it and, from cc60 upward,
             # when the CUDA version (CUDAXXYY) is at least the value paired with it.
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($contains($SYSCAP,50),50)
                 $if($and($contains($SYSCAP,60),$expr($CUDAXXYY>=8000)),60)
                 $if($and($contains($SYSCAP,61),$expr($CUDAXXYY>=8000)),61)
                 $if($and($contains($SYSCAP,62),$expr($CUDAXXYY>=10000)),62)
                 $if($and($contains($SYSCAP,70),$expr($CUDAXXYY>=9000)),70)
                 $if($and($contains($SYSCAP,72),$expr($CUDAXXYY>=10000)),72)
                 $if($and($contains($SYSCAP,75),$expr($CUDAXXYY>=10000)),75)
                 $if($and($contains($SYSCAP,80),$expr($CUDAXXYY>=11000)),80)
                 $if($and($contains($SYSCAP,86),$expr($CUDAXXYY>=11010)),86)
                 $if($and($contains($SYSCAP,87),$expr($CUDAXXYY>=11040)),87)
                 $if($and($contains($SYSCAP,90),$expr($CUDAXXYY>=11080)),90)
             )
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for Maxwell architecture and above)
             error($ifn($contains($SYSCAP,50),The -gpu=maxwell+ option is not supported on $TARGET systems))
         )
         cc50(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=50)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for compute capability 5.0)
             error($ifn($contains($SYSCAP,50),The -gpu=cc50 option is not supported on $TARGET systems))
         )
         sm_50(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=50)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for compute capability 5.0)
             error($ifn($contains($SYSCAP,50),The -gpu=sm_50 option is not supported on $TARGET systems))
         )
         # -gpu=cc5x (hidden from -help): the 5.x family option; it appends
         # only capability 50 and errors when SYSCAP does not contain 50.
         cc5x(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=50)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for compute capability 5.x)
             error($ifn($contains($SYSCAP,50),The -gpu=cc5x option is not supported on $TARGET systems))
         )
         # -gpu=cc5+ (hidden from -help): append every compute capability from
         # 5.0 through 9.0 that is listed in SYSCAP and whose minimum CUDA
         # version (CUDAXXYY) is satisfied. Fails if SYSCAP does not contain 50.
         # The list matches the 8.x entries of cc8+ (including 89).
         "cc5+"(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($contains($SYSCAP,50),50)
                 $if($and($contains($SYSCAP,60),$expr($CUDAXXYY>=8000)),60)
                 $if($and($contains($SYSCAP,61),$expr($CUDAXXYY>=8000)),61)
                 $if($and($contains($SYSCAP,62),$expr($CUDAXXYY>=10000)),62)
                 $if($and($contains($SYSCAP,70),$expr($CUDAXXYY>=9000)),70)
                 $if($and($contains($SYSCAP,72),$expr($CUDAXXYY>=10000)),72)
                 $if($and($contains($SYSCAP,75),$expr($CUDAXXYY>=10000)),75)
                 $if($and($contains($SYSCAP,80),$expr($CUDAXXYY>=11000)),80)
                 $if($and($contains($SYSCAP,86),$expr($CUDAXXYY>=11010)),86)
                 $if($and($contains($SYSCAP,87),$expr($CUDAXXYY>=11040)),87)
                 $if($and($contains($SYSCAP,89),$expr($CUDAXXYY>=11080)),89)
                 $if($and($contains($SYSCAP,90),$expr($CUDAXXYY>=11080)),90)
             )
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for compute capability 5.x and above)
             error($ifn($contains($SYSCAP,50),The -gpu=cc5+ option is not supported on $TARGET systems))
         )
         # -gpu=cc60 (hidden from -help): append compute capability 60 and
         # flag the CUDA 8.0 requirement. Errors when SYSCAP lacks 60.
         cc60(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=60)
             help(Compile for compute capability 6.0)
             set(NEEDCUDA80=127)
             # Keep a higher requirement if one is already flagged:
             # 10.0 when NEEDCUDA100 is set, else 9.0 when NEEDCUDA90 is set, else 8.0.
             set(REQCUDAVERSION=$if($NEEDCUDA100,10.0,$if($NEEDCUDA90,9.0,8.0)))
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             error($ifn($contains($SYSCAP,60),The -gpu=cc60 option is not supported on $TARGET systems))
         )
         # -gpu=sm_60 (hidden from -help): same settings as cc60 under the
         # sm_XY spelling; only the error message text differs.
         sm_60(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=60)
             help(Compile for compute capability 6.0)
             set(NEEDCUDA80=127)
             # 10.0 when NEEDCUDA100 is set, else 9.0 when NEEDCUDA90 is set, else 8.0.
             set(REQCUDAVERSION=$if($NEEDCUDA100,10.0,$if($NEEDCUDA90,9.0,8.0)))
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             error($ifn($contains($SYSCAP,60),The -gpu=sm_60 option is not supported on $TARGET systems))
         )
         # -gpu=cc61 (hidden from -help): append compute capability 61 and
         # flag the CUDA 8.0 requirement.
         cc61(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=61)
             help(Compile for compute capability 6.1)
             set(NEEDCUDA80=127)
             # 9.0 when NEEDCUDA90 is already set, otherwise 8.0.
             set(REQCUDAVERSION=$if($NEEDCUDA90,9.0,8.0))
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             # Accepted when SYSCAP contains either 60 or 61.
             error($ifn($or($contains($SYSCAP,60),$contains($SYSCAP,61)),The -gpu=cc61 option is not supported on $TARGET systems))
         )
         # -gpu=sm_61 (hidden from -help): same settings as cc61 under the
         # sm_XY spelling; only the error message text differs.
         sm_61(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=61)
             help(Compile for compute capability 6.1)
             set(NEEDCUDA80=127)
             # 9.0 when NEEDCUDA90 is already set, otherwise 8.0.
             set(REQCUDAVERSION=$if($NEEDCUDA90,9.0,8.0))
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             # Accepted when SYSCAP contains either 60 or 61.
             error($ifn($or($contains($SYSCAP,60),$contains($SYSCAP,61)),The -gpu=sm_61 option is not supported on $TARGET systems))
         )
         # -gpu=cc62 (hidden from -help): append compute capability 62 and
         # require CUDA 10.0. Errors when SYSCAP lacks 62.
         cc62(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=62)
             help(Compile for compute capability 6.2)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA100=127)
             set(REQCUDAVERSION=10.0)
             error($ifn($contains($SYSCAP,62),The -gpu=cc62 option is not supported on $TARGET systems))
         )
         # -gpu=sm_62 (hidden from -help): same settings as cc62 under the
         # sm_XY spelling; only the error message text differs.
         sm_62(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=62)
             help(Compile for compute capability 6.2)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA100=127)
             set(REQCUDAVERSION=10.0)
             error($ifn($contains($SYSCAP,62),The -gpu=sm_62 option is not supported on $TARGET systems))
         )
         # -gpu=cc6+ (hidden from -help): append every compute capability from
         # 6.0 through 9.0 that is listed in SYSCAP and whose minimum CUDA
         # version (CUDAXXYY) is satisfied. Fails unless SYSCAP contains one of
         # 60, 61 or 62. The list matches the 8.x entries of cc8+ (including 89).
         "cc6+"(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($and($contains($SYSCAP,60),$expr($CUDAXXYY>=8000)),60)
                 $if($and($contains($SYSCAP,61),$expr($CUDAXXYY>=8000)),61)
                 $if($and($contains($SYSCAP,62),$expr($CUDAXXYY>=10000)),62)
                 $if($and($contains($SYSCAP,70),$expr($CUDAXXYY>=9000)),70)
                 $if($and($contains($SYSCAP,72),$expr($CUDAXXYY>=10000)),72)
                 $if($and($contains($SYSCAP,75),$expr($CUDAXXYY>=10000)),75)
                 $if($and($contains($SYSCAP,80),$expr($CUDAXXYY>=11000)),80)
                 $if($and($contains($SYSCAP,86),$expr($CUDAXXYY>=11010)),86)
                 $if($and($contains($SYSCAP,87),$expr($CUDAXXYY>=11040)),87)
                 $if($and($contains($SYSCAP,89),$expr($CUDAXXYY>=11080)),89)
                 $if($and($contains($SYSCAP,90),$expr($CUDAXXYY>=11080)),90)
             )
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for compute capability 6.x and above)
             error($ifn($or($contains($SYSCAP,60), $contains($SYSCAP,61), $contains($SYSCAP,62)),The -gpu=cc6+ option is not supported on $TARGET systems))
         )
         # -gpu=cc70 (hidden from -help): append compute capability 70 and
         # require CUDA 9.0. Errors when SYSCAP lacks 70.
         cc70(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=70)
             help(Compile for compute capability 7.0)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA90=127) set(REQCUDAVERSION=9.0)
             error($ifn($contains($SYSCAP,70),The -gpu=cc70 option is not supported on $TARGET systems))
         )
         # -gpu=sm_70 (hidden from -help): same settings as cc70 under the
         # sm_XY spelling; only the error message text differs.
         sm_70(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=70)
             help(Compile for compute capability 7.0)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA90=127) set(REQCUDAVERSION=9.0)
             error($ifn($contains($SYSCAP,70),The -gpu=sm_70 option is not supported on $TARGET systems))
         )
         # -gpu=cc72 (hidden from -help): append compute capability 72 and
         # require CUDA 10.0. Errors when SYSCAP lacks 72.
         cc72(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=72)
             help(Compile for compute capability 7.2)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA100=127) set(REQCUDAVERSION=10.0)
             error($ifn($contains($SYSCAP,72),The -gpu=cc72 option is not supported on $TARGET systems))
         )
         # -gpu=sm_72 (hidden from -help): same settings as cc72 under the
         # sm_XY spelling; only the error message text differs.
         sm_72(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=72)
             help(Compile for compute capability 7.2)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA100=127) set(REQCUDAVERSION=10.0)
             error($ifn($contains($SYSCAP,72),The -gpu=sm_72 option is not supported on $TARGET systems))
         )
         # -gpu=cc75 (hidden from -help): append compute capability 75 and
         # require CUDA 10.0. Errors when SYSCAP lacks 75.
         cc75(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=75) help(Compile for compute capability 7.5)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA100=127) set(REQCUDAVERSION=10.0)
             error($ifn($contains($SYSCAP,75),The -gpu=cc75 option is not supported on $TARGET systems))
         )
         # -gpu=sm_75 (hidden from -help): same settings as cc75 under the
         # sm_XY spelling; only the error message text differs.
         sm_75(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=75) help(Compile for compute capability 7.5)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA100=127) set(REQCUDAVERSION=10.0)
             error($ifn($contains($SYSCAP,75),The -gpu=sm_75 option is not supported on $TARGET systems))
         )
         # -gpu=cc7+ (hidden from -help): append every compute capability from
         # 7.0 through 9.0 that is listed in SYSCAP and whose minimum CUDA
         # version (CUDAXXYY) is satisfied, and require CUDA 10.0. Fails if
         # SYSCAP does not contain 70. The list matches the 8.x entries of cc8+
         # (including 89).
         "cc7+"(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($and($contains($SYSCAP,70),$expr($CUDAXXYY>=9000)),70)
                 $if($and($contains($SYSCAP,72),$expr($CUDAXXYY>=10000)),72)
                 $if($and($contains($SYSCAP,75),$expr($CUDAXXYY>=10000)),75)
                 $if($and($contains($SYSCAP,80),$expr($CUDAXXYY>=11000)),80)
                 $if($and($contains($SYSCAP,86),$expr($CUDAXXYY>=11010)),86)
                 $if($and($contains($SYSCAP,87),$expr($CUDAXXYY>=11040)),87)
                 $if($and($contains($SYSCAP,89),$expr($CUDAXXYY>=11080)),89)
                 $if($and($contains($SYSCAP,90),$expr($CUDAXXYY>=11080)),90)
             )
             set(NEEDCUDA100=127) set(REQCUDAVERSION=10.0)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for compute capability 7.x and above)
             error($ifn($contains($SYSCAP,70),The -gpu=cc7+ option is not supported on $TARGET systems))
         )
         # -gpu=cc7x (hidden from -help): the 7.x family option; it appends
         # only capability 70, adds an extra code-generator flag, and requires
         # CUDA 9.0. Errors when SYSCAP lacks 70.
         cc7x(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=70)
             # Extra flag passed only by the family option (cc70 does not add it).
             append(CGARGS=-x 215 0x1000)
             help(Compile for compute capability 7.x)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA90=127) set(REQCUDAVERSION=9.0)
             error($ifn($contains($SYSCAP,70),The -gpu=cc7x option is not supported on $TARGET systems))
         )
         # -gpu=cc80 (hidden from -help): append compute capability 80 and
         # require CUDA 11.0. Errors when SYSCAP lacks 80.
         cc80(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=80)
             help(Compile for compute capability 8.0)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA110=127) set(REQCUDAVERSION=11.0)
             error($ifn($contains($SYSCAP,80),The -gpu=cc80 option is not supported on $TARGET systems))
         )
         # -gpu=sm_80 (hidden from -help): same settings as cc80 under the
         # sm_XY spelling; only the error message text differs.
         sm_80(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=80)
             help(Compile for compute capability 8.0)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA110=127) set(REQCUDAVERSION=11.0)
             error($ifn($contains($SYSCAP,80),The -gpu=sm_80 option is not supported on $TARGET systems))
         )
         # -gpu=cc86 (hidden from -help): append compute capability 86 and
         # require CUDA 11.1. Errors when SYSCAP lacks 86.
         cc86(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=86)
             help(Compile for compute capability 8.6)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA111=127) set(REQCUDAVERSION=11.1)
             error($ifn($contains($SYSCAP,86),The -gpu=cc86 option is not supported on $TARGET systems))
         )
         # -gpu=sm_86 (hidden from -help): same settings as cc86 under the
         # sm_XY spelling; only the error message text differs.
         sm_86(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=86)
             help(Compile for compute capability 8.6)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA111=127) set(REQCUDAVERSION=11.1)
             error($ifn($contains($SYSCAP,86),The -gpu=sm_86 option is not supported on $TARGET systems))
         )
         # -gpu=cc87 (hidden from -help): append compute capability 87 and
         # require CUDA 11.4. Errors when SYSCAP lacks 87.
         cc87(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=87)
             help(Compile for compute capability 8.7)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA114=127) set(REQCUDAVERSION=11.4)
             error($ifn($contains($SYSCAP,87),The -gpu=cc87 option is not supported on $TARGET systems))
         )
         # -gpu=sm_87 (hidden from -help): same settings as cc87 under the
         # sm_XY spelling; only the error message text differs.
         sm_87(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=87)
             help(Compile for compute capability 8.7)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA114=127) set(REQCUDAVERSION=11.4)
             error($ifn($contains($SYSCAP,87),The -gpu=sm_87 option is not supported on $TARGET systems))
         )
         # -gpu=cc89 (hidden from -help): append compute capability 89 and
         # require CUDA 11.8. Errors when SYSCAP lacks 89.
         cc89(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=89)
             help(Compile for compute capability 8.9)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA118=127) set(REQCUDAVERSION=11.8)
             error($ifn($contains($SYSCAP,89),The -gpu=cc89 option is not supported on $TARGET systems))
         )
         # -gpu=sm_89 (hidden from -help): same settings as cc89 under the
         # sm_XY spelling; only the error message text differs.
         sm_89(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=89)
             help(Compile for compute capability 8.9)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA118=127) set(REQCUDAVERSION=11.8)
             error($ifn($contains($SYSCAP,89),The -gpu=sm_89 option is not supported on $TARGET systems))
         )
         # -gpu=cc8+ (hidden from -help): append each of 80, 86, 87, 89 and 90
         # that is listed in SYSCAP and whose minimum CUDA version (CUDAXXYY) is
         # satisfied, and require CUDA 11.0.
         "cc8+"(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($and($contains($SYSCAP,80),$expr($CUDAXXYY>=11000)),80)
                 $if($and($contains($SYSCAP,86),$expr($CUDAXXYY>=11010)),86)
                 $if($and($contains($SYSCAP,87),$expr($CUDAXXYY>=11040)),87)
                 $if($and($contains($SYSCAP,89),$expr($CUDAXXYY>=11080)),89)
                 $if($and($contains($SYSCAP,90),$expr($CUDAXXYY>=11080)),90)
             )
             set(NEEDCUDA110=127) set(REQCUDAVERSION=11.0)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             help(Compile for compute capability 8.x and above)
             # Accepted when SYSCAP contains either 80 or 86.
             error($ifn($or($contains($SYSCAP,80),$contains($SYSCAP,86)),The -gpu=cc8+ option is not supported on $TARGET systems))
         )
         # -gpu=cc8x (hidden from -help): the 8.x family option; it appends
         # only capability 80 and requires CUDA 11.0. Accepted when SYSCAP
         # contains either 80 or 86.
         cc8x(hide
             # Clear the "no keyword given" flag like every other ccXY suboption.
             set(GPUNKW=0)
             append(COMPUTECAPS=80)
             help(Compile for compute capability 8.x)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA110=127) set(REQCUDAVERSION=11.0)
             error($ifn($or($contains($SYSCAP,80),$contains($SYSCAP,86)),The -gpu=cc8x option is not supported on $TARGET systems))
         )
         # -gpu=cc90: append compute capability 90 and require CUDA 11.8.
         # Listed in -help under the shared name ccXY. Errors when SYSCAP lacks 90.
         # NOTE(review): the maintenance comment below says only the highest ccXY
         # option should be left unhidden, yet this block is not marked hide even
         # though higher ccXY options exist in this file - confirm intent.
         cc90(
             # To simplify the -help output, put all -gpu=ccXY on one -help line
             # For maintenance, hide all the ccXY() options except one, the highest one,
             # and update the list of supported values.
             helpname(ccXY)
             help(Compile for compute capability X.Y; supported values: $SYSCAP)
             set(GPUNKW=0)
             append(COMPUTECAPS=90)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA118=127) set(REQCUDAVERSION=11.8)
             error($ifn($contains($SYSCAP,90),The -gpu=cc90 option is not supported on $TARGET systems))
         )
         # -gpu=sm_90: same settings as cc90 under the sm_XY spelling; listed
         # in -help under the shared name sm_XY. Errors when SYSCAP lacks 90.
         sm_90(
             # To simplify the -help output, put all -gpu=sm_XY on one -help line
             # For maintenance, hide all the sm_XY() options except one, the highest one,
             # and update the list of supported values.
             helpname(sm_XY)
             help(Compile for compute capability X.Y; supported values: $SYSCAP)
             set(GPUNKW=0)
             append(COMPUTECAPS=90)
             set(SETCAPS=1)
             set(ISPREBLACKWELL=1)
             set(NEEDCUDA118=127) set(REQCUDAVERSION=11.8)
             error($ifn($contains($SYSCAP,90),The -gpu=sm_90 option is not supported on $TARGET systems))
         )
         # -gpu=cc100: append compute capability 100, set ISBLACKWELLPLUS
         # (instead of ISPREBLACKWELL) and require CUDA 12.7.
         # Errors when SYSCAP lacks 100.
         cc100(
             # To simplify the -help output, put all -gpu=ccXY on one -help line
             # For maintenance, hide all the ccXY() options except one, the highest one,
             # and update the list of supported values.
             helpname(ccXY)
             help(Compile for compute capability X.Y; supported values: $SYSCAP)
             set(GPUNKW=0)
             append(COMPUTECAPS=100)
             set(SETCAPS=1)
             set(ISBLACKWELLPLUS=1)
             # NOTE(review): NEEDCUDA127 is set to 1 here, whereas the older
             # NEEDCUDAxxx flags in this list are set to 127 - confirm intended.
             set(NEEDCUDA127=1) set(REQCUDAVERSION=12.7)
             error($ifn($contains($SYSCAP,100),The -gpu=cc100 option is not supported on $TARGET systems))
         )
         # -gpu=sm_100: same settings as cc100 under the sm_XY spelling;
         # listed in -help under the shared name sm_XY.
         sm_100(
             helpname(sm_XY)
             help(Compile for compute capability X.Y; supported values: $SYSCAP)
             set(GPUNKW=0)
             append(COMPUTECAPS=100)
             set(SETCAPS=1)
             set(ISBLACKWELLPLUS=1)
             set(NEEDCUDA127=1) set(REQCUDAVERSION=12.7)
             error($ifn($contains($SYSCAP,100),The -gpu=sm_100 option is not supported on $TARGET systems))
         )
         # -gpu=cc101: append compute capability 101, set ISBLACKWELLPLUS and
         # require CUDA 12.8. Errors when SYSCAP lacks 101.
         cc101(
             # To simplify the -help output, put all -gpu=ccXY on one -help line
             # For maintenance, hide all the ccXY() options except one, the highest one,
             # and update the list of supported values.
             helpname(ccXY)
             help(Compile for compute capability X.Y; supported values: $SYSCAP)
             set(GPUNKW=0)
             append(COMPUTECAPS=101)
             set(SETCAPS=1)
             set(ISBLACKWELLPLUS=1)
             set(NEEDCUDA128=1) set(REQCUDAVERSION=12.8)
             error($ifn($contains($SYSCAP,101),The -gpu=cc101 option is not supported on $TARGET systems))
         )
         # -gpu=sm_101: same settings as cc101 under the sm_XY spelling;
         # listed in -help under the shared name sm_XY.
         sm_101(
             helpname(sm_XY)
             help(Compile for compute capability X.Y; supported values: $SYSCAP)
             set(GPUNKW=0)
             append(COMPUTECAPS=101)
             set(SETCAPS=1)
             set(ISBLACKWELLPLUS=1)
             set(NEEDCUDA128=1) set(REQCUDAVERSION=12.8)
             error($ifn($contains($SYSCAP,101),The -gpu=sm_101 option is not supported on $TARGET systems))
         )
         # -gpu=cc120: append compute capability 120, set ISBLACKWELLPLUS and
         # require CUDA 12.8. Errors when SYSCAP lacks 120.
         cc120(
             # To simplify the -help output, put all -gpu=ccXY on one -help line
             # For maintenance, hide all the ccXY() options except one, the highest one,
             # and update the list of supported values.
             helpname(ccXY)
             help(Compile for compute capability X.Y; supported values: $SYSCAP)
             set(GPUNKW=0)
             append(COMPUTECAPS=120)
             set(SETCAPS=1)
             set(ISBLACKWELLPLUS=1)
             set(NEEDCUDA128=1) set(REQCUDAVERSION=12.8)
             error($ifn($contains($SYSCAP,120),The -gpu=cc120 option is not supported on $TARGET systems))
         )
         # -gpu=sm_120: same settings as cc120 under the sm_XY spelling;
         # listed in -help under the shared name sm_XY.
         sm_120(
             helpname(sm_XY)
             help(Compile for compute capability X.Y; supported values: $SYSCAP)
             set(GPUNKW=0)
             append(COMPUTECAPS=120)
             set(SETCAPS=1)
             set(ISBLACKWELLPLUS=1)
             set(NEEDCUDA128=1) set(REQCUDAVERSION=12.8)
             error($ifn($contains($SYSCAP,120),The -gpu=sm_120 option is not supported on $TARGET systems))
         )
         # Starting with cc90, we will no longer provide ccYx and ccY+
         # suboptions. We need to continue to maintain the previous ccY+
         # suboptions until we no longer support those architectures.
         # -gpu=ccn<xy> (hidden from -help): pattern suboption whose captured
         # suffix $xy is appended to COMPUTECAPS as "next $xy". CCNEXTCOUNT
         # counts uses; the error fires once the count exceeds 1.
         ccn$xy(hide
             set(GPUNKW=0)
             append(COMPUTECAPS=next $xy)
             add(CCNEXTCOUNT=1)
             error($if($expr($CCNEXTCOUNT>1),Only one -gpu=ccnXY option is supported))
             help(Compile for compute capability next)
         )
         # Dotted spellings ("ccX.Y") mapped with == onto the corresponding
         # ccXY suboption; presumably == makes the quoted name an alias of the
         # right-hand suboption - TODO confirm against the rcfile syntax.
         # All are hidden from -help. cc3.0 only reports that it is no longer
         # supported; the others clear GPUNKW.
         # NOTE(review): this run has no dotted spelling for cc89, cc101 or
         # cc120 - confirm whether that omission is intentional.
         "cc3.0"==cc30(hide error(The -gpu=cc3.0 option is no longer supported))
         "cc3.5"==cc35(hide
             set(GPUNKW=0)
         )
         "cc3.x"==cc3x(hide
             set(GPUNKW=0)
         )
         "cc5.0"==cc50(hide
             set(GPUNKW=0)
         )
         "cc5.x"==cc5x(hide
             set(GPUNKW=0)
         )
         "cc6.0"==cc60(hide
             set(GPUNKW=0)
         )
         "cc6.1"==cc61(hide
             set(GPUNKW=0)
         )
         "cc6.2"==cc62(hide
             set(GPUNKW=0)
         )
         "cc7.0"==cc70(hide
             set(GPUNKW=0)
         )
         "cc7.2"==cc72(hide
             set(GPUNKW=0)
         )
         "cc7.5"==cc75(hide
             set(GPUNKW=0)
         )
         "cc8.0"==cc80(hide
             set(GPUNKW=0)
         )
         "cc8.6"==cc86(hide
             set(GPUNKW=0)
         )
         "cc8.7"==cc87(hide
             set(GPUNKW=0)
         )
         "cc9.0"==cc90(hide
             set(GPUNKW=0)
         )
         "cc10.0"==cc100(hide
             set(GPUNKW=0)
         )
         # -gpu=ccall: append every compute capability listed in
         # SUPPORTEDSYSCAP whose CUDA version condition (CUDAXXYY) holds, then
         # set SETCAPS and CCALL. Capability 35 is included only for CUDA
         # versions below 12000; 50 has no CUDA version condition.
         # NOTE(review): the 35 entry uses $and while every other entry uses
         # $land - confirm the two are interchangeable here.
         ccall(
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($and($contains($SUPPORTEDSYSCAP,35),$expr($CUDAXXYY<12000)),35)
                 $if($contains($SUPPORTEDSYSCAP,50),50)
                 $if($land($contains($SUPPORTEDSYSCAP,60),$expr($CUDAXXYY>=8000)),60)
                 $if($land($contains($SUPPORTEDSYSCAP,61),$expr($CUDAXXYY>=8000)),61)
                 $if($land($contains($SUPPORTEDSYSCAP,62),$expr($CUDAXXYY>=10000)),62)
                 $if($land($contains($SUPPORTEDSYSCAP,70),$expr($CUDAXXYY>=9000)),70)
                 $if($land($contains($SUPPORTEDSYSCAP,72),$expr($CUDAXXYY>=10000)),72)
                 $if($land($contains($SUPPORTEDSYSCAP,75),$expr($CUDAXXYY>=10000)),75)
                 $if($land($contains($SUPPORTEDSYSCAP,80),$expr($CUDAXXYY>=11000)),80)
                 $if($land($contains($SUPPORTEDSYSCAP,86),$expr($CUDAXXYY>=11010)),86)
                 $if($land($contains($SUPPORTEDSYSCAP,87),$expr($CUDAXXYY>=11040)),87)
                 $if($land($contains($SUPPORTEDSYSCAP,89),$expr($CUDAXXYY>=11080)),89)
                 $if($land($contains($SUPPORTEDSYSCAP,90),$expr($CUDAXXYY>=11080)),90)
                 $if($land($contains($SUPPORTEDSYSCAP,100),$expr($CUDAXXYY>=12070)),100)
                 $if($land($contains($SUPPORTEDSYSCAP,101),$expr($CUDAXXYY>=12080)),101)
                 $if($land($contains($SUPPORTEDSYSCAP,120),$expr($CUDAXXYY>=12080)),120)
             )
             set(SETCAPS=1)
             set(CCALL=1)
             help(Compile for all supported compute capabilities)
         )
         # -gpu=ccall-major: append each major compute capability (X.0, plus
         # 3.5) listed in SYSCAPSMAJOR whose CUDA version condition (CUDAXXYY)
         # holds, then set SETCAPS and CCALLMAJOR. Capability 35 is included
         # only for CUDA versions below 12000; 50 has no CUDA version condition.
         "ccall-major"(
             set(GPUNKW=0)
             append(COMPUTECAPS=
                 $if($land($contains($SYSCAPSMAJOR,35),$expr($CUDAXXYY<12000)),35)
                 $if($contains($SYSCAPSMAJOR,50),50)
                 $if($land($contains($SYSCAPSMAJOR,60),$expr($CUDAXXYY>=8000)),60)
                 $if($land($contains($SYSCAPSMAJOR,70),$expr($CUDAXXYY>=9000)),70)
                 $if($land($contains($SYSCAPSMAJOR,80),$expr($CUDAXXYY>=11000)),80)
                 $if($land($contains($SYSCAPSMAJOR,90),$expr($CUDAXXYY>=11080)),90)
                 $if($land($contains($SYSCAPSMAJOR,100),$expr($CUDAXXYY>=12070)),100)
                 $if($land($contains($SYSCAPSMAJOR,120),$expr($CUDAXXYY>=12080)),120)
             )
             set(SETCAPS=1)
             set(CCALLMAJOR=1)
             help(Compile for all major supported compute capabilities.)
         )
         # -gpu=cuda<X.Y>: pattern suboption; the captured suffix $xdy is
         # stored in CUDAVERSION and USEPGICUDA is set. Shown in -help as
         # "cudaX.Y".
         cuda$xdy(
             set(GPUNKW=0)
             helpname("cudaX.Y")
             help(Use CUDA X.Y Toolkit compatibility, where installed)
             set(CUDAVERSION=$xdy)
             set(USEPGICUDA=1)
         )
         # -gpu=debug: set ACCDEBUG and FORCEDBGLLVM to request GPU debug
         # information. Shown in -help as [no]debug. Errors unless TARGET is
         # one of linux86-64, linuxpower or linuxarm64.
         debug(
             set(GPUNKW=0)
             set(ACCDEBUG=-x 163 0x800000)
             error($ifn($index($TARGET,linux86-64,linuxpower,linuxarm64),-gpu=debug is not supported on this target))
             set(FORCEDBGLLVM=1)
             helpname([no]debug)
             help(Generate GPU debug information)
         )
         # -gpu=nodebug (hidden from -help; documented via [no]debug):
         # replaces ACCDEBUG with the -y forms of several flags, including the
         # one debug sets, and clears FORCEDBGLLVM.
         nodebug(hide
             set(GPUNKW=0)
             set(ACCDEBUG=-y 163 0x800000 -y 192 0x40000000 -y 120 0x1000)
             set(FORCEDBGLLVM=0)
             help(Do not generate GPU debug information)
         )
         # -gpu=devdebug (hidden from -help, no help text): sets DEVDEBUG,
         # clears the ACCLLVM/FORCELLVM/FORCEDBGLLVM switches, appends extra
         # ACCCGFLAGS, and selects the static NVOMP device library with NVCC
         # bitcode disabled (per the variable names - TODO confirm semantics).
         devdebug(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 194 0x80000000 -y 189 0x10)
             set(ACCLLVM=0)
             set(FORCELLVM=0)
             set(FORCEDBGLLVM=0)
             set(DEVDEBUG=1)
             # Same flags that keepgpu appends.
             append(ACCCGFLAGS=-x 163 8 -x 180 0x40)
             set(USENVOMPDEVSTATICLIB=1)
             set(USENVOMPNVCCBITCODE=0)
         )
         # -gpu=fastmath: append the fast-math flag to ACCCGFLAGS and set
         # GPU_FASTMATH.
         fastmath(
             set(GPUNKW=0)
             help(Use fast math library)
             append(ACCCGFLAGS=-x 163 0x200)
             set(GPU_FASTMATH=1)
         )
         # -gpu=flushz: append the flush-to-zero flags to ACCCGFLAGS.
         # Shown in -help as [no]flushz; noflushz appends the inverse pair.
         flushz(
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 180 0x80 -y 180 0x10)
             helpname([no]flushz)
             help(Enable flush-to-zero mode on the GPU)
         )
         # -gpu=noflushz (hidden from -help; documented via [no]flushz):
         # appends the inverse of the flag pair used by flushz (-x/-y swapped).
         noflushz(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-y 180 0x80 -x 180 0x10)
             help(Disable flush-to-zero mode on the GPU)
         )
         # -gpu=nvlamath: append a flag to F901ARGS so that the nvlamath
         # module is used in program units (per the help text).
         nvlamath(
             set(GPUNKW=0)
             help(Use nvlamath module in program units)
             append(F901ARGS=-x 137 0x100)
         )
         # -gpu=fma: append the fused multiply-add flags to CGARGS.
         # Shown in -help as [no]fma; nofma appends the inverse set.
         fma(
             set(GPUNKW=0)
             append(CGARGS=-y 163 0x40 -x 186 0x2000000 -x 201 0x08 -y 201 0x04)
             helpname([no]fma)
             help(Generate fused multiply-add instructions for the GPU (default at -O1))
         )
         # -gpu=nofma (hidden from -help; documented via [no]fma): appends
         # the inverse of every flag used by fma (-x/-y swapped).
         nofma(hide
             set(GPUNKW=0)
             append(CGARGS=-x 163 0x40 -y 186 0x2000000 -y 201 0x08 -x 201 0x04)
             help(Do not generate fused multiply-add instructions for the GPU)
         )
         # -gpu=gpufile (hidden from -help): append flags so kernel .gpu files
         # are not embedded in objects (per the help text); also sets NORDC and
         # clears DEFDEFACCRELOC.
         gpufile(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 163 0x20000 -x 180 0x40)
             set(DEFDEFACCRELOC=0)
             set(NORDC=1)
             append(F901ARGS=-x 68 0x200)   
             help(Do not embed kernel .gpu files in objects; implies nordc)
         )
         # -gpu=ggpufile (hidden from -help): sets GGPUFILE; no flags are
         # appended in this block.
         ggpufile(hide
             set(GPUNKW=0)
             set(GGPUFILE=1)
             help(Keep generated fat binary file)
         )
         # -gpu=gvmode (hidden from -help): append flag -x 201 0x200 to
         # ACCCGFLAGS.
         gvmode(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 201 0x200)
             help(Gang-vector mode with non-seq acc routine calls)
         )
         # -gpu=ggvmode (hidden from -help): append flag -x 201 0x100 to
         # ACCCGFLAGS.
         ggvmode(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 201 0x100)
             help(Global gang-vector mode)
         )
         # -gpu=v32mode (hidden from -help): append flag -x 201 0x8000 to
         # ACCCGFLAGS.
         v32mode(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 201 0x8000)
             help(Vector-32 mode with non-seq acc routine calls)
         )
         # -gpu=gv32mode (hidden from -help): append flag -x 201 0x4000 to
         # ACCCGFLAGS.
         gv32mode(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 201 0x4000)
             help(Global vector-32 mode)
         )
         # -gpu=vxmode (hidden from -help): append flag -x 201 0x10000 to
         # ACCCGFLAGS.
         vxmode(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 201 0x10000)
             help(General vector-length, even with non-seq acc routine calls; uses sub-block barriers)
         )
         # -gpu=keep: append the keep-file flags to ACCCGFLAGS and set NKEEP.
         # -x 226 0x40 is added only when USEGPUBC or DEBUGLIBNVVM is set, and
         # -x 226 0x2 only when NEEDCUDALTO is set.
         keep(
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 163 0xc0008 -x 180 0x40 -x 180 0x800 $if($lor($USEGPUBC,$DEBUGLIBNVVM),-x 226 0x40) $if($NEEDCUDALTO,-x 226 0x2))
             set(NKEEP=1)
             help(Keep kernel files)
         )
         # -gpu=keepbin (hidden from -help): append flag -x 163 0x80000 to
         # ACCCGFLAGS.
         keepbin(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 163 0x80000)
             help(Keep kernel .cubin files)
         )
         # -gpu=keepfat (hidden from -help): append flag -x 180 0x800 to
         # ACCCGFLAGS (the same flag keep passes).
         keepfat(hide
             # Clear the "no keyword given" flag like the other keep* suboptions.
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 180 0x800)
             help(Keep fatbinary files)
         )
         # -gpu=keepgpu (hidden from -help): append flags -x 163 8 and
         # -x 180 0x40 to ACCCGFLAGS.
         keepgpu(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 163 8 -x 180 0x40)
             help(Keep kernel source files)
         )
         # -gpu=keepgpubc (hidden from -help): the keepgpu flags plus
         # -x 226 0x40. The block carries an if() condition on USEGPUBC or
         # DEBUGLIBNVVM; presumably the suboption applies only when one of them
         # is set - TODO confirm the scope of if().
         keepgpubc(hide
             if($lor($USEGPUBC,$DEBUGLIBNVVM))
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 163 8 -x 180 0x40 -x 226 0x40)
             help(Keep kernel source files in text and bitcode format)
         )
         # -gpu=keepptx (hidden from -help): append flag -x 163 0x40000 to
         # ACCCGFLAGS.
         keepptx(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 163 0x40000)
             help(Keep kernel .ptx files)
         )
         # -gpu=keepltoir (hidden from -help): append -x 226 0x2 to
         # ACCCGFLAGS, but only when NEEDCUDALTO is set; otherwise nothing is
         # appended.
         keepltoir(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=$if($NEEDCUDALTO,-x 226 0x2))
             help(Keep kernel .ltoir files)
         )
         # -gpu=lineinfo: append the line-information flag to CGARGS and set
         # FORCELILLVM. Shown in -help as [no]lineinfo.
         lineinfo(
             set(GPUNKW=0)
             append(CGARGS=-x 192 0x40000000)
             set(FORCELILLVM=1)
             helpname([no]lineinfo)
             help(Generate GPU line information)
         )
         # -gpu=llonly (hidden from -help, no help text): clears DEFOUTFILE,
         # sets LLONLY=YES and appends flags to CARGS and ACCCGFLAGS.
         # Unlike most suboptions in this list it does not touch GPUNKW.
         llonly(hide
             set(DEFOUTFILE=)
             set(LLONLY=YES)
             append(CARGS=-x 163 0x100000)
             append(ACCCGFLAGS=-x 163 8)
         )
         # -gpu=lto (hidden from -help): append the device LTO flag to CGARGS
         # and set NEEDCUDALTO. nolto appends the inverse flag.
         lto(hide
             set(GPUNKW=0)
             append(CGARGS=-x 226 0x1)
             set(NEEDCUDALTO=1)
             helpname([no]lto)
             help(Perform link-time optimization of device code. Must be specified at both compile and link time)
         )
         # -gpu=nolto (hidden from -help): append the inverse of the lto flag
         # (-y 226 0x1) to CGARGS and clear NEEDCUDALTO.
         nolto(hide
             set(GPUNKW=0)
             append(CGARGS=-y 226 0x1)
             set(NEEDCUDALTO=0)
         )
         # -gpu=partiallink (hidden from -help): set NEEDPARTIALLINK to 1,
         # or to 0 when DYNAMICLINK is set.
         partiallink(
             hide
             helpname([no]partiallink)
             help(Enable partial linking step.)
             set(GPUNKW=0)
             set(NEEDPARTIALLINK=$if($DYNAMICLINK,0,1))
         )
         # -gpu=nopartiallink (hidden from -help): clear NEEDPARTIALLINK
         # unconditionally.
         nopartiallink(
             hide
             set(GPUNKW=0)
             set(NEEDPARTIALLINK=0)
         )
         # -gpu=zeroinit: set ZEROINIT and DOACCLINK; no flags are appended
         # in this block.
         zeroinit(
             set(GPUNKW=0)
             set(ZEROINIT=1)
             set(DOACCLINK=1)
             help(Initialize allocated device memory with zero)
         )
         # -gpu=nolineinfo (hidden from -help; documented via [no]lineinfo):
         # appends the inverse of the lineinfo flag to CGARGS and clears
         # FORCELILLVM.
         nolineinfo(hide
             set(GPUNKW=0)
             append(CGARGS=-y 192 0x40000000)
             set(FORCELILLVM=0)
             help(Do not generate GPU line information)
         )
         # -gpu=nvvm (hidden from -help): set LLVMFLAGS to -x 189 0x10 and set
         # ACCLLVM. Note LLVMFLAGS is assigned with set(), not appended.
         nvvm(hide
             set(GPUNKW=0)
             set(LLVMFLAGS=-x 189 0x10)
             set(ACCLLVM=1)
             helpname([no]nvvm)
             help(Use libNVVM to generate device code)
         )
         # -gpu=nonvvm (hidden from -help): turn off the NVVM path, except on
         # Windows targets (win64, win64-llvm) where the option is ignored with
         # a warning and the NVVM settings stay enabled.
         nonvvm(hide
             set(GPUNKW=0)
             # On Windows the flag is passed as -x (enable); elsewhere as -y.
             append(ACCCGFLAGS=$if($or($equal($TARGET,win64),$equal($TARGET,win64-llvm)),-x,-y) 189 0x10)
             warning($if($or($equal($TARGET,win64),$equal($TARGET,win64-llvm)),nonvvm is ignored on Windows))
             # ACCLLVM and FORCELLVM are 1 on Windows and 0 elsewhere.
             set(ACCLLVM=$if($or($equal($TARGET,win64),$equal($TARGET,win64-llvm)),1,0))
             set(FORCELLVM=$if($or($equal($TARGET,win64),$equal($TARGET,win64-llvm)),1,0))
             # Nested sub-keyword "diff" (hidden) appends one more flag.
             keyword(
                 diff(hide
                     append(ACCCGFLAGS=-x 198 0x40000000)
                 )
             )
             help(Use CUDA C toolkit to generate device code)
             set(USENVOMPDEVSTATICLIB=1)
         )
         # -gpu=nvvm-next (hidden from -help): set USENVVMNEXT when
         # CUDAXXYY >= 12000 or LLONLY is non-empty, and append -x 187 0x10000000
         # only when USENVVMNEXT ends up set. Does not touch GPUNKW.
         "nvvm-next"(hide
             help(Use newer nvvm version available in the CUDA toolkit)
             set(USENVVMNEXT=$if($or($expr($CUDAXXYY>=12000),$notequal($LLONLY,)),1,0))
             # Warn when CUDAXXYY < 12000 and USEFLANG1 is not YES.
             warning($if($land($notequal($USEFLANG1,YES),$expr($CUDAXXYY<12000)),NVVM SOLID code generation was requested with -gpu=nvvm-next but the driver $CUDAVERSION only supports NVVM 7.0. Emitting NVVM 7.0 code.))
             append(ACCCGFLAGS=$if($USENVVMNEXT,-x 187 0x10000000))
         )
         # -gpu=nvvmverify (hidden from -help): set USENVVMVERIFY and append
         # the verification flag to ACCCGFLAGS. nonvvmverify appends the
         # inverse flag.
         nvvmverify(hide
             help(Verify nvvm bitcode before compilation)
             set(USENVVMVERIFY=1)
             append(ACCCGFLAGS=-x 187 0x80000)
         )
         # -gpu=nonvvmverify (hidden from -help): clear USENVVMVERIFY and
         # append the inverse of the nvvmverify flag (-y 187 0x80000).
         nonvvmverify(hide
             help(Disable verification of nvvm bitcode before compilation)
             set(USENVVMVERIFY=0)
             append(ACCCGFLAGS=-y 187 0x80000)
         )
         # -gpu=autocollapse: set ACCCACHE to -y 205 1. Shown in -help as
         # [no]autocollapse; noautocollapse sets the -x form instead.
         # Note the enabling form uses -y here, unlike most flags in this list.
         autocollapse(
             helpname([no]autocollapse)
             set(ACCCACHE=-y 205 1)
             help(Automatically collapse tightly nested OpenACC parallel loops)
         )
         # -gpu=noautocollapse (hidden from -help; documented via
         # [no]autocollapse): set ACCCACHE to -x 205 1.
         noautocollapse(hide
             set(ACCCACHE=-x 205 1)
             help(Do not autocollapse loops)
         )
         # -gpu=deepcopy: append the same flag pair to both ACCCGFLAGS and
         # ACCFEFLAGS.
         deepcopy(
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 215 0x2 -x 2 0x400)
             append(ACCFEFLAGS=-x 215 0x2 -x 2 0x400)
             help(Enable Full Deepcopy support in OpenACC Fortran)
         )
         # -gpu=sched (hidden from -help): choose the loop scheduling scheme
         # through one of the nested sub-keywords. "static" appends nothing;
         # "dynamic" and "mixed" each append one ACCCGFLAGS flag.
         sched(hide
             set(GPUNKW=0)
             help(Choose what loop scheduling to use)
             keyword(
                 static(
                     help(Use Static Scheduler [default])
                 )
                 dynamic(
                     help(Use Dynamic Loop Scheduling)
                     append(ACCCGFLAGS=-x 215 0x10)
                 )
                 mixed(
                     help(Use Mixed Loop scheduling)
                     append(ACCCGFLAGS=-x 215 0x20000000)
                 )
             )
         )
         # -gpu=opt (hidden from -help): container for optimization
         # sub-keywords; only "re" is defined here and it appends one
         # ACCCGFLAGS flag. Does not touch GPUNKW.
         opt(hide
             help(Selectively enable linearization optimization)
             keyword(
                 re(hide
                     help(Enable redundancy elimination)
                     append(ACCCGFLAGS=-x 211 0x80)
                 )
             )
         )
         # -gpu=tripcount: options for trip count calculation, chosen through
         # nested sub-keywords. Each sub-keyword toggles one bit of flag 205:
         # host/device use 0x2000, [no]warn uses 0x4000, [no]check uses 0x8000
         # (-x sets, -y clears). Does not touch GPUNKW.
         tripcount(
             help(Select options for trip count calculation)
             keyword(
                 host(
                     help(Calculate trip counts on host for some compute constructs [default])
                     append(ACCCGFLAGS=-y 205 0x2000)
                 )
                 device(
                     help(Calculate trip counts on device for some compute constructs)
                     append(ACCCGFLAGS=-x 205 0x2000)
                 )
                 warn(
                     help(Enable warnings when trip counts are calculated on host instead of device)
                     append(ACCCGFLAGS=-x 205 0x4000)
                     helpname([no]warn)
                 )
                 nowarn(hide
                     help(Disable warnings when trip counts are calculated on host instead of device)
                     append(ACCCGFLAGS=-y 205 0x4000)
                 )
                 check(
                     help(Insert runtime checks to ensure host and device trip count calculations match; enable output with NVCOMPILER_ACC_CHECK_TRIPCOUNT)
                     append(ACCCGFLAGS=-x 205 0x8000)
                     helpname([no]check)
                 )
                 nocheck(hide
                     help(Omit runtime checks for device variables used in trip count calculations)
                     append(ACCCGFLAGS=-y 205 0x8000)
                 )
             )
         )
         # -gpu=loadcache:{L1|L2}: hardware cache level for global memory loads.
         # L1 and L2 apply mirrored -x/-y settings to bits 0x08 and 0x40000000 of
         # flag word 180. The lowercase `l1`/`l2` entries are hidden spellings that
         # append exactly the same flags as their uppercase counterparts.
         loadcache(
             set(GPUNKW=0)
             help(Choose what hardware level cache to use for global memory loads)
             keyword(
                 L1(
                     help(Use L1 cache)
                     append(ACCCGFLAGS=-y 180 0x08 -x 180 0x40000000)
                 )
                 L2(
                     help(Use L2 cache)
                     append(ACCCGFLAGS=-x 180 0x08 -y 180 0x40000000)
                 )
                 l1(hide
                     help(Use L1 cache)
                     append(ACCCGFLAGS=-y 180 0x08 -x 180 0x40000000)
                 )
                 l2(hide
                     help(Use L2 cache)
                     append(ACCCGFLAGS=-x 180 0x08 -y 180 0x40000000)
                 )
             )
         )
         # -gpu=math_uniform (hidden): records the request in GPU_MATH_UNIFORM=1;
         # no compiler flags are appended here.
         math_uniform(hide
             set(GPUNKW=0)
             set(GPU_MATH_UNIFORM=1)
             help(Select uniform bit-for-bit version of CPU and GPU math intrinsics)
         )
         # -gpu=maxregcount:<n>: stores <n> in MAXREGCOUNT.
         maxregcount:n(
             set(GPUNKW=0)
             set(MAXREGCOUNT=$n)
             help(Set maximum number of registers to use on the GPU)
         )
         # -gpu=maxrregcount:<n> (hidden): alternate spelling with a doubled `r`;
         # sets the same MAXREGCOUNT variable as maxregcount.
         maxrregcount:n(hide
             set(GPUNKW=0)
             set(MAXREGCOUNT=$n)
             help(Set maximum number of registers to use on the GPU)
         )
	 minblkspersm:n(hide
             set(GPUNKW=0)
	     set(MINBLKSPERSM=$n)
	     help(Set minimum blocks per SM on the GPU)
	 )
         # -gpu=defvectorlen:<n> (hidden): stores <n> in DEFVECTLEN.
         defvectorlen:n(hide
             set(GPUNKW=0)
             set(DEFVECTLEN=$n)
             help(Set OpenACC default vector_length on GPU)
         )
         # -gpu=defnumworkers:<n> (hidden): stores <n> in DEFWORKERS.
         defnumworkers:n(hide
             set(GPUNKW=0)
             set(DEFWORKERS=$n)
             help(Set OpenACC default num_workers on GPU)
         )
         # -gpu=noL1 and -gpu=noL1cache (both hidden): two spellings of the same
         # option; each appends `-x 180 0x08` to ACCCGFLAGS.
         noL1(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 180 0x08)
             help(Do not use the L1 hardware data cache to cache global variables)
         )
         noL1cache(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 180 0x08)
             help(Do not use the L1 hardware data cache to cache global variables)
         )
         # -gpu=noci (hidden): appends `-x 177 0x40000000` to ACCCGFLAGS.
         noci(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 177 0x40000000)
             help(Do not cache strip loop indices)
         )
         # -gpu=nonvcc (hidden): passes `-x 163 4` to both ACCCGFLAGS and F901ARGS
         # and additionally sets IGNORECUDALIB=1.
         nonvcc(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 163 4)
             append(F901ARGS=-x 163 4)
             help(Do not compile kernel .gpu files)
             set(IGNORECUDALIB=1)
         )
         # -gpu=diag_dealloc (hidden): appends `-x 25 0x400` to F901ARGS only.
         # NOTE(review): does not set GPUNKW=0, unlike most sibling suboptions —
         # confirm that is intended.
         diag_dealloc(hide
             append(F901ARGS=-x 25 0x400)
             help(Assert that host memory being deallocated is not present on a device)
         )
         # -gpu=pinned[:[no]intercept]: request CUDA pinned memory (PINNEDSET=1).
         # The error() directive carries a "not supported on $TARGET" message keyed
         # on $TARGET being one of linux86-64, linuxpower, linuxarm64 (presumably it
         # fires when $TARGET is not in that list — confirm $ifn semantics).
         # intercept/nointercept set NEEDMANPINMEMORYINTERCEPTION to 1/0.
         pinned(
             set(GPUNKW=0)
             help(Use CUDA Pinned Memory)
             set(PINNEDSET=1)
             error($ifn($index($TARGET,linux86-64,linuxpower,linuxarm64),The -gpu=pinned suboption is not supported on $TARGET))
             keyword(
                 intercept(
                     help(Intercept LIBC function calls (e.g. free) and replace deallocations with the ones corresponding to the allocation kind (e.g. managed/pinned memory deallocator for allocations in managed/pinned memory). System allocations are not replaced.)
                     helpname([no]intercept)
                     set(NEEDMANPINMEMORYINTERCEPTION=1)
                 )
                 nointercept(hide
                     set(NEEDMANPINMEMORYINTERCEPTION=0)
                 )
             )
         )
         # -gpu=[no]rdc: relocatable device code on/off.
         # rdc   -> NORDC=0, `-y 68 0x200` to F901ARGS, DEFDEFACCRELOC=1
         # nordc -> NORDC=1, `-x 68 0x200` to F901ARGS, DEFDEFACCRELOC=0
         # The hidden `reloc`/`noreloc` suboptions in this list perform the same
         # settings.
         rdc(
             set(GPUNKW=0)
             help(Generate relocatable device code)
             helpname([no]rdc)
             set(NORDC=0)
             append(F901ARGS=-y 68 0x200)
             set(DEFDEFACCRELOC=1)
         )
         nordc(hide
             set(GPUNKW=0)
             help(Do not generate relocatable device code)
             set(NORDC=1)
             append(F901ARGS=-x 68 0x200)
             set(DEFDEFACCRELOC=0)
         )
         # -gpu=redatomic (hidden): appends `-y 194 0x40000` to ACCCGFLAGS.
         # NOTE(review): does not set GPUNKW=0, unlike most sibling suboptions —
         # confirm that is intended.
         redatomic(hide
             help(Generate reductions only using atomics. Only valid for OpenACC where in some cases a separate reduction kernel is used)
             append(ACCCGFLAGS=-y 194 0x40000)
         )
         # -gpu=[no]reloc (hidden): same settings as -gpu=[no]rdc.
         # reloc   -> NORDC=0, `-y 68 0x200` to F901ARGS, DEFDEFACCRELOC=1
         # noreloc -> NORDC=1, `-x 68 0x200` to F901ARGS, DEFDEFACCRELOC=0
         reloc(hide
             set(GPUNKW=0)
             help(Generate relocatable device code)
             helpname([no]reloc)
             set(NORDC=0)
             append(F901ARGS=-y 68 0x200)
             set(DEFDEFACCRELOC=1)
         )
         noreloc(hide
             set(GPUNKW=0)
             help(Do not generate relocatable device code)
             set(NORDC=1)
             append(F901ARGS=-x 68 0x200)
             set(DEFDEFACCRELOC=0)
         )
         # -gpu=[no]ptx (hidden): noptx appends `-x 215 0x2000` to CGARGS and ptx
         # appends `-y 215 0x2000`, i.e. opposite operations on the same bit.
         noptx(hide
             set(GPUNKW=0)
             help(Do not save PTX code in the object file unless nordc is set)
             append(CGARGS=-x 215 0x2000)
         )
         ptx(hide
             set(GPUNKW=0)
             help(Do save PTX code in the object file)
             helpname([no]ptx)
             append(CGARGS=-y 215 0x2000)
         )
         # -gpu=[no]allptx (hidden): allptx appends `-x 226 0x04` to CGARGS and
         # noallptx appends `-y 226 0x04`. noallptx carries no help text of its own.
         noallptx(hide
             set(GPUNKW=0)
             append(CGARGS=-y 226 0x04)
         )
         allptx(hide
             set(GPUNKW=0)
             help(Do save PTX code in the object file for all requested CC)
             helpname([no]allptx)
             append(CGARGS=-x 226 0x04)
         )
         # -gpu=[no]required (hidden): sets ACCREQUIRED to 1 (required) or
         # 0 (norequired); no flags are appended here.
         required(hide
             set(GPUNKW=0)
             set(ACCREQUIRED=1)
             helpname([no]required)
             help(Issue compiler error if the compute regions fail to accelerate)
         )
         norequired(hide
             set(GPUNKW=0)
             set(ACCREQUIRED=0)
             help(Generate host code if the compute regions fail to accelerate)
         )
         # -gpu=safecache: appends `-x 198 0x8000000` to CGARGS.
         safecache(
             set(GPUNKW=0)
             help(Allows variable-sized array sections in OpenACC cache directives and assumes they fit into CUDA shared memory)
             append(CGARGS=-x 198 0x8000000)
         )
	 stacklimit:l(
         warning("The -gpu=[no]stacklimit option is deprecated and will be removed in a future release.")
	     helpname(stacklimit:<l>|nostacklimit)
	     help(Sets the limit of stack variables in a procedure or kernel, in KB. The -gpu=[no]stacklimit option has been deprecated and will be removed in a future release.)
	     set(STACKLIMIT=$l)
	 )
	 nostacklimit( hide
         warning("The -gpu=[no]stacklimit option is deprecated and will be removed in a future release.")
	     help(Disables computing the size of stack variables for a procedure or kernel)
	     set(STACKLIMIT=)
	 )
         # -gpu=dp (hidden): accepted but has no effect beyond setting GPUNKW=0.
         dp(hide
             set(GPUNKW=0)
         ) # vestigial
         # -gpu=mul24 (hidden): appends `-x 163 0x400` to ACCCGFLAGS.
         mul24(hide
             set(GPUNKW=0)
             help(Use 24-bit multiplication for subscripting)
             append(ACCCGFLAGS=-x 163 0x400)
         )
         # -gpu=ptxinfo: appends `-x 163 0x400000` to ACCCGFLAGS.
         ptxinfo(
             set(GPUNKW=0)
             help(Print ptxas information)
             append(ACCCGFLAGS=-x 163 0x400000)
         )
         # -gpu=nvvmfile:<file> and -gpu=ptxfile:<file> (both hidden): copy <file>
         # into the shared variable `var`, then append `-savegpu $var` or
         # `-saveptx $var` respectively to ACCCGFLAGS. The set() must precede the
         # append() because the append reads $var.
         nvvmfile:file(hide
             set(GPUNKW=0)
             set(var=$file)
             help(Pass custom gpu file)
             append(ACCCGFLAGS=-savegpu $var)
         )
         ptxfile:file(hide
             set(GPUNKW=0)
             set(var=$file)
             help(Pass custom ptx file)
             append(ACCCGFLAGS=-saveptx $var)
         )
         # -gpu=time (hidden): sets TIMEINIT=1, DOACCLINK=1 and ACCRPATH=1.
         # NOTE(review): an identical `time` suboption is defined a second time
         # further down this keyword list (after noimplicitdata).
         time(hide
             set(GPUNKW=0)
             help(Collect simple timing information)
             set(TIMEINIT=1)
             set(DOACCLINK=1)
             set(ACCRPATH=1)
         )
         # -gpu=[no]unroll: unroll appends `-x 177 0x08 -y 195 0x08` to ACCCGFLAGS;
         # nounroll appends the mirrored `-y 177 0x08 -x 195 0x08`.
         unroll(
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 177 0x08 -y 195 0x08)
             helpname([no]unroll)
             help(Enable automatic inner loop unrolling (default at -O3))
         )
         nounroll(hide
             set(GPUNKW=0)
             append(ACCCGFLAGS=-y 177 0x08 -x 195 0x08)
             help(Disable automatic inner loop unrolling)
         )
         # -gpu=noattach (hidden): sets CUDA_NOATTACH=1 and DOACCLINK=1.
         noattach(hide
             set(GPUNKW=0)
             help(Do not attempt to attach to existing CUDA context)
             set(CUDA_NOATTACH=1)
             set(DOACCLINK=1)
         )
         # -gpu=sync / -gpu=[no]wait (all hidden): three settings of one variable,
         # ACCWAIT: sync -> 2, wait -> 1, nowait -> 0.
         sync(hide
             set(GPUNKW=0)
             help(Ignore async clauses)
             set(ACCWAIT=2)
         )
         wait(hide
             set(GPUNKW=0)
             helpname([no]wait)
             help(Wait for each device kernel to finish)
             set(ACCWAIT=1)
         )
         nowait(hide
             set(GPUNKW=0)
             help(Set default to launch kernels asynchronously)
             set(ACCWAIT=0)
         )
         # -gpu=O0..O3 (hidden, no help text): store the level 0..3 in AOPT.
         O0(hide
             set(GPUNKW=0)
             set(AOPT=0)
         )
         O1(hide
             set(GPUNKW=0)
             set(AOPT=1)
         )
         O2(hide
             set(GPUNKW=0)
             set(AOPT=2)
         )
         O3(hide
             set(GPUNKW=0)
             set(AOPT=3)
         )
         # -gpu=OO0..OO3 (hidden, no help text): store the level 0..3 in ACOPT
         # (a different variable from the AOPT set by O0..O3).
         OO0(hide
             set(GPUNKW=0)
             set(ACOPT=0)
         )
         OO1(hide
             set(GPUNKW=0)
             set(ACOPT=1)
         )
         OO2(hide
             set(GPUNKW=0)
             set(ACOPT=2)
         )
         OO3(hide
             set(GPUNKW=0)
             set(ACOPT=3)
         )
         # -gpu=vv (hidden): appends `-x 163 0x200000` to ACCCGFLAGS and sets
         # VERYVERBOSE=YES.
         vv(hide
             help(Display GPU compilation toolchain)
             set(GPUNKW=0)
             append(ACCCGFLAGS=-x 163 0x200000)
             set(VERYVERBOSE=YES)
         )
         # -gpu=managed[:<sub>]: request CUDA managed memory.
         # Sets MANAGEDSET=1 and EXPLICITMANAGED=1. The if() directive is keyed on
         # $TARGET being linux86-64, linuxpower or linuxarm64 (presumably the
         # suboption only applies on those targets — confirm).
         # Sub-keywords:
         #   prefer:{gpu|cpu|none} (hidden) - sets MANPREFER to gpu, cpu or empty
         #   [no]intercept                  - sets NEEDMANPINMEMORYINTERCEPTION to 1/0
         # nokeyword() is present with an empty body, so nothing extra is done
         # when the suboption is given without a sub-keyword.
         managed(
             set(GPUNKW=0)
             help(Use CUDA Managed Memory)
             if($index($TARGET,linux86-64,linuxpower,linuxarm64))
             set(MANAGEDSET=1)
             set(EXPLICITMANAGED=1)
             keyword(
                 prefer(hide
                     keyword(
                         gpu(
                             set(MANPREFER=gpu)
                             help(Set managed allocations to prefer GPU memory)
                         )
                         cpu(
                             set(MANPREFER=cpu)
                             help(Set managed allocations to prefer CPU memory)
                         )
                         none(
                             set(MANPREFER=)
                             help(Reset managed allocation preferences)
                         )
                     )
                 )
                 intercept(
                     help(Intercept LIBC function calls (e.g. free) and replace deallocations with the ones corresponding to the allocation kind (e.g. managed/pinned memory deallocator for allocations in managed/pinned memory). System allocations are not replaced.)
                     helpname([no]intercept)
                     set(NEEDMANPINMEMORYINTERCEPTION=1)
                 )
                 nointercept(hide
                     set(NEEDMANPINMEMORYINTERCEPTION=0)
                 )
             )
             nokeyword()
         )
         # -gpu=nomanaged: still sets MANAGEDSET=1 (the user made an explicit
         # choice) but records EXPLICITMANAGED=0.
         nomanaged(
             set(GPUNKW=0)
             help(Do not enable CUDA Managed Memory)
             set(MANAGEDSET=1)
             set(EXPLICITMANAGED=0)
         )
         # -gpu=beta: appends `-x 137 0x1000000` to F901ARGS.
         beta(
             set(GPUNKW=0)
             help(Enable beta features)
             append(F901ARGS=-x 137 0x1000000)
         )
         # -gpu=autocompare: appends `-x 215 0x60` to CGARGS and sets both
         # AUTOCOMPARE=1 and REDUNDANT=1 (matching "implies redundant" in the help).
         autocompare(
             set(GPUNKW=0)
             help(Automatically compare OpenACC CPU/GPU results: implies redundant)
             append(CGARGS=-x 215 0x60)
             set(AUTOCOMPARE=1)
             set(REDUNDANT=1)
         )
         # -gpu=redundant: appends `-x 215 0x20` to CGARGS and sets REDUNDANT=1.
         redundant(
             set(GPUNKW=0)
             append(CGARGS=-x 215 0x20)
             help(Redundant OpenACC CPU/GPU execution)
             set(REDUNDANT=1)
         )
         # -gpu=[no]implicitsections: sets IMPLICITSECTIONS to 1 or 0; no flags
         # are appended here.
         implicitsections(
             set(GPUNKW=0)
             set(IMPLICITSECTIONS=1)
             help(Implicitly convert array element to array section in OpenMP or OpenACC data clauses)
             helpname([no]implicitsections)
         )
         noimplicitsections( hide
             set(GPUNKW=0)
             set(IMPLICITSECTIONS=0)
             help(Do not implicitly convert array element to array section in OpenMP or OpenACC data clauses)
         )
         # -gpu=[no]implicitdata (hidden): implicitdata appends `-y 215 0x800000`
         # to CGARGS; noimplicitdata appends `-x 215 0x800000`.
         # NOTE(review): neither form sets GPUNKW=0, unlike most sibling
         # suboptions — confirm that is intended.
         implicitdata( hide
             append(CGARGS=-y 215 0x800000)
             help(Implicitly move data to device (aka implicit copy, copyin, copyout). Default behavior for nvc/nvc++/nvfortran.)
             helpname([no]implicitdata)
         )
         noimplicitdata( hide
             append(CGARGS=-x 215 0x800000)
             help(Do not implicitly move data to device (aka do not do implicit copy, copyin, copyout). Explicit clauses are required instead.)
         )
         # (A second, byte-for-byte identical definition of the hidden `time`
         # suboption used to sit here. It was removed; the definition earlier in
         # this keyword list, right after `ptxfile`, is the one in effect.)
         # -gpu=[no]unified
         # unified[:[no]intercept]: request CUDA unified memory; sets UNIFIEDSET=1
         # and EXPLICITUNIFIED=1. The if() directive is keyed on $TARGET being
         # linux86-64, linuxpower or linuxarm64.
         # NOTE(review): here `intercept` sets NEEDMANPINMEMORYINTERCEPTION from
         # $NEEDMANAGED rather than unconditionally to 1 as the `intercept`
         # sub-keyword of `managed` and `pinned` does — confirm this is intended.
         unified(
             set(GPUNKW=0)
             help(Use CUDA Unified Memory)
             if($index($TARGET,linux86-64,linuxpower,linuxarm64))
             set(UNIFIEDSET=1)
             set(EXPLICITUNIFIED=1)
             keyword(
                 intercept(
                     help(Intercept LIBC function calls (e.g. free) and replace deallocations with the ones corresponding to the allocation kind (e.g. managed/pinned memory deallocator for allocations in managed/pinned memory). System allocations are not replaced.)
                     helpname([no]intercept)
                     set(NEEDMANPINMEMORYINTERCEPTION=$if($NEEDMANAGED,1,0))
                 )
                 nointercept(hide
                     set(NEEDMANPINMEMORYINTERCEPTION=0)
                 )
             )
         )
         # -gpu=nounified: still sets UNIFIEDSET=1 (the user made an explicit
         # choice) but records EXPLICITUNIFIED=0.
         nounified(
             set(GPUNKW=0)
             help(Do not enable CUDA Unified Memory)
             set(UNIFIEDSET=1)
             set(EXPLICITUNIFIED=0)
         )
         # -gpu=[no]splitcompile[:n] (all hidden): NVVM parallel compilation.
         #   splitcompile    -> SPLITCOMPILE=1, MAXNVVMTHREADS=0
         #   splitcompile:n  -> SPLITCOMPILE=1, MAXNVVMTHREADS=<n>
         #   nosplitcompile  -> SPLITCOMPILE=0, MAXNVVMTHREADS=0xFFFF
         # When $CUDAXXYY is below 12010 both enabling forms store 0xFFFF in
         # MAXNVVMTHREADS instead, the same value nosplitcompile uses.
         splitcompile(hide
             help(Set maximum number of threads used for NVVM parallel compilation, 0 (default) will try to use all the available threads.)
             helpname([no]splitcompile[:n])
             set(GPUNKW=0)
             set(SPLITCOMPILE=1)
             set(MAXNVVMTHREADS=$if($expr($CUDAXXYY<12010),0xFFFF,0))
         )
         splitcompile:n(hide
             set(GPUNKW=0)
             set(SPLITCOMPILE=1)
             set(MAXNVVMTHREADS=$if($expr($CUDAXXYY<12010),0xFFFF,$n))
         )
         nosplitcompile(hide
             set(GPUNKW=0)
             set(SPLITCOMPILE=0)
             set(MAXNVVMTHREADS=0xFFFF)
         )
         # -gpu=mem
         # mem:{separate|managed|unified}: choose the GPU memory mode. Sets
         # MEMSETEXPLICIT=1, then stores $SEPARATEMEMMODE, $MANAGEDMEMMODE or
         # $UNIFIEDMEMMODE in SETMEMORYMODE depending on the sub-keyword.
         # The if() directive is keyed on $TARGET being linux86-64, linuxpower or
         # linuxarm64.
         mem(
             set(GPUNKW=0)
             help(Set GPU memory mode)
             if($index($TARGET,linux86-64,linuxpower,linuxarm64))
             set(MEMSETEXPLICIT=1)
             keyword(
                 separate(
                     help(Set Separate Memory mode)
                     set(SETMEMORYMODE=$SEPARATEMEMMODE)
                     keyword(
                         # -gpu=mem:separate:[no]pinnedalloc
                         # Sets USEPINNEDALLOCWITHSEPARATEMEM to 1 or 0.
                         pinnedalloc(
                             help(Use GPU Pinned Memory for explicit allocations)
                             set(USEPINNEDALLOCWITHSEPARATEMEM=1)
                         )
                         nopinnedalloc(
                             help(Don't use GPU Pinned Memory for explicit allocations)
                             set(USEPINNEDALLOCWITHSEPARATEMEM=0)
                         )
                     )
                 )
                 managed(
                     help(Set Managed Memory mode)
                     set(SETMEMORYMODE=$MANAGEDMEMMODE)
                 )
                 unified(
                     help(Set Unified Memory mode)
                     set(SETMEMORYMODE=$UNIFIEDMEMMODE)
                     # -gpu=mem:unified:[no]managedalloc
                     # Sets USEMANALLOCWITHUNIFIEDMEM to 1 or 0.
                     keyword(
                         managedalloc(
                             help(Use GPU Managed Memory for explicit allocations)
                             set(USEMANALLOCWITHUNIFIEDMEM=1)
                         )
                         nomanagedalloc(
                             help(Don't use GPU Managed Memory for explicit allocations)
                             set(USEMANALLOCWITHUNIFIEDMEM=0)
                         )
                     )
                 )
             )
         )
         # -gpu=[no]interceptdeallocations
         # Both forms set INTERCEPTDEALLOCSETEXPLICIT=1 (an explicit choice was
         # made) and set NEEDMANPINMEMORYINTERCEPTION to 1 or 0 respectively.
         # NOTE(review): both forms are visible and share the same helpname,
         # whereas other [no]X pairs in this list hide the `no` form; neither form
         # sets GPUNKW=0 — confirm both are intended.
         interceptdeallocations(
             help(Intercept LIBC function calls (e.g. free) and replace deallocations with managed/pinned variants if the memory was allocated via managed/pinned memory allocations.)
             helpname([no]interceptdeallocations)
             set(INTERCEPTDEALLOCSETEXPLICIT=1)
             set(NEEDMANPINMEMORYINTERCEPTION=1)
         )
         nointerceptdeallocations(
             help(Don't intercept LIBC function calls (e.g. free) to replace deallocations with managed/pinned variants.)
             helpname([no]interceptdeallocations)
             set(INTERCEPTDEALLOCSETEXPLICIT=1)
             set(NEEDMANPINMEMORYINTERCEPTION=0)
         )
         # -gpu=useoldnvvmd (hidden): sets USEOLDNVVMD=1; does not touch GPUNKW.
         useoldnvvmd(hide
             help(Use old nvvmd)
             set(USEOLDNVVMD=1)
         )
         # -gpu=outlineaccpar (hidden, internal): appends
         # `-outline-acc-par=true` to FLANG1ARGS; does not touch GPUNKW.
         outlineaccpar(hide
             help(This option is used internally to outline OpenACC parallel constructs)
             append(FLANG1ARGS=-outline-acc-par=true)
         )
     )
     append(ACCCGFLAGS=$if($notequal($USECUDAROOT,),-cudaroot $USECUDAROOT))
     append(ACCCGFLAGS=$DEFAULTCAPFLAG);
     # end of gpu

# -cuda[=<suboption>]: enable CUDA support.
# Marks the CUDA target (TGTCUDA=1), adds `tesla` to ACCELS, and appends
# language-specific arguments selected by $DRIVERLANG (Fortran or CPP).
# Suboptions: charstring, madconst, and the hidden fastmath, oopt:<pass> and
# noregalloc. nokeyword() is empty, so plain `-cuda` adds nothing beyond the
# switch-level directives.
switch -cuda is
     help($if($ISFTN,Enable CUDA Fortran. )Add CUDA include paths. Link with the CUDA runtime libraries. Please refer to -gpu for target specific options)
     set(TGTCUDA=1)
     set(NEEDACCLIB=1)
     append(ACCELS=tesla)
     set(NEEDLOCSCRIPT=1)
     helpgroup(target)
     # Fortran-only arguments are guarded by $DRIVERLANG == Fortran.
     append(F901ARGS=$if($equal($DRIVERLANG,Fortran),-x 137 1))
     append(CGARGS=-x 137 1)
     append(CGARGS=$if($equal($DRIVERLANG,Fortran),-x 121 0xc00 -x 180 0x4000000))

     # nvc++: everything in this group is guarded by $DRIVERLANG == CPP,
     # except the ACC* variables, which also apply when $LNGACC is set.
     append(CPP1ARGS=$if($equal($DRIVERLANG,CPP),--cuda -D__CUDACC__ -D_NVHPC_CUDA_CPP -D__NV_NO_HOST_COMPILER_CHECK $foreach(c,$COMPUTECAPS,--cudacap=$c ) ))
     append(CPPPREINC=$if($equal($DRIVERLANG,CPP),--preinclude _cuda_preinclude.h))
     append(CPP2ARGS=$if($equal($DRIVERLANG,CPP),-x 137 1 -x 137 0x200000))
     set(ACCDEF=$if($or($equal($DRIVERLANG,CPP),$LNGACC),$foreach(f,$ACCDEFINES, -D$f)))
     set(ACCDEFDEF=$if($or($equal($DRIVERLANG,CPP),$LNGACC),$foreach(f,$ACCDEFINES, -def $f)))
     set(ACCTESLAONLY=$if($or($equal($DRIVERLANG,CPP),$LNGACC),$DEFTESLAONLY))
     append(ACCCGFLAGS=$if($equal($DRIVERLANG,CPP),-x 121 0xc00))

     # Needs to be removed when the compiler supports multiple capabilities
     append(USRDDEF=$if($equal($DRIVERLANG,CPP),-D__PGI_CUDA_ARCH__=$MOSTCAPABLECOMPILEDCC -D__NVCOMPILER_CUDA_ARCH__=$MOSTCAPABLECOMPILEDCC))

     # When the opt level is set to 1 `nvc++` fails to compile certain code, see FS#29592
     max(OPTLEVELINITDEF1=$if($equal($DRIVERLANG,CPP),2,1))

     # nvc++
     # NOTE(review): only DEFDEFDEFACCRELOC below tests $DRIVERLANG (and it tests
     # for Fortran); the other three settings are unconditional — confirm the
     # "nvc++" label still describes this group.
     set(DEFHOST=) # can't generate host code here
     set(DEFDEFDEFACCRELOC=$if($equal($DRIVERLANG,Fortran),1,$DEFDEFDEF4ACCRELOC))
     set(DEFAULTLLVM=$if($equal($TARGETARCH,64),1,0))
     set(LRTLIB=$LRTLIBNAME)

     keyword(
         # -cuda=charstring: carries an if() keyed on $ISFTN and an error() whose
         # message fires for any $DRIVERLANG other than Fortran.
         charstring(
             if($index($ISFTN,1))
             help(Enable limited support for character strings in GPU kernels)
             error($if($notequal($DRIVERLANG,Fortran),-cuda=charstring flag is supported only with nvfortran))
             append(CGARGS=-x 125 0x800)
             append(F901ARGS=-x 137 0x8000)
         )
         fastmath(hide
             help(Use fast math library)
             append(CGARGS=-x 163 0x200)
         )
         # -cuda=madconst: same $ISFTN / Fortran-only guards as charstring.
         madconst(
             if($index($ISFTN,1))
             help(Put Module Array Descriptors in CUDA Constant Memory)
             error($if($notequal($DRIVERLANG,Fortran),-cuda=madconst flag is supported only with nvfortran))
             append(F901ARGS=-x 137 0x40)
         )
         # -cuda=oopt:<pass> (hidden): each sub-keyword appends one or more
         # -x/-y settings to CGARGS.
         oopt(hide
             help(Accelerator optimizations)
             keyword(
                 fwd1(
                     help(Forward substitution of simple expressions)
                     append(CGARGS=-x 177 0x01)
                 )
                 reassoc1(
                     help(Reassociate address expressions)
                     append(CGARGS=-x 177 0x02)
                 )
                 induct(
                     help(Induction variable analysis)
                     append(CGARGS=-x 177 0x04)
                 )
                 looptest(
                     help(Loop test replacement)
                     append(CGARGS=-y 176 0x20000)
                 )
                 nolooptest(
                     help(Disable loop test replacement)
                     append(CGARGS=-x 176 0x20000)
                 )
                 letest(
                     help(Simplify loop tests)
                     append(CGARGS=-x 176 0x40000)
                 )
                 ivlive(
                     help(Mark induction variables live only if used)
                     append(CGARGS=-x 176 0x100000)
                 )
                 localpre(
                     help(Basic-block local redundancy elimination)
                     append(CGARGS=-x 177 0x2000000)
                 )
                 noidxcompare(
                     help(Do not simplify threadidx compares)
                     append(CGARGS=-x 177 0x4000)
                 )
                 unroll(
                     help(Scalar loop unrolling)
                     append(CGARGS=-x 177 0x08)
                     keyword(
                         noredophi(
                             help(Do not redo FUD chains and kill dead blocks)
                             append(CGARGS=-x 163 0x8000000)
                         )
                     )
                 )
                 nounroll(
                     help(Disable scalar loop unrolling)
                     append(CGARGS=-y 177 0x08 -x 163 0x2000000)
                 )
                 fwd2(
                     help(Safe forward substitution of simple expressions)
                     append(CGARGS=-x 177 0x10)
                 )
                 reassoc2(
                     help(Reassociate address expressions)
                     append(CGARGS=-x 177 0x20)
                 )
                 distribute(
                     help(Distribute multiply over add while reassociating)
                     append(CGARGS=-x 177 0x100)
                 )
                 fwd3(
                     help(Forward substitution of simple expressions)
                     append(CGARGS=-x 177 0x40)
                 )
                 pre(
                     help(Partial redundancy elimination)
                     append(CGARGS=-x 177 0x80)
                 )
                 sign(
                     help(Find expression signs)
                     append(CGARGS=-x 177 0x20000)
                 )
                 fma(
                     help(Find more FMA opportunities)
                     append(CGARGS=-x 186 0x2000000)
                 )
                 nodup(
                     append(CGARGS=-x 176 0x80)
                 )
                 nofud(
                     help(Do not do FUD chain optimizations)
                     append(CGARGS=-x 163 0x1000000)
                 )
             )
         )
         noregalloc(hide
             help(Do not create constructor to register allocators)
             append(FLANG1ARGS=-disable-cuda-reg-alloc-ctor=true)
         )
     )
     nokeyword()
     # Pass the CUDA root only when $CUDAROOT is non-empty.
     append(CGARGS=$if($notequal($CUDAROOT,),-cudaroot $CUDAROOT))
     append(USRDDEF=-D_CUDA)
     append(USRDEFDEF=-def _CUDA -def __NVCOMPILER_CUDA)
     # Emit one -D / -def per entry of $CUDAVERSIONDEFINE when it is non-empty.
     append(USRDDEF=$if($notequal($CUDAVERSIONDEFINE,),$foreach(f,$CUDAVERSIONDEFINE, -D$f)))
     append(USRDEFDEF=$if($notequal($CUDAVERSIONDEFINE,),$foreach(f,$CUDAVERSIONDEFINE, -def $f)))
     set(FNEEDCUDA=1)
     set(ANYCUF=1)
     set(ANYCU=1)
     set(CUDANEEDED=1)
     set(CUDARTNEEDED=1)
     set(CUDAINIT=1)
     append(CGARGS=$if($equal($DRIVERLANG,Fortran),$DEFAULTCAPFLAG $TOOLKITFLAG));

# Shorthands to match NVCC spelling
# -Xptxas <args>: shorthand for -Wptxas carrying the same $args.
switch -Xptxas args is
    switcharg
    shorthand(-Wptxas,$args);

# --ptxas-options <args>: long spelling; also a shorthand for -Wptxas.
switch --ptxas-options args is
    switcharg
    shorthand(-Wptxas,$args);

# -Xfatbinary <args>: shorthand for -Wfatbinary carrying the same $args.
switch -Xfatbinary args is
    switcharg
    shorthand(-Wfatbinary,$args);

# --fatbinary-options <args>: long spelling; also a shorthand for -Wfatbinary.
switch --fatbinary-options args is
    switcharg
    shorthand(-Wfatbinary,$args);

# -Xnvlink <args>: shorthand for -Wnvlink carrying the same $args.
switch -Xnvlink args is
    switcharg
    shorthand(-Wnvlink,$args);

# --nvlink-options <args>: long spelling; also a shorthand for -Wnvlink.
switch --nvlink-options args is
    switcharg
    shorthand(-Wnvlink,$args);
