/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Static architectural properties by SM version. 
*/

#pragma once

#include "util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {


/**
 * \addtogroup UtilMgmt
 * @{
 */


/// CUB_PTX_VERSION reflects the PTX version targeted by the active compiler pass
/// (or zero during the host pass, i.e. whenever __CUDA_ARCH__ is not defined).
#ifndef __CUDA_ARCH__
    #define CUB_PTX_VERSION 0
#else
    #define CUB_PTX_VERSION __CUDA_ARCH__
#endif


/// Whether or not the source targeted by the active compiler pass is allowed to
/// invoke device kernels or methods from the CUDA runtime API.  Defined during
/// the host pass (CUB_PTX_VERSION == 0) or whenever CUB_CDP is defined.
#if (CUB_PTX_VERSION == 0) || defined(CUB_CDP)
    #define CUB_RUNTIME_ENABLED
#endif


// ---------------------------------------------------------------------------
// Per-architecture properties.
//
// Each macro takes an integral \p arch value that is compared against the
// thresholds 120, 200 and 300 (the same scale as CUB_PTX_VERSION above).
// The argument is always parenthesized inside the expansion so that compound
// expressions (e.g. a conditional or bitwise expression) are compared as a
// whole rather than being split by operator precedence.
// ---------------------------------------------------------------------------

/// Log2 of the number of threads per warp (5 for every \p arch)
#define CUB_LOG_WARP_THREADS(arch)                      \
    (5)

/// Log2 of the number of smem banks
#define CUB_LOG_SMEM_BANKS(arch)                        \
    (((arch) >= 200) ?                                  \
        (5) :                                           \
        (4))

/// Number of bytes per smem bank (4 for every \p arch)
#define CUB_SMEM_BANK_BYTES(arch)                       \
    (4)

/// Number of smem bytes provisioned per SM
#define CUB_SMEM_BYTES(arch)                            \
    (((arch) >= 200) ?                                  \
        (48 * 1024) :                                   \
        (16 * 1024))

/// Smem allocation size in bytes
#define CUB_SMEM_ALLOC_UNIT(arch)                       \
    (((arch) >= 300) ?                                  \
        (256) :                                         \
        (((arch) >= 200) ?                              \
            (128) :                                     \
            (512)))

/// Whether or not the architecture allocates registers by block (or by warp).
/// Evaluates to true when \p arch is below 200 and to false otherwise.
#define CUB_REGS_BY_BLOCK(arch)                         \
    (((arch) >= 200) ?                                  \
        (false) :                                       \
        (true))

/// Number of registers allocated at a time per block (or by warp)
#define CUB_REG_ALLOC_UNIT(arch)                        \
    (((arch) >= 300) ?                                  \
        (256) :                                         \
        (((arch) >= 200) ?                              \
            (64) :                                      \
            (((arch) >= 120) ?                          \
                (512) :                                 \
                (256))))

/// Granularity of warps for which registers are allocated
#define CUB_WARP_ALLOC_UNIT(arch)                       \
    (((arch) >= 300) ?                                  \
        (4) :                                           \
        (2))

/// Maximum number of threads per SM
#define CUB_MAX_SM_THREADS(arch)                        \
    (((arch) >= 300) ?                                  \
        (2048) :                                        \
        (((arch) >= 200) ?                              \
            (1536) :                                    \
            (((arch) >= 120) ?                          \
                (1024) :                                \
                (768))))

/// Maximum number of thread blocks per SM
#define CUB_MAX_SM_BLOCKS(arch)                         \
    (((arch) >= 300) ?                                  \
        (16) :                                          \
        (8))

/// Maximum number of threads per thread block
#define CUB_MAX_BLOCK_THREADS(arch)                     \
    (((arch) >= 200) ?                                  \
        (1024) :                                        \
        (512))

/// Maximum number of registers per SM
#define CUB_MAX_SM_REGISTERS(arch)                      \
    (((arch) >= 300) ?                                  \
        (64 * 1024) :                                   \
        (((arch) >= 200) ?                              \
            (32 * 1024) :                               \
            (((arch) >= 120) ?                          \
                (16 * 1024) :                           \
                (8 * 1024))))

/// Oversubscription factor
#define CUB_SUBSCRIPTION_FACTOR(arch)                   \
    (((arch) >= 300) ?                                  \
        (5) :                                           \
        (((arch) >= 200) ?                              \
            (3) :                                       \
            (10)))

/// Prefer X-way conflict over padding.
/// Evaluates to 0 when \p arch is at least 300 and to 4 otherwise; the code
/// that compares against this threshold lives outside this file.
#define CUB_PREFER_CONFLICT_OVER_PADDING(arch)          \
    (((arch) >= 300) ?                                  \
        (0) :                                           \
        (4))


#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

// Convenience aliases: each property above evaluated for CUB_PTX_VERSION.
// During the host pass CUB_PTX_VERSION is 0, so every alias resolves to the
// value of the lowest architecture tier of its underlying macro.
#define CUB_PTX_LOG_WARP_THREADS                CUB_LOG_WARP_THREADS(CUB_PTX_VERSION)
#define CUB_PTX_WARP_THREADS                    (1 << CUB_PTX_LOG_WARP_THREADS)
#define CUB_PTX_LOG_SMEM_BANKS                  CUB_LOG_SMEM_BANKS(CUB_PTX_VERSION)
#define CUB_PTX_SMEM_BANKS                      (1 << CUB_PTX_LOG_SMEM_BANKS)
#define CUB_PTX_SMEM_BANK_BYTES                 CUB_SMEM_BANK_BYTES(CUB_PTX_VERSION)
#define CUB_PTX_SMEM_BYTES                      CUB_SMEM_BYTES(CUB_PTX_VERSION)
#define CUB_PTX_SMEM_ALLOC_UNIT                 CUB_SMEM_ALLOC_UNIT(CUB_PTX_VERSION)
#define CUB_PTX_REGS_BY_BLOCK                   CUB_REGS_BY_BLOCK(CUB_PTX_VERSION)
#define CUB_PTX_REG_ALLOC_UNIT                  CUB_REG_ALLOC_UNIT(CUB_PTX_VERSION)
#define CUB_PTX_WARP_ALLOC_UNIT                 CUB_WARP_ALLOC_UNIT(CUB_PTX_VERSION)
#define CUB_PTX_MAX_SM_THREADS                  CUB_MAX_SM_THREADS(CUB_PTX_VERSION)
#define CUB_PTX_MAX_SM_BLOCKS                   CUB_MAX_SM_BLOCKS(CUB_PTX_VERSION)
#define CUB_PTX_MAX_BLOCK_THREADS               CUB_MAX_BLOCK_THREADS(CUB_PTX_VERSION)
#define CUB_PTX_MAX_SM_REGISTERS                CUB_MAX_SM_REGISTERS(CUB_PTX_VERSION)
#define CUB_PTX_PREFER_CONFLICT_OVER_PADDING    CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_VERSION)

#endif  // Do not document


/** @} */       // end group UtilMgmt

}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)