123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- /******************************************************************************
- * Copyright (c) 2011, Duane Merrill. All rights reserved.
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the NVIDIA CORPORATION nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- ******************************************************************************/
- /**
- * \file
- * Static architectural properties by SM version.
- */
- #pragma once
- #include "util_namespace.cuh"
- /// Optional outer namespace(s)
- CUB_NS_PREFIX
- /// CUB namespace
- namespace cub {
- /**
- * \addtogroup UtilMgmt
- * @{
- */
/// CUB_PTX_VERSION reflects the PTX version targeted by the active compiler pass.
/// It expands to __CUDA_ARCH__ whenever the compiler defines that macro, and to
/// zero otherwise (i.e., during the host pass).
#ifdef __CUDA_ARCH__
    #define CUB_PTX_VERSION __CUDA_ARCH__
#else
    #define CUB_PTX_VERSION 0
#endif
/// CUB_RUNTIME_ENABLED is defined (with an empty expansion) when the source targeted by the
/// active compiler pass is allowed to invoke device kernels or methods from the CUDA
/// runtime API: either CUB_CDP is defined, or CUB_PTX_VERSION evaluates to zero.
#if defined(CUB_CDP) || (CUB_PTX_VERSION == 0)
    #define CUB_RUNTIME_ENABLED
#endif
/// Log2 of the number of threads per warp. Evaluates to 5 for every \p arch
/// (the argument is accepted for interface uniformity but is not used).
#define CUB_LOG_WARP_THREADS(arch) (5)
/// Log2 of the number of smem banks: 5 when arch >= 200, otherwise 4.
/// \param arch  Integer architecture version compared against the threshold 200.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_LOG_SMEM_BANKS(arch)    \
    (((arch) >= 200) ?              \
        (5) :                       \
        (4))
/// Number of bytes per smem bank. Evaluates to 4 for every \p arch
/// (the argument is accepted for interface uniformity but is not used).
#define CUB_SMEM_BANK_BYTES(arch) (4)
/// Number of smem bytes provisioned per SM: 48 KB when arch >= 200, otherwise 16 KB.
/// \param arch  Integer architecture version compared against the threshold 200.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_SMEM_BYTES(arch)        \
    (((arch) >= 200) ?              \
        (48 * 1024) :               \
        (16 * 1024))
/// Smem allocation size in bytes: 256 when arch >= 300, 128 when arch >= 200, otherwise 512.
/// \param arch  Integer architecture version compared against the thresholds 300 and 200.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_SMEM_ALLOC_UNIT(arch)   \
    (((arch) >= 300) ?              \
        (256) :                     \
        (((arch) >= 200) ?          \
            (128) :                 \
            (512)))
/// Whether or not the architecture allocates registers by block (true) or by warp (false).
/// Evaluates to false when arch >= 200, otherwise true.
/// \param arch  Integer architecture version compared against the threshold 200.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_REGS_BY_BLOCK(arch)     \
    (((arch) >= 200) ?              \
        (false) :                   \
        (true))
/// Number of registers allocated at a time per block (or by warp):
/// 256 when arch >= 300, 64 when arch >= 200, 512 when arch >= 120, otherwise 256.
/// \param arch  Integer architecture version compared against the thresholds 300, 200 and 120.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_REG_ALLOC_UNIT(arch)    \
    (((arch) >= 300) ?              \
        (256) :                     \
        (((arch) >= 200) ?          \
            (64) :                  \
            (((arch) >= 120) ?      \
                (512) :             \
                (256))))
/// Granularity of warps for which registers are allocated: 4 when arch >= 300, otherwise 2.
/// \param arch  Integer architecture version compared against the threshold 300.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_WARP_ALLOC_UNIT(arch)   \
    (((arch) >= 300) ?              \
        (4) :                       \
        (2))
/// Maximum number of threads per SM:
/// 2048 when arch >= 300, 1536 when arch >= 200, 1024 when arch >= 120, otherwise 768.
/// \param arch  Integer architecture version compared against the thresholds 300, 200 and 120.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_MAX_SM_THREADS(arch)    \
    (((arch) >= 300) ?              \
        (2048) :                    \
        (((arch) >= 200) ?          \
            (1536) :                \
            (((arch) >= 120) ?      \
                (1024) :            \
                (768))))
/// Maximum number of thread blocks per SM: 16 when arch >= 300, otherwise 8.
/// \param arch  Integer architecture version compared against the threshold 300.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_MAX_SM_BLOCKS(arch)     \
    (((arch) >= 300) ?              \
        (16) :                      \
        (8))
/// Maximum number of threads per thread block: 1024 when arch >= 200, otherwise 512.
/// \param arch  Integer architecture version compared against the threshold 200.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_MAX_BLOCK_THREADS(arch) \
    (((arch) >= 200) ?              \
        (1024) :                    \
        (512))
/// Maximum number of registers per SM:
/// 64K when arch >= 300, 32K when arch >= 200, 16K when arch >= 120, otherwise 8K.
/// \param arch  Integer architecture version compared against the thresholds 300, 200 and 120.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_MAX_SM_REGISTERS(arch)  \
    (((arch) >= 300) ?              \
        (64 * 1024) :               \
        (((arch) >= 200) ?          \
            (32 * 1024) :           \
            (((arch) >= 120) ?      \
                (16 * 1024) :       \
                (8 * 1024))))
/// Oversubscription factor: 5 when arch >= 300, 3 when arch >= 200, otherwise 10.
/// \param arch  Integer architecture version compared against the thresholds 300 and 200.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_SUBSCRIPTION_FACTOR(arch)   \
    (((arch) >= 300) ?                  \
        (5) :                           \
        (((arch) >= 200) ?              \
            (3) :                       \
            (10)))
/// Prefer X-way conflict over padding: evaluates to 0 when arch >= 300, otherwise 4.
/// NOTE(review): the value is presumably the conflict degree X tolerated before
/// padding is preferred (0 meaning "always pad") -- confirm against the users of this macro.
/// \param arch  Integer architecture version compared against the threshold 300.
/// The parameter is parenthesized so that an expression argument (e.g. one
/// containing `?:`) is compared as a single value.
#define CUB_PREFER_CONFLICT_OVER_PADDING(arch)  \
    (((arch) >= 300) ?                          \
        (0) :                                   \
        (4))
#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

// Convenience aliases: each CUB_PTX_* macro evaluates the corresponding
// architecture-parameterized macro at CUB_PTX_VERSION. The two non-LOG
// counts (warp threads, smem banks) are derived by shifting 1 by the
// matching LOG value.
#define CUB_PTX_LOG_WARP_THREADS                CUB_LOG_WARP_THREADS(CUB_PTX_VERSION)
#define CUB_PTX_WARP_THREADS                    (1 << CUB_PTX_LOG_WARP_THREADS)
#define CUB_PTX_LOG_SMEM_BANKS                  CUB_LOG_SMEM_BANKS(CUB_PTX_VERSION)
#define CUB_PTX_SMEM_BANKS                      (1 << CUB_PTX_LOG_SMEM_BANKS)
#define CUB_PTX_SMEM_BANK_BYTES                 CUB_SMEM_BANK_BYTES(CUB_PTX_VERSION)
#define CUB_PTX_SMEM_BYTES                      CUB_SMEM_BYTES(CUB_PTX_VERSION)
#define CUB_PTX_SMEM_ALLOC_UNIT                 CUB_SMEM_ALLOC_UNIT(CUB_PTX_VERSION)
#define CUB_PTX_REGS_BY_BLOCK                   CUB_REGS_BY_BLOCK(CUB_PTX_VERSION)
#define CUB_PTX_REG_ALLOC_UNIT                  CUB_REG_ALLOC_UNIT(CUB_PTX_VERSION)
#define CUB_PTX_WARP_ALLOC_UNIT                 CUB_WARP_ALLOC_UNIT(CUB_PTX_VERSION)
#define CUB_PTX_MAX_SM_THREADS                  CUB_MAX_SM_THREADS(CUB_PTX_VERSION)
#define CUB_PTX_MAX_SM_BLOCKS                   CUB_MAX_SM_BLOCKS(CUB_PTX_VERSION)
#define CUB_PTX_MAX_BLOCK_THREADS               CUB_MAX_BLOCK_THREADS(CUB_PTX_VERSION)
#define CUB_PTX_MAX_SM_REGISTERS                CUB_MAX_SM_REGISTERS(CUB_PTX_VERSION)
// Added for consistency: CUB_SUBSCRIPTION_FACTOR was the only
// architecture-parameterized macro without a CUB_PTX_* alias.
#define CUB_PTX_SUBSCRIPTION_FACTOR             CUB_SUBSCRIPTION_FACTOR(CUB_PTX_VERSION)
#define CUB_PTX_PREFER_CONFLICT_OVER_PADDING    CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_VERSION)

#endif  // Do not document
- /** @} */ // end group UtilMgmt
- } // CUB namespace
- CUB_NS_POSTFIX // Optional outer namespace(s)
|