syncblocks_test.cu 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // syncblocks_test.cu
  28. //
  29. #define NVBIO_CUDA_DEBUG
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <vector>
  33. #include <algorithm>
  34. #include <nvbio/basic/timer.h>
  35. #include <nvbio/basic/console.h>
  36. #include <nvbio/basic/vector.h>
  37. #include <nvbio/basic/cuda/arch.h>
  38. #include <nvbio/basic/cuda/syncblocks.h>
  39. namespace nvbio {
  // Correctness kernel for cuda::syncblocks.
  //
  // For each of the n_barriers rounds, thread 0 of every block reserves the next
  // free entry of `queue` (through an atomic increment of *queue_ptr) and stores
  // the pair (round, blockIdx.x) in it; afterwards every thread of the block
  // calls barrier.enact().
  //
  // \param n_barriers   number of rounds, i.e. barrier.enact() calls per thread
  // \param barrier      barrier object under test (passed by value)
  // \param queue_ptr    device counter holding the next free queue entry
  // \param queue        device array receiving the (round, block) pairs; it must
  //                     hold at least n_barriers * gridDim.x entries, since each
  //                     block appends exactly one entry per round
  //
  // NOTE(review): the barrier is presumably only safe when all blocks of the
  // grid are resident at the same time - confirm against syncblocks.h.
  __global__
  void print_kernel(const uint32 n_barriers, cuda::syncblocks barrier, uint32* queue_ptr, uint2* queue)
  {
      // a single thread per block records the block's arrival
      const bool is_leader = (threadIdx.x == 0);

      uint32 round = 0;
      while (round < n_barriers)
      {
          if (is_leader)
          {
              const uint32 entry = atomicAdd( queue_ptr, 1u );
              queue[entry] = make_uint2( round, blockIdx.x );
              //NVBIO_CUDA_DEBUG_ASSERT( entry >= round*gridDim.x, "block[%u] got slot %u at iteration %u\n", blockIdx.x, entry, round );
          }

          // reached by every thread of the block, leader or not
          barrier.enact();

          ++round;
      }
  }
  // Timing kernel for cuda::syncblocks.
  //
  // Every thread calls barrier.enact() n_barriers times back to back, then
  // stores (blockIdx.x, 0) into output[blockIdx.x].
  //
  // \param n_barriers   number of consecutive barrier.enact() calls
  // \param barrier      barrier object under test (passed by value)
  // \param output       device array with at least gridDim.x entries
  __global__
  void speed_kernel(const uint32 n_barriers, cuda::syncblocks barrier, uint2* output)
  {
      for (uint32 remaining = n_barriers; remaining != 0; --remaining)
          barrier.enact();

      // all threads of the block store the same value to the same entry
      const uint32 block_id = blockIdx.x;
      output[block_id] = make_uint2( block_id, 0 );
  }
  // Wait for all outstanding device work and report any CUDA error.
  //
  // \param what   name of the operation, used in the error message
  // \return       true if no error was pending, false otherwise (after logging)
  static bool syncblocks_test_sync(const char* what)
  {
      // launch-time errors (e.g. an invalid configuration) are only visible here
      cudaError_t err = cudaGetLastError();

      // errors raised while the kernels execute surface at the next sync
      if (err == cudaSuccess)
          err = cudaDeviceSynchronize();

      if (err != cudaSuccess)
      {
          log_error( stderr, " %s failed: %s\n", what, cudaGetErrorString( err ) );
          return false;
      }
      return true;
  }

  // Host driver for the cuda::syncblocks tests.
  //
  // Correctness phase: print_kernel is launched 20 times; after each launch the
  // queue it filled is copied back and checked so that the n-th group of
  // n_blocks entries only contains entries written during round n.
  //
  // Speed phase: speed_kernel is launched n_tests times and the average
  // wall-clock time per barrier.enact() round is logged in nanoseconds.
  //
  // \return 0 on success, 1 if any check or CUDA call failed
  int syncblocks_test()
  {
      const uint32 n_barriers = 100;

      cuda::syncblocks_storage barrier_st;
      cuda::syncblocks         barrier = barrier_st.get();

      log_info( stderr, "syncblocks test... started\n" );

      const uint32 blockdim = 128;
      const uint32 n_blocks = max_active_blocks( print_kernel, blockdim, 0u );
      log_info( stderr, " %u blocks\n", n_blocks );

      // with no blocks the queue would be empty (front() is invalid) and the
      // launches below would be rejected
      if (n_blocks == 0)
      {
          log_error( stderr, " no block of %u threads can be launched\n", blockdim );
          return 1;
      }

      const uint32 queue_size = n_barriers*n_blocks;

      thrust::device_vector<uint32> dqueue_head( 1u );
      thrust::device_vector<uint2>  dqueue( queue_size );

      uint32* dqueue_head_ptr = thrust::raw_pointer_cast( &dqueue_head.front() );
      uint2*  dqueue_ptr      = thrust::raw_pointer_cast( &dqueue.front() );

      thrust::host_vector<uint2> hqueue;

      log_info( stderr, " correctness test... started\n" );
      for (uint32 i = 0; i < 20; ++i)
      {
          // initialize the queue pointer
          dqueue_head[0] = 0;

          // call the testing kernel
          print_kernel<<<n_blocks,blockdim>>>( n_barriers, barrier, dqueue_head_ptr, dqueue_ptr );
          if (!syncblocks_test_sync( "print_kernel" ))
              return 1;

          // the queue is not cleared between launches: make sure this launch
          // really wrote every entry, so that stale data cannot pass the check
          const uint32 n_written = dqueue_head[0];
          if (n_written != queue_size)
          {
              log_error( stderr, " found %u queue entries instead of %u, launch %u\n", n_written, queue_size, i );
              return 1;
          }

          nvbio::cuda::thrust_copy_vector(hqueue, dqueue);

          // entries [n*n_blocks, (n+1)*n_blocks) must all belong to round n
          for (uint32 n = 0; n < n_barriers; ++n)
          {
              for (uint32 j = 0; j < n_blocks; ++j)
              {
                  const uint2 val = hqueue[n*n_blocks + j];
                  if (val.x != n)
                  {
                      log_error( stderr, " found (%u,%u) at position %u:%u, launch %u\n", val.x, val.y, n, j, i );
                      return 1;
                  }
              }
          }
      }
      log_info( stderr, " correctness test... done\n" );

      const uint32 n_tests = 100;

      // n_blocks was sized for print_kernel: never launch more blocks than
      // speed_kernel itself can keep active.
      // NOTE(review): assumes the barrier needs all blocks resident at once - confirm.
      const uint32 n_speed_blocks = std::min<uint32>( n_blocks, max_active_blocks( speed_kernel, blockdim, 0u ) );

      log_info( stderr, " speed test... started\n" );

      Timer timer;
      timer.start();

      // the output starts one entry into the queue, as in the original test;
      // at most n_speed_blocks <= n_blocks entries are written, which fits
      for (uint32 i = 0; i < n_tests; ++i)
          speed_kernel<<<n_speed_blocks,blockdim>>>( n_barriers, barrier, dqueue_ptr+1 );

      const bool speed_ok = syncblocks_test_sync( "speed_kernel" );

      timer.stop();

      if (!speed_ok)
          return 1;

      // seconds per barrier round; 1 s = 1.0e9 ns
      const float seconds_per_barrier = timer.seconds() / (n_tests*n_barriers);
      log_info( stderr, " speed test... done: %.1f ns\n", seconds_per_barrier * 1.0e9f );

      log_info( stderr, "syncblocks test... done\n" );
      return 0;
  }
  111. } // namespace nvbio