// Source repository: XuMengJie / agv_navigation_ws
							/*
 * nvbio
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the NVIDIA CORPORATION nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// syncblocks_test.cu
//
#define NVBIO_CUDA_DEBUG

#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include <algorithm>
#include <nvbio/basic/timer.h>
#include <nvbio/basic/console.h>
#include <nvbio/basic/vector.h>
#include <nvbio/basic/cuda/arch.h>
#include <nvbio/basic/cuda/syncblocks.h>

namespace nvbio {

/// Correctness kernel for the inter-block barrier test.
///
/// For each of the n_barriers iterations, thread 0 of every block reserves a queue slot by
/// atomically incrementing *queue_ptr and stores the pair (iteration, blockIdx.x) in it;
/// afterwards every thread of the block calls barrier.enact().
///
/// \param n_barriers   number of iterations, i.e. of barrier.enact() calls per thread
/// \param barrier      barrier object enacted once per iteration by all threads
/// \param queue_ptr    counter atomically incremented to hand out queue slots
/// \param queue        output records; one entry is written per block per iteration
///                     (assumes room for n_barriers * gridDim.x entries and a counter
///                     starting at zero -- both are the caller's responsibility)
__global__
void print_kernel(const uint32 n_barriers, cuda::syncblocks barrier, uint32* queue_ptr, uint2* queue)
{
    // only one thread per block appends to the queue
    const bool is_leader = (threadIdx.x == 0);

    uint32 iter = 0;
    while (iter < n_barriers)
    {
        if (is_leader)
        {
            // reserve the next free slot, then record which block wrote at which iteration
            const uint32 pos = atomicAdd( queue_ptr, 1u );
            queue[pos] = make_uint2( iter, blockIdx.x );
            //NVBIO_CUDA_DEBUG_ASSERT( pos >= iter*gridDim.x, "block[%u] got slot %u at iteration %u\n", blockIdx.x, pos, iter );
        }

        // reached by every thread of the block, leader or not
        barrier.enact();

        ++iter;
    }
}
/// Timing kernel for the inter-block barrier test.
///
/// Every thread calls barrier.enact() n_barriers times in a row, then stores
/// (blockIdx.x, 0) into output[blockIdx.x]. The store is not guarded by a thread index,
/// so all threads of a block write the same value to the same entry.
///
/// \param n_barriers   number of consecutive barrier.enact() calls per thread
/// \param barrier      barrier object being exercised
/// \param output       output array indexed by blockIdx.x
///                     (assumes at least gridDim.x entries -- caller's responsibility)
__global__
void speed_kernel(const uint32 n_barriers, cuda::syncblocks barrier, uint2* output)
{
    uint32 remaining = n_barriers;
    while (remaining > 0)
    {
        barrier.enact();
        --remaining;
    }

    const uint32 block_id = blockIdx.x;
    output[block_id] = make_uint2( block_id, 0 );
}

/// Host-side driver for the syncblocks tests.
///
/// Correctness test: launches print_kernel 20 times; after each launch the queue is copied
/// back to the host and checked so that entries [n*n_blocks, (n+1)*n_blocks) all carry
/// iteration index n, i.e. no block recorded iteration n+1 before every block had recorded
/// iteration n.
///
/// Speed test: launches speed_kernel n_tests times back to back and reports the average
/// wall-clock time per barrier.enact() call in nanoseconds.
///
/// \return 0 on success, 1 if no blocks can be launched, a CUDA error is reported, or the
///         queue contents are out of order
int syncblocks_test()
{
    const uint32 n_barriers = 100;
    cuda::syncblocks_storage barrier_st;

    cuda::syncblocks barrier = barrier_st.get();

    log_info( stderr, "syncblocks test... started\n" );

    const uint32 blockdim = 128;
    const uint32 n_blocks = max_active_blocks( print_kernel, blockdim, 0u );
    log_info( stderr, "  %u blocks\n", n_blocks );

    // with zero blocks the queue below would be empty (front() on an empty vector) and the
    // launches would be invalid, so bail out with a clear message instead
    if (n_blocks == 0)
    {
        log_error( stderr, "  no active blocks available for the test kernels\n" );
        return 1;
    }

    thrust::device_vector<uint32> dqueue_head( 1u );
    thrust::device_vector<uint2>  dqueue( n_barriers*n_blocks );

    uint32* dqueue_head_ptr = thrust::raw_pointer_cast( &dqueue_head.front() );
    uint2*  dqueue_ptr      = thrust::raw_pointer_cast( &dqueue.front() );

    thrust::host_vector<uint2> hqueue;
    log_info( stderr, "  correctness test... started\n" );

    for (uint32 i = 0; i < 20; ++i)
    {
        // initialize the queue pointer
        dqueue_head[0] = 0;

        // call the testing kernel
        print_kernel<<<n_blocks,blockdim>>>( n_barriers, barrier, dqueue_head_ptr, dqueue_ptr );

        // surface launch-configuration errors first, then asynchronous execution errors:
        // without this a failed kernel would be misreported as an ordering violation
        cudaError_t err = cudaGetLastError();
        if (err == cudaSuccess)
            err = cudaDeviceSynchronize();
        if (err != cudaSuccess)
        {
            log_error( stderr, "  print_kernel failed at launch %u: %s\n", i, cudaGetErrorString( err ) );
            return 1;
        }

        nvbio::cuda::thrust_copy_vector(hqueue, dqueue);

        // the n-th group of n_blocks entries must have been written during iteration n
        for (uint32 n = 0; n < n_barriers; ++n)
        {
            for (uint32 j = 0; j < n_blocks; ++j)
            {
                const uint2 val = hqueue[n*n_blocks + j];
                if (val.x != n)
                {
                    log_error( stderr, "  found (%u,%u) at position %u:%u, launch %u\n", val.x, val.y, n, j, i );
                    return 1;
                }
            }
        }
    }
    log_info( stderr, "  correctness test... done\n" );

    const uint32 n_tests = 100;

    // n_blocks was computed for print_kernel; cap the speed test grid by what speed_kernel
    // itself can keep active. NOTE(review): presumably the barrier requires all blocks of the
    // grid to be simultaneously active -- confirm against the syncblocks documentation.
    const uint32 max_speed_blocks = max_active_blocks( speed_kernel, blockdim, 0u );
    const uint32 n_speed_blocks   = max_speed_blocks < n_blocks ? max_speed_blocks : n_blocks;
    if (n_speed_blocks == 0)
    {
        log_error( stderr, "  no active blocks available for the speed kernel\n" );
        return 1;
    }

    log_info( stderr, "  speed test... started\n" );

    Timer timer;
    timer.start();

    // speed_kernel writes entries [1, n_speed_blocks] of the queue, which is within its
    // n_barriers*n_blocks elements
    for (uint32 i = 0; i < n_tests; ++i)
        speed_kernel<<<n_speed_blocks,blockdim>>>( n_barriers, barrier, dqueue_ptr+1 );

    cudaError_t speed_err = cudaDeviceSynchronize();
    timer.stop();

    // also pick up any launch error recorded while enqueueing the kernels
    if (speed_err == cudaSuccess)
        speed_err = cudaGetLastError();
    if (speed_err != cudaSuccess)
    {
        log_error( stderr, "  speed_kernel failed: %s\n", cudaGetErrorString( speed_err ) );
        return 1;
    }

    // average time of a single barrier, in seconds
    const float seconds_per_barrier = timer.seconds() / (n_tests*n_barriers);

    // seconds -> nanoseconds is a factor of 1e9 (the previous 1e6 factor printed microseconds
    // under an "ns" label)
    log_info( stderr, "  speed test... done: %.1f ns\n", seconds_per_barrier * 1.0e9f );

    log_info( stderr, "syncblocks test... done\n" );
    return 0;
}

} // namespace nvbio