/*
 * nvbio
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the NVIDIA CORPORATION nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "build-chains.h"
#include "mem-search.h"
#include "options.h"
#include "pipeline.h"
#include "util.h"

#include <nvbio/basic/numbers.h>
#include <nvbio/basic/algorithms.h>
#include <nvbio/basic/priority_queue.h>
#include <nvbio/basic/timer.h>
#include <nvbio/basic/transform_iterator.h>
#include <nvbio/basic/vector_view.h>
#include <nvbio/basic/primitives.h>
#include <nvbio/basic/cuda/primitives.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/sort.h>

using namespace nvbio;

// a functor to extract the read id from a mem
struct mem_read_id_functor
{
    typedef mem_state::mem_type argument_type;
    typedef uint32              result_type;

    NVBIO_HOST_DEVICE
    uint32 operator() (const argument_type mem) const { return mem.string_id(); }
};

// a class to keep track of a chain
struct chain
{
    // construct an empty chain
    NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
    chain() : id(uint32(-1)) {}

    // construct a new chain from a single seed
    NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
    chain(const uint32 _id, const mem_state::mem_type seed) :
        id( _id ),
        ref( seed.index_pos() ),
        span_beg( seed.span().x ),
        last_ref( seed.index_pos() ),
        last_span( seed.span() )
    {}

    // test whether we can merge the given mem into this chain
    NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
    bool merge(const mem_state::mem_type seed, const uint32 w, const uint32 max_chain_gap)
    {
        const uint32 seed_len = seed.span().y - seed.span().x;
        const uint32 last_len = last_span.y - last_span.x;
        const uint32 rbeg     = ref;
        const uint32 rend     = last_ref + last_len;

        // check whether the seed is contained in the chain
        if (seed.span().x >= span_beg     &&
            seed.span().y <= last_span.y  &&
            seed.index_pos() >= rbeg      &&
            seed.index_pos() + seed_len <= rend)
            return true; // contained seed; do nothing

        const int32 x = seed.span().x - last_span.x; // always non-negative
        const int32 y = seed.index_pos() - last_ref;

        if ((y >= 0) && (x - y <= w) && (x - last_len < max_chain_gap) && (y - last_len < max_chain_gap))
        {
            // grow the chain
            last_span = seed.span();
            last_ref  = seed.index_pos();
            return true;
        }
        return false;
    }

    uint32 id;          // chain id
    uint32 ref;         // reference coordinate of the first seed in the chain
    uint32 span_beg;    // read span begin
    uint32 last_ref;    // reference coordinate of the last seed in the chain
    uint2  last_span;   // read span of the last seed in the chain
};
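// ---------------------------------------------------------------------------
// Illustration only (not part of the pipeline): a minimal sketch of the merge
// predicate above, rewritten on plain integers so it can be reasoned about or
// unit-tested without a mem type. The helper name and its parameters are
// hypothetical, and signed arithmetic is used throughout as an assumption to
// keep the gap comparisons well-defined for negative gaps.
// ---------------------------------------------------------------------------
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
bool chain_merge_example(
    const uint2  last_span,       // read span of the chain's last seed
    const int32  last_ref,        // reference position of the chain's last seed
    const uint2  seed_span,       // read span of the candidate seed
    const int32  seed_ref,        // reference position of the candidate seed
    const int32  w,               // band width
    const int32  max_chain_gap)   // maximum allowed chaining gap
{
    const int32 last_len = int32( last_span.y - last_span.x );

    const int32 x = int32( seed_span.x - last_span.x ); // read-space distance, non-negative in sorted order
    const int32 y = seed_ref - last_ref;                // reference-space distance

    // chainable if the seed lies downstream on the reference (y >= 0), the
    // diagonal drift x - y stays within the band width w, and both the read
    // and reference gaps stay below max_chain_gap
    return (y >= 0)                        &&
           (x - y        <= w)             &&
           (x - last_len <  max_chain_gap) &&
           (y - last_len <  max_chain_gap);
}
//
// e.g. with w = 5 and max_chain_gap = 100, a seed starting at read pos 30 and
// ref pos 130 can extend a chain whose last seed spans read [20,35) at ref 120:
// x = 10, y = 10, drift x - y = 0 <= 5, and both gaps 10 - 15 = -5 < 100.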
struct chain_compare
{
    NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
    bool operator() (const chain& chain1, const chain& chain2) const
    {
        // compare by the reference coordinate of the first seed of each chain
        return chain1.ref < chain2.ref;
    }
};

// assign a chain id to all MEMs for the current pipeline::chunk of reads
__global__
void build_chains_kernel(
    const read_chunk            chunk,          // the current sub-batch
    const uint32                pass_number,    // the pass number - we process up to N seeds per pass
    const uint32                n_active,       // the number of active reads in this pass
    const uint32*               active_reads,   // the set of active reads
          uint8*                active_flags,   // the output set of active read flags
    const uint32                w,              // w parameter
    const uint32                max_chain_gap,  // max chain gap parameter
    const uint32                n_mems,         // the total number of MEMs for this chunk of reads
    const mem_state::mem_type*  mems,           // the MEMs for this chunk of reads
    const uint32*               mems_index,     // a sorting index into the MEMs specifying the processing order
          uint64*               mems_chains)    // the output chain IDs corresponding to the sorted MEMs
{
    const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
    if (thread_id >= n_active)
        return;

    const uint32 read_id = active_reads[ thread_id ];

    // find the first seed belonging to this read
    const uint32 mem_begin = uint32( nvbio::lower_bound(
        read_id,
        nvbio::make_transform_iterator( mems, mem_read_id_functor() ),
        n_mems ) - nvbio::make_transform_iterator( mems, mem_read_id_functor() ) );

    // find the first seed belonging to the next read
    const uint32 mem_end = uint32( nvbio::lower_bound(
        read_id+1u,
        nvbio::make_transform_iterator( mems, mem_read_id_functor() ),
        n_mems ) - nvbio::make_transform_iterator( mems, mem_read_id_functor() ) );

    // the maximum amount of chains we can output in one pass
    const uint32 MAX_CHAINS = 128;

    // keep a priority queue of the chains organized by the reference coordinate of their leftmost seed
    typedef nvbio::vector_view<chain*>                                      chain_vector_type;
    typedef nvbio::priority_queue<chain, chain_vector_type, chain_compare>  chain_queue_type;

    chain chain_queue_storage[MAX_CHAINS+1];

    chain_queue_type chain_queue( chain_vector_type( 0u, chain_queue_storage ) );

    // keep a counter tracking the number of chains that get created
    //
    // NOTE: here we conservatively assume that in the previous passes we have
    // created the maximum number of chains, so as to avoid assigning an already
    // taken ID to a new chain (which would result in merging potentially unrelated
    // chains)
    uint64 n_chains = pass_number * MAX_CHAINS;

    // compute the first and ending MEM to process in this pass
    const uint32 mem_batch_begin = mem_begin + pass_number * MAX_CHAINS;
    const uint32 mem_batch_end   = nvbio::min( mem_batch_begin + MAX_CHAINS, mem_end );

    // process the seeds in order
    for (uint32 i = mem_batch_begin; i < mem_batch_end; ++i)
    {
        const uint32              seed_idx = mems_index[i];
        const mem_state::mem_type seed     = mems[ seed_idx ];

        // the chain id for this seed, to be determined
        uint32 chain_id;

        // insert seed
        if (chain_queue.empty())
        {
            // get a new chain id
            chain_id = n_chains++;

            // build a new chain
            chain_queue.push( chain( chain_id, seed ) );
        }
        else
        {
            // find the closest chain...
            chain_queue_type::iterator chain_it = chain_queue.upper_bound( chain( 0u, seed ) );

            // ...and test whether we can merge this seed into it
            if (chain_it == chain_queue.end() ||
                chain_it->merge( seed, w, max_chain_gap ) == false)
            {
                // get a new chain id
                chain_id = n_chains++;

                // build a new chain
                chain_queue.push( chain( chain_id, seed ) );
            }
            else
            {
                // merge with the existing chain
                chain_id = chain_it->id;
            }
        }

        // write out the chain id (OR'd with the read id)
        mems_chains[i] = chain_id | (uint64( read_id ) << 32);
    }

    // write out whether we need more passes
    active_flags[ thread_id ] = (mem_batch_end < mem_end) ? 1u : 0u;
}
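// ---------------------------------------------------------------------------
// Illustration only: the 64-bit keys written to mems_chains above pack the
// read id in the high 32 bits and the chain id in the low 32 bits, so that
// sorting the keys groups MEMs first by read and then by chain while keeping
// each read's chains in creation order. These two helpers are hypothetical
// (the kernel builds the key inline) and just make the layout explicit.
// ---------------------------------------------------------------------------
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
uint64 pack_chain_key(const uint32 read_id, const uint32 chain_id)
{
    return (uint64( read_id ) << 32) | uint64( chain_id );
}

NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
void unpack_chain_key(const uint64 key, uint32& read_id, uint32& chain_id)
{
    read_id  = uint32( key >> 32 );  // high 32 bits
    chain_id = uint32( key );        // low 32 bits
}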
// build chains for the current pipeline::chunk of reads
void build_chains(pipeline_state *pipeline, const io::SequenceDataDevice *reads)
{
    const ScopedTimer<float> timer( &pipeline->stats.chain_time ); // keep track of the time spent here

    struct chains_state<device_tag> *chn = &pipeline->chn;

    const uint32 n_reads = pipeline->chunk.read_end - pipeline->chunk.read_begin;
    const uint32 n_mems  = pipeline->chunk.mem_end  - pipeline->chunk.mem_begin;

    // skip pathological cases
    if (n_mems == 0u)
        return;

    //
    // Here we are going to run multiple passes of the same kernel, as we cannot fit
    // all chains in local memory at once...
    //

    // prepare some ping-pong queues for tracking active reads that need more passes
    nvbio::vector<device_tag,uint32> active_reads( n_reads );
    nvbio::vector<device_tag,uint8>  active_flags( n_reads );
    nvbio::vector<device_tag,uint32> out_reads( n_reads );
    nvbio::vector<device_tag,uint8>  temp_storage;

    // initialize the active reads queue
    thrust::copy(
        thrust::make_counting_iterator(0u) + pipeline->chunk.read_begin,
        thrust::make_counting_iterator(0u) + pipeline->chunk.read_end,
        active_reads.begin() );

    uint32 n_active = n_reads;

    for (uint32 pass_number = 0u; n_active; ++pass_number)
    {
        const uint32 block_dim = 128;
        const uint32 n_blocks  = util::divide_ri( n_active, block_dim );

        // assign a chain id to each mem
        build_chains_kernel<<<n_blocks, block_dim>>>(
            pipeline->chunk,
            pass_number,
            n_active,
            nvbio::plain_view( active_reads ),
            nvbio::plain_view( active_flags ),
            command_line_options.w,
            command_line_options.max_chain_gap,
            n_mems,
            nvbio::plain_view( chn->mems ),
            nvbio::plain_view( chn->mems_index ),
            nvbio::plain_view( chn->mems_chain ) );

        optional_device_synchronize();
        cuda::check_error("build-chains kernel");

        // shrink the set of active reads
        n_active = copy_flagged(
            n_active,               // the number of input elements
            active_reads.begin(),   // the input sequence of elements to copy
            active_flags.begin(),   // the input sequence of copy flags
            out_reads.begin(),      // the output sequence of copied elements
            temp_storage );         // some temporary storage

        active_reads.swap( out_reads );
    }

    // sort mems by chain id
    // NOTE: it's important here to use a stable-sort, so as to guarantee preserving
    // the ordering by left-coordinate of the MEMs
    thrust::stable_sort_by_key(                             // TODO: this is slow, switch to nvbio::cuda::SortEnactor
        chn->mems_chain.begin(),
        chn->mems_chain.begin() + n_mems,
        chn->mems_index.begin() );

    optional_device_synchronize();
    cuda::check_error("chain sorting");
}
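// ---------------------------------------------------------------------------
// Illustration only: roughly what the copy_flagged compaction in the pass loop
// above does, expressed with thrust. The names compact_active_reads_example
// and is_nonzero are hypothetical, and the extra include is an assumption; the
// pipeline itself uses nvbio's copy_flagged so that its temporary storage can
// be reused across passes.
// ---------------------------------------------------------------------------
#include <thrust/copy.h>

struct is_nonzero
{
    NVBIO_HOST_DEVICE bool operator() (const uint8 flag) const { return flag != 0; }
};

static uint32 compact_active_reads_example(
    const nvbio::vector<device_tag,uint32>& active_reads, // the input reads
    const nvbio::vector<device_tag,uint8>&  active_flags, // 1 = needs another pass
    const uint32                            n_active,     // the number of input reads
          nvbio::vector<device_tag,uint32>& out_reads)    // the compacted output
{
    // copy the i-th read to the output iff its flag is non-zero, and return
    // the number of reads that survive the compaction
    return uint32( thrust::copy_if(
        active_reads.begin(),
        active_reads.begin() + n_active,
        active_flags.begin(),   // stencil sequence
        out_reads.begin(),
        is_nonzero() ) - out_reads.begin() );
}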