/*
 * nvbio
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the NVIDIA CORPORATION nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "build-chains.h"
#include "mem-search.h"
#include "options.h"
#include "pipeline.h"
#include "util.h"

#include <nvbio/basic/numbers.h>
#include <nvbio/basic/algorithms.h>
#include <nvbio/basic/priority_queue.h>
#include <nvbio/basic/timer.h>
#include <nvbio/basic/transform_iterator.h>
#include <nvbio/basic/vector_view.h>
#include <nvbio/basic/primitives.h>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/sort.h>

using namespace nvbio;

// a functor to extract the read id from a mem
struct mem_read_id_functor
{
    typedef mem_state::mem_type argument_type;
    typedef uint32              result_type;

    NVBIO_HOST_DEVICE
    uint32 operator() (const argument_type mem) const { return mem.string_id(); }
};
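
// NOTE: the kernel below wraps the MEM array with a transform_iterator over this
// functor and runs lower_bound() on the resulting read-id sequence; this implicitly
// assumes that the MEMs of a chunk are sorted by read id, so that each read's seeds
// form a contiguous range that can be located with two binary searches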

// a class to keep track of a chain
struct chain
{
    // construct an empty chain
    NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
    chain() : id(uint32(-1)) {}

    // construct a new chain from a single seed
    NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
    chain(const uint32 _id, const mem_state::mem_type seed) :
        id( _id ),
        ref( seed.index_pos() ),
        span_beg( seed.span().x ),
        last_ref( seed.index_pos() ),
        last_span( seed.span() )
    {}
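
    // NOTE: merge() accepts a seed into this chain in one of two cases:
    //   1. the seed is contained in the chain, both in the read and in the
    //      reference, in which case the chain is left unchanged;
    //   2. the seed starts at or after the chain's last seed on both the read and
    //      the reference, it lies (roughly) within a band of width w around the
    //      chain's diagonal, and the gaps to the end of the last seed stay below
    //      max_chain_gap on both the read and the reference; in this case the
    //      chain is grown to end at the new seed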

    // test whether we can merge the given mem into this chain
    NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
    bool merge(const mem_state::mem_type seed, const uint32 w, const uint32 max_chain_gap)
    {
        const uint32 seed_len = seed.span().y - seed.span().x;
        const uint32 last_len = last_span.y - last_span.x;
        const uint32 rbeg     = ref;
        const uint32 rend     = last_ref + last_len;

        // check whether seed is contained in the chain
        if (seed.span().x >= span_beg && seed.span().y <= last_span.y && seed.index_pos() >= rbeg && seed.index_pos() + seed_len <= rend)
            return true; // contained seed; do nothing

        const int32 x = seed.span().x - last_span.x; // always non-negative
        const int32 y = seed.index_pos() - last_ref;

        if ((y >= 0) && (x - y <= w) && (x - last_len < max_chain_gap) && (y - last_len < max_chain_gap))
        {
            // grow the chain
            last_span = seed.span();
            last_ref  = seed.index_pos();
            return true;
        }
        return false;
    }

    uint32 id;        // chain id
    uint32 ref;       // reference coordinate of the first seed in the chain
    uint32 span_beg;  // read span begin
    uint32 last_ref;  // the reference coordinate of the last seed in the chain
    uint2  last_span; // the read span of the last seed in the chain
};
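
// NOTE: chains are kept ordered by the reference coordinate of their first seed;
// the kernel below stores the open chains of a read in a priority queue under this
// ordering and uses upper_bound() to pick the candidate chain for each new seed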

struct chain_compare
{
    NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
    bool operator() (const chain& chain1, const chain& chain2) const
    {
        // compare by the reference coordinate of the first seed of each chain
        return chain1.ref < chain2.ref;
    }
};
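
// NOTE: one thread processes one active read; each pass consumes a window of at most
// MAX_CHAINS seeds of that read, and active_flags reports back to the host which
// reads still need further passes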

// assign a chain id to all MEMs for the current pipeline::chunk of reads
__global__
void build_chains_kernel(
    const read_chunk                chunk,          // the current sub-batch
    const uint32                    pass_number,    // the pass number - we process up to N seeds per pass
    const uint32                    n_active,       // the number of active reads in this pass
    const uint32*                   active_reads,   // the set of active reads
          uint8*                    active_flags,   // the output set of active read flags
    const uint32                    w,              // w parameter
    const uint32                    max_chain_gap,  // max chain gap parameter
    const uint32                    n_mems,         // the total number of MEMs for this chunk of reads
    const mem_state::mem_type*      mems,           // the MEMs for this chunk of reads
    const uint32*                   mems_index,     // a sorting index into the MEMs specifying the processing order
          uint64*                   mems_chains)    // the output chain IDs corresponding to the sorted MEMs
{
    const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
    if (thread_id >= n_active)
        return;

    const uint32 read_id = active_reads[ thread_id ];

    // find the first seed belonging to this read
    const uint32 mem_begin = uint32( nvbio::lower_bound(
        read_id,
        nvbio::make_transform_iterator( mems, mem_read_id_functor() ),
        n_mems ) - nvbio::make_transform_iterator( mems, mem_read_id_functor() ) );

    // find the first seed belonging to the next read
    const uint32 mem_end = uint32( nvbio::lower_bound(
        read_id+1u,
        nvbio::make_transform_iterator( mems, mem_read_id_functor() ),
        n_mems ) - nvbio::make_transform_iterator( mems, mem_read_id_functor() ) );

    // the maximum amount of chains we can output in one pass
    const uint32 MAX_CHAINS = 128;

    // keep a priority queue of the chains organized by the reference coordinate of their leftmost seed
    typedef nvbio::vector_view<chain*>                                      chain_vector_type;
    typedef nvbio::priority_queue<chain, chain_vector_type, chain_compare>  chain_queue_type;

    chain chain_queue_storage[MAX_CHAINS+1];
    chain_queue_type chain_queue( chain_vector_type( 0u, chain_queue_storage ) );
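
    // NOTE: the queue lives in a small, fixed-size local array wrapped by a
    // vector_view, so at most MAX_CHAINS chains can be tracked per read in a single
    // pass; this bound is the reason the host driver launches the kernel repeatedly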

    // keep a counter tracking the number of chains that get created
    //
    // NOTE: here we conservatively assume that in the previous passes we have
    // created the maximum number of chains, so as to avoid assigning an already
    // taken ID to a new chain (which would result in merging potentially unrelated
    // chains)
    uint64 n_chains = pass_number * MAX_CHAINS;

    // compute the first and ending MEM to process in this pass
    const uint32 mem_batch_begin = mem_begin + pass_number * MAX_CHAINS;
    const uint32 mem_batch_end   = nvbio::min( mem_batch_begin + MAX_CHAINS, mem_end );

    // process the seeds in order
    for (uint32 i = mem_batch_begin; i < mem_batch_end; ++i)
    {
        const uint32              seed_idx = mems_index[i];
        const mem_state::mem_type seed     = mems[ seed_idx ];

        // the chain id for this seed, to be determined
        uint32 chain_id;

        // insert seed
        if (chain_queue.empty())
        {
            // get a new chain id
            chain_id = n_chains++;

            // build a new chain
            chain_queue.push( chain( chain_id, seed ) );
        }
        else
        {
            // find the closest chain...
            chain_queue_type::iterator chain_it = chain_queue.upper_bound( chain( 0u, seed ) );

            // ...and test whether we can merge this seed into it; if there is no
            // candidate chain, or the merge test fails, open a new chain instead
            if (chain_it == chain_queue.end() ||
                chain_it->merge( seed, w, max_chain_gap ) == false)
            {
                // get a new chain id
                chain_id = n_chains++;

                // build a new chain
                chain_queue.push( chain( chain_id, seed ) );
            }
            else
            {
                // merge with the existing chain
                chain_id = chain_it->id;
            }
        }

        // write out the chain id (OR'd with the read id)
        mems_chains[i] = chain_id | (uint64( read_id ) << 32);
    }
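
    // at this point every seed in this pass's window has been assigned a 64-bit key
    // holding the read id in the high 32 bits and the chain id in the low 32 bits;
    // the host later sorts the seeds by this key, grouping them by read and chain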

    // write out whether we need more passes
    active_flags[ thread_id ] = (mem_batch_begin < mem_end) ? 1u : 0u;
}

// build chains for the current pipeline::chunk of reads
void build_chains(pipeline_state *pipeline, const io::SequenceDataDevice *reads)
{
    const ScopedTimer<float> timer( &pipeline->stats.chain_time ); // keep track of the time spent here

    struct chains_state<device_tag> *chn = &pipeline->chn;

    const uint32 n_reads = pipeline->chunk.read_end - pipeline->chunk.read_begin;
    const uint32 n_mems  = pipeline->chunk.mem_end  - pipeline->chunk.mem_begin;

    // skip pathological cases
    if (n_mems == 0u)
        return;

    //
    // Here we are going to run multiple passes of the same kernel, as we cannot fit
    // all chains in local memory at once...
    //

    // prepare some ping-pong queues for tracking active reads that need more passes
    nvbio::vector<device_tag,uint32> active_reads( n_reads );
    nvbio::vector<device_tag,uint8>  active_flags( n_reads );
    nvbio::vector<device_tag,uint32> out_reads( n_reads );
    nvbio::vector<device_tag,uint8>  temp_storage;

    // initialize the active reads queue
    thrust::copy(
        thrust::make_counting_iterator<uint32>(0u) + pipeline->chunk.read_begin,
        thrust::make_counting_iterator<uint32>(0u) + pipeline->chunk.read_end,
        active_reads.begin() );
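
    // NOTE: active_reads initially holds every read in [read_begin, read_end); after
    // each pass, copy_flagged() compacts it down to the reads flagged by the kernel
    // as still active, and the two buffers are swapped (hence the "ping-pong")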

    uint32 n_active = n_reads;
    for (uint32 pass_number = 0u; n_active; ++pass_number)
    {
        const uint32 block_dim = 128;
        const uint32 n_blocks  = util::divide_ri( n_active, block_dim );

        // assign a chain id to each mem
        build_chains_kernel<<<n_blocks, block_dim>>>(
            pipeline->chunk,
            pass_number,
            n_active,
            nvbio::plain_view( active_reads ),
            nvbio::plain_view( active_flags ),
            command_line_options.w,
            command_line_options.max_chain_gap,
            n_mems,
            nvbio::plain_view( chn->mems ),
            nvbio::plain_view( chn->mems_index ),
            nvbio::plain_view( chn->mems_chain ) );

        optional_device_synchronize();
        cuda::check_error("build-chains kernel");

        // shrink the set of active reads
        n_active = copy_flagged(
            n_active,                   // the number of input elements
            active_reads.begin(),       // the input sequence of elements to copy
            active_flags.begin(),       // the input sequence of copy flags
            out_reads.begin(),          // the output sequence of copied elements
            temp_storage );             // some temporary storage

        active_reads.swap( out_reads );
    }
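
    // the pass loop terminates because each pass advances a read's seed window by
    // MAX_CHAINS seeds, so every read eventually exhausts its seeds and stops being
    // flagged as active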

    // sort mems by chain id
    // NOTE: it's important here to use a stable-sort, so as to guarantee preserving
    // the ordering by left-coordinate of the MEMs
    thrust::stable_sort_by_key(                     // TODO: this is slow, switch to nvbio::cuda::SortEnactor
        chn->mems_chain.begin(),
        chn->mems_chain.begin() + n_mems,
        chn->mems_index.begin() );
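
    // NOTE: after this sort, mems_index enumerates the seeds grouped by read and,
    // within each read, by chain, while the stable sort preserves the relative
    // left-coordinate ordering of the seeds inside each chain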

    optional_device_synchronize();
    nvbio::cuda::check_error("build-chains kernel");
}