build-chains.cu 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include "build-chains.h"
  28. #include "mem-search.h"
  29. #include "options.h"
  30. #include "pipeline.h"
  31. #include "util.h"
  32. #include <nvbio/basic/numbers.h>
  33. #include <nvbio/basic/algorithms.h>
  34. #include <nvbio/basic/priority_queue.h>
  35. #include <nvbio/basic/timer.h>
  36. #include <nvbio/basic/transform_iterator.h>
  37. #include <nvbio/basic/vector_view.h>
  38. #include <nvbio/basic/primitives.h>
  39. #include <thrust/iterator/counting_iterator.h>
  40. #include <thrust/iterator/transform_iterator.h>
  41. #include <thrust/sort.h>
  42. using namespace nvbio;
  43. // a functor to extract the read id from a mem
  44. struct mem_read_id_functor
  45. {
  46. typedef mem_state::mem_type argument_type;
  47. typedef uint32 result_type;
  48. NVBIO_HOST_DEVICE
  49. uint32 operator() (const argument_type mem) const { return mem.string_id(); }
  50. };
  51. // a class to keep track of a chain
  52. struct chain
  53. {
  54. // construct an empty chain
  55. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  56. chain() : id(uint32(-1)) {}
  57. // construct a new chain from a single seed
  58. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  59. chain(const uint32 _id, const mem_state::mem_type seed) :
  60. id( _id ),
  61. ref( seed.index_pos() ),
  62. span_beg( seed.span().x ),
  63. last_ref( seed.index_pos() ),
  64. last_span( seed.span() )
  65. {}
  66. // test whether we can merge the given mem into this chain
  67. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  68. bool merge(const mem_state::mem_type seed, const uint32 w, const uint32 max_chain_gap)
  69. {
  70. const uint32 seed_len = seed.span().y - seed.span().x;
  71. const uint32 last_len = last_span.y - last_span.x;
  72. const uint32 rbeg = ref;
  73. const uint32 rend = last_ref + last_len;
  74. // check whether seed is contained in the chain
  75. if (seed.span().x >= span_beg && seed.span().y <= last_span.y && seed.index_pos() >= rbeg && seed.index_pos() + seed_len <= rend)
  76. return true; // contained seed; do nothing
  77. const int32 x = seed.span().x - last_span.x; // always non-negative
  78. const int32 y = seed.index_pos() - last_ref;
  79. if ((y >= 0) && (x - y <= w) && (x - last_len < max_chain_gap) && (y - last_len < max_chain_gap))
  80. {
  81. // grow the chain
  82. last_span = seed.span();
  83. last_ref = seed.index_pos();
  84. return true;
  85. }
  86. return false;
  87. }
  88. uint32 id; // chain id
  89. uint32 ref; // reference coordinate of the first seed in the chain
  90. uint32 span_beg; // read span begin
  91. uint32 last_ref; // the reference coordinate of the last seed in the chain
  92. uint2 last_span; // the read span of the last seed in the chain
  93. };
  94. struct chain_compare
  95. {
  96. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  97. bool operator() (const chain& chain1, const chain& chain2) const
  98. {
  99. // compare by the reference coordinate of the first seed of each chain
  100. return chain1.ref < chain2.ref;
  101. }
  102. };
  103. // assign a chain id to all MEMs for the current pipeline::chunk of reads
  104. __global__
  105. void build_chains_kernel(
  106. const read_chunk chunk, // the current sub-batch
  107. const uint32 pass_number, // the pass number - we process up to N seeds per pass
  108. const uint32 n_active, // the number of active reads in this pass
  109. const uint32* active_reads, // the set of active reads
  110. uint8* active_flags, // the output set of active read flags
  111. const uint32 w, // w parameter
  112. const uint32 max_chain_gap, // max chain gap parameter
  113. const uint32 n_mems, // the total number of MEMs for this chunk of reads
  114. const mem_state::mem_type* mems, // the MEMs for this chunk of reads
  115. const uint32* mems_index, // a sorting index into the MEMs specifying the processing order
  116. uint64* mems_chains) // the output chain IDs corresponding to the sorted MEMs
  117. {
  118. const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
  119. if (thread_id >= n_active)
  120. return;
  121. const uint32 read_id = active_reads[ thread_id ];
  122. // find the first seed belonging to this read
  123. const uint32 mem_begin = uint32( nvbio::lower_bound(
  124. read_id,
  125. nvbio::make_transform_iterator( mems, mem_read_id_functor() ),
  126. n_mems ) - nvbio::make_transform_iterator( mems, mem_read_id_functor() ) );
  127. // find the first seed belonging to the next read
  128. const uint32 mem_end = uint32( nvbio::lower_bound(
  129. read_id+1u,
  130. nvbio::make_transform_iterator( mems, mem_read_id_functor() ),
  131. n_mems ) - nvbio::make_transform_iterator( mems, mem_read_id_functor() ) );
  132. // the maximum amount of chains we can output in one pass
  133. const uint32 MAX_CHAINS = 128;
  134. // keep a priority queue of the chains organized by the reference coordinate of their leftmost seed
  135. typedef nvbio::vector_view<chain*> chain_vector_type;
  136. typedef nvbio::priority_queue<chain, chain_vector_type, chain_compare> chain_queue_type;
  137. chain chain_queue_storage[MAX_CHAINS+1];
  138. chain_queue_type chain_queue( chain_vector_type( 0u, chain_queue_storage ) );
  139. // keep a counter tracking the number of chains that get created
  140. //
  141. // NOTE: here we conservatively assume that in the previous passes we have
  142. // created the maximum number of chains, so as to avoid assigning an already
  143. // taken ID to a new chain (which would result in merging potentially unrelated
  144. // chains)
  145. uint64 n_chains = pass_number * MAX_CHAINS;
  146. // compute the first and ending MEM to process in this pass
  147. const uint32 mem_batch_begin = mem_begin + pass_number * MAX_CHAINS;
  148. const uint32 mem_batch_end = nvbio::min( mem_batch_begin + MAX_CHAINS, mem_end );
  149. // process the seeds in order
  150. for (uint32 i = mem_batch_begin; i < mem_batch_end; ++i)
  151. {
  152. const uint32 seed_idx = mems_index[i];
  153. const mem_state::mem_type seed = mems[ seed_idx ];
  154. // the chain id for this seed, to be determined
  155. uint32 chain_id;
  156. // insert seed
  157. if (chain_queue.empty())
  158. {
  159. // get a new chain id
  160. chain_id = n_chains++;
  161. // build a new chain
  162. chain_queue.push( chain( chain_id, seed ) );
  163. }
  164. else
  165. {
  166. // find the closest chain...
  167. chain_queue_type::iterator chain_it = chain_queue.upper_bound( chain( 0u, seed ) );
  168. // and test whether we can merge this seed into it
  169. if (chain_it != chain_queue.end() &&
  170. chain_it->merge( seed, w, max_chain_gap ) == false)
  171. {
  172. // get a new chain id
  173. chain_id = n_chains++;
  174. // build a new chain
  175. chain_queue.push( chain( chain_id, seed ) );
  176. }
  177. else
  178. {
  179. // merge with the existing chain
  180. chain_id = chain_it->id;
  181. }
  182. }
  183. // write out the chain id (OR'd with the read id)
  184. mems_chains[i] = chain_id | (uint64( read_id ) << 32);
  185. }
  186. // write out whether we need more passes
  187. active_flags[ thread_id ] = (mem_batch_begin < mem_end) ? 1u : 0u;
  188. }
  189. // build chains for the current pipeline::chunk of reads
  190. void build_chains(pipeline_state *pipeline, const io::SequenceDataDevice *reads)
  191. {
  192. const ScopedTimer<float> timer( &pipeline->stats.chain_time ); // keep track of the time spent here
  193. struct chains_state<device_tag> *chn = &pipeline->chn;
  194. const uint32 n_reads = pipeline->chunk.read_end - pipeline->chunk.read_begin;
  195. const uint32 n_mems = pipeline->chunk.mem_end - pipeline->chunk.mem_begin;
  196. // skip pathological cases
  197. if (n_mems == 0u)
  198. return;
  199. //
  200. // Here we are going to run multiple passes of the same kernel, as we cannot fit
  201. // all chains in local memory at once...
  202. //
  203. // prepare some ping-pong queues for tracking active reads that need more passes
  204. nvbio::vector<device_tag,uint32> active_reads( n_reads );
  205. nvbio::vector<device_tag,uint8> active_flags( n_reads );
  206. nvbio::vector<device_tag,uint32> out_reads( n_reads );
  207. nvbio::vector<device_tag,uint8> temp_storage;
  208. // initialize the active reads queue
  209. thrust::copy(
  210. thrust::make_counting_iterator<uint32>(0u) + pipeline->chunk.read_begin,
  211. thrust::make_counting_iterator<uint32>(0u) + pipeline->chunk.read_end,
  212. active_reads.begin() );
  213. uint32 n_active = n_reads;
  214. for (uint32 pass_number = 0u; n_active; ++pass_number)
  215. {
  216. const uint32 block_dim = 128;
  217. const uint32 n_blocks = util::divide_ri( n_active, block_dim );
  218. // assign a chain id to each mem
  219. build_chains_kernel<<<n_blocks, block_dim>>>(
  220. pipeline->chunk,
  221. pass_number,
  222. n_active,
  223. nvbio::plain_view( active_reads ),
  224. nvbio::plain_view( active_flags ),
  225. command_line_options.w,
  226. command_line_options.max_chain_gap,
  227. n_mems,
  228. nvbio::plain_view( chn->mems ),
  229. nvbio::plain_view( chn->mems_index ),
  230. nvbio::plain_view( chn->mems_chain ) );
  231. optional_device_synchronize();
  232. cuda::check_error("build-chains kernel");
  233. // shrink the set of active reads
  234. n_active = copy_flagged(
  235. n_active, // the number of input elements
  236. active_reads.begin(), // the input sequence of elements to copy
  237. active_flags.begin(), // the input sequence of copy flags
  238. out_reads.begin(), // the output sequence of copied elements
  239. temp_storage ); // some temporary storage
  240. active_reads.swap( out_reads );
  241. }
  242. // sort mems by chain id
  243. // NOTE: it's important here to use a stable-sort, so as to guarantee preserving
  244. // the ordering by left-coordinate of the MEMs
  245. thrust::sort_by_key( // TODO: this is slow, switch to nvbio::cuda::SortEnactor
  246. chn->mems_chain.begin(),
  247. chn->mems_chain.begin() + n_mems,
  248. chn->mems_index.begin() );
  249. optional_device_synchronize();
  250. nvbio::cuda::check_error("build-chains kernel");
  251. }