/*
 * nvbio
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the NVIDIA CORPORATION nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "align.h"
#include "mem-search.h"
#include "options.h"
#include "pipeline.h"
#include "util.h"

#include <nvbio/basic/numbers.h>
#include <nvbio/basic/algorithms.h>
#include <nvbio/basic/timer.h>
#include <nvbio/basic/transform_iterator.h>
#include <nvbio/basic/vector_view.h>
#include <nvbio/basic/primitives.h>
#include <nvbio/alignment/alignment.h>
#include <nvbio/alignment/batched.h>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

using namespace nvbio;

// initialize the alignment pipeline
//
void align_init(struct pipeline_state *pipeline, const io::SequenceDataDevice *batch)
{
    struct chains_state<device_tag>    *chn = &pipeline->chn;
    struct alignment_state<device_tag> *aln = &pipeline->aln;

    const uint32 n_reads  = pipeline->chunk.read_end - pipeline->chunk.read_begin;
    const uint32 n_chains = chn->n_chains;

    // initially, target the device
    pipeline->system = DEVICE;

    // reserve enough storage
    if (aln->stencil.size() < n_reads)
    {
        aln->begin_chains.clear(); aln->begin_chains.resize( n_reads );
        aln->end_chains.clear();   aln->end_chains.resize( n_reads );
        aln->stencil.clear();      aln->stencil.resize( n_reads );
        aln->temp_queue.clear();   aln->temp_queue.resize( n_reads );
        aln->query_spans.clear();  aln->query_spans.resize( n_reads );
        aln->ref_spans.clear();    aln->ref_spans.resize( n_reads );
        aln->sinks.clear();        aln->sinks.resize( n_reads );
    }

    // find the first chain for each read
    thrust::lower_bound(
        chn->chain_reads.begin(),
        chn->chain_reads.begin() + n_chains,
        thrust::make_counting_iterator<uint32>( pipeline->chunk.read_begin ),
        thrust::make_counting_iterator<uint32>( pipeline->chunk.read_end ),
        aln->begin_chains.begin() );

    // find the ending chain for each read
    thrust::upper_bound(
        chn->chain_reads.begin(),
        chn->chain_reads.begin() + n_chains,
        thrust::make_counting_iterator<uint32>( pipeline->chunk.read_begin ),
        thrust::make_counting_iterator<uint32>( pipeline->chunk.read_end ),
        aln->end_chains.begin() );
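
    // Worked example (hypothetical values): since chain_reads is sorted by
    // read id, if chain_reads = [0,0,1,1,1,3] and the chunk covers reads 0..3,
    // lower_bound produces begin_chains = [0,2,5,5] and upper_bound produces
    // end_chains = [2,5,5,6]: read r owns the half-open chain range
    // [begin_chains[r], end_chains[r]), which is empty for read 2.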

    aln->n_active = n_reads;
}

#define MEM_SHORT_EXT 50
#define MEM_SHORT_LEN 200

// a functor to compute the size of a span
//
struct span_size
{
    typedef uint2  argument_type;
    typedef uint32 result_type;

    NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
    uint32 operator() (const uint2 span) const { return span.y - span.x; }
};

// a functor to extract the reference span from a chain
//
struct span_functor
{
    typedef io::SequenceDataAccess<DNA_N> reads_access_type;
    typedef io::SequenceDataAccess<DNA>   reference_access_type;

    NVBIO_HOST_DEVICE
    span_functor(
        const runtime_options       _options,
        const reads_access_type     _reads,
        const reference_access_type _reference,
        const chains_view           _chains,
        const uint32*               _active_chains,
        uint2*                      _query_spans,
        uint2*                      _ref_spans,
        uint8*                      _flags) :
        options       ( _options ),
        reads         ( _reads ),
        reference     ( _reference ),
        chains        ( _chains ),
        active_chains ( _active_chains ),
        query_spans   ( _query_spans ),
        ref_spans     ( _ref_spans ),
        flags         ( _flags ) {}

    // the functor operator
    NVBIO_HOST_DEVICE
    void operator() (const uint32 idx) const
    {
        const uint32          chain_idx = active_chains[idx];
        const chain_reference chain     = chains[chain_idx];

        const uint32 len = chain.size();

        uint2 qspan = make_uint2( uint32(-1), 0u );
        uint2 rspan = make_uint2( uint32(-1), 0u );

        // loop through all seeds in this chain
        for (uint32 i = 0; i < len; ++i)
        {
            // fetch the i-th seed
            const chains_view::mem_type seed = chain[i];

            qspan.x = nvbio::min( qspan.x, seed.span().x );
            qspan.y = nvbio::max( qspan.y, seed.span().y );

            rspan.x = nvbio::min( rspan.x, seed.index_pos() );
            rspan.y = nvbio::max( rspan.y, seed.index_pos() + seed.span().y - seed.span().x );
        }

        const uint32 read_id    = chain.read();
        const uint2  read_range = reads.get_range( read_id );
        const uint32 read_len   = read_range.y - read_range.x;

        // extend both spans by MEM_SHORT_EXT on each side, clamping to the
        // read boundaries and to the reference length
        qspan.x = qspan.x > MEM_SHORT_EXT                   ? qspan.x - MEM_SHORT_EXT : 0u;
        qspan.y = qspan.y + MEM_SHORT_EXT < read_len        ? qspan.y + MEM_SHORT_EXT : read_len;
        rspan.x = rspan.x > MEM_SHORT_EXT                   ? rspan.x - MEM_SHORT_EXT : 0u;
        rspan.y = rspan.y + MEM_SHORT_EXT < reference.bps() ? rspan.y + MEM_SHORT_EXT : reference.bps();
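
        // Illustrative numbers (hypothetical): with MEM_SHORT_EXT = 50, a
        // query span [60,120) on a 150bp read is widened to [10,150), while a
        // span starting at 30 would clamp to 0 on the left.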

        const uint32 qdelta = qspan.y - qspan.x;
        const uint32 rdelta = rspan.y - rspan.x;

        if ((qspan.x <= 10 || qspan.y >= read_len - 10) ||                        // because ksw_align() does not support end-to-end alignment
            (rdelta > qdelta + MEM_SHORT_EXT || qdelta > rdelta + MEM_SHORT_EXT) ||
            (qdelta >= options.w * 4 || rdelta >= options.w * 4))
        {
            // flag to skip the short alignment
            flags[idx] = 0;
            return;
        }

        // save the resulting spans
        query_spans[idx] = make_uint2( qspan.x + read_range.x, qspan.y + read_range.x );
        ref_spans[idx]   = rspan;

        // flag to perform short alignment
        flags[idx] = 1;
    }

    const runtime_options       options;
    const reads_access_type     reads;
    const reference_access_type reference;
    const chains_view           chains;
    const uint32*               active_chains;
    uint2*                      query_spans;
    uint2*                      ref_spans;
    uint8*                      flags;
};

// perform banded alignment
//
template <typename system_tag>
uint32 align_short(
    chains_state<system_tag>    *chn,
    alignment_state<system_tag> *aln,
    const io::SequenceData      *reference,
    const io::SequenceData      *reads)
{
    typedef io::SequenceDataAccess<DNA_N> read_access_type;
    typedef io::SequenceDataAccess<DNA>   reference_access_type;

    // prepare POD access pointers to the reads and reference
    const read_access_type      reads_access( *reads );
    const reference_access_type reference_access( *reference );

    //
    // During alignment, we essentially keep a queue of "active" reads, corresponding
    // to those reads for which there are more chains to process; at every step, we
    // select one new chain from each read as an alignment candidate, removing it from
    // the set. This is done by keeping a pair of (begin,end) pointers per read and
    // advancing the begin field - when a range becomes empty, the read is removed.
    //
    uint32 n_active = aln->n_active;

    // build a stencil of the active reads, stencil[i] = (begin_chains[i] != end_chains[i])
    transform<system_tag>(
        n_active,
        aln->begin_chains.begin(),
        aln->end_chains.begin(),
        aln->stencil.begin(),
        nvbio::not_equal_functor<uint32>() );

    nvbio::vector<system_tag,uint8> temp_storage;

    // filter away reads that are done processing because there are no more chains
    copy_flagged(
        n_active,
        aln->begin_chains.begin(),
        aln->stencil.begin(),
        aln->temp_queue.begin(),
        temp_storage );

    aln->begin_chains.swap( aln->temp_queue );

    n_active = copy_flagged(
        n_active,
        aln->end_chains.begin(),
        aln->stencil.begin(),
        aln->temp_queue.begin(),
        temp_storage );

    aln->end_chains.swap( aln->temp_queue );
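
    // Compaction example (hypothetical values): with begin_chains = [3,7,9]
    // and end_chains = [5,7,12], the stencil is [1,0,1]; copy_flagged keeps
    // entries 0 and 2 of both vectors, so the read whose range became empty
    // (7 == 7) drops out and n_active becomes 2.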

    // reset the number of active reads
    aln->n_active = n_active;

    // check whether there's no more work to do
    if (n_active == 0)
        return 0u;

    // now build a view of the chains
    const chains_view chains( *chn );

    typedef typename alignment_state<system_tag>::sink_type sink_type;

    const nvbio::vector<system_tag,uint32>& cur_chains  = aln->begin_chains;
    nvbio::vector<system_tag,uint2>&        query_spans = aln->query_spans;
    nvbio::vector<system_tag,uint2>&        ref_spans   = aln->ref_spans;
    nvbio::vector<system_tag,uint8>&        stencil     = aln->stencil;
    nvbio::vector<system_tag,uint32>&       list        = aln->temp_queue;
    nvbio::vector<system_tag,sink_type>&    sinks       = aln->sinks;

    // compute the chain query-spans
    for_each<system_tag>(
        n_active,
        thrust::make_counting_iterator<uint32>(0u),
        span_functor(
            command_line_options,
            reads_access,
            reference_access,
            chains,
            raw_pointer( cur_chains ),
            raw_pointer( query_spans ),
            raw_pointer( ref_spans ),
            raw_pointer( stencil ) ) );

    // copy the list of indices to the short alignment problems
    const uint32 n_alns = copy_flagged(
        n_active,
        thrust::make_counting_iterator<uint32>(0u),
        stencil.begin(),
        list.begin(),
        temp_storage );

    if (n_alns)
    {
        //
        // perform a Gotoh batched alignment between two string-sets:
        // the string-sets here are sparse subsets of the symbol streams holding
        // the reads and the reference data
        //
        typedef read_access_type::sequence_stream_type      read_stream_type;
        typedef reference_access_type::sequence_stream_type reference_stream_type;
        typedef thrust::permutation_iterator<const uint2*, const uint32*> infix_iterator;

        const infix_iterator reads_infixes     = thrust::make_permutation_iterator( raw_pointer( query_spans ), raw_pointer( list ) );
        const infix_iterator reference_infixes = thrust::make_permutation_iterator( raw_pointer( ref_spans ),   raw_pointer( list ) );

        // build the sparse subset of the reads sequence
        const SparseStringSet<read_stream_type,infix_iterator> read_infix_set(
            n_alns,
            reads_access.sequence_stream(),
            reads_infixes );

        // build the sparse subset of the reference sequence
        const SparseStringSet<reference_stream_type,infix_iterator> reference_infix_set(
            n_alns,
            reference_access.sequence_stream(),
            reference_infixes );
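
        // Gather example (hypothetical values): if list = [4,1], the
        // permutation iterators dereference to query_spans[4], query_spans[1]
        // and ref_spans[4], ref_spans[1], so each sparse string-set exposes
        // exactly the two flagged substrings of its underlying symbol stream.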

        // compute the largest reference span
        const uint32 max_rspan = nvbio::reduce(
            n_alns,
            thrust::make_transform_iterator( reference_infixes, span_size() ),
            thrust::maximum<uint32>(),
            temp_storage );

        const aln::SimpleGotohScheme gotoh( 2, -2, -5, -3 ); // TODO: assign the correct scores here
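
        // Note (assuming SimpleGotohScheme takes match, mismatch, gap-open and
        // gap-extension scores in this order): matches score +2, mismatches -2,
        // opening a gap -5 and extending one -3, i.e. an affine gap model.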

        // invoke the parallel alignment
        aln::batch_alignment_score(
            aln::make_gotoh_aligner<aln::LOCAL>( gotoh ),
            read_infix_set,
            reference_infix_set,
            sinks.begin(),
            aln::DeviceThreadScheduler(),
            reads_access.max_sequence_len(),
            max_rspan );

        // TODO:
        // - check which alignments were successful
        // - perform a reverse alignment to find the source cell of each alignment
    }

    // add one to the processed chains
    nvbio::transform<system_tag>(
        n_active,
        aln->begin_chains.begin(),
        thrust::make_constant_iterator<uint32>( 1u ),
        aln->begin_chains.begin(),
        nvbio::add_functor() );
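
    // note: advancing begin_chains by one pops exactly one chain per active
    // read, implementing the (begin,end) queue described above; ranges that
    // become empty are filtered out by the stencil pass on the next call.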

    return n_active;
}

// perform banded alignment
//
uint32 align(
    struct pipeline_state               *pipeline,
    const nvbio::io::SequenceDataHost   *reads_host,
    const nvbio::io::SequenceDataDevice *reads_device)
{
    if (pipeline->system == DEVICE &&     // if currently on the device,
        pipeline->aln.n_active < 16*1024) // but too little parallelism...
    {
        // copy the state of the pipeline to the host
        pipeline->system = HOST;
        pipeline->h_chn  = pipeline->chn;
        pipeline->h_aln  = pipeline->aln;
    }

    if (pipeline->system == HOST)
    {
        return align_short<host_tag>(
            &pipeline->h_chn,
            &pipeline->h_aln,
            (const io::SequenceData*)pipeline->mem.reference_data_host,
            (const io::SequenceData*)reads_host );
    }
    else
    {
        return align_short<device_tag>(
            &pipeline->chn,
            &pipeline->aln,
            (const io::SequenceData*)pipeline->mem.reference_data_device,
            (const io::SequenceData*)reads_device );
    }
}