aligner.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #pragma once
  28. #include <nvBowtie/bowtie2/cuda/defs.h>
  29. #include <nvBowtie/bowtie2/cuda/reads_def.h>
  30. #include <nvBowtie/bowtie2/cuda/fmindex_def.h>
  31. #include <nvBowtie/bowtie2/cuda/seed_hit.h>
  32. #include <nvBowtie/bowtie2/cuda/seed_hit_deque_array.h>
  33. #include <nvBowtie/bowtie2/cuda/scoring_queues.h>
  34. #include <nvBowtie/bowtie2/cuda/params.h>
  35. #include <nvBowtie/bowtie2/cuda/stats.h>
  36. #include <nvBowtie/bowtie2/cuda/mapping.h>
  37. #include <nvbio/io/alignments.h>
  38. #include <nvbio/io/output/output_file.h>
  39. #include <nvbio/io/output/output_batch.h>
  40. #include <nvbio/io/sequence/sequence.h>
  41. #include <nvBowtie/bowtie2/cuda/scoring.h>
  42. #include <nvBowtie/bowtie2/cuda/mapq.h>
  43. #include <nvbio/basic/cuda/arch.h>
  44. #include <nvbio/basic/cuda/sort.h>
  45. #include <nvbio/basic/cuda/host_device_buffer.h>
  46. #include <nvbio/basic/cuda/work_queue.h>
  47. #include <nvbio/basic/timer.h>
  48. #include <nvbio/basic/console.h>
  49. #include <nvbio/basic/options.h>
  50. #include <nvbio/basic/threads.h>
  51. #include <nvbio/basic/html.h>
  52. #include <nvbio/basic/dna.h>
  53. #include <nvbio/basic/vector_array.h>
  54. #include <nvbio/fmindex/bwt.h>
  55. #include <nvbio/fmindex/ssa.h>
  56. #include <nvbio/fmindex/fmindex.h>
  57. #include <nvbio/fmindex/fmindex_device.h>
  58. #include <thrust/host_vector.h>
  59. #include <thrust/device_vector.h>
  60. #include <thrust/scan.h>
  61. #include <thrust/sort.h>
  62. #include <stdio.h>
  63. #include <stdlib.h>
  64. #include <string.h>
  65. #include <vector>
  66. #include <algorithm>
  67. #include <numeric>
  68. #include <functional>
  69. namespace nvbio {
  70. namespace bowtie2 {
  71. namespace cuda {
  72. struct Aligner
  73. {
  74. static const uint32 MAX_READ_LEN = MAXIMUM_READ_LENGTH;
  75. typedef FMIndexDef::type fmi_type;
  76. typedef FMIndexDef::type rfmi_type;
  77. typedef ReadsDef::read_storage_type read_storage_type;
  78. typedef ReadsDef::read_base_type read_base_type;
  79. typedef ReadsDef::read_qual_type read_qual_type;
  80. typedef ReadsDef::read_view_type read_view_type;
  81. typedef ReadsDef::type read_batch_type;
  82. typedef io::LdgSequenceDataView genome_view_type;
  83. typedef io::SequenceDataAccess<DNA,genome_view_type> genome_access_type;
  84. typedef genome_access_type::sequence_stream_type genome_iterator;
  85. uint32 ID;
  86. uint32 BATCH_SIZE;
  87. thrust::device_vector<uint8> dp_buffer_dvec;
  88. uint8* dp_buffer_dptr;
  89. SeedHitDequeArray hit_deques;
  90. nvbio::cuda::PingPongQueues<uint32> seed_queues;
  91. ScoringQueues scoring_queues;
  92. thrust::device_vector<uint32> idx_queue_dvec;
  93. uint32* idx_queue_dptr;
  94. thrust::device_vector<uint16> sorting_queue_dvec;
  95. thrust::device_vector<uint8> reseed_dvec;
  96. uint8* reseed_dptr;
  97. thrust::device_vector<uint32> trys_dvec;
  98. uint32* trys_dptr;
  99. thrust::device_vector<uint32> rseeds_dvec;
  100. uint32* rseeds_dptr;
  101. thrust::device_vector<uint8> flags_dvec;
  102. uint8* flags_dptr;
  103. nvbio::vector<device_tag,uint8> temp_dvec;
  104. thrust::device_vector<io::Alignment> best_data_dvec;
  105. thrust::device_vector<io::Alignment> best_data_dvec_o;
  106. io::Alignment* best_data_dptr;
  107. io::Alignment* best_data_dptr_o;
  108. thrust::device_vector<uint8> mapq_dvec;
  109. uint8* mapq_dptr;
  110. // --- paired-end vectors --------------------------------- //
  111. thrust::device_vector<uint32> opposite_queue_dvec;
  112. uint32* opposite_queue_dptr;
  113. // --- all-mapping vectors -------------------------------- //
  114. thrust::device_vector<io::Alignment> buffer_alignments_dvec;
  115. io::Alignment* buffer_alignments_dptr;
  116. thrust::device_vector<uint32> buffer_read_info_dvec;
  117. uint32* buffer_read_info_dptr;
  118. thrust::device_vector<io::Alignment> output_alignments_dvec;
  119. io::Alignment* output_alignments_dptr;
  120. thrust::device_vector<uint32> output_read_info_dvec;
  121. uint32* output_read_info_dptr;
  122. thrust::device_vector<uint32> hits_count_scan_dvec;
  123. uint32* hits_count_scan_dptr;
  124. thrust::device_vector<uint64> hits_range_scan_dvec;
  125. uint64* hits_range_scan_dptr;
  126. // -------------------------------------------------------- //
  127. nvbio::DeviceVectorArray<uint8> mds;
  128. nvbio::DeviceVectorArray<io::Cigar> cigar;
  129. thrust::device_vector<uint2> cigar_coords_dvec;
  130. uint2* cigar_coords_dptr;
  131. thrust::device_vector<uint64> hits_stats_dvec;
  132. thrust::host_vector<uint64> hits_stats_hvec;
  133. uint64* hits_stats_dptr;
  134. uint32 batch_number;
  135. nvbio::cuda::SortEnactor sort_enactor;
  136. // --------------------------------------------------------------------------------------------- //
  137. // file object that we're writing into
  138. io::OutputFile *output_file;
  139. static uint32 band_length(const uint32 max_dist)
  140. {
  141. //return max_dist*2+1;
  142. // compute band length
  143. uint32 band_len = 4;
  144. while (band_len-1 < max_dist*2+1)
  145. band_len *= 2;
  146. band_len -= 1;
  147. return band_len;
  148. }
  149. Aligner() : output_file(NULL) {}
  150. bool init(const uint32 id, const uint32 batch_size, const Params& params, const EndType type);
  151. void keep_stats(const uint32 count, Stats& stats);
  152. template <typename scoring_tag>
  153. void best_approx(
  154. const Params& params,
  155. const fmi_type fmi,
  156. const rfmi_type rfmi,
  157. const UberScoringScheme& scoring_scheme,
  158. const io::SequenceDataDevice& reference_data,
  159. const io::FMIndexDataDevice& driver_data,
  160. const io::SequenceDataDevice& read_data,
  161. io::HostOutputBatchSE& cpu_batch,
  162. Stats& stats);
  163. template <
  164. typename scoring_tag,
  165. typename scoring_scheme_type>
  166. void best_approx_score(
  167. const Params& params,
  168. const fmi_type fmi,
  169. const rfmi_type rfmi,
  170. const scoring_scheme_type& scoring_scheme,
  171. const io::SequenceDataDevice& reference_data,
  172. const io::FMIndexDataDevice& driver_data,
  173. const io::SequenceDataDevice& read_data,
  174. const uint32 seeding_pass,
  175. const uint32 seed_queue_size,
  176. const uint32* seed_queue,
  177. Stats& stats);
  178. template <typename scoring_tag>
  179. void best_approx(
  180. const Params& params,
  181. const FMIndexDef::type fmi,
  182. const FMIndexDef::type rfmi,
  183. const UberScoringScheme& scoring_scheme,
  184. const io::SequenceDataDevice& reference_data,
  185. const io::FMIndexDataDevice& driver_data,
  186. const io::SequenceDataDevice& read_data1,
  187. const io::SequenceDataDevice& read_data2,
  188. io::HostOutputBatchPE& cpu_batch,
  189. Stats& stats);
  190. template <
  191. typename scoring_tag,
  192. typename scoring_scheme_type>
  193. void best_approx_score(
  194. const Params& params,
  195. const fmi_type fmi,
  196. const rfmi_type rfmi,
  197. const scoring_scheme_type& scoring_scheme,
  198. const io::SequenceDataDevice& reference_data,
  199. const io::FMIndexDataDevice& driver_data,
  200. const uint32 anchor,
  201. const io::SequenceDataDevice& read_data1,
  202. const io::SequenceDataDevice& read_data2,
  203. const uint32 seeding_pass,
  204. const uint32 seed_queue_size,
  205. const uint32* seed_queue,
  206. Stats& stats);
  207. template <typename scoring_tag>
  208. void all(
  209. const Params& params,
  210. const fmi_type fmi,
  211. const rfmi_type rfmi,
  212. const UberScoringScheme& scoring_scheme,
  213. const io::SequenceDataDevice& reference_data,
  214. const io::FMIndexDataDevice& driver_data,
  215. const io::SequenceDataDevice& read_data,
  216. io::HostOutputBatchSE& cpu_batch,
  217. Stats& stats);
  218. template <typename scoring_scheme_type>
  219. void score_all(
  220. const Params& params,
  221. const fmi_type fmi,
  222. const rfmi_type rfmi,
  223. const UberScoringScheme& input_scoring_scheme,
  224. const scoring_scheme_type& scoring_scheme,
  225. const io::SequenceDataDevice& reference_data,
  226. const io::FMIndexDataDevice& driver_data,
  227. const io::SequenceDataDevice& read_data,
  228. io::HostOutputBatchSE& cpu_batch,
  229. const uint32 seed_queue_size,
  230. const uint32* seed_queue,
  231. Stats& stats,
  232. uint64& total_alignments);
  233. #if defined(__CUDACC__)
  234. // return a pointer to an "index" into the given sorted keys
  235. //
  236. template <typename iterator_type>
  237. std::pair<uint32*,uint64*> sort_64_bits(
  238. const uint32 count,
  239. const iterator_type keys)
  240. {
  241. thrust::copy(
  242. keys,
  243. keys + count,
  244. thrust::device_ptr<uint64>( (uint64*)raw_pointer( sorting_queue_dvec ) ) );
  245. return sort_64_bits( count );
  246. }
  247. #endif
  248. // return a pointer to an "index" into the given sorted keys
  249. //
  250. std::pair<uint32*,uint64*> sort_64_bits(
  251. const uint32 count);
  252. // return a pointer to an "index" into the given keys sorted by their hi bits
  253. //
  254. uint32* sort_hi_bits(
  255. const uint32 count,
  256. const uint32* keys);
  257. // sort a set of keys in place
  258. //
  259. void sort_inplace(
  260. const uint32 count,
  261. uint32* keys);
  262. bool init_alloc(const uint32 BATCH_SIZE, const Params& params, const EndType type, bool do_alloc, std::pair<uint64,uint64>* mem_stats = NULL);
  263. };
  264. // Compute the total number of matches found
  265. void hits_stats(
  266. const uint32 batch_size,
  267. const SeedHit* hit_data,
  268. const uint32* hit_counts,
  269. uint64* hit_stats);
  270. void ring_buffer_to_plain_array(
  271. const uint32* buffer,
  272. const uint32 buffer_size,
  273. const uint32 begin,
  274. const uint32 end,
  275. uint32* output);
  276. #if defined(__CUDACC__)
  277. // initialize a set of alignments
  278. //
  279. template <typename ReadBatch, typename ScoreFunction>
  280. __global__
  281. void init_alignments_kernel(
  282. const ReadBatch read_batch,
  283. const ScoreFunction worst_score_fun,
  284. io::Alignment* best_data,
  285. const uint32 best_stride,
  286. const uint32 mate)
  287. {
  288. const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
  289. if (thread_id >= read_batch.size()) return;
  290. // compute the read length
  291. const uint2 read_range = read_batch.get_range( thread_id );
  292. const uint32 read_len = read_range.y - read_range.x;
  293. const int32 worst_score = worst_score_fun( read_len );
  294. io::Alignment a1 = io::Alignment( uint32(-1), io::Alignment::max_ed(), worst_score, mate );
  295. io::Alignment a2 = io::Alignment( uint32(-1), io::Alignment::max_ed(), worst_score, mate );
  296. best_data[ thread_id ] = a1;
  297. best_data[ thread_id + best_stride ] = a2;
  298. }
  299. // initialize a set of alignments
  300. //
  301. template <typename ReadBatch, typename ScoreFunction>
  302. void init_alignments(
  303. const ReadBatch read_batch,
  304. const ScoreFunction worst_score_fun,
  305. io::Alignment* best_data,
  306. const uint32 best_stride,
  307. const uint32 mate = 0)
  308. {
  309. const int blocks = (read_batch.size() + BLOCKDIM-1) / BLOCKDIM;
  310. init_alignments_kernel<<<blocks, BLOCKDIM>>>(
  311. read_batch,
  312. worst_score_fun,
  313. best_data,
  314. best_stride,
  315. mate );
  316. }
  317. #endif // defined(__CUDACC__)
  318. // mark unaligned reads that need reseeding
  319. //
  320. void mark_unaligned(
  321. const uint32 n_active_reads,
  322. const uint32* active_reads,
  323. const io::Alignment* best_data,
  324. uint8* reseed);
  325. // mark unique unaligned read pairs as discordant
  326. //
  327. void mark_discordant(
  328. const uint32 n_reads,
  329. io::Alignment* anchor_data,
  330. io::Alignment* opposite_data,
  331. const uint32 stride);
  332. } // namespace cuda
  333. } // namespace bowtie2
  334. } // namespace nvbio