select.cu

/*
 * nvbio
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the NVIDIA CORPORATION nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <nvBowtie/bowtie2/cuda/select.h>
#include <nvBowtie/bowtie2/cuda/select_impl.h>
#include <nvbio/basic/algorithms.h>

namespace nvbio {
namespace bowtie2 {
namespace cuda {

__global__
void select_init_kernel(
    const uint32 count,
    const char* read_names,
    const uint32* read_names_idx,
    SeedHitDequeArrayDeviceView hits,
    uint32* trys,
    uint32* rseeds,
    const ParamsPOD params)
{
    //
    // NOTE: here we are initializing constants for ALL the reads in a batch,
    // including inactive ones. This works because we always access these fields
    // by read id throughout the entire pipeline.
    //
    const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
    if (thread_id >= count) return;

    // initialize the number of extension attempts ('trys')
    if (trys)
        trys[ thread_id ] = params.max_effort_init;

    // initialize the probability trees
    if (params.randomized)
    {
        // initialize the generator
        //rseeds[ thread_id ] = 0u;
        {
            // compute a hash of the read name
            const uint32 off = read_names_idx[ thread_id ];
            const uint32 len = read_names_idx[ thread_id + 1u ] - off;
            const char* read_name = read_names + off;

            // djb2 hashing function
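            // (the XOR variant: hash = hash*33 ^ c, seeded with 5381; hashing
            //  the read name gives each read a stable, reproducible random seed)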
            uint32 hash = 5381;
            for (uint32 i = 0; i < len && read_name[i]; ++i)
                hash = ((hash << 5) + hash) ^ read_name[i];

            rseeds[ thread_id ] = hash;
        }

        typedef SumTree<float*> ProbTree;

        // check whether we have a non-zero number of hits, otherwise we can go home
        const uint32 n_hits = hits.get_size( thread_id );
        if (n_hits == 0u)
            return;

        // build the probability tree
        float* hit_probs = hits.get_probs( thread_id );
        const SeedHit* hit_data = hits.get_data( thread_id );
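        // weight each seed range by 1/size^2: ranges with fewer SA occurrences
        // (i.e. more specific seeds) get a quadratically larger share of the
        // probability mass when hits are later sampled at random from the tree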
        for (uint32 i = 0; i < n_hits; ++i)
        {
            const SeedHit hit = hit_data[i];
            const uint2 range = hit.get_range();

            hit_probs[i] = 1.0f / (float(range.y - range.x) * float(range.y - range.x));
        }

        if (params.top_seed)
            hit_probs[0] = 0.0f; // assign zero probability to the top hit, as we will deterministically visit it

        // setup the tree
        ProbTree prob_tree( nvbio::max( n_hits, 1u ), hit_probs );
        prob_tree.setup();
    }
}

//
// Initialize the hit-selection pipeline
//
void select_init(
    const uint32 count,
    const char* read_names,
    const uint32* read_names_idx,
    SeedHitDequeArrayDeviceView hits,
    uint32* trys,
    uint32* rseeds,
    const ParamsPOD params)
{
    const int blocks = (count + BLOCKDIM-1) / BLOCKDIM;

    select_init_kernel<<<blocks, BLOCKDIM>>>(
        count,
        read_names,
        read_names_idx,
        hits,
        trys,
        rseeds,
        params );
}
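
//
// Illustrative usage (a sketch; the buffer names below are hypothetical): given
// a batch of n_reads reads whose names are packed in a device buffer 'names'
// and indexed by n_reads+1 offsets in 'names_idx', plus raw device pointers
// 'trys_ptr' and 'rseeds_ptr' each sized to n_reads, select_init would be
// called once per batch before the selection loop:
//
//     select_init(
//         n_reads,
//         names,       // packed read names
//         names_idx,   // per-read offsets into 'names'
//         hits_view,   // SeedHitDequeArrayDeviceView for the batch
//         trys_ptr,
//         rseeds_ptr,
//         params );
//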

//
// Initialize the hit-selection pipeline
//
void select_init(BestApproxScoringPipelineState<EditDistanceScoringScheme>& pipeline, const ParamsPOD& params)
{
    select_init_t( pipeline, params );
}

//
// Initialize the hit-selection pipeline
//
void select_init(BestApproxScoringPipelineState<SmithWatermanScoringScheme<> >& pipeline, const ParamsPOD& params)
{
    select_init_t( pipeline, params );
}

//
// Prepare for a round of seed extension by selecting the next SA row from each
// of the seed-hit deque arrays (SeedHitDequeArray) bound to the active reads in
// the scoring queues (ScoringQueues::active_reads).
//
void select(
    const SelectBestApproxContext context,
    const BestApproxScoringPipelineState<EditDistanceScoringScheme>& pipeline,
    const ParamsPOD params)
{
    select_t( context, pipeline, params );
}

//
// Prepare for a round of seed extension by selecting the next SA row from each
// of the seed-hit deque arrays (SeedHitDequeArray) bound to the active reads in
// the scoring queues (ScoringQueues::active_reads).
//
void select(
    const SelectBestApproxContext context,
    const BestApproxScoringPipelineState<SmithWatermanScoringScheme<> >& pipeline,
    const ParamsPOD params)
{
    select_t( context, pipeline, params );
}
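
// (both select() overloads dispatch to the templated select_t(), defined in
//  select_impl.h and instantiated here once per scoring scheme)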

///
/// select next hit extensions for all-mapping
///
__global__
void select_all_kernel(
    const uint64 begin,
    const uint32 count,
    const uint32 n_reads,
    const uint32 n_hit_ranges,
    const uint64 n_hits,
    SeedHitDequeArrayDeviceView hits,
    const uint32* hit_count_scan,
    const uint64* hit_range_scan,
    HitQueuesDeviceView hit_queues)
{
    const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
    if (thread_id >= count) return;

    // each thread is responsible for processing one hit, starting from 'begin'
    const uint64 global_hit_id = begin + thread_id;

    // do a binary search, looking for global_hit_id in hit_range_scan,
    // to find the corresponding seed-hit range.
    const uint32 global_range_id = uint32( upper_bound( global_hit_id, hit_range_scan, n_hit_ranges ) - hit_range_scan );

    // at this point we can figure out which read this range belongs to
    const uint32 read_id = uint32( upper_bound( global_range_id, hit_count_scan, n_reads ) - hit_count_scan );

    // now we have everything we need to access the proper hit_data
    const SeedHit* hit_data = hits.get_data( read_id );

    // compute the local range index within this read
    const uint32 count_offset = read_id ? hit_count_scan[ read_id-1 ] : 0u;
    const uint32 range_id = global_range_id - count_offset;
    const SeedHit* hit = &hit_data[ range_id ];

    // compute the local hit index within this range
    const uint64 range_offset = global_range_id ? hit_range_scan[ global_range_id-1 ] : 0u;
    const uint32 hit_id = uint32( global_hit_id - range_offset );

    const uint32 r_type = hit->get_readtype() ? 1u : 0u;

    HitReference<HitQueuesDeviceView> out_hit( hit_queues, thread_id );
    out_hit.loc     = hit->front() + hit_id;
    out_hit.seed    = packed_seed( hit->get_posinread(), hit->get_indexdir(), r_type, 0u );
    out_hit.read_id = read_id;
}
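
//
// Worked example (illustrative numbers): suppose read 0 owns two seed ranges of
// sizes {3,2} and read 1 owns one range of size {4}. The inclusive scans are
// then hit_count_scan = {2,3} and hit_range_scan = {3,5,9}. For
// global_hit_id = 6: global_range_id = 2 (first entry of hit_range_scan above 6),
// read_id = 1 (first entry of hit_count_scan above 2), range_id = 2 - 2 = 0,
// and hit_id = 6 - 5 = 1, i.e. the second SA row of read 1's only range.
//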

///
/// select next hit extensions from the top seed ranges
///
__global__
void select_n_from_top_range_kernel(
    const uint32 begin,
    const uint32 count,
    const uint32 n_reads,
    SeedHitDequeArrayDeviceView hits,
    const uint32* hit_range_scan,
    uint32* loc_queue,
    packed_seed* seed_queue,
    packed_read* read_info)
{
    const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
    if (thread_id >= count) return;

    // each thread is responsible for processing one hit, starting from 'begin'
    const uint64 global_hit_id = begin + thread_id;

    // do a binary search, looking for global_hit_id in hit_range_scan,
    // to find the corresponding read id.
    const uint32 read_id = uint32( upper_bound( global_hit_id, hit_range_scan, n_reads ) - hit_range_scan );

    // now we have everything we need to access the proper hit_data
    const SeedHit* hit_data = hits.get_data( read_id );

    // fetch the top range
    const SeedHit* hit = &hit_data[0];

    // compute the local hit index within this range
    // (guard on read_id: read 0 has no preceding entry in the scan)
    const uint64 range_offset = read_id ? hit_range_scan[ read_id-1 ] : 0u;
    const uint32 hit_id = uint32( global_hit_id - range_offset );

    const uint32 r_type = hit->get_readtype() ? 1u : 0u;

    loc_queue[ thread_id ]  = hit->front() + hit_id;
    seed_queue[ thread_id ] = packed_seed( hit->get_posinread(), hit->get_indexdir(), r_type, 0u );
    read_info[ thread_id ]  = packed_read( read_id );
}

void select_all(
    const uint64 begin,
    const uint32 count,
    const uint32 n_reads,
    const uint32 n_hit_ranges,
    const uint64 n_hits,
    const SeedHitDequeArrayDeviceView hits,
    const uint32* hit_count_scan,
    const uint64* hit_range_scan,
    HitQueuesDeviceView hit_queues)
{
    const int blocks = (count + BLOCKDIM-1) / BLOCKDIM;

    select_all_kernel<<<blocks, BLOCKDIM>>>(
        begin,
        count,
        n_reads,
        n_hit_ranges,
        n_hits,
        hits,
        hit_count_scan,
        hit_range_scan,
        hit_queues );
}

} // namespace cuda
} // namespace bowtie2
} // namespace nvbio