mapping.h 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. ///
  28. ///\file mapping.h
  29. ///
  30. #pragma once
  31. #include <nvBowtie/bowtie2/cuda/defs.h>
  32. #include <nvBowtie/bowtie2/cuda/utils.h>
  33. #include <nvBowtie/bowtie2/cuda/params.h>
  34. #include <nvBowtie/bowtie2/cuda/seed_hit.h>
  35. #include <nvBowtie/bowtie2/cuda/seed_hit_deque_array.h>
  36. #include <nvBowtie/bowtie2/cuda/reads_def.h>
  37. #include <nvBowtie/bowtie2/cuda/fmindex_def.h>
  38. #include <nvbio/io/utils.h>
  39. #include <nvbio/basic/cuda/pingpong_queues.h>
  40. #include <nvbio/basic/cached_iterator.h>
  41. #include <nvbio/basic/packedstream.h>
  42. #include <nvbio/basic/priority_deque.h>
  43. #include <nvbio/basic/strided_iterator.h>
  44. #include <nvbio/basic/algorithms.h>
  45. namespace nvbio {
  46. namespace bowtie2 {
  47. namespace cuda {
  48. ///@addtogroup nvBowtie
  49. ///@{
  50. /// \defgroup Mapping
  51. ///
  52. /// The functions in this module implement the very first pipeline stage: seed mapping.
  53. /// In this stage each read is broken up into many short, possibly overlapping seeds
  54. /// which get mapped against the reference genome using an FM-index.
  55. /// The output is a vector of variable-length "priority deques", one for each read,
  56. /// containing the set of Suffix Array ranges (\ref SeedHits) where the seeds align,
  57. /// prioritized by the inverse of the range size (see \ref SeedHitDequeArray).
  58. ///
  59. /// The module implements many mapping algorithms:
  60. ///
  61. /// - exact: allowing exact matches only
  62. /// - approx-hybrid: allowing 0 mismatches in a subseed of the seed, and up to 1 mismatch in the rest
  63. /// - approx-case-pruning: allowing 1 mismatch across the entire seed, using 2 FM-indices to perform
  64. /// the search with case pruning (i.e. searching an alignment with 0 mismatches in the first half of
  65. /// the seed in the forward FM-index, and an alignment with 0 mismatches in the second half in the
  66. /// reverse FM-index).
  67. ///
  68. ///@addtogroup Mapping
  69. ///@{
  70. ///
  71. /// Write the size of every seed hit range into out_ranges.
  72. /// Only the prototype is declared here; parameter notes marked NOTE(review) are unverified assumptions.
  73. void gather_ranges(
  74. const uint32 count, // NOTE(review): presumably the total number of seed hit ranges to process - confirm against the definition
  75. const uint32 n_reads, // NOTE(review): presumably the number of reads whose deques are held in hits - confirm
  76. SeedHitDequeArrayDeviceView hits, // view of the seed hit deques whose ranges are measured (see \ref SeedHitDequeArray)
  77. const uint32* hit_counts_scan, // NOTE(review): presumably a prefix scan of the per-read hit counts (inclusive vs. exclusive not visible here) - confirm
  78. uint64* out_ranges); // output: receives the range sizes, stored as 64-bit values
  79. ///
  80. /// Perform exact read mapping (per the function name, on whole reads rather than on seeds).
  81. /// Only the prototype is declared here; parameter notes marked NOTE(review) are unverified assumptions.
  82. void map_whole_read(
  83. const ReadsDef::type& read_batch, // batch of reads to map, taken by const reference
  84. const FMIndexDef::type fmi, // FM-index, passed by value; NOTE(review): presumably the forward index - confirm
  85. const FMIndexDef::type rfmi, // second FM-index, passed by value; NOTE(review): presumably the reverse index - confirm
  86. const nvbio::cuda::PingPongQueuesView<uint32> queues, // ping-pong queues of uint32 entries; NOTE(review): presumably read ids to process / to retry - confirm
  87. uint8* reseed, // NOTE(review): presumably per-read flags marking reads that need reseeding - confirm
  88. SeedHitDequeArrayDeviceView hits, // view of the seed hit deques; NOTE(review): presumably where the found hits are stored - confirm
  89. const ParamsPOD params, // mapping parameters, passed by value
  90. const bool fw, // NOTE(review): presumably enables mapping of the forward strand - confirm
  91. const bool rc); // NOTE(review): presumably enables mapping of the reverse-complemented strand - confirm
  92. ///
  93. /// Perform one run of exact seed mapping for every read in the input queue,
  94. /// writing the reads that need another run to the output queue.
  95. /// Only the prototype is declared here; parameter notes marked NOTE(review) are unverified assumptions.
  96. void map_exact(
  97. const ReadsDef::type& read_batch, // batch of reads to map, taken by const reference
  98. const FMIndexDef::type fmi, // FM-index, passed by value; NOTE(review): presumably the forward index - confirm
  99. const FMIndexDef::type rfmi, // second FM-index, passed by value; NOTE(review): presumably the reverse index - confirm
  100. const uint32 retry, // NOTE(review): presumably the index of the current mapping run / retry - confirm
  101. const nvbio::cuda::PingPongQueuesView<uint32> queues, // input queue of reads to map and output queue of reads needing another run
  102. uint8* reseed, // NOTE(review): presumably per-read flags marking reads that need reseeding - confirm
  103. SeedHitDequeArrayDeviceView hits, // view of the seed hit deques; NOTE(review): presumably where the found hits are stored - confirm
  104. const ParamsPOD params, // mapping parameters, passed by value
  105. const bool fw, // NOTE(review): presumably enables mapping of the forward strand - confirm
  106. const bool rc); // NOTE(review): presumably enables mapping of the reverse-complemented strand - confirm
  107. ///
  108. /// Perform multiple runs of exact seed mapping in one go and keep the best.
  109. /// NOTE(review): the criterion used to pick the "best" run is not visible in this header.
  110. void map_exact(
  111. const ReadsDef::type& read_batch, // batch of reads to map, taken by const reference
  112. const FMIndexDef::type fmi, // FM-index, passed by value; NOTE(review): presumably the forward index - confirm
  113. const FMIndexDef::type rfmi, // second FM-index, passed by value; NOTE(review): presumably the reverse index - confirm
  114. SeedHitDequeArrayDeviceView hits, // view of the seed hit deques; NOTE(review): presumably where the found hits are stored - confirm
  115. const uint2 seed_range, // pair of uint32 values; NOTE(review): presumably the range of seeding runs to cover (bounds convention not visible) - confirm
  116. const ParamsPOD params, // mapping parameters, passed by value
  117. const bool fw, // NOTE(review): presumably enables mapping of the forward strand - confirm
  118. const bool rc); // NOTE(review): presumably enables mapping of the reverse-complemented strand - confirm
  119. ///
  120. /// Perform one run of approximate seed mapping for every read in the input queue,
  121. /// writing the reads that need another run to the output queue.
  122. /// Only the prototype is declared here; parameter notes marked NOTE(review) are unverified assumptions.
  123. void map_approx(
  124. const ReadsDef::type& read_batch, // batch of reads to map, taken by const reference
  125. const FMIndexDef::type fmi, // FM-index, passed by value; NOTE(review): presumably the forward index - confirm
  126. const FMIndexDef::type rfmi, // second FM-index, passed by value; NOTE(review): presumably the reverse index - confirm
  127. const uint32 retry, // NOTE(review): presumably the index of the current mapping run / retry - confirm
  128. const nvbio::cuda::PingPongQueuesView<uint32> queues, // input queue of reads to map and output queue of reads needing another run
  129. uint8* reseed, // NOTE(review): presumably per-read flags marking reads that need reseeding - confirm
  130. SeedHitDequeArrayDeviceView hits, // view of the seed hit deques; NOTE(review): presumably where the found hits are stored - confirm
  131. const ParamsPOD params, // mapping parameters, passed by value
  132. const bool fw, // NOTE(review): presumably enables mapping of the forward strand - confirm
  133. const bool rc); // NOTE(review): presumably enables mapping of the reverse-complemented strand - confirm
  134. ///
  135. /// Perform multiple runs of approximate seed mapping in one go and keep the best.
  136. /// NOTE(review): the criterion used to pick the "best" run is not visible in this header.
  137. void map_approx(
  138. const ReadsDef::type& read_batch, // batch of reads to map, taken by const reference
  139. const FMIndexDef::type fmi, // FM-index, passed by value; NOTE(review): presumably the forward index - confirm
  140. const FMIndexDef::type rfmi, // second FM-index, passed by value; NOTE(review): presumably the reverse index - confirm
  141. SeedHitDequeArrayDeviceView hits, // view of the seed hit deques; NOTE(review): presumably where the found hits are stored - confirm
  142. const uint2 seed_range, // pair of uint32 values; NOTE(review): presumably the range of seeding runs to cover (bounds convention not visible) - confirm
  143. const ParamsPOD params, // mapping parameters, passed by value
  144. const bool fw, // NOTE(review): presumably enables mapping of the forward strand - confirm
  145. const bool rc); // NOTE(review): presumably enables mapping of the reverse-complemented strand - confirm
  146. ///
  147. /// Perform one run of seed mapping.
  148. /// NOTE(review): how the exact vs. approximate algorithm is chosen is not visible in this header - presumably driven by params; confirm.
  149. void map(
  150. const ReadsDef::type& read_batch, // batch of reads to map, taken by const reference
  151. const FMIndexDef::type fmi, // FM-index, passed by value; NOTE(review): presumably the forward index - confirm
  152. const FMIndexDef::type rfmi, // second FM-index, passed by value; NOTE(review): presumably the reverse index - confirm
  153. const uint32 retry, // NOTE(review): presumably the index of the current mapping run / retry - confirm
  154. const nvbio::cuda::PingPongQueuesView<uint32> queues, // ping-pong queues of uint32 entries; NOTE(review): presumably input reads / reads needing another run - confirm
  155. uint8* reseed, // NOTE(review): presumably per-read flags marking reads that need reseeding - confirm
  156. SeedHitDequeArrayDeviceView hits, // view of the seed hit deques; NOTE(review): presumably where the found hits are stored - confirm
  157. const ParamsPOD params, // mapping parameters, passed by value
  158. const bool fw, // NOTE(review): presumably enables mapping of the forward strand - confirm
  159. const bool rc); // NOTE(review): presumably enables mapping of the reverse-complemented strand - confirm
  160. ///@} // group Mapping
  161. ///@} // group nvBowtie
  162. } // namespace cuda
  163. } // namespace bowtie2
  164. } // namespace nvbio