alignment.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #pragma once
  28. #include <nvbio/basic/types.h>
  29. #include <nvbio/basic/numbers.h>
  30. namespace nvbio {
  31. namespace alndiff {
  32. struct Alignment
  33. {
  34. enum { PAIRED = 1,
  35. PROPER_PAIR = 2,
  36. UNMAPPED = 4,
  37. MATE_UNMAPPED = 8,
  38. REVERSE = 16,
  39. MATE_REVERSE = 32,
  40. READ_1 = 64,
  41. READ_2 = 128,
  42. SECONDARY = 256,
  43. QC_FAILED = 512,
  44. DUPLICATE = 1024
  45. };
  46. Alignment() :
  47. read_id( uint32(-1) ),
  48. read_len( 0 ),
  49. mate( 0 ),
  50. pos( 0 ),
  51. ref_id( uint32(-1) ),
  52. flag( UNMAPPED ),
  53. score( -65536 ),
  54. mapQ( 0 ),
  55. ed( 255 ),
  56. subs( 0 ),
  57. ins( 0 ),
  58. dels( 0 ),
  59. n_mm( 0 ),
  60. n_gapo( 0 ),
  61. n_gape( 0 ),
  62. has_second( 0 ),
  63. sec_score( -65536 ) {}
  64. bool is_mapped() const { return (pos != 0) && ((flag & UNMAPPED) == 0); }
  65. bool is_rc() const { return (flag & REVERSE) != 0; }
  66. bool is_unique() const { return is_mapped() && (has_second == false); }
  67. bool is_ambiguous() const { return is_mapped() && has_second && (sec_score == score); }
  68. uint16 mapped_read_bases() const { return subs + ins; }
  69. uint16 mapped_ref_bases() const { return subs + dels; }
  70. uint16 trimmed() const { return read_len - subs - ins; }
  71. uint32 read_id;
  72. uint32 read_len;
  73. uint32 mate;
  74. uint32 pos;
  75. uint32 ref_id;
  76. uint32 flag;
  77. int32 score;
  78. uint8 mapQ;
  79. uint8 ed;
  80. uint16 subs;
  81. uint16 ins;
  82. uint16 dels;
  83. uint8 n_mm;
  84. uint8 n_gapo;
  85. uint8 n_gape;
  86. uint8 has_second;
  87. int32 sec_score;
  88. };
  89. inline
  90. bool distant(const Alignment& a1, const Alignment& a2)
  91. {
  92. return (int64(a1.pos) < int64(a2.pos) - a1.read_len) ||
  93. (int64(a1.pos) > int64(a2.pos) + a1.read_len);
  94. }
  95. struct AlignmentPair
  96. {
  97. AlignmentPair() {}
  98. AlignmentPair(const Alignment m1, const Alignment m2) : mate1(m1), mate2(m2) {}
  99. uint32 read_id() const { return mate1.read_id; }
  100. uint32 read_len() const { return mate1.read_len + mate2.read_len; }
  101. int32 score() const { return mate1.score + mate2.score; }
  102. uint8 mapQ() const { return mate1.mapQ; }
  103. uint8 ed() const { return mate1.ed + mate2.ed; }
  104. uint16 subs() const { return mate1.subs + mate2.subs; }
  105. uint16 ins() const { return mate1.ins + mate2.ins; }
  106. uint16 dels() const { return mate1.dels + mate2.dels; }
  107. uint8 n_mm() const { return mate1.n_mm + mate2.n_mm; }
  108. uint8 n_gapo() const { return mate1.n_gapo + mate2.n_gapo; }
  109. uint8 n_gape() const { return mate1.n_gape + mate2.n_gape; }
  110. bool has_second() const { return mate1.has_second && mate2.has_second; }
  111. int32 sec_score() const { return mate1.sec_score + mate2.sec_score; }
  112. bool is_mapped() const { return mate1.is_mapped() && mate2.is_mapped(); }
  113. bool is_mapped_paired() const { return mate1.is_mapped() && mate2.is_mapped() && (mate1.flag & Alignment::PROPER_PAIR) && (mate2.flag & Alignment::PROPER_PAIR); }
  114. bool is_mapped_unpaired() const { return mate1.is_mapped() && mate2.is_mapped() && (((mate1.flag & Alignment::PROPER_PAIR) == 0) || ((mate2.flag & Alignment::PROPER_PAIR) == 0)); }
  115. bool is_unique_paired() const { return is_mapped_paired() && mate1.is_unique() && mate2.is_unique(); }
  116. bool is_ambiguous_paired()const { return is_mapped_paired() && has_second() && (score() == sec_score()); }
  117. uint16 mapped_read_bases() const { return subs() + ins(); }
  118. uint16 mapped_ref_bases() const { return subs() + dels(); }
  119. uint16 trimmed() const { return read_len() - subs() - ins(); }
  120. const Alignment& operator[] (const uint32 i) const { return *(&mate1 + i); }
  121. Alignment mate1;
  122. Alignment mate2;
  123. };
  124. struct AlignmentStream
  125. {
  126. /// virtual destructor
  127. ///
  128. virtual ~AlignmentStream() {}
  129. /// return if the stream is ok
  130. ///
  131. virtual bool is_ok() { return false; }
  132. /// get the next batch
  133. ///
  134. virtual uint32 next_batch(
  135. const uint32 count,
  136. Alignment* batch) { return 0; }
  137. };
  138. /// open an alignment file
  139. ///
  140. AlignmentStream* open_alignment_file(const char* file_name);
  141. } // alndiff namespace
  142. } // nvbio namespace