alignment_bam.cpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <nvbio-aln-diff/alignment.h>
  28. #include <contrib/bamtools/BamReader.h>
  29. #include <nvbio/basic/console.h>
  30. #include <crc/crc.h>
  31. namespace nvbio {
  32. namespace alndiff {
  33. struct BAMAlignmentStream : public AlignmentStream
  34. {
  35. BAMAlignmentStream(const char* file_name)
  36. {
  37. log_verbose(stderr, "opening BAM file \"%s\"... started\n", file_name);
  38. m_bam_reader.Open( file_name );
  39. m_offset = 0;
  40. log_verbose(stderr, "opening BAM file \"%s\"... done\n", file_name);
  41. }
  42. // return if the stream is ok
  43. //
  44. bool is_ok() { return true; } // TODO: add a mechanism to bamtools to know whether the file opened correctly
  45. // get the next batch
  46. //
  47. uint32 next_batch(
  48. const uint32 count,
  49. Alignment* batch)
  50. {
  51. uint32 n_read = 0;
  52. while (n_read < count)
  53. {
  54. Alignment* aln = batch + n_read;
  55. // clean the alignment
  56. *aln = Alignment();
  57. BamTools::BamAlignment bam_aln;
  58. if (m_bam_reader.GetNextAlignment( bam_aln ) == false)
  59. break;
  60. aln->read_id = uint32( crcCalc( bam_aln.Name.c_str(), uint32(bam_aln.Name.length()) ) );
  61. aln->read_len = bam_aln.Length;
  62. aln->mate = bam_aln.IsFirstMate() ? 0u : 1u;
  63. aln->flag = bam_aln.AlignmentFlag;
  64. aln->pos = bam_aln.Position;
  65. if (aln->is_mapped())
  66. {
  67. aln->ref_id = bam_aln.RefID;
  68. aln->mapQ = uint8( bam_aln.MapQuality );
  69. bam_aln.GetEditDistance( aln->ed );
  70. analyze_cigar( bam_aln.CigarData, aln );
  71. bam_aln.GetTag( "AS", aln->score );
  72. aln->has_second = bam_aln.GetTag( "XS", aln->sec_score );
  73. bam_aln.GetTag( "XM", aln->n_mm );
  74. bam_aln.GetTag( "XO", aln->n_gapo );
  75. bam_aln.GetTag( "XG", aln->n_gape );
  76. const char* md = bam_aln.GetTag( "MD" );
  77. if (md)
  78. analyze_md( md, aln );
  79. }
  80. ++n_read;
  81. }
  82. m_offset += n_read;
  83. return n_read;
  84. }
  85. void analyze_cigar(const std::vector<BamTools::CigarOp>& cigar, Alignment* aln)
  86. {
  87. aln->subs = aln->ins = aln->dels = 0;
  88. for (uint32 i = 0; i < cigar.size(); ++i)
  89. {
  90. const BamTools::CigarOp op = cigar[i];
  91. if (op.Type == 'X')
  92. ++aln->n_mm;
  93. if (op.Type == 'M' || op.Type == 'X' || op.Type == '=')
  94. aln->subs += op.Length;
  95. else if (op.Type == 'I')
  96. aln->ins += op.Length;
  97. else if (op.Type == 'D')
  98. aln->dels += op.Length;
  99. }
  100. }
  101. void analyze_md(const char* md, Alignment* aln)
  102. {
  103. aln->n_mm = 0;
  104. for (; *md != '\0'; ++md)
  105. {
  106. const char c = *md;
  107. if (c >= '0' &&
  108. c <= '9')
  109. continue;
  110. if (c >= 'A' &&
  111. c <= 'Z')
  112. ++aln->n_mm;
  113. if (c == '^')
  114. {
  115. // a deletion, skip it
  116. for (++md; *md != '\0' && (*md <= '0' || *md >= '9'); ++md) {}
  117. }
  118. }
  119. }
  120. BamTools::BamReader m_bam_reader;
  121. uint32 m_offset;
  122. };
  123. AlignmentStream* open_bam_file(const char* file_name)
  124. {
  125. return new BAMAlignmentStream( file_name );
  126. }
  127. } // alndiff namespace
  128. } // nvbio namespace