// stats.h
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #pragma once
  28. #include <nvbio-aln-diff/utils.h>
  29. #include <nvbio-aln-diff/alignment.h>
  30. namespace nvbio {
  31. namespace alndiff {
  32. enum Type { LOWER = 0, HIGHER = 1 };
  33. enum Bins { LINEAR = 0, LOG = 1 };
  34. struct StatsPartition
  35. {
  36. Histogram<256> hist;
  37. Histogram2d<12,10> hist_by_length;
  38. Histogram2d<12,10> hist_by_mapQ;
  39. Histogram<256> diff_hist;
  40. Histogram2d<12,10> diff_hist_by_length;
  41. Histogram2d<32,10> diff_hist_by_value_neg;
  42. Histogram2d<32,10> diff_hist_by_value_pos;
  43. Histogram2d<7,12> diff_hist_by_mapQ1;
  44. Histogram2d<7,12> diff_hist_by_mapQ2;
  45. };
  46. template <Type TYPE_T, Bins BINS_T>
  47. struct Stats
  48. {
  49. static const Type TYPE = TYPE_T;
  50. static const Bins BINS = BINS_T;
  51. typedef StatsPartition Partition;
  52. Bins bin_type() const { return BINS; }
  53. int32 push(const int32 val1, const int32 val2, const uint32 length_bin, const uint32 mapQ1, const uint32 mapQ2)
  54. {
  55. l.hist.push( (BINS == LOG) ? log_bin(val1) : val1 );
  56. l.hist_by_length.push( length_bin, log_bin( val1 ) );
  57. l.hist_by_mapQ.push( log_bin( mapQ1 ), log_bin( val1 ) );
  58. r.hist.push( (BINS == LOG) ? log_bin(val2) : val2 );
  59. r.hist_by_length.push( length_bin, log_bin( val2 ) );
  60. r.hist_by_mapQ.push( log_bin( mapQ2 ), log_bin( val2) );
  61. if (TYPE == LOWER)
  62. {
  63. const int32 diff = int32(val2) - int32(val1);
  64. if (diff >= 0)
  65. {
  66. const int32 log_diff = log_bin( diff );
  67. l.diff_hist.push( (BINS == LOG) ? log_diff : diff );
  68. l.diff_hist_by_length.push( length_bin, log_diff );
  69. l.diff_hist_by_mapQ1.push( log_bin(mapQ1), log_diff );
  70. l.diff_hist_by_mapQ2.push( log_bin(mapQ2), log_diff );
  71. if (val1 < 0)
  72. l.diff_hist_by_value_neg.push( log_bin(-val1), log_diff );
  73. else
  74. l.diff_hist_by_value_pos.push( log_bin(val1), log_diff );
  75. }
  76. if (diff <= 0)
  77. {
  78. const int32 log_diff = log_bin( -diff );
  79. r.diff_hist.push( (BINS == LOG) ? log_diff : -diff );
  80. r.diff_hist_by_length.push( length_bin, log_diff );
  81. r.diff_hist_by_mapQ1.push( log_bin(mapQ1), log_diff );
  82. r.diff_hist_by_mapQ2.push( log_bin(mapQ2), log_diff );
  83. if (val2 < 0)
  84. r.diff_hist_by_value_neg.push( log_bin(-val2), log_diff );
  85. else
  86. r.diff_hist_by_value_pos.push( log_bin(val2), log_diff );
  87. }
  88. return diff;
  89. }
  90. else
  91. {
  92. const int32 diff = int32(val1) - int32(val2);
  93. if (diff >= 0)
  94. {
  95. const int32 log_diff = log_bin( diff );
  96. l.diff_hist.push( (BINS == LOG) ? log_diff : diff );
  97. l.diff_hist_by_length.push( length_bin, log_diff );
  98. l.diff_hist_by_mapQ1.push( log_bin(mapQ1), log_diff );
  99. l.diff_hist_by_mapQ2.push( log_bin(mapQ2), log_diff );
  100. if (val2 < 0)
  101. l.diff_hist_by_value_neg.push( log_bin(-val2), log_diff );
  102. else
  103. l.diff_hist_by_value_pos.push( log_bin(val2), log_diff );
  104. }
  105. if (diff <= 0)
  106. {
  107. const int32 log_diff = log_bin( -diff );
  108. r.diff_hist.push( (BINS == LOG) ? log_diff : -diff );
  109. r.diff_hist_by_length.push( length_bin, log_diff );
  110. r.diff_hist_by_mapQ1.push( log_bin(mapQ1), log_diff );
  111. r.diff_hist_by_mapQ2.push( log_bin(mapQ2), log_diff );
  112. if (val1 < 0)
  113. r.diff_hist_by_value_neg.push( log_bin(-val1), log_diff );
  114. else
  115. r.diff_hist_by_value_pos.push( log_bin(val1), log_diff );
  116. }
  117. return diff;
  118. }
  119. }
  120. Partition l;
  121. Partition r;
  122. };
  123. struct AlignmentStats
  124. {
  125. Stats<HIGHER,LOG> higher_score;
  126. Stats<LOWER,LINEAR> lower_ed;
  127. Stats<HIGHER,LOG> higher_mapQ;
  128. Stats<HIGHER,LOG> longer_mapping;
  129. Stats<HIGHER,LOG> higher_pos;
  130. Stats<LOWER,LINEAR> lower_subs;
  131. Stats<LOWER,LINEAR> lower_ins;
  132. Stats<LOWER,LINEAR> lower_dels;
  133. Stats<LOWER,LINEAR> lower_mms;
  134. };
  135. } // namespace alndiff
  136. } // namespace nvbio