se_analyzer.cpp 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <nvbio-aln-diff/se_analyzer.h>
  28. #include <nvbio-aln-diff/html.h>
  29. #include <nvbio-aln-diff/utils.h>
  30. namespace nvbio {
  31. namespace alndiff {
  32. SEAnalyzer::SEAnalyzer(Filter& filter) :
  33. m_filter( filter )
  34. {
  35. n = 0;
  36. n_mismatched = 0;
  37. }
  38. void SEAnalyzer::push(
  39. const Alignment& alnL,
  40. const Alignment& alnR)
  41. {
  42. if (alnL.read_id != alnR.read_id ||
  43. alnL.read_len != alnR.read_len)
  44. {
  45. n_mismatched++;
  46. return;
  47. }
  48. mapped.push( alnL.is_mapped(), alnR.is_mapped() );
  49. unique.push( alnL.is_unique(), alnR.is_unique() );
  50. ambiguous.push( alnL.is_ambiguous(), alnR.is_ambiguous() );
  51. if ((alnL.is_mapped() == true) && (alnR.is_mapped() == false)) mapped_L_not_R_by_mapQ.push( log_bin( alnL.mapQ ) );
  52. if ((alnR.is_mapped() == true) && (alnL.is_mapped() == false)) mapped_R_not_L_by_mapQ.push( log_bin( alnR.mapQ ) );
  53. if ((alnL.is_unique() == true) && (alnR.is_unique() == false)) unique_L_not_R_by_mapQ.push( log_bin( alnL.mapQ ) );
  54. if ((alnR.is_unique() == true) && (alnL.is_unique() == false)) unique_R_not_L_by_mapQ.push( log_bin( alnR.mapQ ) );
  55. if ((alnL.is_ambiguous() == true) && (alnR.is_ambiguous() == false)) ambiguous_L_not_R_by_mapQ.push( log_bin( alnL.mapQ ) );
  56. if ((alnR.is_ambiguous() == true) && (alnL.is_ambiguous() == false)) ambiguous_R_not_L_by_mapQ.push( log_bin( alnR.mapQ ) );
  57. if (alnL.is_mapped() && alnR.is_mapped())
  58. {
  59. const uint32 mapQ_bin = log_bin( alnR.mapQ );
  60. uint32 read_flags = 0u;
  61. if (alnL.ref_id != alnR.ref_id)
  62. {
  63. n_different_ref.push( mapQ_bin );
  64. n_distant.push( mapQ_bin );
  65. read_flags |= Filter::DISTANT;
  66. read_flags |= Filter::DIFFERENT_REF;
  67. }
  68. else if (alndiff::distant( alnL, alnR ))
  69. {
  70. n_distant.push( mapQ_bin );
  71. read_flags |= Filter::DISTANT;
  72. }
  73. if (alnL.is_rc() != alnR.is_rc())
  74. {
  75. n_discordant.push( mapQ_bin );
  76. read_flags |= Filter::DISCORDANT;
  77. }
  78. const uint32 length_bin = read_length_bin( alnL.read_len );
  79. // generic stats
  80. {
  81. // ed
  82. m_filter( al_stats.lower_ed.push( alnL.ed, alnR.ed, length_bin, alnL.mapQ, alnR.mapQ ), read_flags, Filter::ED, alnL.read_id );
  83. // mapQ
  84. m_filter( al_stats.higher_mapQ.push( alnL.mapQ, alnR.mapQ, length_bin, alnL.mapQ, alnR.mapQ ), read_flags, Filter::MAPQ, alnL.read_id );
  85. // longer mapping
  86. al_stats.longer_mapping.push( alnL.mapped_read_bases(), alnR.mapped_read_bases(), length_bin, alnL.mapQ, alnR.mapQ );
  87. al_stats.lower_subs.push( alnL.subs, alnR.subs, length_bin, alnL.mapQ, alnR.mapQ );
  88. m_filter( al_stats.lower_mms.push( alnL.n_mm, alnR.n_mm, length_bin, alnL.mapQ, alnR.mapQ ), read_flags, Filter::MMS, alnL.read_id );
  89. m_filter( al_stats.lower_ins.push( alnL.ins, alnR.ins, length_bin, alnL.mapQ, alnR.mapQ ), read_flags, Filter::INS, alnL.read_id );
  90. m_filter( al_stats.lower_dels.push( alnL.dels, alnR.dels, length_bin, alnL.mapQ, alnR.mapQ ), read_flags, Filter::DELS, alnL.read_id );
  91. al_stats.higher_pos.push( alnL.pos, alnR.pos, length_bin, alnL.mapQ, alnR.mapQ );
  92. }
  93. if (read_flags & Filter::DISTANT)
  94. {
  95. // ed
  96. distant_stats.lower_ed.push( alnL.ed, alnR.ed, length_bin, alnL.mapQ, alnR.mapQ );
  97. // mapQ
  98. distant_stats.higher_mapQ.push( alnL.mapQ, alnR.mapQ, length_bin, alnL.mapQ, alnR.mapQ );
  99. // longer mapping
  100. distant_stats.longer_mapping.push( alnL.mapped_read_bases(), alnR.mapped_read_bases(), length_bin, alnL.mapQ, alnR.mapQ );
  101. distant_stats.lower_subs.push( alnL.subs, alnR.subs, length_bin, alnL.mapQ, alnR.mapQ );
  102. distant_stats.lower_mms.push( alnL.n_mm, alnR.n_mm, length_bin, alnL.mapQ, alnR.mapQ );
  103. distant_stats.lower_ins.push( alnL.ins, alnR.ins, length_bin, alnL.mapQ, alnR.mapQ );
  104. distant_stats.lower_dels.push( alnL.dels, alnR.dels, length_bin, alnL.mapQ, alnR.mapQ );
  105. distant_stats.higher_pos.push( alnL.pos, alnR.pos, length_bin, alnL.mapQ, alnR.mapQ );
  106. }
  107. if (read_flags & Filter::DISCORDANT)
  108. {
  109. // ed
  110. discordant_stats.lower_ed.push( alnL.ed, alnR.ed, length_bin, alnL.mapQ, alnR.mapQ );
  111. // mapQ
  112. discordant_stats.higher_mapQ.push( alnL.mapQ, alnR.mapQ, length_bin, alnL.mapQ, alnR.mapQ );
  113. // longer mapping
  114. discordant_stats.longer_mapping.push( alnL.mapped_read_bases(), alnR.mapped_read_bases(), length_bin, alnL.mapQ, alnR.mapQ );
  115. discordant_stats.lower_subs.push( alnL.subs, alnR.subs, length_bin, alnL.mapQ, alnR.mapQ );
  116. discordant_stats.lower_mms.push( alnL.n_mm, alnR.n_mm, length_bin, alnL.mapQ, alnR.mapQ );
  117. discordant_stats.lower_ins.push( alnL.ins, alnR.ins, length_bin, alnL.mapQ, alnR.mapQ );
  118. discordant_stats.lower_dels.push( alnL.dels, alnR.dels, length_bin, alnL.mapQ, alnR.mapQ );
  119. discordant_stats.higher_pos.push( alnL.pos, alnR.pos, length_bin, alnL.mapQ, alnR.mapQ );
  120. }
  121. }
  122. ++n;
  123. }
  124. namespace {
  125. void generate_summary_header(FILE* html_output)
  126. {
  127. html::tr_object tr( html_output, NULL );
  128. html::th_object( html_output, html::FORMATTED, NULL, "" );
  129. html::th_object( html_output, html::FORMATTED, NULL, "better" );
  130. html::th_object( html_output, html::FORMATTED, "class", "red", NULL, "worse" );
  131. }
  132. template <typename StatsType>
  133. void generate_summary_cell(FILE* html_output, const std::string file_name, const char* type, const StatsType& stats, const uint32 n)
  134. {
  135. char link_name[1024];
  136. sprintf( link_name, "<a href=\"%s\">%s</a>", local_file( file_name ), type );
  137. html::tr_object tr( html_output, "class", "alt", NULL );
  138. html::th_object( html_output, html::FORMATTED, NULL, link_name );
  139. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(stats.l.diff_hist.all_but(0))/float(n) );
  140. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(stats.r.diff_hist.all_but(0))/float(n) );
  141. }
  142. } // anonymous namespace
  143. void SEAnalyzer::generate_report(const char* aln_file_name1, const char* aln_file_name2, const char* report)
  144. {
  145. if (report == NULL)
  146. return;
  147. const std::string mapped_bps_name = generate_file_name( report, "mapped-bps" );
  148. const std::string ed_name = generate_file_name( report, "ed" );
  149. const std::string mapQ_name = generate_file_name( report, "mapQ" );
  150. const std::string mms_name = generate_file_name( report, "mms" );
  151. const std::string ins_name = generate_file_name( report, "ins" );
  152. const std::string dels_name = generate_file_name( report, "dels" );
  153. const std::string pos_name = generate_file_name( report, "pos" );
  154. const std::string distant_mapped_bps_name = generate_file_name( report, "distant_stats.mapped-bps" );
  155. const std::string distant_ed_name = generate_file_name( report, "distant_stats.ed" );
  156. const std::string distant_mapQ_name = generate_file_name( report, "distant_stats.mapQ" );
  157. const std::string distant_mms_name = generate_file_name( report, "distant_stats.mms" );
  158. const std::string distant_ins_name = generate_file_name( report, "distant_stats.ins" );
  159. const std::string distant_dels_name = generate_file_name( report, "distant_stats.dels" );
  160. const std::string distant_pos_name = generate_file_name( report, "distant_stats.pos" );
  161. const std::string discordant_mapped_bps_name = generate_file_name( report, "discordant_stats.mapped-bps" );
  162. const std::string discordant_ed_name = generate_file_name( report, "discordant_stats.ed" );
  163. const std::string discordant_mapQ_name = generate_file_name( report, "discordant_stats.mapQ" );
  164. const std::string discordant_mms_name = generate_file_name( report, "discordant_stats.mms" );
  165. const std::string discordant_ins_name = generate_file_name( report, "discordant_stats.ins" );
  166. const std::string discordant_dels_name = generate_file_name( report, "discordant_stats.dels" );
  167. const std::string discordant_pos_name = generate_file_name( report, "discordant_stats.pos" );
  168. generate_table( mapped_bps_name.c_str(), "mapped L & R", "mapped bps", "mapped bps diff", al_stats.longer_mapping.bin_type(), al_stats.longer_mapping.l, al_stats.longer_mapping.r, n, mapped.L_and_R );
  169. generate_table( ed_name.c_str(), "mapped L & R", "edit distance", "edit distance diff", al_stats.lower_ed.bin_type(), al_stats.lower_ed.l, al_stats.lower_ed.r, n, mapped.L_and_R );
  170. generate_table( mapQ_name.c_str(), "mapped L & R", "mapQ", "mapQ diff", al_stats.higher_mapQ.bin_type(), al_stats.higher_mapQ.l, al_stats.higher_mapQ.r, n, mapped.L_and_R );
  171. generate_table( mms_name.c_str(), "mapped L & R", "mms", "mms diff", al_stats.lower_mms.bin_type(), al_stats.lower_mms.l, al_stats.lower_mms.r, n, mapped.L_and_R );
  172. generate_table( ins_name.c_str(), "mapped L & R", "ins", "ins diff", al_stats.lower_ins.bin_type(), al_stats.lower_ins.l, al_stats.lower_ins.r, n, mapped.L_and_R );
  173. generate_table( dels_name.c_str(), "mapped L & R", "dels", "dels diff", al_stats.lower_dels.bin_type(), al_stats.lower_dels.l, al_stats.lower_dels.r, n, mapped.L_and_R );
  174. generate_table( pos_name.c_str(), "mapped L & R", "position", "distance", al_stats.higher_pos.bin_type(), al_stats.higher_pos.l, al_stats.higher_pos.r, n, mapped.L_and_R, false );
  175. generate_table( distant_mapped_bps_name.c_str(), "distant", "mapped bps", "mapped bps diff", distant_stats.longer_mapping.bin_type(), distant_stats.longer_mapping.l, distant_stats.longer_mapping.r, n, n_distant.count );
  176. generate_table( distant_ed_name.c_str(), "distant", "edit distance", "edit distance diff", distant_stats.lower_ed.bin_type(), distant_stats.lower_ed.l, distant_stats.lower_ed.r, n, n_distant.count );
  177. generate_table( distant_mapQ_name.c_str(), "distant", "mapQ", "mapQ diff", distant_stats.higher_mapQ.bin_type(), distant_stats.higher_mapQ.l, distant_stats.higher_mapQ.r, n, n_distant.count );
  178. generate_table( distant_mms_name.c_str(), "distant", "mms", "mms diff", distant_stats.lower_mms.bin_type(), distant_stats.lower_mms.l, distant_stats.lower_mms.r, n, n_distant.count );
  179. generate_table( distant_ins_name.c_str(), "distant", "ins", "ins diff", distant_stats.lower_ins.bin_type(), distant_stats.lower_ins.l, distant_stats.lower_ins.r, n, n_distant.count );
  180. generate_table( distant_dels_name.c_str(), "distant", "dels", "dels diff", distant_stats.lower_dels.bin_type(), distant_stats.lower_dels.l, distant_stats.lower_dels.r, n, n_distant.count );
  181. generate_table( distant_pos_name.c_str(), "distant", "position", "distance", distant_stats.higher_pos.bin_type(), distant_stats.higher_pos.l, distant_stats.higher_pos.r, n, n_distant.count, false );
  182. generate_table( discordant_mapped_bps_name.c_str(), "discordant", "mapped bps", "mapped bps diff", discordant_stats.longer_mapping.bin_type(), discordant_stats.longer_mapping.l, discordant_stats.longer_mapping.r, n, n_discordant.count );
  183. generate_table( discordant_ed_name.c_str(), "discordant", "edit distance", "edit distance diff", discordant_stats.lower_ed.bin_type(), discordant_stats.lower_ed.l, discordant_stats.lower_ed.r, n, n_discordant.count );
  184. generate_table( discordant_mapQ_name.c_str(), "discordant", "mapQ", "mapQ diff", discordant_stats.higher_mapQ.bin_type(), discordant_stats.higher_mapQ.l, discordant_stats.higher_mapQ.r, n, n_discordant.count );
  185. generate_table( discordant_mms_name.c_str(), "discordant", "mms", "mms diff", discordant_stats.lower_mms.bin_type(), discordant_stats.lower_mms.l, discordant_stats.lower_mms.r, n, n_discordant.count );
  186. generate_table( discordant_ins_name.c_str(), "discordant", "ins", "ins diff", discordant_stats.lower_ins.bin_type(), discordant_stats.lower_ins.l, discordant_stats.lower_ins.r, n, n_discordant.count );
  187. generate_table( discordant_dels_name.c_str(), "discordant", "dels", "dels diff", discordant_stats.lower_dels.bin_type(), discordant_stats.lower_dels.l, discordant_stats.lower_dels.r, n, n_discordant.count );
  188. generate_table( discordant_pos_name.c_str(), "discordant", "position", "distance", discordant_stats.higher_pos.bin_type(), discordant_stats.higher_pos.l, discordant_stats.higher_pos.r, n, n_discordant.count, false );
  189. FILE* html_output = fopen( report, "w" );
  190. if (html_output == NULL)
  191. {
  192. log_warning( stderr, "unable to write HTML report \"%s\"\n", report );
  193. return;
  194. }
  195. const Histogram<8> cum_different_ref = reverse_cumulative( n_different_ref );
  196. const Histogram<8> cum_distant = reverse_cumulative( n_distant );
  197. const Histogram<8> cum_discordant = reverse_cumulative( n_discordant );
  198. const uint32 HI_MAPQ_BIN = 6; // >= 32
  199. {
  200. html::html_object html( html_output );
  201. {
  202. const char* meta_list = "<meta http-equiv=\"refresh\" content=\"5\" />";
  203. html::header_object hd( html_output, "nv-aln-diff report", html::style(), meta_list );
  204. {
  205. html::body_object body( html_output );
  206. //
  207. // alignment stats
  208. //
  209. {
  210. html::table_object table( html_output, "alignment-stats", "stats", "alignment stats" );
  211. {
  212. html::tr_object tr( html_output, NULL );
  213. html::th_object( html_output, html::FORMATTED, NULL, "" );
  214. html::th_object( html_output, html::FORMATTED, NULL, "L = %s", aln_file_name1 );
  215. html::th_object( html_output, html::FORMATTED, NULL, "R = %s", aln_file_name2 );
  216. html::th_object( html_output, html::FORMATTED, NULL, "L & R" );
  217. html::th_object( html_output, html::FORMATTED, NULL, "L \\ R" );
  218. html::th_object( html_output, html::FORMATTED, NULL, "R \\ L" );
  219. }
  220. {
  221. html::tr_object tr( html_output, "class", "alt", NULL );
  222. html::th_object( html_output, html::FORMATTED, NULL, "mapped");
  223. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(mapped.L) / float(n) );
  224. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(mapped.R) / float(n) );
  225. html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(mapped.L_and_R) / float(n) );
  226. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(mapped.L_not_R) / float(n), 100.0f * float(mapped_L_not_R_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  227. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %% (%.3f %%)", 100.0f * float(mapped.R_not_L) / float(n), 100.0f * float(mapped_R_not_L_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  228. }
  229. {
  230. html::tr_object tr( html_output, NULL );
  231. html::th_object( html_output, html::FORMATTED, NULL, "unique");
  232. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(unique.L) / float(n) );
  233. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(unique.R) / float(n) );
  234. html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(unique.L_and_R) / float(n) );
  235. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(unique.L_not_R) / float(n), 100.0f * float(unique_L_not_R_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  236. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %% (%.3f %%)", 100.0f * float(unique.R_not_L) / float(n), 100.0f * float(unique_R_not_L_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  237. }
  238. {
  239. html::tr_object tr( html_output, "class", "alt", NULL );
  240. html::th_object( html_output, html::FORMATTED, NULL, "ambiguous");
  241. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(ambiguous.L) / float(n) );
  242. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(ambiguous.R) / float(n) );
  243. html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(ambiguous.L_and_R) / float(n) );
  244. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(ambiguous.L_not_R) / float(n), 100.0f * float(ambiguous_L_not_R_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  245. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %% (%.3f %%)", 100.0f * float(ambiguous.R_not_L) / float(n), 100.0f * float(ambiguous_R_not_L_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  246. }
  247. }
  248. //
  249. // discordance stats
  250. //
  251. {
  252. html::table_object table( html_output, "discordance-stats", "stats", "discordance stats" );
  253. {
  254. html::tr_object tr( html_output, NULL );
  255. html::th_object( html_output, html::FORMATTED, NULL, "" );
  256. html::th_object( html_output, html::FORMATTED, NULL, "items" );
  257. html::th_object( html_output, html::FORMATTED, NULL, "%% of total" );
  258. }
  259. {
  260. html::tr_object tr( html_output, NULL );
  261. html::th_object( html_output, html::FORMATTED, NULL, "different reference" );
  262. html::td_object( html_output, html::FORMATTED, NULL, "%.2f M (%.2f M)", float(cum_different_ref[0]) * 1.0e-6f, float(cum_different_ref[HI_MAPQ_BIN]) * 1.0e-6f );
  263. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref[0]) / float(n), 100.0f * float(cum_different_ref[HI_MAPQ_BIN]) / float(n) );
  264. }
  265. {
  266. html::tr_object tr( html_output, "class", "alt", NULL );
  267. html::th_object( html_output, html::FORMATTED, NULL, "distant" );
  268. html::td_object( html_output, html::FORMATTED, NULL, "%.2f M (%.2f M)", float(cum_distant[0]) * 1.0e-6f, float(cum_distant[HI_MAPQ_BIN]) * 1.0e-6f );
  269. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant[0]) / float(n), 100.0f * float(cum_distant[HI_MAPQ_BIN]) / float(n) );
  270. }
  271. {
  272. html::tr_object tr( html_output, NULL );
  273. html::th_object( html_output, html::FORMATTED, NULL, "discordant" );
  274. html::td_object( html_output, html::FORMATTED, NULL, "%.2f M (%.2f M)", float(cum_discordant[0]) * 1.0e-6f, float(cum_discordant[HI_MAPQ_BIN]) * 1.0e-6f );
  275. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant[0]) / float(n), 100.0f * float(cum_discordant[HI_MAPQ_BIN]) / float(n) );
  276. }
  277. }
  278. //
  279. // summary stats
  280. //
  281. {
  282. html::table_object table( html_output, "summary-stats", "stats", "summary stats" );
  283. generate_summary_header( html_output );
  284. generate_summary_cell( html_output, mapped_bps_name, "mapped bases", al_stats.longer_mapping, n );
  285. generate_summary_cell( html_output, ed_name, "edit distance",al_stats.lower_ed, n );
  286. generate_summary_cell( html_output, mapQ_name, "mapQ", al_stats.higher_mapQ, n );
  287. generate_summary_cell( html_output, mms_name, "mismatches", al_stats.lower_mms, n );
  288. generate_summary_cell( html_output, ins_name, "insertions", al_stats.lower_ins, n );
  289. generate_summary_cell( html_output, dels_name, "deletions", al_stats.lower_dels, n );
  290. generate_summary_cell( html_output, pos_name, "distance", al_stats.higher_pos, n );
  291. // ------------------------------------------- distant -------------------------------------------------- //
  292. generate_summary_header( html_output );
  293. generate_summary_cell( html_output, distant_mapped_bps_name, "mapped bases [distant]", distant_stats.longer_mapping, n );
  294. generate_summary_cell( html_output, distant_ed_name, "edit distance [distant]",distant_stats.lower_ed, n );
  295. generate_summary_cell( html_output, distant_mapQ_name, "mapQ [distant]", distant_stats.higher_mapQ, n );
  296. generate_summary_cell( html_output, distant_mms_name, "mismatches [distant]", distant_stats.lower_mms, n );
  297. generate_summary_cell( html_output, distant_ins_name, "insertions [distant]", distant_stats.lower_ins, n );
  298. generate_summary_cell( html_output, distant_dels_name, "deletions [distant]", distant_stats.lower_dels, n );
  299. generate_summary_cell( html_output, distant_pos_name, "distance [distant]", distant_stats.higher_pos, n );
  300. // ------------------------------------------- discordant ---------------------------------------------- //
  301. generate_summary_header( html_output );
  302. generate_summary_cell( html_output, discordant_mapped_bps_name, "mapped bases [discordant]", discordant_stats.longer_mapping, n );
  303. generate_summary_cell( html_output, discordant_ed_name, "edit distance [discordant]",discordant_stats.lower_ed, n );
  304. generate_summary_cell( html_output, discordant_mapQ_name, "mapQ [discordant]", discordant_stats.higher_mapQ, n );
  305. generate_summary_cell( html_output, discordant_mms_name, "mismatches [discordant]", discordant_stats.lower_mms, n );
  306. generate_summary_cell( html_output, discordant_ins_name, "insertions [discordant]", discordant_stats.lower_ins, n );
  307. generate_summary_cell( html_output, discordant_dels_name, "deletions [discordant]", discordant_stats.lower_dels, n );
  308. generate_summary_cell( html_output, discordant_pos_name, "distance [discordant]", discordant_stats.higher_pos, n );
  309. }
  310. // mapped L not R
  311. generate_table(
  312. html_output,
  313. "by mapQ",
  314. "mapped (L \\ R) vs (R \\ L)",
  315. LOG,
  316. mapped_L_not_R_by_mapQ,
  317. mapped_R_not_L_by_mapQ,
  318. n,
  319. n );
  320. // unique L not R
  321. generate_table(
  322. html_output,
  323. "by mapQ",
  324. "unique (L \\ R) vs (R \\ L)",
  325. LOG,
  326. unique_L_not_R_by_mapQ,
  327. unique_R_not_L_by_mapQ,
  328. n,
  329. n );
  330. // unique L not R
  331. generate_table(
  332. html_output,
  333. "by mapQ",
  334. "ambiguous (L \\ R) vs (R \\ L)",
  335. LOG,
  336. ambiguous_L_not_R_by_mapQ,
  337. ambiguous_R_not_L_by_mapQ,
  338. n,
  339. n );
  340. // different reference by mapQ
  341. generate_table(
  342. html_output,
  343. "mapped L & R",
  344. "different reference by mapQ",
  345. LOG,
  346. n_different_ref,
  347. mapped.L_and_R );
  348. // different reference by mapQ
  349. generate_table(
  350. html_output,
  351. "mapped L & R",
  352. "distant by mapQ",
  353. LOG,
  354. n_distant,
  355. mapped.L_and_R );
  356. // discordant reference by mapQ
  357. generate_table(
  358. html_output,
  359. "mapped L & R",
  360. "discordant by mapQ",
  361. LOG,
  362. n_discordant,
  363. mapped.L_and_R );
  364. }
  365. }
  366. }
  367. fclose( html_output );
  368. }
  369. } // namespace alndiff
  370. } // namespace nvbio