pe_analyzer.cpp 42 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <nvbio-aln-diff/pe_analyzer.h>
  28. #include <nvbio-aln-diff/html.h>
  29. #include <nvbio-aln-diff/utils.h>
  30. namespace nvbio {
  31. namespace alndiff {
  32. PEAnalyzer::PEAnalyzer(Filter& filter, const bool id_check) :
  33. m_filter( filter ),
  34. m_id_check( id_check )
  35. {
  36. n = 0;
  37. n_mismatched = 0;
  38. }
  39. void PEAnalyzer::push(
  40. const AlignmentPair& alnL,
  41. const AlignmentPair& alnR)
  42. {
  43. if ((m_id_check && alnL[0].read_id != alnR[0].read_id) ||
  44. (m_id_check && alnL[1].read_id != alnR[1].read_id) ||
  45. alnL[0].read_len != alnR[0].read_len ||
  46. alnL[1].read_len != alnR[1].read_len)
  47. {
  48. n_mismatched++;
  49. return;
  50. }
  51. mapped.push( alnL.is_mapped(), alnR.is_mapped() );
  52. paired.push( alnL.is_mapped_paired(), alnR.is_mapped_paired() );
  53. unique.push( alnL.is_unique_paired(), alnR.is_unique_paired() );
  54. ambiguous.push( alnL.is_ambiguous_paired(), alnR.is_ambiguous_paired() );
  55. if ((alnL.is_mapped_paired() == true) && (alnR.is_mapped_paired() == false)) paired_L_not_R_by_mapQ.push( log_bin( alnL.mapQ() ) );
  56. if ((alnR.is_mapped_paired() == true) && (alnL.is_mapped_paired() == false)) paired_R_not_L_by_mapQ.push( log_bin( alnR.mapQ() ) );
  57. if ((alnL.is_unique_paired() == true) && (alnR.is_unique_paired() == false)) unique_L_not_R_by_mapQ.push( log_bin( alnL.mapQ() ) );
  58. if ((alnR.is_unique_paired() == true) && (alnL.is_unique_paired() == false)) unique_R_not_L_by_mapQ.push( log_bin( alnR.mapQ() ) );
  59. if ((alnL.is_ambiguous_paired() == true) && (alnR.is_ambiguous_paired() == false)) ambiguous_L_not_R_by_mapQ.push( log_bin( alnL.mapQ() ) );
  60. if ((alnR.is_ambiguous_paired() == true) && (alnL.is_ambiguous_paired() == false)) ambiguous_R_not_L_by_mapQ.push( log_bin( alnR.mapQ() ) );
  61. // edit-distance correlation stats
  62. //if (alnL.is_mapped_paired()) sec_ed_by_ed_l.push( alnL.ed()+1, alnL.has_second() ? alnL.sec_ed()+1 : 0u );
  63. //else sec_ed_by_ed_l.push( 0u, 0u );
  64. // edit-distance correlation stats
  65. //if (alnR.is_mapped_paired()) sec_ed_by_ed_r.push( alnR.ed()+1, alnR.has_second() ? alnR.sec_ed()+1 : 0u );
  66. //else sec_ed_by_ed_r.push( 0u, 0u );
  67. if (alnL.is_mapped_paired() && alnR.is_mapped_paired())
  68. {
  69. const uint32 mapQ_bin = log_bin( alnR.mapQ() );
  70. uint32 read_flags = 0u;
  71. if ((alnL[0].ref_id != alnR[0].ref_id) &&
  72. (alnL[1].ref_id != alnR[1].ref_id))
  73. {
  74. n_different_ref12.push( mapQ_bin );
  75. n_different_ref.push( mapQ_bin );
  76. n_distant12.push( mapQ_bin );
  77. n_distant.push( mapQ_bin );
  78. if ((alnL.is_unique_paired() == true) &&
  79. (alnR.is_unique_paired() == true))
  80. {
  81. n_different_ref_unique.push( mapQ_bin );
  82. n_distant_unique.push( mapQ_bin );
  83. }
  84. if ((alnL.is_ambiguous_paired() == false) &&
  85. (alnR.is_ambiguous_paired() == false))
  86. {
  87. n_different_ref_not_ambiguous.push( mapQ_bin );
  88. n_distant_not_ambiguous.push( mapQ_bin );
  89. }
  90. read_flags |= Filter::DISTANT;
  91. read_flags |= Filter::DIFFERENT_REF;
  92. }
  93. else if (alnL[0].ref_id != alnR[0].ref_id)
  94. {
  95. n_different_ref1.push( mapQ_bin );
  96. n_different_ref.push( mapQ_bin );
  97. n_distant1.push( mapQ_bin );
  98. n_distant.push( mapQ_bin );
  99. if ((alnL.is_unique_paired() == true) &&
  100. (alnR.is_unique_paired() == true))
  101. {
  102. n_different_ref_unique.push( mapQ_bin );
  103. n_distant_unique.push( mapQ_bin );
  104. }
  105. if ((alnL.is_ambiguous_paired() == false) &&
  106. (alnR.is_ambiguous_paired() == false))
  107. {
  108. n_different_ref_not_ambiguous.push( mapQ_bin );
  109. n_distant_not_ambiguous.push( mapQ_bin );
  110. }
  111. read_flags |= Filter::DISTANT;
  112. read_flags |= Filter::DIFFERENT_REF;
  113. }
  114. else if (alnL[1].ref_id != alnR[1].ref_id)
  115. {
  116. n_different_ref2.push( mapQ_bin );
  117. n_different_ref.push( mapQ_bin );
  118. n_distant2.push( mapQ_bin );
  119. n_distant.push( mapQ_bin );
  120. if ((alnL.is_unique_paired() == true) &&
  121. (alnR.is_unique_paired() == true))
  122. {
  123. n_different_ref_unique.push( mapQ_bin );
  124. n_distant_unique.push( mapQ_bin );
  125. }
  126. if ((alnL.is_ambiguous_paired() == false) &&
  127. (alnR.is_ambiguous_paired() == false))
  128. {
  129. n_different_ref_not_ambiguous.push( mapQ_bin );
  130. n_distant_not_ambiguous.push( mapQ_bin );
  131. }
  132. read_flags |= Filter::DISTANT;
  133. read_flags |= Filter::DIFFERENT_REF;
  134. }
  135. else if (alndiff::distant( alnL[0], alnR[0] ) && alndiff::distant( alnL[1], alnR[1] ))
  136. {
  137. n_distant12.push( mapQ_bin );
  138. n_distant.push( mapQ_bin );
  139. if ((alnL.is_unique_paired() == true) &&
  140. (alnR.is_unique_paired() == true))
  141. n_distant_unique.push( mapQ_bin );
  142. if ((alnL.is_ambiguous_paired() == false) &&
  143. (alnR.is_ambiguous_paired() == false))
  144. n_distant_not_ambiguous.push( mapQ_bin );
  145. read_flags |= Filter::DISTANT;
  146. }
  147. else if (alndiff::distant( alnL[0], alnR[0] ))
  148. {
  149. n_distant1.push( mapQ_bin );
  150. n_distant.push( mapQ_bin );
  151. if ((alnL.is_unique_paired() == true) &&
  152. (alnR.is_unique_paired() == true))
  153. n_distant_unique.push( mapQ_bin );
  154. if ((alnL.is_ambiguous_paired() == false) &&
  155. (alnR.is_ambiguous_paired() == false))
  156. n_distant_not_ambiguous.push( mapQ_bin );
  157. read_flags |= Filter::DISTANT;
  158. }
  159. else if (alndiff::distant( alnL[1], alnR[1] ))
  160. {
  161. n_distant2.push( mapQ_bin );
  162. n_distant.push( mapQ_bin );
  163. if ((alnL.is_unique_paired() == true) &&
  164. (alnR.is_unique_paired() == true))
  165. n_distant_unique.push( mapQ_bin );
  166. if ((alnL.is_ambiguous_paired() == false) &&
  167. (alnR.is_ambiguous_paired() == false))
  168. n_distant_not_ambiguous.push( mapQ_bin );
  169. read_flags |= Filter::DISTANT;
  170. }
  171. if ((alnL[0].is_rc() != alnR[0].is_rc()) &&
  172. (alnL[1].is_rc() != alnR[1].is_rc()))
  173. {
  174. n_discordant12.push( mapQ_bin );
  175. n_discordant.push( mapQ_bin );
  176. if ((alnL.is_unique_paired() == true) &&
  177. (alnR.is_unique_paired() == true))
  178. n_discordant_unique.push( mapQ_bin );
  179. if ((alnL.is_ambiguous_paired() == false) &&
  180. (alnR.is_ambiguous_paired() == false))
  181. n_discordant_not_ambiguous.push( mapQ_bin );
  182. read_flags |= Filter::DISCORDANT;
  183. }
  184. if (alnL[0].is_rc() != alnR[0].is_rc())
  185. {
  186. n_discordant1.push( mapQ_bin );
  187. n_discordant.push( mapQ_bin );
  188. if ((alnL.is_unique_paired() == true) &&
  189. (alnR.is_unique_paired() == true))
  190. n_discordant_unique.push( mapQ_bin );
  191. if ((alnL.is_ambiguous_paired() == false) &&
  192. (alnR.is_ambiguous_paired() == false))
  193. n_discordant_not_ambiguous.push( mapQ_bin );
  194. read_flags |= Filter::DISCORDANT;
  195. }
  196. else if (alnL[1].is_rc() != alnR[1].is_rc())
  197. {
  198. n_discordant2.push( mapQ_bin );
  199. n_discordant.push( mapQ_bin );
  200. if ((alnL.is_unique_paired() == true) &&
  201. (alnR.is_unique_paired() == true))
  202. n_discordant_unique.push( mapQ_bin );
  203. if ((alnL.is_ambiguous_paired() == false) &&
  204. (alnR.is_ambiguous_paired() == false))
  205. n_discordant_not_ambiguous.push( mapQ_bin );
  206. read_flags |= Filter::DISCORDANT;
  207. }
  208. const uint32 length_bin = read_length_bin( alnL.read_len() );
  209. // generic stats
  210. {
  211. // score
  212. m_filter( al_stats.higher_score.push( alnL.score(), alnR.score(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::SCORE, alnL.read_id() );
  213. // ed
  214. m_filter( al_stats.lower_ed.push( alnL.ed(), alnR.ed(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::ED, alnL.read_id() );
  215. // mapQ
  216. m_filter( al_stats.higher_mapQ.push( alnL.mapQ(), alnR.mapQ(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::MAPQ, alnL.read_id() );
  217. // longer mapping
  218. al_stats.longer_mapping.push( alnL.mapped_read_bases(), alnR.mapped_read_bases(), length_bin, alnL.mapQ(), alnR.mapQ() );
  219. al_stats.lower_subs.push( alnL.subs(), alnR.subs(), length_bin, alnL.mapQ(), alnR.mapQ() );
  220. m_filter( al_stats.lower_mms.push( alnL.n_mm(), alnR.n_mm(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::MMS, alnL.read_id() );
  221. m_filter( al_stats.lower_ins.push( alnL.ins(), alnR.ins(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::INS, alnL.read_id() );
  222. m_filter( al_stats.lower_dels.push( alnL.dels(), alnR.dels(), length_bin, alnL.mapQ(), alnR.mapQ() ), read_flags, Filter::DELS, alnL.read_id() );
  223. al_stats.higher_pos.push( alnL[0].pos, alnR[0].pos, length_bin, alnL.mapQ(), alnR.mapQ() );
  224. // TODO: this is a 2d quantity
  225. //if (alnL.has_second())
  226. sec_score_by_score_l.push( log_bin( alnL.score() ), log_bin( alnL.score() - alnL.sec_score() ) );
  227. //if (alnR.has_second())
  228. sec_score_by_score_r.push( log_bin( alnR.score() ), log_bin( alnR.score() - alnR.sec_score() ) );
  229. }
  230. if (read_flags & Filter::DISTANT)
  231. {
  232. // ed
  233. distant_stats.lower_ed.push( alnL.ed(), alnR.ed(), length_bin, alnL.mapQ(), alnR.mapQ() );
  234. // mapQ
  235. distant_stats.higher_mapQ.push( alnL.mapQ(), alnR.mapQ(), length_bin, alnL.mapQ(), alnR.mapQ() );
  236. // longer mapping
  237. distant_stats.longer_mapping.push( alnL.mapped_read_bases(), alnR.mapped_read_bases(), length_bin, alnL.mapQ(), alnR.mapQ() );
  238. distant_stats.lower_subs.push( alnL.subs(), alnR.subs(), length_bin, alnL.mapQ(), alnR.mapQ() );
  239. distant_stats.lower_mms.push( alnL.n_mm(), alnR.n_mm(), length_bin, alnL.mapQ(), alnR.mapQ() );
  240. distant_stats.lower_ins.push( alnL.ins(), alnR.ins(), length_bin, alnL.mapQ(), alnR.mapQ() );
  241. distant_stats.lower_dels.push( alnL.dels(), alnR.dels(), length_bin, alnL.mapQ(), alnR.mapQ() );
  242. distant_stats.higher_pos.push( alnL[0].pos, alnR[0].pos, length_bin, alnL.mapQ(), alnR.mapQ() );
  243. // TODO: this is a 2d quantity
  244. }
  245. if (read_flags & Filter::DISCORDANT)
  246. {
  247. // ed
  248. discordant_stats.lower_ed.push( alnL.ed(), alnR.ed(), length_bin, alnL.mapQ(), alnR.mapQ() );
  249. // mapQ
  250. discordant_stats.higher_mapQ.push( alnL.mapQ(), alnR.mapQ(), length_bin, alnL.mapQ(), alnR.mapQ() );
  251. // longer mapping
  252. discordant_stats.longer_mapping.push( alnL.mapped_read_bases(), alnR.mapped_read_bases(), length_bin, alnL.mapQ(), alnR.mapQ() );
  253. discordant_stats.lower_subs.push( alnL.subs(), alnR.subs(), length_bin, alnL.mapQ(), alnR.mapQ() );
  254. discordant_stats.lower_mms.push( alnL.n_mm(), alnR.n_mm(), length_bin, alnL.mapQ(), alnR.mapQ() );
  255. discordant_stats.lower_ins.push( alnL.ins(), alnR.ins(), length_bin, alnL.mapQ(), alnR.mapQ() );
  256. discordant_stats.lower_dels.push( alnL.dels(), alnR.dels(), length_bin, alnL.mapQ(), alnR.mapQ() );
  257. discordant_stats.higher_pos.push( alnL[0].pos, alnR[0].pos, length_bin, alnL.mapQ(), alnR.mapQ() );
  258. // TODO: this is a 2d quantity
  259. }
  260. }
  261. ++n;
  262. }
  263. namespace {
  264. void generate_summary_header(FILE* html_output)
  265. {
  266. html::tr_object tr( html_output, NULL );
  267. html::th_object( html_output, html::FORMATTED, NULL, "" );
  268. html::th_object( html_output, html::FORMATTED, NULL, "better" );
  269. html::th_object( html_output, html::FORMATTED, "class", "red", NULL, "worse" );
  270. }
  271. template <typename StatsType>
  272. void generate_summary_cell(FILE* html_output, const std::string file_name, const char* type, const StatsType& stats, const uint32 n, const char* cls)
  273. {
  274. char link_name[1024];
  275. sprintf( link_name, "<a href=\"%s\">%s</a>", local_file( file_name ), type );
  276. html::tr_object tr( html_output, "class", cls, NULL );
  277. html::th_object( html_output, html::FORMATTED, NULL, link_name );
  278. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(stats.l.diff_hist.all_but(0))/float(n) );
  279. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(stats.r.diff_hist.all_but(0))/float(n) );
  280. }
  281. } // anonymous namespace
  282. void PEAnalyzer::generate_report(const char* aln_file_nameL, const char* aln_file_nameR, const char* report)
  283. {
  284. if (report == NULL)
  285. return;
  286. const std::string mapped_bps_name = generate_file_name( report, "mapped-bps" );
  287. const std::string score_name = generate_file_name( report, "score" );
  288. const std::string ed_name = generate_file_name( report, "ed" );
  289. const std::string mapQ_name = generate_file_name( report, "mapQ" );
  290. const std::string mms_name = generate_file_name( report, "mms" );
  291. const std::string ins_name = generate_file_name( report, "ins" );
  292. const std::string dels_name = generate_file_name( report, "dels" );
  293. const std::string pos_name = generate_file_name( report, "pos" );
  294. const std::string distant_mapped_bps_name = generate_file_name( report, "distant_stats.mapped-bps" );
  295. const std::string distant_ed_name = generate_file_name( report, "distant_stats.ed" );
  296. const std::string distant_mapQ_name = generate_file_name( report, "distant_stats.mapQ" );
  297. const std::string distant_mms_name = generate_file_name( report, "distant_stats.mms" );
  298. const std::string distant_ins_name = generate_file_name( report, "distant_stats.ins" );
  299. const std::string distant_dels_name = generate_file_name( report, "distant_stats.dels" );
  300. const std::string distant_pos_name = generate_file_name( report, "distant_stats.pos" );
  301. const std::string discordant_mapped_bps_name = generate_file_name( report, "discordant_stats.mapped-bps" );
  302. const std::string discordant_ed_name = generate_file_name( report, "discordant_stats.ed" );
  303. const std::string discordant_mapQ_name = generate_file_name( report, "discordant_stats.mapQ" );
  304. const std::string discordant_mms_name = generate_file_name( report, "discordant_stats.mms" );
  305. const std::string discordant_ins_name = generate_file_name( report, "discordant_stats.ins" );
  306. const std::string discordant_dels_name = generate_file_name( report, "discordant_stats.dels" );
  307. const std::string discordant_pos_name = generate_file_name( report, "discordant_stats.pos" );
  308. generate_table( mapped_bps_name.c_str(), "paired L & R", "mapped bps", "mapped bps diff", al_stats.longer_mapping.bin_type(), al_stats.longer_mapping.l, al_stats.longer_mapping.r, n, paired.L_and_R );
  309. generate_table( score_name.c_str(), "paired L & R", "score", "score diff", al_stats.higher_score.bin_type(), al_stats.higher_score.l, al_stats.higher_score.r, n, paired.L_and_R );
  310. generate_table( ed_name.c_str(), "paired L & R", "edit distance", "edit distance diff", al_stats.lower_ed.bin_type(), al_stats.lower_ed.l, al_stats.lower_ed.r, n, paired.L_and_R );
  311. generate_table( mapQ_name.c_str(), "paired L & R", "mapQ", "mapQ diff", al_stats.higher_mapQ.bin_type(), al_stats.higher_mapQ.l, al_stats.higher_mapQ.r, n, paired.L_and_R );
  312. generate_table( mms_name.c_str(), "paired L & R", "mms", "mms diff", al_stats.lower_mms.bin_type(), al_stats.lower_mms.l, al_stats.lower_mms.r, n, paired.L_and_R );
  313. generate_table( ins_name.c_str(), "paired L & R", "ins", "ins diff", al_stats.lower_ins.bin_type(), al_stats.lower_ins.l, al_stats.lower_ins.r, n, paired.L_and_R );
  314. generate_table( dels_name.c_str(), "paired L & R", "dels", "dels diff", al_stats.lower_dels.bin_type(), al_stats.lower_dels.l, al_stats.lower_dels.r, n, paired.L_and_R );
  315. generate_table( pos_name.c_str(), "paired L & R", "position", "distance", al_stats.higher_pos.bin_type(), al_stats.higher_pos.l, al_stats.higher_pos.r, n, paired.L_and_R, false );
  316. generate_table( distant_mapped_bps_name.c_str(), "distant", "mapped bps", "mapped bps diff", distant_stats.longer_mapping.bin_type(), distant_stats.longer_mapping.l, distant_stats.longer_mapping.r, n, n_distant.count );
  317. generate_table( distant_ed_name.c_str(), "distant", "edit distance", "edit distance diff", distant_stats.lower_ed.bin_type(), distant_stats.lower_ed.l, distant_stats.lower_ed.r, n, n_distant.count );
  318. generate_table( distant_mapQ_name.c_str(), "distant", "mapQ", "mapQ diff", distant_stats.higher_mapQ.bin_type(), distant_stats.higher_mapQ.l, distant_stats.higher_mapQ.r, n, n_distant.count );
  319. generate_table( distant_mms_name.c_str(), "distant", "mms", "mms diff", distant_stats.lower_mms.bin_type(), distant_stats.lower_mms.l, distant_stats.lower_mms.r, n, n_distant.count );
  320. generate_table( distant_ins_name.c_str(), "distant", "ins", "ins diff", distant_stats.lower_ins.bin_type(), distant_stats.lower_ins.l, distant_stats.lower_ins.r, n, n_distant.count );
  321. generate_table( distant_dels_name.c_str(), "distant", "dels", "dels diff", distant_stats.lower_dels.bin_type(), distant_stats.lower_dels.l, distant_stats.lower_dels.r, n, n_distant.count );
  322. generate_table( distant_pos_name.c_str(), "distant", "position", "distance", distant_stats.higher_pos.bin_type(), distant_stats.higher_pos.l, distant_stats.higher_pos.r, n, n_distant.count, false );
  323. generate_table( discordant_mapped_bps_name.c_str(), "discordant", "mapped bps", "mapped bps diff", discordant_stats.longer_mapping.bin_type(), discordant_stats.longer_mapping.l, discordant_stats.longer_mapping.r, n, n_discordant.count );
  324. generate_table( discordant_ed_name.c_str(), "discordant", "edit distance","edit distance diff", discordant_stats.lower_ed.bin_type(), discordant_stats.lower_ed.l, discordant_stats.lower_ed.r, n, n_discordant.count );
  325. generate_table( discordant_mapQ_name.c_str(), "discordant", "mapQ", "mapQ diff", discordant_stats.higher_mapQ.bin_type(), discordant_stats.higher_mapQ.l, discordant_stats.higher_mapQ.r, n, n_discordant.count );
  326. generate_table( discordant_mms_name.c_str(), "discordant", "mms", "mms diff", discordant_stats.lower_mms.bin_type(), discordant_stats.lower_mms.l, discordant_stats.lower_mms.r, n, n_discordant.count );
  327. generate_table( discordant_ins_name.c_str(), "discordant", "ins", "ins diff", discordant_stats.lower_ins.bin_type(), discordant_stats.lower_ins.l, discordant_stats.lower_ins.r, n, n_discordant.count );
  328. generate_table( discordant_dels_name.c_str(), "discordant", "dels", "dels diff", discordant_stats.lower_dels.bin_type(), discordant_stats.lower_dels.l, discordant_stats.lower_dels.r, n, n_discordant.count );
  329. generate_table( discordant_pos_name.c_str(), "discordant", "position", "distance", discordant_stats.higher_pos.bin_type(), discordant_stats.higher_pos.l, discordant_stats.higher_pos.r, n, n_discordant.count, false );
  330. FILE* html_output = fopen( report, "w" );
  331. if (html_output == NULL)
  332. {
  333. log_warning( stderr, "unable to write HTML report \"%s\"\n", report );
  334. return;
  335. }
  336. std::vector<std::string> log_bins;
  337. for (uint32 y = 0; y < 11; ++y)
  338. {
  339. if (y == 0)
  340. log_bins.push_back( "0" );
  341. else if (y == 1)
  342. log_bins.push_back( "1" );
  343. else if (y == 2)
  344. log_bins.push_back( "2" );
  345. else
  346. {
  347. char buffer[16];
  348. sprintf(buffer, "2^%u", y-1);
  349. log_bins.push_back( buffer );
  350. }
  351. }
  352. log_bins.push_back( "inf" );
  353. std::vector<std::string> ed_bins;
  354. ed_bins.push_back("-");
  355. for (uint32 y = 0; y < 16; ++y)
  356. {
  357. char buffer[16];
  358. sprintf(buffer, "%u", y);
  359. ed_bins.push_back( buffer );
  360. }
  361. const Histogram<8> cum_different_ref = reverse_cumulative( n_different_ref );
  362. const Histogram<8> cum_different_ref12 = reverse_cumulative( n_different_ref12 );
  363. const Histogram<8> cum_different_ref1 = reverse_cumulative( n_different_ref1 );
  364. const Histogram<8> cum_different_ref2 = reverse_cumulative( n_different_ref2 );
  365. const Histogram<8> cum_different_ref_unique = reverse_cumulative( n_different_ref_unique );
  366. const Histogram<8> cum_different_ref_not_ambiguous = reverse_cumulative( n_different_ref_not_ambiguous );
  367. const Histogram<8> cum_distant = reverse_cumulative( n_distant );
  368. const Histogram<8> cum_distant12 = reverse_cumulative( n_distant12 );
  369. const Histogram<8> cum_distant1 = reverse_cumulative( n_distant1 );
  370. const Histogram<8> cum_distant2 = reverse_cumulative( n_distant2 );
  371. const Histogram<8> cum_distant_unique = reverse_cumulative( n_distant_unique );
  372. const Histogram<8> cum_distant_not_ambiguous = reverse_cumulative( n_distant_not_ambiguous );
  373. const Histogram<8> cum_discordant = reverse_cumulative( n_discordant );
  374. const Histogram<8> cum_discordant12 = reverse_cumulative( n_discordant12 );
  375. const Histogram<8> cum_discordant1 = reverse_cumulative( n_discordant1 );
  376. const Histogram<8> cum_discordant2 = reverse_cumulative( n_discordant2 );
  377. const Histogram<8> cum_discordant_unique = reverse_cumulative( n_discordant_unique );
  378. const Histogram<8> cum_discordant_not_ambiguous = reverse_cumulative( n_discordant_not_ambiguous );
  379. const uint32 HI_MAPQ_BIN = 6; // >= 32
  380. {
  381. html::html_object html( html_output );
  382. {
  383. const char* meta_list = "<meta http-equiv=\"refresh\" content=\"5\" />";
  384. html::header_object hd( html_output, "nv-aln-diff report", html::style(), meta_list );
  385. {
  386. html::body_object body( html_output );
  387. //
  388. // alignment stats
  389. //
  390. {
  391. html::table_object table( html_output, "alignment-stats", "stats", "alignment stats" );
  392. {
  393. html::tr_object tr( html_output, NULL );
  394. html::th_object( html_output, html::FORMATTED, NULL, "" );
  395. html::th_object( html_output, html::FORMATTED, NULL, "L = %s", aln_file_nameL );
  396. html::th_object( html_output, html::FORMATTED, NULL, "R = %s", aln_file_nameR );
  397. html::th_object( html_output, html::FORMATTED, NULL, "L & R" );
  398. html::th_object( html_output, html::FORMATTED, NULL, "L \\ R" );
  399. html::th_object( html_output, html::FORMATTED, NULL, "R \\ L" );
  400. }
  401. {
  402. html::tr_object tr( html_output, "class", "alt", NULL );
  403. html::th_object( html_output, html::FORMATTED, NULL, "mapped");
  404. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(mapped.L) / float(n) );
  405. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(mapped.R) / float(n) );
  406. html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(mapped.L_and_R) / float(n) );
  407. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(mapped.L_not_R) / float(n) );
  408. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(mapped.R_not_L) / float(n) );
  409. }
  410. {
  411. html::tr_object tr( html_output, NULL );
  412. html::th_object( html_output, html::FORMATTED, NULL, "paired");
  413. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(paired.L) / float(n) );
  414. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(paired.R) / float(n) );
  415. html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(paired.L_and_R) / float(n) );
  416. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(paired.L_not_R) / float(n), 100.0f * float(paired_L_not_R_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  417. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %% (%.3f %%)", 100.0f * float(paired.R_not_L) / float(n), 100.0f * float(paired_R_not_L_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  418. }
  419. {
  420. html::tr_object tr( html_output, "class", "alt", NULL );
  421. html::th_object( html_output, html::FORMATTED, NULL, "unique");
  422. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(unique.L) / float(n) );
  423. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(unique.R) / float(n) );
  424. html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(unique.L_and_R) / float(n) );
  425. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(unique.L_not_R) / float(n), 100.0f * float(unique_L_not_R_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  426. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %% (%.3f %%)", 100.0f * float(unique.R_not_L) / float(n), 100.0f * float(unique_R_not_L_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  427. }
  428. {
  429. html::tr_object tr( html_output, NULL );
  430. html::th_object( html_output, html::FORMATTED, NULL, "ambiguous");
  431. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %%", 100.0f * float(ambiguous.L) / float(n) );
  432. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %%", 100.0f * float(ambiguous.R) / float(n) );
  433. html::td_object( html_output, html::FORMATTED, "class", "green", NULL, "%5.2f %%", 100.0f * float(ambiguous.L_and_R) / float(n) );
  434. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(ambiguous.L_not_R) / float(n), 100.0f * float(ambiguous_L_not_R_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  435. html::td_object( html_output, html::FORMATTED, "class", "pink", NULL, "%5.2f %% (%.3f %%)", 100.0f * float(ambiguous.R_not_L) / float(n), 100.0f * float(ambiguous_R_not_L_by_mapQ[HI_MAPQ_BIN]) / float(n) );
  436. }
  437. }
  438. //
  439. // discordance stats
  440. //
  441. {
  442. html::table_object table( html_output, "discordance-stats", "stats", "discordance stats" );
  443. {
  444. html::tr_object tr( html_output, NULL );
  445. html::th_object( html_output, html::FORMATTED, NULL, "" );
  446. html::th_object( html_output, html::FORMATTED, NULL, "items" );
  447. html::th_object( html_output, html::FORMATTED, NULL, "%% of total" );
  448. html::th_object( html_output, html::FORMATTED, NULL, "mate 1" );
  449. html::th_object( html_output, html::FORMATTED, NULL, "mate 2" );
  450. html::th_object( html_output, html::FORMATTED, NULL, "mate 1 & 2" );
  451. html::th_object( html_output, html::FORMATTED, NULL, "unique" );
  452. html::th_object( html_output, html::FORMATTED, NULL, "not-ambiguous" );
  453. }
  454. {
  455. html::tr_object tr( html_output, NULL );
  456. html::th_object( html_output, html::FORMATTED, NULL, "different reference" );
  457. html::td_object( html_output, html::FORMATTED, NULL, "%.2f M (%.2f M)", float(cum_different_ref[0]) * 1.0e-6f, float(cum_different_ref[HI_MAPQ_BIN]) * 1.0e-6f );
  458. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref[0]) / float(n), 100.0f * float(cum_different_ref[HI_MAPQ_BIN]) / float(n) );
  459. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref1[0]) / float(n), 100.0f * float(cum_different_ref1[HI_MAPQ_BIN]) / float(n) );
  460. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref2[0]) / float(n), 100.0f * float(cum_different_ref2[HI_MAPQ_BIN]) / float(n) );
  461. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref12[0]) / float(n), 100.0f * float(cum_different_ref12[HI_MAPQ_BIN]) / float(n) );
  462. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref_unique[0]) / float(n), 100.0f * float(cum_different_ref_unique[HI_MAPQ_BIN]) / float(n) );
  463. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_different_ref_not_ambiguous[0]) / float(n), 100.0f * float(cum_different_ref_not_ambiguous[HI_MAPQ_BIN]) / float(n) );
  464. }
  465. {
  466. html::tr_object tr( html_output, "class", "alt", NULL );
  467. html::th_object( html_output, html::FORMATTED, NULL, "distant" );
  468. html::td_object( html_output, html::FORMATTED, NULL, "%.2f M (%.2f M)", float(cum_distant[0]) * 1.0e-6f, float(cum_distant[HI_MAPQ_BIN]) * 1.0e-6f );
  469. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant[0]) / float(n), 100.0f * float(cum_distant[HI_MAPQ_BIN]) / float(n) );
  470. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant1[0]) / float(n), 100.0f * float(cum_distant1[HI_MAPQ_BIN]) / float(n) );
  471. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant2[0]) / float(n), 100.0f * float(cum_distant2[HI_MAPQ_BIN]) / float(n) );
  472. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant12[0]) / float(n), 100.0f * float(cum_distant12[HI_MAPQ_BIN]) / float(n) );
  473. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant_unique[0]) / float(n), 100.0f * float(cum_distant_unique[HI_MAPQ_BIN]) / float(n) );
  474. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_distant_not_ambiguous[0]) / float(n), 100.0f * float(cum_distant_not_ambiguous[HI_MAPQ_BIN]) / float(n) );
  475. }
  476. {
  477. html::tr_object tr( html_output, NULL );
  478. html::th_object( html_output, html::FORMATTED, NULL, "discordant" );
  479. html::td_object( html_output, html::FORMATTED, NULL, "%.2f M (%.2f M)", float(cum_discordant[0]) * 1.0e-6f, float(cum_discordant[HI_MAPQ_BIN]) * 1.0e-6f );
  480. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant[0]) / float(n), 100.0f * float(cum_discordant[HI_MAPQ_BIN]) / float(n) );
  481. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant1[0]) / float(n), 100.0f * float(cum_discordant1[HI_MAPQ_BIN]) / float(n) );
  482. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant2[0]) / float(n), 100.0f * float(cum_discordant2[HI_MAPQ_BIN]) / float(n) );
  483. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant12[0]) / float(n), 100.0f * float(cum_discordant12[HI_MAPQ_BIN]) / float(n) );
  484. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant_unique[0]) / float(n), 100.0f * float(cum_discordant_unique[HI_MAPQ_BIN]) / float(n) );
  485. html::td_object( html_output, html::FORMATTED, NULL, "%5.2f %% (%.3f %%)", 100.0f * float(cum_discordant_not_ambiguous[0]) / float(n), 100.0f * float(cum_discordant_not_ambiguous[HI_MAPQ_BIN]) / float(n) );
  486. }
  487. }
  488. //
  489. // summary stats
  490. //
  491. {
  492. html::table_object table( html_output, "summary-stats", "stats", "summary stats" );
  493. // ------------------------------------------- generic -------------------------------------------------- //
  494. generate_summary_header( html_output );
  495. generate_summary_cell( html_output, mapped_bps_name, "mapped bases", al_stats.longer_mapping, n, "none" );
  496. generate_summary_cell( html_output, score_name, "score", al_stats.higher_score, n, "alt" );
  497. generate_summary_cell( html_output, ed_name, "edit distance",al_stats.lower_ed, n, "none" );
  498. generate_summary_cell( html_output, mapQ_name, "mapQ", al_stats.higher_mapQ, n, "alt" );
  499. generate_summary_cell( html_output, mms_name, "mismatches", al_stats.lower_mms, n, "none" );
  500. generate_summary_cell( html_output, ins_name, "insertions", al_stats.lower_ins, n, "alt" );
  501. generate_summary_cell( html_output, dels_name, "deletions", al_stats.lower_dels, n, "none" );
  502. generate_summary_cell( html_output, pos_name, "distance", al_stats.higher_pos, n, "alt" );
  503. // ------------------------------------------- distant -------------------------------------------------- //
  504. generate_summary_header( html_output );
  505. generate_summary_cell( html_output, distant_mapped_bps_name, "mapped bases [distant]", distant_stats.longer_mapping, n, "none" );
  506. generate_summary_cell( html_output, distant_ed_name, "edit distance [distant]",distant_stats.lower_ed, n, "alt" );
  507. generate_summary_cell( html_output, distant_mapQ_name, "mapQ [distant]", distant_stats.higher_mapQ, n, "none" );
  508. generate_summary_cell( html_output, distant_mms_name, "mismatches [distant]", distant_stats.lower_mms, n, "alt" );
  509. generate_summary_cell( html_output, distant_ins_name, "insertions [distant]", distant_stats.lower_ins, n, "none" );
  510. generate_summary_cell( html_output, distant_dels_name, "deletions [distant]", distant_stats.lower_dels, n, "alt" );
  511. generate_summary_cell( html_output, distant_pos_name, "distance [distant]", distant_stats.higher_pos, n, "none" );
  512. // ------------------------------------------- discordant ---------------------------------------------- //
  513. generate_summary_header( html_output );
  514. generate_summary_cell( html_output, discordant_mapped_bps_name, "mapped bases [discordant]", discordant_stats.longer_mapping, n, "alt" );
  515. generate_summary_cell( html_output, discordant_ed_name, "edit distance [discordant]",discordant_stats.lower_ed, n, "none" );
  516. generate_summary_cell( html_output, discordant_mapQ_name, "mapQ [discordant]", discordant_stats.higher_mapQ, n, "alt" );
  517. generate_summary_cell( html_output, discordant_mms_name, "mismatches [discordant]", discordant_stats.lower_mms, n, "none" );
  518. generate_summary_cell( html_output, discordant_ins_name, "insertions [discordant]", discordant_stats.lower_ins, n, "alt" );
  519. generate_summary_cell( html_output, discordant_dels_name, "deletions [discordant]", discordant_stats.lower_dels, n, "none" );
  520. generate_summary_cell( html_output, discordant_pos_name, "distance [discordant]", discordant_stats.higher_pos, n, "alt" );
  521. }
  522. // paired L not R
  523. generate_table(
  524. html_output,
  525. "by mapQ",
  526. "paired (L \\ R) vs (R \\ L)",
  527. LOG,
  528. paired_L_not_R_by_mapQ,
  529. paired_R_not_L_by_mapQ,
  530. n,
  531. n );
  532. // unique L not R
  533. generate_table(
  534. html_output,
  535. "by mapQ",
  536. "unique (L \\ R) vs (R \\ L)",
  537. LOG,
  538. unique_L_not_R_by_mapQ,
  539. unique_R_not_L_by_mapQ,
  540. n,
  541. n );
  542. // unique L not R
  543. generate_table(
  544. html_output,
  545. "by mapQ",
  546. "ambiguous (L \\ R) vs (R \\ L)",
  547. LOG,
  548. ambiguous_L_not_R_by_mapQ,
  549. ambiguous_R_not_L_by_mapQ,
  550. n,
  551. n );
  552. // different reference by mapQ
  553. generate_table(
  554. html_output,
  555. "paired L & R",
  556. "different reference by mapQ",
  557. LOG,
  558. n_different_ref,
  559. paired.L_and_R );
  560. // different reference by mapQ
  561. generate_table(
  562. html_output,
  563. "paired L & R",
  564. "distant by mapQ",
  565. LOG,
  566. n_distant,
  567. paired.L_and_R );
  568. // discordant reference by mapQ
  569. generate_table(
  570. html_output,
  571. "paired L & R",
  572. "discordant by mapQ",
  573. LOG,
  574. n_discordant,
  575. paired.L_and_R );
  576. // second score by score
  577. generate_table2d(
  578. html_output,
  579. "paired L & R",
  580. "second score",
  581. sec_score_by_score_l,
  582. sec_score_by_score_r,
  583. "score",
  584. log_bins,
  585. log_bins,
  586. n,
  587. paired.L_and_R,
  588. false );
  589. // second score by score
  590. /*generate_table2d(
  591. html_output,
  592. "paired L & R",
  593. "second ed",
  594. sec_ed_by_ed_l,
  595. sec_ed_by_ed_r,
  596. "ed",
  597. ed_bins,
  598. ed_bins,
  599. n,
  600. n,
  601. false );*/
  602. }
  603. }
  604. }
  605. fclose( html_output );
  606. }
  607. } // namespace alndiff
  608. } // namespace nvbio