assembly_graph.h 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #pragma once
  28. #include "assembly_types.h"
  29. using namespace nvbio;
  30. #define ASSEMBLY_MIN_BASE_QUALITY 6
  31. struct debruijn_graph
  32. {
  33. uint32 kmer_size; // kmer_size used to build the graph
  34. uint32 n_nodes;
  35. uint32 n_edges;
  36. uint32 n_subgraphs;
  37. // global CSR representation
  38. D_VectorSetKmerCoord nodes;
  39. D_VectorU32 node_adjancency_map;
  40. D_VectorU32 node_adj_offests;
  41. D_VectorU32 node_out_degrees;
  42. D_VectorU32 node_in_degrees;
  43. D_VectorU32 edge_counts; // edge multiplicities
  44. D_VectorU32 edge_weights; // transitions probabilities
  45. D_VectorU8 edge_ref_flags; // flags each edge if the reference haplotype contains it
  46. D_VectorU32 node_in_adjancency_map;
  47. D_VectorU32 node_in_adj_offests;
  48. // local active region subgraphs
  49. D_VectorU32 subgraph_source_ids;
  50. D_VectorU32 subgraph_sink_ids;
  51. D_VectorU32 subgraph_n_nodes;
  52. // topological sort
  53. D_VectorU32 topo_sorted_uids; // topologically sorted nodes
  54. D_VectorU32 topo_sorted_offsets;
  55. // properties
  56. D_VectorU8 low_complexity;
  57. D_VectorU8 cycle_presence; // each sub-graph is marked 1 if it contains a cycle
  58. // kernel view
  59. struct view
  60. {
  61. D_VectorU32::plain_view_type node_adjancency_map;
  62. D_VectorU32::plain_view_type node_adj_offests;
  63. D_VectorU32::plain_view_type node_out_degrees;
  64. D_VectorU32::plain_view_type node_in_degrees;
  65. D_VectorU32::plain_view_type edge_counts;
  66. D_VectorU32::plain_view_type edge_weights;
  67. D_VectorU32::plain_view_type subgraph_source_ids;
  68. D_VectorU32::plain_view_type subgraph_sink_ids;
  69. D_VectorU32::plain_view_type subgraph_n_nodes;
  70. D_VectorU32::plain_view_type topo_sorted_uids;
  71. D_VectorU32::plain_view_type topo_sorted_offsets;
  72. D_VectorU8::plain_view_type cycle_presence;
  73. D_VectorU32::plain_view_type node_in_adjancency_map;
  74. D_VectorU32::plain_view_type node_in_adj_offests;
  75. };
  76. operator view() {
  77. view v = {
  78. plain_view(node_adjancency_map),
  79. plain_view(node_adj_offests),
  80. plain_view(node_out_degrees),
  81. plain_view(node_in_degrees),
  82. plain_view(edge_counts),
  83. plain_view(edge_weights),
  84. plain_view(subgraph_source_ids),
  85. plain_view(subgraph_sink_ids),
  86. plain_view(subgraph_n_nodes),
  87. plain_view(topo_sorted_uids),
  88. plain_view(topo_sorted_offsets),
  89. plain_view(cycle_presence),
  90. plain_view(node_in_adjancency_map),
  91. plain_view(node_in_adj_offests)
  92. };
  93. return v;
  94. }
  95. // construction
  96. bool construct_graph(const D_SequenceSet& sequence_set, const D_VectorU32& d_active_region_ids,
  97. const uint32 kmer_size, const bool allow_low_complexity);
  98. void compute_edge_weights();
  99. void compute_in_degrees();
  100. void compute_in_adjacencies();
  101. void compute_subgraph_n_nodes();
  102. void compute_complexity();
  103. // validation
  104. bool is_low_complexity(const uint32 subgraph_id) { return low_complexity[subgraph_id];}
  105. bool has_cycles(const uint32 subgraph_id) { return cycle_presence[subgraph_id];}
  106. // search
  107. void topological_sort();
  108. void find_k_best_paths(const uint32 k);
  109. // debugging
  110. void print_dot_graph(const D_SequenceSet& sequence_set);
  111. };
  112. #include "assembly_graph_inl.h"