mem.cu 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // mem.cu
  28. //
  29. #include <stdio.h>
  30. #include <stdlib.h>
  31. #include <nvbio/basic/timer.h>
  32. #include <nvbio/basic/console.h>
  33. #include <nvbio/basic/vector.h>
  34. #include <nvbio/basic/packed_vector.h>
  35. #include <nvbio/basic/shared_pointer.h>
  36. #include <nvbio/basic/dna.h>
  37. #include <nvbio/strings/string_set.h>
  38. #include <nvbio/strings/infix.h>
  39. #include <nvbio/strings/seeds.h>
  40. #include <nvbio/fmindex/mem.h>
  41. #include <nvbio/io/sequence/sequence.h>
  42. #include <nvbio/io/sequence/sequence_mmap.h>
  43. #include <nvbio/io/fmindex/fmindex.h>
  44. using namespace nvbio;
  45. // main test entry point
  46. //
  47. int main(int argc, char* argv[])
  48. {
  49. //
  50. // perform some basic option parsing
  51. //
  52. const uint32 batch_reads = 512*1024;
  53. const uint32 batch_bps = 50*1024*1024;
  54. const char* reads = argv[argc-1];
  55. const char* index = argv[argc-2];
  56. uint32 max_reads = uint32(-1);
  57. uint32 min_intv = 1u;
  58. uint32 max_intv = 10000u;
  59. uint32 min_span = 19u;
  60. for (int i = 0; i < argc; ++i)
  61. {
  62. if (strcmp( argv[i], "-max-reads" ) == 0)
  63. max_reads = uint32( atoi( argv[++i] ) );
  64. else if (strcmp( argv[i], "-min-intv" ) == 0)
  65. min_intv = atoi( argv[++i] );
  66. else if (strcmp( argv[i], "-max-intv" ) == 0)
  67. max_intv = atoi( argv[++i] );
  68. else if (strcmp( argv[i], "-min-span" ) == 0)
  69. min_span = atoi( argv[++i] );
  70. }
  71. const uint32 fm_flags = io::FMIndexData::FORWARD |
  72. io::FMIndexData::REVERSE |
  73. io::FMIndexData::SA;
  74. SharedPointer<io::SequenceData> h_ref;
  75. h_ref = io::map_sequence_file( index );
  76. if (h_ref == NULL)
  77. h_ref = io::load_sequence_file( DNA, index );
  78. if (h_ref == NULL)
  79. {
  80. log_error(stderr, " failed loading reference \"%s\"\n", index);
  81. return 1u;
  82. }
  83. io::FMIndexData *h_fmi = NULL;
  84. io::FMIndexDataMMAP mmap_loader;
  85. io::FMIndexDataHost file_loader;
  86. if (mmap_loader.load( index ))
  87. h_fmi = &mmap_loader;
  88. else
  89. {
  90. if (!file_loader.load( index, fm_flags ))
  91. {
  92. log_error(stderr, " failed loading index \"%s\"\n", index);
  93. return 1u;
  94. }
  95. h_fmi = &file_loader;
  96. }
  97. // build its device version
  98. const io::SequenceDataDevice d_ref( *h_ref );
  99. const io::FMIndexDataDevice d_fmi( *h_fmi, fm_flags );
  100. typedef io::SequenceDataAccess<DNA> genome_access_type;
  101. typedef genome_access_type::sequence_stream_type genome_type;
  102. // fetch the genome string
  103. const genome_access_type d_genome_access( d_ref );
  104. const genome_type d_genome( d_genome_access.sequence_stream() );
  105. // open a read file
  106. log_info(stderr, " opening reads file... started\n");
  107. SharedPointer<io::SequenceDataStream> read_data_file(
  108. io::open_sequence_file(
  109. reads,
  110. io::Phred33,
  111. 2*max_reads,
  112. uint32(-1),
  113. io::SequenceEncoding( io::FORWARD | io::REVERSE_COMPLEMENT ) ) );
  114. // check whether the file opened correctly
  115. if (read_data_file == NULL || read_data_file->is_ok() == false)
  116. {
  117. log_error(stderr, " failed opening file \"%s\"\n", reads);
  118. return 1u;
  119. }
  120. log_info(stderr, " opening reads file... done\n");
  121. typedef io::FMIndexDataDevice::fm_index_type fm_index_type;
  122. typedef MEMFilterDevice<fm_index_type> mem_filter_type;
  123. // fetch the FM-index
  124. const fm_index_type f_index = d_fmi.index();
  125. const fm_index_type r_index = d_fmi.rindex();
  126. // create a MEM filter
  127. mem_filter_type mem_filter;
  128. const uint32 mems_batch = 16*1024*1024;
  129. nvbio::vector<device_tag,mem_filter_type::mem_type> mems( mems_batch );
  130. io::SequenceDataHost h_read_data;
  131. // load a batch of reads
  132. while (io::next( DNA_N, &h_read_data, read_data_file.get(), batch_reads, batch_bps ))
  133. {
  134. log_info(stderr, " loading reads... started\n");
  135. // copy it to the device
  136. const io::SequenceDataDevice d_read_data( h_read_data );
  137. const io::SequenceDataAccess<DNA_N> d_read_access( d_read_data );
  138. const uint32 n_reads = d_read_data.size() / 2;
  139. log_info(stderr, " loading reads... done\n");
  140. log_info(stderr, " %u reads\n", n_reads);
  141. log_info(stderr, " ranking MEMs... started\n");
  142. Timer timer;
  143. timer.start();
  144. mem_filter.rank(
  145. f_index,
  146. r_index,
  147. d_read_access.sequence_string_set(),
  148. min_intv,
  149. max_intv,
  150. min_span );
  151. cudaDeviceSynchronize();
  152. timer.stop();
  153. const uint64 n_mems = mem_filter.n_mems();
  154. log_info(stderr, " ranking MEMs... done\n");
  155. log_info(stderr, " %.1f avg ranges\n", float( mem_filter.n_ranges() ) / float( n_reads ) );
  156. log_info(stderr, " %.1f avg MEMs\n", float( n_mems ) / float( n_reads ) );
  157. log_info(stderr, " %.1f K reads/s\n", 1.0e-3f * float(n_reads) / timer.seconds());
  158. log_info(stderr, " locating MEMs... started\n");
  159. float locate_time = 0.0f;
  160. // loop through large batches of hits and locate & merge them
  161. for (uint64 mems_begin = 0; mems_begin < n_mems; mems_begin += mems_batch)
  162. {
  163. const uint64 mems_end = nvbio::min( mems_begin + mems_batch, n_mems );
  164. timer.start();
  165. mem_filter.locate(
  166. mems_begin,
  167. mems_end,
  168. mems.begin() );
  169. cudaDeviceSynchronize();
  170. timer.stop();
  171. locate_time += timer.seconds();
  172. log_verbose(stderr, "\r %5.2f%% (%4.1f M MEMs/s)",
  173. 100.0f * float( mems_end ) / float( n_mems ),
  174. 1.0e-6f * float( mems_end ) / locate_time );
  175. }
  176. log_verbose_cont(stderr, "\n" );
  177. log_info(stderr, " locating MEMs... done\n");
  178. }
  179. return 0;
  180. }