nvSetBWT.cu 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // nvSetBWT.cu
  28. //
  29. //#define NVBIO_CUDA_DEBUG
  30. #include <cub/cub.cuh>
  31. #include <nvbio/basic/omp.h>
  32. #include "input_thread.h"
  33. #include <nvbio/basic/pipeline.h>
  34. #include <nvbio/sufsort/sufsort.h>
  35. #include <nvbio/sufsort/sufsort_utils.h>
  36. #include <nvbio/sufsort/file_bwt.h>
  37. #include <nvbio/sufsort/bwte.h>
  38. #include <nvbio/basic/timer.h>
  39. #include <nvbio/basic/shared_pointer.h>
  40. #include <nvbio/basic/exceptions.h>
  41. #include <nvbio/basic/dna.h>
  42. #include <nvbio/basic/vector.h>
  43. #include <nvbio/basic/system.h>
  44. #include <nvbio/basic/cuda/arch.h>
  45. #include <nvbio/strings/string_set.h>
  46. #include <nvbio/io/sequence/sequence.h>
  47. #include <stdio.h>
  48. #include <stdlib.h>
  49. #include <vector>
  50. #include <algorithm>
  51. using namespace nvbio;
  52. static const uint32 SYMBOL_SIZE = io::SequenceDataAccess<DNA>::SEQUENCE_BITS;
  53. static const bool BIG_ENDIAN = io::SequenceDataAccess<DNA>::SEQUENCE_BIG_ENDIAN;
  54. typedef io::SequenceDataAccess<DNA>::sequence_storage_iterator storage_iterator;
  55. typedef io::SequenceDataAccess<DNA>::index_iterator offsets_iterator;
  56. typedef BWTEContext<SYMBOL_SIZE,BIG_ENDIAN,storage_iterator,offsets_iterator> BWTE_context_type;
  57. ///
  58. /// A small class implementing a Pipeline stage reading sequence batches from a file
  59. ///
  60. struct SortStage
  61. {
  62. typedef io::SequenceDataHost argument_type;
  63. typedef BWTEBlock return_type;
  64. /// constructor
  65. ///
  66. ///\param file input sequence file
  67. ///\param max_strings maximum number of strings per batch
  68. ///\param max_bps maximum number of base pairs per batch
  69. ///
  70. SortStage(BWTE_context_type& context) : m_context( context ) {}
  71. /// fill the next batch
  72. ///
  73. bool process(PipelineContext& context)
  74. {
  75. // fetch the input
  76. io::SequenceDataHost* h_read_data = context.input<io::SequenceDataHost>(0);
  77. // fetch the output
  78. BWTEBlock* block = context.output<BWTEBlock>();
  79. // build a view
  80. const io::SequenceDataAccess<DNA> h_read_view( *h_read_data );
  81. m_context.sort_block(
  82. 0u,
  83. h_read_data->size(),
  84. h_read_view.sequence_string_set(),
  85. *block );
  86. return true;
  87. }
  88. BWTE_context_type& m_context;
  89. };
  90. ///
  91. /// A small class implementing a Pipeline stage reading sequence batches from a file
  92. ///
  93. struct SinkStage
  94. {
  95. typedef io::SequenceDataHost argument_type;
  96. /// constructor
  97. ///
  98. ///\param file input sequence file
  99. ///\param max_strings maximum number of strings per batch
  100. ///\param max_bps maximum number of base pairs per batch
  101. ///
  102. SinkStage(
  103. BWTE_context_type& context,
  104. PagedText<SYMBOL_SIZE,BIG_ENDIAN>& bwt,
  105. SparseSymbolSet& dollars) :
  106. m_context( context ),
  107. m_bwt( bwt ),
  108. m_dollars( dollars ),
  109. n_reads( 0 ),
  110. m_time( 0.0f )
  111. {}
  112. /// fill the next batch
  113. ///
  114. bool process(PipelineContext& context)
  115. {
  116. const ScopedTimer<float> timer( &m_time );
  117. // fetch the input
  118. io::SequenceDataHost* h_read_data = context.input<io::SequenceDataHost>( 0 );
  119. // build a view
  120. const io::SequenceDataAccess<DNA> h_read_view( *h_read_data );
  121. log_info(stderr, " block [%u, %u] (%u / %.2fG bps, %.1f M suffixes/s)\n",
  122. n_reads, n_reads + h_read_data->size(), h_read_data->bps(),
  123. 1.0e-9f * m_bwt.size(),
  124. m_time ? (1.0e-6f * m_bwt.size()) / m_time : 0.0f );
  125. log_debug(stderr," peak memory : %.1f GB\n", float( peak_resident_memory() ) / float(1024*1024*1024));
  126. /*
  127. m_context.append_block(
  128. 0u,
  129. h_read_data->size(),
  130. h_read_view.sequence_string_set(),
  131. m_bwt,
  132. m_dollars,
  133. true );
  134. */
  135. // fetch the second input
  136. BWTEBlock* block = context.input<BWTEBlock>( 1 );
  137. m_context.merge_block(
  138. 0u,
  139. h_read_data->size(),
  140. h_read_view.sequence_string_set(),
  141. *block,
  142. m_bwt,
  143. m_dollars,
  144. true );
  145. n_reads += h_read_data->size();
  146. return true;
  147. }
  148. BWTE_context_type& m_context;
  149. PagedText<SYMBOL_SIZE,BIG_ENDIAN>& m_bwt;
  150. SparseSymbolSet& m_dollars;
  151. uint32 n_reads;
  152. float m_time;
  153. };
  154. int main(int argc, char* argv[])
  155. {
  156. if (argc < 2)
  157. {
  158. log_visible(stderr, "nvSetBWT - Copyright 2013-2014, NVIDIA Corporation\n");
  159. log_info(stderr, "usage:\n");
  160. log_info(stderr, " nvSetBWT [options] input_file output_file\n");
  161. log_info(stderr, " options:\n");
  162. log_info(stderr, " -v | --verbosity int (0-6) [5]\n");
  163. log_info(stderr, " -c | --compression string [1R] (e.g. \"1\", ..., \"9\", \"1R\")\n");
  164. log_info(stderr, " -t | --threads int [auto]\n");
  165. log_info(stderr, " -b | --bucketing int [16] (# of bits used for bucketing)\n");
  166. log_info(stderr, " -F | --skip-forward\n");
  167. log_info(stderr, " -R | --skip-reverse\n");
  168. log_info(stderr, " output formats:\n");
  169. log_info(stderr, " .txt ASCII\n");
  170. log_info(stderr, " .txt.gz ASCII, gzip compressed\n");
  171. log_info(stderr, " .txt.bgz ASCII, block-gzip compressed\n");
  172. log_info(stderr, " .bwt 2-bit packed binary\n");
  173. log_info(stderr, " .bwt.gz 2-bit packed binary, gzip compressed\n");
  174. log_info(stderr, " .bwt.bgz 2-bit packed binary, block-gzip compressed\n");
  175. log_info(stderr, " .bwt4 4-bit packed binary\n");
  176. log_info(stderr, " .bwt4.gz 4-bit packed binary, gzip compressed\n");
  177. log_info(stderr, " .bwt4.bgz 4-bit packed binary, block-gzip compressed\n");
  178. return 0;
  179. }
  180. const char* reads_name = argv[argc-2];
  181. const char* output_name = argv[argc-1];
  182. bool forward = true;
  183. bool reverse = true;
  184. const char* comp_level = "1R";
  185. io::QualityEncoding qencoding = io::Phred33;
  186. int threads = 0;
  187. for (int i = 0; i < argc - 2; ++i)
  188. {
  189. if ((strcmp( argv[i], "-v" ) == 0) ||
  190. (strcmp( argv[i], "-verbosity" ) == 0) ||
  191. (strcmp( argv[i], "--verbosity" ) == 0))
  192. {
  193. set_verbosity( Verbosity( atoi( argv[++i] ) ) );
  194. }
  195. else if ((strcmp( argv[i], "-F" ) == 0) ||
  196. (strcmp( argv[i], "--skip-forward" ) == 0)) // skip forward strand
  197. {
  198. forward = false;
  199. }
  200. else if ((strcmp( argv[i], "-R" ) == 0) ||
  201. (strcmp( argv[i], "--skip-reverse" ) == 0)) // skip reverse strand
  202. {
  203. reverse = false;
  204. }
  205. else if ((strcmp( argv[i], "-c" ) == 0) ||
  206. (strcmp( argv[i], "--compression" ) == 0)) // setup compression level
  207. {
  208. comp_level = argv[++i];
  209. }
  210. else if ((strcmp( argv[i], "-t" ) == 0) ||
  211. (strcmp( argv[i], "--threads" ) == 0)) // setup number of threads
  212. {
  213. threads = atoi( argv[++i] );
  214. }
  215. }
  216. try
  217. {
  218. log_visible(stderr,"nvSetBWT... started\n");
  219. // build an output file
  220. SharedPointer<SetBWTHandler> output_handler = SharedPointer<SetBWTHandler>( open_bwt_file( output_name, comp_level ) );
  221. if (output_handler == NULL)
  222. {
  223. log_error(stderr, " failed to create an output handler\n");
  224. return 1;
  225. }
  226. // gather device memory stats
  227. size_t free_device, total_device;
  228. cudaMemGetInfo(&free_device, &total_device);
  229. cuda::check_error("cuda-check");
  230. log_stats(stderr, " device has %ld of %ld MB free\n", free_device/1024/1024, total_device/1024/1024);
  231. #ifdef _OPENMP
  232. // now set the number of CPU threads
  233. omp_set_num_threads( threads > 0 ? threads : omp_get_num_procs() );
  234. omp_set_nested(1);
  235. #pragma omp parallel
  236. {
  237. log_verbose(stderr, " running on multiple threads (%d)\n", omp_get_thread_num());
  238. }
  239. #endif
  240. uint32 encoding_flags = 0u;
  241. if (forward) encoding_flags |= io::FORWARD;
  242. if (reverse) encoding_flags |= io::REVERSE_COMPLEMENT;
  243. log_visible(stderr, "opening read file \"%s\"\n", reads_name);
  244. SharedPointer<nvbio::io::SequenceDataStream> read_data_file(
  245. nvbio::io::open_sequence_file(
  246. reads_name,
  247. qencoding,
  248. uint32(-1),
  249. uint32(-1),
  250. io::SequenceEncoding( encoding_flags ) )
  251. );
  252. if (read_data_file == NULL || read_data_file->is_ok() == false)
  253. {
  254. log_error(stderr, " failed opening file \"%s\"\n", reads_name);
  255. return false;
  256. }
  257. // output vectors
  258. PagedText<SYMBOL_SIZE,BIG_ENDIAN> bwt;
  259. SparseSymbolSet dollars;
  260. // get the current device
  261. int current_device;
  262. cudaGetDevice( &current_device );
  263. // build a BWTEContext
  264. BWTE_context_type bwte_context( current_device );
  265. // find out how big a block can we alloc
  266. uint32 max_block_suffixes = 256*1024*1024;
  267. uint32 max_block_strings = 16*1024*1024;
  268. while (bwte_context.needed_device_memory( max_block_strings, max_block_suffixes ) + 256u*1024u*1024u >= free_device)
  269. max_block_suffixes /= 2;
  270. log_verbose(stderr, " block size: %u\n", max_block_suffixes);
  271. // reserve enough space for the block processing
  272. bwte_context.reserve( max_block_strings, max_block_suffixes );
  273. cudaMemGetInfo(&free_device, &total_device);
  274. log_stats(stderr, " device has %ld of %ld MB free\n", free_device/1024/1024, total_device/1024/1024);
  275. // build the input stage
  276. InputStage input_stage( read_data_file.get(), max_block_strings, max_block_suffixes - max_block_strings );
  277. // build the sort stage
  278. SortStage sort_stage( bwte_context );
  279. // build the sink
  280. SinkStage sink_stage( bwte_context, bwt, dollars );
  281. // build the pipeline
  282. Pipeline pipeline;
  283. const uint32 in0 = pipeline.append_stage( &input_stage, 4u );
  284. const uint32 in1 = pipeline.append_stage( &sort_stage, 4u );
  285. const uint32 out = pipeline.append_sink( &sink_stage );
  286. pipeline.add_dependency( in0, out );
  287. pipeline.add_dependency( in0, in1 );
  288. pipeline.add_dependency( in1, out );
  289. Timer timer;
  290. timer.start();
  291. // and run it!
  292. pipeline.run();
  293. log_info(stderr," writing output... started\n");
  294. // write out the results
  295. for (uint32 i = 0; i < bwt.page_count(); ++i)
  296. {
  297. // find the dollars corresponding to this page
  298. const uint64 page_begin = bwt.get_page_offset(i);
  299. const uint64 page_end = bwt.get_page_offset(i+1);
  300. const uint64 dollars_begin = nvbio::lower_bound_index(
  301. page_begin,
  302. dollars.pos(),
  303. dollars.size() );
  304. const uint64 dollars_end = nvbio::lower_bound_index(
  305. page_end,
  306. dollars.pos(),
  307. dollars.size() );
  308. //log_debug(stderr," page[%u] : %llu symbols (%llu,%llu), %llu dollars (%llu,%llu)\n", i, page_end - page_begin, page_begin, page_end, dollars_end - dollars_begin, dollars_begin, dollars_end);
  309. // and output the page
  310. output_handler->process(
  311. bwt.get_page_size(i),
  312. SYMBOL_SIZE,
  313. (const uint32*)bwt.get_page(i),
  314. dollars_end - dollars_begin,
  315. dollars.pos() + dollars_begin,
  316. dollars.ids() + dollars_begin );
  317. }
  318. log_info(stderr," writing output... done\n");
  319. timer.stop();
  320. const float time = timer.seconds();
  321. log_verbose(stderr," total time : %.1fs\n", time);
  322. log_verbose(stderr," peak memory : %.1f GB\n", float( peak_resident_memory() ) / float(1024*1024*1024));
  323. log_visible(stderr,"nvSetBWT... done\n");
  324. }
  325. catch (nvbio::cuda_error e)
  326. {
  327. log_error(stderr, "caught a nvbio::cuda_error exception:\n");
  328. log_error(stderr, " %s\n", e.what());
  329. return 1;
  330. }
  331. catch (nvbio::bad_alloc e)
  332. {
  333. log_error(stderr, "caught a nvbio::bad_alloc exception:\n");
  334. log_error(stderr, " %s\n", e.what());
  335. return 1;
  336. }
  337. catch (nvbio::logic_error e)
  338. {
  339. log_error(stderr, "caught a nvbio::logic_error exception:\n");
  340. log_error(stderr, " %s\n", e.what());
  341. return 1;
  342. }
  343. catch (nvbio::runtime_error e)
  344. {
  345. log_error(stderr, "caught a nvbio::runtime_error exception:\n");
  346. log_error(stderr, " %s\n", e.what());
  347. return 1;
  348. }
  349. catch (std::bad_alloc e)
  350. {
  351. log_error(stderr, "caught a std::bad_alloc exception:\n");
  352. log_error(stderr, " %s\n", e.what());
  353. return 1;
  354. }
  355. catch (std::logic_error e)
  356. {
  357. log_error(stderr, "caught a std::logic_error exception:\n");
  358. log_error(stderr, " %s\n", e.what());
  359. return 1;
  360. }
  361. catch (std::runtime_error e)
  362. {
  363. log_error(stderr, "caught a std::runtime_error exception:\n");
  364. log_error(stderr, " %s\n", e.what());
  365. return 1;
  366. }
  367. catch (...)
  368. {
  369. log_error(stderr, "caught an unknown exception!\n");
  370. return 1;
  371. }
  372. return 0;
  373. }