fmmap.cu 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // fmmap.cu
  28. //
  29. #include <stdio.h>
  30. #include <stdlib.h>
  31. #include <nvbio/basic/timer.h>
  32. #include <nvbio/basic/console.h>
  33. #include <nvbio/basic/vector.h>
  34. #include <nvbio/basic/shared_pointer.h>
  35. #include <nvbio/basic/dna.h>
  36. #include <nvbio/strings/string_set.h>
  37. #include <nvbio/strings/infix.h>
  38. #include <nvbio/strings/seeds.h>
  39. #include <nvbio/fmindex/filter.h>
  40. #include <nvbio/io/sequence/sequence.h>
  41. #include <nvbio/io/sequence/sequence_encoder.h>
  42. #include <nvbio/io/fmindex/fmindex.h>
  43. #include <nvbio/alignment/alignment.h>
  44. #include <nvbio/alignment/batched.h>
  45. #include "util.h"
  46. using namespace nvbio;
  47. // alignment params
  48. //
  49. struct Params
  50. {
  51. uint32 seed_len;
  52. uint32 seed_intv;
  53. uint32 merge_intv;
  54. };
  55. // query stats
  56. //
  57. struct Stats
  58. {
  59. Stats() :
  60. time(0),
  61. extract_time(0),
  62. rank_time(0),
  63. locate_time(0),
  64. align_time(0),
  65. reads(0),
  66. aligned(0),
  67. queries(0),
  68. occurrences(0) {}
  69. float time;
  70. float extract_time;
  71. float rank_time;
  72. float locate_time;
  73. float align_time;
  74. uint64 reads;
  75. uint64 aligned;
  76. uint64 queries;
  77. uint64 occurrences;
  78. };
  79. // the pipeline state
  80. //
  81. struct Pipeline
  82. {
  83. typedef io::FMIndexDataDevice::fm_index_type fm_index_type;
  84. typedef FMIndexFilterDevice<fm_index_type> fm_filter_type;
  85. Params params; // program options
  86. SharedPointer<io::SequenceDataDevice> ref_data; // reference data
  87. SharedPointer<io::FMIndexDataDevice> fm_data; // fm-index data
  88. fm_filter_type fm_filter; // fm-index filter
  89. };
  90. // transform an (index-pos,seed-id) hit into a diagonal (text-pos = index-pos - seed-pos, read-id)
  91. struct hit_to_diagonal
  92. {
  93. typedef uint2 argument_type;
  94. typedef uint2 result_type;
  95. // constructor
  96. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  97. hit_to_diagonal(const string_set_infix_coord_type* _seed_coords) : seed_coords(_seed_coords) {}
  98. // functor operator
  99. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  100. uint2 operator() (const uint2 hit) const
  101. {
  102. const uint32 index_pos = hit.x;
  103. const uint32 seed_id = hit.y;
  104. const string_set_infix_coord_type seed = seed_coords[ seed_id ];
  105. const uint32 read_pos = infix_begin( seed );
  106. const uint32 read_id = string_id( seed );
  107. return make_uint2( index_pos - read_pos, read_id );
  108. }
  109. const string_set_infix_coord_type* seed_coords;
  110. };
  111. // extract a set of uniformly spaced seeds from a string-set and return it as an InfixSet
  112. //
  113. template <typename system_tag, typename string_set_type>
  114. InfixSet<string_set_type, const string_set_infix_coord_type*>
  115. extract_seeds(
  116. const string_set_type string_set, // the input string-set
  117. const uint32 seed_len, // the seeds length
  118. const uint32 seed_interval, // the spacing between seeds
  119. nvbio::vector<system_tag,string_set_infix_coord_type>& seed_coords) // the output vector of seed coordinates
  120. {
  121. // enumerate all seeds
  122. const uint32 n_seeds = enumerate_string_set_seeds(
  123. string_set,
  124. uniform_seeds_functor<>( seed_len, seed_interval ),
  125. seed_coords );
  126. // and build the output infix-set
  127. return InfixSet<string_set_type, const string_set_infix_coord_type*>(
  128. n_seeds,
  129. string_set,
  130. nvbio::plain_view( seed_coords ) );
  131. }
  132. // a functor to extract the read infixes from the hit diagonals
  133. //
  134. struct read_infixes
  135. {
  136. // constructor
  137. NVBIO_HOST_DEVICE
  138. read_infixes(const io::ConstSequenceDataView reads) :
  139. m_reads( reads ) {}
  140. // functor operator
  141. NVBIO_HOST_DEVICE
  142. string_infix_coord_type operator() (const uint2 diagonal) const
  143. {
  144. const io::SequenceDataAccess<DNA_N> reads( m_reads );
  145. const uint32 read_id = diagonal.y;
  146. // fetch the read range
  147. return reads.get_range( read_id );
  148. }
  149. const io::ConstSequenceDataView m_reads;
  150. };
  151. // a functor to extract the genome infixes from the hit diagonals
  152. //
  153. template <uint32 BAND_LEN>
  154. struct genome_infixes
  155. {
  156. typedef const io::SequenceDataAccess<DNA_N,io::ConstSequenceDataView> read_access_type;
  157. // constructor
  158. NVBIO_HOST_DEVICE
  159. genome_infixes(const uint32 genome_len, const io::ConstSequenceDataView reads) :
  160. m_genome_len( genome_len ),
  161. m_reads( reads ) {}
  162. // functor operator
  163. NVBIO_HOST_DEVICE
  164. string_infix_coord_type operator() (const uint2 diagonal) const
  165. {
  166. const io::SequenceDataAccess<DNA_N> reads( m_reads );
  167. const uint32 read_id = diagonal.y;
  168. const uint32 text_pos = diagonal.x;
  169. // fetch the read range
  170. const uint2 read_range = reads.get_range( read_id );
  171. const uint32 read_len = read_range.y - read_range.x;
  172. // compute the segment of text to align to
  173. const uint32 genome_begin = text_pos > BAND_LEN/2 ? text_pos - BAND_LEN/2 : 0u;
  174. const uint32 genome_end = nvbio::min( genome_begin + read_len + BAND_LEN, m_genome_len );
  175. return make_uint2( genome_begin, genome_end );
  176. }
  177. const uint32 m_genome_len;
  178. const io::ConstSequenceDataView m_reads;
  179. };
  180. // a functor to extract the score from a sink
  181. //
  182. struct sink_score
  183. {
  184. typedef aln::BestSink<int16> argument_type;
  185. typedef int16 result_type;
  186. // functor operator
  187. NVBIO_HOST_DEVICE
  188. int16 operator() (const aln::BestSink<int16>& sink) const { return sink.score; }
  189. };
  190. // perform q-gram index mapping
  191. //
  192. void map(
  193. Pipeline& pipeline,
  194. const io::SequenceDataDevice& reads,
  195. nvbio::vector<device_tag,int16>& best_scores,
  196. Stats& stats)
  197. {
  198. typedef io::SequenceDataAccess<DNA> genome_access_type;
  199. typedef genome_access_type::sequence_stream_type genome_string;
  200. typedef io::SequenceDataAccess<DNA_N> read_access_type;
  201. typedef read_access_type::sequence_string_set_type read_string_set_type;
  202. typedef read_access_type::sequence_stream_type read_stream;
  203. typedef string_set_infix_coord_type infix_coord_type;
  204. typedef nvbio::vector<device_tag,infix_coord_type> infix_vector_type;
  205. typedef InfixSet<read_string_set_type, const string_set_infix_coord_type*> seed_string_set_type;
  206. // fetch the program options
  207. const Params& params = pipeline.params;
  208. // fetch the genome string
  209. const genome_access_type genome_access( *pipeline.ref_data );
  210. const uint32 genome_len = genome_access.bps();
  211. const genome_string genome( genome_access.sequence_stream() );
  212. // fetch an fm-index view
  213. const Pipeline::fm_index_type fm_index = pipeline.fm_data->index();
  214. // fetch the fm-index filter
  215. Pipeline::fm_filter_type& fm_filter = pipeline.fm_filter;
  216. // prepare some vectors to store the query qgrams
  217. infix_vector_type seed_coords;
  218. Timer timer;
  219. timer.start();
  220. const read_access_type reads_access( reads );
  221. const read_string_set_type read_string_set = reads_access.sequence_string_set();
  222. const seed_string_set_type seed_string_set = extract_seeds(
  223. read_string_set,
  224. params.seed_len,
  225. params.seed_intv,
  226. seed_coords );
  227. cudaDeviceSynchronize();
  228. timer.stop();
  229. const float extract_time = timer.seconds();
  230. stats.queries += seed_string_set.size();
  231. stats.extract_time += extract_time;
  232. //
  233. // search the sorted seeds with the FM-index filter
  234. //
  235. const uint32 batch_size = 16*1024*1024;
  236. typedef uint2 hit_type; // each hit will be an (index-pos,seed-id) coordinate pair
  237. // prepare storage for the output hits
  238. nvbio::vector<device_tag,hit_type> hits( batch_size );
  239. nvbio::vector<device_tag,uint32> out_reads( batch_size );
  240. nvbio::vector<device_tag,int16> out_scores( batch_size );
  241. nvbio::vector<device_tag,uint8> temp_storage;
  242. timer.start();
  243. // first step: rank the query seeds
  244. const uint64 n_hits = fm_filter.rank( fm_index, seed_string_set );
  245. cudaDeviceSynchronize();
  246. timer.stop();
  247. stats.rank_time += timer.seconds();
  248. stats.occurrences += n_hits;
  249. nvbio::vector<device_tag, aln::BestSink<int16> > sinks( batch_size );
  250. nvbio::vector<device_tag,string_infix_coord_type> genome_infix_coords( batch_size );
  251. nvbio::vector<device_tag,string_infix_coord_type> read_infix_coords( batch_size );
  252. static const uint32 BAND_LEN = 31;
  253. // loop through large batches of hits and locate & merge them
  254. for (uint64 hits_begin = 0; hits_begin < n_hits; hits_begin += batch_size)
  255. {
  256. const uint64 hits_end = nvbio::min( hits_begin + batch_size, n_hits );
  257. timer.start();
  258. fm_filter.locate(
  259. hits_begin,
  260. hits_end,
  261. hits.begin() );
  262. cudaDeviceSynchronize();
  263. timer.stop();
  264. stats.locate_time += timer.seconds();
  265. // transform the (index-pos,seed-id) hit coordinates into diagonals, (text-pos = index-pos - seed-pos, read-id)
  266. thrust::transform(
  267. hits.begin(),
  268. hits.begin() + hits_end - hits_begin,
  269. hits.begin(),
  270. hit_to_diagonal( nvbio::plain_view( seed_coords ) ) );
  271. timer.start();
  272. // build the set of read infixes
  273. thrust::transform(
  274. hits.begin(),
  275. hits.begin() + hits_end - hits_begin,
  276. read_infix_coords.begin(),
  277. read_infixes( nvbio::plain_view( reads ) ) );
  278. // build the set of genome infixes
  279. thrust::transform(
  280. hits.begin(),
  281. hits.begin() + hits_end - hits_begin,
  282. genome_infix_coords.begin(),
  283. genome_infixes<BAND_LEN>( genome_len, nvbio::plain_view( reads ) ) );
  284. typedef nvbio::vector<device_tag,string_infix_coord_type>::const_iterator infix_iterator;
  285. const SparseStringSet<read_stream,infix_iterator> read_infix_set(
  286. hits_end - hits_begin,
  287. reads_access.sequence_stream(),
  288. read_infix_coords.begin() );
  289. const SparseStringSet<genome_string,infix_iterator> genome_infix_set(
  290. hits_end - hits_begin,
  291. genome,
  292. genome_infix_coords.begin() );
  293. typedef aln::MyersTag<5u> myers_dna5_tag;
  294. //const aln::SimpleGotohScheme gotoh( 2, -2, -5, -3 );
  295. aln::batch_banded_alignment_score<BAND_LEN>(
  296. aln::make_edit_distance_aligner<aln::SEMI_GLOBAL, myers_dna5_tag>(),
  297. //aln::make_gotoh_aligner<aln::LOCAL>( gotoh ),
  298. read_infix_set,
  299. genome_infix_set,
  300. sinks.begin(),
  301. aln::DeviceThreadScheduler(),
  302. reads.max_sequence_len(),
  303. reads.max_sequence_len() + BAND_LEN );
  304. cudaDeviceSynchronize();
  305. timer.stop();
  306. stats.align_time += timer.seconds();
  307. // compute the best score for each read in this batch;
  308. // note that we divide the string-id by 2 to merge results coming from the forward
  309. // and reverse-complemented strands
  310. cuda::reduce_by_key(
  311. hits_end - hits_begin,
  312. thrust::make_transform_iterator(
  313. hits.begin(),
  314. make_composition_functor( divide_by_two(), component_functor<hit_type>( 1u ) ) ), // take the second component divided by 2
  315. thrust::make_transform_iterator( sinks.begin(), sink_score() ),
  316. out_reads.begin(),
  317. out_scores.begin(),
  318. thrust::maximum<int16>(),
  319. temp_storage );
  320. // and keep track of the global best
  321. update_scores(
  322. hits_end - hits_begin,
  323. nvbio::plain_view( out_reads ),
  324. nvbio::plain_view( out_scores ),
  325. nvbio::plain_view( best_scores ) );
  326. log_verbose(stderr, "\r processed %6.2f %% reads", 100.0f * float( hits_end ) / float( n_hits ));
  327. }
  328. log_verbose_cont(stderr, "\n");
  329. }
  330. // main test entry point
  331. //
  332. int main(int argc, char* argv[])
  333. {
  334. //
  335. // perform some basic option parsing
  336. //
  337. const uint32 batch_reads = 1*1024*1024;
  338. const uint32 batch_bps = 100*1024*1024;
  339. const char* reads = argv[argc-1];
  340. const char* index = argv[argc-2];
  341. Params params;
  342. params.seed_len = 22;
  343. params.seed_intv = 10;
  344. params.merge_intv = 16;
  345. uint32 max_reads = uint32(-1);
  346. int16 score_threshold = -20;
  347. for (int i = 0; i < argc; ++i)
  348. {
  349. if (strcmp( argv[i], "-s" ) == 0)
  350. {
  351. params.seed_len = uint32( atoi( argv[++i] ) );
  352. params.seed_intv = uint32( atoi( argv[++i] ) );
  353. }
  354. if (strcmp( argv[i], "-m" ) == 0)
  355. params.merge_intv = uint32( atoi( argv[++i] ) );
  356. else if (strcmp( argv[i], "-max-reads" ) == 0)
  357. max_reads = uint32( atoi( argv[++i] ) );
  358. else if (strcmp( argv[i], "-t" ) == 0)
  359. score_threshold = int16( atoi( argv[++i] ) );
  360. }
  361. io::SequenceDataHost h_ref;
  362. if (!io::load_sequence_file( DNA, &h_ref, index ))
  363. {
  364. log_error(stderr, " failed loading reference \"%s\"\n", index);
  365. return 1u;
  366. }
  367. io::FMIndexDataHost h_fmi;
  368. if (!h_fmi.load( index, io::FMIndexData::FORWARD | io::FMIndexData::SA ))
  369. {
  370. log_error(stderr, " failed loading index \"%s\"\n", index);
  371. return 1u;
  372. }
  373. Pipeline pipeline;
  374. // store the program options
  375. pipeline.params = params;
  376. // build its device version
  377. pipeline.ref_data = new io::SequenceDataDevice( h_ref );
  378. // build its device version
  379. pipeline.fm_data = new io::FMIndexDataDevice( h_fmi, io::FMIndexData::FORWARD |
  380. io::FMIndexData::SA );
  381. // open a read file
  382. log_info(stderr, " opening reads file... started\n");
  383. SharedPointer<io::SequenceDataStream> read_data_file(
  384. io::open_sequence_file(
  385. reads,
  386. io::Phred33,
  387. 2*max_reads,
  388. uint32(-1),
  389. io::SequenceEncoding( io::FORWARD | io::REVERSE_COMPLEMENT ) ) );
  390. // check whether the file opened correctly
  391. if (read_data_file == NULL || read_data_file->is_ok() == false)
  392. {
  393. log_error(stderr, " failed opening file \"%s\"\n", reads);
  394. return 1u;
  395. }
  396. log_info(stderr, " opening reads file... done\n");
  397. // keep stats
  398. Stats stats;
  399. io::SequenceDataHost h_read_data;
  400. while (1)
  401. {
  402. // load a batch of reads
  403. if (io::next( DNA_N, &h_read_data, read_data_file.get(), batch_reads, batch_bps ) == 0)
  404. break;
  405. log_info(stderr, " loading reads... started\n");
  406. // copy it to the device
  407. const io::SequenceDataDevice d_read_data = h_read_data;
  408. const uint32 n_reads = d_read_data.size() / 2;
  409. log_info(stderr, " loading reads... done\n");
  410. log_info(stderr, " %u reads\n", n_reads);
  411. const int16 worst_score = Field_traits<int16>::min();
  412. nvbio::vector<device_tag,int16> best_scores( n_reads, worst_score );
  413. nvbio::vector<device_tag,uint8> temp_storage;
  414. Timer timer;
  415. timer.start();
  416. map(
  417. pipeline,
  418. d_read_data,
  419. best_scores,
  420. stats );
  421. timer.stop();
  422. const float time = timer.seconds();
  423. // accumulate the number of aligned reads
  424. stats.reads += n_reads;
  425. stats.time += time;
  426. // count how many reads have a score >= score_threshold
  427. const uint32 n_aligned = cuda::reduce(
  428. n_reads,
  429. thrust::make_transform_iterator( best_scores.begin(), above_threshold( score_threshold ) ),
  430. thrust::plus<uint32>(),
  431. temp_storage );
  432. stats.aligned += n_aligned;
  433. log_info(stderr, " aligned %6.2f %% reads (%6.2f K reads/s)\n", 100.0f * float( stats.aligned ) / float( stats.reads ), (1.0e-3f * float( stats.reads )) / stats.time);
  434. log_verbose(stderr, " breakdown:\n");
  435. log_verbose(stderr, " extract throughput : %.2f B seeds/s\n", (1.0e-9f * float( stats.queries )) / stats.extract_time);
  436. log_verbose(stderr, " rank throughput : %6.2f K reads/s\n", (1.0e-3f * float( stats.reads )) / stats.rank_time);
  437. log_verbose(stderr, " : %6.2f B seeds/s\n", (1.0e-9f * float( stats.queries )) / stats.rank_time);
  438. log_verbose(stderr, " locate throughput : %6.2f K reads/s\n", (1.0e-3f * float( stats.reads )) / stats.locate_time);
  439. log_verbose(stderr, " align throughput : %6.2f K reads/s\n", (1.0e-3f * float( stats.reads )) / stats.align_time);
  440. log_verbose(stderr, " : %6.2f M hits/s\n", (1.0e-6f * float( stats.occurrences )) / stats.align_time);
  441. log_verbose(stderr, " occurrences : %.3f B\n", 1.0e-9f * float( stats.occurrences ) );
  442. }
  443. return 0;
  444. }