123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515 |
- /*
- * nvbio
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the NVIDIA CORPORATION nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- // qmap.cu
- //
- #include <stdio.h>
- #include <stdlib.h>
- #include <nvbio/basic/timer.h>
- #include <nvbio/basic/console.h>
- #include <nvbio/basic/vector.h>
- #include <nvbio/basic/shared_pointer.h>
- #include <nvbio/basic/dna.h>
- #include <nvbio/strings/string_set.h>
- #include <nvbio/strings/infix.h>
- #include <nvbio/strings/seeds.h>
- #include <nvbio/qgram/qgram.h>
- #include <nvbio/qgram/filter.h>
- #include <nvbio/io/sequence/sequence.h>
- #include "alignment.h"
- #include "util.h"
- using namespace nvbio;
- // query stats
- //
- struct Stats
- {
- Stats() :
- time(0),
- build_time(0),
- extract_time(0),
- rank_time(0),
- locate_time(0),
- align_time(0),
- reads(0),
- aligned(0),
- queries(0),
- matches(0),
- occurrences(0),
- merged(0) {}
- float time;
- float build_time;
- float extract_time;
- float rank_time;
- float locate_time;
- float align_time;
- uint64 reads;
- uint64 aligned;
- uint64 queries;
- uint64 matches;
- uint64 occurrences;
- uint64 merged;
- };
- // build a set of q-grams from a given string, together with their sorted counterpart
- //
- template <typename genome_string, typename qgram_vector_type, typename index_vector_type>
- void build_qgrams(
- const uint32 Q,
- const uint32 genome_len,
- const uint32 genome_offset,
- const genome_string genome,
- const uint32 n_queries,
- qgram_vector_type& qgrams,
- qgram_vector_type& sorted_qgrams,
- index_vector_type& sorted_indices)
- {
- // build the q-grams
- qgrams.resize( n_queries );
- generate_qgrams( Q, 2u, genome_len, genome, n_queries, thrust::make_counting_iterator<uint32>(genome_offset), qgrams.begin() );
- // sort the q-grams
- sorted_qgrams = qgrams;
- sorted_indices.resize( n_queries );
- thrust::copy(
- thrust::make_counting_iterator<uint32>(genome_offset),
- thrust::make_counting_iterator<uint32>(genome_offset) + n_queries,
- sorted_indices.begin() );
- thrust::sort_by_key( sorted_qgrams.begin(), sorted_qgrams.end(), sorted_indices.begin() );
- }
- // build a q-gram set-index from a string-set
- //
- template <typename string_set_type>
- void qgram_set_index_build(
- const uint32 Q,
- const uint32 seed_interval,
- const string_set_type string_set,
- QGramSetIndexDevice& qgram_index)
- {
- log_verbose(stderr, " building q-gram set-index... started\n");
- Timer timer;
- timer.start();
- // build the q-gram set index
- qgram_index.build(
- Q, // q-gram size
- 2u, // implicitly convert N to A
- string_set,
- uniform_seeds_functor<>( Q, seed_interval ),
- 12u );
- cudaDeviceSynchronize();
- timer.stop();
- const float time = timer.seconds();
- log_verbose(stderr, " building q-gram set-index... done\n");
- log_verbose(stderr, " indexed q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_qgrams ));
- log_verbose(stderr, " unique q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_unique_qgrams ));
- log_verbose(stderr, " throughput : %5.1f M q-grams/s\n", 1.0e-6f * float( qgram_index.n_qgrams ) / time);
- log_verbose(stderr, " memory usage : %5.1f MB\n", float( qgram_index.used_device_memory() ) / float(1024*1024) );
- }
- // perform q-gram index mapping
- //
- template <typename qgram_index_type, typename qgram_filter_type, typename genome_string>
- void map(
- qgram_index_type& qgram_index,
- qgram_filter_type& qgram_filter,
- const uint32 merge_intv,
- const io::SequenceDataDevice& reads,
- const uint32 n_queries,
- const uint32 genome_len,
- const uint32 genome_offset,
- const genome_string genome,
- nvbio::vector<device_tag,int16>& best_scores,
- Stats& stats)
- {
- typedef typename qgram_index_type::system_tag system_tag;
- // prepare some vectors to store the query qgrams
- nvbio::vector<system_tag,uint64> qgrams( n_queries );
- nvbio::vector<system_tag,uint64> sorted_qgrams( n_queries );
- nvbio::vector<system_tag,uint32> sorted_indices( n_queries );
- const uint32 Q = qgram_index.Q;
- Timer timer;
- timer.start();
- build_qgrams(
- Q,
- genome_len,
- genome_offset,
- genome,
- n_queries,
- qgrams,
- sorted_qgrams,
- sorted_indices );
- cudaDeviceSynchronize();
- timer.stop();
- const float extract_time = timer.seconds();
- stats.queries += n_queries;
- stats.extract_time += extract_time;
- //
- // search the sorted query q-grams with a q-gram filter
- //
- const uint32 batch_size = 32*1024*1024;
- typedef typename qgram_filter_type::hit_type hit_type;
- typedef typename qgram_filter_type::diagonal_type diagonal_type;
- // prepare storage for the output hits
- nvbio::vector<system_tag,hit_type> hits( batch_size );
- nvbio::vector<system_tag,diagonal_type> merged_hits( batch_size );
- nvbio::vector<system_tag,uint16> merged_counts( batch_size );
- nvbio::vector<system_tag,int16> scores( batch_size );
- nvbio::vector<system_tag,uint32> out_reads( batch_size );
- nvbio::vector<system_tag,int16> out_scores( batch_size );
- nvbio::vector<system_tag,uint8> temp_storage;
- timer.start();
- // first step: rank the query q-grams
- const uint64 n_hits = qgram_filter.rank(
- qgram_index,
- n_queries,
- nvbio::raw_pointer( sorted_qgrams ),
- nvbio::raw_pointer( sorted_indices ) );
- cudaDeviceSynchronize();
- timer.stop();
- stats.rank_time += timer.seconds();
- stats.occurrences += n_hits;
- nvbio::vector<device_tag, aln::BestSink<int16> > sinks( batch_size );
- nvbio::vector<device_tag,string_infix_coord_type> genome_infix_coords( batch_size );
- nvbio::vector<device_tag,string_infix_coord_type> read_infix_coords( batch_size );
- const static uint32 BAND_LEN = 31;
- // loop through large batches of hits and locate & merge them
- for (uint64 hits_begin = 0; hits_begin < n_hits; hits_begin += batch_size)
- {
- typedef io::SequenceDataAccess<DNA_N> read_access_type;
- typedef read_access_type::sequence_string_set_type read_string_set_type;
- typedef read_access_type::sequence_stream_type read_stream;
- // build an access pointer to the sequence data
- const read_access_type reads_access( reads );
- const uint64 hits_end = nvbio::min( hits_begin + batch_size, n_hits );
- timer.start();
- qgram_filter.locate(
- hits_begin,
- hits_end,
- hits.begin() );
- const uint32 n_merged = qgram_filter.merge(
- merge_intv,
- hits_end - hits_begin,
- hits.begin(),
- merged_hits.begin(),
- merged_counts.begin() );
- cudaDeviceSynchronize();
- timer.stop();
- stats.locate_time += timer.seconds();
- stats.merged += n_merged;
- timer.start();
- // build the set of read infixes
- thrust::transform(
- merged_hits.begin(),
- merged_hits.begin() + hits_end - hits_begin,
- read_infix_coords.begin(),
- read_infixes( nvbio::plain_view( reads ) ) );
- // build the set of genome infixes
- thrust::transform(
- merged_hits.begin(),
- merged_hits.begin() + hits_end - hits_begin,
- genome_infix_coords.begin(),
- genome_infixes<BAND_LEN>( genome_len, nvbio::plain_view( reads ) ) );
- typedef nvbio::vector<device_tag,string_infix_coord_type>::const_iterator infix_iterator;
- // build a view of the reads
- const SparseStringSet<read_stream,infix_iterator> read_infix_set(
- hits_end - hits_begin,
- reads_access.sequence_stream(),
- read_infix_coords.begin() );
- const SparseStringSet<genome_string,infix_iterator> genome_infix_set(
- hits_end - hits_begin,
- genome,
- genome_infix_coords.begin() );
- typedef aln::MyersTag<5u> myers_dna5_tag;
- aln::batch_banded_alignment_score<BAND_LEN>(
- aln::make_edit_distance_aligner<aln::SEMI_GLOBAL, myers_dna5_tag>(),
- read_infix_set,
- genome_infix_set,
- sinks.begin(),
- aln::DeviceThreadScheduler(),
- reads.max_sequence_len(),
- reads.max_sequence_len() + BAND_LEN );
- cudaDeviceSynchronize();
- timer.stop();
- stats.align_time += timer.seconds();
- // compute the best score for each read in this batch;
- // note that we divide the string-id by 2 to merge results coming from the forward
- // and reverse-complemented strands
- cuda::reduce_by_key(
- n_merged,
- thrust::make_transform_iterator(
- merged_hits.begin(),
- make_composition_functor( divide_by_two(), component_functor<diagonal_type>( 1u ) ) ), // take the second component divided by 2
- thrust::make_transform_iterator( sinks.begin(), sink_score() ),
- out_reads.begin(),
- out_scores.begin(),
- thrust::maximum<int16>(),
- temp_storage );
- // and keep track of the global best
- update_scores(
- n_merged,
- nvbio::plain_view( out_reads ),
- nvbio::plain_view( out_scores ),
- nvbio::plain_view( best_scores ) );
- }
- }
- // main test entry point
- //
- int main(int argc, char* argv[])
- {
- //
- // perform some basic option parsing
- //
- const uint32 batch_reads = 1*1024*1024;
- const uint32 batch_bps = 100*1024*1024;
- const uint32 queries_batch = 16*1024*1024;
- const char* reads = argv[argc-1];
- const char* index = argv[argc-2];
- uint32 Q = 20;
- uint32 Q_intv = 10;
- uint32 merge_intv = 16;
- uint32 max_reads = uint32(-1);
- int16 score_threshold = -20;
- for (int i = 0; i < argc; ++i)
- {
- if (strcmp( argv[i], "-q" ) == 0)
- {
- Q = uint32( atoi( argv[++i] ) );
- Q_intv = uint32( atoi( argv[++i] ) );
- }
- if (strcmp( argv[i], "-m" ) == 0)
- merge_intv = uint32( atoi( argv[++i] ) );
- else if (strcmp( argv[i], "-max-reads" ) == 0)
- max_reads = uint32( atoi( argv[++i] ) );
- else if (strcmp( argv[i], "-t" ) == 0)
- score_threshold = int16( atoi( argv[++i] ) );
- }
- log_info(stderr, "qmap... started\n");
- // load a genome archive...
- log_visible(stderr, " loading reference index ... started\n");
- log_info(stderr, " file: \"%s\"\n", index);
- io::SequenceDataHost h_genome_data;
- if (io::load_sequence_file( DNA, &h_genome_data, index ) == false)
- {
- log_error(stderr, " failed loading index \"%s\"\n", index);
- return 1u;
- }
- log_visible(stderr, " loading reference index ... done\n");
- log_verbose(stderr, " sequences : %u\n", h_genome_data.size() );
- log_verbose(stderr, " bps : %u\n", h_genome_data.bps() );
- log_verbose(stderr, " avg bps : %u (min: %u, max: %u)\n",
- h_genome_data.avg_sequence_len(),
- h_genome_data.min_sequence_len(),
- h_genome_data.max_sequence_len() );
- // build its device version
- const io::SequenceDataDevice d_genome_data( h_genome_data );
- const io::SequenceDataAccess<DNA> d_genome_access( d_genome_data );
- typedef io::SequenceDataAccess<DNA>::sequence_stream_type genome_type;
- // fetch the genome string
- const uint32 genome_len = d_genome_data.bps();
- const genome_type d_genome( d_genome_access.sequence_stream() );
- // open a read file
- log_info(stderr, " opening reads file... started\n");
- SharedPointer<io::SequenceDataStream> read_data_file(
- io::open_sequence_file(
- reads,
- io::Phred33,
- 2*max_reads,
- uint32(-1),
- io::SequenceEncoding( io::FORWARD | io::REVERSE_COMPLEMENT ) ) );
- // check whether the file opened correctly
- if (read_data_file == NULL || read_data_file->is_ok() == false)
- {
- log_error(stderr, " failed opening file \"%s\"\n", reads);
- return 1u;
- }
- log_info(stderr, " opening reads file... done\n");
- // keep stats
- Stats stats;
- io::SequenceDataHost h_read_data;
- while (1)
- {
- // load a batch of reads
- if (io::next( DNA_N, &h_read_data, read_data_file.get(), batch_reads, batch_bps ) == 0)
- break;
- log_info(stderr, " loading reads... started\n");
- // copy it to the device
- const io::SequenceDataDevice d_read_data( h_read_data );
- const io::SequenceDataAccess<DNA_N> d_read_access( d_read_data );
- const uint32 n_reads = d_read_data.size() / 2;
- log_info(stderr, " loading reads... done\n");
- log_info(stderr, " %u reads\n", n_reads);
- // prepare some typedefs for the involved string-sets and infixes
- typedef io::SequenceDataAccess<DNA_N> read_access_type; // the read view type
- typedef read_access_type::sequence_string_set_type string_set_type; // the read string-set
- typedef string_set_infix_coord_type infix_coord_type; // the infix coordinate type, for string-sets
- typedef nvbio::vector<device_tag,infix_coord_type> infix_vector_type; // the device vector type for infix coordinates
- typedef InfixSet<string_set_type, const string_set_infix_coord_type*> seed_set_type; // the infix-set type for representing seeds
- // fetch the actual read string-set
- const string_set_type d_read_string_set = d_read_access.sequence_string_set();
- // build the q-gram index
- QGramSetIndexDevice qgram_index;
- qgram_set_index_build(
- Q,
- Q_intv,
- d_read_string_set,
- qgram_index );
- typedef QGramFilterDevice<QGramSetIndexDevice,const uint64*,const uint32*> qgram_filter_type;
- qgram_filter_type qgram_filter;
- float time = 0.0f;
- const int16 worst_score = Field_traits<int16>::min();
- nvbio::vector<device_tag,int16> best_scores( n_reads, worst_score );
- nvbio::vector<device_tag,uint8> temp_storage;
- // stream through the genome
- for (uint32 genome_begin = 0; genome_begin < genome_len; genome_begin += queries_batch)
- {
- const uint32 genome_end = nvbio::min( genome_begin + queries_batch, genome_len );
- Timer timer;
- timer.start();
- map(
- qgram_index,
- qgram_filter,
- merge_intv,
- d_read_data,
- genome_end - genome_begin,
- genome_len,
- genome_begin,
- d_genome,
- best_scores,
- stats );
- cudaDeviceSynchronize();
- timer.stop();
- time += timer.seconds();
- const float genome_ratio = float( genome_end ) / float( genome_len );
- log_verbose(stderr, "\r aligned %5.2f%% of genome (%6.2f K reads/s)", 100.0f * genome_ratio, (1.0e-3f * n_reads) * genome_ratio / time );
- }
- log_verbose_cont(stderr, "\n");
- // accumulate the number of aligned reads
- stats.reads += n_reads;
- stats.time += time;
- // count how many reads have a score >= score_threshold
- const uint32 n_aligned = cuda::reduce(
- n_reads,
- thrust::make_transform_iterator( best_scores.begin(), above_threshold( score_threshold ) ),
- thrust::plus<uint32>(),
- temp_storage );
- stats.aligned += n_aligned;
- log_info(stderr, " aligned %6.2f %% reads (%6.2f K reads/s)\n", 100.0f * float( stats.aligned ) / float( stats.reads ), (1.0e-3f * float( stats.reads )) / stats.time);
- log_verbose(stderr, " breakdown:\n");
- log_verbose(stderr, " extract throughput : %.2f B q-grams/s\n", (1.0e-9f * float( stats.queries )) / stats.extract_time);
- log_verbose(stderr, " rank throughput : %6.2f K reads/s\n", (1.0e-3f * float( stats.reads )) / stats.rank_time);
- log_verbose(stderr, " : %6.2f B seeds/s\n", (1.0e-9f * float( stats.queries )) / stats.rank_time);
- log_verbose(stderr, " locate throughput : %6.2f K reads/s\n", (1.0e-3f * float( stats.reads )) / stats.locate_time);
- log_verbose(stderr, " align throughput : %6.2f K reads/s\n", (1.0e-3f * float( stats.reads )) / stats.align_time);
- log_verbose(stderr, " : %6.2f M hits/s\n", (1.0e-6f * float( stats.merged )) / stats.align_time);
- log_verbose(stderr, " occurrences : %.3f B\n", 1.0e-9f * float( stats.occurrences ) );
- log_verbose(stderr, " merged occurrences : %.3f B (%.1f %%)\n", 1.0e-9f * float( stats.merged ), 100.0f * float(stats.merged)/float(stats.occurrences));
- }
- log_info(stderr, "qmap... done\n");
- return 0;
- }
|