123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733 |
- /*
- * nvbio
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the NVIDIA CORPORATION nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- // qgram_test.cu
- //
- //#define CUFMI_CUDA_DEBUG
- //#define CUFMI_CUDA_ASSERTS
- #include <stdio.h>
- #include <stdlib.h>
- #include <vector>
- #include <algorithm>
- #include <nvbio/basic/timer.h>
- #include <nvbio/basic/console.h>
- #include <nvbio/basic/vector.h>
- #include <nvbio/basic/packedstream.h>
- #include <nvbio/strings/string_set.h>
- #include <nvbio/strings/seeds.h>
- #include <nvbio/basic/shared_pointer.h>
- #include <nvbio/io/sequence/sequence.h>
- #include <nvbio/qgram/qgram.h>
- #include <nvbio/qgram/qgroup.h>
- #include <nvbio/qgram/filter.h>
- #if defined(_OPENMP)
- #include <omp.h>
- #endif
- namespace nvbio {
- // return the size of a given range
- struct range_size
- {
- typedef uint2 argument_type;
- typedef uint32 result_type;
- NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
- uint32 operator() (const uint2 range) const { return range.y - range.x; }
- };
- // return 1 for non-empty ranges, 0 otherwise
- struct valid_range
- {
- typedef uint2 argument_type;
- typedef uint32 result_type;
- NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
- uint32 operator() (const uint2 range) const { return range.y - range.x > 0 ? 1u : 0u; }
- };
- // query stats
- //
- struct Stats
- {
- Stats() :
- build_time(0),
- unsorted_time(0),
- sorted_time(0),
- filter_time(0),
- merge_time(0),
- queries(0),
- matches(0),
- occurrences(0),
- merged(0) {}
- float build_time;
- float unsorted_time;
- float sorted_time;
- float filter_time;
- float merge_time;
- uint64 queries;
- uint64 matches;
- uint64 occurrences;
- uint64 merged;
- };
- // build a set of q-grams from a given string, together with their sorted counterpart
- //
- template <typename genome_string, typename qgram_vector_type, typename index_vector_type>
- void build_qgrams(
- const uint32 Q,
- const uint32 genome_len,
- const uint32 genome_offset,
- const genome_string genome,
- const uint32 n_queries,
- qgram_vector_type& qgrams,
- qgram_vector_type& sorted_qgrams,
- index_vector_type& sorted_indices)
- {
- // build the q-grams
- qgrams.resize( n_queries );
- generate_qgrams( Q, 2u, genome_len, genome, n_queries, thrust::make_counting_iterator<uint32>(genome_offset), qgrams.begin() );
- // sort the q-grams
- sorted_qgrams = qgrams;
- sorted_indices.resize( n_queries );
- thrust::copy(
- thrust::make_counting_iterator<uint32>(genome_offset),
- thrust::make_counting_iterator<uint32>(genome_offset) + n_queries,
- sorted_indices.begin() );
- thrust::sort_by_key( sorted_qgrams.begin(), sorted_qgrams.end(), sorted_indices.begin() );
- }
- // build a q-gram index from a string
- //
- template <typename string_type>
- void test_qgram_index_build(
- const uint32 Q,
- const uint32 string_len,
- const string_type string,
- QGramIndexDevice& qgram_index)
- {
- log_verbose(stderr, " building q-gram index... started\n");
- Timer timer;
- timer.start();
- // build the q-gram index
- qgram_index.build(
- Q, // q-gram size
- 2u, // implicitly convert N to A
- string_len,
- string,
- 12u );
- cudaDeviceSynchronize();
- timer.stop();
- const float time = timer.seconds();
- log_verbose(stderr, " building q-gram index... done\n");
- log_verbose(stderr, " indexed q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_qgrams ));
- log_verbose(stderr, " unique q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_unique_qgrams ));
- log_verbose(stderr, " throughput : %5.1f M q-grams/s\n", 1.0e-6f * float( string_len ) / time);
- log_verbose(stderr, " memory usage : %5.1f MB\n", float( qgram_index.used_device_memory() ) / float(1024*1024) );
- log_verbose(stderr, " querying q-gram index... started\n");
- }
- // build a q-gram set-index from a string-set
- //
- template <typename string_set_type>
- void test_qgram_set_index_build(
- const uint32 Q,
- const string_set_type string_set,
- QGramSetIndexDevice& qgram_index)
- {
- log_verbose(stderr, " building q-gram set-index... started\n");
- Timer timer;
- timer.start();
- // build the q-gram set index
- qgram_index.build(
- Q, // q-gram size
- 2u, // implicitly convert N to A
- string_set,
- uniform_seeds_functor<>( Q, 10u ),
- 12u );
- cudaDeviceSynchronize();
- timer.stop();
- const float time = timer.seconds();
- log_verbose(stderr, " building q-gram set-index... done\n");
- log_verbose(stderr, " indexed q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_qgrams ));
- log_verbose(stderr, " unique q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_unique_qgrams ));
- log_verbose(stderr, " throughput : %5.1f M q-grams/s\n", 1.0e-6f * float( qgram_index.n_qgrams ) / time);
- log_verbose(stderr, " memory usage : %5.1f MB\n", float( qgram_index.used_device_memory() ) / float(1024*1024) );
- }
- // build a q-group index from a string
- //
- template <typename string_type>
- void test_qgroup_index_build(
- const uint32 Q,
- const uint32 string_len,
- const string_type string,
- QGroupIndexDevice& qgram_index)
- {
- log_verbose(stderr, " building q-group index... started\n");
- Timer timer;
- timer.start();
- // build the q-group index
- qgram_index.build(
- Q, // q-group size
- 2u, // implicitly convert N to A
- string_len,
- string );
- cudaDeviceSynchronize();
- timer.stop();
- const float time = timer.seconds();
- log_verbose(stderr, " building q-group index... done\n");
- log_verbose(stderr, " indexed q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_qgrams ));
- log_verbose(stderr, " unique q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_unique_qgrams ));
- log_verbose(stderr, " throughput : %5.1f M q-grams/s\n", 1.0e-6f * float( string_len ) / time);
- log_verbose(stderr, " memory usage : %5.1f MB\n", float( qgram_index.used_device_memory() ) / float(1024*1024) );
- log_verbose(stderr, " querying q-group index... started\n");
- }
- // test a generic q-gram index query, both using plain queries and with a q-gram filter
- //
- template <typename qgram_index_type, typename genome_string>
- void test_qgram_index_query(
- qgram_index_type& qgram_index,
- const uint32 n_queries,
- const uint32 genome_len,
- const uint32 genome_offset,
- const genome_string genome,
- Stats& stats)
- {
- const uint32 Q = qgram_index.Q;
- typedef typename qgram_index_type::system_tag system_tag;
- // prepare some vectors to store the query qgrams
- nvbio::vector<system_tag,uint64> qgrams( n_queries );
- nvbio::vector<system_tag,uint64> sorted_qgrams( n_queries );
- nvbio::vector<system_tag,uint32> sorted_indices( n_queries );
- build_qgrams(
- Q,
- genome_len,
- genome_offset,
- genome,
- n_queries,
- qgrams,
- sorted_qgrams,
- sorted_indices );
- // prepare a vector to store the query results
- nvbio::vector<system_tag,uint2> ranges( n_queries );
- log_verbose(stderr, " querying q-gram index... started\n");
- Timer timer;
- timer.start();
- // search the query q-grams in the index
- thrust::transform(
- qgrams.begin(),
- qgrams.begin() + n_queries,
- ranges.begin(),
- nvbio::plain_view( qgram_index ) );
- cudaDeviceSynchronize();
- timer.stop();
- const float unsorted_time = timer.seconds();
- timer.start();
- // and now repeat the same operation with the sorted q-grams
- thrust::transform(
- sorted_qgrams.begin(),
- sorted_qgrams.begin() + n_queries,
- ranges.begin(),
- nvbio::plain_view( qgram_index ) );
- cudaDeviceSynchronize();
- timer.stop();
- const float sorted_time = timer.seconds();
- const uint32 n_occurrences = thrust::reduce(
- thrust::make_transform_iterator( ranges.begin(), range_size() ),
- thrust::make_transform_iterator( ranges.begin(), range_size() ) + n_queries );
- const uint32 n_matches = thrust::reduce(
- thrust::make_transform_iterator( ranges.begin(), valid_range() ),
- thrust::make_transform_iterator( ranges.begin(), valid_range() ) + n_queries );
- stats.queries += n_queries;
- stats.unsorted_time += unsorted_time;
- stats.sorted_time += sorted_time;
- stats.matches += n_matches;
- stats.occurrences += n_occurrences;
- log_verbose(stderr, " querying q-gram index... done\n");
- log_verbose(stderr, " unsorted throughput : %.2f B q-grams/s\n", (1.0e-9f * float( stats.queries )) / stats.unsorted_time);
- log_verbose(stderr, " sorted throughput : %.2f B q-grams/s\n", (1.0e-9f * float( stats.queries )) / stats.sorted_time);
- log_verbose(stderr, " matches : %.2f M\n", 1.0e-6f * float( stats.matches ) );
- log_verbose(stderr, " occurrences : %.3f B\n", 1.0e-9f * float( stats.occurrences ) );
- log_verbose(stderr, " q-gram filter... started\n");
- //
- // search the sorted query q-grams with a q-gram filter
- //
- const uint32 batch_size = 16*1024*1024;
- typedef QGramFilter<system_tag,qgram_index_type,const uint64*,const uint32*> qgram_filter_type;
- typedef typename qgram_filter_type::hit_type hit_type;
- typedef typename qgram_filter_type::diagonal_type diagonal_type;
- // prepare storage for the output hits
- nvbio::vector<system_tag,hit_type> hits( batch_size );
- nvbio::vector<system_tag,diagonal_type> merged_hits( batch_size );
- nvbio::vector<system_tag,uint16> merged_counts( batch_size );
- qgram_filter_type qgram_filter;
- timer.start();
- // first step: rank the query q-grams
- const uint32 n_hits = qgram_filter.rank(
- qgram_index,
- n_queries,
- nvbio::raw_pointer( sorted_qgrams ),
- nvbio::raw_pointer( sorted_indices ) );
- if (n_hits != n_occurrences)
- {
- log_error(stderr, " mismatching number of hits: expected %u, got %u\n", n_occurrences, n_hits);
- exit(1);
- }
- // loop through large batches of hits and locate them
- for (uint32 hits_begin = 0; hits_begin < n_hits; hits_begin += batch_size)
- {
- const uint32 hits_end = nvbio::min( hits_begin + batch_size, n_hits );
- qgram_filter.locate(
- hits_begin,
- hits_end,
- hits.begin() );
- }
- cudaDeviceSynchronize();
- timer.stop();
- const float filter_time = timer.seconds();
- stats.filter_time += filter_time;
- timer.start();
- // loop through large batches of hits and locate & merge them
- for (uint32 hits_begin = 0; hits_begin < n_hits; hits_begin += batch_size)
- {
- const uint32 hits_end = nvbio::min( hits_begin + batch_size, n_hits );
- qgram_filter.locate(
- hits_begin,
- hits_end,
- hits.begin() );
- const uint32 n_merged = qgram_filter.merge(
- 16u,
- hits_end - hits_begin,
- hits.begin(),
- merged_hits.begin(),
- merged_counts.begin() );
- stats.merged += n_merged;
- }
- cudaDeviceSynchronize();
- timer.stop();
- const float merge_time = timer.seconds();
- stats.merge_time += merge_time;
- log_verbose(stderr, " q-gram filter... done\n");
- log_verbose(stderr, " filter throughput : %.2f M q-grams/s\n", (1.0e-6f * float( stats.queries )) / stats.filter_time);
- log_verbose(stderr, " merge throughput : %.2f M q-grams/s\n", (1.0e-6f * float( stats.queries )) / stats.merge_time);
- log_verbose(stderr, " merged occurrences : %.3f B (%.1f %%)\n", 1.0e-9f * float( stats.merged ), 100.0f * float(stats.merged)/float(stats.occurrences));
- }
- enum QGramTest
- {
- ALL = 0xFFFFFFFFu,
- QGRAM_INDEX = 1u,
- QGRAM_SET_INDEX = 2u,
- QGROUP_INDEX = 4u,
- };
- // main test entry point
- //
- int qgram_test(int argc, char* argv[])
- {
- uint32 TEST_MASK = 0xFFFFFFFFu;
- uint32 n_qgrams = 10000000;
- uint32 n_queries = 10000000;
- uint32 queries_batch = 10000000;
- bool device_test = true;
- bool host_test = true;
- const char* reads = "./data/SRR493095_1.fastq.gz";
- const char* index = "./data/human.NCBI36/Homo_sapiens.NCBI36.53.dna.toplevel.fa";
- for (int i = 0; i < argc; ++i)
- {
- if (strcmp( argv[i], "-qgrams" ) == 0)
- n_qgrams = uint32( atoi( argv[++i] ) )*1000u;
- else if (strcmp( argv[i], "-queries" ) == 0)
- n_queries = uint32( atoi( argv[++i] ) )*1000u;
- else if (strcmp( argv[i], "-batch" ) == 0)
- queries_batch = uint32( atoi( argv[++i] ) )*1000u;
- else if (strcmp( argv[i], "-reads" ) == 0)
- reads = argv[++i];
- else if (strcmp( argv[i], "-index" ) == 0)
- index = argv[++i];
- else if (strcmp( argv[i], "-no-device" ) == 0)
- device_test = false;
- else if (strcmp( argv[i], "-no-host" ) == 0)
- host_test = false;
- else if (strcmp( argv[i], "-tests" ) == 0)
- {
- const std::string tests_string( argv[++i] );
- char temp[256];
- const char* begin = tests_string.c_str();
- const char* end = begin;
- TEST_MASK = 0u;
- while (1)
- {
- while (*end != ':' && *end != '\0')
- {
- temp[end - begin] = *end;
- end++;
- }
- temp[end - begin] = '\0';
- if (strcmp( temp, "qgram" ) == 0)
- TEST_MASK |= QGRAM_INDEX;
- else if (strcmp( temp, "qgram-set" ) == 0)
- TEST_MASK |= QGRAM_SET_INDEX;
- else if (strcmp( temp, "qgroup" ) == 0)
- TEST_MASK |= QGROUP_INDEX;
- if (*end == '\0')
- break;
- ++end; begin = end;
- }
- }
- }
- #if defined(_OPENMP)
- // Now set the number of threads
- omp_set_num_threads( omp_get_num_procs() );
- #endif
- log_info(stderr, "q-gram test... started\n");
- const io::QualityEncoding qencoding = io::Phred33;
- log_info(stderr, " loading reads... started\n");
- SharedPointer<io::SequenceDataStream> read_data_file(
- io::open_sequence_file(
- reads,
- qencoding,
- uint32(-1),
- uint32(-1) ) );
- if (read_data_file == NULL || read_data_file->is_ok() == false)
- {
- log_error(stderr, " failed opening file \"%s\"\n", reads);
- return 1u;
- }
- const uint32 batch_size = uint32(-1);
- const uint32 batch_bps = n_qgrams;
- // load a batch of reads
- io::SequenceDataHost h_read_data;
- if (io::next( DNA_N, &h_read_data, read_data_file.get(), batch_size, batch_bps ) == 0)
- {
- log_error(stderr, " unable to read input sequences\n");
- return 1;
- }
-
- // build its device version
- const io::SequenceDataDevice d_read_data( h_read_data );
- const io::SequenceDataAccess<DNA_N> d_read_access( d_read_data );
- log_info(stderr, " loading reads... done\n");
- // fetch the actual string
- typedef io::SequenceDataAccess<DNA_N> read_access_type;
- typedef read_access_type::sequence_stream_type string_type;
- typedef read_access_type::sequence_string_set_type string_set_type;
- const uint32 n_strings = d_read_access.size();
- const uint32 string_len = d_read_access.bps();
- const string_type string = d_read_access.sequence_stream();
- const string_set_type string_set = d_read_access.sequence_string_set();
- log_info(stderr, " strings: %u\n", n_strings);
- log_info(stderr, " symbols: %.3f M\n", 1.0e-6f * float(string_len));
- io::SequenceDataHost ref;
- if (!io::load_sequence_file( DNA, &ref, index ))
- {
- log_error(stderr, " failed loading index \"%s\"\n", index);
- return 1u;
- }
- // build its device version
- const io::SequenceDataDevice ref_cuda( ref );
- typedef io::SequenceDataAccess<DNA> genome_access_type;
- typedef genome_access_type::sequence_stream_type genome_type;
- const uint32 genome_len = ref.bps();
- const genome_access_type h_genome_access( ref );
- const genome_type h_genome( h_genome_access.sequence_stream() );
- const genome_access_type d_genome_access( ref_cuda );
- const genome_type d_genome( d_genome_access.sequence_stream() );
- // clamp the total number of queries
- n_queries = nvbio::min( n_queries, genome_len );
- // test q-gram index
- if (TEST_MASK & QGRAM_INDEX)
- {
- log_visible(stderr, " testing q-gram index (device)... started\n");
- QGramIndexDevice qgram_index;
- test_qgram_index_build(
- 20u,
- string_len,
- string,
- qgram_index );
- if (device_test)
- {
- Stats stats;
- for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
- {
- const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
- test_qgram_index_query(
- qgram_index,
- genome_end - genome_begin,
- genome_len,
- genome_begin,
- d_genome,
- stats );
- }
- log_visible(stderr, " testing q-gram index (device)... done\n");
- const float genome_ratio = float(genome_len)/float(stats.queries);
- log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
- }
- if (host_test)
- {
- log_visible(stderr, " testing q-gram index (host)... started\n");
- QGramIndexHost h_qgram_index;
-
- h_qgram_index = qgram_index;
- Stats stats;
- for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
- {
- const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
- test_qgram_index_query(
- h_qgram_index,
- genome_end - genome_begin,
- genome_len,
- genome_begin,
- h_genome,
- stats );
- }
- log_visible(stderr, " testing q-gram index (host)... done\n");
- const float genome_ratio = float(genome_len)/float(stats.queries);
- log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
- }
- }
- // test q-gram set-index
- if (TEST_MASK & QGRAM_SET_INDEX)
- {
- log_visible(stderr, " testing q-gram set-index (device)... started\n");
- QGramSetIndexDevice qgram_index;
- test_qgram_set_index_build(
- 22u,
- string_set,
- qgram_index );
- if (device_test)
- {
- Stats stats;
- for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
- {
- const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
- test_qgram_index_query(
- qgram_index,
- genome_end - genome_begin,
- genome_len,
- genome_begin,
- d_genome,
- stats );
- }
- log_visible(stderr, " testing q-gram set-index (device)... done\n");
- const float genome_ratio = float(genome_len)/float(stats.queries);
- log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
- }
- if (host_test)
- {
- log_visible(stderr, " testing q-gram set-index (host)... started\n");
- QGramSetIndexHost h_qgram_index;
-
- h_qgram_index = qgram_index;
- Stats stats;
- for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
- {
- const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
- test_qgram_index_query(
- h_qgram_index,
- genome_end - genome_begin,
- genome_len,
- genome_begin,
- h_genome,
- stats );
- }
- log_visible(stderr, " testing q-gram set-index (host)... done\n");
- const float genome_ratio = float(genome_len)/float(stats.queries);
- log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
- }
- }
- // test q-group index
- if (TEST_MASK & QGROUP_INDEX)
- {
- log_visible(stderr, " testing q-group index (device)... started\n");
- QGroupIndexDevice qgram_index;
- test_qgroup_index_build(
- 16u,
- string_len,
- string,
- qgram_index );
- if (device_test)
- {
- Stats stats;
- for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
- {
- const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
- test_qgram_index_query(
- qgram_index,
- genome_end - genome_begin,
- genome_len,
- genome_begin,
- d_genome,
- stats );
- }
- log_visible(stderr, " testing q-group index (device)... done\n");
- const float genome_ratio = float(genome_len)/float(stats.queries);
- log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
- log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
- }
- }
- log_info(stderr, "q-gram test... done\n" );
- return 0;
- }
- } // namespace nvbio
|