qgram_test.cu 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // qgram_test.cu
  28. //
  29. //#define CUFMI_CUDA_DEBUG
  30. //#define CUFMI_CUDA_ASSERTS
  31. #include <stdio.h>
  32. #include <stdlib.h>
  33. #include <vector>
  34. #include <algorithm>
  35. #include <nvbio/basic/timer.h>
  36. #include <nvbio/basic/console.h>
  37. #include <nvbio/basic/vector.h>
  38. #include <nvbio/basic/packedstream.h>
  39. #include <nvbio/strings/string_set.h>
  40. #include <nvbio/strings/seeds.h>
  41. #include <nvbio/basic/shared_pointer.h>
  42. #include <nvbio/io/sequence/sequence.h>
  43. #include <nvbio/qgram/qgram.h>
  44. #include <nvbio/qgram/qgroup.h>
  45. #include <nvbio/qgram/filter.h>
  46. #if defined(_OPENMP)
  47. #include <omp.h>
  48. #endif
  49. namespace nvbio {
  50. // return the size of a given range
  51. struct range_size
  52. {
  53. typedef uint2 argument_type;
  54. typedef uint32 result_type;
  55. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  56. uint32 operator() (const uint2 range) const { return range.y - range.x; }
  57. };
  58. // return 1 for non-empty ranges, 0 otherwise
  59. struct valid_range
  60. {
  61. typedef uint2 argument_type;
  62. typedef uint32 result_type;
  63. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  64. uint32 operator() (const uint2 range) const { return range.y - range.x > 0 ? 1u : 0u; }
  65. };
  66. // query stats
  67. //
  68. struct Stats
  69. {
  70. Stats() :
  71. build_time(0),
  72. unsorted_time(0),
  73. sorted_time(0),
  74. filter_time(0),
  75. merge_time(0),
  76. queries(0),
  77. matches(0),
  78. occurrences(0),
  79. merged(0) {}
  80. float build_time;
  81. float unsorted_time;
  82. float sorted_time;
  83. float filter_time;
  84. float merge_time;
  85. uint64 queries;
  86. uint64 matches;
  87. uint64 occurrences;
  88. uint64 merged;
  89. };
  90. // build a set of q-grams from a given string, together with their sorted counterpart
  91. //
  92. template <typename genome_string, typename qgram_vector_type, typename index_vector_type>
  93. void build_qgrams(
  94. const uint32 Q,
  95. const uint32 genome_len,
  96. const uint32 genome_offset,
  97. const genome_string genome,
  98. const uint32 n_queries,
  99. qgram_vector_type& qgrams,
  100. qgram_vector_type& sorted_qgrams,
  101. index_vector_type& sorted_indices)
  102. {
  103. // build the q-grams
  104. qgrams.resize( n_queries );
  105. generate_qgrams( Q, 2u, genome_len, genome, n_queries, thrust::make_counting_iterator<uint32>(genome_offset), qgrams.begin() );
  106. // sort the q-grams
  107. sorted_qgrams = qgrams;
  108. sorted_indices.resize( n_queries );
  109. thrust::copy(
  110. thrust::make_counting_iterator<uint32>(genome_offset),
  111. thrust::make_counting_iterator<uint32>(genome_offset) + n_queries,
  112. sorted_indices.begin() );
  113. thrust::sort_by_key( sorted_qgrams.begin(), sorted_qgrams.end(), sorted_indices.begin() );
  114. }
  115. // build a q-gram index from a string
  116. //
  117. template <typename string_type>
  118. void test_qgram_index_build(
  119. const uint32 Q,
  120. const uint32 string_len,
  121. const string_type string,
  122. QGramIndexDevice& qgram_index)
  123. {
  124. log_verbose(stderr, " building q-gram index... started\n");
  125. Timer timer;
  126. timer.start();
  127. // build the q-gram index
  128. qgram_index.build(
  129. Q, // q-gram size
  130. 2u, // implicitly convert N to A
  131. string_len,
  132. string,
  133. 12u );
  134. cudaDeviceSynchronize();
  135. timer.stop();
  136. const float time = timer.seconds();
  137. log_verbose(stderr, " building q-gram index... done\n");
  138. log_verbose(stderr, " indexed q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_qgrams ));
  139. log_verbose(stderr, " unique q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_unique_qgrams ));
  140. log_verbose(stderr, " throughput : %5.1f M q-grams/s\n", 1.0e-6f * float( string_len ) / time);
  141. log_verbose(stderr, " memory usage : %5.1f MB\n", float( qgram_index.used_device_memory() ) / float(1024*1024) );
  142. log_verbose(stderr, " querying q-gram index... started\n");
  143. }
  144. // build a q-gram set-index from a string-set
  145. //
  146. template <typename string_set_type>
  147. void test_qgram_set_index_build(
  148. const uint32 Q,
  149. const string_set_type string_set,
  150. QGramSetIndexDevice& qgram_index)
  151. {
  152. log_verbose(stderr, " building q-gram set-index... started\n");
  153. Timer timer;
  154. timer.start();
  155. // build the q-gram set index
  156. qgram_index.build(
  157. Q, // q-gram size
  158. 2u, // implicitly convert N to A
  159. string_set,
  160. uniform_seeds_functor<>( Q, 10u ),
  161. 12u );
  162. cudaDeviceSynchronize();
  163. timer.stop();
  164. const float time = timer.seconds();
  165. log_verbose(stderr, " building q-gram set-index... done\n");
  166. log_verbose(stderr, " indexed q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_qgrams ));
  167. log_verbose(stderr, " unique q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_unique_qgrams ));
  168. log_verbose(stderr, " throughput : %5.1f M q-grams/s\n", 1.0e-6f * float( qgram_index.n_qgrams ) / time);
  169. log_verbose(stderr, " memory usage : %5.1f MB\n", float( qgram_index.used_device_memory() ) / float(1024*1024) );
  170. }
  171. // build a q-group index from a string
  172. //
  173. template <typename string_type>
  174. void test_qgroup_index_build(
  175. const uint32 Q,
  176. const uint32 string_len,
  177. const string_type string,
  178. QGroupIndexDevice& qgram_index)
  179. {
  180. log_verbose(stderr, " building q-group index... started\n");
  181. Timer timer;
  182. timer.start();
  183. // build the q-group index
  184. qgram_index.build(
  185. Q, // q-group size
  186. 2u, // implicitly convert N to A
  187. string_len,
  188. string );
  189. cudaDeviceSynchronize();
  190. timer.stop();
  191. const float time = timer.seconds();
  192. log_verbose(stderr, " building q-group index... done\n");
  193. log_verbose(stderr, " indexed q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_qgrams ));
  194. log_verbose(stderr, " unique q-grams : %6.2f M q-grams\n", 1.0e-6f * float( qgram_index.n_unique_qgrams ));
  195. log_verbose(stderr, " throughput : %5.1f M q-grams/s\n", 1.0e-6f * float( string_len ) / time);
  196. log_verbose(stderr, " memory usage : %5.1f MB\n", float( qgram_index.used_device_memory() ) / float(1024*1024) );
  197. log_verbose(stderr, " querying q-group index... started\n");
  198. }
  199. // test a generic q-gram index query, both using plain queries and with a q-gram filter
  200. //
  201. template <typename qgram_index_type, typename genome_string>
  202. void test_qgram_index_query(
  203. qgram_index_type& qgram_index,
  204. const uint32 n_queries,
  205. const uint32 genome_len,
  206. const uint32 genome_offset,
  207. const genome_string genome,
  208. Stats& stats)
  209. {
  210. const uint32 Q = qgram_index.Q;
  211. typedef typename qgram_index_type::system_tag system_tag;
  212. // prepare some vectors to store the query qgrams
  213. nvbio::vector<system_tag,uint64> qgrams( n_queries );
  214. nvbio::vector<system_tag,uint64> sorted_qgrams( n_queries );
  215. nvbio::vector<system_tag,uint32> sorted_indices( n_queries );
  216. build_qgrams(
  217. Q,
  218. genome_len,
  219. genome_offset,
  220. genome,
  221. n_queries,
  222. qgrams,
  223. sorted_qgrams,
  224. sorted_indices );
  225. // prepare a vector to store the query results
  226. nvbio::vector<system_tag,uint2> ranges( n_queries );
  227. log_verbose(stderr, " querying q-gram index... started\n");
  228. Timer timer;
  229. timer.start();
  230. // search the query q-grams in the index
  231. thrust::transform(
  232. qgrams.begin(),
  233. qgrams.begin() + n_queries,
  234. ranges.begin(),
  235. nvbio::plain_view( qgram_index ) );
  236. cudaDeviceSynchronize();
  237. timer.stop();
  238. const float unsorted_time = timer.seconds();
  239. timer.start();
  240. // and now repeat the same operation with the sorted q-grams
  241. thrust::transform(
  242. sorted_qgrams.begin(),
  243. sorted_qgrams.begin() + n_queries,
  244. ranges.begin(),
  245. nvbio::plain_view( qgram_index ) );
  246. cudaDeviceSynchronize();
  247. timer.stop();
  248. const float sorted_time = timer.seconds();
  249. const uint32 n_occurrences = thrust::reduce(
  250. thrust::make_transform_iterator( ranges.begin(), range_size() ),
  251. thrust::make_transform_iterator( ranges.begin(), range_size() ) + n_queries );
  252. const uint32 n_matches = thrust::reduce(
  253. thrust::make_transform_iterator( ranges.begin(), valid_range() ),
  254. thrust::make_transform_iterator( ranges.begin(), valid_range() ) + n_queries );
  255. stats.queries += n_queries;
  256. stats.unsorted_time += unsorted_time;
  257. stats.sorted_time += sorted_time;
  258. stats.matches += n_matches;
  259. stats.occurrences += n_occurrences;
  260. log_verbose(stderr, " querying q-gram index... done\n");
  261. log_verbose(stderr, " unsorted throughput : %.2f B q-grams/s\n", (1.0e-9f * float( stats.queries )) / stats.unsorted_time);
  262. log_verbose(stderr, " sorted throughput : %.2f B q-grams/s\n", (1.0e-9f * float( stats.queries )) / stats.sorted_time);
  263. log_verbose(stderr, " matches : %.2f M\n", 1.0e-6f * float( stats.matches ) );
  264. log_verbose(stderr, " occurrences : %.3f B\n", 1.0e-9f * float( stats.occurrences ) );
  265. log_verbose(stderr, " q-gram filter... started\n");
  266. //
  267. // search the sorted query q-grams with a q-gram filter
  268. //
  269. const uint32 batch_size = 16*1024*1024;
  270. typedef QGramFilter<system_tag,qgram_index_type,const uint64*,const uint32*> qgram_filter_type;
  271. typedef typename qgram_filter_type::hit_type hit_type;
  272. typedef typename qgram_filter_type::diagonal_type diagonal_type;
  273. // prepare storage for the output hits
  274. nvbio::vector<system_tag,hit_type> hits( batch_size );
  275. nvbio::vector<system_tag,diagonal_type> merged_hits( batch_size );
  276. nvbio::vector<system_tag,uint16> merged_counts( batch_size );
  277. qgram_filter_type qgram_filter;
  278. timer.start();
  279. // first step: rank the query q-grams
  280. const uint32 n_hits = qgram_filter.rank(
  281. qgram_index,
  282. n_queries,
  283. nvbio::raw_pointer( sorted_qgrams ),
  284. nvbio::raw_pointer( sorted_indices ) );
  285. if (n_hits != n_occurrences)
  286. {
  287. log_error(stderr, " mismatching number of hits: expected %u, got %u\n", n_occurrences, n_hits);
  288. exit(1);
  289. }
  290. // loop through large batches of hits and locate them
  291. for (uint32 hits_begin = 0; hits_begin < n_hits; hits_begin += batch_size)
  292. {
  293. const uint32 hits_end = nvbio::min( hits_begin + batch_size, n_hits );
  294. qgram_filter.locate(
  295. hits_begin,
  296. hits_end,
  297. hits.begin() );
  298. }
  299. cudaDeviceSynchronize();
  300. timer.stop();
  301. const float filter_time = timer.seconds();
  302. stats.filter_time += filter_time;
  303. timer.start();
  304. // loop through large batches of hits and locate & merge them
  305. for (uint32 hits_begin = 0; hits_begin < n_hits; hits_begin += batch_size)
  306. {
  307. const uint32 hits_end = nvbio::min( hits_begin + batch_size, n_hits );
  308. qgram_filter.locate(
  309. hits_begin,
  310. hits_end,
  311. hits.begin() );
  312. const uint32 n_merged = qgram_filter.merge(
  313. 16u,
  314. hits_end - hits_begin,
  315. hits.begin(),
  316. merged_hits.begin(),
  317. merged_counts.begin() );
  318. stats.merged += n_merged;
  319. }
  320. cudaDeviceSynchronize();
  321. timer.stop();
  322. const float merge_time = timer.seconds();
  323. stats.merge_time += merge_time;
  324. log_verbose(stderr, " q-gram filter... done\n");
  325. log_verbose(stderr, " filter throughput : %.2f M q-grams/s\n", (1.0e-6f * float( stats.queries )) / stats.filter_time);
  326. log_verbose(stderr, " merge throughput : %.2f M q-grams/s\n", (1.0e-6f * float( stats.queries )) / stats.merge_time);
  327. log_verbose(stderr, " merged occurrences : %.3f B (%.1f %%)\n", 1.0e-9f * float( stats.merged ), 100.0f * float(stats.merged)/float(stats.occurrences));
  328. }
  329. enum QGramTest
  330. {
  331. ALL = 0xFFFFFFFFu,
  332. QGRAM_INDEX = 1u,
  333. QGRAM_SET_INDEX = 2u,
  334. QGROUP_INDEX = 4u,
  335. };
  336. // main test entry point
  337. //
  338. int qgram_test(int argc, char* argv[])
  339. {
  340. uint32 TEST_MASK = 0xFFFFFFFFu;
  341. uint32 n_qgrams = 10000000;
  342. uint32 n_queries = 10000000;
  343. uint32 queries_batch = 10000000;
  344. bool device_test = true;
  345. bool host_test = true;
  346. const char* reads = "./data/SRR493095_1.fastq.gz";
  347. const char* index = "./data/human.NCBI36/Homo_sapiens.NCBI36.53.dna.toplevel.fa";
  348. for (int i = 0; i < argc; ++i)
  349. {
  350. if (strcmp( argv[i], "-qgrams" ) == 0)
  351. n_qgrams = uint32( atoi( argv[++i] ) )*1000u;
  352. else if (strcmp( argv[i], "-queries" ) == 0)
  353. n_queries = uint32( atoi( argv[++i] ) )*1000u;
  354. else if (strcmp( argv[i], "-batch" ) == 0)
  355. queries_batch = uint32( atoi( argv[++i] ) )*1000u;
  356. else if (strcmp( argv[i], "-reads" ) == 0)
  357. reads = argv[++i];
  358. else if (strcmp( argv[i], "-index" ) == 0)
  359. index = argv[++i];
  360. else if (strcmp( argv[i], "-no-device" ) == 0)
  361. device_test = false;
  362. else if (strcmp( argv[i], "-no-host" ) == 0)
  363. host_test = false;
  364. else if (strcmp( argv[i], "-tests" ) == 0)
  365. {
  366. const std::string tests_string( argv[++i] );
  367. char temp[256];
  368. const char* begin = tests_string.c_str();
  369. const char* end = begin;
  370. TEST_MASK = 0u;
  371. while (1)
  372. {
  373. while (*end != ':' && *end != '\0')
  374. {
  375. temp[end - begin] = *end;
  376. end++;
  377. }
  378. temp[end - begin] = '\0';
  379. if (strcmp( temp, "qgram" ) == 0)
  380. TEST_MASK |= QGRAM_INDEX;
  381. else if (strcmp( temp, "qgram-set" ) == 0)
  382. TEST_MASK |= QGRAM_SET_INDEX;
  383. else if (strcmp( temp, "qgroup" ) == 0)
  384. TEST_MASK |= QGROUP_INDEX;
  385. if (*end == '\0')
  386. break;
  387. ++end; begin = end;
  388. }
  389. }
  390. }
  391. #if defined(_OPENMP)
  392. // Now set the number of threads
  393. omp_set_num_threads( omp_get_num_procs() );
  394. #endif
  395. log_info(stderr, "q-gram test... started\n");
  396. const io::QualityEncoding qencoding = io::Phred33;
  397. log_info(stderr, " loading reads... started\n");
  398. SharedPointer<io::SequenceDataStream> read_data_file(
  399. io::open_sequence_file(
  400. reads,
  401. qencoding,
  402. uint32(-1),
  403. uint32(-1) ) );
  404. if (read_data_file == NULL || read_data_file->is_ok() == false)
  405. {
  406. log_error(stderr, " failed opening file \"%s\"\n", reads);
  407. return 1u;
  408. }
  409. const uint32 batch_size = uint32(-1);
  410. const uint32 batch_bps = n_qgrams;
  411. // load a batch of reads
  412. io::SequenceDataHost h_read_data;
  413. if (io::next( DNA_N, &h_read_data, read_data_file.get(), batch_size, batch_bps ) == 0)
  414. {
  415. log_error(stderr, " unable to read input sequences\n");
  416. return 1;
  417. }
  418. // build its device version
  419. const io::SequenceDataDevice d_read_data( h_read_data );
  420. const io::SequenceDataAccess<DNA_N> d_read_access( d_read_data );
  421. log_info(stderr, " loading reads... done\n");
  422. // fetch the actual string
  423. typedef io::SequenceDataAccess<DNA_N> read_access_type;
  424. typedef read_access_type::sequence_stream_type string_type;
  425. typedef read_access_type::sequence_string_set_type string_set_type;
  426. const uint32 n_strings = d_read_access.size();
  427. const uint32 string_len = d_read_access.bps();
  428. const string_type string = d_read_access.sequence_stream();
  429. const string_set_type string_set = d_read_access.sequence_string_set();
  430. log_info(stderr, " strings: %u\n", n_strings);
  431. log_info(stderr, " symbols: %.3f M\n", 1.0e-6f * float(string_len));
  432. io::SequenceDataHost ref;
  433. if (!io::load_sequence_file( DNA, &ref, index ))
  434. {
  435. log_error(stderr, " failed loading index \"%s\"\n", index);
  436. return 1u;
  437. }
  438. // build its device version
  439. const io::SequenceDataDevice ref_cuda( ref );
  440. typedef io::SequenceDataAccess<DNA> genome_access_type;
  441. typedef genome_access_type::sequence_stream_type genome_type;
  442. const uint32 genome_len = ref.bps();
  443. const genome_access_type h_genome_access( ref );
  444. const genome_type h_genome( h_genome_access.sequence_stream() );
  445. const genome_access_type d_genome_access( ref_cuda );
  446. const genome_type d_genome( d_genome_access.sequence_stream() );
  447. // clamp the total number of queries
  448. n_queries = nvbio::min( n_queries, genome_len );
  449. // test q-gram index
  450. if (TEST_MASK & QGRAM_INDEX)
  451. {
  452. log_visible(stderr, " testing q-gram index (device)... started\n");
  453. QGramIndexDevice qgram_index;
  454. test_qgram_index_build(
  455. 20u,
  456. string_len,
  457. string,
  458. qgram_index );
  459. if (device_test)
  460. {
  461. Stats stats;
  462. for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
  463. {
  464. const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
  465. test_qgram_index_query(
  466. qgram_index,
  467. genome_end - genome_begin,
  468. genome_len,
  469. genome_begin,
  470. d_genome,
  471. stats );
  472. }
  473. log_visible(stderr, " testing q-gram index (device)... done\n");
  474. const float genome_ratio = float(genome_len)/float(stats.queries);
  475. log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
  476. log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
  477. log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
  478. log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
  479. log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
  480. log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
  481. }
  482. if (host_test)
  483. {
  484. log_visible(stderr, " testing q-gram index (host)... started\n");
  485. QGramIndexHost h_qgram_index;
  486. h_qgram_index = qgram_index;
  487. Stats stats;
  488. for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
  489. {
  490. const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
  491. test_qgram_index_query(
  492. h_qgram_index,
  493. genome_end - genome_begin,
  494. genome_len,
  495. genome_begin,
  496. h_genome,
  497. stats );
  498. }
  499. log_visible(stderr, " testing q-gram index (host)... done\n");
  500. const float genome_ratio = float(genome_len)/float(stats.queries);
  501. log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
  502. log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
  503. log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
  504. log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
  505. log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
  506. log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
  507. }
  508. }
  509. // test q-gram set-index
  510. if (TEST_MASK & QGRAM_SET_INDEX)
  511. {
  512. log_visible(stderr, " testing q-gram set-index (device)... started\n");
  513. QGramSetIndexDevice qgram_index;
  514. test_qgram_set_index_build(
  515. 22u,
  516. string_set,
  517. qgram_index );
  518. if (device_test)
  519. {
  520. Stats stats;
  521. for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
  522. {
  523. const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
  524. test_qgram_index_query(
  525. qgram_index,
  526. genome_end - genome_begin,
  527. genome_len,
  528. genome_begin,
  529. d_genome,
  530. stats );
  531. }
  532. log_visible(stderr, " testing q-gram set-index (device)... done\n");
  533. const float genome_ratio = float(genome_len)/float(stats.queries);
  534. log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
  535. log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
  536. log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
  537. log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
  538. log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
  539. log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
  540. }
  541. if (host_test)
  542. {
  543. log_visible(stderr, " testing q-gram set-index (host)... started\n");
  544. QGramSetIndexHost h_qgram_index;
  545. h_qgram_index = qgram_index;
  546. Stats stats;
  547. for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
  548. {
  549. const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
  550. test_qgram_index_query(
  551. h_qgram_index,
  552. genome_end - genome_begin,
  553. genome_len,
  554. genome_begin,
  555. h_genome,
  556. stats );
  557. }
  558. log_visible(stderr, " testing q-gram set-index (host)... done\n");
  559. const float genome_ratio = float(genome_len)/float(stats.queries);
  560. log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
  561. log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
  562. log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
  563. log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
  564. log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
  565. log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
  566. }
  567. }
  568. // test q-group index
  569. if (TEST_MASK & QGROUP_INDEX)
  570. {
  571. log_visible(stderr, " testing q-group index (device)... started\n");
  572. QGroupIndexDevice qgram_index;
  573. test_qgroup_index_build(
  574. 16u,
  575. string_len,
  576. string,
  577. qgram_index );
  578. if (device_test)
  579. {
  580. Stats stats;
  581. for (uint32 genome_begin = 0; genome_begin < n_queries; genome_begin += queries_batch)
  582. {
  583. const uint32 genome_end = nvbio::min( genome_begin + queries_batch, n_queries );
  584. test_qgram_index_query(
  585. qgram_index,
  586. genome_end - genome_begin,
  587. genome_len,
  588. genome_begin,
  589. d_genome,
  590. stats );
  591. }
  592. log_visible(stderr, " testing q-group index (device)... done\n");
  593. const float genome_ratio = float(genome_len)/float(stats.queries);
  594. log_info(stderr, " sorted throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.sorted_time * genome_ratio) );
  595. log_info(stderr, " sorted throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.sorted_time * genome_ratio) );
  596. log_info(stderr, " filter throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.filter_time * genome_ratio) );
  597. log_info(stderr, " filter throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.filter_time * genome_ratio) );
  598. log_info(stderr, " merge throughput: %7.2f K reads/s\n", 1.0e-3f * float(n_strings) / (stats.merge_time * genome_ratio) );
  599. log_info(stderr, " merge throughput: %7.2f M bases/s\n", 1.0e-6f * float(string_len) / (stats.merge_time * genome_ratio) );
  600. }
  601. }
  602. log_info(stderr, "q-gram test... done\n" );
  603. return 0;
  604. }
  605. } // namespace nvbio