sw-benchmark.cu 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // sw-benchmark.cu
  28. //
  29. //
  30. // This project supports comparisons against multi-core SSE-enabled CPUs using
  31. // conditional compilation of the SSW library:
  32. //
  33. // https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library
  34. //
  35. // In order to perform these additional tests, the user must download ssw.h and ssw.c
  36. // from the above repository, copy them in the sw-benchmark directory, and run cmake with
  37. // the option -DSSWLIB=ON.
  38. //
  39. #if defined(SSWLIB)
  40. #include "ssw.h"
  41. #include <omp.h>
  42. #endif
  43. #include <nvbio/basic/timer.h>
  44. #include <nvbio/basic/console.h>
  45. #include <nvbio/basic/cuda/ldg.h>
  46. #include <nvbio/basic/packedstream.h>
  47. #include <nvbio/basic/packedstream_loader.h>
  48. #include <nvbio/basic/vector_view.h>
  49. #include <nvbio/basic/shared_pointer.h>
  50. #include <nvbio/io/sequence/sequence.h>
  51. #include <nvbio/fasta/fasta.h>
  52. #include <nvbio/basic/dna.h>
  53. #include <nvbio/alignment/alignment.h>
  54. #include <nvbio/alignment/batched.h>
  55. #include <nvbio/alignment/sink.h>
  56. #include <thrust/device_vector.h>
  57. #include <stdio.h>
  58. #include <stdlib.h>
  59. #include <vector>
  60. #include <algorithm>
  61. enum { MAX_READ_LENGTH = 1024 };
  62. using namespace nvbio;
  63. enum { CACHE_SIZE = 64 };
  64. typedef nvbio::lmem_cache_tag<CACHE_SIZE> lmem_cache_tag_type;
  65. typedef nvbio::uncached_tag uncached_tag_type;
  66. enum { REF_BITS = 2 };
  67. enum { REF_BIG_ENDIAN = false };
  68. //
  69. // An alignment stream class to be used in conjunction with the BatchAlignmentScore class
  70. //
  71. template <typename t_aligner_type, typename cache_type = lmem_cache_tag_type>
  72. struct AlignmentStream
  73. {
  74. typedef t_aligner_type aligner_type;
  75. typedef nvbio::cuda::ldg_pointer<uint32> storage_iterator;
  76. typedef nvbio::PackedStringLoader<
  77. storage_iterator,
  78. io::SequenceDataTraits<DNA_N>::SEQUENCE_BITS,
  79. io::SequenceDataTraits<DNA_N>::SEQUENCE_BIG_ENDIAN,cache_type> pattern_loader_type;
  80. typedef typename pattern_loader_type::input_iterator uncached_pattern_iterator;
  81. typedef typename pattern_loader_type::iterator pattern_iterator;
  82. typedef nvbio::vector_view<pattern_iterator> pattern_string;
  83. typedef nvbio::PackedStringLoader<
  84. storage_iterator,
  85. REF_BITS,
  86. REF_BIG_ENDIAN,uncached_tag_type> text_loader_type;
  87. typedef typename text_loader_type::input_iterator uncached_text_iterator;
  88. typedef typename text_loader_type::iterator text_iterator;
  89. typedef nvbio::vector_view<text_iterator> text_string;
  90. // an alignment context
  91. struct context_type
  92. {
  93. int32 min_score;
  94. aln::BestSink<int32> sink;
  95. };
  96. // a container for the strings to be aligned
  97. struct strings_type
  98. {
  99. pattern_loader_type pattern_loader;
  100. text_loader_type text_loader;
  101. pattern_string pattern;
  102. aln::trivial_quality_string quals;
  103. text_string text;
  104. };
  105. // constructor
  106. AlignmentStream(
  107. aligner_type _aligner,
  108. const uint32 _count,
  109. const uint32* _offsets,
  110. const uint32* _patterns,
  111. const uint32 _max_pattern_len,
  112. const uint32 _total_pattern_len,
  113. const uint32* _text,
  114. const uint32 _text_len,
  115. int16* _scores) :
  116. m_aligner ( _aligner ),
  117. m_count (_count),
  118. m_max_pattern_len (_max_pattern_len),
  119. m_total_pattern_len (_total_pattern_len),
  120. m_text_len (_text_len),
  121. m_offsets (_offsets),
  122. m_patterns (storage_iterator(_patterns)),
  123. m_text (storage_iterator(_text)),
  124. m_scores (_scores) {}
  125. // get the aligner
  126. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  127. const aligner_type& aligner() const { return m_aligner; };
  128. // return the maximum pattern length
  129. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  130. uint32 max_pattern_length() const { return m_max_pattern_len; }
  131. // return the maximum text length
  132. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  133. uint32 max_text_length() const { return m_text_len; }
  134. // return the stream size
  135. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  136. uint32 size() const { return m_count; }
  137. // return the i-th pattern's length
  138. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  139. uint32 pattern_length(const uint32 i, context_type* context) const { return m_offsets[i+1] - m_offsets[i]; }
  140. // return the i-th text's length
  141. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  142. uint32 text_length(const uint32 i, context_type* context) const { return m_text_len; }
  143. // return the total number of cells
  144. uint64 cells() const { return uint64( m_total_pattern_len ) * uint64( m_text_len ); }
  145. // initialize the i-th context
  146. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  147. bool init_context(
  148. const uint32 i,
  149. context_type* context) const
  150. {
  151. context->min_score = Field_traits<int32>::min();
  152. return true;
  153. }
  154. // initialize the i-th context
  155. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  156. void load_strings(
  157. const uint32 i,
  158. const uint32 window_begin,
  159. const uint32 window_end,
  160. const context_type* context,
  161. strings_type* strings) const
  162. {
  163. const uint32 offset = m_offsets[i];
  164. const uint32 length = m_offsets[i+1] - offset;
  165. strings->text = text_string( m_text_len,
  166. strings->text_loader.load(
  167. m_text,
  168. m_text_len,
  169. make_uint2( window_begin, window_end ),
  170. false ) );
  171. strings->pattern = pattern_string( length,
  172. strings->pattern_loader.load( m_patterns + offset, length ) );
  173. }
  174. // handle the output
  175. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  176. void output(
  177. const uint32 i,
  178. const context_type* context) const
  179. {
  180. // copy the output score
  181. m_scores[i] = context->sink.score;
  182. }
  183. aligner_type m_aligner;
  184. uint32 m_count;
  185. uint32 m_max_pattern_len;
  186. uint32 m_total_pattern_len;
  187. uint32 m_text_len;
  188. const uint32* m_offsets;
  189. uncached_pattern_iterator m_patterns;
  190. uncached_text_iterator m_text;
  191. int16* m_scores;
  192. };
  193. // A simple kernel to test the speed of alignment without the possible overheads of the BatchAlignmentScore interface
  194. //
  195. template <uint32 BLOCKDIM, uint32 MAX_PATTERN_LEN, typename aligner_type, typename score_type>
  196. __global__ void alignment_test_kernel(
  197. const aligner_type aligner,
  198. const uint32 N_probs,
  199. const uint32* offsets,
  200. const uint32* pattern_ptr,
  201. const uint32 text_len,
  202. const uint32* text_ptr,
  203. score_type* score)
  204. {
  205. const uint32 tid = blockIdx.x * BLOCKDIM + threadIdx.x;
  206. typedef lmem_cache_tag_type lmem_cache_type;
  207. typedef nvbio::cuda::ldg_pointer<uint32> storage_iterator;
  208. typedef nvbio::PackedStringLoader<
  209. storage_iterator,
  210. io::SequenceDataTraits<DNA_N>::SEQUENCE_BITS,
  211. io::SequenceDataTraits<DNA_N>::SEQUENCE_BIG_ENDIAN,lmem_cache_type> pattern_loader_type;
  212. typedef typename pattern_loader_type::input_iterator uncached_pattern_iterator;
  213. typedef typename pattern_loader_type::iterator pattern_iterator;
  214. typedef nvbio::vector_view<pattern_iterator> pattern_string;
  215. typedef nvbio::PackedStringLoader<
  216. storage_iterator,
  217. REF_BITS,
  218. REF_BIG_ENDIAN,uncached_tag_type> text_loader_type;
  219. typedef typename text_loader_type::input_iterator uncached_text_iterator;
  220. typedef typename text_loader_type::iterator text_iterator;
  221. typedef nvbio::vector_view<text_iterator> text_string;
  222. if (tid >= N_probs)
  223. return;
  224. const uint32 pattern_off = offsets[tid];
  225. const uint32 pattern_len = offsets[tid+1] - pattern_off;
  226. pattern_loader_type pattern_loader;
  227. pattern_string pattern = pattern_string( pattern_len, pattern_loader.load( uncached_pattern_iterator( pattern_ptr ) + pattern_off, pattern_len ) );
  228. text_loader_type text_loader;
  229. text_string text = text_string( text_len, text_loader.load( uncached_text_iterator( text_ptr ), text_len ) );
  230. aln::BestSink<int32> sink;
  231. aln::alignment_score<MAX_PATTERN_LEN>(
  232. aligner,
  233. pattern,
  234. aln::trivial_quality_string(),
  235. text,
  236. Field_traits<int32>::min(),
  237. sink );
  238. score[tid] = sink.score;
  239. }
  240. unsigned char nst_nt4_table[256] = {
  241. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  242. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  243. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 /*'-'*/, 4, 4,
  244. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  245. 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
  246. 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  247. 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
  248. 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  249. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  250. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  251. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  252. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  253. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  254. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  255. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  256. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  257. };
  258. struct ReferenceCounter
  259. {
  260. ReferenceCounter() : m_size(0) {}
  261. void begin_read() {}
  262. void end_read() {}
  263. void id(const uint8 c) {}
  264. void read(const uint8 c) { ++m_size; }
  265. uint32 m_size;
  266. };
  267. struct ReferenceCoder
  268. {
  269. typedef PackedStream<uint32*,uint8,2,false> stream_type;
  270. ReferenceCoder(uint32* storage) :
  271. m_size(0), m_stream( storage )
  272. {}
  273. void begin_read() {}
  274. void end_read() {}
  275. void id(const uint8 c) {}
  276. void read(const uint8 s)
  277. {
  278. const uint8 c = nst_nt4_table[s];
  279. m_stream[ m_size++ ] = c < 4 ? c : 0;
  280. }
  281. uint32 m_size;
  282. stream_type m_stream;
  283. };
  284. // execute a given batch alignment type on a given stream
  285. //
  286. // \tparam batch_type a \ref BatchAlignment "Batch Alignment"
  287. // \tparam stream_type a stream compatible to the given batch_type
  288. //
  289. // \return average time
  290. //
  291. template <typename batch_type, typename stream_type>
  292. float enact_batch(
  293. batch_type& batch,
  294. const stream_type& stream)
  295. {
  296. // allow to alloc all the needed temporary storage
  297. const uint64 temp_size = batch_type::max_temp_storage(
  298. stream.max_pattern_length(),
  299. stream.max_text_length(),
  300. stream.size() );
  301. Timer timer;
  302. timer.start();
  303. // enact the batch
  304. batch.enact( stream, temp_size, NULL );
  305. cudaDeviceSynchronize();
  306. timer.stop();
  307. return timer.seconds();
  308. }
  309. // execute and time a batch of full DP alignments using BatchAlignmentScore
  310. //
  311. template <typename scheduler_type, typename stream_type>
  312. void batch_score_profile(
  313. const stream_type stream)
  314. {
  315. typedef aln::BatchedAlignmentScore<stream_type, scheduler_type> batch_type; // our batch type
  316. // setup a batch
  317. batch_type batch;
  318. const float time = enact_batch(
  319. batch,
  320. stream );
  321. fprintf(stderr," %5.1f", 1.0e-9f * float(stream.cells())/time );
  322. }
  323. // execute and time the batch_score<scheduler> algorithm for all possible schedulers
  324. //
  325. template <typename aligner_type>
  326. void batch_score_profile_all(
  327. const aligner_type aligner,
  328. const uint32 n_tasks,
  329. const uint32* offsets_dvec,
  330. const uint32* pattern_dvec,
  331. const uint32 max_pattern_len,
  332. const uint32 total_pattern_len,
  333. const uint32* text_dvec,
  334. const uint32 text_len,
  335. int16* score_dvec)
  336. {
  337. {
  338. typedef AlignmentStream<aligner_type> stream_type;
  339. // create a stream
  340. stream_type stream(
  341. aligner,
  342. n_tasks,
  343. offsets_dvec,
  344. pattern_dvec,
  345. max_pattern_len,
  346. total_pattern_len,
  347. text_dvec,
  348. text_len,
  349. score_dvec );
  350. // test the DeviceThreadScheduler
  351. batch_score_profile<aln::DeviceThreadScheduler>( stream );
  352. // test the DeviceStagedThreadScheduler
  353. //batch_score_profile<aln::DeviceStagedThreadScheduler>( stream );
  354. }
  355. {
  356. const uint32 BLOCKDIM = 128;
  357. const uint32 N_BLOCKS = (n_tasks + BLOCKDIM-1) / BLOCKDIM;
  358. Timer timer;
  359. timer.start();
  360. // enact the batch
  361. alignment_test_kernel<BLOCKDIM,MAX_READ_LENGTH> <<<N_BLOCKS,BLOCKDIM>>>(
  362. aligner,
  363. n_tasks,
  364. offsets_dvec,
  365. pattern_dvec,
  366. text_len,
  367. text_dvec,
  368. score_dvec );
  369. cudaDeviceSynchronize();
  370. timer.stop();
  371. const float time = timer.seconds();
  372. fprintf(stderr," %5.1f", 1.0e-9f * float(uint64(total_pattern_len)*uint64(text_len))/time );
  373. }
  374. fprintf(stderr, " GCUPS\n");
  375. }
  376. enum AlignmentTest
  377. {
  378. ALL = 0xFFFFFFFFu,
  379. ED = 1u,
  380. SW = 2u,
  381. GOTOH = 4u,
  382. ED_BANDED = 8u,
  383. SW_BANDED = 16u,
  384. GOTOH_BANDED = 32u,
  385. SSW = 64u
  386. };
  387. int main(int argc, char* argv[])
  388. {
  389. uint32 TEST_MASK = 0xFFFFFFFFu;
  390. const char* reads_name = argv[argc-2];
  391. const char* ref_name = argv[argc-1];
  392. uint32 threads = omp_get_num_procs();
  393. io::QualityEncoding qencoding = io::Phred33;
  394. for (int i = 0; i < argc-2; ++i)
  395. {
  396. if (strcmp( argv[i], "-tests" ) == 0)
  397. {
  398. const std::string tests_string( argv[++i] );
  399. char temp[256];
  400. const char* begin = tests_string.c_str();
  401. const char* end = begin;
  402. TEST_MASK = 0u;
  403. while (1)
  404. {
  405. while (*end != ':' && *end != '\0')
  406. {
  407. temp[end - begin] = *end;
  408. end++;
  409. }
  410. temp[end - begin] = '\0';
  411. if (strcmp( temp, "ed" ) == 0)
  412. TEST_MASK |= ED;
  413. else if (strcmp( temp, "sw" ) == 0)
  414. TEST_MASK |= SW;
  415. else if (strcmp( temp, "gotoh" ) == 0)
  416. TEST_MASK |= GOTOH;
  417. else if (strcmp( temp, "ssw" ) == 0)
  418. TEST_MASK |= SSW;
  419. if (*end == '\0')
  420. break;
  421. ++end; begin = end;
  422. }
  423. }
  424. else if (strcmp( argv[i], "-threads" ) == 0)
  425. threads = atoi( argv[++i] );
  426. }
  427. fprintf(stderr,"sw-benchmark... started\n");
  428. log_visible(stderr, "opening read file \"%s\"\n", reads_name);
  429. SharedPointer<nvbio::io::SequenceDataStream> read_data_file(
  430. nvbio::io::open_sequence_file(reads_name,
  431. qencoding)
  432. );
  433. log_visible(stderr, "reading reference file \"%s\"... started\n", ref_name);
  434. // read the reference
  435. thrust::host_vector<uint32> h_ref_storage;
  436. uint32 ref_length;
  437. uint32 ref_words;
  438. {
  439. ReferenceCounter counter;
  440. FASTA_inc_reader fasta( ref_name );
  441. if (fasta.valid() == false)
  442. {
  443. fprintf(stderr, " error: unable to open reference file \"%s\"\n", ref_name);
  444. exit(1);
  445. }
  446. while (fasta.read( 1024, counter ) == 1024);
  447. ref_length = counter.m_size;
  448. ref_words = (ref_length + 15)/16; // # of words at 2 bits per symbol
  449. }
  450. {
  451. h_ref_storage.resize( ref_words );
  452. ReferenceCoder coder( &h_ref_storage[0] );
  453. FASTA_inc_reader fasta( ref_name );
  454. if (fasta.valid() == false)
  455. {
  456. fprintf(stderr, " error: unable to open reference file \"%s\"\n", ref_name);
  457. exit(1);
  458. }
  459. while (fasta.read( 1024, coder ) == 1024);
  460. }
  461. log_visible(stderr, "reading reference file \"%s\"... done (%u bps)\n", ref_name, ref_length);
  462. typedef PackedStream<uint32*,uint8,REF_BITS,REF_BIG_ENDIAN> ref_stream_type;
  463. thrust::device_vector<uint32> d_ref_storage( h_ref_storage );
  464. ref_stream_type d_ref_stream( nvbio::raw_pointer( d_ref_storage ) );
  465. const uint32 batch_size = 256*1024;
  466. thrust::device_vector<int16> score_dvec( batch_size, 0 );
  467. #if defined(SSWLIB)
  468. std::vector<int8_t> unpacked_ref( ref_length );
  469. {
  470. ref_stream_type h_ref_stream( nvbio::raw_pointer( h_ref_storage ) );
  471. for (uint32 i = 0; i < ref_length; ++i)
  472. unpacked_ref[i] = h_ref_stream[i];
  473. }
  474. // Now set the number of threads
  475. omp_set_num_threads( threads );
  476. #pragma omp parallel
  477. {
  478. fprintf(stderr, " running on multiple threads\n");
  479. }
  480. #endif
  481. io::SequenceDataHost h_read_data;
  482. while (io::next( DNA_N, &h_read_data, read_data_file.get(), batch_size ))
  483. {
  484. // build the device side representation
  485. const io::SequenceDataDevice d_read_data( h_read_data );
  486. const uint32 n_read_symbols = h_read_data.bps();
  487. fprintf(stderr," %u reads, avg: %u bps, max: %u bps\n",
  488. h_read_data.size(),
  489. h_read_data.avg_sequence_len(),
  490. h_read_data.max_sequence_len());
  491. if (TEST_MASK & GOTOH)
  492. {
  493. aln::SimpleGotohScheme scoring;
  494. scoring.m_match = 2;
  495. scoring.m_mismatch = -1;
  496. scoring.m_gap_open = -2;
  497. scoring.m_gap_ext = -1;
  498. fprintf(stderr," testing Gotoh scoring speed...\n");
  499. fprintf(stderr," %15s : ", "global");
  500. {
  501. batch_score_profile_all(
  502. aln::make_gotoh_aligner<aln::GLOBAL,aln::TextBlockingTag>( scoring ),
  503. d_read_data.size(),
  504. nvbio::plain_view( d_read_data ).sequence_index(),
  505. nvbio::plain_view( d_read_data ).sequence_storage(),
  506. d_read_data.max_sequence_len(),
  507. n_read_symbols,
  508. nvbio::raw_pointer( d_ref_storage ),
  509. ref_length,
  510. nvbio::raw_pointer( score_dvec ) );
  511. }
  512. fprintf(stderr," %15s : ", "semi-global");
  513. {
  514. batch_score_profile_all(
  515. aln::make_gotoh_aligner<aln::SEMI_GLOBAL,aln::TextBlockingTag>( scoring ),
  516. d_read_data.size(),
  517. nvbio::plain_view( d_read_data ).sequence_index(),
  518. nvbio::plain_view( d_read_data ).sequence_storage(),
  519. d_read_data.max_sequence_len(),
  520. n_read_symbols,
  521. nvbio::raw_pointer( d_ref_storage ),
  522. ref_length,
  523. nvbio::raw_pointer( score_dvec ) );
  524. }
  525. fprintf(stderr," %15s : ", "local");
  526. {
  527. batch_score_profile_all(
  528. aln::make_gotoh_aligner<aln::LOCAL,aln::TextBlockingTag>( scoring ),
  529. d_read_data.size(),
  530. nvbio::plain_view( d_read_data ).sequence_index(),
  531. nvbio::plain_view( d_read_data ).sequence_storage(),
  532. d_read_data.max_sequence_len(),
  533. n_read_symbols,
  534. nvbio::raw_pointer( d_ref_storage ),
  535. ref_length,
  536. nvbio::raw_pointer( score_dvec ) );
  537. }
  538. }
  539. if (TEST_MASK & ED)
  540. {
  541. fprintf(stderr," testing Edit Distance scoring speed...\n");
  542. fprintf(stderr," %15s : ", "semi-global");
  543. {
  544. batch_score_profile_all(
  545. aln::make_edit_distance_aligner<aln::SEMI_GLOBAL,aln::TextBlockingTag>(),
  546. d_read_data.size(),
  547. nvbio::plain_view( d_read_data ).sequence_index(),
  548. nvbio::plain_view( d_read_data ).sequence_storage(),
  549. d_read_data.max_sequence_len(),
  550. n_read_symbols,
  551. nvbio::raw_pointer( d_ref_storage ),
  552. ref_length,
  553. nvbio::raw_pointer( score_dvec ) );
  554. }
  555. }
  556. #if defined(SSWLIB)
  557. if (TEST_MASK & SSW)
  558. {
  559. fprintf(stderr," testing SSW scoring speed...\n");
  560. fprintf(stderr," %15s : ", "local");
  561. const int8_t mat[4*4] = {2, -1, -1, -1, -1, 2, -1, -1, -1, -1, 2, -1, -1, -1, -1, 2};
  562. std::vector<int8_t> unpacked_reads( n_read_symbols );
  563. typedef io::SequenceDataAccess<DNA_N> read_access_type;
  564. typedef read_access_type::sequence_stream_type read_stream_type;
  565. const read_access_type reads_access( h_read_data );
  566. const read_stream_type packed_reads( reads_access.sequence_stream() );
  567. #pragma omp parallel for
  568. for (int i = 0; i < int( n_read_symbols ); ++i)
  569. unpacked_reads[i] = packed_reads[i];
  570. Timer timer;
  571. timer.start();
  572. #pragma omp parallel for
  573. for (int i = 0; i < int( h_read_data.size() ); ++i)
  574. {
  575. const uint32 read_off = reads_access.sequence_index()[i];
  576. const uint32 read_len = reads_access.sequence_index()[i+1] - read_off;
  577. s_profile* prof = ssw_init( &unpacked_reads[read_off], read_len, mat, 4, 2 );
  578. s_align* align = ssw_align(
  579. prof,
  580. &unpacked_ref[0],
  581. ref_length,
  582. 2,
  583. 2,
  584. 0u,
  585. 0u,
  586. 0,
  587. 15 );
  588. align_destroy( align );
  589. init_destroy( prof );
  590. }
  591. timer.stop();
  592. const float time = timer.seconds();
  593. fprintf(stderr," %5.1f", 1.0e-9f * float(uint64(n_read_symbols)*uint64(ref_length))/time );
  594. fprintf(stderr, " GCUPS\n");
  595. }
  596. #endif
  597. }
  598. fprintf(stderr,"sw-benchmark... done\n");
  599. return 0;
  600. }