alignment_test_utils.h 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // alignment_test.cu
  28. //
  29. #pragma once
  30. #include <nvbio-test/alignment_test_utils.h>
  31. #include <nvbio/basic/timer.h>
  32. #include <nvbio/basic/console.h>
  33. #include <nvbio/basic/cuda/ldg.h>
  34. #include <nvbio/basic/cached_iterator.h>
  35. #include <nvbio/basic/packedstream.h>
  36. #include <nvbio/basic/packedstream_loader.h>
  37. #include <nvbio/basic/vector_view.h>
  38. #include <nvbio/basic/shared_pointer.h>
  39. #include <nvbio/basic/dna.h>
  40. #include <nvbio/alignment/alignment.h>
  41. #include <nvbio/alignment/batched.h>
  42. #include <nvbio/alignment/sink.h>
  43. #include <thrust/device_vector.h>
  44. #include <stdio.h>
  45. #include <stdlib.h>
  46. #include <vector>
  47. #include <algorithm>
  48. namespace nvbio {
  49. namespace aln {
  50. enum AlignmentTest
  51. {
  52. ALL = 0xFFFFFFFFu,
  53. ED = 1u,
  54. SW = 2u,
  55. GOTOH = 4u,
  56. ED_BANDED = 8u,
  57. SW_BANDED = 16u,
  58. GOTOH_BANDED = 32u,
  59. SW_WARP = 64u,
  60. SW_STRIPED = 128u,
  61. FUNCTIONAL = 256u,
  62. };
  63. // make a light-weight string from an ASCII char string
  64. vector_view<const char*> make_string(const char* str)
  65. {
  66. return vector_view<const char*>( uint32(strlen(str)), str );
  67. }
  68. // run-length encode a string
  69. std::string rle(const char* input)
  70. {
  71. char buffer[1024];
  72. buffer[0] = '\0';
  73. char prev = '\0';
  74. uint32 cnt = 0;
  75. for (const char* p = input; *p != '\0'; ++p)
  76. {
  77. if (*p == prev)
  78. ++cnt;
  79. else
  80. {
  81. if (p != input)
  82. sprintf( buffer + strlen(buffer), "%u%c", cnt, prev );
  83. prev = *p;
  84. cnt = 1u;
  85. }
  86. }
  87. if (cnt)
  88. sprintf( buffer + strlen(buffer), "%u%c", cnt, prev );
  89. return std::string( buffer );
  90. }
  91. // fill an array with random symbols
  92. template <uint32 BITS, typename rand_type>
  93. void fill_packed_stream(rand_type& rand, const uint32 value_range, const uint32 n, uint32* storage)
  94. {
  95. typedef PackedStream<uint32*,uint8,BITS,false> packed_stream_type;
  96. packed_stream_type stream( storage );
  97. for (uint32 i = 0; i < n; ++i)
  98. stream[i] = rand.next() % value_range;
  99. }
  100. // abstract container for the score matrices
  101. template <uint32 N, uint32 M, typename aligner_tag>
  102. struct ScoreMatrices {};
  103. // ScoreMatrices EditDistanceTag-specialization
  104. template <uint32 N, uint32 M>
  105. struct ScoreMatrices<N,M,aln::EditDistanceTag>
  106. {
  107. int32 H[N+1][M+1];
  108. char H_flow[N+1][M+1];
  109. void print()
  110. {
  111. for (uint32 i = 0; i <= N; ++i)
  112. {
  113. fprintf(stderr,"H[%2u] : ", i);
  114. for (uint32 j = 0; j <= M; ++j)
  115. fprintf(stderr, " %3d", H[i][j]);
  116. fprintf(stderr,"\n");
  117. }
  118. fprintf(stderr,"\n");
  119. for (uint32 i = 0; i <= N; ++i)
  120. {
  121. fprintf(stderr,"Hd[%2u] : ", i);
  122. for (uint32 j = 0; j <= M; ++j)
  123. fprintf(stderr, " %c", H_flow[i][j] );
  124. fprintf(stderr,"\n");
  125. }
  126. fprintf(stderr,"\n");
  127. }
  128. };
  129. // ScoreMatrices SmithWatermanTag-specialization
  130. template <uint32 N, uint32 M>
  131. struct ScoreMatrices<N,M,aln::SmithWatermanTag>
  132. {
  133. int32 H[N+1][M+1];
  134. char H_flow[N+1][M+1];
  135. void print()
  136. {
  137. for (uint32 i = 0; i <= N; ++i)
  138. {
  139. fprintf(stderr,"H[%2u] : ", i);
  140. for (uint32 j = 0; j <= M; ++j)
  141. fprintf(stderr, " %3d", H[i][j]);
  142. fprintf(stderr,"\n");
  143. }
  144. fprintf(stderr,"\n");
  145. for (uint32 i = 0; i <= N; ++i)
  146. {
  147. fprintf(stderr,"Hd[%2u] : ", i);
  148. for (uint32 j = 0; j <= M; ++j)
  149. fprintf(stderr, " %c", H_flow[i][j] );
  150. fprintf(stderr,"\n");
  151. }
  152. fprintf(stderr,"\n");
  153. }
  154. };
  155. // ScoreMatrices GotohTag-specialization
  156. template <uint32 N, uint32 M>
  157. struct ScoreMatrices<N,M,aln::GotohTag>
  158. {
  159. int32 H[N+1][M+1];
  160. int32 E[N+1][M+1];
  161. int32 F[N+1][M+1];
  162. char H_flow[N+1][M+1];
  163. char E_flow[N+1][M+1];
  164. char F_flow[N+1][M+1];
  165. void print()
  166. {
  167. for (uint32 i = 0; i <= N; ++i)
  168. {
  169. fprintf(stderr,"H[%2u] : ", i);
  170. for (uint32 j = 0; j <= M; ++j)
  171. fprintf(stderr, " %3d", H[i][j]);
  172. fprintf(stderr,"\n");
  173. }
  174. fprintf(stderr,"\n");
  175. for (uint32 i = 0; i <= N; ++i)
  176. {
  177. fprintf(stderr,"E[%2u] : ", i);
  178. for (uint32 j = 0; j <= M; ++j)
  179. fprintf(stderr, " %3d", E[i][j]);
  180. fprintf(stderr,"\n");
  181. }
  182. fprintf(stderr,"\n");
  183. for (uint32 i = 0; i <= N; ++i)
  184. {
  185. fprintf(stderr,"F[%2u] : ", i);
  186. for (uint32 j = 0; j <= M; ++j)
  187. fprintf(stderr, " %3d", F[i][j]);
  188. fprintf(stderr,"\n");
  189. }
  190. fprintf(stderr,"\n");
  191. for (uint32 i = 0; i <= N; ++i)
  192. {
  193. fprintf(stderr,"Hd[%2u] : ", i);
  194. for (uint32 j = 0; j <= M; ++j)
  195. fprintf(stderr, " %c", H_flow[i][j] );
  196. fprintf(stderr,"\n");
  197. }
  198. fprintf(stderr,"\n");
  199. for (uint32 i = 0; i <= N; ++i)
  200. {
  201. fprintf(stderr,"Ed[%2u] : ", i);
  202. for (uint32 j = 0; j <= M; ++j)
  203. fprintf(stderr, " %c", E_flow[i][j] );
  204. fprintf(stderr,"\n");
  205. }
  206. fprintf(stderr,"\n");
  207. for (uint32 i = 0; i <= N; ++i)
  208. {
  209. fprintf(stderr,"Fd[%2u] : ", i);
  210. for (uint32 j = 0; j <= M; ++j)
  211. fprintf(stderr, " %c", F_flow[i][j] );
  212. fprintf(stderr,"\n");
  213. }
  214. }
  215. };
  216. template <uint32 M, uint32 N, uint32 BAND_LEN, aln::AlignmentType TYPE, typename scheme_type>
  217. int32 ref_banded_sw(const uint8* str, const uint8* ref, const uint32 pos, const aln::SmithWatermanAligner<TYPE, scheme_type> aligner)
  218. {
  219. const scheme_type& scoring = aligner.scheme;
  220. const int32 G = scoring.deletion();
  221. const int32 I = scoring.insertion();
  222. const int32 S = scoring.mismatch();
  223. const int32 V = scoring.match();
  224. const uint32 start = pos;
  225. int32 best_score = Field_traits<int32>::min();
  226. int32 band[BAND_LEN];
  227. for (int32 j = 0; j < BAND_LEN; ++j)
  228. band[j] = 0;
  229. for (int32 i = 0; i < M; ++i)
  230. {
  231. const uint8 q = str[i];
  232. int32 top, left, diagonal, hi;
  233. // j == 0 case
  234. {
  235. const int32 S_ij = (ref[start+i] == q) ? V : S;
  236. diagonal = band[0] + S_ij;
  237. top = band[1] + G;
  238. hi = nvbio::max( top, diagonal );
  239. if (TYPE == aln::LOCAL)
  240. {
  241. hi = nvbio::max( hi, int32(0) ); // clamp to zero
  242. best_score = nvbio::max( best_score, hi ); // save the highest score in the matrix
  243. }
  244. band[0] = hi;
  245. }
  246. for (uint32 j = 1; j < BAND_LEN-1; ++j)
  247. {
  248. const int32 S_ij = (ref[start+i+j] == q) ? V : S;
  249. diagonal = band[j] + S_ij;
  250. top = band[j+1] + G;
  251. left = band[j-1] + I;
  252. hi = nvbio::max3( top, left, diagonal );
  253. if (TYPE == LOCAL)
  254. {
  255. hi = nvbio::max( hi, int32(0) ); // clamp to zero
  256. best_score = nvbio::max( best_score, hi ); // save the highest score in the matrix
  257. }
  258. band[j] = hi;
  259. }
  260. // j == BAND_LEN-1 case
  261. const int32 S_ij = (ref[start+i+BAND_LEN-1] == q) ? V : S;
  262. diagonal = band[BAND_LEN-1] + S_ij;
  263. left = band[BAND_LEN-2] + I;
  264. hi = nvbio::max( left, diagonal );
  265. if (TYPE == LOCAL)
  266. {
  267. hi = nvbio::max( hi, int32(0) ); // clamp to zero
  268. best_score = nvbio::max( best_score, hi ); // save the highest score in the matrix
  269. }
  270. band[BAND_LEN-1] = hi;
  271. }
  272. if (TYPE == GLOBAL)
  273. best_score = band[ BAND_LEN-1 ];
  274. else if (TYPE == SEMI_GLOBAL)
  275. {
  276. // get the highest score along the long edge of the path graph
  277. best_score = band[0];
  278. for (uint32 j = 1; j < BAND_LEN; ++j)
  279. best_score = nvbio::max( best_score, band[j] );
  280. }
  281. return best_score;
  282. }
  283. template <uint32 M, uint32 N, uint32 BAND_LEN, aln::AlignmentType TYPE, typename scheme_type>
  284. int32 ref_banded_sw(const uint8* pattern, const uint8* text, const uint32 pos, const aln::GotohAligner<TYPE, scheme_type> aligner)
  285. {
  286. const scheme_type& scoring = aligner.scheme;
  287. const int32 G_o = scoring.pattern_gap_open();
  288. const int32 G_e = scoring.pattern_gap_extension();
  289. const int32 S = scoring.mismatch();
  290. const int32 V = scoring.match();
  291. const uint32 start = pos;
  292. int32 best_score = Field_traits<int32>::min();
  293. int32 infimum = Field_traits<int32>::min() - G_e;
  294. int32 H_band[BAND_LEN];
  295. int32 F_band[BAND_LEN];
  296. H_band[0] = 0;
  297. for (uint32 j = 1; j < BAND_LEN; ++j)
  298. H_band[j] = TYPE == GLOBAL ? scoring.text_gap_open() + (j-1)*scoring.text_gap_extension() : 0;
  299. for (uint32 j = 0; j < BAND_LEN; ++j)
  300. F_band[j] = infimum;
  301. #define DEBUG_THIS
  302. #if defined(DEBUG_THIS)
  303. FILE* file = fopen("debug.txt", "w");
  304. #define DEBUG_THIS_STATEMENT(x) x
  305. #else
  306. #define DEBUG_THIS_STATEMENT(x)
  307. #endif
  308. for (uint32 i = 0; i < M; ++i)
  309. {
  310. #if defined(DEBUG_THIS)
  311. {
  312. fprintf(file,"F[%2u] = ", i);
  313. for (uint32 j = 0; j < BAND_LEN; ++j)
  314. fprintf(file," %3d", F_band[j]);
  315. fprintf(file,"\n");
  316. fprintf(file,"H[%2u] = ", i);
  317. for (uint32 j = 0; j < BAND_LEN; ++j)
  318. fprintf(file," %3d", H_band[j]);
  319. fprintf(file,"\n\n");
  320. }
  321. #endif
  322. const uint8 q = pattern[i];
  323. DEBUG_THIS_STATEMENT( fprintf(file,"U[%2u] = ", i) );
  324. // update F for the next row
  325. for (uint32 j = 0; j < BAND_LEN-1; ++j)
  326. {
  327. const int32 ftop = F_band[j+1] + G_e;
  328. const int32 htop = H_band[j+1] + G_o;
  329. F_band[j] = nvbio::max( ftop, htop );
  330. if (i && (j == BAND_LEN-2))
  331. assert( F_band[j] == htop );
  332. DEBUG_THIS_STATEMENT( fprintf(file," %c", ftop > htop ? 'F' : 'H') );
  333. }
  334. F_band[BAND_LEN-1] = infimum;
  335. //if (i < 8) fprintf(stderr,"\nU[%2u] = ", i);
  336. // j == 0 case
  337. {
  338. const int32 S_ij = (text[start+i] == q) ? V : S;
  339. const int32 diagonal = H_band[0] + S_ij;
  340. const int32 top = F_band[0];
  341. int32 hi = nvbio::max( top, diagonal );
  342. if (TYPE == LOCAL)
  343. {
  344. hi = nvbio::max( hi, int32(0) ); // clamp to zero
  345. best_score = nvbio::max( best_score, hi );
  346. }
  347. H_band[0] = hi;
  348. DEBUG_THIS_STATEMENT( fprintf(file," %c", top > diagonal ? 'I' : 'S') );
  349. }
  350. // compute E_1
  351. int32 E_j = H_band[0] + G_o;
  352. for (uint32 j = 1; j < BAND_LEN-1; ++j)
  353. {
  354. const uint32 g = text[start+i+j];
  355. const int32 S_ij = (g == q) ? V : S;
  356. const int32 diagonal = H_band[j] + S_ij;
  357. const int32 top = F_band[j];
  358. const int32 left = E_j;
  359. int32 hi = nvbio::max3( top, left, diagonal );
  360. if (TYPE == LOCAL)
  361. {
  362. hi = nvbio::max( hi, int32(0) ); // clamp to zero
  363. best_score = nvbio::max( best_score, hi );
  364. }
  365. H_band[j] = hi;
  366. DEBUG_THIS_STATEMENT( fprintf(file," %c", (top > left ? (top > diagonal ? 'I' : 'S') : (left > diagonal ? 'D' : 'S'))) );
  367. // update E for the next round, i.e. j+1
  368. const int32 eleft = E_j + G_e;
  369. const int32 ediagonal = hi + G_o;
  370. E_j = nvbio::max( ediagonal, eleft );
  371. }
  372. // j == BAND_LEN-1 case
  373. {
  374. const uint8 g = text[start+i+BAND_LEN-1];
  375. const int32 S_ij = (g == q) ? V : S;
  376. const int32 diagonal = H_band[ BAND_LEN-1 ] + S_ij;
  377. const int32 left = E_j;
  378. int32 hi = nvbio::max( left, diagonal );
  379. if (TYPE == LOCAL)
  380. {
  381. hi = nvbio::max( hi, int32(0) ); // clamp to zero
  382. best_score = nvbio::max( best_score, hi );
  383. }
  384. H_band[ BAND_LEN-1 ] = hi;
  385. DEBUG_THIS_STATEMENT( fprintf(file," %c", left > diagonal ? 'D' : 'S') );
  386. }
  387. DEBUG_THIS_STATEMENT( fprintf(file,"\n") );
  388. }
  389. DEBUG_THIS_STATEMENT(fclose(file));
  390. if (TYPE == GLOBAL)
  391. best_score = H_band[ BAND_LEN-1 ];
  392. else if (TYPE == SEMI_GLOBAL)
  393. {
  394. // get the highest score along the long edge of the path graph
  395. best_score = H_band[0];
  396. for (uint32 j = 1; j < BAND_LEN; ++j)
  397. best_score = nvbio::max( best_score, H_band[j] );
  398. }
  399. return best_score;
  400. #if defined(DEBUG_THIS)
  401. #undef DEBUG_THIS_STATEMENT
  402. #undef DEBUG_THIS
  403. #endif
  404. }
  405. template <uint32 M, uint32 N, aln::AlignmentType TYPE, typename scheme_type>
  406. int32 ref_sw(
  407. const uint8* str,
  408. const uint8* ref,
  409. const aln::SmithWatermanAligner<TYPE,scheme_type> aligner,
  410. ScoreMatrices<N,M,aln::SmithWatermanTag>* mat)
  411. {
  412. const scheme_type& scoring = aligner.scheme;
  413. const int32 G = scoring.deletion();
  414. const int32 I = scoring.insertion();
  415. const int32 S = scoring.mismatch();
  416. const int32 V = scoring.match();
  417. mat->H[0][0] = 0;
  418. mat->H_flow[0][0] = '*';
  419. for (uint32 j = 1; j <= M; ++j)
  420. {
  421. mat->H[0][j] = TYPE != LOCAL ? G * j : 0;
  422. mat->H_flow[0][j] = '*';
  423. }
  424. for (uint32 i = 1; i <= N; ++i)
  425. {
  426. mat->H[i][0] = TYPE == GLOBAL ? G * i : 0;
  427. mat->H_flow[i][0] = '*';
  428. }
  429. int32 best_score = Field_traits<int32>::min();
  430. for (uint32 i = 1; i <= N; ++i)
  431. {
  432. for (uint32 j = 1; j <= M; ++j)
  433. {
  434. const uint8 r_i = ref[i-1];
  435. const uint8 s_j = str[j-1];
  436. const int32 S_ij = (r_i == s_j) ? V : S;
  437. const int32 top = mat->H[i-1][j] + G;
  438. const int32 left = mat->H[i][j-1] + I;
  439. const int32 diag = mat->H[i-1][j-1] + S_ij;
  440. mat->H[i][j] = nvbio::max3( top, left, diag );
  441. mat->H_flow[i][j] = top > left ?
  442. (top > diag ? '|' : '\\') :
  443. (left > diag ? '-' : '\\');
  444. if (TYPE == aln::LOCAL)
  445. best_score = nvbio::max( best_score, mat->H[i][j] = nvbio::max( mat->H[i][j], int32(0) ) );
  446. }
  447. if (TYPE == aln::SEMI_GLOBAL)
  448. best_score = nvbio::max( best_score, mat->H[i][M] );
  449. }
  450. if (TYPE == aln::LOCAL || TYPE == aln::SEMI_GLOBAL)
  451. return best_score;
  452. else
  453. return mat->H[N][M];
  454. }
  455. template <uint32 M, uint32 N, aln::AlignmentType TYPE>
  456. int32 ref_sw(
  457. const uint8* str,
  458. const uint8* ref,
  459. const aln::EditDistanceAligner<TYPE> aligner,
  460. ScoreMatrices<N,M,aln::EditDistanceTag>* mat)
  461. {
  462. return ref_sw<M,N>(
  463. str,
  464. ref,
  465. aln::make_smith_waterman_aligner<TYPE>( priv::EditDistanceSWScheme() ),
  466. (ScoreMatrices<N,M,aln::SmithWatermanTag>*)mat );
  467. }
  468. template <uint32 M, uint32 N, aln::AlignmentType TYPE, typename scheme_type>
  469. int32 ref_sw(
  470. const uint8* str,
  471. const uint8* ref,
  472. const aln::GotohAligner<TYPE,scheme_type> aligner,
  473. ScoreMatrices<N,M,aln::GotohTag>* mat)
  474. {
  475. const scheme_type& scoring = aligner.scheme;
  476. const int32 G_o = scoring.pattern_gap_open();
  477. const int32 G_e = scoring.pattern_gap_extension();
  478. const int32 S = scoring.mismatch();
  479. const int32 V = scoring.match();
  480. mat->H[0][0] = 0;
  481. mat->F[0][0] = (TYPE != LOCAL) ? -100000 : 0;
  482. mat->E[0][0] = (TYPE != LOCAL) ? -100000 : 0;
  483. mat->H_flow[0][0] = '*';
  484. mat->E_flow[0][0] = '*';
  485. mat->F_flow[0][0] = '*';
  486. for (uint32 j = 1; j <= M; ++j)
  487. {
  488. mat->H[0][j] = (TYPE != LOCAL) ? G_o + G_e * (j-1) : 0;
  489. mat->E[0][j] = mat->F[0][j] = (TYPE != LOCAL) ? -100000 : 0;
  490. mat->H_flow[0][j] = '*';
  491. mat->E_flow[0][j] = '*';
  492. mat->F_flow[0][j] = '*';
  493. }
  494. for (uint32 i = 1; i <= N; ++i)
  495. {
  496. mat->H[i][0] = (TYPE == aln::GLOBAL) ? G_o + G_e * (i-1) : 0;
  497. mat->E[i][0] = mat->F[i][0] = (TYPE != LOCAL) ? -100000 : 0;
  498. mat->H_flow[i][0] = '*';
  499. mat->E_flow[i][0] = '*';
  500. mat->F_flow[i][0] = '*';
  501. }
  502. int32 best_score = Field_traits<int32>::min();
  503. uint2 best_sink = make_uint2(uint32(-1),uint32(-1));
  504. for (uint32 i = 1; i <= N; ++i)
  505. {
  506. for (uint32 j = 1; j <= M; ++j)
  507. {
  508. const uint8 r_i = ref[i-1];
  509. const uint8 s_j = str[j-1];
  510. const int32 S_ij = (r_i == s_j) ? V : S;
  511. mat->E_flow[i][j] = mat->E[i][j-1] + G_e > mat->H[i][j-1] + G_o ? '-' : 'H';
  512. mat->F_flow[i][j] = mat->F[i-1][j] + G_e > mat->H[i-1][j] + G_o ? '|' : 'H';
  513. mat->E[i][j] = nvbio::max( mat->E[i][j-1] + G_e, mat->H[i][j-1] + G_o );
  514. mat->F[i][j] = nvbio::max( mat->F[i-1][j] + G_e, mat->H[i-1][j] + G_o );
  515. mat->H_flow[i][j] = mat->F[i][j] > mat->E[i][j] ?
  516. (mat->F[i][j] > mat->H[i-1][j-1] + S_ij ? 'F' : '\\') :
  517. (mat->E[i][j] > mat->H[i-1][j-1] + S_ij ? 'E' : '\\');
  518. mat->H[i][j] = nvbio::max3( mat->H[i-1][j-1] + S_ij, mat->E[i][j], mat->F[i][j] );
  519. if (TYPE == aln::LOCAL)
  520. {
  521. mat->H[i][j] = nvbio::max( mat->H[i][j], int32(0) );
  522. if (mat->H[i][j] == 0)
  523. mat->H_flow[i][j] = '0';
  524. if (best_score < mat->H[i][j])
  525. {
  526. best_score = mat->H[i][j];
  527. best_sink = make_uint2( i, j );
  528. }
  529. }
  530. }
  531. if (TYPE == aln::SEMI_GLOBAL)
  532. {
  533. if (best_score < mat->H[i][M])
  534. {
  535. best_score = mat->H[i][M];
  536. best_sink = make_uint2( i, M );
  537. }
  538. }
  539. }
  540. if (TYPE == aln::LOCAL || TYPE == aln::SEMI_GLOBAL)
  541. return best_score;
  542. else
  543. return mat->H[N][M];
  544. }
  545. //
  546. // A test Backtracer class, \see Traceback.
  547. //
  548. struct TestBacktracker
  549. {
  550. void clear() { aln[0] = '\0'; aln_len = 0; }
  551. NVBIO_HOST_DEVICE
  552. void clip(const uint32 len)
  553. {
  554. for (uint32 i = 0; i < len; ++i)
  555. aln[aln_len+i] = 'S';
  556. aln[aln_len+len] = '\0';
  557. }
  558. NVBIO_HOST_DEVICE
  559. void push(const uint8 op)
  560. {
  561. aln[ aln_len++ ] = "MID"[op];
  562. }
  563. // compute the score of the resulting alignment
  564. //
  565. template <AlignmentType TYPE>
  566. int32 score(
  567. EditDistanceAligner<TYPE> aligner,
  568. const uint32 offset,
  569. const uint8* str,
  570. const uint8* ref)
  571. {
  572. int32 score = 0;
  573. for (uint32 a = 0, j = 0, k = offset; a < aln_len; ++a)
  574. {
  575. const char op = aln[aln_len-a-1];
  576. if (op == 'S')
  577. ++j;
  578. else if (op == 'I')
  579. {
  580. // apply a gap-open or gap-extension penalty according to whether it's the first insertion or not
  581. --score;
  582. // mover in the pattern
  583. ++j;
  584. }
  585. else if (op == 'D')
  586. {
  587. // apply a gap-open or gap-extension penalty according to whether it's the first deletion or not
  588. --score;
  589. // move in the text
  590. ++k;
  591. }
  592. else if (op == 'M')
  593. {
  594. // add a match or mismatch score
  595. score -= (str[j] == ref[k]) ? 0 : 1;
  596. // move diagonally
  597. ++j; ++k;
  598. }
  599. }
  600. return score;
  601. }
  602. // compute the score of the resulting alignment
  603. //
  604. template <AlignmentType TYPE, typename scoring_type>
  605. int32 score(
  606. SmithWatermanAligner<TYPE,scoring_type> aligner,
  607. const uint32 offset,
  608. const uint8* str,
  609. const uint8* ref)
  610. {
  611. const scoring_type& scoring = aligner.scheme;
  612. int32 score = 0;
  613. for (uint32 a = 0, j = 0, k = offset; a < aln_len; ++a)
  614. {
  615. const char op = aln[aln_len-a-1];
  616. if (op == 'S')
  617. ++j;
  618. else if (op == 'I')
  619. {
  620. // apply a gap-open or gap-extension penalty according to whether it's the first insertion or not
  621. score += scoring.insertion();
  622. // mover in the pattern
  623. ++j;
  624. }
  625. else if (op == 'D')
  626. {
  627. // apply a gap-open or gap-extension penalty according to whether it's the first deletion or not
  628. score += scoring.deletion();
  629. // move in the text
  630. ++k;
  631. }
  632. else if (op == 'M')
  633. {
  634. // add a match or mismatch score
  635. score += (str[j] == ref[k]) ? scoring.match() : scoring.mismatch();
  636. // move diagonally
  637. ++j; ++k;
  638. }
  639. }
  640. return score;
  641. }
  642. // compute the score of the resulting alignment
  643. //
  644. template <AlignmentType TYPE, typename scoring_type>
  645. int32 score(
  646. GotohAligner<TYPE,scoring_type> aligner,
  647. const uint32 offset,
  648. const uint8* str,
  649. const uint8* ref)
  650. {
  651. const scoring_type& scoring = aligner.scheme;
  652. int32 score = 0;
  653. char state = 'S';
  654. for (uint32 a = 0, j = 0, k = offset; a < aln_len; ++a)
  655. {
  656. const char op = aln[aln_len-a-1];
  657. if (op == 'S')
  658. ++j;
  659. else if (op == 'I')
  660. {
  661. // apply a gap-open or gap-extension penalty according to whether it's the first insertion or not
  662. score += (state != op) ? scoring.pattern_gap_open() : scoring.pattern_gap_extension();
  663. // mover in the pattern
  664. ++j;
  665. }
  666. else if (op == 'D')
  667. {
  668. // apply a gap-open or gap-extension penalty according to whether it's the first deletion or not
  669. score += (state != op) ? scoring.text_gap_open() : scoring.text_gap_extension();
  670. // move in the text
  671. ++k;
  672. }
  673. else if (op == 'M')
  674. {
  675. // add a match or mismatch score
  676. score += (str[j] == ref[k]) ? scoring.match() : scoring.mismatch();
  677. // move diagonally
  678. ++j; ++k;
  679. }
  680. // store new state
  681. state = op;
  682. }
  683. return score;
  684. }
  685. char aln[1024];
  686. uint32 aln_len;
  687. };
  688. } // namespace sw
  689. } // namespace nvbio