rank_test.cu 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // rank_test.cu
  28. //
  29. #define MOD_NAMESPACE
  30. #define MOD_NAMESPACE_NAME fmitest
  31. #define MOD_NAMESPACE_BEGIN namespace fmitest {
  32. #define MOD_NAMESPACE_END }
  33. //#define CUFMI_CUDA_DEBUG
  34. //#define CUFMI_CUDA_ASSERTS
  35. #include <stdio.h>
  36. #include <stdlib.h>
  37. #include <vector>
  38. #include <algorithm>
  39. #include <nvbio/basic/timer.h>
  40. #include <nvbio/basic/console.h>
  41. #include <nvbio/basic/dna.h>
  42. #include <nvbio/basic/cached_iterator.h>
  43. #include <nvbio/basic/packedstream.h>
  44. #include <nvbio/basic/deinterleaved_iterator.h>
  45. #include <nvbio/fmindex/bwt.h>
  46. #include <nvbio/fmindex/rank_dictionary.h>
  47. namespace nvbio {
  48. namespace { // anonymous namespace
  49. template <typename rank_dict_type, typename index_type>
  50. void do_test(const index_type LEN, const rank_dict_type& dict)
  51. {
  52. typedef StaticVector<index_type,4> vec4;
  53. vec4 counts(0u);
  54. for (index_type i = 0; i < LEN; ++i)
  55. {
  56. counts[ dict.text()[i] ]++;
  57. for (uint8 c = 0; c < 4; ++c)
  58. {
  59. const index_type r = rank( dict, i, c );
  60. if (r != counts[c])
  61. {
  62. log_error(stderr, " rank mismatch at [%u:%u]: expected %u, got %u\n", uint32(i), uint32(c), uint32(counts[c]), uint32(r));
  63. exit(1);
  64. }
  65. }
  66. vec4 r4 = rank_all( dict, i );
  67. if (r4 != counts)
  68. {
  69. log_error(stderr, " rank mismatch at [%u]: expected (%u,%u,%u,%u), got (%u,%u,%u,%u)\n", uint32(i),
  70. (uint32)counts[0], (uint32)counts[1], (uint32)counts[2], (uint32)counts[3],
  71. (uint32)r4[0], (uint32)r4[1], (uint32)r4[2], (uint32)r4[3]);
  72. exit(1);
  73. }
  74. }
  75. }
  76. void synthetic_test(const uint32 LEN)
  77. {
  78. // 32-bits test
  79. {
  80. fprintf(stderr, " 32-bit test\n");
  81. const uint32 OCC_INT = 64;
  82. const uint32 WORDS = (LEN+15)/16;
  83. const uint32 OCC_WORDS = ((LEN+OCC_INT-1) / OCC_INT) * 4;
  84. Timer timer;
  85. const uint64 memory_footprint =
  86. sizeof(uint32)*WORDS +
  87. sizeof(uint32)*OCC_WORDS;
  88. fprintf(stderr, " memory : %.1f MB\n", float(memory_footprint)/float(1024*1024));
  89. thrust::host_vector<uint32> text_storage( align<4>(WORDS), 0u );
  90. thrust::host_vector<uint32> occ( align<4>(WORDS), 0u );
  91. thrust::host_vector<uint32> count_table( 256 );
  92. // initialize the text
  93. {
  94. typedef PackedStream<uint32*,uint8,2,true> stream_type;
  95. stream_type text( &text_storage[0] );
  96. for (uint32 i = 0; i < LEN; ++i)
  97. text[i] = (rand() % 4);
  98. // print the string
  99. if (LEN < 64)
  100. {
  101. char string[64];
  102. dna_to_string(
  103. text.begin(),
  104. text.begin() + LEN,
  105. string );
  106. fprintf(stderr, " string : %s\n", string);
  107. }
  108. uint32 L2[5];
  109. // build the occurrence table
  110. build_occurrence_table<2u,OCC_INT>(
  111. text.begin(),
  112. text.begin() + LEN,
  113. &occ[0],
  114. &L2[1] );
  115. }
  116. // generate the count table
  117. gen_bwt_count_table( &count_table[0] );
  118. // test uint32 support
  119. {
  120. typedef PackedStream<const uint32*,uint8,2,true> stream_type;
  121. stream_type text( &text_storage[0] );
  122. typedef rank_dictionary<2u, OCC_INT, stream_type, const uint32*, const uint32*> rank_dict_type;
  123. rank_dict_type dict(
  124. text,
  125. &occ[0],
  126. &count_table[0] );
  127. do_test( LEN, dict );
  128. }
  129. // test uint4 support
  130. {
  131. typedef PackedStream<const uint4*,uint8,2,true> stream_type;
  132. stream_type text( (const uint4*)&text_storage[0] );
  133. typedef rank_dictionary<2u, OCC_INT, stream_type, const uint4*, const uint32*> rank_dict_type;
  134. rank_dict_type dict(
  135. text,
  136. (const uint4*)&occ[0],
  137. &count_table[0] );
  138. do_test( LEN, dict );
  139. }
  140. }
  141. // 64-bits test
  142. {
  143. fprintf(stderr, " 64-bit test\n");
  144. const uint32 OCC_INT = 128;
  145. const uint32 WORDS = (LEN+31)/32;
  146. const uint32 OCC_WORDS = ((LEN+OCC_INT-1) / OCC_INT) * 4;
  147. Timer timer;
  148. const uint64 memory_footprint =
  149. sizeof(uint64)*WORDS +
  150. sizeof(uint64)*OCC_WORDS;
  151. fprintf(stderr, " memory : %.1f MB\n", float(memory_footprint)/float(1024*1024));
  152. thrust::host_vector<uint64> text_storage( align<4>(WORDS), 0u );
  153. thrust::host_vector<uint64> occ( align<4>(WORDS), 0u );
  154. thrust::host_vector<uint32> count_table( 256 );
  155. // initialize the text
  156. {
  157. typedef PackedStream<uint64*,uint8,2,true,uint64> stream_type;
  158. stream_type text( &text_storage[0] );
  159. for (uint32 i = 0; i < LEN; ++i)
  160. text[i] = (rand() % 4);
  161. // print the string
  162. if (LEN < 64)
  163. {
  164. char string[64];
  165. dna_to_string(
  166. text.begin(),
  167. text.begin() + LEN,
  168. string );
  169. fprintf(stderr, " string : %s\n", string);
  170. }
  171. uint64 L2[5];
  172. // build the occurrence table
  173. build_occurrence_table<2u,OCC_INT>(
  174. text.begin(),
  175. text.begin() + LEN,
  176. &occ[0],
  177. &L2[1] );
  178. }
  179. // generate the count table
  180. gen_bwt_count_table( &count_table[0] );
  181. // test uint64 support
  182. {
  183. typedef PackedStream<const uint64*,uint8,2,true,uint64> stream_type;
  184. stream_type text( &text_storage[0] );
  185. typedef rank_dictionary<2u, OCC_INT, stream_type, const uint64*, const uint32*> rank_dict_type;
  186. rank_dict_type dict(
  187. text,
  188. &occ[0],
  189. &count_table[0] );
  190. do_test( uint64(LEN), dict );
  191. }
  192. }
  193. }
  194. } // anonymous namespace
  195. int rank_test(int argc, char* argv[])
  196. {
  197. uint32 len = 10000000;
  198. for (int i = 0; i < argc; ++i)
  199. {
  200. if (strcmp( argv[i], "-length" ) == 0)
  201. len = atoi( argv[++i] )*1000;
  202. }
  203. fprintf(stderr, "rank test... started\n");
  204. synthetic_test( len );
  205. fprintf(stderr, "rank test... done\n");
  206. return 0;
  207. }
  208. } // namespace nvbio