string_set_test.cu 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // string_set_test.cu
  28. //
  29. #include <stdio.h>
  30. #include <stdlib.h>
  31. #include <vector>
  32. #include <algorithm>
  33. #include <nvbio/basic/timer.h>
  34. #include <nvbio/basic/console.h>
  35. #include <nvbio/basic/cuda/arch.h>
  36. #include <nvbio/basic/cuda/ldg.h>
  37. #include <nvbio/strings/string_set.h>
  38. #include <thrust/device_vector.h>
  39. namespace nvbio {
  // Private namespace holding the bit flags used to select which string-set
  // copy tests are executed by this file.
  namespace string_set_private {

  // Each individual test owns one bit; the flags are OR-ed together into a
  // single mask and queried with bitwise AND.
  enum StringSetTest
  {
      // individual copy tests, one bit each
      SPARSE_TO_CONCAT                = 1u << 0,
      SPARSE_TO_PACKED_CONCAT         = 1u << 1,
      CONCAT_TO_PACKED_CONCAT         = 1u << 2,
      CONCAT_TO_STRIDED               = 1u << 3,
      SPARSE_TO_STRIDED               = 1u << 4,
      PACKED_CONCAT_TO_STRIDED        = 1u << 5,
      PACKED_SPARSE_TO_STRIDED        = 1u << 6,
      STRIDED_PACKED_TO_STRIDED       = 1u << 7,
      CONCAT_TO_STRIDED_PACKED        = 1u << 8,
      PACKED_CONCAT_TO_STRIDED_PACKED = 1u << 9,
      PACKED_SPARSE_TO_STRIDED_PACKED = 1u << 10,

      // aggregate masks: the low 28 bits cover every individual test above,
      // the top 4 bits are reserved for the CPU selector
      ALL = 0x0FFFFFFFu,
      CPU = 0xF0000000u
  };

  }
  58. using namespace string_set_private;
  59. template <typename input_set, typename output_set>
  60. void check(const input_set& in_string_set, const output_set& out_string_set)
  61. {
  62. if (in_string_set.size() != out_string_set.size())
  63. {
  64. fprintf(stderr, " \nerror: input set has size %u, output has %u\n", in_string_set.size(), out_string_set.size() );
  65. exit(1);
  66. }
  67. // check that the string sets match
  68. for (uint32 i = 0; i < in_string_set.size(); ++i)
  69. {
  70. typename input_set::string_type in_string = in_string_set[i];
  71. typename output_set::string_type out_string = out_string_set[i];
  72. const uint32 in_len = in_string.size();
  73. const uint32 out_len = out_string.size();
  74. if (in_len != out_len)
  75. {
  76. fprintf(stderr, " \nerror: input string[%u] has length %u, output has length %u\n", i, in_len, out_len );
  77. exit(1);
  78. }
  79. for (uint32 j = 0; j < in_len; ++j)
  80. {
  81. const uint8 in_c = in_string[j];
  82. const uint8 out_c = out_string[j];
  83. if (in_c != out_c)
  84. {
  85. fprintf(stderr, " \nerror: at string[%u][%u] expected : %u, got %u\n", i, j, uint32(in_c), uint32(out_c) );
  86. exit(1);
  87. }
  88. }
  89. }
  90. }
  91. void make_test_string_set(
  92. const uint32 SYMBOL_SIZE,
  93. const uint32 N_strings,
  94. const uint32 N,
  95. const uint32 N_spacing,
  96. thrust::host_vector<uint8>& h_string,
  97. thrust::host_vector<uint2>& h_ranges)
  98. {
  99. h_string.resize( N_strings * N_spacing );
  100. h_ranges.resize( N_strings );
  101. LCG_random rand;
  102. for (uint32 i = 0; i < N_strings; ++i)
  103. {
  104. h_ranges[i] = make_uint2( N_spacing*i, N_spacing*i + N );
  105. for (uint32 j = 0; j < N_spacing; ++j)
  106. h_string[ i * N_spacing + j ] = rand.next() & ((1u << SYMBOL_SIZE)-1);
  107. }
  108. }
  109. int string_set_test(int argc, char* argv[])
  110. {
  111. fprintf(stderr, "nvbio/basic/string_set test... started\n");
  112. const uint32 N = 128;
  113. const uint32 N_spacing = 150;
  114. const uint32 SYMBOL_SIZE = 4;
  115. const uint32 SYMBOLS_PER_WORD = (8u*sizeof(uint32)) / SYMBOL_SIZE;
  116. const uint32 N_words = (N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
  117. uint32 N_strings = 256*1024;
  118. uint32 N_tests = 20;
  119. uint32 TEST_MASK = ALL;
  120. uint32 CPU_MASK = CPU;
  121. for (int i = 0; i < argc; ++i)
  122. {
  123. if (strcmp( argv[i], "-N" ) == 0)
  124. N_strings = atoi( argv[++i] );
  125. else if (strcmp( argv[i], "-N-tests" ) == 0)
  126. N_tests = atoi( argv[++i] );
  127. else if (strcmp( argv[i], "-cpu" ) == 0)
  128. CPU_MASK = atoi( argv[++i] );
  129. else if (strcmp( argv[i], "-tests" ) == 0)
  130. {
  131. const std::string tests_string( argv[++i] );
  132. char temp[256];
  133. const char* begin = tests_string.c_str();
  134. const char* end = begin;
  135. TEST_MASK = 0u;
  136. while (1)
  137. {
  138. while (*end != ':' && *end != '\0')
  139. {
  140. temp[end - begin] = *end;
  141. end++;
  142. }
  143. temp[end - begin] = '\0';
  144. if (strcmp( temp, "packed-sparse-to-strided" ) == 0)
  145. TEST_MASK |= PACKED_SPARSE_TO_STRIDED;
  146. else if (strcmp( temp, "packed-sparse-to-strided-packed" ) == 0)
  147. TEST_MASK |= PACKED_SPARSE_TO_STRIDED_PACKED;
  148. else if (strcmp( temp, "sparse-to-concat" ) == 0)
  149. TEST_MASK |= SPARSE_TO_CONCAT;
  150. else if (strcmp( temp, "sparse-to-packed-concat" ) == 0)
  151. TEST_MASK |= SPARSE_TO_PACKED_CONCAT;
  152. else if (strcmp( temp, "concat-to-packed-concat" ) == 0)
  153. TEST_MASK |= CONCAT_TO_PACKED_CONCAT;
  154. else if (strcmp( temp, "sparse-to-strided" ) == 0)
  155. TEST_MASK |= SPARSE_TO_STRIDED;
  156. else if (strcmp( temp, "packed-concat-to-strided" ) == 0)
  157. TEST_MASK |= PACKED_CONCAT_TO_STRIDED;
  158. else if (strcmp( temp, "packed-sparse-to-strided" ) == 0)
  159. TEST_MASK |= PACKED_SPARSE_TO_STRIDED;
  160. else if (strcmp( temp, "strided-packed-to-strided" ) == 0)
  161. TEST_MASK |= STRIDED_PACKED_TO_STRIDED;
  162. else if (strcmp( temp, "concat-to-strided-packed" ) == 0)
  163. TEST_MASK |= CONCAT_TO_STRIDED_PACKED;
  164. else if (strcmp( temp, "packed-concat-to-strided-packed" ) == 0)
  165. TEST_MASK |= PACKED_CONCAT_TO_STRIDED_PACKED;
  166. else if (strcmp( temp, "packed-sparse-to-strided-packed" ) == 0)
  167. TEST_MASK |= PACKED_SPARSE_TO_STRIDED_PACKED;
  168. if (end[i] == '\0')
  169. break;
  170. ++end; begin = end;
  171. }
  172. }
  173. }
  174. TEST_MASK |= CPU_MASK;
  175. typedef SparseStringSet<uint8*,uint2*> base_string_set;
  176. thrust::host_vector<uint8> h_base_string;
  177. thrust::host_vector<uint2> h_base_ranges;
  178. make_test_string_set(
  179. SYMBOL_SIZE,
  180. N_strings,
  181. N,
  182. N_spacing,
  183. h_base_string,
  184. h_base_ranges );
  185. thrust::device_vector<uint8> d_base_string( h_base_string );
  186. thrust::device_vector<uint2> d_base_ranges( h_base_ranges );
  187. base_string_set h_base_string_set(
  188. N_strings,
  189. thrust::raw_pointer_cast( &h_base_string.front() ),
  190. thrust::raw_pointer_cast( &h_base_ranges.front() ) );
  191. base_string_set d_base_string_set(
  192. N_strings,
  193. thrust::raw_pointer_cast( &d_base_string.front() ),
  194. thrust::raw_pointer_cast( &d_base_ranges.front() ) );
  195. // copy a packed sparse string set into a strided string set (CPU path)
  196. if ((TEST_MASK & PACKED_SPARSE_TO_STRIDED) && (TEST_MASK & CPU))
  197. {
  198. fprintf(stderr, " test cpu packed-sparse -> strided copy... started\n");
  199. const uint32 N_words = (N_spacing + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
  200. typedef PackedStream<uint32*,uint8,SYMBOL_SIZE,false> packed_stream_type;
  201. typedef SparseStringSet<packed_stream_type,const uint2*> input_set;
  202. typedef StridedStringSet<
  203. uint8*,
  204. uint32*> output_set;
  205. thrust::host_vector<uint32> h_in_string( N_strings * N_words );
  206. thrust::host_vector<uint2> h_in_ranges( N_strings );
  207. packed_stream_type h_packed_stream(
  208. thrust::raw_pointer_cast( &h_in_string.front() ) );
  209. LCG_random rand;
  210. for (uint32 i = 0; i < N_strings; ++i)
  211. {
  212. h_in_ranges[i] = make_uint2( N_spacing*i, N_spacing*i + N );
  213. for (uint32 j = 0; j < N_spacing; ++j)
  214. h_packed_stream[ i * N_spacing + j ] = rand.next() & ((1u << SYMBOL_SIZE) - 1u);
  215. }
  216. input_set h_in_string_set(
  217. N_strings,
  218. h_packed_stream,
  219. thrust::raw_pointer_cast( &h_in_ranges.front() ) );
  220. // build the host output string set
  221. thrust::host_vector<uint8> h_out_stream( N_strings * N );
  222. thrust::host_vector<uint32> h_out_lengths( N_strings );
  223. output_set h_out_string_set(
  224. N_strings,
  225. N_strings,
  226. thrust::raw_pointer_cast( &h_out_stream.front() ),
  227. thrust::raw_pointer_cast( &h_out_lengths.front() ) );
  228. Timer timer;
  229. timer.start();
  230. // copy the input set into the output set
  231. for (uint32 i = 0; i < N_tests; ++i)
  232. copy( h_in_string_set, h_out_string_set );
  233. timer.stop();
  234. fprintf(stderr, " test cpu packed-sparse -> strided copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  235. }
  236. // copy a packed sparse string set into a strided packed string set
  237. if ((TEST_MASK & PACKED_SPARSE_TO_STRIDED_PACKED) && (TEST_MASK & CPU))
  238. {
  239. fprintf(stderr, " test cpu packed-sparse -> strided-packed copy... started\n");
  240. const uint32 N_words = (N_spacing + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
  241. typedef PackedStream<uint32*,uint8,SYMBOL_SIZE,false> packed_stream_type;
  242. typedef SparseStringSet<packed_stream_type,const uint2*> input_set;
  243. typedef StridedPackedStringSet<
  244. uint32*,
  245. uint8,
  246. SYMBOL_SIZE,
  247. false,
  248. uint32*> output_set;
  249. thrust::host_vector<uint32> h_in_string( N_strings * N_words );
  250. thrust::host_vector<uint2> h_in_ranges( N_strings );
  251. packed_stream_type h_packed_stream(
  252. thrust::raw_pointer_cast( &h_in_string.front() ) );
  253. LCG_random rand;
  254. for (uint32 i = 0; i < N_strings; ++i)
  255. {
  256. h_in_ranges[i] = make_uint2( N_spacing*i, N_spacing*i + N );
  257. for (uint32 j = 0; j < N_spacing; ++j)
  258. h_packed_stream[ i * N_spacing + j ] = rand.next() & ((1u << SYMBOL_SIZE) - 1u);
  259. }
  260. input_set h_in_string_set(
  261. N_strings,
  262. h_packed_stream,
  263. thrust::raw_pointer_cast( &h_in_ranges.front() ) );
  264. // build the output string set
  265. thrust::host_vector<uint32> h_out_stream( N_strings * N_words );
  266. thrust::host_vector<uint32> h_out_lengths( N_strings );
  267. output_set h_out_string_set(
  268. N_strings,
  269. N_strings,
  270. thrust::raw_pointer_cast( &h_out_stream.front() ),
  271. thrust::raw_pointer_cast( &h_out_lengths.front() ) );
  272. Timer timer;
  273. timer.start();
  274. // copy the input set into the output set
  275. for (uint32 i = 0; i < N_tests; ++i)
  276. copy( h_in_string_set, h_out_string_set );
  277. timer.stop();
  278. fprintf(stderr, " test cpu packed-sparse -> strided-packed copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  279. }
  280. // copy a sparse string set into a concatenated one
  281. if (TEST_MASK & SPARSE_TO_CONCAT)
  282. {
  283. fprintf(stderr, " test sparse -> concat copy... started\n");
  284. typedef base_string_set input_set;
  285. typedef ConcatenatedStringSet<uint8*,uint32*> output_set;
  286. // build the device output string set
  287. thrust::device_vector<uint8> d_out_string( N_strings * N );
  288. thrust::device_vector<uint32> d_out_offsets( N_strings+1 );
  289. output_set d_out_string_set(
  290. N_strings,
  291. thrust::raw_pointer_cast( &d_out_string.front() ),
  292. thrust::raw_pointer_cast( &d_out_offsets.front() ) );
  293. Timer timer;
  294. timer.start();
  295. for (uint32 i = 0; i < N_tests; ++i)
  296. cuda::copy( d_base_string_set, d_out_string_set );
  297. timer.stop();
  298. // build the host output string set
  299. thrust::host_vector<uint8> h_out_string( d_out_string );
  300. thrust::host_vector<uint32> h_out_offsets( d_out_offsets );
  301. output_set h_out_string_set(
  302. N_strings,
  303. thrust::raw_pointer_cast( &h_out_string.front() ),
  304. thrust::raw_pointer_cast( &h_out_offsets.front() ) );
  305. // check that the string sets match
  306. check( h_base_string_set, h_out_string_set );
  307. fprintf(stderr, " test sparse -> concat copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  308. }
  309. // copy a sparse string set into a packed concatenated one
  310. if (TEST_MASK & SPARSE_TO_PACKED_CONCAT)
  311. {
  312. fprintf(stderr, " test sparse -> packed-concat copy... started\n");
  313. typedef PackedStream<uint32*,uint8,SYMBOL_SIZE,false> packed_stream_type;
  314. typedef base_string_set input_set;
  315. typedef ConcatenatedStringSet<packed_stream_type,uint32*> output_set;
  316. // build the device output string set
  317. thrust::device_vector<uint32> d_out_string( N_strings * N_words );
  318. thrust::device_vector<uint32> d_out_offsets( N_strings+1 );
  319. packed_stream_type d_packed_stream(
  320. thrust::raw_pointer_cast( &d_out_string.front() ) );
  321. output_set d_out_string_set(
  322. N_strings,
  323. d_packed_stream,
  324. thrust::raw_pointer_cast( &d_out_offsets.front() ) );
  325. Timer timer;
  326. timer.start();
  327. for (uint32 i = 0; i < N_tests; ++i)
  328. cuda::copy( d_base_string_set, d_out_string_set );
  329. timer.stop();
  330. // build the host output string set
  331. thrust::host_vector<uint32> h_out_string( d_out_string );
  332. thrust::host_vector<uint32> h_out_offsets( d_out_offsets );
  333. packed_stream_type h_packed_stream(
  334. thrust::raw_pointer_cast( &h_out_string.front() ) );
  335. output_set h_out_string_set(
  336. N_strings,
  337. h_packed_stream,
  338. thrust::raw_pointer_cast( &h_out_offsets.front() ) );
  339. // check that the string sets match
  340. check( h_base_string_set, h_out_string_set );
  341. fprintf(stderr, " test sparse -> packed-concat copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  342. }
  343. // copy a concatenated string set into a packed concatenated one
  344. if (TEST_MASK & CONCAT_TO_PACKED_CONCAT)
  345. {
  346. fprintf(stderr, " test concat -> packed-concat copy... started\n");
  347. typedef PackedStream<uint32*,uint8,SYMBOL_SIZE,false> packed_stream_type;
  348. typedef ConcatenatedStringSet<uint8*,uint32*> input_set;
  349. typedef ConcatenatedStringSet<packed_stream_type,uint32*> output_set;
  350. // build the device input string set
  351. thrust::device_vector<uint8> d_in_string( N_strings * N );
  352. thrust::device_vector<uint32> d_in_offsets( N_strings+1 );
  353. input_set d_in_string_set(
  354. N_strings,
  355. thrust::raw_pointer_cast( &d_in_string.front() ),
  356. thrust::raw_pointer_cast( &d_in_offsets.front() ) );
  357. // copy the base string set into the input one
  358. cuda::copy( d_base_string_set, d_in_string_set );
  359. // build the device output string set
  360. thrust::device_vector<uint32> d_out_string( N_strings * N_words );
  361. thrust::device_vector<uint32> d_out_offsets( N_strings+1 );
  362. packed_stream_type d_packed_stream(
  363. thrust::raw_pointer_cast( &d_out_string.front() ) );
  364. output_set d_out_string_set(
  365. N_strings,
  366. d_packed_stream,
  367. thrust::raw_pointer_cast( &d_out_offsets.front() ) );
  368. Timer timer;
  369. timer.start();
  370. for (uint32 i = 0; i < N_tests; ++i)
  371. cuda::copy( d_in_string_set, d_out_string_set );
  372. timer.stop();
  373. // build the host output string set
  374. thrust::host_vector<uint32> h_out_string( d_out_string );
  375. thrust::host_vector<uint32> h_out_offsets( d_out_offsets );
  376. packed_stream_type h_packed_stream(
  377. thrust::raw_pointer_cast( &h_out_string.front() ) );
  378. output_set h_out_string_set(
  379. N_strings,
  380. h_packed_stream,
  381. thrust::raw_pointer_cast( &h_out_offsets.front() ) );
  382. // check that the string sets match
  383. check( h_base_string_set, h_out_string_set );
  384. fprintf(stderr, " test concat -> packed-concat copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  385. }
  386. // copy a sparse string set into a strided one
  387. if (TEST_MASK & SPARSE_TO_STRIDED)
  388. {
  389. fprintf(stderr, " test sparse -> strided copy... started\n");
  390. typedef base_string_set input_set;
  391. typedef StridedStringSet<uint8*,uint32*> output_set;
  392. // build the device output string set
  393. thrust::device_vector<uint8> d_out_string( N_strings * N );
  394. thrust::device_vector<uint32> d_out_lengths( N_strings+1 );
  395. output_set d_out_string_set(
  396. N_strings,
  397. N_strings,
  398. thrust::raw_pointer_cast( &d_out_string.front() ),
  399. thrust::raw_pointer_cast( &d_out_lengths.front() ) );
  400. Timer timer;
  401. timer.start();
  402. for (uint32 i = 0; i < N_tests; ++i)
  403. cuda::copy( d_base_string_set, d_out_string_set );
  404. timer.stop();
  405. // build the host output string set
  406. thrust::host_vector<uint8> h_out_string( d_out_string );
  407. thrust::host_vector<uint32> h_out_lengths( d_out_lengths );
  408. output_set h_out_string_set(
  409. N_strings,
  410. N_strings,
  411. thrust::raw_pointer_cast( &h_out_string.front() ),
  412. thrust::raw_pointer_cast( &h_out_lengths.front() ) );
  413. // check that the string sets match
  414. check( h_base_string_set, h_out_string_set );
  415. fprintf(stderr, " test sparse -> strided copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  416. }
  417. // copy a packed concatenated string set into a strided string set
  418. if (TEST_MASK & PACKED_CONCAT_TO_STRIDED)
  419. {
  420. fprintf(stderr, " test packed-concat -> strided copy... started\n");
  421. const uint32 N_words = (N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
  422. typedef PackedStream<uint32*,uint8,SYMBOL_SIZE,false> packed_stream_type;
  423. typedef ConcatenatedStringSet<packed_stream_type,uint32*> input_set;
  424. typedef StridedStringSet<
  425. uint8*,
  426. uint32*> output_set;
  427. // build the device input string set
  428. thrust::device_vector<uint32> d_in_string( N_strings * N_words );
  429. thrust::device_vector<uint32> d_in_offsets( N_strings+1 );
  430. packed_stream_type d_packed_stream(
  431. thrust::raw_pointer_cast( &d_in_string.front() ) );
  432. input_set d_in_string_set(
  433. N_strings,
  434. d_packed_stream,
  435. thrust::raw_pointer_cast( &d_in_offsets.front() ) );
  436. // copy the base string set into the input set
  437. cuda::copy( d_base_string_set, d_in_string_set );
  438. // build the device output string set
  439. thrust::device_vector<uint8> d_out_stream( N_strings * N );
  440. thrust::device_vector<uint32> d_out_lengths( N_strings );
  441. output_set d_out_string_set(
  442. N_strings,
  443. N_strings,
  444. thrust::raw_pointer_cast( &d_out_stream.front() ),
  445. thrust::raw_pointer_cast( &d_out_lengths.front() ) );
  446. Timer timer;
  447. timer.start();
  448. // copy the input set into the output set
  449. for (uint32 i = 0; i < N_tests; ++i)
  450. cuda::copy( d_in_string_set, d_out_string_set );
  451. timer.stop();
  452. // build the host output string set
  453. thrust::host_vector<uint8> h_out_stream( d_out_stream );
  454. thrust::host_vector<uint32> h_out_lengths( d_out_lengths );
  455. output_set h_out_string_set(
  456. N_strings,
  457. N_strings,
  458. thrust::raw_pointer_cast( &h_out_stream.front() ),
  459. thrust::raw_pointer_cast( &h_out_lengths.front() ) );
  460. // check that the string sets match
  461. check( h_base_string_set, h_out_string_set );
  462. fprintf(stderr, " test packed-concat -> strided copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  463. }
  464. // copy a packed sparse string set into a strided string set
  465. if (TEST_MASK & PACKED_SPARSE_TO_STRIDED)
  466. {
  467. fprintf(stderr, " test packed-sparse -> strided copy... started\n");
  468. const uint32 N_words = (N_spacing + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
  469. typedef PackedStream<uint32*,uint8,SYMBOL_SIZE,false> packed_stream_type;
  470. typedef PackedStream<cuda::ldg_pointer<uint32>,uint8,SYMBOL_SIZE,false> tex_packed_stream_type;
  471. typedef SparseStringSet<packed_stream_type,const uint2*> input_set;
  472. typedef SparseStringSet<tex_packed_stream_type,const uint2*> tex_input_set;
  473. typedef StridedStringSet<
  474. uint8*,
  475. uint32*> output_set;
  476. thrust::host_vector<uint32> h_in_string( N_strings * N_words );
  477. thrust::host_vector<uint2> h_in_ranges( N_strings );
  478. packed_stream_type h_packed_stream(
  479. thrust::raw_pointer_cast( &h_in_string.front() ) );
  480. LCG_random rand;
  481. for (uint32 i = 0; i < N_strings; ++i)
  482. {
  483. h_in_ranges[i] = make_uint2( N_spacing*i, N_spacing*i + N );
  484. for (uint32 j = 0; j < N_spacing; ++j)
  485. h_packed_stream[ i * N_spacing + j ] = rand.next() & ((1u << SYMBOL_SIZE) - 1u);
  486. }
  487. // build the device input string set
  488. thrust::device_vector<uint32> d_in_string( h_in_string );
  489. thrust::device_vector<uint2> d_in_ranges( h_in_ranges );
  490. packed_stream_type d_packed_stream(
  491. thrust::raw_pointer_cast( &d_in_string.front() ) );
  492. input_set d_in_string_set(
  493. N_strings,
  494. d_packed_stream,
  495. thrust::raw_pointer_cast( &d_in_ranges.front() ) );
  496. // build the device output string set
  497. thrust::device_vector<uint8> d_out_stream( N_strings * N );
  498. thrust::device_vector<uint32> d_out_lengths( N_strings );
  499. output_set d_out_string_set(
  500. N_strings,
  501. N_strings,
  502. thrust::raw_pointer_cast( &d_out_stream.front() ),
  503. thrust::raw_pointer_cast( &d_out_lengths.front() ) );
  504. Timer timer;
  505. timer.start();
  506. // copy the input set into the output set
  507. for (uint32 i = 0; i < N_tests; ++i)
  508. cuda::copy( d_in_string_set, d_out_string_set );
  509. timer.stop();
  510. // build the host input string set
  511. input_set h_in_string_set(
  512. N_strings,
  513. h_packed_stream,
  514. thrust::raw_pointer_cast( &h_in_ranges.front() ) );
  515. // build the host output string set
  516. thrust::host_vector<uint8> h_out_stream( d_out_stream );
  517. thrust::host_vector<uint32> h_out_lengths( d_out_lengths );
  518. output_set h_out_string_set(
  519. N_strings,
  520. N_strings,
  521. thrust::raw_pointer_cast( &h_out_stream.front() ),
  522. thrust::raw_pointer_cast( &h_out_lengths.front() ) );
  523. // check that the string sets match
  524. check( h_in_string_set, h_out_string_set );
  525. fprintf(stderr, " test packed-sparse -> strided copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  526. #if 1
  527. fprintf(stderr, " test packed-sparse (tex) -> strided copy... started\n");
  528. // bind the texture
  529. tex_packed_stream_type d_tex_packed_stream( cuda::ldg_pointer<uint32>( thrust::raw_pointer_cast( &d_in_string.front() ) ) );
  530. tex_input_set d_tex_in_string_set(
  531. N_strings,
  532. d_tex_packed_stream,
  533. thrust::raw_pointer_cast( &d_in_ranges.front() ) );
  534. timer.start();
  535. // copy the input set into the output set
  536. for (uint32 i = 0; i < N_tests; ++i)
  537. cuda::copy( d_tex_in_string_set, d_out_string_set );
  538. timer.stop();
  539. fprintf(stderr, " test packed-sparse (tex) -> strided copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  540. #endif
  541. }
  542. // copy a strided-packed string set into a strided one
  543. if (TEST_MASK & STRIDED_PACKED_TO_STRIDED)
  544. {
  545. fprintf(stderr, " test strided-packed -> strided copy... started\n");
  546. typedef StridedPackedStringSet<
  547. uint32*,
  548. uint8,
  549. SYMBOL_SIZE,
  550. false,
  551. uint32*> input_set;
  552. typedef StridedStringSet<uint8*,uint32*> output_set;
  553. // first build the input set
  554. thrust::device_vector<uint32> d_in_stream( N_strings * N_words );
  555. thrust::device_vector<uint32> d_in_lengths( N_strings+1 );
  556. input_set d_in_string_set(
  557. N_strings,
  558. N_strings,
  559. thrust::raw_pointer_cast( &d_in_stream.front() ),
  560. thrust::raw_pointer_cast( &d_in_lengths.front() ) );
  561. // copy the base string set into the input set
  562. cuda::copy( d_base_string_set, d_in_string_set );
  563. // build the device output string set
  564. thrust::device_vector<uint8> d_out_string( N_strings * N );
  565. thrust::device_vector<uint32> d_out_lengths( N_strings );
  566. output_set d_out_string_set(
  567. N_strings,
  568. N_strings,
  569. thrust::raw_pointer_cast( &d_out_string.front() ),
  570. thrust::raw_pointer_cast( &d_out_lengths.front() ) );
  571. Timer timer;
  572. timer.start();
  573. for (uint32 i = 0; i < N_tests; ++i)
  574. cuda::copy( d_in_string_set, d_out_string_set );
  575. timer.stop();
  576. // build the host output string set
  577. thrust::host_vector<uint8> h_out_string( d_out_string );
  578. thrust::host_vector<uint32> h_out_lengths( d_out_lengths );
  579. output_set h_out_string_set(
  580. N_strings,
  581. N_strings,
  582. thrust::raw_pointer_cast( &h_out_string.front() ),
  583. thrust::raw_pointer_cast( &h_out_lengths.front() ) );
  584. // check that the string sets match
  585. check( h_base_string_set, h_out_string_set );
  586. fprintf(stderr, " test strided-packed -> strided copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  587. }
  588. // copy a simple concatenated string set into a strided packed string set
  589. if (TEST_MASK & CONCAT_TO_STRIDED_PACKED)
  590. {
  591. fprintf(stderr, " test concat -> strided-packed copy... started\n");
  592. typedef ConcatenatedStringSet<uint8*,uint32*> input_set;
  593. typedef StridedPackedStringSet<
  594. uint32*,
  595. uint8,
  596. SYMBOL_SIZE,
  597. false,
  598. uint32*> output_set;
  599. // first build the input set
  600. thrust::device_vector<uint8> d_in_string( N_strings * N );
  601. thrust::device_vector<uint32> d_in_offsets( N_strings+1 );
  602. input_set d_in_string_set(
  603. N_strings,
  604. thrust::raw_pointer_cast( &d_in_string.front() ),
  605. thrust::raw_pointer_cast( &d_in_offsets.front() ) );
  606. // copy the base string set into the input set
  607. cuda::copy( d_base_string_set, d_in_string_set );
  608. // build the device output string set
  609. thrust::device_vector<uint32> d_out_stream( N_strings * N_words );
  610. thrust::device_vector<uint32> d_out_lengths( N_strings );
  611. output_set d_out_string_set(
  612. N_strings,
  613. N_strings,
  614. thrust::raw_pointer_cast( &d_out_stream.front() ),
  615. thrust::raw_pointer_cast( &d_out_lengths.front() ) );
  616. Timer timer;
  617. timer.start();
  618. // copy the input set into the output set
  619. for (uint32 i = 0; i < N_tests; ++i)
  620. cuda::copy( d_in_string_set, d_out_string_set );
  621. timer.stop();
  622. // build the host output string set
  623. thrust::host_vector<uint32> h_out_stream( d_out_stream );
  624. thrust::host_vector<uint32> h_out_lengths( d_out_lengths );
  625. output_set h_out_string_set(
  626. N_strings,
  627. N_strings,
  628. thrust::raw_pointer_cast( &h_out_stream.front() ),
  629. thrust::raw_pointer_cast( &h_out_lengths.front() ) );
  630. // check that the string sets match
  631. check( h_base_string_set, h_out_string_set );
  632. fprintf(stderr, " test concat -> strided-packed copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  633. }
  634. // copy a packed concatenated string set into a strided packed string set
  635. if (TEST_MASK & PACKED_CONCAT_TO_STRIDED_PACKED)
  636. {
  637. fprintf(stderr, " test packed-concat -> strided-packed copy... started\n");
  638. const uint32 N_words = (N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
  639. typedef PackedStream<uint32*,uint8,SYMBOL_SIZE,false> packed_stream_type;
  640. typedef ConcatenatedStringSet<packed_stream_type,uint32*> input_set;
  641. typedef StridedPackedStringSet<
  642. uint32*,
  643. uint8,
  644. SYMBOL_SIZE,
  645. false,
  646. uint32*> output_set;
  647. // build the device input string set
  648. thrust::device_vector<uint32> d_in_string( N_strings * N_words );
  649. thrust::device_vector<uint32> d_in_offsets( N_strings+1 );
  650. packed_stream_type d_packed_stream(
  651. thrust::raw_pointer_cast( &d_in_string.front() ) );
  652. input_set d_in_string_set(
  653. N_strings,
  654. d_packed_stream,
  655. thrust::raw_pointer_cast( &d_in_offsets.front() ) );
  656. // copy the base string set into the input set
  657. cuda::copy( d_base_string_set, d_in_string_set );
  658. // build the device output string set
  659. thrust::device_vector<uint32> d_out_stream( N_strings * N_words );
  660. thrust::device_vector<uint32> d_out_lengths( N_strings );
  661. output_set d_out_string_set(
  662. N_strings,
  663. N_strings,
  664. thrust::raw_pointer_cast( &d_out_stream.front() ),
  665. thrust::raw_pointer_cast( &d_out_lengths.front() ) );
  666. Timer timer;
  667. timer.start();
  668. // copy input set into the output set
  669. for (uint32 i = 0; i < N_tests; ++i)
  670. cuda::copy( d_in_string_set, d_out_string_set );
  671. timer.stop();
  672. // build the host output string set
  673. thrust::host_vector<uint32> h_out_stream( d_out_stream );
  674. thrust::host_vector<uint32> h_out_lengths( d_out_lengths );
  675. output_set h_out_string_set(
  676. N_strings,
  677. N_strings,
  678. thrust::raw_pointer_cast( &h_out_stream.front() ),
  679. thrust::raw_pointer_cast( &h_out_lengths.front() ) );
  680. // check that the string sets match
  681. check( h_base_string_set, h_out_string_set );
  682. fprintf(stderr, " test packed-concat -> strided-packed copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  683. }
  684. // copy a packed sparse string set into a strided packed string set
  685. if (TEST_MASK & PACKED_SPARSE_TO_STRIDED_PACKED)
  686. {
  687. fprintf(stderr, " test packed-sparse -> strided-packed copy... started\n");
  688. const uint32 N_words = (N_spacing + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
  689. typedef PackedStream<uint32*,uint8,SYMBOL_SIZE,false> packed_stream_type;
  690. typedef PackedStream<cuda::ldg_pointer<uint32>,uint8,SYMBOL_SIZE,false> tex_packed_stream_type;
  691. typedef SparseStringSet<packed_stream_type,const uint2*> input_set;
  692. typedef SparseStringSet<tex_packed_stream_type,const uint2*> tex_input_set;
  693. typedef StridedPackedStringSet<
  694. uint32*,
  695. uint8,
  696. SYMBOL_SIZE,
  697. false,
  698. uint32*> output_set;
  699. thrust::host_vector<uint32> h_in_string( N_strings * N_words );
  700. thrust::host_vector<uint2> h_in_ranges( N_strings );
  701. packed_stream_type h_packed_stream(
  702. thrust::raw_pointer_cast( &h_in_string.front() ) );
  703. LCG_random rand;
  704. for (uint32 i = 0; i < N_strings; ++i)
  705. {
  706. h_in_ranges[i] = make_uint2( N_spacing*i, N_spacing*i + N );
  707. for (uint32 j = 0; j < N_spacing; ++j)
  708. h_packed_stream[ i * N_spacing + j ] = rand.next() & ((1u << SYMBOL_SIZE) - 1u);
  709. }
  710. // build the device input string set
  711. thrust::device_vector<uint32> d_in_string( h_in_string );
  712. thrust::device_vector<uint2> d_in_ranges( h_in_ranges );
  713. packed_stream_type d_packed_stream(
  714. thrust::raw_pointer_cast( &d_in_string.front() ) );
  715. input_set d_in_string_set(
  716. N_strings,
  717. d_packed_stream,
  718. thrust::raw_pointer_cast( &d_in_ranges.front() ) );
  719. // build the device output string set
  720. thrust::device_vector<uint32> d_out_stream( N_strings * N_words );
  721. thrust::device_vector<uint32> d_out_lengths( N_strings );
  722. output_set d_out_string_set(
  723. N_strings,
  724. N_strings,
  725. thrust::raw_pointer_cast( &d_out_stream.front() ),
  726. thrust::raw_pointer_cast( &d_out_lengths.front() ) );
  727. Timer timer;
  728. timer.start();
  729. // copy input set into the output set
  730. for (uint32 i = 0; i < N_tests; ++i)
  731. cuda::copy( d_in_string_set, d_out_string_set );
  732. timer.stop();
  733. // build the host input string set
  734. input_set h_in_string_set(
  735. N_strings,
  736. h_packed_stream,
  737. thrust::raw_pointer_cast( &h_in_ranges.front() ) );
  738. // build the host output string set
  739. thrust::host_vector<uint32> h_out_stream( d_out_stream );
  740. thrust::host_vector<uint32> h_out_lengths( d_out_lengths );
  741. output_set h_out_string_set(
  742. N_strings,
  743. N_strings,
  744. thrust::raw_pointer_cast( &h_out_stream.front() ),
  745. thrust::raw_pointer_cast( &h_out_lengths.front() ) );
  746. // check that the string sets match
  747. check( h_in_string_set, h_out_string_set );
  748. fprintf(stderr, " test packed-sparse -> strided-packed copy... done: %.2f GSYMS\n", (1.0e-9f*float(N_strings*N))*(float(N_tests)/timer.seconds()));
  749. }
  750. fprintf(stderr, "nvbio/basic/string_set test... done\n");
  751. return 0;
  752. }
  753. } // namespace nvbio