// persist.cu
/*
 * nvbio
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the NVIDIA CORPORATION nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
  27. #include <nvBowtie/bowtie2/cuda/persist.h>
  28. #include <nvBowtie/bowtie2/cuda/seed_hit.h>
  29. #include <nvBowtie/bowtie2/cuda/seed_hit_deque_array.h>
  30. #include <crc/crc.h>
  31. namespace nvbio {
  32. namespace bowtie2 {
  33. namespace cuda {
  34. namespace {
  35. std::string mate_file_name(const std::string& file_name, const uint32 anchor)
  36. {
  37. const size_t dot = file_name.find('.');
  38. if (dot == std::string::npos)
  39. return file_name + (anchor ? ".2" : ".1");
  40. else
  41. {
  42. std::string output = file_name;
  43. output.insert( dot, (anchor ? ".2" : ".1") );
  44. return output;
  45. }
  46. }
  47. } // anonymous namespace
  48. // clear the persisting files
  49. //
  50. void persist_clear(const std::string& file_name)
  51. {
  52. // clear mate 0
  53. {
  54. const std::string mate_file = mate_file_name( file_name, 0u );
  55. FILE* file = fopen( mate_file.c_str(), "w" ); fclose( file );
  56. }
  57. // clear mate 1
  58. {
  59. const std::string mate_file = mate_file_name( file_name, 0u );
  60. FILE* file = fopen( mate_file.c_str(), "w" ); fclose( file );
  61. }
  62. }
  63. // compute a set of hits
  64. //
  65. void persist_hits(
  66. const std::string& file_name,
  67. const char* name,
  68. const uint32 anchor,
  69. const uint32 count,
  70. const SeedHitDequeArray& hit_deques)
  71. {
  72. // check whether we need to persist anything
  73. if (file_name == "")
  74. return;
  75. const std::string mate_file = mate_file_name( file_name, anchor );
  76. FILE* file = fopen( mate_file.c_str(), "a+" );
  77. SeedHitDequeArrayHostStorage hit_deques_h = static_cast<const SeedHitDequeArrayDeviceStorage&>( hit_deques );
  78. fprintf( file, "\n---------------------------------------------------------------------------------\n" );
  79. fprintf( file, "%s\n", name );
  80. fprintf( file, "---------------------------------------------------------------------------------\n" );
  81. for (uint32 i = 0; i < count; ++i)
  82. fprintf( file, "cnt[%u] = %u\n", i, uint32(hit_deques_h.m_counts[i]) );
  83. fprintf( file, "\n" );
  84. for (uint32 i = 0; i < count; ++i)
  85. {
  86. const uint32 n_ranges = hit_deques_h.m_counts[i];
  87. const SeedHit* hits = &hit_deques_h.m_hits[0] + hit_deques_h.m_index[i];
  88. fprintf( file, "hits[%u] = [%u]{\n", i, n_ranges );
  89. for (uint32 j = 0; j < n_ranges; ++j)
  90. {
  91. const SeedHit hit = hits[j];
  92. fprintf( file, " range[%u] = { (%u, %u - %u), dir[%u], pos[%u], rc[%u] }\n",
  93. j, hit.get_range().x, hit.get_range().y, hit.get_range().y - hit.get_range().x,
  94. hit.get_indexdir(),
  95. hit.get_posinread(),
  96. hit.get_readtype() );
  97. }
  98. fprintf( file, "}\n" );
  99. }
  100. fprintf( file, "\n" );
  101. fclose( file );
  102. }
  103. // persist a set of reads
  104. //
  105. void persist_reads(
  106. const std::string& file_name,
  107. const char* name,
  108. const uint32 anchor,
  109. const uint32 count,
  110. const thrust::device_vector<uint32>::iterator iterator)
  111. {
  112. // check whether we need to persist anything
  113. if (file_name == "")
  114. return;
  115. const std::string mate_file = mate_file_name( file_name, anchor );
  116. FILE* file = fopen( mate_file.c_str(), "a+" );
  117. thrust::host_vector<int32> hvec( count );
  118. thrust::copy(
  119. iterator,
  120. iterator + count,
  121. hvec.begin() );
  122. fprintf( file, "\n---------------------------------------------------------------------------------\n" );
  123. fprintf( file, "%s\n", name );
  124. fprintf( file, "---------------------------------------------------------------------------------\n" );
  125. for (uint32 i = 0; i < count; ++i)
  126. fprintf( file, "read[%u] = %u\n", i, hvec[i] );
  127. fprintf( file, "\n" );
  128. fclose( file );
  129. }
  130. // persist a set of selected hits
  131. //
  132. void persist_selection(
  133. const std::string& file_name,
  134. const char* name,
  135. const uint32 anchor,
  136. const uint32 read_count,
  137. const packed_read* read_infos_dptr,
  138. const uint32 n_multi,
  139. const uint32 hits_queue_size,
  140. const ReadHitsIndex& hits_index,
  141. const HitQueues& hits_queue)
  142. {
  143. // check whether we need to persist anything
  144. if (file_name == "")
  145. return;
  146. const std::string mate_file = mate_file_name( file_name, anchor );
  147. FILE* file = fopen( mate_file.c_str(), "a+" );
  148. fprintf( file, "\n---------------------------------------------------------------------------------\n" );
  149. fprintf( file, "%s\n", name );
  150. fprintf( file, "---------------------------------------------------------------------------------\n" );
  151. thrust::host_vector<uint32> loc_vec( hits_queue.loc );
  152. thrust::host_vector<packed_seed> seed_vec( hits_queue.seed );
  153. if (n_multi > 1)
  154. {
  155. const uint32 link_stride = hits_index.m_stride;
  156. thrust::host_vector<uint32> link_hvec( hits_index.m_links );
  157. thrust::host_vector<uint32> read_infos_hvec( read_count );
  158. thrust::host_vector<uint32> idx_hvec( read_count );
  159. uint32* link_hptr = thrust::raw_pointer_cast( &link_hvec.front() );
  160. uint32* read_infos_hptr = (uint32*)thrust::raw_pointer_cast( &read_infos_hvec.front() );
  161. cudaDeviceSynchronize();
  162. cudaMemcpy( read_infos_hptr, read_infos_dptr, read_count * sizeof(uint32), cudaMemcpyDeviceToHost );
  163. cudaDeviceSynchronize();
  164. // sort the reads so as to show everything in the same order all the times
  165. thrust::copy(
  166. thrust::make_counting_iterator(0u),
  167. thrust::make_counting_iterator(0u) + read_count,
  168. idx_hvec.begin() );
  169. thrust::sort_by_key(
  170. read_infos_hvec.begin(),
  171. read_infos_hvec.end(),
  172. idx_hvec.begin() );
  173. fprintf( file, "selection = [reads: %u - batch-size: %u - link-stride: %u]\n", read_count, n_multi, link_stride );
  174. for (uint32 i = 0; i < read_count; ++i)
  175. {
  176. const packed_read *read_info = (packed_read *)&read_infos_hvec[i];
  177. const uint32 idx = idx_hvec[i];
  178. const uint32 n = link_hvec[idx];
  179. fprintf( file, "read[%06u:%06u:%u] = [%02u]{", i, read_info->read_id, read_info->top_flag, n );
  180. if (n)
  181. {
  182. strided_iterator<const uint32*> link_vec( link_hptr + idx + link_stride, link_stride );
  183. for (uint32 j = 0; j < n; ++j)
  184. {
  185. const packed_seed seed = seed_vec[ link_vec[j] ];
  186. const uint32 loc = loc_vec[ link_vec[j] ];
  187. fprintf( file, " seed[pos:%u,dir:%u,rc:%u,top:%u,loc:%u]\n", (uint32)seed.pos_in_read, (uint32)seed.index_dir, (uint32)seed.rc, (uint32)seed.top_flag, loc );
  188. }
  189. }
  190. fprintf( file, "}\n" );
  191. }
  192. }
  193. else
  194. {
  195. thrust::host_vector<uint32> read_infos_hvec( read_count );
  196. thrust::host_vector<uint32> idx_hvec( read_count );
  197. uint32* read_infos_hptr = (uint32*)thrust::raw_pointer_cast( &read_infos_hvec.front() );
  198. cudaDeviceSynchronize();
  199. cudaMemcpy( read_infos_hptr, read_infos_dptr, read_count * sizeof(uint32), cudaMemcpyDeviceToHost );
  200. cudaDeviceSynchronize();
  201. // sort the reads so as to show everything in the same order all the times
  202. thrust::copy(
  203. thrust::make_counting_iterator(0u),
  204. thrust::make_counting_iterator(0u) + read_count,
  205. idx_hvec.begin() );
  206. thrust::sort_by_key(
  207. read_infos_hvec.begin(),
  208. read_infos_hvec.end(),
  209. idx_hvec.begin() );
  210. fprintf( file, "selection = [reads: %u - batch-size: 1]\n", read_count );
  211. for (uint32 i = 0; i < read_count; ++i)
  212. {
  213. const packed_read *read_info = (packed_read *)&read_infos_hvec[i];
  214. const uint32 idx = idx_hvec[i];
  215. fprintf( file, "read[%06u:%06u:%u] = [%02u]{", i, read_info->read_id, read_info->top_flag, 1u );
  216. const packed_seed seed = seed_vec[ idx ];
  217. const uint32 loc = loc_vec[ idx ];
  218. fprintf( file, " seed[pos:%u,dir:%u,rc:%u,top:%u,loc:%u]\n", (uint32)seed.pos_in_read, (uint32)seed.index_dir, (uint32)seed.rc, (uint32)seed.top_flag, loc );
  219. fprintf( file, "}\n" );
  220. }
  221. }
  222. fclose( file );
  223. }
  224. // persist a set of scores
  225. //
  226. void persist_scores(
  227. const std::string& file_name,
  228. const char* name,
  229. const uint32 anchor,
  230. const uint32 read_count,
  231. const uint32 n_multi,
  232. const uint32 hits_queue_size,
  233. const ScoringQueues& scoring_queues)
  234. {
  235. // check whether we need to persist anything
  236. if (file_name == "")
  237. return;
  238. const std::string mate_file = mate_file_name( file_name, anchor );
  239. FILE* file = fopen( mate_file.c_str(), "a+" );
  240. fprintf( file, "\n---------------------------------------------------------------------------------\n" );
  241. fprintf( file, "%s\n", name );
  242. fprintf( file, "---------------------------------------------------------------------------------\n" );
  243. thrust::host_vector<packed_read> read_infos_hvec( scoring_queues.active_reads.in_queue );
  244. thrust::host_vector<int32> score_hvec( scoring_queues.hits.score );
  245. uint32* read_infos_hptr = (uint32*)thrust::raw_pointer_cast( &read_infos_hvec.front() );
  246. int32* score_hptr = thrust::raw_pointer_cast( &score_hvec.front() );
  247. if (n_multi > 1)
  248. {
  249. const uint32 link_stride = scoring_queues.hits_index.m_stride;
  250. thrust::host_vector<uint32> link_hvec( scoring_queues.hits_index.m_links );
  251. thrust::host_vector<uint32> idx_hvec( read_count );
  252. uint32* link_hptr = thrust::raw_pointer_cast( &link_hvec.front() );
  253. // sort the reads so as to show everything in the same order all the times
  254. thrust::copy(
  255. thrust::make_counting_iterator(0u),
  256. thrust::make_counting_iterator(0u) + read_count,
  257. idx_hvec.begin() );
  258. thrust::sort_by_key(
  259. read_infos_hptr,
  260. read_infos_hptr + read_count,
  261. idx_hvec.begin() );
  262. fprintf( file, "scores = [reads: %u - batch-size: %u - link-stride: %u]\n", read_count, n_multi, link_stride );
  263. for (uint32 i = 0; i < read_count; ++i)
  264. {
  265. const packed_read *read_info = (packed_read *)&read_infos_hvec[i];
  266. const uint32 idx = idx_hvec[i];
  267. const uint32 n = link_hvec[idx];
  268. if (n)
  269. {
  270. strided_iterator<const uint32*> link_vec( link_hptr + idx + link_stride, link_stride );
  271. fprintf( file, "read[%06u:%06u:%u] = {\n", i, read_info->read_id, read_info->top_flag );
  272. for (uint32 j = 0; j < n; ++j)
  273. {
  274. const uint32 link = link_vec[j];
  275. fprintf( file, " score[%04u] = %d\n", j, score_hvec[link] );
  276. }
  277. fprintf( file, "}\n" );
  278. }
  279. }
  280. fprintf( file, "\n" );
  281. // sort
  282. thrust::sort(
  283. score_hvec.begin(),
  284. score_hvec.end() );
  285. // write sorted score list
  286. fprintf( file, "\n" );
  287. for (uint32 i = 0; i < hits_queue_size; ++i)
  288. fprintf( file, "score[%u] = %d\n", i, score_hvec[i] );
  289. fprintf( file, "\n" );
  290. int64 sum = 0;
  291. for (uint32 i = 0; i < hits_queue_size; ++i)
  292. sum += score_hvec[i];
  293. fprintf( file, "sum = %lld\n", sum );
  294. // compute a crc
  295. const char* ptr = (const char*)score_hptr;
  296. const uint64 crc = crcCalc( ptr, sizeof(int32)*hits_queue_size );
  297. fprintf( file, "crc = %llu\n", crc );
  298. }
  299. else
  300. {
  301. thrust::host_vector<uint32> idx_hvec( read_count );
  302. // sort the reads so as to show everything in the same order all the times
  303. thrust::copy(
  304. thrust::make_counting_iterator(0u),
  305. thrust::make_counting_iterator(0u) + read_count,
  306. idx_hvec.begin() );
  307. thrust::sort_by_key(
  308. read_infos_hptr,
  309. read_infos_hptr + read_count,
  310. idx_hvec.begin() );
  311. for (uint32 i = 0; i < read_count; ++i)
  312. {
  313. const packed_read *read_info = (packed_read *)&read_infos_hvec[i];
  314. const uint32 idx = idx_hvec[i];
  315. const uint32 score = score_hvec[idx];
  316. fprintf( file, "read[%06u:%06u:%u] : score[%d]\n", i, read_info->read_id, read_info->top_flag, score );
  317. }
  318. fprintf( file, "\n" );
  319. }
  320. fclose( file );
  321. }
  322. } // namespace cuda
  323. } // namespace bowtie2
  324. } // namespace nvbio