nvBowtie.cpp 43 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // nvBowtie.cpp : Defines the entry point for the console application.
  28. //
  29. #include <stdio.h>
  30. #include <stdlib.h>
  31. #include <string.h>
  32. #include <string>
  33. #include <nvbio/basic/console.h>
  34. #include <nvbio/basic/exceptions.h>
  35. #include <nvbio/basic/shared_pointer.h>
  36. #include <nvbio/basic/cuda/arch.h>
  37. #include <nvbio/io/fmindex/fmindex.h>
  38. #include <nvbio/io/sequence/sequence.h>
  39. #include <nvbio/io/sequence/sequence_mmap.h>
  40. #include <nvbio/io/output/output_file.h>
  41. #include <nvBowtie/bowtie2/cuda/params.h>
  42. #include <nvBowtie/bowtie2/cuda/stats.h>
  43. #include <nvBowtie/bowtie2/cuda/input_thread.h>
  44. #include <nvBowtie/bowtie2/cuda/compute_thread.h>
  45. #include <nvbio/basic/omp.h>
  46. void crcInit();
  47. namespace nvbio {
  48. namespace bowtie2 {
  49. namespace cuda {
  50. void test_seed_hit_deques();
  51. void test_scoring_queues();
  52. } // namespace cuda
  53. } // namespace bowtie2
  54. } // namespace nvbio
  55. using namespace nvbio;
  56. // bogus implementation of a function to check if a string is a number
  57. bool is_number(const char* str, uint32 len = uint32(-1))
  58. {
  59. if (str[0] == '-')
  60. ++str;
  61. for (uint32 l = 0; *str != '\0' && l < len; ++l)
  62. {
  63. const char c = *str; ++str;
  64. if (c == '.') continue;
  65. if (c >= '0' && c <= '9') continue;
  66. return false;
  67. }
  68. return true;
  69. }
  70. void log_ed(nvbio::bowtie2::cuda::AlignmentStats& stats)
  71. {
  72. if (stats.n_mapped == 0)
  73. return;
  74. const std::vector<uint32>& mapped = stats.mapped_log_ed_histogram;
  75. log_stats(stderr, " ed :");
  76. for (uint32 i = 0; i < mapped.size(); ++i)
  77. {
  78. if (float(mapped[i])/float(stats.n_mapped) > 1.0e-3f)
  79. log_stats_cont(stderr, " %5u ", i ? 1u << (i-1) : 0u );
  80. }
  81. log_stats_cont(stderr,"\n");
  82. log_stats(stderr, " :");
  83. for (uint32 i = 0; i < mapped.size(); ++i)
  84. {
  85. if (float(mapped[i])/float(stats.n_mapped) > 1.0e-3f)
  86. log_stats_cont(stderr, " %6.2f%%", 100.0f * float(mapped[i])/float(stats.n_mapped) );
  87. }
  88. log_stats_cont(stderr,"\n");
  89. }
  90. void log_mapq(nvbio::bowtie2::cuda::AlignmentStats& stats)
  91. {
  92. if (stats.n_mapped == 0)
  93. return;
  94. log_stats(stderr, " mapq :");
  95. for (uint32 i = 0; i < 8; ++i)
  96. log_stats_cont(stderr, " %5u ", i ? 1u << (i-1) : 0u );
  97. log_stats_cont(stderr,"\n");
  98. log_stats(stderr, " :");
  99. for (uint32 i = 0; i < 8; ++i)
  100. log_stats_cont(stderr, " %6.2f%%", 100.0f * float(stats.mapq_log_bins[i])/float(stats.n_mapped) );
  101. log_stats_cont(stderr,"\n");
  102. }
  103. int main(int argc, char* argv[])
  104. {
  105. //cudaSetDeviceFlags( cudaDeviceMapHost | cudaDeviceLmemResizeToMax );
  106. crcInit();
  107. if (argc == 1 ||
  108. (argc == 2 && strcmp( argv[1], "--help" ) == 0) ||
  109. (argc == 2 && strcmp( argv[1], "-h" ) == 0))
  110. {
  111. log_info(stderr,"nvBowtie [options]\n");
  112. log_info(stderr,"options:\n");
  113. log_info(stderr," General:\n");
  114. log_info(stderr," -U file-name unpaired reads\n");
  115. log_info(stderr," -1 file-name first mate reads\n");
  116. log_info(stderr," -2 file-name second mate reads\n");
  117. log_info(stderr," -S file-name output file (.sam|.bam)\n");
  118. log_info(stderr," -x file-name reference index\n");
  119. log_info(stderr," --verbosity int [5] verbosity level\n");
  120. log_info(stderr," --upto | -u int [-1] maximum number of reads to process\n");
  121. log_info(stderr," --trim3 | -3 int [0] trim the first N bases of 3'\n");
  122. log_info(stderr," --trim5 | -5 int [0] trim the first N bases of 5'\n");
  123. log_info(stderr," --nofw do not align the forward strand\n");
  124. log_info(stderr," --norc do not align the reverse-complemented strand\n");
  125. log_info(stderr," --device int [0] select the given cuda device(s) (e.g. --device 0 --device 1 ...)\n");
  126. log_info(stderr," --file-ref load reference from file\n");
  127. log_info(stderr," --server-ref load reference from server\n");
  128. log_info(stderr," --phred33 qualities are ASCII characters equal to Phred quality + 33\n");
  129. log_info(stderr," --phred64 qualities are ASCII characters equal to Phred quality + 64\n");
  130. log_info(stderr," --solexa-quals qualities are in the Solexa format\n");
  131. log_info(stderr," --rg-id string add the RG-ID field of the SAM output header\n");
  132. log_info(stderr," --rg string,val add an RG-TAG field of the SAM output header\n");
  133. log_info(stderr," Paired-End:\n");
  134. log_info(stderr," --ff paired mates are forward-forward\n");
  135. log_info(stderr," --fr paired mates are forward-reverse\n");
  136. log_info(stderr," --rf paired mates are reverse-forward\n");
  137. log_info(stderr," --rr paired mates are reverse-reverse\n");
  138. log_info(stderr," --minins int [0] minimum insert length\n");
  139. log_info(stderr," --minins int [500] maximum insert length\n");
  140. log_info(stderr," --overlap allow overlapping mates\n");
  141. log_info(stderr," --dovetail allow dovetailing mates\n");
  142. log_info(stderr," --no-mixed only report paired alignments\n");
  143. log_info(stderr," --ungapped-mates | -ug perform ungapped mate alignment\n");
  144. log_info(stderr," Seeding:\n");
  145. log_info(stderr," --seed-len | -L int [22] seed lengths\n");
  146. log_info(stderr," --seed-freq | -i {G|L|S},x,y seed interval, as x + y*func(read-len) (G=log,L=linear,S=sqrt)\n");
  147. log_info(stderr," --max-hits int [100] maximum amount of seed hits\n");
  148. log_info(stderr," --max-reseed | -R int [2] number of reseeding rounds\n");
  149. log_info(stderr," Extension:\n");
  150. log_info(stderr," --all | -a perform all-mapping (i.e. find and report all alignments)\n");
  151. log_info(stderr," --local perform local alignment\n");
  152. log_info(stderr," --rand randomized seed selection\n");
  153. log_info(stderr," --no-rand do not randomize seed hit selection\n");
  154. log_info(stderr," --max-dist int [15] maximum edit distance\n");
  155. log_info(stderr," --max-effort-init int [15] initial maximum number of consecutive extension failures\n");
  156. log_info(stderr," --max-effort | -D int [15] maximum number of consecutive extension failures\n");
  157. log_info(stderr," --min-ext int [30] minimum number of extensions per read\n");
  158. log_info(stderr," --max-ext int [400] maximum number of extensions per read\n");
  159. log_info(stderr," --fast apply the fast presets\n");
  160. log_info(stderr," --very-fast apply the very-fast presets\n");
  161. log_info(stderr," --sensitive apply the sensitive presets\n");
  162. log_info(stderr," --very-sensitive apply the very-sensitive presets\n");
  163. log_info(stderr," --fast-local apply the fast presets\n");
  164. log_info(stderr," --very-fast-local apply the very-fast presets\n");
  165. log_info(stderr," --sensitive-local apply the sensitive presets\n");
  166. log_info(stderr," --very-sensitive-local apply the very-sensitive presets\n");
  167. log_info(stderr," Scoring:\n");
  168. log_info(stderr," --scoring {sw|ed} Smith-Waterman / Edit-Distance scoring\n");
  169. log_info(stderr," --score-min {G|L|S},x,y minimum score function, as x + y*func(read-len)\n");
  170. log_info(stderr," --ma int match bonus\n");
  171. log_info(stderr," --mp int,int mismatch min/max penalties\n");
  172. log_info(stderr," --np int N penalty\n");
  173. log_info(stderr," --rdg int,int read open/extension gap penalties\n");
  174. log_info(stderr," --rfg int,int reference open/extension gap penalties\n");
  175. log_info(stderr," Reporting:\n");
  176. log_info(stderr," --mapQ-filter | -Q int [0] minimum mapQ threshold\n");
  177. exit(0);
  178. }
  179. else if (argc == 2 && strcmp( argv[1], "-test" ) == 0)
  180. {
  181. log_visible(stderr, "nvBowtie tests... started\n");
  182. nvbio::bowtie2::cuda::test_seed_hit_deques();
  183. nvbio::bowtie2::cuda::test_scoring_queues();
  184. log_visible(stderr, "nvBowtie tests... done\n");
  185. exit(0);
  186. }
  187. else if (argc == 2 && strcmp( argv[1], "--version" ) == 0)
  188. {
  189. fprintf(stderr, "nvBowtie version %s\n", NVBIO_VERSION_STRING);
  190. exit(0);
  191. }
  192. // setup the number of OMP threads
  193. omp_set_num_threads( omp_get_num_procs() );
  194. uint32 max_reads = uint32(-1);
  195. uint32 max_read_len = uint32(-1);
  196. uint32 trim3 = 0;
  197. uint32 trim5 = 0;
  198. //bool debug = false;
  199. bool from_file = false;
  200. bool paired_end = false;
  201. io::PairedEndPolicy pe_policy = io::PE_POLICY_FR;
  202. io::QualityEncoding qencoding = io::Phred33;
  203. std::vector<int> cuda_devices;
  204. std::map<std::string,std::string> string_options;
  205. std::string argstr;
  206. for (int32 i = 1; i < argc; ++i)
  207. {
  208. argstr += " ";
  209. argstr += argv[i];
  210. }
  211. std::string rg_id;
  212. std::string rg_string;
  213. bool legacy_cmdline = true;
  214. const char* read_name1 = "";
  215. const char* read_name2 = "";
  216. const char* reference_name = "";
  217. const char* output_name = "";
  218. for (int32 i = 1; i < argc; ++i)
  219. {
  220. if (strcmp( argv[i], "--pe" ) == 0 ||
  221. strcmp( argv[i], "-paired-ends" ) == 0 ||
  222. strcmp( argv[i], "--paired-ends" ) == 0)
  223. paired_end = true;
  224. else if (strcmp( argv[i], "--ff" ) == 0)
  225. pe_policy = io::PE_POLICY_FF;
  226. else if (strcmp( argv[i], "--fr" ) == 0)
  227. pe_policy = io::PE_POLICY_FR;
  228. else if (strcmp( argv[i], "--rf" ) == 0)
  229. pe_policy = io::PE_POLICY_RF;
  230. else if (strcmp( argv[i], "--rr" ) == 0)
  231. pe_policy = io::PE_POLICY_RR;
  232. else if (strcmp( argv[i], "-max-reads" ) == 0 ||
  233. strcmp( argv[i], "--max-reads" ) == 0 ||
  234. strcmp( argv[i], "-u" ) == 0 ||
  235. strcmp( argv[i], "--upto" ) == 0)
  236. max_reads = (uint32)atoi( argv[++i] );
  237. else if (strcmp( argv[i], "-max-read-len" ) == 0 ||
  238. strcmp( argv[i], "--max-read-len" ) == 0)
  239. max_read_len = (uint32)atoi( argv[++i] );
  240. else if (strcmp( argv[i], "-3") == 0 ||
  241. strcmp( argv[i], "--trim3") == 0)
  242. trim3 = (uint32)atoi( argv[++i] );
  243. else if (strcmp( argv[i], "-5") == 0 ||
  244. strcmp( argv[i], "--trim5") == 0)
  245. trim5 = (uint32)atoi( argv[++i] );
  246. else if (strcmp( argv[i], "-file-ref" ) == 0 ||
  247. strcmp( argv[i], "--file-ref" ) == 0)
  248. from_file = true;
  249. else if (strcmp( argv[i], "-server-ref" ) == 0 ||
  250. strcmp( argv[i], "--server-ref" ) == 0)
  251. from_file = false;
  252. else if (strcmp( argv[i], "-input" ) == 0 ||
  253. strcmp( argv[i], "--input" ) == 0)
  254. {
  255. if (strcmp( argv[i+1], "file" ) == 0)
  256. from_file = true;
  257. else if (strcmp( argv[i+1], "server" ) == 0)
  258. from_file = false;
  259. else
  260. log_warning(stderr, "unknown \"%s\" input, skipping\n", argv[i+1]);
  261. ++i;
  262. }
  263. else if (strcmp( argv[i], "-phred33" ) == 0 ||
  264. strcmp( argv[i], "--phred33" ) == 0)
  265. qencoding = io::Phred33;
  266. else if (strcmp( argv[i], "-phred64" ) == 0 ||
  267. strcmp( argv[i], "--phred64" ) == 0)
  268. qencoding = io::Phred64;
  269. else if (strcmp( argv[i], "-solexa-quals" ) == 0 ||
  270. strcmp( argv[i], "--solexa-quals" ) == 0)
  271. qencoding = io::Solexa;
  272. else if (strcmp( argv[i], "-device" ) == 0 ||
  273. strcmp( argv[i], "--device" ) == 0)
  274. cuda_devices.push_back( atoi( argv[++i] ) );
  275. else if (strcmp( argv[i], "-verbosity" ) == 0 ||
  276. strcmp( argv[i], "--verbosity" ) == 0)
  277. set_verbosity( Verbosity( atoi( argv[++i] ) ) );
  278. else if (strcmp( argv[i], "-rg-id" ) == 0 ||
  279. strcmp( argv[i], "--rg-id" ) == 0)
  280. rg_id = argv[++i];
  281. else if (strcmp( argv[i], "-rg" ) == 0 ||
  282. strcmp( argv[i], "--rg" ) == 0)
  283. {
  284. rg_string += "\t";
  285. rg_string += argv[++i];
  286. }
  287. else if (strcmp( argv[i], "-1") == 0)
  288. {
  289. legacy_cmdline = false;
  290. paired_end = true;
  291. read_name1 = argv[++i];
  292. }
  293. else if (strcmp( argv[i], "-2") == 0)
  294. {
  295. legacy_cmdline = false;
  296. paired_end = true;
  297. read_name2 = argv[++i];
  298. }
  299. else if (strcmp( argv[i], "-U") == 0)
  300. {
  301. legacy_cmdline = false;
  302. paired_end = false;
  303. read_name1 = argv[++i];
  304. }
  305. else if (strcmp( argv[i], "-S") == 0)
  306. {
  307. legacy_cmdline = false;
  308. output_name = argv[++i];
  309. }
  310. else if (strcmp( argv[i], "-x") == 0)
  311. {
  312. legacy_cmdline = false;
  313. reference_name = argv[++i];
  314. }
  315. else if (argv[i][0] == '-')
  316. {
  317. // add unknown option to the string options
  318. const std::string key = std::string( argv[i][1] == '-' ? argv[i] + 2 : argv[i] + 1 );
  319. const char* next = argv[i+1];
  320. if (is_number(next) || next[0] != '-')
  321. {
  322. const std::string val = std::string( next ); ++i;
  323. string_options.insert( std::make_pair( key, val ) );
  324. }
  325. else
  326. string_options.insert( std::make_pair( key, "1" ) );
  327. }
  328. }
  329. if (strlen( read_name1 ) == 0 &&
  330. strlen( read_name2 ) == 0)
  331. {
  332. log_error(stderr, "must specify at least one read input with -U/-1/-2\n");
  333. return 1;
  334. }
  335. log_info(stderr, "nvBowtie... started\n");
  336. log_debug(stderr, " %-16s : %d\n", "max-reads", max_reads);
  337. log_debug(stderr, " %-16s : %d\n", "max-length", max_read_len);
  338. log_debug(stderr, " %-16s : %s\n", "quals", qencoding == io::Phred33 ? "phred33" :
  339. qencoding == io::Phred64 ? "phred64" :
  340. "solexa");
  341. if (paired_end)
  342. {
  343. log_debug(stderr, " %-16s : %s\n", "pe-policy",
  344. pe_policy == io::PE_POLICY_FF ? "ff" :
  345. pe_policy == io::PE_POLICY_FR ? "fr" :
  346. pe_policy == io::PE_POLICY_RF ? "rf" :
  347. "rr" );
  348. }
  349. if (string_options.empty() == false)
  350. {
  351. for (std::map<std::string,std::string>::const_iterator it = string_options.begin(); it != string_options.end(); ++it)
  352. log_debug(stderr, " %-16s : %s\n", it->first.c_str(), it->second.c_str());
  353. }
  354. log_debug(stderr, "\n");
  355. try
  356. {
  357. int device_count;
  358. cudaGetDeviceCount(&device_count);
  359. cuda::check_error("cuda-check");
  360. log_verbose(stderr, " cuda devices : %d\n", device_count);
  361. // inspect and select cuda devices
  362. if (device_count > 0)
  363. {
  364. if (cuda_devices.empty())
  365. {
  366. int best_device = 0;
  367. cudaDeviceProp best_device_prop;
  368. cudaGetDeviceProperties( &best_device_prop, best_device );
  369. for (int device = 0; device < device_count; ++device)
  370. {
  371. cudaDeviceProp device_prop;
  372. cudaGetDeviceProperties( &device_prop, device );
  373. log_verbose(stderr, " device %d has compute capability %d.%d\n", device, device_prop.major, device_prop.minor);
  374. log_verbose(stderr, " SM count : %u\n", device_prop.multiProcessorCount);
  375. log_verbose(stderr, " SM clock rate : %u Mhz\n", device_prop.clockRate / 1000);
  376. log_verbose(stderr, " memory clock rate : %.1f Ghz\n", float(device_prop.memoryClockRate) * 1.0e-6f);
  377. if (device_prop.major >= best_device_prop.major &&
  378. device_prop.minor >= best_device_prop.minor)
  379. {
  380. best_device_prop = device_prop;
  381. best_device = device;
  382. }
  383. }
  384. cuda_devices.push_back( best_device );
  385. }
  386. for (size_t i = 0; i < cuda_devices.size(); ++i)
  387. {
  388. cudaDeviceProp device_prop;
  389. cudaGetDeviceProperties( &device_prop, cuda_devices[i] );
  390. log_verbose(stderr, " chosen device %d\n", cuda_devices[i]);
  391. log_verbose(stderr, " device name : %s\n", device_prop.name);
  392. log_verbose(stderr, " compute capability : %d.%d\n", device_prop.major, device_prop.minor);
  393. }
  394. }
  395. else
  396. {
  397. log_error(stderr, "no available CUDA devices\n");
  398. exit(1);
  399. }
  400. if (legacy_cmdline)
  401. {
  402. const uint32 arg_offset = paired_end ? argc-4 : argc-3;
  403. reference_name = argv[arg_offset];
  404. if (paired_end)
  405. {
  406. read_name1 = argv[arg_offset+1];
  407. read_name2 = argv[arg_offset+2];
  408. }
  409. else
  410. read_name1 = argv[arg_offset+1];
  411. output_name = argv[argc-1];
  412. }
  413. //
  414. // Parse program options
  415. //
  416. // WARNING: we don't do any error checking on passed parameters!
  417. bowtie2::cuda::Params params;
  418. params.pe_policy = pe_policy;
  419. {
  420. bool init = true;
  421. std::string config = string_option( string_options, "config", "" );
  422. if (config != "") { bowtie2::cuda::parse_options( params, bowtie2::cuda::load_options( config.c_str() ), init ); init = false; }
  423. bowtie2::cuda::parse_options( params, string_options, init );
  424. }
  425. if (params.alignment_type == bowtie2::cuda::LocalAlignment &&
  426. params.scoring_mode == bowtie2::cuda::EditDistanceMode)
  427. {
  428. log_warning(stderr, "edit-distance scoring is incompatible with local alignment, switching to Smith-Waterman\n");
  429. params.scoring_mode = bowtie2::cuda::SmithWatermanMode;
  430. }
  431. //
  432. // Load the reference
  433. //
  434. SharedPointer<nvbio::io::SequenceData> reference_data;
  435. SharedPointer<nvbio::io::FMIndexData> driver_data;
  436. if (from_file)
  437. {
  438. log_visible(stderr, "loading reference index... started\n");
  439. log_info(stderr, " file: \"%s\"\n", reference_name);
  440. // load the reference data
  441. reference_data = io::load_sequence_file( DNA, reference_name );
  442. if (reference_data == NULL)
  443. {
  444. log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
  445. return 1;
  446. }
  447. log_visible(stderr, "loading reference index... done\n");
  448. nvbio::io::FMIndexDataHost* loader = new nvbio::io::FMIndexDataHost;
  449. if (!loader->load( reference_name ))
  450. {
  451. log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
  452. return 1;
  453. }
  454. driver_data = loader;
  455. }
  456. else
  457. {
  458. log_visible(stderr, "mapping reference index... started\n");
  459. log_info(stderr, " file: \"%s\"\n", reference_name);
  460. // map the reference data
  461. reference_data = io::map_sequence_file( reference_name );
  462. if (reference_data == NULL)
  463. {
  464. log_visible(stderr, "mapping reference index... failed\n");
  465. log_visible(stderr, "loading reference index... started\n");
  466. log_info(stderr, " file: \"%s\"\n", reference_name);
  467. // load the reference data
  468. reference_data = io::load_sequence_file( DNA, reference_name );
  469. if (reference_data == NULL)
  470. {
  471. log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
  472. return 1;
  473. }
  474. log_visible(stderr, "loading reference index... done\n");
  475. nvbio::io::FMIndexDataHost* loader = new nvbio::io::FMIndexDataHost;
  476. if (!loader->load( reference_name ))
  477. {
  478. log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
  479. return 1;
  480. }
  481. driver_data = loader;
  482. }
  483. else
  484. {
  485. log_visible(stderr, "mapping reference index... done\n");
  486. nvbio::io::FMIndexDataMMAP* loader = new nvbio::io::FMIndexDataMMAP;
  487. if (!loader->load( reference_name ))
  488. {
  489. log_error(stderr, "unable to load reference index \"%s\"\n", reference_name);
  490. return 1;
  491. }
  492. driver_data = loader;
  493. }
  494. }
  495. //
  496. // Setup the output file
  497. //
  498. SharedPointer<io::OutputFile> output_file( io::OutputFile::open(
  499. output_name,
  500. paired_end ? io::PAIRED_END : io::SINGLE_END,
  501. io::BNT(*reference_data) ) );
  502. output_file->set_rg( rg_id.c_str(), rg_string.c_str() );
  503. output_file->set_program(
  504. "nvBowtie",
  505. "nvBowtie",
  506. NVBIO_VERSION_STRING,
  507. argstr.c_str() );
  508. output_file->configure_mapq_evaluator(params.mapq_filter);
  509. output_file->header();
  510. if (paired_end)
  511. {
  512. //
  513. // Open the input read files
  514. //
  515. log_visible(stderr, "opening read file [1] \"%s\"\n", read_name1);
  516. SharedPointer<nvbio::io::SequenceDataStream> read_data_file1(
  517. nvbio::io::open_sequence_file(
  518. strcmp( read_name1, "-" ) == 0 ? "" : read_name1,
  519. qencoding,
  520. max_reads,
  521. max_read_len,
  522. io::REVERSE,
  523. trim3,
  524. trim5 )
  525. );
  526. if (read_data_file1 == NULL || read_data_file1->is_ok() == false)
  527. {
  528. log_error(stderr, "unable to open read file \"%s\"\n", read_name1);
  529. return 1;
  530. }
  531. log_visible(stderr, "opening read file [2] \"%s\"\n", read_name2);
  532. SharedPointer<nvbio::io::SequenceDataStream> read_data_file2(
  533. nvbio::io::open_sequence_file(
  534. strcmp( read_name2, "-" ) == 0 ? "" : read_name2,
  535. qencoding,
  536. max_reads,
  537. max_read_len,
  538. io::REVERSE,
  539. trim3,
  540. trim5 )
  541. );
  542. if (read_data_file2 == NULL || read_data_file2->is_ok() == false)
  543. {
  544. log_error(stderr, "unable to open read file \"%s\"\n", read_name2);
  545. return 1;
  546. }
  547. // print the command line options
  548. {
  549. const bowtie2::cuda::SimpleFunc& score_min = bowtie2::cuda::EditDistanceMode ?
  550. params.scoring_scheme.ed.m_score_min :
  551. params.scoring_scheme.sw.m_score_min;
  552. log_verbose(stderr, " mode = %s\n", bowtie2::cuda::mapping_mode( params.mode ));
  553. log_verbose(stderr, " scoring = %s\n", bowtie2::cuda::scoring_mode( params.scoring_mode ));
  554. log_verbose(stderr, " score-min = %s:%.2f:%.2f\n", score_min.type_string(), score_min.k, score_min.m);
  555. log_verbose(stderr, " alignment type = %s\n", params.alignment_type == bowtie2::cuda::LocalAlignment ? "local" : "end-to-end");
  556. log_verbose(stderr, " pe-policy = %s\n",
  557. pe_policy == io::PE_POLICY_FF ? "ff" :
  558. pe_policy == io::PE_POLICY_FR ? "fr" :
  559. pe_policy == io::PE_POLICY_RF ? "rf" :
  560. "rr" );
  561. log_verbose(stderr, " seed length = %u\n", params.seed_len);
  562. log_verbose(stderr, " seed interval = (%s, %.3f, %.3f)\n", params.seed_freq.type_symbol(), params.seed_freq.k, params.seed_freq.m);
  563. log_verbose(stderr, " seed rounds = %u\n", params.max_reseed);
  564. log_verbose(stderr, " max hits = %u\n", params.max_hits);
  565. log_verbose(stderr, " max edit dist = %u\n", params.max_dist);
  566. log_verbose(stderr, " max effort = %u\n", params.max_effort);
  567. log_verbose(stderr, " substitutions = %u\n", params.allow_sub);
  568. log_verbose(stderr, " mapq filter = %u\n", params.mapq_filter);
  569. log_verbose(stderr, " randomized = %s\n", params.randomized ? "yes" : "no");
  570. if (params.allow_sub)
  571. log_verbose(stderr, " subseed length = %u\n", params.subseed_len);
  572. }
  573. //
  574. // Setup the compute threads
  575. //
  576. uint32 batch_size = uint32(-1);
  577. std::vector< bowtie2::cuda::Stats > device_stats( cuda_devices.size() );
  578. std::vector< SharedPointer<bowtie2::cuda::ComputeThreadPE> > compute_threads( cuda_devices.size() );
  579. for (uint32 i = 0; i < cuda_devices.size(); ++i)
  580. {
  581. device_stats[i] = bowtie2::cuda::Stats( params );
  582. compute_threads[i] = SharedPointer<bowtie2::cuda::ComputeThreadPE>(
  583. new bowtie2::cuda::ComputeThreadPE(
  584. i,
  585. cuda_devices[i],
  586. *reference_data,
  587. *driver_data,
  588. string_options,
  589. params,
  590. device_stats[i] ) );
  591. batch_size = nvbio::min( batch_size, compute_threads[i]->gauge_batch_size() );
  592. }
  593. //
  594. // Setup the input thread
  595. //
  596. Timer timer;
  597. timer.start();
  598. bowtie2::cuda::Stats input_stats( params );
  599. bowtie2::cuda::InputThreadPE input_thread( read_data_file1.get(), read_data_file2.get(), input_stats, batch_size, params.avg_read_length );
  600. input_thread.create();
  601. for (uint32 i = 0; i < cuda_devices.size(); ++i)
  602. {
  603. compute_threads[i]->set_input( &input_thread );
  604. compute_threads[i]->set_output( output_file.get() );
  605. compute_threads[i]->create();
  606. }
  607. uint32 n_reads = 0;
  608. bowtie2::cuda::AlignmentStats concordant;
  609. bowtie2::cuda::AlignmentStats discordant;
  610. bowtie2::cuda::AlignmentStats mate1;
  611. bowtie2::cuda::AlignmentStats mate2;
  612. for (uint32 i = 0; i < cuda_devices.size(); ++i)
  613. {
  614. compute_threads[i]->join();
  615. n_reads += device_stats[i].n_reads;
  616. concordant.merge( device_stats[i].concordant );
  617. discordant.merge( device_stats[i].discordant );
  618. mate1.merge( device_stats[i].mate1 );
  619. mate2.merge( device_stats[i].mate2 );
  620. }
  621. input_thread.join();
  622. timer.stop();
  623. log_verbose( stderr, " compute threads joined\n" );
  624. // close the output file
  625. output_file->close();
  626. //
  627. // Print statistics
  628. //
  629. // transfer I/O statistics
  630. io::IOStats iostats = output_file->get_aggregate_statistics();
  631. const bowtie2::cuda::KernelStats& io = iostats.output_process_timings;
  632. log_stats(stderr, " total : %.2f sec (avg: %.3fK reads/s).\n", timer.seconds(), 1.0e-3f * float(n_reads) / timer.seconds());
  633. log_stats(stderr, " reads I/O : %.2f sec (avg: %.3fM reads/s, max: %.3fM reads/s).\n", input_stats.read_io.time, 1.0e-6f * input_stats.read_io.avg_speed(), 1.0e-6f * input_stats.read_io.max_speed);
  634. log_stats(stderr, " results I/O : %.2f sec (avg: %.3fM reads/s, max: %.3fM reads/s).\n", io.time, 1.0e-6f * io.avg_speed(), 1.0e-6f * io.max_speed);
  635. uint32& n_mapped = concordant.n_mapped;
  636. uint32& n_unique = concordant.n_unique;
  637. uint32& n_ambiguous = concordant.n_ambiguous;
  638. uint32& n_nonambiguous = concordant.n_unambiguous;
  639. uint32& n_multiple = concordant.n_multiple;
  640. {
  641. log_stats(stderr, " concordant reads : %.2f %% - of these:\n", 100.0f * float(n_mapped)/float(n_reads) );
  642. log_stats(stderr, " aligned uniquely : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_unique)/float(n_mapped), 100.0f * float(n_mapped - n_multiple)/float(n_reads) );
  643. log_stats(stderr, " aligned unambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_nonambiguous)/float(n_mapped), 100.0f * float(n_nonambiguous)/float(n_reads) );
  644. log_stats(stderr, " aligned ambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_ambiguous)/float(n_mapped), 100.0f * float(n_ambiguous)/float(n_reads) );
  645. log_stats(stderr, " aligned multiply : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_multiple)/float(n_mapped), 100.0f * float(n_multiple)/float(n_reads) );
  646. log_ed( concordant );
  647. log_mapq( concordant );
  648. log_stats(stderr, " discordant reads : %.2f %%\n", 100.0f * float(discordant.n_mapped)/float(n_reads) );
  649. log_ed( discordant );
  650. log_mapq( discordant );
  651. log_stats(stderr, " mate1 : %.2f %% - of these:\n", 100.0f * float(mate1.n_mapped)/float(n_reads) );
  652. if (mate1.n_mapped)
  653. {
  654. log_stats(stderr, " aligned uniquely : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate1.n_unique)/float(mate1.n_mapped), 100.0f * float(mate1.n_mapped - mate1.n_multiple)/float(n_reads) );
  655. log_stats(stderr, " aligned unambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate1.n_unambiguous)/float(mate1.n_mapped), 100.0f * float(mate1.n_unambiguous)/float(n_reads) );
  656. log_stats(stderr, " aligned ambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate1.n_ambiguous)/float(mate1.n_mapped), 100.0f * float(mate1.n_ambiguous)/float(n_reads) );
  657. log_stats(stderr, " aligned multiply : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate1.n_multiple)/float(mate1.n_mapped), 100.0f * float(mate1.n_multiple)/float(n_reads) );
  658. }
  659. log_stats(stderr, " mate2 : %.2f %% - of these:\n", 100.0f * float(mate2.n_mapped)/float(n_reads) );
  660. if (mate2.n_mapped)
  661. {
  662. log_stats(stderr, " aligned uniquely : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate2.n_unique)/float(mate2.n_mapped), 100.0f * float(mate2.n_mapped - mate2.n_multiple)/float(n_reads) );
  663. log_stats(stderr, " aligned unambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate2.n_unambiguous)/float(mate2.n_mapped), 100.0f * float(mate2.n_unambiguous)/float(n_reads) );
  664. log_stats(stderr, " aligned ambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate2.n_ambiguous)/float(mate2.n_mapped), 100.0f * float(mate2.n_ambiguous)/float(n_reads) );
  665. log_stats(stderr, " aligned multiply : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(mate2.n_multiple)/float(mate2.n_mapped), 100.0f * float(mate2.n_multiple)/float(n_reads) );
  666. }
  667. }
  668. // generate an html report
  669. if (params.report.length())
  670. bowtie2::cuda::generate_report_header( n_reads, params, concordant, (uint32)cuda_devices.size(), &device_stats[0], params.report.c_str() );
  671. }
  672. else
  673. {
  674. //
  675. // Open the input read file
  676. //
  677. log_visible(stderr, "opening read file \"%s\"\n", read_name1);
  678. SharedPointer<io::SequenceDataStream> read_data_file(
  679. io::open_sequence_file(
  680. strcmp( read_name1, "-" ) == 0 ? "" : read_name1,
  681. qencoding,
  682. max_reads,
  683. max_read_len,
  684. io::REVERSE,
  685. trim3,
  686. trim5 )
  687. );
  688. if (read_data_file == NULL || read_data_file->is_ok() == false)
  689. {
  690. log_error(stderr, "unable to open read file \"%s\"\n", read_name1);
  691. return 1;
  692. }
  693. // print the command line options
  694. {
  695. const bowtie2::cuda::SimpleFunc& score_min = params.scoring_mode == bowtie2::cuda::EditDistanceMode ?
  696. params.scoring_scheme.ed.m_score_min :
  697. params.scoring_scheme.sw.m_score_min;
  698. log_verbose(stderr, " mode = %s\n", bowtie2::cuda::mapping_mode( params.mode ));
  699. log_verbose(stderr, " scoring = %s\n", bowtie2::cuda::scoring_mode( params.scoring_mode ));
  700. log_verbose(stderr, " score-min = %s:%.2f:%.2f\n", score_min.type_string(), score_min.k, score_min.m);
  701. log_verbose(stderr, " alignment type = %s\n", params.alignment_type == bowtie2::cuda::LocalAlignment ? "local" : "end-to-end");
  702. log_verbose(stderr, " seed length = %u\n", params.seed_len);
  703. log_verbose(stderr, " seed interval = (%s, %.3f, %.3f)\n", params.seed_freq.type_symbol(), params.seed_freq.k, params.seed_freq.m);
  704. log_verbose(stderr, " seed rounds = %u\n", params.max_reseed);
  705. log_verbose(stderr, " max hits = %u\n", params.max_hits);
  706. log_verbose(stderr, " max edit dist = %u\n", params.max_dist);
  707. log_verbose(stderr, " max effort = %u\n", params.max_effort);
  708. log_verbose(stderr, " substitutions = %u\n", params.allow_sub);
  709. log_verbose(stderr, " mapq filter = %u\n", params.mapq_filter);
  710. log_verbose(stderr, " randomized = %s\n", params.randomized ? "yes" : "no");
  711. if (params.allow_sub)
  712. log_verbose(stderr, " subseed length = %u\n", params.subseed_len);
  713. }
  714. //
  715. // Setup the compute threads
  716. //
  717. uint32 batch_size = uint32(-1);
  718. std::vector< bowtie2::cuda::Stats > device_stats( cuda_devices.size() );
  719. std::vector< SharedPointer<bowtie2::cuda::ComputeThreadSE> > compute_threads( cuda_devices.size() );
  720. for (uint32 i = 0; i < cuda_devices.size(); ++i)
  721. {
  722. device_stats[i] = bowtie2::cuda::Stats( params );
  723. compute_threads[i] = SharedPointer<bowtie2::cuda::ComputeThreadSE>(
  724. new bowtie2::cuda::ComputeThreadSE(
  725. i,
  726. cuda_devices[i],
  727. *reference_data,
  728. *driver_data,
  729. string_options,
  730. params,
  731. device_stats[i] ) );
  732. batch_size = nvbio::min( batch_size, compute_threads[i]->gauge_batch_size() );
  733. }
  734. //
  735. // Setup the input thread
  736. //
  737. Timer timer;
  738. timer.start();
  739. bowtie2::cuda::Stats input_stats( params );
  740. bowtie2::cuda::InputThreadSE input_thread( read_data_file.get(), input_stats, batch_size, params.avg_read_length );
  741. input_thread.create();
  742. for (uint32 i = 0; i < cuda_devices.size(); ++i)
  743. {
  744. compute_threads[i]->set_input( &input_thread );
  745. compute_threads[i]->set_output( output_file.get() );
  746. compute_threads[i]->create();
  747. }
  748. uint32 n_reads = 0;
  749. bowtie2::cuda::AlignmentStats mate1;
  750. for (uint32 i = 0; i < cuda_devices.size(); ++i)
  751. {
  752. compute_threads[i]->join();
  753. n_reads += device_stats[i].n_reads;
  754. mate1.merge( device_stats[i].mate1 );
  755. }
  756. input_thread.join();
  757. timer.stop();
  758. log_verbose( stderr, " compute threads joined\n" );
  759. // close the output file
  760. output_file->close();
  761. //
  762. // Print statistics
  763. //
  764. // transfer I/O statistics
  765. io::IOStats iostats = output_file->get_aggregate_statistics();
  766. const bowtie2::cuda::KernelStats& io = iostats.output_process_timings;
  767. log_stats(stderr, " total : %.2f sec (avg: %.3fK reads/s).\n", timer.seconds(), 1.0e-3f * float(n_reads) / timer.seconds());
  768. log_stats(stderr, " reads I/O : %.2f sec (avg: %.3fM reads/s, max: %.3fM reads/s).\n", input_stats.read_io.time, 1.0e-6f * input_stats.read_io.avg_speed(), 1.0e-6f * input_stats.read_io.max_speed);
  769. log_stats(stderr, " results I/O : %.2f sec (avg: %.3fM reads/s, max: %.3fM reads/s).\n", io.time, 1.0e-6f * io.avg_speed(), 1.0e-6f * io.max_speed);
  770. uint32& n_mapped = mate1.n_mapped;
  771. uint32& n_unique = mate1.n_unique;
  772. uint32& n_ambiguous = mate1.n_ambiguous;
  773. uint32& n_nonambiguous = mate1.n_unambiguous;
  774. uint32& n_multiple = mate1.n_multiple;
  775. {
  776. log_stats(stderr, " mapped reads : %.2f %% - of these:\n", 100.0f * float(n_mapped)/float(n_reads) );
  777. log_stats(stderr, " aligned uniquely : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_unique)/float(n_mapped), 100.0f * float(n_mapped - n_multiple)/float(n_reads) );
  778. log_stats(stderr, " aligned unambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_nonambiguous)/float(n_mapped), 100.0f * float(n_nonambiguous)/float(n_reads) );
  779. log_stats(stderr, " aligned ambiguously : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_ambiguous)/float(n_mapped), 100.0f * float(n_ambiguous)/float(n_reads) );
  780. log_stats(stderr, " aligned multiply : %4.1f%% (%4.1f%% of total)\n", 100.0f * float(n_multiple)/float(n_mapped), 100.0f * float(n_multiple)/float(n_reads) );
  781. log_ed( mate1 );
  782. log_mapq( mate1 );
  783. }
  784. // generate an html report
  785. if (params.report.length())
  786. bowtie2::cuda::generate_report_header( n_reads, params, mate1, (uint32)cuda_devices.size(), &device_stats[0], params.report.c_str() );
  787. }
  788. log_info( stderr, "nvBowtie... done\n" );
  789. }
  790. catch (nvbio::cuda_error e)
  791. {
  792. log_error(stderr, "caught a nvbio::cuda_error exception:\n");
  793. log_error(stderr, " %s\n", e.what());
  794. }
  795. catch (nvbio::bad_alloc e)
  796. {
  797. log_error(stderr, "caught a nvbio::bad_alloc exception:\n");
  798. log_error(stderr, " %s\n", e.what());
  799. }
  800. catch (nvbio::logic_error e)
  801. {
  802. log_error(stderr, "caught a nvbio::logic_error exception:\n");
  803. log_error(stderr, " %s\n", e.what());
  804. }
  805. catch (nvbio::runtime_error e)
  806. {
  807. log_error(stderr, "caught a nvbio::runtime_error exception:\n");
  808. log_error(stderr, " %s\n", e.what());
  809. }
  810. catch (std::bad_alloc e)
  811. {
  812. log_error(stderr, "caught a std::bad_alloc exception:\n");
  813. log_error(stderr, " %s\n", e.what());
  814. }
  815. catch (std::logic_error e)
  816. {
  817. log_error(stderr, "caught a std::logic_error exception:\n");
  818. log_error(stderr, " %s\n", e.what());
  819. }
  820. catch (std::runtime_error e)
  821. {
  822. log_error(stderr, "caught a std::runtime_error exception:\n");
  823. log_error(stderr, " %s\n", e.what());
  824. }
  825. catch (...)
  826. {
  827. log_error(stderr, "caught an unknown exception!\n");
  828. }
  829. return 0;
  830. }