params.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. #include <nvBowtie/bowtie2/cuda/params.h>
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <numeric>
  6. #include <functional>
  7. #include <map>
  8. namespace nvbio {
  9. namespace bowtie2 {
  10. namespace cuda {
  11. std::map<std::string,std::string> load_options(const char* name)
  12. {
  13. std::map<std::string,std::string> options;
  14. FILE* file = fopen( name, "r" );
  15. if (file == NULL)
  16. {
  17. log_warning( stderr, "failed opening \"%s\"\n", name );
  18. return options;
  19. }
  20. char key[1024];
  21. char value[1024];
  22. while (fscanf( file, "%s %s", key, value ) == 2)
  23. options[ key ] = std::string( value );
  24. fclose( file );
  25. return options;
  26. }
  27. // bogus implementation of a function to check if a string is a number
  28. bool is_number(const char* str, uint32 len = uint32(-1))
  29. {
  30. if (str[0] == '-')
  31. ++str;
  32. for (uint32 l = 0; *str != '\0' && l < len; ++l)
  33. {
  34. const char c = *str; ++str;
  35. if (c == '.') continue;
  36. if (c >= '0' && c <= '9') continue;
  37. return false;
  38. }
  39. return true;
  40. }
  41. // bogus implementation of a function to check if an option is a function
  42. SimpleFunc parse_function(const char* str, const SimpleFunc def)
  43. {
  44. if (str[1] != ',')
  45. return def;
  46. if (!(str[0] == 'C' ||
  47. str[0] == 'L' ||
  48. str[0] == 'G' ||
  49. str[0] == 'S'))
  50. return def;
  51. SimpleFunc ret;
  52. ret.type = (str[0] == 'C') ? SimpleFunc::LinearFunc :
  53. (str[0] == 'L') ? SimpleFunc::LinearFunc :
  54. (str[0] == 'G') ? SimpleFunc::LogFunc :
  55. SimpleFunc::SqrtFunc;
  56. std::string nums = std::string( str + 2 );
  57. const size_t c = nums.find(',');
  58. if (c == std::string::npos)
  59. return def;
  60. if (is_number( nums.c_str(), (uint32)c ) == false) return def;
  61. if (is_number( nums.c_str() + c + 1 ) == false) return def;
  62. const std::string num1 = nums.substr( 0, c );
  63. const std::string num2 = std::string( nums.c_str() + c + 1 );
  64. ret.k = (float)atof( num1.c_str() );
  65. ret.m = (float)atof( nums.c_str() + c + 1 );
  66. // take care of transforming constant functions in linear ones
  67. if (str[0] == 'C')
  68. {
  69. //ret.k += ret.m;
  70. ret.m = 0.0f;
  71. }
  72. return ret;
  73. }
  74. template <typename options_type>
  75. SimpleFunc func_option(const options_type& options, const char* name, const SimpleFunc func)
  76. {
  77. return (options.find( std::string(name) ) != options.end()) ?
  78. parse_function( options.find(std::string(name))->second.c_str(), func ) :
  79. func;
  80. }
  81. template <typename options_type>
  82. SimpleFunc func_option(const options_type& options, const char* name1, const char* name2, const SimpleFunc func)
  83. {
  84. return
  85. (options.find( std::string(name1) ) != options.end()) ?
  86. parse_function( options.find(std::string(name1))->second.c_str(), func ) :
  87. (options.find( std::string(name2) ) != options.end()) ?
  88. parse_function( options.find(std::string(name2))->second.c_str(), func ) :
  89. func;
  90. }
  91. void parse_options(Params& params, const std::map<std::string,std::string>& options, bool init)
  92. {
  93. const bool old_local = params.alignment_type == LocalAlignment;
  94. const uint32 old_scoring_mode = params.scoring_mode;
  95. params.mode = mapping_mode( string_option(options, "mode", init ? "best" : mapping_mode( params.mode )).c_str() ); // mapping mode
  96. params.scoring_mode = scoring_mode( string_option(options, "scoring", init ? "sw" : scoring_mode( params.scoring_mode )).c_str() ); // scoring mode
  97. params.alignment_type = uint_option(options, "local", init ? 0u : params.alignment_type == LocalAlignment ) ? LocalAlignment : EndToEndAlignment; // local alignment
  98. params.keep_stats = bool_option(options, "stats", init ? 1u : params.keep_stats); // keep stats
  99. params.max_hits = uint_option(options, "max-hits", init ? 100u : params.max_hits); // too big = memory exhaustion
  100. params.max_dist = uint_option(options, "max-dist", init ? 15u : params.max_dist); // must be <= MAX_BAND_LEN/2
  101. params.max_effort_init = uint_option(options, "max-effort-init", init ? 15u : params.max_effort_init); // initial scoring effort limit
  102. params.max_effort = uint_option(options, "max-effort", "D", init ? 15u : params.max_effort); // scoring effort limit
  103. params.min_ext = uint_option(options, "min-ext", init ? 30u : params.min_ext); // min # of extensions
  104. params.max_ext = uint_option(options, "max-ext", init ? 400u : params.max_ext); // max # of extensions
  105. params.max_reseed = uint_option(options, "max-reseed", "R", init ? 2u : params.max_reseed); // max # of reseeding rounds
  106. params.rep_seeds = uint_option(options, "rep-seeds", init ? 300u : params.rep_seeds); // reseeding threshold
  107. params.allow_sub = uint_option(options, "N", init ? 0u : params.allow_sub); // allow substitution in seed
  108. params.mapq_filter = uint_option(options, "mapQ-filter", "Q", init ? 0u : params.mapq_filter); // filter anything below this
  109. params.report = string_option(options, "report", init ? "" : params.report.c_str()); // generate a report file
  110. params.scoring_file = string_option(options, "scoring-scheme", init ? "" : params.scoring_file.c_str());
  111. params.randomized = bool_option(options, "rand", init ? 1u : params.randomized); // use randomized selection
  112. params.randomized =!bool_option(options, "no-rand", !params.randomized); // don't use randomized selection
  113. params.top_seed = uint_option(options, "top", init ? 0u : params.top_seed); // explore top seed entirely
  114. params.min_read_len = uint_option(options, "min-read-len", init ? 12u : params.min_read_len); // minimum read length
  115. params.ungapped_mates = bool_option(options, "ungapped-mates", "ug", init ? 0u : params.ungapped_mates); // ungapped mate alignment
  116. params.fw = !bool_option(options, "nofw", init ? false : !params.fw); // fw alignment
  117. params.rc = !bool_option(options, "norc", init ? false : !params.rc); // rc alignment
  118. // force the all-mapping mode with the '--all|-a' option
  119. if (uint_option(options, "all", "a", params.mode == AllMapping))
  120. params.mode = AllMapping;
  121. // force Edit-Distance scoring with the '--ed' option
  122. if (uint_option(options, "ed", params.scoring_mode == EditDistanceMode))
  123. params.scoring_mode = EditDistanceMode;
  124. // force Smith-Waterman scoring with the '--sw' option
  125. if (uint_option(options, "sw", params.scoring_mode == SmithWatermanMode))
  126. params.scoring_mode = SmithWatermanMode;
  127. const bool local = params.alignment_type == LocalAlignment;
  128. // set the default seeding values, or reset them if the alignment type has been changed
  129. if (init || (local != old_local))
  130. {
  131. params.seed_len = local ? 20 : 22u;
  132. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, (local ? 0.75f : 1.15f) );
  133. }
  134. params.seed_len = uint_option(options, "seed-len", "L", params.seed_len); // no greater than 32
  135. params.seed_freq = func_option( options, "seed-freq", "i", params.seed_freq ); // seed interval
  136. params.subseed_len = uint_option(options, "subseed-len", init ? 0u : params.subseed_len); // no greater than 32
  137. params.pe_overlap = bool_option(options, "overlap", init ? true : params.pe_overlap); // paired-end overlap
  138. params.pe_overlap = !bool_option(options, "no-overlap", !params.pe_overlap); // paired-end overlap
  139. params.pe_dovetail = bool_option(options, "dovetail", init ? false : params.pe_dovetail); // paired-end dovetail
  140. params.pe_unpaired = !bool_option(options, "no-mixed", init ? false :!params.pe_unpaired); // paired-end no-mixed
  141. params.pe_discordant = !bool_option(options, "no-discordant", init ? false :!params.pe_discordant); // paired-end no-discordant
  142. params.pe_discordant = bool_option(options, "discordant", params.pe_discordant); // paired-end discordant
  143. params.min_frag_len = uint_option(options, "minins", "I", init ? 0u : params.min_frag_len); // paired-end minimum fragment length
  144. params.max_frag_len = uint_option(options, "maxins", "X", init ? 500u : params.max_frag_len); // paired-end maximum fragment length
  145. // the maximum batch of reads processed in parallel
  146. params.max_batch_size = uint_option(options, "batch-size", init ? 1024u : params.max_batch_size ); // maximum batch size
  147. params.avg_read_length = uint_option(options, "read-length", init ? AVG_READ_LENGTH : params.avg_read_length ); // average read length
  148. // internal controls
  149. params.scoring_window = uint_option(options, "scoring-window", init ? 32u : params.scoring_window); // scoring window size
  150. params.debug.read_id = (uint32)int_option(options, "debug-read", init ? -1 : (int32)params.debug.read_id); // debug read id
  151. params.debug.select = bool_option(options, "debug-select", init ? false: params.debug.select); // debug select kernel
  152. params.debug.locate = bool_option(options, "debug-locate", init ? false: params.debug.locate); // debug locate kernel
  153. params.debug.score = bool_option(options, "debug-score", init ? true : params.debug.score); // debug score kernel
  154. params.debug.score_bad = bool_option(options, "debug-score-bad", init ? false: params.debug.score_bad); // debug score bad
  155. params.debug.score_info = bool_option(options, "debug-score-info", init ? false: params.debug.score_info); // debug score info
  156. params.debug.reduce = bool_option(options, "debug-reduce", init ? true : params.debug.reduce); // debug reduce kernel
  157. params.debug.traceback = bool_option(options, "debug-traceback", init ? true : params.debug.traceback); // debug traceback kernel
  158. params.debug.asserts = bool_option(options, "debug-asserts", init ? true : params.debug.asserts); // debug asserts
  159. params.persist_batch = int_option(options, "persist-batch", init ? -1 : params.persist_batch); // persist pass
  160. params.persist_seeding = int_option(options, "persist-seeding", init ? -1 : params.persist_seeding); // persist pass
  161. params.persist_extension = int_option(options, "persist-extension", init ? -1 : params.persist_extension); // persist pass
  162. params.persist_file = string_option(options, "persist-file", init ? "" : params.persist_file.c_str() ); // persist file
  163. params.no_multi_hits = int_option(options, "no-multi-hits", init ? 0 : params.no_multi_hits ); // disable multi-hit selection
  164. params.max_effort_init = nvbio::max( params.max_effort_init, params.max_effort );
  165. params.max_ext = nvbio::max( params.max_ext, params.max_effort );
  166. UberScoringScheme& sc = params.scoring_scheme;
  167. // set the default ED values, or reset them if the scoring mode has been changed
  168. if (init || (params.scoring_mode != old_scoring_mode))
  169. sc.ed.m_score_min = SimpleFunc( SimpleFunc::LinearFunc, -(float)params.max_dist, 0.0f );
  170. // set the default SW values, or reset them if the alignment type has been changed
  171. if (init || (local != old_local))
  172. {
  173. sc.sw = local ?
  174. SmithWatermanScoringScheme<>::local() :
  175. SmithWatermanScoringScheme<>();
  176. }
  177. // load scoring scheme from file
  178. if (params.scoring_file != "")
  179. sc.sw = load_scoring_scheme( params.scoring_file.c_str(), AlignmentType( params.alignment_type ) );
  180. // score-min
  181. sc.ed.m_score_min = func_option( options, "score-min", sc.ed.m_score_min );
  182. sc.sw.m_score_min = func_option( options, "score-min", sc.sw.m_score_min );
  183. // match bonus
  184. sc.sw.m_match.m_val = int_option( options, "ma", sc.sw.m_match.m_val );
  185. // mismatch penalties
  186. const int2 mp = int2_option( options, "mp", make_int2( sc.sw.m_mmp.m_max_val, sc.sw.m_mmp.m_min_val ) );
  187. sc.sw.m_mmp.m_max_val = mp.x;
  188. sc.sw.m_mmp.m_min_val = mp.y;
  189. // np
  190. sc.sw.m_np.m_val = int_option( options, "np", sc.sw.m_np.m_val );
  191. // read gaps
  192. const int2 rdg = int2_option( options, "rdg", make_int2( sc.sw.m_read_gap_const, sc.sw.m_read_gap_coeff ) );
  193. sc.sw.m_read_gap_const = rdg.x;
  194. sc.sw.m_read_gap_coeff = rdg.y;
  195. // reference gaps
  196. const int2 rfg = int2_option( options, "rfg", make_int2( sc.sw.m_ref_gap_const, sc.sw.m_ref_gap_coeff ) );
  197. sc.sw.m_ref_gap_const = rfg.x;
  198. sc.sw.m_ref_gap_coeff = rfg.y;
  199. // presets
  200. if (params.alignment_type == EndToEndAlignment)
  201. {
  202. if (uint_option(options, "very-fast", 0u))
  203. {
  204. params.max_effort = 5u;
  205. params.max_reseed = 1u;
  206. params.seed_len = 22u;
  207. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 0.0f, 2.5f );
  208. }
  209. if (uint_option(options, "fast", 0u))
  210. {
  211. params.max_effort = 10u;
  212. params.max_reseed = 2u;
  213. params.seed_len = 22u;
  214. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 0.0f, 2.5f );
  215. }
  216. if (uint_option(options, "sensitive", 0u))
  217. {
  218. params.max_effort = 15u;
  219. params.max_reseed = 2u;
  220. params.seed_len = 22u;
  221. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 1.15f );
  222. }
  223. if (uint_option(options, "very-sensitive", 0u))
  224. {
  225. params.max_effort = 20u;
  226. params.max_reseed = 3u;
  227. params.seed_len = 20u;
  228. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.5f );
  229. }
  230. if (uint_option(options, "very-fast-local", 0u))
  231. {
  232. if (uint_option(options, "end-to-end", 0u ) == 0)
  233. {
  234. params.alignment_type = LocalAlignment;
  235. params.max_effort = 5u;
  236. params.max_reseed = 1u;
  237. params.seed_len = 25u;
  238. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 2.0f );
  239. }
  240. else
  241. log_warning(stderr, "--very-fast-local is incompatible with --end-to-end\n");
  242. }
  243. if (uint_option(options, "fast-local", 0u))
  244. {
  245. if (uint_option(options, "end-to-end", 0u ) == 0)
  246. {
  247. params.alignment_type = LocalAlignment;
  248. params.max_effort = 10u;
  249. params.max_reseed = 2u;
  250. params.seed_len = 22u;
  251. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 1.75f );
  252. }
  253. else
  254. log_warning(stderr, "--fast-local is incompatible with --end-to-end\n");
  255. }
  256. if (uint_option(options, "sensitive-local", 0u))
  257. {
  258. if (uint_option(options, "end-to-end", 0u ) == 0)
  259. {
  260. params.alignment_type = LocalAlignment;
  261. params.max_effort = 15u;
  262. params.max_reseed = 2u;
  263. params.seed_len = 20u;
  264. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.75f );
  265. }
  266. else
  267. log_warning(stderr, "--sensitive-local is incompatible with --end-to-end\n");
  268. }
  269. if (uint_option(options, "very-sensitive-local", 0u))
  270. {
  271. if (uint_option(options, "end-to-end", 0u ) == 0)
  272. {
  273. params.alignment_type = LocalAlignment;
  274. params.max_effort = 20u;
  275. params.max_reseed = 3u;
  276. params.seed_len = 20u;
  277. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.5f );
  278. }
  279. else
  280. log_warning(stderr, "--very-sensitive-local is incompatible with --end-to-end\n");
  281. }
  282. }
  283. else
  284. {
  285. if (uint_option(options, "very-fast", "very-fast-local", 0u))
  286. {
  287. params.max_effort = 5u;
  288. params.max_reseed = 1u;
  289. params.seed_len = 25u;
  290. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 2.0f );
  291. }
  292. if (uint_option(options, "fast", "fast-local", 0u))
  293. {
  294. params.max_effort = 10u;
  295. params.max_reseed = 2u;
  296. params.seed_len = 22u;
  297. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 1.75f );
  298. }
  299. if (uint_option(options, "sensitive", "sensitive-local", 0u))
  300. {
  301. params.max_effort = 15u;
  302. params.max_reseed = 2u;
  303. params.seed_len = 20u;
  304. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.75f );
  305. }
  306. if (uint_option(options, "very-sensitive", "very-sensitive-local", 0u))
  307. {
  308. params.max_effort = 20u;
  309. params.max_reseed = 3u;
  310. params.seed_len = 20u;
  311. params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.5f );
  312. }
  313. }
  314. }
  315. } // namespace cuda
  316. } // namespace bowtie2
  317. } // namespace nvbio