123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363 |
- #include <nvBowtie/bowtie2/cuda/params.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <numeric>
- #include <functional>
- #include <map>
- namespace nvbio {
- namespace bowtie2 {
- namespace cuda {
- std::map<std::string,std::string> load_options(const char* name)
- {
- std::map<std::string,std::string> options;
- FILE* file = fopen( name, "r" );
- if (file == NULL)
- {
- log_warning( stderr, "failed opening \"%s\"\n", name );
- return options;
- }
- char key[1024];
- char value[1024];
- while (fscanf( file, "%s %s", key, value ) == 2)
- options[ key ] = std::string( value );
- fclose( file );
- return options;
- }
- // bogus implementation of a function to check if a string is a number
- bool is_number(const char* str, uint32 len = uint32(-1))
- {
- if (str[0] == '-')
- ++str;
- for (uint32 l = 0; *str != '\0' && l < len; ++l)
- {
- const char c = *str; ++str;
- if (c == '.') continue;
- if (c >= '0' && c <= '9') continue;
- return false;
- }
- return true;
- }
- // bogus implementation of a function to check if an option is a function
- SimpleFunc parse_function(const char* str, const SimpleFunc def)
- {
- if (str[1] != ',')
- return def;
- if (!(str[0] == 'C' ||
- str[0] == 'L' ||
- str[0] == 'G' ||
- str[0] == 'S'))
- return def;
- SimpleFunc ret;
- ret.type = (str[0] == 'C') ? SimpleFunc::LinearFunc :
- (str[0] == 'L') ? SimpleFunc::LinearFunc :
- (str[0] == 'G') ? SimpleFunc::LogFunc :
- SimpleFunc::SqrtFunc;
- std::string nums = std::string( str + 2 );
- const size_t c = nums.find(',');
- if (c == std::string::npos)
- return def;
- if (is_number( nums.c_str(), (uint32)c ) == false) return def;
- if (is_number( nums.c_str() + c + 1 ) == false) return def;
- const std::string num1 = nums.substr( 0, c );
- const std::string num2 = std::string( nums.c_str() + c + 1 );
- ret.k = (float)atof( num1.c_str() );
- ret.m = (float)atof( nums.c_str() + c + 1 );
- // take care of transforming constant functions in linear ones
- if (str[0] == 'C')
- {
- //ret.k += ret.m;
- ret.m = 0.0f;
- }
- return ret;
- }
- template <typename options_type>
- SimpleFunc func_option(const options_type& options, const char* name, const SimpleFunc func)
- {
- return (options.find( std::string(name) ) != options.end()) ?
- parse_function( options.find(std::string(name))->second.c_str(), func ) :
- func;
- }
- template <typename options_type>
- SimpleFunc func_option(const options_type& options, const char* name1, const char* name2, const SimpleFunc func)
- {
- return
- (options.find( std::string(name1) ) != options.end()) ?
- parse_function( options.find(std::string(name1))->second.c_str(), func ) :
- (options.find( std::string(name2) ) != options.end()) ?
- parse_function( options.find(std::string(name2))->second.c_str(), func ) :
- func;
- }
- void parse_options(Params& params, const std::map<std::string,std::string>& options, bool init)
- {
- const bool old_local = params.alignment_type == LocalAlignment;
- const uint32 old_scoring_mode = params.scoring_mode;
- params.mode = mapping_mode( string_option(options, "mode", init ? "best" : mapping_mode( params.mode )).c_str() ); // mapping mode
- params.scoring_mode = scoring_mode( string_option(options, "scoring", init ? "sw" : scoring_mode( params.scoring_mode )).c_str() ); // scoring mode
- params.alignment_type = uint_option(options, "local", init ? 0u : params.alignment_type == LocalAlignment ) ? LocalAlignment : EndToEndAlignment; // local alignment
- params.keep_stats = bool_option(options, "stats", init ? 1u : params.keep_stats); // keep stats
- params.max_hits = uint_option(options, "max-hits", init ? 100u : params.max_hits); // too big = memory exhaustion
- params.max_dist = uint_option(options, "max-dist", init ? 15u : params.max_dist); // must be <= MAX_BAND_LEN/2
- params.max_effort_init = uint_option(options, "max-effort-init", init ? 15u : params.max_effort_init); // initial scoring effort limit
- params.max_effort = uint_option(options, "max-effort", "D", init ? 15u : params.max_effort); // scoring effort limit
- params.min_ext = uint_option(options, "min-ext", init ? 30u : params.min_ext); // min # of extensions
- params.max_ext = uint_option(options, "max-ext", init ? 400u : params.max_ext); // max # of extensions
- params.max_reseed = uint_option(options, "max-reseed", "R", init ? 2u : params.max_reseed); // max # of reseeding rounds
- params.rep_seeds = uint_option(options, "rep-seeds", init ? 300u : params.rep_seeds); // reseeding threshold
- params.allow_sub = uint_option(options, "N", init ? 0u : params.allow_sub); // allow substitution in seed
- params.mapq_filter = uint_option(options, "mapQ-filter", "Q", init ? 0u : params.mapq_filter); // filter anything below this
- params.report = string_option(options, "report", init ? "" : params.report.c_str()); // generate a report file
- params.scoring_file = string_option(options, "scoring-scheme", init ? "" : params.scoring_file.c_str());
- params.randomized = bool_option(options, "rand", init ? 1u : params.randomized); // use randomized selection
- params.randomized =!bool_option(options, "no-rand", !params.randomized); // don't use randomized selection
- params.top_seed = uint_option(options, "top", init ? 0u : params.top_seed); // explore top seed entirely
- params.min_read_len = uint_option(options, "min-read-len", init ? 12u : params.min_read_len); // minimum read length
- params.ungapped_mates = bool_option(options, "ungapped-mates", "ug", init ? 0u : params.ungapped_mates); // ungapped mate alignment
- params.fw = !bool_option(options, "nofw", init ? false : !params.fw); // fw alignment
- params.rc = !bool_option(options, "norc", init ? false : !params.rc); // rc alignment
- // force the all-mapping mode with the '--all|-a' option
- if (uint_option(options, "all", "a", params.mode == AllMapping))
- params.mode = AllMapping;
- // force Edit-Distance scoring with the '--ed' option
- if (uint_option(options, "ed", params.scoring_mode == EditDistanceMode))
- params.scoring_mode = EditDistanceMode;
- // force Smith-Waterman scoring with the '--sw' option
- if (uint_option(options, "sw", params.scoring_mode == SmithWatermanMode))
- params.scoring_mode = SmithWatermanMode;
- const bool local = params.alignment_type == LocalAlignment;
- // set the default seeding values, or reset them if the alignment type has been changed
- if (init || (local != old_local))
- {
- params.seed_len = local ? 20 : 22u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, (local ? 0.75f : 1.15f) );
- }
- params.seed_len = uint_option(options, "seed-len", "L", params.seed_len); // no greater than 32
- params.seed_freq = func_option( options, "seed-freq", "i", params.seed_freq ); // seed interval
- params.subseed_len = uint_option(options, "subseed-len", init ? 0u : params.subseed_len); // no greater than 32
- params.pe_overlap = bool_option(options, "overlap", init ? true : params.pe_overlap); // paired-end overlap
- params.pe_overlap = !bool_option(options, "no-overlap", !params.pe_overlap); // paired-end overlap
- params.pe_dovetail = bool_option(options, "dovetail", init ? false : params.pe_dovetail); // paired-end dovetail
- params.pe_unpaired = !bool_option(options, "no-mixed", init ? false :!params.pe_unpaired); // paired-end no-mixed
- params.pe_discordant = !bool_option(options, "no-discordant", init ? false :!params.pe_discordant); // paired-end no-discordant
- params.pe_discordant = bool_option(options, "discordant", params.pe_discordant); // paired-end discordant
- params.min_frag_len = uint_option(options, "minins", "I", init ? 0u : params.min_frag_len); // paired-end minimum fragment length
- params.max_frag_len = uint_option(options, "maxins", "X", init ? 500u : params.max_frag_len); // paired-end maximum fragment length
- // the maximum batch of reads processed in parallel
- params.max_batch_size = uint_option(options, "batch-size", init ? 1024u : params.max_batch_size ); // maximum batch size
- params.avg_read_length = uint_option(options, "read-length", init ? AVG_READ_LENGTH : params.avg_read_length ); // average read length
- // internal controls
- params.scoring_window = uint_option(options, "scoring-window", init ? 32u : params.scoring_window); // scoring window size
- params.debug.read_id = (uint32)int_option(options, "debug-read", init ? -1 : (int32)params.debug.read_id); // debug read id
- params.debug.select = bool_option(options, "debug-select", init ? false: params.debug.select); // debug select kernel
- params.debug.locate = bool_option(options, "debug-locate", init ? false: params.debug.locate); // debug locate kernel
- params.debug.score = bool_option(options, "debug-score", init ? true : params.debug.score); // debug score kernel
- params.debug.score_bad = bool_option(options, "debug-score-bad", init ? false: params.debug.score_bad); // debug score bad
- params.debug.score_info = bool_option(options, "debug-score-info", init ? false: params.debug.score_info); // debug score info
- params.debug.reduce = bool_option(options, "debug-reduce", init ? true : params.debug.reduce); // debug reduce kernel
- params.debug.traceback = bool_option(options, "debug-traceback", init ? true : params.debug.traceback); // debug traceback kernel
- params.debug.asserts = bool_option(options, "debug-asserts", init ? true : params.debug.asserts); // debug asserts
- params.persist_batch = int_option(options, "persist-batch", init ? -1 : params.persist_batch); // persist pass
- params.persist_seeding = int_option(options, "persist-seeding", init ? -1 : params.persist_seeding); // persist pass
- params.persist_extension = int_option(options, "persist-extension", init ? -1 : params.persist_extension); // persist pass
- params.persist_file = string_option(options, "persist-file", init ? "" : params.persist_file.c_str() ); // persist file
- params.no_multi_hits = int_option(options, "no-multi-hits", init ? 0 : params.no_multi_hits ); // disable multi-hit selection
- params.max_effort_init = nvbio::max( params.max_effort_init, params.max_effort );
- params.max_ext = nvbio::max( params.max_ext, params.max_effort );
- UberScoringScheme& sc = params.scoring_scheme;
- // set the default ED values, or reset them if the scoring mode has been changed
- if (init || (params.scoring_mode != old_scoring_mode))
- sc.ed.m_score_min = SimpleFunc( SimpleFunc::LinearFunc, -(float)params.max_dist, 0.0f );
- // set the default SW values, or reset them if the alignment type has been changed
- if (init || (local != old_local))
- {
- sc.sw = local ?
- SmithWatermanScoringScheme<>::local() :
- SmithWatermanScoringScheme<>();
- }
- // load scoring scheme from file
- if (params.scoring_file != "")
- sc.sw = load_scoring_scheme( params.scoring_file.c_str(), AlignmentType( params.alignment_type ) );
- // score-min
- sc.ed.m_score_min = func_option( options, "score-min", sc.ed.m_score_min );
- sc.sw.m_score_min = func_option( options, "score-min", sc.sw.m_score_min );
- // match bonus
- sc.sw.m_match.m_val = int_option( options, "ma", sc.sw.m_match.m_val );
- // mismatch penalties
- const int2 mp = int2_option( options, "mp", make_int2( sc.sw.m_mmp.m_max_val, sc.sw.m_mmp.m_min_val ) );
- sc.sw.m_mmp.m_max_val = mp.x;
- sc.sw.m_mmp.m_min_val = mp.y;
- // np
- sc.sw.m_np.m_val = int_option( options, "np", sc.sw.m_np.m_val );
- // read gaps
- const int2 rdg = int2_option( options, "rdg", make_int2( sc.sw.m_read_gap_const, sc.sw.m_read_gap_coeff ) );
- sc.sw.m_read_gap_const = rdg.x;
- sc.sw.m_read_gap_coeff = rdg.y;
- // reference gaps
- const int2 rfg = int2_option( options, "rfg", make_int2( sc.sw.m_ref_gap_const, sc.sw.m_ref_gap_coeff ) );
- sc.sw.m_ref_gap_const = rfg.x;
- sc.sw.m_ref_gap_coeff = rfg.y;
- // presets
- if (params.alignment_type == EndToEndAlignment)
- {
- if (uint_option(options, "very-fast", 0u))
- {
- params.max_effort = 5u;
- params.max_reseed = 1u;
- params.seed_len = 22u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 0.0f, 2.5f );
- }
- if (uint_option(options, "fast", 0u))
- {
- params.max_effort = 10u;
- params.max_reseed = 2u;
- params.seed_len = 22u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 0.0f, 2.5f );
- }
- if (uint_option(options, "sensitive", 0u))
- {
- params.max_effort = 15u;
- params.max_reseed = 2u;
- params.seed_len = 22u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 1.15f );
- }
- if (uint_option(options, "very-sensitive", 0u))
- {
- params.max_effort = 20u;
- params.max_reseed = 3u;
- params.seed_len = 20u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.5f );
- }
- if (uint_option(options, "very-fast-local", 0u))
- {
- if (uint_option(options, "end-to-end", 0u ) == 0)
- {
- params.alignment_type = LocalAlignment;
- params.max_effort = 5u;
- params.max_reseed = 1u;
- params.seed_len = 25u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 2.0f );
- }
- else
- log_warning(stderr, "--very-fast-local is incompatible with --end-to-end\n");
- }
- if (uint_option(options, "fast-local", 0u))
- {
- if (uint_option(options, "end-to-end", 0u ) == 0)
- {
- params.alignment_type = LocalAlignment;
- params.max_effort = 10u;
- params.max_reseed = 2u;
- params.seed_len = 22u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 1.75f );
- }
- else
- log_warning(stderr, "--fast-local is incompatible with --end-to-end\n");
- }
- if (uint_option(options, "sensitive-local", 0u))
- {
- if (uint_option(options, "end-to-end", 0u ) == 0)
- {
- params.alignment_type = LocalAlignment;
- params.max_effort = 15u;
- params.max_reseed = 2u;
- params.seed_len = 20u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.75f );
- }
- else
- log_warning(stderr, "--sensitive-local is incompatible with --end-to-end\n");
- }
- if (uint_option(options, "very-sensitive-local", 0u))
- {
- if (uint_option(options, "end-to-end", 0u ) == 0)
- {
- params.alignment_type = LocalAlignment;
- params.max_effort = 20u;
- params.max_reseed = 3u;
- params.seed_len = 20u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.5f );
- }
- else
- log_warning(stderr, "--very-sensitive-local is incompatible with --end-to-end\n");
- }
- }
- else
- {
- if (uint_option(options, "very-fast", "very-fast-local", 0u))
- {
- params.max_effort = 5u;
- params.max_reseed = 1u;
- params.seed_len = 25u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 2.0f );
- }
- if (uint_option(options, "fast", "fast-local", 0u))
- {
- params.max_effort = 10u;
- params.max_reseed = 2u;
- params.seed_len = 22u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 1.75f );
- }
- if (uint_option(options, "sensitive", "sensitive-local", 0u))
- {
- params.max_effort = 15u;
- params.max_reseed = 2u;
- params.seed_len = 20u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.75f );
- }
- if (uint_option(options, "very-sensitive", "very-sensitive-local", 0u))
- {
- params.max_effort = 20u;
- params.max_reseed = 3u;
- params.seed_len = 20u;
- params.seed_freq = SimpleFunc( SimpleFunc::SqrtFunc, 1.0f, 0.5f );
- }
- }
- }
- } // namespace cuda
- } // namespace bowtie2
- } // namespace nvbio
|