utils.h 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. // utils.h
  28. //
  29. #pragma once
  30. #include <nvbio/basic/numbers.h>
  31. #include <nvbio/basic/threads.h>
  32. #include <nvbio/basic/cuda/arch.h>
  33. enum {
  34. ERROR_FREE = 0,
  35. CORRECTIONS = 1,
  36. UNFIXABLE = 2,
  37. TRIMMED_READS = 3,
  38. TRIMMED_BASES = 4
  39. };
  40. typedef nvbio::uint8 uint8;
  41. typedef nvbio::int16 int16;
  42. typedef nvbio::uint16 uint16;
  43. typedef nvbio::int32 int32;
  44. typedef nvbio::uint32 uint32;
  45. typedef nvbio::int64 int64;
  46. typedef nvbio::uint64 uint64;
  47. typedef nvbio::uint64_2 uint64_2;
  48. typedef nvbio::uint64_4 uint64_4;
  49. //static const uint32 SAMPLED_KMERS_FILTER_K = 9; // optimal number of hashes for a Bloom filter with false probability rate of 0.01
  50. //static const uint32 TRUSTED_KMERS_FILTER_K = 11; // optimal number of hashes for a Bloom filter with false probability rate of 0.0005
  51. //static const uint32 SAMPLED_KMERS_FILTER_K = 7; // optimal number of hashes for a Bloom filter with false probability rate of 0.01
  52. //static const uint32 TRUSTED_KMERS_FILTER_K = 11; // optimal number of hashes for a Bloom filter with false probability rate of 0.0005
  53. static const uint32 SAMPLED_KMERS_FILTER_K = 5; // this parameter should theoretically achieve a worse false probability rate than 0.01, yet in practice it does better
  54. static const uint32 TRUSTED_KMERS_FILTER_K = 8; // this parameter should theoretically achieve a worse false probability rate than 0.0005, yet in practice it does better
  55. struct hash_functor1
  56. {
  57. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  58. uint64 operator() (const uint64 kmer) const { return nvbio::hash( kmer ); }
  59. };
  60. struct hash_functor2
  61. {
  62. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  63. uint64 operator() (const uint64 kmer) const { return nvbio::hash2( kmer ); }
  64. };
  65. enum { MAX_READ_LENGTH = 2048 };
  66. struct KmerCode
  67. {
  68. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  69. KmerCode() : mask(0), code(0), len(0), invalid(-1) {}
  70. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  71. KmerCode(const int l) : mask(0), code(0), len(l), invalid(-1)
  72. {
  73. for (int i = 0 ; i < len; ++i)
  74. {
  75. mask = mask << 2u;
  76. mask = mask | 3u;
  77. }
  78. }
  79. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  80. KmerCode(const KmerCode& k) : mask(k.mask), code(k.code), len(k.len), invalid(k.invalid)
  81. {}
  82. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  83. void restart() { code = 0ull ; invalid = -1 ; }
  84. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  85. void push_back(const uint8 c)
  86. {
  87. if (invalid != -1)
  88. invalid++;
  89. code = ((code << 2ull) & mask) | uint64(c & 3);
  90. if (c >= 4)
  91. invalid = 0;
  92. if (invalid >= len)
  93. invalid = -1;
  94. }
  95. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  96. void push_front(const uint8 c)
  97. {
  98. shift_right( 1 );
  99. if (c >= 4)
  100. invalid = len - 1;
  101. code = (code | ((uint64(c & 3)) << (2ull * (len - 1)))) & mask;
  102. }
  103. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  104. void shift_right(int k)
  105. {
  106. if (invalid != -1)
  107. invalid -= k;
  108. code = (code >> (2ull * k)) & (mask >> (2ull * k));
  109. if (invalid < 0)
  110. invalid = -1;
  111. }
  112. NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
  113. bool is_valid() const { return invalid == -1; }
  114. uint64 mask;
  115. uint64 code;
  116. int len;
  117. int invalid;
  118. };
  119. struct SequenceStats
  120. {
  121. SequenceStats() : m_reads(0), m_bps(0), m_time(0) {}
  122. nvbio::Mutex m_mutex;
  123. uint64 m_reads;
  124. uint64 m_bps;
  125. float m_time;
  126. };