nvSetBWT.dox 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. /*
  2. * nvbio
  3. * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the NVIDIA CORPORATION nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. ///\page nvsetbwt_page nvSetBWT
  28. ///\htmlonly
  29. /// <img src="nvidia_cubes.png" style="position:relative; bottom:-10px; border:0px;"/>
  30. ///\endhtmlonly
  31. ///\par
  32. ///\n
  33. /// <b>nvSetBWT</b> is an application built on top of \ref nvbio_page to build the BWT of
  34. /// a <i>set</i> of strings, typically reads.
  35. ///\par
  36. /// Given an input fastq or text file with one read per line, it will create a file
  37. /// containing the BWT of their forward and reverse-complemented strands.
  38. /// Alongside the main BWT file, a file containing the mapping between the primary
  39. /// dollar tokens and their position in the BWT will be generated.
  40. /// For example, running
  41. ///
  42. ///\verbatim
  43. /// ./nvSetBWT my-reads.fastq my-reads.bwt
  44. ///\endverbatim
  45. ///\par
  46. /// will generate the following files:
  47. ///
  48. ///\verbatim
  49. /// my-reads.bwt
  50. /// my-reads.pri
  51. ///\endverbatim
  52. ///
  53. ///\section OptionsSection Options
  54. ///\par
  55. /// nvSetBWT supports the following command options:
  56. ///
  57. ///\verbatim
  58. /// nvSetBWT [options] input_file output_file
  59. /// options:
  60. /// -v | --verbosity int (0-6) [5]
  61. /// -c | --compression string [1R] (e.g. "1", ..., "9", "1R")
  62. /// -F | --skip-forward
  63. /// -R | --skip-reverse
  64. ///\endverbatim
  65. ///
  66. ///\section FormatsSection File Formats
  67. ///\par
  68. /// The output BWT can be saved in one of the following formats:
  69. ///
  70. ///\verbatim
  71. /// .txt ASCII
  72. /// .txt.gz ASCII, gzip compressed
  73. /// .txt.bgz ASCII, block-gzip compressed
  74. /// .bwt 2-bit packed binary
  75. /// .bwt.gz 2-bit packed binary, gzip compressed
  76. /// .bwt.bgz 2-bit packed binary, block-gzip compressed
  77. /// .bwt4 4-bit packed binary
  78. /// .bwt4.gz 4-bit packed binary, gzip compressed
  79. /// .bwt4.bgz 4-bit packed binary, block-gzip compressed
  80. ///\endverbatim
  81. ///\par
  82. /// The accompanying primary map file (.pri|.pri.gz|.pri.bgz) is a plain list of (position,string-id) pairs,
  83. /// either in ASCII or binary form.
  84. /// The ASCII file has the form:
  85. ///
  86. ///\verbatim
  87. /// #PRI
  88. /// position[1] string[1]
  89. /// ...
  90. /// position[n] string[n]
  91. ///\endverbatim
  92. ///\par
  93. /// The binary file has the format:
  94. ///
  95. ///\verbatim
  96. /// char[4] header = "PRIB";
  97. /// struct { uint64 position; uint32 string_id; } pairs[n];
  98. ///\endverbatim
  99. ///
  100. ///\section DetailsSection Details
  101. ///\par
  102. /// nvSetBWT implements a novel algorithm for the BWT construction of very large string sets,
  103. /// called <b>set-bwte</b>:
  104. ///
  105. /// http://arxiv.org/pdf/1410.0562.pdf
  106. ///
  107. ///\par
  108. /// The algorithm can be considered an adaptation of Ferragina's serial <b>bwte</b> algorithm
  109. /// to string sets and massive parallelism.
  110. /// Among its properties, it is well suited to processing reads of arbitrary length, and it allows incremental
  111. /// updates (though this option is not yet implemented).
  112. ///