/* * nvbio * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // seeding.cu // #include #include #include #include #include #include #include #include #include #include #include using namespace nvbio; int8 s_blosum62[] = { 4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -4, -1, -1, -1, 1, 0, 0, -3, -2, -2, -1, 0, 0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -4, -3, -3, -3, -1, -1, -1, -2, -2, -3, -3, -2, -2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -4, -1, 0, -2, 0, -1, -3, -4, -3, 4, 1, -1, -1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -4, -1, 2, 0, 0, -1, -2, -3, -2, 1, 4, -1, -2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -4, -3, -3, -2, -2, -1, 1, 3, -3, -3, -1, 0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -4, -2, -2, -2, 0, -2, -3, -2, -3, -1, -2, -1, -2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -4, -2, 0, 0, -1, -2, -3, -2, 2, 0, 0, -1, -1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -4, -3, -3, -3, -2, -1, 3, -3, -1, -3, -3, -1, -1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -4, -1, 1, 2, 0, -1, -2, -3, -2, 0, 1, -1, -1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -4, -3, -2, -2, -2, -1, 1, -2, -1, -4, -3, -1, -1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -4, -2, 0, -1, -1, -1, 1, -1, -1, -3, -1, -1, -2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -4, -2, 0, 0, 1, 0, -3, -4, -2, 3, 0, -1, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, -4, 7, -1, -2, -1, -1, -2, -4, -3, -2, -1, -2, -1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -4, -1, 5, 1, 0, -1, -2, -2, -1, 0, 3, -1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -4, -2, 1, 5, -1, -1, -3, -3, -2, -1, 0, -1, 1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -4, -1, 0, -1, 4, 1, -2, -3, -2, 0, 0, 0, 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -4, -1, -1, -1, 1, 5, 0, -2, -2, -1, -1, 0, 0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -4, -2, -2, -3, -2, 0, 4, -3, -1, -3, -2, -1, -3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -4, -2, -3, -3, -2, -3, 11, 2, -4, -3, -2, -2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -4, -3, -1, -2, -2, -2, -1, 2, 7, -3, -2, -1, -2, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -4, -2, 0, -1, 0, -1, -3, -4, -3, 4, 1, -1, -1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -4, -1, 3, 0, 0, -1, -2, -3, -2, 1, 4, -1, 0, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -4, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1, -1, }; template struct BlosumScheme { NVBIO_FORCEINLINE NVBIO_HOST_DEVICE BlosumScheme(const matrix_iterator m, const int32 gap_open, const int32 gap_ext) : m_matrix(m), m_gap_open(gap_open), m_gap_ext(gap_ext) {} NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 match(const uint8 q = 0) const { return 11; }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 substitution(const uint32 r_i, const uint32 q_j, const uint8 r, const uint8 q, const uint8 qq = 0) const { return int8( m_matrix[ r + q*24 ] ); }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 pattern_gap_open() const { return m_gap_open; }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 pattern_gap_extension() const { return m_gap_ext; }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 text_gap_open() const { return m_gap_open; }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 text_gap_extension() const { return m_gap_ext; }; const matrix_iterator m_matrix; const int32 m_gap_open; const int32 m_gap_ext; }; template struct QueryProfile { NVBIO_FORCEINLINE NVBIO_HOST_DEVICE QueryProfile(const matrix_iterator m, const int32 gap_open, const int32 gap_ext) : m_matrix(m), m_gap_open(gap_open), m_gap_ext(gap_ext) {} NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 match(const uint8 q = 0) const { return 11; }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 substitution(const uint32 r_i, const uint32 q_j, const uint8 r, const uint8 q, const uint8 qq = 0) const { return int8( m_matrix[ r + q_j*24 ] ); }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 pattern_gap_open() const { return m_gap_open; }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 pattern_gap_extension() const { return m_gap_ext; }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 text_gap_open() const { return m_gap_open; }; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE int32 text_gap_extension() const { return m_gap_ext; }; const matrix_iterator m_matrix; const int32 m_gap_open; const int32 m_gap_ext; }; // main test entry point // int main(int argc, char* argv[]) { log_info(stderr, "protein SW... started\n"); const uint32 n_tests = 10; const uint32 n_strings = 50000; const uint32 P = 100; const uint32 T = 1000; // alloc a device vector for holding the BLOSUM62 scoring matrix nvbio::vector d_blosum62( 24*24 ); // copy the matrix to the device thrust::copy( s_blosum62, s_blosum62 + 24*24, d_blosum62.begin() ); // alloc the storage for the host strings nvbio::vector h_pattern_strings( P * n_strings ); nvbio::vector h_pattern_profiles( P * n_strings * 24 ); nvbio::vector h_text_strings( T * n_strings ); // fill the strings with random characters LCG_random rand; for (uint32 i = 0; i < n_strings; ++i) { for (uint32 j = 0; j < P; ++j) { h_pattern_strings[i*P + j] = nvbio::min( uint8( rand.next() * 24.0f ), (uint8)23u ); const uint8 q = h_pattern_strings[i*P + j]; for (uint32 r = 0; r < 24; ++r) h_pattern_profiles[i*P + j*24 + r] = s_blosum62[ r + q*24 ]; } } for (uint32 i = 0; i < T * n_strings; ++i) h_text_strings[i] = nvbio::min( uint8( rand.next() * 24.0f ), (uint8)23u ); // copy to the device nvbio::vector d_pattern_strings( h_pattern_strings ); nvbio::vector d_pattern_profiles( h_pattern_profiles ); nvbio::vector d_text_strings( h_text_strings ); nvbio::vector d_pattern_offsets( n_strings + 1 ); nvbio::vector d_text_offsets( n_strings + 1 ); // build the string offsets thrust::sequence( d_pattern_offsets.begin(), d_pattern_offsets.end(), 0u, P ); thrust::sequence( d_text_offsets.begin(), d_text_offsets.end(), 0u, T ); // prepare a vector of alignment sinks nvbio::vector< device_tag, aln::BestSink< uint32 > > sinks( n_strings ); { const aln::SimpleGotohScheme scoring( 1, -1, -5, -3 ); Timer timer; timer.start(); for (uint32 i = 0; i < n_tests; ++i) { // and execute the batch alignment, on a GPU device aln::batch_alignment_score( aln::make_gotoh_aligner( scoring ), make_concatenated_string_set( n_strings, (const uint8*)raw_pointer( d_pattern_strings ), (const uint32*)raw_pointer( d_pattern_offsets ) ), make_concatenated_string_set( n_strings, (const uint8*)raw_pointer( d_text_strings ), (const uint32*)raw_pointer( d_text_offsets ) ), sinks.begin(), aln::DeviceThreadScheduler() ); } cudaDeviceSynchronize(); timer.stop(); log_info(stderr, " GCUPS (Constant): %.1f\n", (1.0e-9f * float(P*T) * float(n_strings) * float(n_tests))/timer.seconds()); } { const BlosumScheme< cuda::ldg_pointer > scoring( cuda::make_ldg_pointer( raw_pointer( d_blosum62 ) ), -5, -3 ); Timer timer; timer.start(); for (uint32 i = 0; i < n_tests; ++i) { // and execute the batch alignment, on a GPU device aln::batch_alignment_score( aln::make_gotoh_aligner( scoring ), make_concatenated_string_set( n_strings, (const uint8*)raw_pointer( d_pattern_strings ), (const uint32*)raw_pointer( d_pattern_offsets ) ), make_concatenated_string_set( n_strings, (const uint8*)raw_pointer( d_text_strings ), (const uint32*)raw_pointer( d_text_offsets ) ), sinks.begin(), aln::DeviceThreadBlockScheduler<128,1>() ); } cudaDeviceSynchronize(); timer.stop(); log_info(stderr, " GCUPS (Blosum62): %.1f\n", (1.0e-9f * float(P*T) * float(n_strings) * float(n_tests))/timer.seconds()); } { //const QueryProfile< cuda::ldg_pointer > scoring( cuda::make_ldg_pointer( raw_pointer( d_pattern_profiles ) ), -5, -3 ); const QueryProfile scoring( raw_pointer( d_pattern_profiles ), -5, -3 ); Timer timer; timer.start(); for (uint32 i = 0; i < n_tests; ++i) { // and execute the batch alignment, on a GPU device aln::batch_alignment_score( aln::make_gotoh_aligner( scoring ), make_concatenated_string_set( n_strings, thrust::make_constant_iterator(0), (const uint32*)raw_pointer( d_pattern_offsets ) ), make_concatenated_string_set( n_strings, (const uint8*)raw_pointer( d_text_strings ), (const uint32*)raw_pointer( d_text_offsets ) ), sinks.begin(), aln::DeviceThreadBlockScheduler<128,1>() ); } cudaDeviceSynchronize(); timer.stop(); log_info(stderr, " GCUPS (Blosum62): %.1f\n", (1.0e-9f * float(P*T) * float(n_strings) * float(n_tests))/timer.seconds()); } log_info(stderr, "protein SW... done\n"); return 0; }