/* File: mkl_direct_blas.h */
  1. /*******************************************************************************
  2. * Copyright 2014-2022 Intel Corporation.
  3. *
  4. * This software and the related documents are Intel copyrighted materials, and
  5. * your use of them is governed by the express license under which they were
  6. * provided to you (License). Unless the License provides otherwise, you may not
  7. * use, modify, copy, publish, distribute, disclose or transmit this software or
  8. * the related documents without Intel's prior written permission.
  9. *
  10. * This software and the related documents are provided as is, with no express
  11. * or implied warranties, other than those that are expressly stated in the
  12. * License.
  13. *******************************************************************************/
  14. /*
  15. ! Content:
  16. ! Intel(R) oneAPI Math Kernel Library (oneMKL) C functions that can be inlined
  17. !******************************************************************************/
  18. #include "mkl_types.h"
  19. #include "immintrin.h"
  /*
   * Map the generic mkl_dc_* routine names onto the precision-specific names
   * selected by MKL_DOUBLE / MKL_SINGLE / MKL_COMPLEX / MKL_COMPLEX16.
   * The three Level-3 names are reset first so a previous mapping is dropped.
   * The dot-product names are only mapped for the two real precisions.
   */
  #undef mkl_dc_trsm
  #undef mkl_dc_syrk
  #undef mkl_dc_gemm

  #if defined(MKL_DOUBLE)
  #  define mkl_dc_gemm        mkl_dc_dgemm
  #  define mkl_dc_syrk        mkl_dc_dsyrk
  #  define mkl_dc_trsm        mkl_dc_dtrsm
  #  define mkl_dc_axpy        mkl_dc_daxpy
  #  define mkl_dc_dot         mkl_dc_ddot
  #  define MKL_DC_DOT_CONVERT mkl_dc_ddot_convert
  #elif defined(MKL_SINGLE)
  #  define mkl_dc_gemm        mkl_dc_sgemm
  #  define mkl_dc_syrk        mkl_dc_ssyrk
  #  define mkl_dc_trsm        mkl_dc_strsm
  #  define mkl_dc_axpy        mkl_dc_saxpy
  #  define mkl_dc_dot         mkl_dc_sdot
  #  define MKL_DC_DOT_CONVERT mkl_dc_sdot_convert
  #elif defined(MKL_COMPLEX)
  #  define mkl_dc_gemm        mkl_dc_cgemm
  #  define mkl_dc_syrk        mkl_dc_csyrk
  #  define mkl_dc_trsm        mkl_dc_ctrsm
  #  define mkl_dc_axpy        mkl_dc_caxpy
  #elif defined(MKL_COMPLEX16)
  #  define mkl_dc_gemm        mkl_dc_zgemm
  #  define mkl_dc_syrk        mkl_dc_zsyrk
  #  define mkl_dc_trsm        mkl_dc_ztrsm
  #  define mkl_dc_axpy        mkl_dc_zaxpy
  #endif
  /*
   * Generic (non-vectorised) GEMM update on a column-major C:
   *   C[i,j] = c_op(C[i,j], beta) + alpha * sum_l a_op(A(i,l)) * b_op(B(l,j))
   * for 0 <= i < m, 0 <= j < n, 0 <= l < k.
   *
   * a_access / b_access are accessor macros invoked as access(M, ld, r, c);
   * a_op / b_op are invoked as op(dst, src) on the fetched element;
   * c_op is invoked as c_op(value, beta) on a copy of the old C entry.
   * Arguments are pasted unparenthesised, so pass plain identifiers/literals.
   */
  #define mkl_dc_gemm_xx_mnk_pst_beta(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, \
                                      a_op, b_op, c_op, a_access, b_access) \
      do { \
          MKL_INT row, col, inner; \
          MKL_DC_PRAGMA_VECTOR \
          for (row = 0; row < m; row++) { \
              for (col = 0; col < n; col++) { \
                  mkl_dc_type c_old, acc; \
                  MKL_DC_SET_ZERO(acc); \
                  for (inner = 0; inner < k; inner++) { \
                      mkl_dc_type a_val, b_val, prod; \
                      a_val = a_access(A, lda, row, inner); \
                      a_op(a_val, a_val); \
                      b_val = b_access(B, ldb, inner, col); \
                      b_op(b_val, b_val); \
                      MKL_DC_MUL(prod, a_val, b_val); \
                      MKL_DC_ADD(acc, acc, prod); \
                  } \
                  MKL_DC_MUL(acc, alpha, acc); \
                  c_old = C[row + col * ldc]; \
                  c_op(c_old, beta); \
                  MKL_DC_ADD(C[row + col * ldc], c_old, acc); \
              } \
          } \
      } while (0)
  /*
   * Front end for mkl_dc_gemm_xx_mnk_pst_beta that picks the C-update operator:
   * MKL_DC_MUL_C when beta is non-zero, MKL_DC_ZERO_C when beta is zero.
   */
  #define mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, \
                                 a_op, b_op, a_access, b_access) \
      do { \
          if (!(MKL_DC_IS_ZERO(beta))) { \
              mkl_dc_gemm_xx_mnk_pst_beta(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, \
                                          a_op, b_op, MKL_DC_MUL_C, a_access, b_access); \
          } else { \
              mkl_dc_gemm_xx_mnk_pst_beta(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, \
                                          a_op, b_op, MKL_DC_ZERO_C, a_access, b_access); \
          } \
      } while (0)
  /*
   * Double-precision AVX2 building blocks: thin aliases over the 256-bit (YMM)
   * and 128-bit (XMM) intrinsics, plus small register-transpose helpers.
   * Only available when both __AVX2__ and MKL_DOUBLE are defined.
   */
  #if defined(__AVX2__) && defined(MKL_DOUBLE)

  /* Fused multiply-add: c = a * b + c.  The tmp argument is unused here.
   * NOTE: each of these three expansions already ends with a semicolon. */
  #define MKL_DC_MUL_ADD_YMM(ymm_a, ymm_b, ymm_c, ymm_tmp) \
      ymm_c = _mm256_fmadd_pd(ymm_a, ymm_b, ymm_c);
  #define MKL_DC_MUL_ADD_XMM(xmm_a, xmm_b, xmm_c, xmm_tmp) \
      xmm_c = _mm_fmadd_pd(xmm_a, xmm_b, xmm_c);
  #define MKL_DC_MUL_ADD_XMM_S(xmm_a, xmm_b, xmm_c, xmm_tmp) \
      xmm_c = _mm_fmadd_sd(xmm_a, xmm_b, xmm_c);

  /* 256-bit packed-double operations. */
  #define MKL_DC_XOR_YMM          _mm256_xor_pd
  #define MKL_DC_SETZERO_YMM      _mm256_setzero_pd
  #define MKL_DC_BCAST_YMM        _mm256_broadcast_sd
  #define MKL_DC_LOAD_YMM         _mm256_loadu_pd
  #define MKL_DC_STORE_YMM        _mm256_storeu_pd
  #define MKL_DC_ADD_YMM          _mm256_add_pd
  #define MKL_DC_MUL_YMM          _mm256_mul_pd
  #define MKL_DC_MASKLOAD_YMM     _mm256_maskload_pd
  #define MKL_DC_MASKSTORE_YMM    _mm256_maskstore_pd
  #define MKL_DC_HADD_YMM         _mm256_hadd_pd
  #define MKL_DC_PERM2F128_YMM    _mm256_permute2f128_pd
  #define MKL_DC_UNPACKHI_YMM     _mm256_unpackhi_pd
  #define MKL_DC_UNPACKLO_YMM     _mm256_unpacklo_pd

  /* Width casts between 256-bit and 128-bit registers. */
  #define MKL_DC_CAST_YMM_TO_XMM  _mm256_castpd256_pd128
  #define MKL_DC_CAST_XMM_TO_YMM  _mm256_castpd128_pd256

  /* 128-bit packed-double operations. */
  #define MKL_DC_XOR_XMM          _mm_xor_pd
  #define MKL_DC_SETZERO_XMM      _mm_setzero_pd
  #define MKL_DC_LOAD_XMM         _mm_loadu_pd
  #define MKL_DC_STORE_XMM        _mm_storeu_pd
  #define MKL_DC_ADD_XMM          _mm_add_pd
  #define MKL_DC_MUL_XMM          _mm_mul_pd
  #define MKL_DC_LOADDUP_XMM      _mm_loaddup_pd
  #define MKL_DC_UNPACKHI_XMM     _mm_unpackhi_pd
  #define MKL_DC_UNPACKLO_XMM     _mm_unpacklo_pd

  /* Scalar (low-lane) double operations on XMM registers. */
  #define MKL_DC_LOAD_XMM_S       _mm_load_sd
  #define MKL_DC_STORE_XMM_S      _mm_store_sd
  #define MKL_DC_ADD_XMM_S        _mm_add_sd
  #define MKL_DC_MUL_XMM_S        _mm_mul_sd

  /* In-place transpose of a 4x4 block of doubles held in r0..r3;
   * t0..t3 are caller-provided scratch registers. */
  #define MKL_DC_VEC_TRANSPOSE_YMM(r0, r1, r2, r3, t0, t1, t2, t3) \
      do { \
          t0 = _mm256_unpacklo_pd(r0, r1); \
          t2 = _mm256_unpacklo_pd(r2, r3); \
          t1 = _mm256_unpackhi_pd(r0, r1); \
          t3 = _mm256_unpackhi_pd(r2, r3); \
          r0 = _mm256_permute2f128_pd(t0, t2, 0x20); \
          r2 = _mm256_permute2f128_pd(t0, t2, 0x31); \
          r1 = _mm256_permute2f128_pd(t1, t3, 0x20); \
          r3 = _mm256_permute2f128_pd(t1, t3, 0x31); \
      } while (0)

  /* Transpose of a 2x2 block of doubles: (in1, in2) -> (out1, out2). */
  #define MKL_DC_VEC_TRANSPOSE_XMM(out1, out2, in1, in2) \
      do { \
          out2 = _mm_unpackhi_pd(in1, in2); \
          out1 = _mm_unpacklo_pd(in1, in2); \
      } while (0)

  typedef __m128d MKL_DC_XMMTYPE;
  typedef __m256d MKL_DC_YMMTYPE;

  #endif /* __AVX2__ && MKL_DOUBLE */
  /*
   * Call the DGEMM kernel named
   *   mkl_dc_dgemm_{nn,nt,tn,tt}_mnk_<kernel_type>_<arch>_pst
   * chosen by the caller's AisN / BisN flags.  The expansion also uses the
   * caller's m, n, k, ALPHA, A, lda, B, ldb, BETA, C and ldc by name.
   */
  #define MKL_DC_DGEMM_KERNELS(kernel_type, arch) \
      do { \
          if (AisN) { \
              if (BisN) { \
                  mkl_dc_dgemm_nn_mnk_ ## kernel_type ## _ ## arch ## _pst(m, n, k, ALPHA, A, lda, B, ldb, BETA, C, ldc); \
              } else { \
                  mkl_dc_dgemm_nt_mnk_ ## kernel_type ## _ ## arch ## _pst(m, n, k, ALPHA, A, lda, B, ldb, BETA, C, ldc); \
              } \
          } else { \
              if (BisN) { \
                  mkl_dc_dgemm_tn_mnk_ ## kernel_type ## _ ## arch ## _pst(m, n, k, ALPHA, A, lda, B, ldb, BETA, C, ldc); \
              } else { \
                  mkl_dc_dgemm_tt_mnk_ ## kernel_type ## _ ## arch ## _pst(m, n, k, ALPHA, A, lda, B, ldb, BETA, C, ldc); \
              } \
          } \
      } while (0)
  147. #include "mkl_direct_blas_kernels.h"
  148. #define MKL_DC_ALPHA_ONE
  149. #include "mkl_direct_blas_kernels.h"
  150. #undef MKL_DC_ALPHA_ONE
  151. #define MKL_DC_BETA_ZERO
  152. #include "mkl_direct_blas_kernels.h"
  153. #undef MKL_DC_BETA_ZERO
  154. #define MKL_DC_BETA_ONE
  155. #include "mkl_direct_blas_kernels.h"
  156. #undef MKL_DC_BETA_ONE
  157. #define MKL_DC_BETA_ONE
  158. #define MKL_DC_ALPHA_ONE
  159. #include "mkl_direct_blas_kernels.h"
  160. #undef MKL_DC_BETA_ONE
  161. #undef MKL_DC_ALPHA_ONE
  162. #define MKL_DC_BETA_ZERO
  163. #define MKL_DC_ALPHA_ONE
  164. #include "mkl_direct_blas_kernels.h"
  165. #undef MKL_DC_BETA_ZERO
  166. #undef MKL_DC_ALPHA_ONE
  167. static __inline void mkl_dc_gemm(const char * TRANSA, const char * TRANSB,
  168. const MKL_INT * M, const MKL_INT * N, const MKL_INT * K,
  169. const mkl_dc_type * ALPHA,
  170. const mkl_dc_type * A, const MKL_INT * LDA,
  171. const mkl_dc_type * B, const MKL_INT * LDB,
  172. const mkl_dc_type * BETA,
  173. mkl_dc_type * C, const MKL_INT * LDC)
  174. {
  175. int AisN, BisN;
  176. #ifndef MKL_REAL_DATA_TYPE
  177. int AisT, AisC;
  178. int BisT, BisC;
  179. #endif
  180. mkl_dc_type alpha = *ALPHA, beta = *BETA;
  181. MKL_INT m = *M, n = *N, k = *K;
  182. MKL_INT lda = *LDA, ldb = *LDB, ldc = *LDC;
  183. if (m <= 0 || n <= 0 || ((MKL_DC_IS_ZERO(alpha) || k <= 0) && MKL_DC_IS_ONE(beta)))
  184. return;
  185. AisN = MKL_DC_MisN(*TRANSA);
  186. BisN = MKL_DC_MisN(*TRANSB);
  187. #ifndef MKL_REAL_DATA_TYPE
  188. AisT = MKL_DC_MisT(*TRANSA);
  189. BisT = MKL_DC_MisT(*TRANSB);
  190. AisC = !(AisN || AisT);
  191. BisC = !(BisN || BisT);
  192. #endif
  193. if (MKL_DC_IS_ZERO(alpha)) {
  194. MKL_INT i, j;
  195. if (MKL_DC_IS_ZERO(beta))
  196. for (j = 0; j < n; j++)
  197. #pragma vector
  198. for (i = 0; i < m; i++)
  199. MKL_DC_SET_ZERO(C[i + ldc * j]);
  200. else
  201. for (j = 0; j < n; j++)
  202. #pragma vector
  203. for (i = 0; i < m; i++)
  204. MKL_DC_MUL(C[i + ldc * j], beta, C[i + ldc * j]);
  205. return;
  206. }
  207. #if defined(MKL_DOUBLE) && defined(__AVX2__)
  208. if (MKL_DC_IS_ONE(alpha) && MKL_DC_IS_ONE(beta)) {
  209. MKL_DC_DGEMM_KERNELS(a1b1, avx2);
  210. } else if (MKL_DC_IS_ONE(alpha) && MKL_DC_IS_ZERO(beta)) {
  211. MKL_DC_DGEMM_KERNELS(a1b0, avx2);
  212. } else if (MKL_DC_IS_ZERO(beta)) {
  213. MKL_DC_DGEMM_KERNELS(axb0, avx2);
  214. } else if (MKL_DC_IS_ONE(beta)) {
  215. MKL_DC_DGEMM_KERNELS(axb1, avx2);
  216. } else if (MKL_DC_IS_ONE(alpha)) {
  217. MKL_DC_DGEMM_KERNELS(a1bx, avx2);
  218. } else {
  219. MKL_DC_DGEMM_KERNELS(axbx, avx2);
  220. }
  221. #else
  222. if (AisN && BisN)
  223. mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, MKL_DC_MOV, MKL_DC_MOV, MKL_DC_MN, MKL_DC_MN);
  224. #ifndef MKL_REAL_DATA_TYPE
  225. else if (AisC && BisN)
  226. mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, MKL_DC_CONJ, MKL_DC_MOV, MKL_DC_MT, MKL_DC_MN);
  227. else if (AisC && BisT)
  228. mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, MKL_DC_CONJ, MKL_DC_MOV, MKL_DC_MT, MKL_DC_MT);
  229. else if (AisN && BisC)
  230. mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, MKL_DC_MOV, MKL_DC_CONJ, MKL_DC_MN, MKL_DC_MT);
  231. else if (AisT && BisC)
  232. mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, MKL_DC_MOV, MKL_DC_CONJ, MKL_DC_MT, MKL_DC_MT);
  233. else if (AisC && BisC)
  234. mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, MKL_DC_CONJ, MKL_DC_CONJ, MKL_DC_MT, MKL_DC_MT);
  235. #endif
  236. else if (AisN && !BisN)
  237. mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, MKL_DC_MOV, MKL_DC_MOV, MKL_DC_MN, MKL_DC_MT);
  238. else if (!AisN && BisN)
  239. mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, MKL_DC_MOV, MKL_DC_MOV, MKL_DC_MT, MKL_DC_MN);
  240. else
  241. mkl_dc_gemm_xx_mnk_pst(m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, MKL_DC_MOV, MKL_DC_MOV, MKL_DC_MT, MKL_DC_MT);
  242. #endif
  243. }
  244. /* ?TRSM */
  /*
   * Left-side, no-transpose triangular solve: overwrites each column of the
   * m-by-n matrix B with the solution X of A * X = alpha * B.
   * uplo selects the upper (back substitution) or lower (forward substitution)
   * triangle of A.  diag_op(b, a_kk) is applied to the pivot entry before
   * elimination; the caller supplies MKL_DC_DIV_B or MKL_DC_NOOP.
   */
  #define mkl_dc_trsm_lxnx_mn_pst(uplo, m, n, alpha, A, lda, B, ldb, diag_op) \
      do { \
          MKL_INT row, col, piv; \
          const int upper_tri = MKL_DC_MisU(uplo); \
          for (col = 0; col < n; col++) { \
              if (!(MKL_DC_IS_ONE(alpha))) { \
                  for (row = 0; row < m; row++) { \
                      MKL_DC_MUL_C(B[row + col * ldb], alpha); \
                  } \
              } \
              if (upper_tri) { \
                  for (piv = m - 1; piv >= 0; piv--) { \
                      diag_op(B[piv + col * ldb], A[piv + piv * lda]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < piv; row++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[row + piv * lda]; \
                          MKL_DC_MUL(prod, B[piv + col * ldb], a_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
              } else { \
                  for (piv = 0; piv < m; piv++) { \
                      diag_op(B[piv + col * ldb], A[piv + piv * lda]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = piv + 1; row < m; row++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[row + piv * lda]; \
                          MKL_DC_MUL(prod, B[piv + col * ldb], a_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
              } \
          } \
      } while (0)
  /*
   * Left-side, transposed triangular solve: overwrites each column of the
   * m-by-n matrix B with the solution X of A**T * X = alpha * B.
   * For an upper-triangular A rows are resolved top-down, for a lower one
   * bottom-up.  diag_op(value, a_ii) finishes each entry; the caller supplies
   * MKL_DC_DIV_B or MKL_DC_NOOP.
   */
  #define mkl_dc_trsm_lxtx_mn_pst(uplo, m, n, alpha, A, lda, B, ldb, diag_op) \
      do { \
          MKL_INT row, col, kk; \
          const int upper_tri = MKL_DC_MisU(uplo); \
          for (col = 0; col < n; col++) { \
              mkl_dc_type acc; \
              if (upper_tri) { \
                  for (row = 0; row < m; row++) { \
                      MKL_DC_MUL(acc, alpha, B[row + col * ldb]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (kk = 0; kk < row; kk++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[kk + row * lda]; \
                          MKL_DC_MUL(prod, B[kk + col * ldb], a_elem); \
                          MKL_DC_SUB(acc, acc, prod); \
                      } \
                      diag_op(acc, A[row + row * lda]); \
                      MKL_DC_MOV(B[row + col * ldb], acc); \
                  } \
              } else { \
                  for (row = m - 1; row >= 0; row--) { \
                      MKL_DC_MUL(acc, alpha, B[row + col * ldb]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (kk = row + 1; kk < m; kk++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[kk + row * lda]; \
                          MKL_DC_MUL(prod, B[kk + col * ldb], a_elem); \
                          MKL_DC_SUB(acc, acc, prod); \
                      } \
                      diag_op(acc, A[row + row * lda]); \
                      MKL_DC_MOV(B[row + col * ldb], acc); \
                  } \
              } \
          } \
      } while (0)
  /*
   * Left-side, conjugate-transposed triangular solve: same recurrence as the
   * transposed variant, but every element read from A (off-diagonal and
   * diagonal) is passed through MKL_DC_CONJ first, i.e. it solves
   * A**H * X = alpha * B and stores X in B.
   */
  #define mkl_dc_trsm_lxcx_mn_pst(uplo, m, n, alpha, A, lda, B, ldb, diag_op) \
      do { \
          MKL_INT row, col, kk; \
          const int upper_tri = MKL_DC_MisU(uplo); \
          for (col = 0; col < n; col++) { \
              mkl_dc_type acc; \
              if (upper_tri) { \
                  for (row = 0; row < m; row++) { \
                      mkl_dc_type a_diag; \
                      MKL_DC_MUL(acc, alpha, B[row + col * ldb]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (kk = 0; kk < row; kk++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[kk + row * lda]; \
                          MKL_DC_CONJ(a_elem, a_elem); \
                          MKL_DC_MUL(prod, B[kk + col * ldb], a_elem); \
                          MKL_DC_SUB(acc, acc, prod); \
                      } \
                      a_diag = A[row + row * lda]; \
                      MKL_DC_CONJ(a_diag, a_diag); \
                      diag_op(acc, a_diag); \
                      MKL_DC_MOV(B[row + col * ldb], acc); \
                  } \
              } else { \
                  for (row = m - 1; row >= 0; row--) { \
                      mkl_dc_type a_diag; \
                      MKL_DC_MUL(acc, alpha, B[row + col * ldb]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (kk = row + 1; kk < m; kk++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[kk + row * lda]; \
                          MKL_DC_CONJ(a_elem, a_elem); \
                          MKL_DC_MUL(prod, B[kk + col * ldb], a_elem); \
                          MKL_DC_SUB(acc, acc, prod); \
                      } \
                      a_diag = A[row + row * lda]; \
                      MKL_DC_CONJ(a_diag, a_diag); \
                      diag_op(acc, a_diag); \
                      MKL_DC_MOV(B[row + col * ldb], acc); \
                  } \
              } \
          } \
      } while (0)
  /*
   * Right-side, no-transpose, non-unit-diagonal triangular solve: overwrites
   * the m-by-n matrix B with the solution X of X * A = alpha * B.
   * Columns are processed left-to-right for upper A and right-to-left for
   * lower A; each column is scaled by alpha, reduced by the already solved
   * columns, then multiplied by the reciprocal of the diagonal entry.
   */
  #define mkl_dc_trsm_rxnn_mn_pst(uplo, m, n, alpha, A, lda, B, ldb) \
      do { \
          MKL_INT row, col, kk; \
          if (MKL_DC_MisU(uplo)) { \
              for (col = 0; col < n; col++) { \
                  mkl_dc_type inv_diag, unity; \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL_C(B[row + col * ldb], alpha); \
                      } \
                  } \
                  for (kk = 0; kk < col; kk++) { \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[kk + col * lda]; \
                          MKL_DC_MUL(prod, B[row + kk * ldb], a_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  MKL_DC_SET_ONE(unity); \
                  MKL_DC_DIV(inv_diag, unity, A[col + col * lda]); \
                  for (row = 0; row < m; row++) { \
                      MKL_DC_MUL(B[row + col * ldb], inv_diag, B[row + col * ldb]); \
                  } \
              } \
          } else { \
              for (col = n - 1; col >= 0; col--) { \
                  mkl_dc_type inv_diag, unity; \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL_C(B[row + col * ldb], alpha); \
                      } \
                  } \
                  for (kk = col + 1; kk < n; kk++) { \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[kk + col * lda]; \
                          MKL_DC_MUL(prod, B[row + kk * ldb], a_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  MKL_DC_SET_ONE(unity); \
                  MKL_DC_DIV(inv_diag, unity, A[col + col * lda]); \
                  for (row = 0; row < m; row++) { \
                      MKL_DC_MUL(B[row + col * ldb], inv_diag, B[row + col * ldb]); \
                  } \
              } \
          } \
      } while (0)
  /*
   * Right-side, no-transpose, unit-diagonal triangular solve: overwrites the
   * m-by-n matrix B with the solution X of X * A = alpha * B.  The diagonal
   * of A is never read.  Column order is left-to-right for upper A and
   * right-to-left for lower A.
   */
  #define mkl_dc_trsm_rxnu_mn_pst(uplo, m, n, alpha, A, lda, B, ldb) \
      do { \
          MKL_INT row, col, kk; \
          if (MKL_DC_MisU(uplo)) { \
              for (col = 0; col < n; col++) { \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL_C(B[row + col * ldb], alpha); \
                      } \
                  } \
                  for (kk = 0; kk < col; kk++) { \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[kk + col * lda]; \
                          MKL_DC_MUL(prod, B[row + kk * ldb], a_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
              } \
          } else { \
              for (col = n - 1; col >= 0; col--) { \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL_C(B[row + col * ldb], alpha); \
                      } \
                  } \
                  for (kk = col + 1; kk < n; kk++) { \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type a_elem, prod; \
                          a_elem = A[kk + col * lda]; \
                          MKL_DC_MUL(prod, B[row + kk * ldb], a_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
              } \
          } \
      } while (0)
  /*
   * Right-side, transposed, non-unit-diagonal triangular solve: overwrites the
   * m-by-n matrix B with the solution X of X * A**T = alpha * B.
   * For each pivot column (last-to-first for upper A, first-to-last for lower
   * A): multiply it by the reciprocal diagonal, eliminate it from the
   * remaining columns, then apply alpha to the pivot column.
   */
  #define mkl_dc_trsm_rxtn_mn_pst(uplo, m, n, alpha, A, lda, B, ldb) \
      do { \
          MKL_INT row, col, piv; \
          if (MKL_DC_MisU(uplo)) { \
              for (piv = n - 1; piv >= 0; piv--) { \
                  mkl_dc_type scal, unity; \
                  MKL_DC_SET_ONE(unity); \
                  MKL_DC_DIV(scal, unity, A[piv + piv * lda]); \
                  for (row = 0; row < m; row++) { \
                      MKL_DC_MUL(B[row + piv * ldb], scal, B[row + piv * ldb]); \
                  } \
                  for (col = 0; col < piv; col++) { \
                      MKL_DC_MOV(scal, A[col + piv * lda]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type b_elem, prod; \
                          b_elem = B[row + piv * ldb]; \
                          MKL_DC_MUL(prod, scal, b_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL(B[row + piv * ldb], alpha, B[row + piv * ldb]); \
                      } \
                  } \
              } \
          } else { \
              for (piv = 0; piv < n; piv++) { \
                  mkl_dc_type scal, unity; \
                  MKL_DC_SET_ONE(unity); \
                  MKL_DC_DIV(scal, unity, A[piv + piv * lda]); \
                  for (row = 0; row < m; row++) { \
                      MKL_DC_MUL(B[row + piv * ldb], scal, B[row + piv * ldb]); \
                  } \
                  for (col = piv + 1; col < n; col++) { \
                      MKL_DC_MOV(scal, A[col + piv * lda]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type b_elem, prod; \
                          b_elem = B[row + piv * ldb]; \
                          MKL_DC_MUL(prod, scal, b_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL(B[row + piv * ldb], alpha, B[row + piv * ldb]); \
                      } \
                  } \
              } \
          } \
      } while (0)
  /*
   * Right-side, conjugate-transposed, non-unit-diagonal triangular solve:
   * same recurrence as the transposed variant, with every element read from A
   * (diagonal and off-diagonal) passed through MKL_DC_CONJ first, i.e. it
   * solves X * A**H = alpha * B and stores X in B.
   */
  #define mkl_dc_trsm_rxcn_mn_pst(uplo, m, n, alpha, A, lda, B, ldb) \
      do { \
          MKL_INT row, col, piv; \
          if (MKL_DC_MisU(uplo)) { \
              for (piv = n - 1; piv >= 0; piv--) { \
                  mkl_dc_type scal, unity, a_diag; \
                  MKL_DC_SET_ONE(unity); \
                  a_diag = A[piv + piv * lda]; \
                  MKL_DC_CONJ(a_diag, a_diag); \
                  MKL_DC_DIV(scal, unity, a_diag); \
                  for (row = 0; row < m; row++) { \
                      MKL_DC_MUL(B[row + piv * ldb], scal, B[row + piv * ldb]); \
                  } \
                  for (col = 0; col < piv; col++) { \
                      MKL_DC_MOV(scal, A[col + piv * lda]); \
                      MKL_DC_CONJ(scal, scal); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type b_elem, prod; \
                          b_elem = B[row + piv * ldb]; \
                          MKL_DC_MUL(prod, scal, b_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL(B[row + piv * ldb], alpha, B[row + piv * ldb]); \
                      } \
                  } \
              } \
          } else { \
              for (piv = 0; piv < n; piv++) { \
                  mkl_dc_type scal, unity, a_diag; \
                  MKL_DC_SET_ONE(unity); \
                  a_diag = A[piv + piv * lda]; \
                  MKL_DC_CONJ(a_diag, a_diag); \
                  MKL_DC_DIV(scal, unity, a_diag); \
                  for (row = 0; row < m; row++) { \
                      MKL_DC_MUL(B[row + piv * ldb], scal, B[row + piv * ldb]); \
                  } \
                  for (col = piv + 1; col < n; col++) { \
                      MKL_DC_MOV(scal, A[col + piv * lda]); \
                      MKL_DC_CONJ(scal, scal); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type b_elem, prod; \
                          b_elem = B[row + piv * ldb]; \
                          MKL_DC_MUL(prod, scal, b_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL(B[row + piv * ldb], alpha, B[row + piv * ldb]); \
                      } \
                  } \
              } \
          } \
      } while (0)
  /*
   * Right-side, transposed, unit-diagonal triangular solve: overwrites the
   * m-by-n matrix B with the solution X of X * A**T = alpha * B.  The diagonal
   * of A is never read.  Each pivot column is eliminated from the remaining
   * columns and then scaled by alpha.
   */
  #define mkl_dc_trsm_rxtu_mn_pst(uplo, m, n, alpha, A, lda, B, ldb) \
      do { \
          MKL_INT row, col, piv; \
          if (MKL_DC_MisU(uplo)) { \
              for (piv = n - 1; piv >= 0; piv--) { \
                  for (col = 0; col < piv; col++) { \
                      mkl_dc_type a_elem; \
                      MKL_DC_MOV(a_elem, A[col + piv * lda]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type b_elem, prod; \
                          b_elem = B[row + piv * ldb]; \
                          MKL_DC_MUL(prod, a_elem, b_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL(B[row + piv * ldb], alpha, B[row + piv * ldb]); \
                      } \
                  } \
              } \
          } else { \
              for (piv = 0; piv < n; piv++) { \
                  for (col = piv + 1; col < n; col++) { \
                      mkl_dc_type a_elem; \
                      MKL_DC_MOV(a_elem, A[col + piv * lda]); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type b_elem, prod; \
                          b_elem = B[row + piv * ldb]; \
                          MKL_DC_MUL(prod, a_elem, b_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL(B[row + piv * ldb], alpha, B[row + piv * ldb]); \
                      } \
                  } \
              } \
          } \
      } while (0)
  /*
   * Right-side, conjugate-transposed, unit-diagonal triangular solve: same
   * recurrence as the transposed unit-diagonal variant, with each
   * off-diagonal element of A passed through MKL_DC_CONJ before use.
   * The diagonal of A is never read.
   */
  #define mkl_dc_trsm_rxcu_mn_pst(uplo, m, n, alpha, A, lda, B, ldb) \
      do { \
          MKL_INT row, col, piv; \
          if (MKL_DC_MisU(uplo)) { \
              for (piv = n - 1; piv >= 0; piv--) { \
                  for (col = 0; col < piv; col++) { \
                      mkl_dc_type a_elem; \
                      a_elem = A[col + piv * lda]; \
                      MKL_DC_CONJ(a_elem, a_elem); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type b_elem, prod; \
                          b_elem = B[row + piv * ldb]; \
                          MKL_DC_MUL(prod, a_elem, b_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL(B[row + piv * ldb], alpha, B[row + piv * ldb]); \
                      } \
                  } \
              } \
          } else { \
              for (piv = 0; piv < n; piv++) { \
                  for (col = piv + 1; col < n; col++) { \
                      mkl_dc_type a_elem; \
                      a_elem = A[col + piv * lda]; \
                      MKL_DC_CONJ(a_elem, a_elem); \
                      MKL_DC_PRAGMA_VECTOR \
                      for (row = 0; row < m; row++) { \
                          mkl_dc_type b_elem, prod; \
                          b_elem = B[row + piv * ldb]; \
                          MKL_DC_MUL(prod, a_elem, b_elem); \
                          MKL_DC_SUB(B[row + col * ldb], B[row + col * ldb], prod); \
                      } \
                  } \
                  if (!(MKL_DC_IS_ONE(alpha))) { \
                      for (row = 0; row < m; row++) { \
                          MKL_DC_MUL(B[row + piv * ldb], alpha, B[row + piv * ldb]); \
                      } \
                  } \
              } \
          } \
      } while (0)
  658. static __inline void mkl_dc_trsm(const char * SIDE, const char * UPLO,
  659. const char * TRANSA, const char * DIAG,
  660. const MKL_INT * M, const MKL_INT * N,
  661. const mkl_dc_type * ALPHA,
  662. const mkl_dc_type * A, const MKL_INT * LDA,
  663. mkl_dc_type * B, const MKL_INT * LDB)
  664. {
  665. int AisN;
  666. int lside, unit;
  667. #ifndef MKL_REAL_DATA_TYPE
  668. int noconj;
  669. #endif
  670. mkl_dc_type alpha = *ALPHA;
  671. MKL_INT m = *M, n = *N;
  672. MKL_INT lda = *LDA, ldb = *LDB;
  673. char uplo = *UPLO;
  674. if (m <= 0 || n <= 0 )
  675. return;
  676. AisN = MKL_DC_MisN(*TRANSA);
  677. lside = MKL_DC_MisL(*SIDE);
  678. unit = MKL_DC_MisU(*DIAG);
  679. #ifndef MKL_REAL_DATA_TYPE
  680. noconj = MKL_DC_MisT(*TRANSA);
  681. #endif
  682. if (MKL_DC_IS_ZERO(alpha)) {
  683. MKL_INT i, j;
  684. for (j = 0; j < n; j++)
  685. #pragma vector
  686. for (i = 0; i < m; i++)
  687. MKL_DC_SET_ZERO(B[i + ldb * j]);
  688. return;
  689. }
  690. if (lside)
  691. if (AisN)
  692. if (unit)
  693. mkl_dc_trsm_lxnx_mn_pst(uplo, m, n, alpha, A, lda, B, ldb, MKL_DC_NOOP);
  694. else
  695. mkl_dc_trsm_lxnx_mn_pst(uplo, m, n, alpha, A, lda, B, ldb, MKL_DC_DIV_B);
  696. else
  697. #ifndef MKL_REAL_DATA_TYPE
  698. if (!noconj)
  699. if (unit)
  700. mkl_dc_trsm_lxcx_mn_pst(uplo, m, n, alpha, A, lda, B, ldb, MKL_DC_NOOP);
  701. else
  702. mkl_dc_trsm_lxcx_mn_pst(uplo, m, n, alpha, A, lda, B, ldb, MKL_DC_DIV_B);
  703. else
  704. #endif
  705. if (unit)
  706. mkl_dc_trsm_lxtx_mn_pst(uplo, m, n, alpha, A, lda, B, ldb, MKL_DC_NOOP);
  707. else
  708. mkl_dc_trsm_lxtx_mn_pst(uplo, m, n, alpha, A, lda, B, ldb, MKL_DC_DIV_B);
  709. else
  710. if (AisN)
  711. if (unit)
  712. mkl_dc_trsm_rxnu_mn_pst(uplo, m, n, alpha, A, lda, B, ldb);
  713. else
  714. mkl_dc_trsm_rxnn_mn_pst(uplo, m, n, alpha, A, lda, B, ldb);
  715. else
  716. #ifndef MKL_REAL_DATA_TYPE
  717. if (!noconj)
  718. if (unit)
  719. mkl_dc_trsm_rxcu_mn_pst(uplo, m, n, alpha, A, lda, B, ldb);
  720. else
  721. mkl_dc_trsm_rxcn_mn_pst(uplo, m, n, alpha, A, lda, B, ldb);
  722. else
  723. #endif
  724. if (unit)
  725. mkl_dc_trsm_rxtu_mn_pst(uplo, m, n, alpha, A, lda, B, ldb);
  726. else
  727. mkl_dc_trsm_rxtn_mn_pst(uplo, m, n, alpha, A, lda, B, ldb);
  728. }
  729. /* ?SYRK */
  /*
   * Generic symmetric rank-k update on one triangle of the n-by-n matrix C:
   *   C[i,j] = c_op(C[i,j], beta) + alpha * sum_l a(j,l) * a(i,l)
   * where a(r,l) = a_access(A, lda, r, l) and 0 <= l < k.
   * With an upper uplo the entries i <= j of each column j are updated,
   * otherwise the entries i >= j; the other triangle is not touched.
   * c_op is invoked as c_op(value, beta) on a copy of the old C entry.
   */
  #define mkl_dc_syrk_xx_nk_pst_beta(uplo, n, k, alpha, A, lda, beta, C, ldc, \
                                     c_op, a_access) \
      do { \
          MKL_INT row, col, inner; \
          const int upper_tri = MKL_DC_MisU(uplo); \
          for (col = 0; col < n; col++) { \
              const MKL_INT first_row = upper_tri ? 0 : col; \
              const MKL_INT last_row = upper_tri ? col : n - 1; \
              MKL_DC_PRAGMA_VECTOR \
              for (row = first_row; row <= last_row; row++) { \
                  mkl_dc_type c_old, acc; \
                  MKL_DC_SET_ZERO(acc); \
                  for (inner = 0; inner < k; inner++) { \
                      mkl_dc_type a_col, a_row, prod; \
                      a_col = a_access(A, lda, col, inner); \
                      a_row = a_access(A, lda, row, inner); \
                      MKL_DC_MUL(prod, a_col, a_row); \
                      MKL_DC_ADD(acc, acc, prod); \
                  } \
                  MKL_DC_MUL(acc, alpha, acc); \
                  c_old = C[row + col * ldc]; \
                  c_op(c_old, beta); \
                  MKL_DC_ADD(C[row + col * ldc], c_old, acc); \
              } \
          } \
      } while (0)
  /*
   * Front end for mkl_dc_syrk_xx_nk_pst_beta that picks the C-update operator:
   * MKL_DC_MUL_C when beta is non-zero, MKL_DC_ZERO_C when beta is zero.
   */
  #define mkl_dc_syrk_xx_nk_pst(uplo, n, k, alpha, A, lda, beta, C, ldc, \
                                a_access) \
      do { \
          if (!(MKL_DC_IS_ZERO(beta))) { \
              mkl_dc_syrk_xx_nk_pst_beta(uplo, n, k, alpha, A, lda, beta, C, ldc, \
                                         MKL_DC_MUL_C, a_access); \
          } else { \
              mkl_dc_syrk_xx_nk_pst_beta(uplo, n, k, alpha, A, lda, beta, C, ldc, \
                                         MKL_DC_ZERO_C, a_access); \
          } \
      } while (0)
  780. static __inline void mkl_dc_syrk(const char * UPLO, const char * TRANS,
  781. const MKL_INT * N, const MKL_INT * K,
  782. const mkl_dc_type * ALPHA,
  783. const mkl_dc_type * A, const MKL_INT * LDA,
  784. const mkl_dc_type * BETA,
  785. mkl_dc_type * C, const MKL_INT * LDC)
  786. {
  787. int AisN, CisU;
  788. mkl_dc_type alpha = *ALPHA, beta = *BETA;
  789. MKL_INT n = *N, k = *K;
  790. MKL_INT lda = *LDA, ldc = *LDC;
  791. char uplo = *UPLO;
  792. if (n <= 0 || ((MKL_DC_IS_ZERO(alpha) || k <= 0) && MKL_DC_IS_ONE(beta)))
  793. return;
  794. AisN = MKL_DC_MisN(*TRANS);
  795. CisU = MKL_DC_MisU(*UPLO);
  796. if (MKL_DC_IS_ZERO(alpha)) {
  797. MKL_INT i, j;
  798. if (MKL_DC_IS_ZERO(beta))
  799. if (CisU)
  800. for (j = 0; j < n; j++)
  801. #pragma vector
  802. for (i = 0; i <= j; i++)
  803. MKL_DC_SET_ZERO(C[i + ldc * j]);
  804. else
  805. for (j = 0; j < n; j++)
  806. #pragma vector
  807. for (i = j; i < n; i++)
  808. MKL_DC_SET_ZERO(C[i + ldc * j]);
  809. else
  810. if (CisU)
  811. for (j = 0; j < n; j++)
  812. #pragma vector
  813. for (i = 0; i <= j; i++)
  814. MKL_DC_MUL(C[i + ldc * j], beta, C[i + ldc * j]);
  815. else
  816. for (j = 0; j < n; j++)
  817. #pragma vector
  818. for (i = j; i < n; i++)
  819. MKL_DC_MUL(C[i + ldc * j], beta, C[i + ldc * j]);
  820. return;
  821. }
  822. if (AisN)
  823. mkl_dc_syrk_xx_nk_pst(uplo, n, k, alpha, A, lda, beta, C, ldc, MKL_DC_MN);
  824. else
  825. mkl_dc_syrk_xx_nk_pst(uplo, n, k, alpha, A, lda, beta, C, ldc, MKL_DC_MT);
  826. }
  827. static __inline void mkl_dc_axpy (const MKL_INT *N, const mkl_dc_type *ALPHA, const mkl_dc_type *x, const MKL_INT *INCX, mkl_dc_type *y, const MKL_INT *INCY)
  828. {
  829. MKL_INT i;
  830. MKL_INT n = *N, ix = 0, iy = 0;
  831. if (*INCX == 1 && *INCY == 1) {
  832. #pragma vector vecremainder
  833. for(i = 0; i < n; i++) MKL_DC_MUL_ADD(y[i], (*ALPHA), x[i], y[i]);
  834. } else {
  835. if (*INCX < 0) ix = (-(n) + 1) * *INCX;
  836. if (*INCY < 0) iy = (-(n) + 1) * *INCY;
  837. for(i = 0; i < n; i++, ix += *INCX, iy += *INCY) MKL_DC_MUL_ADD(y[iy], (*ALPHA), x[ix], y[iy]);
  838. }
  839. }
  #if defined(MKL_DOUBLE) || defined(MKL_SINGLE)
  /*
   * Inlinable ?DOT for the real precisions: returns sum over i of y[i] * x[i]
   * for n elements, accumulated in mkl_dc_type starting from 0.
   * N, INCX and INCY are passed by pointer.  For a negative stride the
   * traversal starts at offset (1 - n) * stride.  Returns 0 when n <= 0.
   */
  static __inline mkl_dc_type mkl_dc_dot (const MKL_INT* N, const mkl_dc_type *x,
                                          const MKL_INT* INCX, const mkl_dc_type *y, const MKL_INT* INCY)
  {
      const MKL_INT count = *N;
      mkl_dc_type sum = 0.0;
      MKL_INT idx;

      if (*INCX == 1 && *INCY == 1) {
          /* Contiguous vectors. */
  #pragma vector vecremainder
          for (idx = 0; idx < count; idx++) {
              sum += y[idx] * x[idx];
          }
      } else {
          /* Strided vectors: shift the start for negative increments. */
          MKL_INT xpos = 0, ypos = 0;
          if (*INCX < 0) {
              xpos = (1 - count) * *INCX;
          }
          if (*INCY < 0) {
              ypos = (1 - count) * *INCY;
          }
          for (idx = 0; idx < count; idx++) {
              sum += y[ypos] * x[xpos];
              xpos += *INCX;
              ypos += *INCY;
          }
      }
      return sum;
  }
  #endif
  /*
   * Drop the file-private helper macros and the Level-1 name mappings so the
   * header can be processed again for another data type.  The gemm / syrk /
   * trsm name mappings are reset at the top of the file instead.
   */

  /* GEMM helpers. */
  #undef mkl_dc_gemm_xx_mnk_pst
  #undef mkl_dc_gemm_xx_mnk_pst_beta

  /* SYRK helpers. */
  #undef mkl_dc_syrk_xx_nk_pst
  #undef mkl_dc_syrk_xx_nk_pst_beta

  /* TRSM helpers, left side. */
  #undef mkl_dc_trsm_lxnx_mn_pst
  #undef mkl_dc_trsm_lxtx_mn_pst
  #undef mkl_dc_trsm_lxcx_mn_pst

  /* TRSM helpers, right side. */
  #undef mkl_dc_trsm_rxnn_mn_pst
  #undef mkl_dc_trsm_rxnu_mn_pst
  #undef mkl_dc_trsm_rxtn_mn_pst
  #undef mkl_dc_trsm_rxtu_mn_pst
  #undef mkl_dc_trsm_rxcn_mn_pst
  #undef mkl_dc_trsm_rxcu_mn_pst

  /* Level-1 name mappings. */
  #undef mkl_dc_axpy
  #undef mkl_dc_dot
  #undef MKL_DC_DOT_CONVERT