mkl_direct_call.h 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111
  1. /*******************************************************************************
  2. * Copyright 2014-2022 Intel Corporation.
  3. *
  4. * This software and the related documents are Intel copyrighted materials, and
  5. * your use of them is governed by the express license under which they were
  6. * provided to you (License). Unless the License provides otherwise, you may not
  7. * use, modify, copy, publish, distribute, disclose or transmit this software or
  8. * the related documents without Intel's prior written permission.
  9. *
  10. * This software and the related documents are provided as is, with no express
  11. * or implied warranties, other than those that are expressly stated in the
  12. * License.
  13. *******************************************************************************/
  14. /*
  15. ! Content:
  16. ! Intel(R) oneAPI Math Kernel Library (oneMKL) C/C++ macros for MKL_DIRECT_CALL
  17. !******************************************************************************/
  18. #ifndef _MKL_DIRECT_CALL_H
  19. #define _MKL_DIRECT_CALL_H
  20. #include "mkl_blas.h"
  21. #include "mkl_lapack.h"
  22. #include "mkl_lapacke.h"
  23. #include "mkl_types.h"
  /*
   * Normalise the MKL_DIRECT_CALL* configuration switches.
   *   - MKL_DIRECT_CALL_SEQ_JIT switches on both MKL_DIRECT_CALL_JIT and
   *     MKL_DIRECT_CALL_SEQ.
   *   - Either MKL_DIRECT_CALL_SEQ or MKL_DIRECT_CALL_JIT switches on
   *     MKL_DIRECT_CALL.
   *   - MKL_DIRECT_CALL_INIT_FLAG declares the local `mkl_direct_call_flag`
   *     that the wrapper macros of this header hand to the *_direct entry
   *     points: initialised to 1 under MKL_DIRECT_CALL_SEQ, to 0 otherwise.
   */
  #ifdef MKL_DIRECT_CALL_SEQ_JIT
  #  if !defined(MKL_DIRECT_CALL_JIT)
  #    define MKL_DIRECT_CALL_JIT
  #  endif
  #  if !defined(MKL_DIRECT_CALL_SEQ)
  #    define MKL_DIRECT_CALL_SEQ
  #  endif
  #endif

  #if !defined(MKL_DIRECT_CALL)
  #  if defined(MKL_DIRECT_CALL_SEQ) || defined(MKL_DIRECT_CALL_JIT)
  #    define MKL_DIRECT_CALL
  #  endif
  #endif

  #if defined(MKL_DIRECT_CALL_SEQ)
  #  define MKL_DIRECT_CALL_INIT_FLAG MKL_INT mkl_direct_call_flag = 1
  #else
  #  define MKL_DIRECT_CALL_INIT_FLAG MKL_INT mkl_direct_call_flag = 0
  #endif
  40. #ifdef MKL_DIRECT_CALL
  41. #ifdef __cplusplus
  42. extern "C" {
  43. #endif
  44. /* Function declarations for the direct calls */
  45. void dgemm_direct(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,const double *alpha, const double *a, const MKL_INT *lda, const double *b, const MKL_INT *ldb,const double *beta, double *c, const MKL_INT *ldc, const MKL_INT* flag);
  46. void sgemm_direct(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k, const float *alpha, const float *a, const MKL_INT *lda, const float *b, const MKL_INT *ldb, const float *beta, float *c, const MKL_INT *ldc, const MKL_INT* flag);
  47. void cgemm_direct(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k, const MKL_Complex8 *alpha, const MKL_Complex8 *a, const MKL_INT *lda, const MKL_Complex8 *b, const MKL_INT *ldb, const MKL_Complex8 *beta, MKL_Complex8 *c, const MKL_INT *ldc, const MKL_INT* flag);
  48. void zgemm_direct(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k, const MKL_Complex16 *alpha, const MKL_Complex16 *a, const MKL_INT *lda, const MKL_Complex16 *b, const MKL_INT *ldb, const MKL_Complex16 *beta, MKL_Complex16 *c, const MKL_INT *ldc, const MKL_INT* flag);
  49. void mkl_dgemm_jit(const char *transa, const char *transb,
  50. const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
  51. const double *alpha, const double *a, const MKL_INT *lda,
  52. const double *b, const MKL_INT *ldb,
  53. const double *beta, double *c, const MKL_INT *ldc);
  54. void mkl_sgemm_jit(const char *transa, const char *transb,
  55. const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
  56. const float *alpha, const float *a, const MKL_INT *lda,
  57. const float *b, const MKL_INT *ldb,
  58. const float *beta, float *c, const MKL_INT *ldc);
  59. void mkl_cgemm_jit(const char *transa, const char *transb,
  60. const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
  61. const MKL_Complex8 *alpha, const MKL_Complex8 *a, const MKL_INT *lda,
  62. const MKL_Complex8 *b, const MKL_INT *ldb,
  63. const MKL_Complex8 *beta, MKL_Complex8 *c, const MKL_INT *ldc);
  64. void mkl_zgemm_jit(const char *transa, const char *transb,
  65. const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
  66. const MKL_Complex16 *alpha, const MKL_Complex16 *a, const MKL_INT *lda,
  67. const MKL_Complex16 *b, const MKL_INT *ldb,
  68. const MKL_Complex16 *beta, MKL_Complex16 *c, const MKL_INT *ldc);
  69. void cgemm3m_direct(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k, const MKL_Complex8 *alpha, const MKL_Complex8 *a, const MKL_INT *lda, const MKL_Complex8 *b, const MKL_INT *ldb, const MKL_Complex8 *beta, MKL_Complex8 *c, const MKL_INT *ldc, const MKL_INT* flag);
  70. void zgemm3m_direct(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k, const MKL_Complex16 *alpha, const MKL_Complex16 *a, const MKL_INT *lda, const MKL_Complex16 *b, const MKL_INT *ldb, const MKL_Complex16 *beta, MKL_Complex16 *c, const MKL_INT *ldc, const MKL_INT* flag);
  71. void dtrsm_direct(const char *side, const char *uplo, const char *transa, const char *diag, const MKL_INT *m, const MKL_INT *n, const double *alpha, const double *a, const MKL_INT *lda, double *b, const MKL_INT *ldb, const MKL_INT* flag);
  72. void strsm_direct(const char *side, const char *uplo, const char *transa, const char *diag, const MKL_INT *m, const MKL_INT *n, const float *alpha, const float *a, const MKL_INT *lda, float *b, const MKL_INT *ldb, const MKL_INT* flag);
  73. void ctrsm_direct(const char *side, const char *uplo, const char *transa, const char *diag, const MKL_INT *m, const MKL_INT *n, const MKL_Complex8 *alpha, const MKL_Complex8 *a, const MKL_INT *lda, MKL_Complex8 *b, const MKL_INT *ldb, const MKL_INT* flag);
  74. void ztrsm_direct(const char *side, const char *uplo, const char *transa, const char *diag, const MKL_INT *m, const MKL_INT *n, const MKL_Complex16 *alpha, const MKL_Complex16 *a, const MKL_INT *lda, MKL_Complex16 *b, const MKL_INT *ldb, const MKL_INT* flag);
  75. void dsyrk_direct(const char *uplo, const char *trans, const MKL_INT *n, const MKL_INT *k,const double *alpha, const double *a, const MKL_INT *lda, const double *beta, double *c, const MKL_INT *ldc, const MKL_INT* flag);
  76. void ssyrk_direct(const char *uplo, const char *trans, const MKL_INT *n, const MKL_INT *k, const float *alpha, const float *a, const MKL_INT *lda, const float *beta, float *c, const MKL_INT *ldc, const MKL_INT* flag);
  77. void csyrk_direct(const char *uplo, const char *trans, const MKL_INT *n, const MKL_INT *k, const MKL_Complex8 *alpha, const MKL_Complex8 *a, const MKL_INT *lda, const MKL_Complex8 *beta, MKL_Complex8 *c, const MKL_INT *ldc, const MKL_INT* flag);
  78. void zsyrk_direct(const char *uplo, const char *trans, const MKL_INT *n, const MKL_INT *k, const MKL_Complex16 *alpha, const MKL_Complex16 *a, const MKL_INT *lda, const MKL_Complex16 *beta, MKL_Complex16 *c, const MKL_INT *ldc, const MKL_INT* flag);
  79. void daxpy_direct(const MKL_INT *n, const double *alpha, const double *x, const MKL_INT *incx, double *y, const MKL_INT *incy, const MKL_INT* flag);
  80. void saxpy_direct(const MKL_INT *n, const float *alpha, const float *x, const MKL_INT *incx, float *y, const MKL_INT *incy, const MKL_INT* flag);
  81. void caxpy_direct(const MKL_INT *n, const MKL_Complex8 *alpha, const MKL_Complex8 *x, const MKL_INT *incx, MKL_Complex8 *y, const MKL_INT *inc, const MKL_INT* flag);
  82. void zaxpy_direct(const MKL_INT *n, const MKL_Complex16 *alpha, const MKL_Complex16 *x, const MKL_INT *incx, MKL_Complex16 *y, const MKL_INT *incy, const MKL_INT* flag);
  83. double ddot_direct(const MKL_INT *n, const double *x, const MKL_INT *incx, const double *y, const MKL_INT *incy);
  84. float sdot_direct(const MKL_INT *n, const float *x, const MKL_INT *incx, const float *y, const MKL_INT *incy);
  /*
   * Size predicates for the LAPACK direct-call paths. Each one is true only
   * when every dimension is at or below the listed bound AND MKL_DC_USE_C is
   * non-zero. Arguments are dimension values (not pointers).
   */
  #define MKL_DC_GETRF_CHECKSIZE(m, n)    ((m) <= 5  && (n) <= 5     && MKL_DC_USE_C)
  #define MKL_DC_GETRFNP_CHECKSIZE(m, n)  ((m) <= 15 && (n) <= 15    && MKL_DC_USE_C)
  #define MKL_DC_GETRS_CHECKSIZE(n, nrhs) ((n) <= 5  && (nrhs) <= 10 && MKL_DC_USE_C)
  #define MKL_DC_GETRI_CHECKSIZE(n)       ((n) <= 6                  && MKL_DC_USE_C)
  #define MKL_DC_GEQRF_CHECKSIZE(m, n)    ((m) <= 10 && (n) <= 10    && MKL_DC_USE_C)
  #define MKL_DC_POTRF_CHECKSIZE(n)       ((n) <= 12                 && MKL_DC_USE_C)
  /*
   * Compiler detection.
   *   MKL_DC_USE_C         1 for the Intel compilers and for GNU-compatible
   *                        compilers unless __STRICT_ANSI__ is set without
   *                        __STDC_VERSION__; 0 everywhere else.
   *   MKL_DC_POTRF_DISABLE 1 for __INTEL_COMPILER <= 1500 and for the
   *                        __GNUC__ branch, 0 for newer Intel compilers.
   *   MKL_DC_UNSAFE        defaults to 0 unless the user defined it already.
   * NOTE(review): the final fallback branch defines MKL_DC_USE_C only and
   * leaves MKL_DC_POTRF_DISABLE undefined - confirm no expansion relies on it
   * there.
   */
  #if defined(__INTEL_COMPILER)
  #  define MKL_DC_USE_C 1
  #  if __INTEL_COMPILER > 1500
  #    define MKL_DC_POTRF_DISABLE 0
  #  else
  #    define MKL_DC_POTRF_DISABLE 1
  #  endif
  #elif defined(__INTEL_LLVM_COMPILER)
  #  define MKL_DC_USE_C 1
  #  define MKL_DC_POTRF_DISABLE 0
  #elif defined(__GNUC__)
  #  if !defined(__STRICT_ANSI__) || defined(__STDC_VERSION__)
  #    define MKL_DC_USE_C 1
  #  else
  #    define MKL_DC_USE_C 0
  #  endif
  #  define MKL_DC_POTRF_DISABLE 1
  #else
  #  define MKL_DC_USE_C 0
  #endif

  #if !defined(MKL_DC_UNSAFE)
  #  define MKL_DC_UNSAFE 0
  #endif
  /*
   * When MKL_DC_USE_C is 1, the three mkl_direct_*.h headers are included four
   * times, each pass with exactly one of MKL_DOUBLE / MKL_SINGLE / MKL_COMPLEX /
   * MKL_COMPLEX16 defined (presumably to generate the per-precision mkl_dc_*
   * kernels - those headers are not visible here). MKL_COMPLEX16 is left
   * defined after the last pass.
   * Otherwise every mkl_dc_* BLAS kernel name becomes a function-like macro
   * that discards its arguments and expands to 0.
   */
  #if MKL_DC_USE_C == 1

  /* pass 1: MKL_DOUBLE */
  #  undef MKL_COMPLEX
  #  undef MKL_COMPLEX16
  #  undef MKL_SINGLE
  #  undef MKL_DOUBLE
  #  define MKL_DOUBLE
  #  include "mkl_direct_types.h"
  #  include "mkl_direct_blas.h"
  #  include "mkl_direct_lapack.h"

  /* pass 2: MKL_SINGLE */
  #  undef MKL_COMPLEX
  #  undef MKL_COMPLEX16
  #  undef MKL_SINGLE
  #  undef MKL_DOUBLE
  #  define MKL_SINGLE
  #  include "mkl_direct_types.h"
  #  include "mkl_direct_blas.h"
  #  include "mkl_direct_lapack.h"

  /* pass 3: MKL_COMPLEX */
  #  undef MKL_COMPLEX
  #  undef MKL_COMPLEX16
  #  undef MKL_SINGLE
  #  undef MKL_DOUBLE
  #  define MKL_COMPLEX
  #  include "mkl_direct_types.h"
  #  include "mkl_direct_blas.h"
  #  include "mkl_direct_lapack.h"

  /* pass 4: MKL_COMPLEX16 */
  #  undef MKL_COMPLEX
  #  undef MKL_COMPLEX16
  #  undef MKL_SINGLE
  #  undef MKL_DOUBLE
  #  define MKL_COMPLEX16
  #  include "mkl_direct_types.h"
  #  include "mkl_direct_blas.h"
  #  include "mkl_direct_lapack.h"

  #else

  /* GEMM stubs */
  #  define mkl_dc_dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) 0
  #  define mkl_dc_sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) 0
  #  define mkl_dc_cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) 0
  #  define mkl_dc_zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) 0
  /* TRSM stubs */
  #  define mkl_dc_dtrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) 0
  #  define mkl_dc_strsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) 0
  #  define mkl_dc_ctrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) 0
  #  define mkl_dc_ztrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) 0
  /* SYRK stubs */
  #  define mkl_dc_dsyrk(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) 0
  #  define mkl_dc_ssyrk(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) 0
  #  define mkl_dc_csyrk(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) 0
  #  define mkl_dc_zsyrk(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) 0
  /* DOT stubs */
  #  define mkl_dc_ddot(n, x, incx, y, incy) 0
  #  define mkl_dc_sdot(n, x, incx, y, incy) 0
  /* AXPY stubs */
  #  define mkl_dc_daxpy(n, alpha, x, incx, y, incy) 0
  #  define mkl_dc_saxpy(n, alpha, x, incx, y, incy) 0
  #  define mkl_dc_caxpy(n, alpha, x, incx, y, incy) 0
  #  define mkl_dc_zaxpy(n, alpha, x, incx, y, incy) 0

  #endif /* MKL_DC_USE_C */
  /*
   * MKL_DIRECT_CALL_CONSTANT_P(m,n,k): m, n, k are pointers. On Linux, or with
   * __INTEL_COMPILER >= 1500, it is true when __builtin_constant_p reports all
   * three pointed-to values as constants; on every other configuration it is
   * the constant (0).
   */
  #if !defined(__linux__) && !(__INTEL_COMPILER >= 1500)
  #  define MKL_DIRECT_CALL_CONSTANT_P(m,n,k) (0)
  #else
  #  define MKL_DIRECT_CALL_CONSTANT_P(m,n,k) (__builtin_constant_p(*(m)) && __builtin_constant_p(*(n)) && __builtin_constant_p(*(k)))
  #endif
  172. /* BLAS */
  /*
   * Size predicates for the BLAS direct-call paths. The *_CBLAS_* variants take
   * dimension values, the others take pointers to the dimensions. Every
   * predicate additionally requires MKL_DC_USE_C to be non-zero.
   */

  /* GEMM-like: all of m, n, k within the bound */
  #define MKL_DC_CBLAS_CHECKSIZE(m,n,k)   (((m) <= 5 && (n) <= 5 && (k) <= 5) && MKL_DC_USE_C)
  #define MKL_DC_CHECKSIZE(m,n,k)         ((*(m) <= 5 && *(n) <= 5 && *(k) <= 5) && MKL_DC_USE_C)
  #define MKL_DC_GEMM3M_CHECKSIZE(m,n,k)  ((*(m) <= 4 && *(n) <= 4 && *(k) <= 4) && MKL_DC_USE_C)

  /* SYRK */
  #define MKL_DC_CBLAS_SYRK_CHECKSIZE(n,k) (((n) <= 3 && (k) <= 9) && MKL_DC_USE_C)
  #define MKL_DC_SYRK_CHECKSIZE(n,k)       ((*(n) <= 3 && *(k) <= 9) && MKL_DC_USE_C)

  /* TRSM */
  #define MKL_DC_CBLAS_TRSM_CHECKSIZE(m,n) (((m) <= 7 && (n) <= 7) && MKL_DC_USE_C)
  #define MKL_DC_TRSM_CHECKSIZE(m,n)       ((*(m) <= 7 && *(n) <= 7) && MKL_DC_USE_C)

  /* AXPY, value arguments */
  #define MKL_DC_CBLAS_DAXPY_CHECKSIZE(n) ((n) <= 32 && MKL_DC_USE_C)
  #define MKL_DC_CBLAS_SAXPY_CHECKSIZE(n) ((n) <= 64 && MKL_DC_USE_C)
  #define MKL_DC_CBLAS_CAXPY_CHECKSIZE(n) ((n) <= 16 && MKL_DC_USE_C)
  #define MKL_DC_CBLAS_ZAXPY_CHECKSIZE(n) ((n) <= 4  && MKL_DC_USE_C)

  /* AXPY, pointer arguments */
  #define MKL_DC_DAXPY_CHECKSIZE(n) (*(n) <= 32 && MKL_DC_USE_C)
  #define MKL_DC_SAXPY_CHECKSIZE(n) (*(n) <= 64 && MKL_DC_USE_C)
  #define MKL_DC_CAXPY_CHECKSIZE(n) (*(n) <= 16 && MKL_DC_USE_C)
  #define MKL_DC_ZAXPY_CHECKSIZE(n) (*(n) <= 4  && MKL_DC_USE_C)

  /* DOT, pointer arguments */
  #define MKL_DC_DDOT_CHECKSIZE(n) (*(n) <= 32 && MKL_DC_USE_C)
  #define MKL_DC_SDOT_CHECKSIZE(n) (*(n) <= 64 && MKL_DC_USE_C)
  190. #ifdef MKL_DIRECT_CALL_JIT
  /* JIT build: the small-size GEMM kernels are the mkl_?gemm_jit routines. */
  #define SMALL_SIZE_SGEMM mkl_sgemm_jit
  #define SMALL_SIZE_DGEMM mkl_dgemm_jit
  #define SMALL_SIZE_CGEMM mkl_cgemm_jit
  #define SMALL_SIZE_ZGEMM mkl_zgemm_jit

  /*
   * JIT eligibility thresholds, one pair per precision:
   *   ?JIT_MAX_AC_ELEMENTS  upper bound on the element counts of A and C
   *   ?JIT_MAX_MNK          upper bound on the product m*n*k
   * A problem is routed to the JIT kernel only when both bounds hold.
   */
  #define SJIT_MAX_AC_ELEMENTS 8192
  #define SJIT_MAX_MNK         262144

  #define DJIT_MAX_AC_ELEMENTS 4096
  #define DJIT_MAX_MNK         131072

  #define CJIT_MAX_AC_ELEMENTS 4096
  #define CJIT_MAX_MNK         131072

  #define ZJIT_MAX_AC_ELEMENTS 2048
  #define ZJIT_MAX_MNK         65536
  /*
   * Helper predicates for the JIT size tests; m, n, k are dimension values.
   *
   *   AC_CHECK(m,n,k,max)   m*n <= max and m*k <= max
   *   MNK_CHECK(m,n,k,max)  m*n*k <= max
   *   GEMV_CHECK(m,n,k)     false when m == 1 or n == 1 while one of the other
   *                         two dimensions exceeds 32
   *
   * AC_CHECK and MNK_CHECK compare through division instead of forming the
   * products directly: the products are computed in the dimension type and
   * can overflow (e.g. 65536*65536 wraps to 0 in a 32-bit integer and would
   * wrongly pass). For strictly positive dimensions the result is identical
   * to the plain product comparison. A non-positive dimension short-circuits
   * to "true", which matches the product form for zero-sized problems.
   */
  #define AC_CHECK(m,n,k,max) \
      (((m) <= 0 || (n) <= 0 || (m) <= (max) / (n)) && \
       ((m) <= 0 || (k) <= 0 || (m) <= (max) / (k)))
  #define MNK_CHECK(m,n,k,max) \
      ((m) <= 0 || (n) <= 0 || (k) <= 0 || \
       ((m) <= (max) / (n) && (m) * (n) <= (max) / (k)))
  #define GEMV_CHECK(m,n,k) (!((m) == 1 && ((n) > 32 || (k) > 32)) && !((n) == 1 && ((m) > 32 || (k) > 32)))
  /*
   * JIT eligibility tests for GEMM. The *_CBLAS_* forms take dimension values;
   * the plain forms take pointers and forward the dereferenced values. The
   * complex variants additionally apply GEMV_CHECK.
   */
  #define MKL_DC_DGEMM_CBLAS_CHECKSIZE(m,n,k) \
      (MNK_CHECK(m,n,k,DJIT_MAX_MNK) && AC_CHECK(m,n,k,DJIT_MAX_AC_ELEMENTS) && MKL_DC_USE_C)
  #define MKL_DC_SGEMM_CBLAS_CHECKSIZE(m,n,k) \
      (MNK_CHECK(m,n,k,SJIT_MAX_MNK) && AC_CHECK(m,n,k,SJIT_MAX_AC_ELEMENTS) && MKL_DC_USE_C)
  #define MKL_DC_ZGEMM_CBLAS_CHECKSIZE(m,n,k) \
      (MNK_CHECK(m,n,k,ZJIT_MAX_MNK) && AC_CHECK(m,n,k,ZJIT_MAX_AC_ELEMENTS) && GEMV_CHECK(m,n,k) && MKL_DC_USE_C)
  #define MKL_DC_CGEMM_CBLAS_CHECKSIZE(m,n,k) \
      (MNK_CHECK(m,n,k,CJIT_MAX_MNK) && AC_CHECK(m,n,k,CJIT_MAX_AC_ELEMENTS) && GEMV_CHECK(m,n,k) && MKL_DC_USE_C)

  #define MKL_DC_DGEMM_CHECKSIZE(m,n,k) MKL_DC_DGEMM_CBLAS_CHECKSIZE(*(m),*(n),*(k))
  #define MKL_DC_SGEMM_CHECKSIZE(m,n,k) MKL_DC_SGEMM_CBLAS_CHECKSIZE(*(m),*(n),*(k))
  #define MKL_DC_ZGEMM_CHECKSIZE(m,n,k) MKL_DC_ZGEMM_CBLAS_CHECKSIZE(*(m),*(n),*(k))
  #define MKL_DC_CGEMM_CHECKSIZE(m,n,k) MKL_DC_CGEMM_CBLAS_CHECKSIZE(*(m),*(n),*(k))
  217. #else /* not JIT */
  /* Non-JIT build: the small-size GEMM kernels are the mkl_dc_?gemm routines. */
  #define SMALL_SIZE_SGEMM mkl_dc_sgemm
  #define SMALL_SIZE_DGEMM mkl_dc_dgemm
  #define SMALL_SIZE_CGEMM mkl_dc_cgemm
  #define SMALL_SIZE_ZGEMM mkl_dc_zgemm

  /* CBLAS (value-argument) size tests: real precisions share one predicate. */
  #define MKL_DC_DSGEMM_CBLAS_CHECKSIZE(m,n,k) (((m) <= 5 && (n) <= 5 && (k) <= 5) && MKL_DC_USE_C)
  #define MKL_DC_DGEMM_CBLAS_CHECKSIZE(m,n,k)  MKL_DC_DSGEMM_CBLAS_CHECKSIZE(m,n,k)
  #define MKL_DC_SGEMM_CBLAS_CHECKSIZE(m,n,k)  MKL_DC_DSGEMM_CBLAS_CHECKSIZE(m,n,k)
  #define MKL_DC_ZGEMM_CBLAS_CHECKSIZE(m,n,k)  MKL_DC_CBLAS_CHECKSIZE(m,n,k)
  #define MKL_DC_CGEMM_CBLAS_CHECKSIZE(m,n,k)  MKL_DC_CBLAS_CHECKSIZE(m,n,k)

  /* Pointer-argument size tests; DGEMM uses a bound of 12 when __AVX2__ is defined, 5 otherwise. */
  #define MKL_DC_SGEMM_CHECKSIZE(m,n,k) ((*(m) <= 5 && *(n) <= 5 && *(k) <= 5) && MKL_DC_USE_C)
  #if defined(__AVX2__)
  #  define MKL_DC_DGEMM_CHECKSIZE(m,n,k) ((*(m) <= 12 && *(n) <= 12 && *(k) <= 12) && MKL_DC_USE_C)
  #else
  #  define MKL_DC_DGEMM_CHECKSIZE(m,n,k) ((*(m) <= 5 && *(n) <= 5 && *(k) <= 5) && MKL_DC_USE_C)
  #endif
  #define MKL_DC_CGEMM_CHECKSIZE(m,n,k) MKL_DC_CHECKSIZE(m,n,k)
  #define MKL_DC_ZGEMM_CHECKSIZE(m,n,k) MKL_DC_CHECKSIZE(m,n,k)
  235. #endif
  236. /* CBLAS interfaces */
  237. /* CBLAS GEMM */
  /*
   * MKL_DC_CBLAS_DGEMM_CONVERT - turn a cblas_dgemm-style argument list into a
   * call of a pointer-argument GEMM routine.
   *
   *   layout, transa, transb  CBLAS enum values; transa/transb are mapped to
   *                           "N"/"T"/"C" by their offset from CblasNoTrans
   *   m, n, k, lda, ldb, ldc  dimension values, copied into MKL_INT locals
   *   alpha, beta             scalar values, copied into double locals
   *   a, b, c                 matrix pointers, forwarded unchanged
   *   fname_unrolledc         routine used when MKL_DC_DGEMM_CBLAS_CHECKSIZE
   *                           accepts (m, n, k)
   *   fname_direct            routine used otherwise; it additionally receives
   *                           &mkl_direct_call_flag
   *
   * For CblasRowMajor the two operands (with their trans flags and leading
   * dimensions) and m/n are swapped before the call; for CblasColMajor they
   * are passed in order. Any other layout value performs no call.
   *
   * Every argument is expanded exactly once, so expressions with side effects
   * are safe. Locals carry an mkl_dc_ prefix so they cannot capture a caller
   * variable of the same name, all declarations precede the first statement,
   * and the string table is const-qualified so the expansion is valid C++.
   */
  #define MKL_DC_CBLAS_DGEMM_CONVERT(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, fname_unrolledc, fname_direct) do { \
      MKL_DIRECT_CALL_INIT_FLAG; \
      const char *mkl_dc_ftrans[] = { "N", "T", "C" }; \
      const MKL_INT mkl_dc_ita = (transa) - CblasNoTrans; \
      const MKL_INT mkl_dc_itb = (transb) - CblasNoTrans; \
      MKL_INT mkl_dc_m = (m), mkl_dc_n = (n), mkl_dc_k = (k); \
      MKL_INT mkl_dc_lda = (lda), mkl_dc_ldb = (ldb), mkl_dc_ldc = (ldc); \
      double mkl_dc_alpha = (alpha), mkl_dc_beta = (beta); \
      const MKL_INT mkl_dc_layout = (layout); \
      \
      if (mkl_dc_layout == CblasRowMajor) { \
          if (MKL_DC_DGEMM_CBLAS_CHECKSIZE(mkl_dc_m, mkl_dc_n, mkl_dc_k)) { \
              fname_unrolledc(mkl_dc_ftrans[mkl_dc_itb], mkl_dc_ftrans[mkl_dc_ita], \
                              &mkl_dc_n, &mkl_dc_m, &mkl_dc_k, &mkl_dc_alpha, \
                              (b), &mkl_dc_ldb, (a), &mkl_dc_lda, &mkl_dc_beta, (c), &mkl_dc_ldc); \
          } else { \
              fname_direct(mkl_dc_ftrans[mkl_dc_itb], mkl_dc_ftrans[mkl_dc_ita], \
                           &mkl_dc_n, &mkl_dc_m, &mkl_dc_k, &mkl_dc_alpha, \
                           (b), &mkl_dc_ldb, (a), &mkl_dc_lda, &mkl_dc_beta, (c), &mkl_dc_ldc, \
                           &mkl_direct_call_flag); \
          } \
      } else if (mkl_dc_layout == CblasColMajor) { \
          if (MKL_DC_DGEMM_CBLAS_CHECKSIZE(mkl_dc_m, mkl_dc_n, mkl_dc_k)) { \
              fname_unrolledc(mkl_dc_ftrans[mkl_dc_ita], mkl_dc_ftrans[mkl_dc_itb], \
                              &mkl_dc_m, &mkl_dc_n, &mkl_dc_k, &mkl_dc_alpha, \
                              (a), &mkl_dc_lda, (b), &mkl_dc_ldb, &mkl_dc_beta, (c), &mkl_dc_ldc); \
          } else { \
              fname_direct(mkl_dc_ftrans[mkl_dc_ita], mkl_dc_ftrans[mkl_dc_itb], \
                           &mkl_dc_m, &mkl_dc_n, &mkl_dc_k, &mkl_dc_alpha, \
                           (a), &mkl_dc_lda, (b), &mkl_dc_ldb, &mkl_dc_beta, (c), &mkl_dc_ldc, \
                           &mkl_direct_call_flag); \
          } \
      } \
  } while (0)
  /*
   * MKL_DC_CBLAS_SGEMM_CONVERT - turn a cblas_sgemm-style argument list into a
   * call of a pointer-argument GEMM routine.
   *
   *   layout, transa, transb  CBLAS enum values; transa/transb are mapped to
   *                           "N"/"T"/"C" by their offset from CblasNoTrans
   *   m, n, k, lda, ldb, ldc  dimension values, copied into MKL_INT locals
   *   alpha, beta             scalar values, copied into float locals
   *   a, b, c                 matrix pointers, forwarded unchanged
   *   fname_unrolledc         routine used when MKL_DC_SGEMM_CBLAS_CHECKSIZE
   *                           accepts (m, n, k)
   *   fname_direct            routine used otherwise; it additionally receives
   *                           &mkl_direct_call_flag
   *
   * For CblasRowMajor the two operands (with their trans flags and leading
   * dimensions) and m/n are swapped before the call; for CblasColMajor they
   * are passed in order. Any other layout value performs no call.
   *
   * Every argument is expanded exactly once, so expressions with side effects
   * are safe. Locals carry an mkl_dc_ prefix so they cannot capture a caller
   * variable of the same name, all declarations precede the first statement,
   * and the string table is const-qualified so the expansion is valid C++.
   */
  #define MKL_DC_CBLAS_SGEMM_CONVERT(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, fname_unrolledc, fname_direct) do { \
      MKL_DIRECT_CALL_INIT_FLAG; \
      const char *mkl_dc_ftrans[] = { "N", "T", "C" }; \
      const MKL_INT mkl_dc_ita = (transa) - CblasNoTrans; \
      const MKL_INT mkl_dc_itb = (transb) - CblasNoTrans; \
      MKL_INT mkl_dc_m = (m), mkl_dc_n = (n), mkl_dc_k = (k); \
      MKL_INT mkl_dc_lda = (lda), mkl_dc_ldb = (ldb), mkl_dc_ldc = (ldc); \
      float mkl_dc_alpha = (alpha), mkl_dc_beta = (beta); \
      const MKL_INT mkl_dc_layout = (layout); \
      \
      if (mkl_dc_layout == CblasRowMajor) { \
          if (MKL_DC_SGEMM_CBLAS_CHECKSIZE(mkl_dc_m, mkl_dc_n, mkl_dc_k)) { \
              fname_unrolledc(mkl_dc_ftrans[mkl_dc_itb], mkl_dc_ftrans[mkl_dc_ita], \
                              &mkl_dc_n, &mkl_dc_m, &mkl_dc_k, &mkl_dc_alpha, \
                              (b), &mkl_dc_ldb, (a), &mkl_dc_lda, &mkl_dc_beta, (c), &mkl_dc_ldc); \
          } else { \
              fname_direct(mkl_dc_ftrans[mkl_dc_itb], mkl_dc_ftrans[mkl_dc_ita], \
                           &mkl_dc_n, &mkl_dc_m, &mkl_dc_k, &mkl_dc_alpha, \
                           (b), &mkl_dc_ldb, (a), &mkl_dc_lda, &mkl_dc_beta, (c), &mkl_dc_ldc, \
                           &mkl_direct_call_flag); \
          } \
      } else if (mkl_dc_layout == CblasColMajor) { \
          if (MKL_DC_SGEMM_CBLAS_CHECKSIZE(mkl_dc_m, mkl_dc_n, mkl_dc_k)) { \
              fname_unrolledc(mkl_dc_ftrans[mkl_dc_ita], mkl_dc_ftrans[mkl_dc_itb], \
                              &mkl_dc_m, &mkl_dc_n, &mkl_dc_k, &mkl_dc_alpha, \
                              (a), &mkl_dc_lda, (b), &mkl_dc_ldb, &mkl_dc_beta, (c), &mkl_dc_ldc); \
          } else { \
              fname_direct(mkl_dc_ftrans[mkl_dc_ita], mkl_dc_ftrans[mkl_dc_itb], \
                           &mkl_dc_m, &mkl_dc_n, &mkl_dc_k, &mkl_dc_alpha, \
                           (a), &mkl_dc_lda, (b), &mkl_dc_ldb, &mkl_dc_beta, (c), &mkl_dc_ldc, \
                           &mkl_direct_call_flag); \
          } \
      } \
  } while (0)
  /*
   * MKL_DC_CBLAS_CGEMM_CONVERT - turn a cblas_cgemm-style argument list into a
   * call of a pointer-argument GEMM routine.
   *
   *   layout, transa, transb  CBLAS enum values; transa/transb are mapped to
   *                           "N"/"T"/"C" by their offset from CblasNoTrans
   *   m, n, k, lda, ldb, ldc  dimension values, copied into MKL_INT locals
   *   alpha, beta             forwarded unchanged (already pointers in the
   *                           complex CBLAS interface)
   *   a, b, c                 matrix pointers, forwarded unchanged
   *   fname_unrolledc         routine used when MKL_DC_CGEMM_CBLAS_CHECKSIZE
   *                           accepts (m, n, k)
   *   fname_direct            routine used otherwise; it additionally receives
   *                           &mkl_direct_call_flag
   *
   * For CblasRowMajor the two operands (with their trans flags and leading
   * dimensions) and m/n are swapped before the call; for CblasColMajor they
   * are passed in order. Any other layout value performs no call.
   *
   * Every argument is expanded exactly once, so expressions with side effects
   * are safe. Locals carry an mkl_dc_ prefix so they cannot capture a caller
   * variable of the same name, all declarations precede the first statement,
   * and the string table is const-qualified so the expansion is valid C++.
   */
  #define MKL_DC_CBLAS_CGEMM_CONVERT(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, fname_unrolledc, fname_direct) do { \
      MKL_DIRECT_CALL_INIT_FLAG; \
      const char *mkl_dc_ftrans[] = { "N", "T", "C" }; \
      const MKL_INT mkl_dc_ita = (transa) - CblasNoTrans; \
      const MKL_INT mkl_dc_itb = (transb) - CblasNoTrans; \
      MKL_INT mkl_dc_m = (m), mkl_dc_n = (n), mkl_dc_k = (k); \
      MKL_INT mkl_dc_lda = (lda), mkl_dc_ldb = (ldb), mkl_dc_ldc = (ldc); \
      const MKL_INT mkl_dc_layout = (layout); \
      \
      if (mkl_dc_layout == CblasRowMajor) { \
          if (MKL_DC_CGEMM_CBLAS_CHECKSIZE(mkl_dc_m, mkl_dc_n, mkl_dc_k)) { \
              fname_unrolledc(mkl_dc_ftrans[mkl_dc_itb], mkl_dc_ftrans[mkl_dc_ita], \
                              &mkl_dc_n, &mkl_dc_m, &mkl_dc_k, (alpha), \
                              (b), &mkl_dc_ldb, (a), &mkl_dc_lda, (beta), (c), &mkl_dc_ldc); \
          } else { \
              fname_direct(mkl_dc_ftrans[mkl_dc_itb], mkl_dc_ftrans[mkl_dc_ita], \
                           &mkl_dc_n, &mkl_dc_m, &mkl_dc_k, (alpha), \
                           (b), &mkl_dc_ldb, (a), &mkl_dc_lda, (beta), (c), &mkl_dc_ldc, \
                           &mkl_direct_call_flag); \
          } \
      } else if (mkl_dc_layout == CblasColMajor) { \
          if (MKL_DC_CGEMM_CBLAS_CHECKSIZE(mkl_dc_m, mkl_dc_n, mkl_dc_k)) { \
              fname_unrolledc(mkl_dc_ftrans[mkl_dc_ita], mkl_dc_ftrans[mkl_dc_itb], \
                              &mkl_dc_m, &mkl_dc_n, &mkl_dc_k, (alpha), \
                              (a), &mkl_dc_lda, (b), &mkl_dc_ldb, (beta), (c), &mkl_dc_ldc); \
          } else { \
              fname_direct(mkl_dc_ftrans[mkl_dc_ita], mkl_dc_ftrans[mkl_dc_itb], \
                           &mkl_dc_m, &mkl_dc_n, &mkl_dc_k, (alpha), \
                           (a), &mkl_dc_lda, (b), &mkl_dc_ldb, (beta), (c), &mkl_dc_ldc, \
                           &mkl_direct_call_flag); \
          } \
      } \
  } while (0)
  /*
   * MKL_DC_CBLAS_ZGEMM_CONVERT - turn a cblas_zgemm-style argument list into a
   * call of a pointer-argument GEMM routine.
   *
   *   layout, transa, transb  CBLAS enum values; transa/transb are mapped to
   *                           "N"/"T"/"C" by their offset from CblasNoTrans
   *   m, n, k, lda, ldb, ldc  dimension values, copied into MKL_INT locals
   *   alpha, beta             forwarded unchanged (already pointers in the
   *                           complex CBLAS interface)
   *   a, b, c                 matrix pointers, forwarded unchanged
   *   fname_unrolledc         routine used when MKL_DC_ZGEMM_CBLAS_CHECKSIZE
   *                           accepts (m, n, k)
   *   fname_direct            routine used otherwise; it additionally receives
   *                           &mkl_direct_call_flag
   *
   * For CblasRowMajor the two operands (with their trans flags and leading
   * dimensions) and m/n are swapped before the call; for CblasColMajor they
   * are passed in order. Any other layout value performs no call.
   *
   * Every argument is expanded exactly once, so expressions with side effects
   * are safe. Locals carry an mkl_dc_ prefix so they cannot capture a caller
   * variable of the same name, all declarations precede the first statement,
   * and the string table is const-qualified so the expansion is valid C++.
   */
  #define MKL_DC_CBLAS_ZGEMM_CONVERT(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, fname_unrolledc, fname_direct) do { \
      MKL_DIRECT_CALL_INIT_FLAG; \
      const char *mkl_dc_ftrans[] = { "N", "T", "C" }; \
      const MKL_INT mkl_dc_ita = (transa) - CblasNoTrans; \
      const MKL_INT mkl_dc_itb = (transb) - CblasNoTrans; \
      MKL_INT mkl_dc_m = (m), mkl_dc_n = (n), mkl_dc_k = (k); \
      MKL_INT mkl_dc_lda = (lda), mkl_dc_ldb = (ldb), mkl_dc_ldc = (ldc); \
      const MKL_INT mkl_dc_layout = (layout); \
      \
      if (mkl_dc_layout == CblasRowMajor) { \
          if (MKL_DC_ZGEMM_CBLAS_CHECKSIZE(mkl_dc_m, mkl_dc_n, mkl_dc_k)) { \
              fname_unrolledc(mkl_dc_ftrans[mkl_dc_itb], mkl_dc_ftrans[mkl_dc_ita], \
                              &mkl_dc_n, &mkl_dc_m, &mkl_dc_k, (alpha), \
                              (b), &mkl_dc_ldb, (a), &mkl_dc_lda, (beta), (c), &mkl_dc_ldc); \
          } else { \
              fname_direct(mkl_dc_ftrans[mkl_dc_itb], mkl_dc_ftrans[mkl_dc_ita], \
                           &mkl_dc_n, &mkl_dc_m, &mkl_dc_k, (alpha), \
                           (b), &mkl_dc_ldb, (a), &mkl_dc_lda, (beta), (c), &mkl_dc_ldc, \
                           &mkl_direct_call_flag); \
          } \
      } else if (mkl_dc_layout == CblasColMajor) { \
          if (MKL_DC_ZGEMM_CBLAS_CHECKSIZE(mkl_dc_m, mkl_dc_n, mkl_dc_k)) { \
              fname_unrolledc(mkl_dc_ftrans[mkl_dc_ita], mkl_dc_ftrans[mkl_dc_itb], \
                              &mkl_dc_m, &mkl_dc_n, &mkl_dc_k, (alpha), \
                              (a), &mkl_dc_lda, (b), &mkl_dc_ldb, (beta), (c), &mkl_dc_ldc); \
          } else { \
              fname_direct(mkl_dc_ftrans[mkl_dc_ita], mkl_dc_ftrans[mkl_dc_itb], \
                           &mkl_dc_m, &mkl_dc_n, &mkl_dc_k, (alpha), \
                           (a), &mkl_dc_lda, (b), &mkl_dc_ldb, (beta), (c), &mkl_dc_ldc, \
                           &mkl_direct_call_flag); \
          } \
      } \
  } while (0)
  /*
   * Redirect the cblas_?gemm / cblas_?gemm3m names to the converter macros.
   * The small-size kernel is mkl_?gemm_jit under MKL_DIRECT_CALL_JIT and
   * mkl_dc_?gemm otherwise; the large-size routine is always ?gemm_direct.
   * NOTE(review): the gemm3m aliases always pair mkl_dc_?gemm with the
   * ?GEMM converter, whose size test uses the JIT thresholds when
   * MKL_DIRECT_CALL_JIT is set - confirm that pairing is intended.
   */
  #if defined(MKL_DIRECT_CALL_JIT)
  #  define cblas_dgemm(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
       MKL_DC_CBLAS_DGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_dgemm_jit, dgemm_direct)
  #  define cblas_sgemm(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
       MKL_DC_CBLAS_SGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_sgemm_jit, sgemm_direct)
  #  define cblas_cgemm(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
       MKL_DC_CBLAS_CGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_cgemm_jit, cgemm_direct)
  #  define cblas_zgemm(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
       MKL_DC_CBLAS_ZGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_zgemm_jit, zgemm_direct)
  #else
  #  define cblas_dgemm(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
       MKL_DC_CBLAS_DGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_dc_dgemm, dgemm_direct)
  #  define cblas_sgemm(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
       MKL_DC_CBLAS_SGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_dc_sgemm, sgemm_direct)
  #  define cblas_cgemm(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
       MKL_DC_CBLAS_CGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_dc_cgemm, cgemm_direct)
  #  define cblas_zgemm(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
       MKL_DC_CBLAS_ZGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_dc_zgemm, zgemm_direct)
  #endif

  #define cblas_zgemm3m(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
      MKL_DC_CBLAS_ZGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_dc_zgemm, zgemm3m_direct)
  #define cblas_cgemm3m(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) \
      MKL_DC_CBLAS_CGEMM_CONVERT(layout, ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, mkl_dc_cgemm, cgemm3m_direct)
  385. /* CBLAS TRSM */
  /*
   * MKL_DC_CBLAS_DTRSM_CONVERT - turn a cblas_dtrsm-style argument list into a
   * call of a pointer-argument TRSM routine.
   *
   *   layout                  CBLAS layout enum value
   *   side, uplo, trans, diag CBLAS enum values, mapped to "L"/"R", "U"/"L",
   *                           "N"/"T"/"C" and "N"/"U" by their offsets from
   *                           CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit
   *   m, n, lda, ldb          dimension values, copied into MKL_INT locals
   *   alpha                   scalar value, copied into a double local
   *   a, b                    matrix pointers, forwarded unchanged
   *   fname_unrolledc         routine used when MKL_DC_CBLAS_TRSM_CHECKSIZE
   *                           accepts (m, n)
   *   fname_direct            routine used otherwise; it additionally receives
   *                           &mkl_direct_call_flag
   *
   * For CblasRowMajor the side and uplo selections are flipped and m/n are
   * swapped before the call; for CblasColMajor everything is passed in order.
   * Any other layout value performs no call.
   *
   * Every argument is expanded exactly once, so expressions with side effects
   * are safe. Locals carry an mkl_dc_ prefix so they cannot capture a caller
   * variable of the same name, all declarations precede the first statement,
   * and the string tables are const-qualified so the expansion is valid C++.
   */
  #define MKL_DC_CBLAS_DTRSM_CONVERT(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb, fname_unrolledc, fname_direct) do { \
      MKL_DIRECT_CALL_INIT_FLAG; \
      const char *mkl_dc_fside[]  = { "L", "R" }; \
      const char *mkl_dc_fuplo[]  = { "U", "L" }; \
      const char *mkl_dc_ftrans[] = { "N", "T", "C" }; \
      const char *mkl_dc_fdiag[]  = { "N", "U" }; \
      const MKL_INT mkl_dc_iside  = (side) - CblasLeft; \
      const MKL_INT mkl_dc_iuplo  = (uplo) - CblasUpper; \
      const MKL_INT mkl_dc_itrans = (trans) - CblasNoTrans; \
      const MKL_INT mkl_dc_idiag  = (diag) - CblasNonUnit; \
      MKL_INT mkl_dc_n = (n), mkl_dc_m = (m), mkl_dc_lda = (lda), mkl_dc_ldb = (ldb); \
      double mkl_dc_alpha = (alpha); \
      const MKL_INT mkl_dc_layout = (layout); \
      \
      if (mkl_dc_layout == CblasRowMajor) { \
          if (MKL_DC_CBLAS_TRSM_CHECKSIZE(mkl_dc_m, mkl_dc_n)) { \
              fname_unrolledc(mkl_dc_fside[1 - mkl_dc_iside], mkl_dc_fuplo[1 - mkl_dc_iuplo], \
                              mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                              &mkl_dc_n, &mkl_dc_m, &mkl_dc_alpha, (a), &mkl_dc_lda, (b), &mkl_dc_ldb); \
          } else { \
              fname_direct(mkl_dc_fside[1 - mkl_dc_iside], mkl_dc_fuplo[1 - mkl_dc_iuplo], \
                           mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                           &mkl_dc_n, &mkl_dc_m, &mkl_dc_alpha, (a), &mkl_dc_lda, (b), &mkl_dc_ldb, \
                           &mkl_direct_call_flag); \
          } \
      } else if (mkl_dc_layout == CblasColMajor) { \
          if (MKL_DC_CBLAS_TRSM_CHECKSIZE(mkl_dc_m, mkl_dc_n)) { \
              fname_unrolledc(mkl_dc_fside[mkl_dc_iside], mkl_dc_fuplo[mkl_dc_iuplo], \
                              mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                              &mkl_dc_m, &mkl_dc_n, &mkl_dc_alpha, (a), &mkl_dc_lda, (b), &mkl_dc_ldb); \
          } else { \
              fname_direct(mkl_dc_fside[mkl_dc_iside], mkl_dc_fuplo[mkl_dc_iuplo], \
                           mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                           &mkl_dc_m, &mkl_dc_n, &mkl_dc_alpha, (a), &mkl_dc_lda, (b), &mkl_dc_ldb, \
                           &mkl_direct_call_flag); \
          } \
      } \
  } while (0)
  /*
   * MKL_DC_CBLAS_STRSM_CONVERT - turn a cblas_strsm-style argument list into a
   * call of a pointer-argument TRSM routine.
   *
   *   layout                  CBLAS layout enum value
   *   side, uplo, trans, diag CBLAS enum values, mapped to "L"/"R", "U"/"L",
   *                           "N"/"T"/"C" and "N"/"U" by their offsets from
   *                           CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit
   *   m, n, lda, ldb          dimension values, copied into MKL_INT locals
   *   alpha                   scalar value, copied into a float local
   *   a, b                    matrix pointers, forwarded unchanged
   *   fname_unrolledc         routine used when MKL_DC_CBLAS_TRSM_CHECKSIZE
   *                           accepts (m, n)
   *   fname_direct            routine used otherwise; it additionally receives
   *                           &mkl_direct_call_flag
   *
   * For CblasRowMajor the side and uplo selections are flipped and m/n are
   * swapped before the call; for CblasColMajor everything is passed in order.
   * Any other layout value performs no call.
   *
   * Every argument is expanded exactly once, so expressions with side effects
   * are safe. Locals carry an mkl_dc_ prefix so they cannot capture a caller
   * variable of the same name, all declarations precede the first statement,
   * and the string tables are const-qualified so the expansion is valid C++.
   */
  #define MKL_DC_CBLAS_STRSM_CONVERT(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb, fname_unrolledc, fname_direct) do { \
      MKL_DIRECT_CALL_INIT_FLAG; \
      const char *mkl_dc_fside[]  = { "L", "R" }; \
      const char *mkl_dc_fuplo[]  = { "U", "L" }; \
      const char *mkl_dc_ftrans[] = { "N", "T", "C" }; \
      const char *mkl_dc_fdiag[]  = { "N", "U" }; \
      const MKL_INT mkl_dc_iside  = (side) - CblasLeft; \
      const MKL_INT mkl_dc_iuplo  = (uplo) - CblasUpper; \
      const MKL_INT mkl_dc_itrans = (trans) - CblasNoTrans; \
      const MKL_INT mkl_dc_idiag  = (diag) - CblasNonUnit; \
      MKL_INT mkl_dc_n = (n), mkl_dc_m = (m), mkl_dc_lda = (lda), mkl_dc_ldb = (ldb); \
      float mkl_dc_alpha = (alpha); \
      const MKL_INT mkl_dc_layout = (layout); \
      \
      if (mkl_dc_layout == CblasRowMajor) { \
          if (MKL_DC_CBLAS_TRSM_CHECKSIZE(mkl_dc_m, mkl_dc_n)) { \
              fname_unrolledc(mkl_dc_fside[1 - mkl_dc_iside], mkl_dc_fuplo[1 - mkl_dc_iuplo], \
                              mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                              &mkl_dc_n, &mkl_dc_m, &mkl_dc_alpha, (a), &mkl_dc_lda, (b), &mkl_dc_ldb); \
          } else { \
              fname_direct(mkl_dc_fside[1 - mkl_dc_iside], mkl_dc_fuplo[1 - mkl_dc_iuplo], \
                           mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                           &mkl_dc_n, &mkl_dc_m, &mkl_dc_alpha, (a), &mkl_dc_lda, (b), &mkl_dc_ldb, \
                           &mkl_direct_call_flag); \
          } \
      } else if (mkl_dc_layout == CblasColMajor) { \
          if (MKL_DC_CBLAS_TRSM_CHECKSIZE(mkl_dc_m, mkl_dc_n)) { \
              fname_unrolledc(mkl_dc_fside[mkl_dc_iside], mkl_dc_fuplo[mkl_dc_iuplo], \
                              mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                              &mkl_dc_m, &mkl_dc_n, &mkl_dc_alpha, (a), &mkl_dc_lda, (b), &mkl_dc_ldb); \
          } else { \
              fname_direct(mkl_dc_fside[mkl_dc_iside], mkl_dc_fuplo[mkl_dc_iuplo], \
                           mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                           &mkl_dc_m, &mkl_dc_n, &mkl_dc_alpha, (a), &mkl_dc_lda, (b), &mkl_dc_ldb, \
                           &mkl_direct_call_flag); \
          } \
      } \
  } while (0)
  /*
   * MKL_DC_CBLAS_COMPLEX_TRSM_CONVERT - turn a cblas_ctrsm / cblas_ztrsm-style
   * argument list into a call of a pointer-argument TRSM routine.
   *
   *   layout                  CBLAS layout enum value
   *   side, uplo, trans, diag CBLAS enum values, mapped to "L"/"R", "U"/"L",
   *                           "N"/"T"/"C" and "N"/"U" by their offsets from
   *                           CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit
   *   m, n, lda, ldb          dimension values, copied into MKL_INT locals
   *   alpha                   forwarded unchanged (already a pointer in the
   *                           complex CBLAS interface)
   *   a, b                    matrix pointers, forwarded unchanged
   *   fname_unrolledc         routine used when MKL_DC_CBLAS_TRSM_CHECKSIZE
   *                           accepts (m, n)
   *   fname_direct            routine used otherwise; it additionally receives
   *                           &mkl_direct_call_flag
   *
   * For CblasRowMajor the side and uplo selections are flipped and m/n are
   * swapped before the call; for CblasColMajor everything is passed in order.
   * Any other layout value performs no call.
   *
   * Every argument is expanded exactly once, so expressions with side effects
   * are safe. Locals carry an mkl_dc_ prefix so they cannot capture a caller
   * variable of the same name, all declarations precede the first statement,
   * and the string tables are const-qualified so the expansion is valid C++.
   */
  #define MKL_DC_CBLAS_COMPLEX_TRSM_CONVERT(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb, fname_unrolledc, fname_direct) do { \
      MKL_DIRECT_CALL_INIT_FLAG; \
      const char *mkl_dc_fside[]  = { "L", "R" }; \
      const char *mkl_dc_fuplo[]  = { "U", "L" }; \
      const char *mkl_dc_ftrans[] = { "N", "T", "C" }; \
      const char *mkl_dc_fdiag[]  = { "N", "U" }; \
      const MKL_INT mkl_dc_iside  = (side) - CblasLeft; \
      const MKL_INT mkl_dc_iuplo  = (uplo) - CblasUpper; \
      const MKL_INT mkl_dc_itrans = (trans) - CblasNoTrans; \
      const MKL_INT mkl_dc_idiag  = (diag) - CblasNonUnit; \
      MKL_INT mkl_dc_n = (n), mkl_dc_m = (m), mkl_dc_lda = (lda), mkl_dc_ldb = (ldb); \
      const MKL_INT mkl_dc_layout = (layout); \
      \
      if (mkl_dc_layout == CblasRowMajor) { \
          if (MKL_DC_CBLAS_TRSM_CHECKSIZE(mkl_dc_m, mkl_dc_n)) { \
              fname_unrolledc(mkl_dc_fside[1 - mkl_dc_iside], mkl_dc_fuplo[1 - mkl_dc_iuplo], \
                              mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                              &mkl_dc_n, &mkl_dc_m, (alpha), (a), &mkl_dc_lda, (b), &mkl_dc_ldb); \
          } else { \
              fname_direct(mkl_dc_fside[1 - mkl_dc_iside], mkl_dc_fuplo[1 - mkl_dc_iuplo], \
                           mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                           &mkl_dc_n, &mkl_dc_m, (alpha), (a), &mkl_dc_lda, (b), &mkl_dc_ldb, \
                           &mkl_direct_call_flag); \
          } \
      } else if (mkl_dc_layout == CblasColMajor) { \
          if (MKL_DC_CBLAS_TRSM_CHECKSIZE(mkl_dc_m, mkl_dc_n)) { \
              fname_unrolledc(mkl_dc_fside[mkl_dc_iside], mkl_dc_fuplo[mkl_dc_iuplo], \
                              mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                              &mkl_dc_m, &mkl_dc_n, (alpha), (a), &mkl_dc_lda, (b), &mkl_dc_ldb); \
          } else { \
              fname_direct(mkl_dc_fside[mkl_dc_iside], mkl_dc_fuplo[mkl_dc_iuplo], \
                           mkl_dc_ftrans[mkl_dc_itrans], mkl_dc_fdiag[mkl_dc_idiag], \
                           &mkl_dc_m, &mkl_dc_n, (alpha), (a), &mkl_dc_lda, (b), &mkl_dc_ldb, \
                           &mkl_direct_call_flag); \
          } \
      } \
  } while (0)
  466. #define cblas_dtrsm(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_CBLAS_DTRSM_CONVERT(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_dtrsm, dtrsm_direct)
  467. #define cblas_strsm(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_CBLAS_STRSM_CONVERT(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_strsm, strsm_direct)
  468. #define cblas_ctrsm(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_CBLAS_COMPLEX_TRSM_CONVERT(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_ctrsm, ctrsm_direct)
  469. #define cblas_ztrsm(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_CBLAS_COMPLEX_TRSM_CONVERT(layout, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_ztrsm, ztrsm_direct)
  470. /* CBLAS SYRK */
  471. #define MKL_DC_CBLAS_DSYRK_CONVERT(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc, fname_unrolledc, fname_direct) do { \
  472. MKL_DIRECT_CALL_INIT_FLAG; \
  473. char *fuplo[] = {"U", "L"};\
  474. char *ftrans[] = {"N", "T"};\
  475. MKL_INT index_uplo, index_trans; \
  476. index_uplo = (uplo) - CblasUpper; \
  477. index_trans = (trans) - CblasNoTrans; \
  478. MKL_INT temp_n = (n), temp_k = (k), temp_lda = (lda), temp_ldc = (ldc); \
  479. double temp_alpha = (alpha), temp_beta = (beta); \
  480. if ((layout) == CblasRowMajor) { \
  481. if ( MKL_DC_CBLAS_SYRK_CHECKSIZE(n, k) ) { \
  482. fname_unrolledc(fuplo[1-index_uplo], ftrans[1-index_trans], &(temp_n), &(temp_k), &(temp_alpha), (a), &(temp_lda), &(temp_beta), (c), &(temp_ldc)); \
  483. } else { \
  484. fname_direct(fuplo[1-index_uplo], ftrans[1-index_trans], &(temp_n), &(temp_k), &(temp_alpha), (a), &(temp_lda), &(temp_beta), (c), &(temp_ldc), &mkl_direct_call_flag); \
  485. } \
  486. } else if ((layout) == CblasColMajor) { \
  487. if ( MKL_DC_CBLAS_SYRK_CHECKSIZE(n, k) ) { \
  488. fname_unrolledc(fuplo[index_uplo], ftrans[index_trans], &(temp_n), &(temp_k), &(temp_alpha), (a), &(temp_lda), &(temp_beta), (c), &(temp_ldc)); \
  489. } else { \
  490. fname_direct(fuplo[index_uplo], ftrans[index_trans], &(temp_n), &(temp_k), &(temp_alpha), (a), &(temp_lda), &(temp_beta), (c), &(temp_ldc), &mkl_direct_call_flag); \
  491. } \
  492. } \
  493. } while (0)
  494. #define MKL_DC_CBLAS_SSYRK_CONVERT(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc, fname_unrolledc, fname_direct) do { \
  495. MKL_DIRECT_CALL_INIT_FLAG; \
  496. char *fuplo[] = {"U", "L"};\
  497. char *ftrans[] = {"N", "T"};\
  498. MKL_INT index_uplo, index_trans; \
  499. index_uplo = (uplo) - CblasUpper; \
  500. index_trans = (trans) - CblasNoTrans; \
  501. MKL_INT temp_n = (n), temp_k = (k), temp_lda = (lda), temp_ldc = (ldc); \
  502. float temp_alpha = (alpha), temp_beta = (beta); \
  503. if ((layout) == CblasRowMajor) { \
  504. if ( MKL_DC_CBLAS_SYRK_CHECKSIZE(n, k) ) { \
  505. fname_unrolledc(fuplo[1-index_uplo], ftrans[1-index_trans], &(temp_n), &(temp_k), &(temp_alpha), (a), &(temp_lda), &(temp_beta), (c), &(temp_ldc)); \
  506. } else { \
  507. fname_direct(fuplo[1-index_uplo], ftrans[1-index_trans], &(temp_n), &(temp_k), &(temp_alpha), (a), &(temp_lda), &(temp_beta), (c), &(temp_ldc), &mkl_direct_call_flag); \
  508. } \
  509. } else if ((layout) == CblasColMajor) { \
  510. if ( MKL_DC_CBLAS_SYRK_CHECKSIZE(n, k) ) { \
  511. fname_unrolledc(fuplo[index_uplo], ftrans[index_trans], &(temp_n), &(temp_k), &(temp_alpha), (a), &(temp_lda), &(temp_beta), (c), &(temp_ldc)); \
  512. } else { \
  513. fname_direct(fuplo[index_uplo], ftrans[index_trans], &(temp_n), &(temp_k), &(temp_alpha), (a), &(temp_lda), &(temp_beta), (c), &(temp_ldc), &mkl_direct_call_flag); \
  514. } \
  515. } \
  516. } while (0)
  517. #define MKL_DC_CBLAS_COMPLEX_SYRK_CONVERT(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc, fname_unrolledc, fname_direct) do { \
  518. MKL_DIRECT_CALL_INIT_FLAG; \
  519. char *fuplo[] = {"U", "L"};\
  520. char *ftrans[] = {"N", "T"};\
  521. MKL_INT index_uplo, index_trans; \
  522. index_uplo = (uplo) - CblasUpper; \
  523. index_trans = (trans) - CblasNoTrans; \
  524. MKL_INT temp_n = (n), temp_k = (k), temp_lda = (lda), temp_ldc = (ldc); \
  525. if ((layout) == CblasRowMajor) { \
  526. if ( MKL_DC_CBLAS_SYRK_CHECKSIZE(n, k) ) { \
  527. fname_unrolledc(fuplo[1-index_uplo], ftrans[1-index_trans], &(temp_n), &(temp_k), (alpha), (a), &(temp_lda), (beta), (c), &(temp_ldc)); \
  528. } else { \
  529. fname_direct(fuplo[1-index_uplo], ftrans[1-index_trans], &(temp_n), &(temp_k), (alpha), (a), &(temp_lda), (beta), (c), &(temp_ldc), &mkl_direct_call_flag); \
  530. } \
  531. } else if ((layout) == CblasColMajor) { \
  532. if ( MKL_DC_CBLAS_SYRK_CHECKSIZE(n, k) ) { \
  533. fname_unrolledc(fuplo[index_uplo], ftrans[index_trans], &(temp_n), &(temp_k), (alpha), (a), &(temp_lda), (beta), (c), &(temp_ldc)); \
  534. } else { \
  535. fname_direct(fuplo[index_uplo], ftrans[index_trans], &(temp_n), &(temp_k), (alpha), (a), &(temp_lda), (beta), (c), &(temp_ldc), &mkl_direct_call_flag); \
  536. } \
  537. } \
  538. } while (0)
  539. #define cblas_dsyrk(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_CBLAS_DSYRK_CONVERT(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc, mkl_dc_dsyrk, dsyrk_direct)
  540. #define cblas_ssyrk(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_CBLAS_SSYRK_CONVERT(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc, mkl_dc_ssyrk, ssyrk_direct)
  541. #define cblas_csyrk(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_CBLAS_COMPLEX_SYRK_CONVERT(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc, mkl_dc_csyrk, csyrk_direct)
  542. #define cblas_zsyrk(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_CBLAS_COMPLEX_SYRK_CONVERT(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc, mkl_dc_zsyrk, zsyrk_direct)
  543. /* CBLAS AXPY */
  544. #define MKL_DC_SAXPY_CBLAS_CONVERT(n, alpha, x, incx, y, incy, CHECK, fname_unrolledc, fname_direct) do { \
  545. MKL_INT temp_n = (n), temp_incx = (incx), temp_incy = (incy);\
  546. float temp_alpha = (alpha);\
  547. if (CHECK(n)) { \
  548. fname_unrolledc(&(temp_n), &(temp_alpha), (x), &(temp_incx), (y), &(temp_incy));\
  549. } else { \
  550. MKL_DIRECT_CALL_INIT_FLAG; \
  551. fname_direct(&(temp_n), &(temp_alpha), (x), &(temp_incx), (y), &(temp_incy), &mkl_direct_call_flag); \
  552. } \
  553. } while (0)
  554. #define MKL_DC_DAXPY_CBLAS_CONVERT(n, alpha, x, incx, y, incy, CHECK, fname_unrolledc, fname_direct) do { \
  555. MKL_INT temp_n = (n), temp_incx = (incx), temp_incy = (incy);\
  556. double temp_alpha = (alpha);\
  557. if (CHECK(n)) { \
  558. fname_unrolledc(&(temp_n), &(temp_alpha), (x), &(temp_incx), (y), &(temp_incy));\
  559. } else { \
  560. MKL_DIRECT_CALL_INIT_FLAG; \
  561. fname_direct(&(temp_n), &(temp_alpha), (x), &(temp_incx), (y), &(temp_incy), &mkl_direct_call_flag); \
  562. } \
  563. } while (0)
  564. #define MKL_DC_COMPLEX_AXPY_CBLAS_CONVERT(n, alpha, x, incx, y, incy, CHECK, fname_unrolledc, fname_direct) do { \
  565. MKL_INT temp_n = (n), temp_incx = (incx), temp_incy = (incy);\
  566. if (CHECK(n)) { \
  567. fname_unrolledc(&(temp_n), (alpha), (x), &(temp_incx), (y), &(temp_incy));\
  568. } else { \
  569. MKL_DIRECT_CALL_INIT_FLAG; \
  570. fname_direct(&(temp_n), (alpha), (x), &(temp_incx), (y), &(temp_incy), &mkl_direct_call_flag); \
  571. } \
  572. } while (0)
  573. #define cblas_daxpy(n,a,x,incx,y,incy) MKL_DC_DAXPY_CBLAS_CONVERT(n, a, x, incx, y, incy, MKL_DC_CBLAS_DAXPY_CHECKSIZE, mkl_dc_daxpy, daxpy_direct)
  574. #define cblas_saxpy(n,a,x,incx,y,incy) MKL_DC_SAXPY_CBLAS_CONVERT(n, a, x, incx, y, incy, MKL_DC_CBLAS_SAXPY_CHECKSIZE, mkl_dc_saxpy, saxpy_direct)
  575. #define cblas_caxpy(n,a,x,incx,y,incy) MKL_DC_COMPLEX_AXPY_CBLAS_CONVERT(n, a, x, incx, y, incy, MKL_DC_CBLAS_CAXPY_CHECKSIZE, mkl_dc_caxpy, caxpy_direct)
  576. #define cblas_zaxpy(n,a,x,incx,y,incy) MKL_DC_COMPLEX_AXPY_CBLAS_CONVERT(n, a, x, incx, y, incy,MKL_DC_CBLAS_ZAXPY_CHECKSIZE,mkl_dc_zaxpy,zaxpy_direct)
  577. /* CBLAS DOT */
  578. #define cblas_ddot(n,x,incx,y,incy) mkl_dc_ddot_convert(&(n), (x), &(incx), (y), &(incy));
  579. #define cblas_sdot(n,x,incx,y,incy) mkl_dc_sdot_convert(&(n), (x), &(incx), (y), &(incy));
  580. /* end of CBLAS interfaces */
  581. /* DGEMM */
  582. #define GEMM_DIRECT_FUNCTION
  583. #define MKL_DC_DGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) do { \
  584. if (MKL_DC_DGEMM_CHECKSIZE(m,n,k)) { \
  585. SMALL_SIZE_DGEMM((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc));\
  586. } else { \
  587. MKL_DIRECT_CALL_INIT_FLAG; \
  588. dgemm_direct((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc), &mkl_direct_call_flag); \
  589. } \
  590. } while (0)
  591. #define dgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_DGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  592. #define dgemm_(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_DGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  593. #define DGEMM(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_DGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  594. /* SGEMM */
  595. #define MKL_DC_SGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) do { \
  596. if (MKL_DC_SGEMM_CHECKSIZE(m,n,k)) { \
  597. SMALL_SIZE_SGEMM((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc));\
  598. } else { \
  599. MKL_DIRECT_CALL_INIT_FLAG; \
  600. sgemm_direct((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc), &mkl_direct_call_flag); \
  601. } \
  602. } while (0)
  603. #define sgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_SGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  604. #define sgemm_(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_SGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  605. #define SGEMM(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_SGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  606. /* CGEMM */
  607. #define MKL_DC_CGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) do { \
  608. if (MKL_DC_CGEMM_CHECKSIZE(m,n,k)) { \
  609. SMALL_SIZE_CGEMM((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc));\
  610. } else { \
  611. MKL_DIRECT_CALL_INIT_FLAG; \
  612. cgemm_direct((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc), &mkl_direct_call_flag); \
  613. } \
  614. } while (0)
  615. #define cgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_CGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  616. #define cgemm_(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_CGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  617. #define CGEMM(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_CGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  618. /* ZGEMM */
  619. #define MKL_DC_ZGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) do { \
  620. if (MKL_DC_ZGEMM_CHECKSIZE(m,n,k)) { \
  621. SMALL_SIZE_ZGEMM((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc));\
  622. } else { \
  623. MKL_DIRECT_CALL_INIT_FLAG; \
  624. zgemm_direct((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc), &mkl_direct_call_flag); \
  625. } \
  626. } while (0)
  627. #define zgemm(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_ZGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  628. #define zgemm_(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_ZGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  629. #define ZGEMM(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_ZGEMM_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  630. /* CGEMM3M */
  631. #define MKL_DC_CGEMM3M_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) do { \
  632. if (MKL_DC_GEMM3M_CHECKSIZE(m,n,k)) { \
  633. mkl_dc_cgemm((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc));\
  634. } else { \
  635. MKL_DIRECT_CALL_INIT_FLAG; \
  636. cgemm3m_direct((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc), &mkl_direct_call_flag); \
  637. }\
  638. } while (0)
  639. #define cgemm3m(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_CGEMM3M_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  640. #define cgemm3m_(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_CGEMM3M_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  641. #define CGEMM3M(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_CGEMM3M_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  642. /* ZGEMM3M */
  643. #define MKL_DC_ZGEMM3M_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) do { \
  644. if (MKL_DC_GEMM3M_CHECKSIZE(m,n,k)) { \
  645. mkl_dc_zgemm((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc));\
  646. } else { \
  647. MKL_DIRECT_CALL_INIT_FLAG; \
  648. zgemm3m_direct((transa), (transb), (m), (n), (k), (alpha), (a), (lda), (b), (ldb), (beta), (c), (ldc), &mkl_direct_call_flag); \
  649. }\
  650. } while (0)
  651. #define zgemm3m(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_ZGEMM3M_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  652. #define zgemm3m_(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_ZGEMM3M_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  653. #define ZGEMM3M(transa,transb,m,n,k,alpha,a,lda,b,ldb,beta,c,ldc) MKL_DC_ZGEMM3M_CONVERT(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
  654. /* ?TRSM_DIRECT */
  655. #define MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, fname_unrolledc, fname_direct) do { \
  656. if (MKL_DC_TRSM_CHECKSIZE(m,n)) { \
  657. fname_unrolledc((side), (uplo), (transa), (diag), (m), (n), (alpha), (a), (lda), (b), (ldb)); \
  658. } else { \
  659. MKL_DIRECT_CALL_INIT_FLAG; \
  660. fname_direct((side), (uplo), (transa), (diag), (m), (n), (alpha), (a), (lda), (b), (ldb), &mkl_direct_call_flag); \
  661. } \
  662. } while (0)
  663. /* DTRSM_DIRECT */
  664. #define dtrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_dtrsm, dtrsm_direct)
  665. #define dtrsm_(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_dtrsm, dtrsm_direct)
  666. #define DTRSM(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_dtrsm, dtrsm_direct)
  667. /* STRSM_DIRECT */
  668. #define strsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_strsm, strsm_direct)
  669. #define strsm_(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_strsm, strsm_direct)
  670. #define STRSM(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_strsm, strsm_direct)
  671. /* CTRSM_DIRECT */
  672. #define ctrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_ctrsm, ctrsm_direct)
  673. #define ctrsm_(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_ctrsm, ctrsm_direct)
  674. #define CTRSM(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_ctrsm, ctrsm_direct)
  675. /* ZTRSM_DIRECT */
  676. #define ztrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_ztrsm, ztrsm_direct)
  677. #define ztrsm_(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_ztrsm, ztrsm_direct)
  678. #define ZTRSM(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb) MKL_DC_TRSM_CONVERT(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb, mkl_dc_ztrsm, ztrsm_direct)
  679. /* ?SYRK_DIRECT */
  680. /* DSYRK_DIRECT */
  681. #define MKL_DC_DSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) do { \
  682. if (MKL_DC_SYRK_CHECKSIZE(n,k)) { \
  683. mkl_dc_dsyrk((uplo), (trans), (n), (k), (alpha), (a), (lda), (beta), (c), (ldc));\
  684. } else { \
  685. MKL_DIRECT_CALL_INIT_FLAG; \
  686. dsyrk_direct((uplo), (trans), (n), (k), (alpha), (a), (lda), (beta), (c), (ldc), &mkl_direct_call_flag); \
  687. } \
  688. } while (0)
  689. #define dsyrk(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_DSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  690. #define dsyrk_(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_DSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  691. #define DSYRK(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_DSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  692. /* SSYRK_DIRECT */
  693. #define MKL_DC_SSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) do { \
  694. if (MKL_DC_SYRK_CHECKSIZE(n,k)) { \
  695. mkl_dc_ssyrk((uplo), (trans), (n), (k), (alpha), (a), (lda), (beta), (c), (ldc));\
  696. } else { \
  697. MKL_DIRECT_CALL_INIT_FLAG; \
  698. ssyrk_direct((uplo), (trans), (n), (k), (alpha), (a), (lda), (beta), (c), (ldc), &mkl_direct_call_flag); \
  699. } \
  700. } while (0)
  701. #define ssyrk(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_SSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  702. #define ssyrk_(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_SSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  703. #define SSYRK(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_SSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  704. /* ZSYRK_DIRECT */
  705. #define MKL_DC_ZSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) do { \
  706. if (MKL_DC_SYRK_CHECKSIZE(n,k)) { \
  707. mkl_dc_zsyrk((uplo), (trans), (n), (k), (alpha), (a), (lda), (beta), (c), (ldc));\
  708. } else { \
  709. MKL_DIRECT_CALL_INIT_FLAG; \
  710. zsyrk_direct((uplo), (trans), (n), (k), (alpha), (a), (lda), (beta), (c), (ldc), &mkl_direct_call_flag); \
  711. } \
  712. } while (0)
  713. #define zsyrk(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_ZSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  714. #define zsyrk_(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_ZSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  715. #define ZSYRK(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_ZSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  716. /* CSYRK_DIRECT */
  717. #define MKL_DC_CSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) do { \
  718. if (MKL_DC_SYRK_CHECKSIZE(n,k)) { \
  719. mkl_dc_csyrk((uplo), (trans), (n), (k), (alpha), (a), (lda), (beta), (c), (ldc));\
  720. } else { \
  721. MKL_DIRECT_CALL_INIT_FLAG; \
  722. csyrk_direct((uplo), (trans), (n), (k), (alpha), (a), (lda), (beta), (c), (ldc), &mkl_direct_call_flag); \
  723. } \
  724. } while (0)
  725. #define csyrk(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_CSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  726. #define csyrk_(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_CSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  727. #define CSYRK(uplo, trans, n, k, alpha, a, lda, beta, c, ldc) MKL_DC_CSYRK_CONVERT(uplo, trans, n, k, alpha, a, lda, beta, c, ldc)
  728. /* ?AXPY_DIRECT */
  729. #define MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, CHECK, fname_unrolledc, fname_direct) do { \
  730. if (CHECK(n)) { \
  731. fname_unrolledc((n), (alpha), (x), (incx), (y), (incy));\
  732. } else { \
  733. MKL_DIRECT_CALL_INIT_FLAG; \
  734. fname_direct((n), (alpha), (x), (incx), (y), (incy), &mkl_direct_call_flag); \
  735. } \
  736. } while (0)
  737. #define daxpy(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_DAXPY_CHECKSIZE, mkl_dc_daxpy, daxpy_direct)
  738. #define daxpy_(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_DAXPY_CHECKSIZE, mkl_dc_daxpy, daxpy_direct)
  739. #define DAXPY(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_DAXPY_CHECKSIZE, mkl_dc_daxpy, daxpy_direct)
  740. #define saxpy(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_SAXPY_CHECKSIZE, mkl_dc_saxpy, saxpy_direct)
  741. #define saxpy_(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_SAXPY_CHECKSIZE, mkl_dc_saxpy, saxpy_direct)
  742. #define SAXPY(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_SAXPY_CHECKSIZE, mkl_dc_saxpy, saxpy_direct)
  743. #define caxpy(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_CAXPY_CHECKSIZE, mkl_dc_caxpy, caxpy_direct)
  744. #define caxpy_(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_CAXPY_CHECKSIZE, mkl_dc_caxpy, caxpy_direct)
  745. #define CAXPY(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_CAXPY_CHECKSIZE, mkl_dc_caxpy, caxpy_direct)
  746. #define zaxpy(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_ZAXPY_CHECKSIZE, mkl_dc_zaxpy, zaxpy_direct)
  747. #define zaxpy_(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_ZAXPY_CHECKSIZE, mkl_dc_zaxpy, zaxpy_direct)
  748. #define ZAXPY(n,alpha,x,incx,y,incy) MKL_DC_AXPY_CONVERT(n, alpha, x, incx, y, incy, MKL_DC_ZAXPY_CHECKSIZE, mkl_dc_zaxpy, zaxpy_direct)
  749. /* {S,D}DOT_DIRECT */
  750. static __inline double mkl_dc_ddot_convert(const MKL_INT *n, const double* x, const MKL_INT *incx, const double *y, const MKL_INT *incy) {
  751. double ret = 0.0;
  752. if (MKL_DC_DDOT_CHECKSIZE(n)) {
  753. ret = mkl_dc_ddot((n), (x), (incx), (y), (incy));
  754. } else {
  755. ret = ddot_direct((n), (x), (incx), (y), (incy));
  756. }
  757. return ret;
  758. }
  759. static __inline float mkl_dc_sdot_convert(const MKL_INT *n, const float* x, const MKL_INT *incx, const float *y, const MKL_INT *incy) {
  760. float ret = 0.0;
  761. if (MKL_DC_SDOT_CHECKSIZE(n)) {
  762. ret = mkl_dc_sdot((n), (x), (incx), (y), (incy));
  763. } else {
  764. ret = sdot_direct((n), (x), (incx), (y), (incy));
  765. }
  766. return ret;
  767. }
  768. #define ddot mkl_dc_ddot_convert
  769. #define ddot_ mkl_dc_ddot_convert
  770. #define DDOT mkl_dc_ddot_convert
  771. #define sdot mkl_dc_sdot_convert
  772. #define sdot_ mkl_dc_sdot_convert
  773. #define SDOT mkl_dc_sdot_convert
  774. /* LAPACK */
  775. #if (MKL_DC_USE_C == 1)
  776. /* GETRF */
  777. #define MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, prec) do { \
  778. const MKL_INT temp_m = *(m); \
  779. const MKL_INT temp_n = *(n); \
  780. const MKL_INT temp_lda = *(lda); \
  781. if (MKL_DC_GETRF_CHECKSIZE(temp_m, temp_n)) { \
  782. mkl_dc_ ## prec ## getrf(temp_m, temp_n, (a), temp_lda, (ipiv), (info)); \
  783. } else { \
  784. prec ## getrf((m), (n), (a), (lda), (ipiv), (info)); \
  785. } \
  786. } while (0)
  787. #define dgetrf(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, d)
  788. #define dgetrf_(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, d)
  789. #define DGETRF(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, d)
  790. #define sgetrf(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, s)
  791. #define sgetrf_(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, s)
  792. #define SGETRF(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, s)
  793. #define cgetrf(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, c)
  794. #define cgetrf_(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, c)
  795. #define CGETRF(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, c)
  796. #define zgetrf(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, z)
  797. #define zgetrf_(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, z)
  798. #define ZGETRF(m, n, a, lda, ipiv, info) MKL_DC_GETRF_CONVERT(m, n, a, lda, ipiv, info, z)
  799. /* LAPACKE_?getrf */
  800. #define LAPACKE_dgetrf(matrix_layout, m, n, a, lda, ipiv) mkl_dc_lapacke_dgetrf_convert(matrix_layout, m, n, a, lda, ipiv)
  801. #define LAPACKE_sgetrf(matrix_layout, m, n, a, lda, ipiv) mkl_dc_lapacke_sgetrf_convert(matrix_layout, m, n, a, lda, ipiv)
  802. #define LAPACKE_cgetrf(matrix_layout, m, n, a, lda, ipiv) mkl_dc_lapacke_cgetrf_convert(matrix_layout, m, n, a, lda, ipiv)
  803. #define LAPACKE_zgetrf(matrix_layout, m, n, a, lda, ipiv) mkl_dc_lapacke_zgetrf_convert(matrix_layout, m, n, a, lda, ipiv)
  804. #define LAPACKE_dgetrf_work(matrix_layout, m, n, a, lda, ipiv) mkl_dc_lapacke_dgetrf_convert(matrix_layout, m, n, a, lda, ipiv)
  805. #define LAPACKE_sgetrf_work(matrix_layout, m, n, a, lda, ipiv) mkl_dc_lapacke_sgetrf_convert(matrix_layout, m, n, a, lda, ipiv)
  806. #define LAPACKE_cgetrf_work(matrix_layout, m, n, a, lda, ipiv) mkl_dc_lapacke_cgetrf_convert(matrix_layout, m, n, a, lda, ipiv)
  807. #define LAPACKE_zgetrf_work(matrix_layout, m, n, a, lda, ipiv) mkl_dc_lapacke_zgetrf_convert(matrix_layout, m, n, a, lda, ipiv)
  808. /* GETRFNP */
  809. #define MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, prec) do { \
  810. const MKL_INT temp_m = *(m); \
  811. const MKL_INT temp_n = *(n); \
  812. const MKL_INT temp_lda = *(lda); \
  813. if (MKL_DC_GETRFNP_CHECKSIZE(temp_m, temp_n)) { \
  814. mkl_dc_ ## prec ## getrfnp(temp_m, temp_n, (a), temp_lda, (info)); \
  815. } else { \
  816. mkl_ ##prec ## getrfnp((m), (n), (a), (lda), (info)); \
  817. } \
  818. } while (0)
  819. #define mkl_dgetrfnp(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, d)
  820. #define mkl_dgetrfnp_(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, d)
  821. #define MKL_DGETRFNP(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, d)
  822. #define mkl_sgetrfnp(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, s)
  823. #define mkl_sgetrfnp_(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, s)
  824. #define MKL_SGETRFNP(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, s)
  825. #define mkl_cgetrfnp(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, c)
  826. #define mkl_cgetrfnp_(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, c)
  827. #define MKL_CGETRFNP(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, c)
  828. #define mkl_zgetrfnp(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, z)
  829. #define mkl_zgetrfnp_(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, z)
  830. #define MKL_ZGETRFNP(m, n, a, lda, info) MKL_DC_GETRFNP_CONVERT(m, n, a, lda, info, z)
  831. /* LAPACKE_mkl_?getrfnp */
  832. #define LAPACKE_mkl_dgetrfnp(matrix_layout, m, n, a, lda) mkl_dc_lapacke_dgetrfnp_convert(matrix_layout, m, n, a, lda)
  833. #define LAPACKE_mkl_sgetrfnp(matrix_layout, m, n, a, lda) mkl_dc_lapacke_sgetrfnp_convert(matrix_layout, m, n, a, lda)
  834. #define LAPACKE_mkl_cgetrfnp(matrix_layout, m, n, a, lda) mkl_dc_lapacke_cgetrfnp_convert(matrix_layout, m, n, a, lda)
  835. #define LAPACKE_mkl_zgetrfnp(matrix_layout, m, n, a, lda) mkl_dc_lapacke_zgetrfnp_convert(matrix_layout, m, n, a, lda)
  836. #define LAPACKE_mkl_dgetrfnp_work(matrix_layout, m, n, a, lda) mkl_dc_lapacke_dgetrfnp_convert(matrix_layout, m, n, a, lda)
  837. #define LAPACKE_mkl_sgetrfnp_work(matrix_layout, m, n, a, lda) mkl_dc_lapacke_sgetrfnp_convert(matrix_layout, m, n, a, lda)
  838. #define LAPACKE_mkl_cgetrfnp_work(matrix_layout, m, n, a, lda) mkl_dc_lapacke_cgetrfnp_convert(matrix_layout, m, n, a, lda)
  839. #define LAPACKE_mkl_zgetrfnp_work(matrix_layout, m, n, a, lda) mkl_dc_lapacke_zgetrfnp_convert(matrix_layout, m, n, a, lda)
  840. /* GETRS */
  841. #define MKL_DC_GETRS_CONVERT(trans, n, nrhs, a, lda, ipiv, b, ldb, info, prec) do { \
  842. const MKL_INT temp_n = *(n); \
  843. const MKL_INT temp_nrhs = *(nrhs); \
  844. if (MKL_DC_GETRS_CHECKSIZE(temp_n, temp_nrhs)) { \
  845. const char temp_trans = *(trans); \
  846. const MKL_INT temp_lda = *(lda); \
  847. const MKL_INT temp_ldb = *(ldb); \
  848. mkl_dc_ ## prec ## getrs(temp_trans, temp_n, temp_nrhs, (a), temp_lda, (ipiv), (b), temp_ldb, (info)); \
  849. } else { \
  850. prec ## getrs((trans), (n), (nrhs), (a), (lda), (ipiv), (b), (ldb), (info)); \
  851. } \
  852. } while (0)
  /*
   * Fortran-style ?getrs entry points (lowercase, trailing underscore, uppercase).
   *
   * Only the lowercase spelling expands MKL_DC_GETRS_CONVERT itself; the other
   * two spellings forward to the lowercase macro.  The fallback branch of
   * MKL_DC_GETRS_CONVERT pastes the lowercase name (prec ## getrs).  Reached
   * through the lowercase macro, that name is the macro currently being
   * replaced, so the preprocessor leaves it alone and it refers to the
   * function of that name.  If an alias expanded MKL_DC_GETRS_CONVERT directly,
   * the pasted lowercase name would be expanded once more into a nested
   * MKL_DC_GETRS_CONVERT(...), which cannot be replaced again
   * (C11 6.10.3.4p2) and would be left behind as a call to an undeclared
   * identifier.
   */
  #define dgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info) MKL_DC_GETRS_CONVERT(trans, n, nrhs, a, lda, ipiv, b, ldb, info, d)
  #define dgetrs_(trans, n, nrhs, a, lda, ipiv, b, ldb, info) dgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
  #define DGETRS(trans, n, nrhs, a, lda, ipiv, b, ldb, info) dgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
  #define sgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info) MKL_DC_GETRS_CONVERT(trans, n, nrhs, a, lda, ipiv, b, ldb, info, s)
  #define sgetrs_(trans, n, nrhs, a, lda, ipiv, b, ldb, info) sgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
  #define SGETRS(trans, n, nrhs, a, lda, ipiv, b, ldb, info) sgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
  #define cgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info) MKL_DC_GETRS_CONVERT(trans, n, nrhs, a, lda, ipiv, b, ldb, info, c)
  #define cgetrs_(trans, n, nrhs, a, lda, ipiv, b, ldb, info) cgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
  #define CGETRS(trans, n, nrhs, a, lda, ipiv, b, ldb, info) cgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
  #define zgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info) MKL_DC_GETRS_CONVERT(trans, n, nrhs, a, lda, ipiv, b, ldb, info, z)
  #define zgetrs_(trans, n, nrhs, a, lda, ipiv, b, ldb, info) zgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
  #define ZGETRS(trans, n, nrhs, a, lda, ipiv, b, ldb, info) zgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
  /*
   * LAPACKE_?getrs and LAPACKE_?getrs_work.
   * Both spellings of a given precision expand to the same
   * mkl_dc_lapacke_?getrs_convert call, with all nine arguments forwarded
   * unchanged and in order.
   */
  #define LAPACKE_dgetrs(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb) \
      mkl_dc_lapacke_dgetrs_convert(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb)
  #define LAPACKE_sgetrs(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb) \
      mkl_dc_lapacke_sgetrs_convert(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb)
  #define LAPACKE_cgetrs(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb) \
      mkl_dc_lapacke_cgetrs_convert(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb)
  #define LAPACKE_zgetrs(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb) \
      mkl_dc_lapacke_zgetrs_convert(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb)
  #define LAPACKE_dgetrs_work(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb) \
      mkl_dc_lapacke_dgetrs_convert(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb)
  #define LAPACKE_sgetrs_work(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb) \
      mkl_dc_lapacke_sgetrs_convert(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb)
  #define LAPACKE_cgetrs_work(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb) \
      mkl_dc_lapacke_cgetrs_convert(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb)
  #define LAPACKE_zgetrs_work(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb) \
      mkl_dc_lapacke_zgetrs_convert(matrix_layout, trans, n, nrhs, a, lda, ipiv, b, ldb)
  /* ?GETRI */
  /*
   * Statement macro behind the Fortran-style ?getri entry points.  prec is the
   * precision letter that gets pasted into the callee names.
   *
   * n is read through its pointer and given to MKL_DC_GETRI_CHECKSIZE.
   *   - Check true:  lda and lwork are dereferenced too and
   *                  mkl_dc_<prec>getri is called with n, lda and lwork by
   *                  value; a, ipiv, work and info are passed as given.
   *   - Check false: every argument is forwarded untouched to <prec>getri.
   *
   * lda and lwork are read only inside the direct-call branch, matching the
   * GETRS and POTRF macros in this file.  That keeps the fallback path free of
   * unused locals and evaluates those two argument expressions exactly once on
   * either path.  n is still evaluated twice on the fallback path (once for
   * the size check, once when forwarded).
   */
  #define MKL_DC_GETRI_CONVERT(n, a, lda, ipiv, work, lwork, info, prec) do { \
      const MKL_INT temp_n = *(n); \
      if (MKL_DC_GETRI_CHECKSIZE(temp_n)) { \
          const MKL_INT temp_lda = *(lda); \
          const MKL_INT temp_lwork = *(lwork); \
          mkl_dc_ ## prec ## getri(temp_n, (a), temp_lda, (ipiv), (work), temp_lwork, (info)); \
      } else { \
          prec ## getri((n), (a), (lda), (ipiv), (work), (lwork), (info)); \
      } \
  } while (0)
  /*
   * Fortran-style ?getri entry points: each one expands
   * MKL_DC_GETRI_CONVERT with its own precision letter as the last argument.
   */
  #define dgetri(n, a, lda, ipiv, work, lwork, info) \
      MKL_DC_GETRI_CONVERT(n, a, lda, ipiv, work, lwork, info, d)
  #define sgetri(n, a, lda, ipiv, work, lwork, info) \
      MKL_DC_GETRI_CONVERT(n, a, lda, ipiv, work, lwork, info, s)
  #define cgetri(n, a, lda, ipiv, work, lwork, info) \
      MKL_DC_GETRI_CONVERT(n, a, lda, ipiv, work, lwork, info, c)
  #define zgetri(n, a, lda, ipiv, work, lwork, info) \
      MKL_DC_GETRI_CONVERT(n, a, lda, ipiv, work, lwork, info, z)
  /*
   * LAPACKE_?getri and LAPACKE_?getri_work.
   * Both map onto mkl_dc_lapacke_?getri_convert.  The plain form takes no
   * workspace arguments and puts NULL and 0 in the work / lwork positions;
   * the _work form forwards the caller's work and lwork.
   */
  #define LAPACKE_dgetri(matrix_layout, n, a, lda, ipiv) \
      mkl_dc_lapacke_dgetri_convert(matrix_layout, n, a, lda, ipiv, NULL, 0)
  #define LAPACKE_sgetri(matrix_layout, n, a, lda, ipiv) \
      mkl_dc_lapacke_sgetri_convert(matrix_layout, n, a, lda, ipiv, NULL, 0)
  #define LAPACKE_cgetri(matrix_layout, n, a, lda, ipiv) \
      mkl_dc_lapacke_cgetri_convert(matrix_layout, n, a, lda, ipiv, NULL, 0)
  #define LAPACKE_zgetri(matrix_layout, n, a, lda, ipiv) \
      mkl_dc_lapacke_zgetri_convert(matrix_layout, n, a, lda, ipiv, NULL, 0)
  #define LAPACKE_dgetri_work(matrix_layout, n, a, lda, ipiv, work, lwork) \
      mkl_dc_lapacke_dgetri_convert(matrix_layout, n, a, lda, ipiv, work, lwork)
  #define LAPACKE_sgetri_work(matrix_layout, n, a, lda, ipiv, work, lwork) \
      mkl_dc_lapacke_sgetri_convert(matrix_layout, n, a, lda, ipiv, work, lwork)
  #define LAPACKE_cgetri_work(matrix_layout, n, a, lda, ipiv, work, lwork) \
      mkl_dc_lapacke_cgetri_convert(matrix_layout, n, a, lda, ipiv, work, lwork)
  #define LAPACKE_zgetri_work(matrix_layout, n, a, lda, ipiv, work, lwork) \
      mkl_dc_lapacke_zgetri_convert(matrix_layout, n, a, lda, ipiv, work, lwork)
  /* ?GEQRF */
  /*
   * Statement macro behind the Fortran-style ?geqrf entry points.  prec is the
   * precision letter that gets pasted into the callee names.
   *
   * m and n are read through their pointers and given to
   * MKL_DC_GEQRF_CHECKSIZE.
   *   - Check true:  lda and lwork are dereferenced too and
   *                  mkl_dc_<prec>geqrf is called with m, n, lda and lwork by
   *                  value; a, tau, work and info are passed as given.
   *   - Check false: every argument is forwarded untouched to <prec>geqrf.
   *
   * lda and lwork are read only inside the direct-call branch, matching the
   * GETRS and POTRF macros in this file.  That keeps the fallback path free of
   * unused locals and evaluates those two argument expressions exactly once on
   * either path.  m and n are still evaluated twice on the fallback path (once
   * for the size check, once when forwarded).
   */
  #define MKL_DC_GEQRF_CONVERT(m, n, a, lda, tau, work, lwork, info, prec) do { \
      const MKL_INT temp_m = *(m); \
      const MKL_INT temp_n = *(n); \
      if (MKL_DC_GEQRF_CHECKSIZE(temp_m, temp_n)) { \
          const MKL_INT temp_lda = *(lda); \
          const MKL_INT temp_lwork = *(lwork); \
          mkl_dc_ ## prec ## geqrf(temp_m, temp_n, (a), temp_lda, (tau), (work), temp_lwork, (info)); \
      } else { \
          prec ## geqrf((m), (n), (a), (lda), (tau), (work), (lwork), (info)); \
      } \
  } while (0)
  /*
   * Fortran-style ?geqrf entry points: each one expands
   * MKL_DC_GEQRF_CONVERT with its own precision letter as the last argument.
   */
  #define dgeqrf(m, n, a, lda, tau, work, lwork, info) \
      MKL_DC_GEQRF_CONVERT(m, n, a, lda, tau, work, lwork, info, d)
  #define sgeqrf(m, n, a, lda, tau, work, lwork, info) \
      MKL_DC_GEQRF_CONVERT(m, n, a, lda, tau, work, lwork, info, s)
  #define cgeqrf(m, n, a, lda, tau, work, lwork, info) \
      MKL_DC_GEQRF_CONVERT(m, n, a, lda, tau, work, lwork, info, c)
  #define zgeqrf(m, n, a, lda, tau, work, lwork, info) \
      MKL_DC_GEQRF_CONVERT(m, n, a, lda, tau, work, lwork, info, z)
  /*
   * LAPACKE_?geqrf and LAPACKE_?geqrf_work.
   * Both map onto mkl_dc_lapacke_?geqrf_convert.  The plain form takes no
   * workspace arguments and puts NULL and 0 in the work / lwork positions;
   * the _work form forwards the caller's work and lwork.
   */
  #define LAPACKE_dgeqrf(matrix_layout, m, n, a, lda, tau) \
      mkl_dc_lapacke_dgeqrf_convert(matrix_layout, m, n, a, lda, tau, NULL, 0)
  #define LAPACKE_sgeqrf(matrix_layout, m, n, a, lda, tau) \
      mkl_dc_lapacke_sgeqrf_convert(matrix_layout, m, n, a, lda, tau, NULL, 0)
  #define LAPACKE_cgeqrf(matrix_layout, m, n, a, lda, tau) \
      mkl_dc_lapacke_cgeqrf_convert(matrix_layout, m, n, a, lda, tau, NULL, 0)
  #define LAPACKE_zgeqrf(matrix_layout, m, n, a, lda, tau) \
      mkl_dc_lapacke_zgeqrf_convert(matrix_layout, m, n, a, lda, tau, NULL, 0)
  #define LAPACKE_dgeqrf_work(matrix_layout, m, n, a, lda, tau, work, lwork) \
      mkl_dc_lapacke_dgeqrf_convert(matrix_layout, m, n, a, lda, tau, work, lwork)
  #define LAPACKE_sgeqrf_work(matrix_layout, m, n, a, lda, tau, work, lwork) \
      mkl_dc_lapacke_sgeqrf_convert(matrix_layout, m, n, a, lda, tau, work, lwork)
  #define LAPACKE_cgeqrf_work(matrix_layout, m, n, a, lda, tau, work, lwork) \
      mkl_dc_lapacke_cgeqrf_convert(matrix_layout, m, n, a, lda, tau, work, lwork)
  #define LAPACKE_zgeqrf_work(matrix_layout, m, n, a, lda, tau, work, lwork) \
      mkl_dc_lapacke_zgeqrf_convert(matrix_layout, m, n, a, lda, tau, work, lwork)
  921. /* POTRF */
  922. #if (MKL_DC_POTRF_DISABLE == 0)
  /*
   * Statement macro behind the Fortran-style ?potrf entry points.  prec is the
   * precision letter that gets pasted into the callee names.
   *
   * n is read through its pointer and given to MKL_DC_POTRF_CHECKSIZE.  If the
   * check is false, the call is passed on untouched (all arguments still
   * pointers) to <prec>potrf.  If it is true, uplo and lda are dereferenced as
   * well and mkl_dc_<prec>potrf is called with uplo, n and lda by value, while
   * a and info go through as given.
   */
  #define MKL_DC_POTRF_CONVERT(uplo, n, a, lda, info, prec) do { \
      const MKL_INT temp_n = *(n); \
      if (!(MKL_DC_POTRF_CHECKSIZE(temp_n))) { \
          prec ## potrf((uplo), (n), (a), (lda), (info)); \
      } else { \
          const char temp_uplo = *(uplo); \
          const MKL_INT temp_lda = *(lda); \
          mkl_dc_ ## prec ## potrf(temp_uplo, temp_n, (a), temp_lda, (info)); \
      } \
  } while (0)
  /*
   * Fortran-style ?potrf entry points (lowercase, trailing underscore, uppercase).
   *
   * Only the lowercase spelling expands MKL_DC_POTRF_CONVERT itself; the other
   * two spellings forward to the lowercase macro.  The fallback branch of
   * MKL_DC_POTRF_CONVERT pastes the lowercase name (prec ## potrf).  Reached
   * through the lowercase macro, that name is the macro currently being
   * replaced, so the preprocessor leaves it alone and it refers to the
   * function of that name.  If an alias expanded MKL_DC_POTRF_CONVERT directly,
   * the pasted lowercase name would be expanded once more into a nested
   * MKL_DC_POTRF_CONVERT(...), which cannot be replaced again
   * (C11 6.10.3.4p2) and would be left behind as a call to an undeclared
   * identifier.
   */
  #define dpotrf(uplo, n, a, lda, info) MKL_DC_POTRF_CONVERT(uplo, n, a, lda, info, d)
  #define dpotrf_(uplo, n, a, lda, info) dpotrf(uplo, n, a, lda, info)
  #define DPOTRF(uplo, n, a, lda, info) dpotrf(uplo, n, a, lda, info)
  #define spotrf(uplo, n, a, lda, info) MKL_DC_POTRF_CONVERT(uplo, n, a, lda, info, s)
  #define spotrf_(uplo, n, a, lda, info) spotrf(uplo, n, a, lda, info)
  #define SPOTRF(uplo, n, a, lda, info) spotrf(uplo, n, a, lda, info)
  #define cpotrf(uplo, n, a, lda, info) MKL_DC_POTRF_CONVERT(uplo, n, a, lda, info, c)
  #define cpotrf_(uplo, n, a, lda, info) cpotrf(uplo, n, a, lda, info)
  #define CPOTRF(uplo, n, a, lda, info) cpotrf(uplo, n, a, lda, info)
  #define zpotrf(uplo, n, a, lda, info) MKL_DC_POTRF_CONVERT(uplo, n, a, lda, info, z)
  #define zpotrf_(uplo, n, a, lda, info) zpotrf(uplo, n, a, lda, info)
  #define ZPOTRF(uplo, n, a, lda, info) zpotrf(uplo, n, a, lda, info)
  /*
   * LAPACKE_?potrf and LAPACKE_?potrf_work.
   * Both spellings of a given precision expand to the same
   * mkl_dc_lapacke_?potrf_convert call, with all five arguments forwarded
   * unchanged and in order.
   */
  #define LAPACKE_dpotrf(matrix_layout, uplo, n, a, lda) \
      mkl_dc_lapacke_dpotrf_convert(matrix_layout, uplo, n, a, lda)
  #define LAPACKE_spotrf(matrix_layout, uplo, n, a, lda) \
      mkl_dc_lapacke_spotrf_convert(matrix_layout, uplo, n, a, lda)
  #define LAPACKE_cpotrf(matrix_layout, uplo, n, a, lda) \
      mkl_dc_lapacke_cpotrf_convert(matrix_layout, uplo, n, a, lda)
  #define LAPACKE_zpotrf(matrix_layout, uplo, n, a, lda) \
      mkl_dc_lapacke_zpotrf_convert(matrix_layout, uplo, n, a, lda)
  #define LAPACKE_dpotrf_work(matrix_layout, uplo, n, a, lda) \
      mkl_dc_lapacke_dpotrf_convert(matrix_layout, uplo, n, a, lda)
  #define LAPACKE_spotrf_work(matrix_layout, uplo, n, a, lda) \
      mkl_dc_lapacke_spotrf_convert(matrix_layout, uplo, n, a, lda)
  #define LAPACKE_cpotrf_work(matrix_layout, uplo, n, a, lda) \
      mkl_dc_lapacke_cpotrf_convert(matrix_layout, uplo, n, a, lda)
  #define LAPACKE_zpotrf_work(matrix_layout, uplo, n, a, lda) \
      mkl_dc_lapacke_zpotrf_convert(matrix_layout, uplo, n, a, lda)
  954. #endif /* MKL_DC_POTRF_DISABLE */
  955. #endif /* MKL_DC_USE_C */
  956. #ifdef __cplusplus
  957. }
  958. #endif
  959. #endif /* #ifdef MKL_DIRECT_CALL */
  960. #endif /* _MKL_DIRECT_CALL_H */