fftw3_omp_offload.h 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839
  1. /*******************************************************************************
  2. * Copyright 2020-2022 Intel Corporation.
  3. *
  4. * This software and the related documents are Intel copyrighted materials, and
  5. * your use of them is governed by the express license under which they were
  6. * provided to you (License). Unless the License provides otherwise, you may not
  7. * use, modify, copy, publish, distribute, disclose or transmit this software or
  8. * the related documents without Intel's prior written permission.
  9. *
  10. * This software and the related documents are provided as is, with no express
  11. * or implied warranties, other than those that are expressly stated in the
  12. * License.
  13. *******************************************************************************/
  14. /*
  15. ! Content:
  16. ! Intel(R) oneAPI Math Kernel Library (Intel(R) oneMKL)
  17. ! FFTW3 interface for OpenMP target (offload)
  18. !******************************************************************************/
  19. #ifndef FFTW3_OMP_OFFLOAD_H_INCLUDED
  20. #define FFTW3_OMP_OFFLOAD_H_INCLUDED
  21. #include "../fftw3.h"
  22. #include "mkl_dfti_omp_offload.h"
  23. #if (_OPENMP >= 202011)
  24. #include <omp.h>
  25. #endif
  26. #ifdef __cplusplus
  27. extern "C"
  28. {
  29. #endif // __cplusplus
  30. // Double precision offload function declarations
  // Complex-to-complex planners. Every prototype in this group takes
  // fftw_complex input and output pointers, a sign and flags, and ends with a
  // void* interop_obj parameter. The 1d/2d/3d, rank-n, many and guru forms are
  // registered further down in this header as OpenMP variants of the routines
  // with the same name minus the _omp_offload suffix.
  // NOTE(review): no declare variant for the guru64 form is visible in this
  // part of the header - confirm whether that is intentional.
  31. extern fftw_plan fftw_plan_dft_1d_omp_offload(int n0, fftw_complex* in,
  32. fftw_complex* out, int sign,
  33. unsigned flags,
  34. void* interop_obj);
  35. extern fftw_plan fftw_plan_dft_2d_omp_offload(int n0, int n1,
  36. fftw_complex* in,
  37. fftw_complex* out, int sign,
  38. unsigned flags,
  39. void* interop_obj);
  40. extern fftw_plan fftw_plan_dft_3d_omp_offload(int n0, int n1, int n2,
  41. fftw_complex* in,
  42. fftw_complex* out, int sign,
  43. unsigned flags,
  44. void* interop_obj);
  45. extern fftw_plan fftw_plan_dft_omp_offload(int rank, const int* n,
  46. fftw_complex* in,
  47. fftw_complex* out, int sign,
  48. unsigned flags,
  49. void* interop_obj);
  50. extern fftw_plan
  51. fftw_plan_many_dft_omp_offload(int rank, const int* n, int howmany,
  52. fftw_complex* in, const int* inembed,
  53. int istride, int idist, fftw_complex* out,
  54. const int* onembed, int ostride, int odist,
  55. int sign, unsigned flags, void* interop_obj);
  56. extern fftw_plan fftw_plan_guru_dft_omp_offload(
  57. int rank, const fftw_iodim* dims, int howmany_rank,
  58. const fftw_iodim* howmany_dims, fftw_complex* in, fftw_complex* out,
  59. int sign, unsigned flags, void* interop_obj);
  60. extern fftw_plan fftw_plan_guru64_dft_omp_offload(
  61. int rank, const fftw_iodim64* dims, int howmany_rank,
  62. const fftw_iodim64* howmany_dims, fftw_complex* in, fftw_complex* out,
  63. int sign, unsigned flags, void* interop_obj);
  64. // c2r (complex-to-real) planners, double precision
  // Input is fftw_complex*, output is double*. Unlike the complex-to-complex
  // planners there is no sign parameter; each prototype still ends with the
  // void* interop_obj parameter.
  65. extern fftw_plan fftw_plan_dft_c2r_1d_omp_offload(int n, fftw_complex* in,
  66. double* out,
  67. unsigned flags,
  68. void* interop_obj);
  69. extern fftw_plan fftw_plan_dft_c2r_2d_omp_offload(int nx, int ny,
  70. fftw_complex* in,
  71. double* out,
  72. unsigned flags,
  73. void* interop_obj);
  74. extern fftw_plan fftw_plan_dft_c2r_3d_omp_offload(int nx, int ny, int nz,
  75. fftw_complex* in,
  76. double* out,
  77. unsigned flags,
  78. void* interop_obj);
  79. extern fftw_plan fftw_plan_dft_c2r_omp_offload(int rank, const int* n,
  80. fftw_complex* in,
  81. double* out, unsigned flags,
  82. void* interop_obj);
  83. extern fftw_plan fftw_plan_guru_dft_c2r_omp_offload(
  84. int rank, const fftw_iodim* dims, int howmany_rank,
  85. const fftw_iodim* howmany_dims, fftw_complex* in, double* out,
  86. unsigned flags, void* interop_obj);
  87. extern fftw_plan fftw_plan_guru64_dft_c2r_omp_offload(
  88. int rank, const fftw_iodim64* dims, int howmany_rank,
  89. const fftw_iodim64* howmany_dims, fftw_complex* in, double* out,
  90. unsigned flags, void* interop_obj);
  91. extern fftw_plan fftw_plan_many_dft_c2r_omp_offload(
  92. int rank, const int* n, int howmany, fftw_complex* in,
  93. const int* inembed, int istride, int idist, double* out,
  94. const int* onembed, int ostride, int odist, unsigned flags,
  95. void* interop_obj);
  96. // r2c (real-to-complex) planners, double precision
  // Input is double*, output is fftw_complex*. There is no sign parameter;
  // each prototype ends with the void* interop_obj parameter.
  97. extern fftw_plan fftw_plan_dft_r2c_1d_omp_offload(int n, double* in,
  98. fftw_complex* out,
  99. unsigned flags,
  100. void* interop_obj);
  101. extern fftw_plan fftw_plan_dft_r2c_2d_omp_offload(int nx, int ny,
  102. double* in,
  103. fftw_complex* out,
  104. unsigned flags,
  105. void* interop_obj);
  106. extern fftw_plan fftw_plan_dft_r2c_3d_omp_offload(int nx, int ny, int nz,
  107. double* in,
  108. fftw_complex* out,
  109. unsigned flags,
  110. void* interop_obj);
  111. extern fftw_plan fftw_plan_dft_r2c_omp_offload(int rank, const int* n,
  112. double* in,
  113. fftw_complex* out,
  114. unsigned flags,
  115. void* interop_obj);
  116. extern fftw_plan fftw_plan_guru_dft_r2c_omp_offload(
  117. int rank, const fftw_iodim* dims, int howmany_rank,
  118. const fftw_iodim* howmany_dims, double* in, fftw_complex* out,
  119. unsigned flags, void* interop_obj);
  120. extern fftw_plan fftw_plan_guru64_dft_r2c_omp_offload(
  121. int rank, const fftw_iodim64* dims, int howmany_rank,
  122. const fftw_iodim64* howmany_dims, double* in, fftw_complex* out,
  123. unsigned flags, void* interop_obj);
  124. extern fftw_plan fftw_plan_many_dft_r2c_omp_offload(
  125. int rank, const int* n, int howmany, double* in, const int* inembed,
  126. int istride, int idist, fftw_complex* out, const int* onembed,
  127. int ostride, int odist, unsigned flags, void* interop_obj);
  // Double precision execute routines (offload forms). These are the functions
  // registered further down as variants of fftw_execute, fftw_execute_dft,
  // fftw_execute_dft_r2c and fftw_execute_dft_c2r. Each takes the plan, the
  // in/out pointers where the base routine has them, and a trailing
  // void* interop_obj.
  128. extern void fftw_execute_omp_offload(const fftw_plan plan,
  129. void* interop_obj);
  130. extern void fftw_execute_dft_omp_offload(const fftw_plan plan,
  131. fftw_complex* in,
  132. fftw_complex* out,
  133. void* interop_obj);
  134. extern void fftw_execute_dft_r2c_omp_offload(const fftw_plan plan,
  135. double* in, fftw_complex* out,
  136. void* interop_obj);
  137. extern void fftw_execute_dft_c2r_omp_offload(const fftw_plan plan,
  138. fftw_complex* in, double* out,
  139. void* interop_obj);
  140. // Single precision offload function declarations
  // Complex-to-complex planners using the fftwf_ types. Every prototype takes
  // fftwf_complex input and output pointers, a sign and flags, and ends with a
  // void* interop_obj parameter. The 1d/2d/3d, rank-n, many and guru forms are
  // registered further down in this header as OpenMP variants of the
  // same-named fftwf_ routines without the _omp_offload suffix.
  // NOTE(review): no declare variant for the guru64 form is visible in this
  // part of the header - confirm whether that is intentional.
  141. extern fftwf_plan fftwf_plan_dft_1d_omp_offload(int n0, fftwf_complex* in,
  142. fftwf_complex* out,
  143. int sign, unsigned flags,
  144. void* interop_obj);
  145. extern fftwf_plan fftwf_plan_dft_2d_omp_offload(int n0, int n1,
  146. fftwf_complex* in,
  147. fftwf_complex* out,
  148. int sign, unsigned flags,
  149. void* interop_obj);
  150. extern fftwf_plan fftwf_plan_dft_3d_omp_offload(int n0, int n1, int n2,
  151. fftwf_complex* in,
  152. fftwf_complex* out,
  153. int sign, unsigned flags,
  154. void* interop_obj);
  155. extern fftwf_plan fftwf_plan_dft_omp_offload(int rank, const int* n,
  156. fftwf_complex* in,
  157. fftwf_complex* out, int sign,
  158. unsigned flags,
  159. void* interop_obj);
  160. extern fftwf_plan fftwf_plan_many_dft_omp_offload(
  161. int rank, const int* n, int howmany, fftwf_complex* in,
  162. const int* inembed, int istride, int idist, fftwf_complex* out,
  163. const int* onembed, int ostride, int odist, int sign, unsigned flags,
  164. void* interop_obj);
  165. extern fftwf_plan fftwf_plan_guru_dft_omp_offload(
  166. int rank, const fftwf_iodim* dims, int howmany_rank,
  167. const fftwf_iodim* howmany_dims, fftwf_complex* in, fftwf_complex* out,
  168. int sign, unsigned flags, void* interop_obj);
  169. extern fftwf_plan fftwf_plan_guru64_dft_omp_offload(
  170. int rank, const fftwf_iodim64* dims, int howmany_rank,
  171. const fftwf_iodim64* howmany_dims, fftwf_complex* in,
  172. fftwf_complex* out, int sign, unsigned flags, void* interop_obj);
  173. // c2r (complex-to-real) planners, single precision
  // Input is fftwf_complex*, output is float*. There is no sign parameter;
  // each prototype ends with the void* interop_obj parameter.
  174. extern fftwf_plan
  175. fftwf_plan_dft_c2r_1d_omp_offload(int n, fftwf_complex* in, float* out,
  176. unsigned flags, void* interop_obj);
  177. extern fftwf_plan fftwf_plan_dft_c2r_2d_omp_offload(int nx, int ny,
  178. fftwf_complex* in,
  179. float* out,
  180. unsigned flags,
  181. void* interop_obj);
  182. extern fftwf_plan fftwf_plan_dft_c2r_3d_omp_offload(int nx, int ny, int nz,
  183. fftwf_complex* in,
  184. float* out,
  185. unsigned flags,
  186. void* interop_obj);
  187. extern fftwf_plan fftwf_plan_dft_c2r_omp_offload(int rank, const int* n,
  188. fftwf_complex* in,
  189. float* out, unsigned flags,
  190. void* interop_obj);
  191. extern fftwf_plan fftwf_plan_guru_dft_c2r_omp_offload(
  192. int rank, const fftwf_iodim* dims, int howmany_rank,
  193. const fftwf_iodim* howmany_dims, fftwf_complex* in, float* out,
  194. unsigned flags, void* interop_obj);
  195. extern fftwf_plan fftwf_plan_guru64_dft_c2r_omp_offload(
  196. int rank, const fftwf_iodim64* dims, int howmany_rank,
  197. const fftwf_iodim64* howmany_dims, fftwf_complex* in, float* out,
  198. unsigned flags, void* interop_obj);
  199. extern fftwf_plan fftwf_plan_many_dft_c2r_omp_offload(
  200. int rank, const int* n, int howmany, fftwf_complex* in,
  201. const int* inembed, int istride, int idist, float* out,
  202. const int* onembed, int ostride, int odist, unsigned flags,
  203. void* interop_obj);
  204. // r2c (real-to-complex) planners, single precision
  // Input is float*, output is fftwf_complex*. There is no sign parameter;
  // each prototype ends with the void* interop_obj parameter.
  // NOTE(review): the matching declare variant registrations are presumably
  // later in the header, as for the double precision r2c planners - they are
  // not visible in this part of the file.
  205. extern fftwf_plan fftwf_plan_dft_r2c_1d_omp_offload(int n, float* in,
  206. fftwf_complex* out,
  207. unsigned flags,
  208. void* interop_obj);
  209. extern fftwf_plan fftwf_plan_dft_r2c_2d_omp_offload(int nx, int ny,
  210. float* in,
  211. fftwf_complex* out,
  212. unsigned flags,
  213. void* interop_obj);
  214. extern fftwf_plan fftwf_plan_dft_r2c_3d_omp_offload(int nx, int ny, int nz,
  215. float* in,
  216. fftwf_complex* out,
  217. unsigned flags,
  218. void* interop_obj);
  219. extern fftwf_plan fftwf_plan_dft_r2c_omp_offload(int rank, const int* n,
  220. float* in,
  221. fftwf_complex* out,
  222. unsigned flags,
  223. void* interop_obj);
  224. extern fftwf_plan fftwf_plan_guru_dft_r2c_omp_offload(
  225. int rank, const fftwf_iodim* dims, int howmany_rank,
  226. const fftwf_iodim* howmany_dims, float* in, fftwf_complex* out,
  227. unsigned flags, void* interop_obj);
  228. extern fftwf_plan fftwf_plan_guru64_dft_r2c_omp_offload(
  229. int rank, const fftwf_iodim64* dims, int howmany_rank,
  230. const fftwf_iodim64* howmany_dims, float* in, fftwf_complex* out,
  231. unsigned flags, void* interop_obj);
  232. extern fftwf_plan fftwf_plan_many_dft_r2c_omp_offload(
  233. int rank, const int* n, int howmany, float* in, const int* inembed,
  234. int istride, int idist, fftwf_complex* out, const int* onembed,
  235. int ostride, int odist, unsigned flags, void* interop_obj);
  // Single precision execute routines (offload forms). Same shape as the
  // double precision fftw_execute*_omp_offload prototypes, using fftwf_plan,
  // fftwf_complex and float; each ends with a trailing void* interop_obj.
  // NOTE(review): the matching declare variant registrations are presumably
  // later in the header - they are not visible in this part of the file.
  236. extern void fftwf_execute_omp_offload(const fftwf_plan plan,
  237. void* interop_obj);
  238. extern void fftwf_execute_dft_omp_offload(const fftwf_plan plan,
  239. fftwf_complex* in,
  240. fftwf_complex* out,
  241. void* interop_obj);
  242. extern void fftwf_execute_dft_r2c_omp_offload(const fftwf_plan plan,
  243. float* in, fftwf_complex* out,
  244. void* interop_obj);
  245. extern void fftwf_execute_dft_c2r_omp_offload(const fftwf_plan plan,
  246. fftwf_complex* in, float* out,
  247. void* interop_obj);
  248. // Double precision variant function declarations for the standard fftw routines
  // Every routine below follows the same three-part pattern:
  //  1. Only when _OPENMP >= 202011 (OpenMP 5.1): the _omp_offload function is
  //     registered as the variant for the `dispatch` construct on arch(gen).
  //     adjust_args(need_device_ptr : in, out) is applied to the in/out
  //     arguments, and append_args(interop(...)) supplies the extra trailing
  //     argument of the _omp_offload prototype.
  //  2. Unconditionally: the same function is registered as the variant for
  //     the `target variant dispatch` construct on arch(gen), with no
  //     adjust_args/append_args clauses.
  //  3. The extern prototype of the standard routine that both pragmas attach
  //     to. Its parameter names must stay `in` and `out` because the
  //     adjust_args clause refers to them by name.
  // This group covers the complex-to-complex planners.
  249. #if (_OPENMP >= 202011)
  250. #pragma omp declare variant(fftw_plan_dft_1d_omp_offload) \
  251. match(construct = {dispatch}, device = {arch(gen)}) \
  252. adjust_args(need_device_ptr : in, out) \
  253. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  254. #endif
  255. #pragma omp declare variant(fftw_plan_dft_1d_omp_offload) \
  256. match(construct = {target variant dispatch}, device = {arch(gen)})
  257. extern fftw_plan fftw_plan_dft_1d(int n0, fftw_complex* in,
  258. fftw_complex* out, int sign,
  259. unsigned flags);
  260. #if (_OPENMP >= 202011)
  261. #pragma omp declare variant(fftw_plan_dft_2d_omp_offload) \
  262. match(construct = {dispatch}, device = {arch(gen)}) \
  263. adjust_args(need_device_ptr : in, out) \
  264. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  265. #endif
  266. #pragma omp declare variant(fftw_plan_dft_2d_omp_offload) \
  267. match(construct = {target variant dispatch}, device = {arch(gen)})
  268. extern fftw_plan fftw_plan_dft_2d(int n0, int n1, fftw_complex* in,
  269. fftw_complex* out, int sign,
  270. unsigned flags);
  271. #if (_OPENMP >= 202011)
  272. #pragma omp declare variant(fftw_plan_dft_3d_omp_offload) \
  273. match(construct = {dispatch}, device = {arch(gen)}) \
  274. adjust_args(need_device_ptr : in, out) \
  275. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  276. #endif
  277. #pragma omp declare variant(fftw_plan_dft_3d_omp_offload) \
  278. match(construct = {target variant dispatch}, device = {arch(gen)})
  279. extern fftw_plan fftw_plan_dft_3d(int n0, int n1, int n2, fftw_complex* in,
  280. fftw_complex* out, int sign,
  281. unsigned flags);
  282. #if (_OPENMP >= 202011)
  283. #pragma omp declare variant(fftw_plan_dft_omp_offload) \
  284. match(construct = {dispatch}, device = {arch(gen)}) \
  285. adjust_args(need_device_ptr : in, out) \
  286. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  287. #endif
  288. #pragma omp declare variant(fftw_plan_dft_omp_offload) \
  289. match(construct = {target variant dispatch}, device = {arch(gen)})
  290. extern fftw_plan fftw_plan_dft(int rank, const int* n, fftw_complex* in,
  291. fftw_complex* out, int sign, unsigned flags);
  292. #if (_OPENMP >= 202011)
  293. #pragma omp declare variant(fftw_plan_many_dft_omp_offload) \
  294. match(construct = {dispatch}, device = {arch(gen)}) \
  295. adjust_args(need_device_ptr : in, out) \
  296. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  297. #endif
  298. #pragma omp declare variant(fftw_plan_many_dft_omp_offload) \
  299. match(construct = {target variant dispatch}, device = {arch(gen)})
  300. extern fftw_plan fftw_plan_many_dft(int rank, const int* n, int howmany,
  301. fftw_complex* in, const int* inembed,
  302. int istride, int idist,
  303. fftw_complex* out, const int* onembed,
  304. int ostride, int odist, int sign,
  305. unsigned flags);
  306. #if (_OPENMP >= 202011)
  307. #pragma omp declare variant(fftw_plan_guru_dft_omp_offload) \
  308. match(construct = {dispatch}, device = {arch(gen)}) \
  309. adjust_args(need_device_ptr : in, out) \
  310. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  311. #endif
  312. #pragma omp declare variant(fftw_plan_guru_dft_omp_offload) \
  313. match(construct = {target variant dispatch}, device = {arch(gen)})
  314. extern fftw_plan fftw_plan_guru_dft(int rank, const fftw_iodim* dims,
  315. int howmany_rank,
  316. const fftw_iodim* howmany_dims,
  317. fftw_complex* in, fftw_complex* out,
  318. int sign, unsigned flags);
  // Double precision complex-to-real planner variants. For each routine: a
  // `dispatch` variant guarded by _OPENMP >= 202011 (with adjust_args on
  // in/out and an appended interop argument), an unconditional
  // `target variant dispatch` variant, then the prototype of the standard
  // routine the pragmas attach to.
  319. #if (_OPENMP >= 202011)
  320. #pragma omp declare variant(fftw_plan_dft_c2r_1d_omp_offload) \
  321. match(construct = {dispatch}, device = {arch(gen)}) \
  322. adjust_args(need_device_ptr : in, out) \
  323. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  324. #endif
  325. // c2r (section marker; the dispatch pragma just above already belongs to fftw_plan_dft_c2r_1d)
  326. #pragma omp declare variant(fftw_plan_dft_c2r_1d_omp_offload) \
  327. match(construct = {target variant dispatch}, device = {arch(gen)})
  328. extern fftw_plan fftw_plan_dft_c2r_1d(int n, fftw_complex* in, double* out,
  329. unsigned flags);
  330. #if (_OPENMP >= 202011)
  331. #pragma omp declare variant(fftw_plan_dft_c2r_2d_omp_offload) \
  332. match(construct = {dispatch}, device = {arch(gen)}) \
  333. adjust_args(need_device_ptr : in, out) \
  334. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  335. #endif
  336. #pragma omp declare variant(fftw_plan_dft_c2r_2d_omp_offload) \
  337. match(construct = {target variant dispatch}, device = {arch(gen)})
  338. extern fftw_plan fftw_plan_dft_c2r_2d(int nx, int ny, fftw_complex* in,
  339. double* out, unsigned flags);
  340. #if (_OPENMP >= 202011)
  341. #pragma omp declare variant(fftw_plan_dft_c2r_3d_omp_offload) \
  342. match(construct = {dispatch}, device = {arch(gen)}) \
  343. adjust_args(need_device_ptr : in, out) \
  344. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  345. #endif
  346. #pragma omp declare variant(fftw_plan_dft_c2r_3d_omp_offload) \
  347. match(construct = {target variant dispatch}, device = {arch(gen)})
  348. extern fftw_plan fftw_plan_dft_c2r_3d(int nx, int ny, int nz,
  349. fftw_complex* in, double* out,
  350. unsigned flags);
  351. #if (_OPENMP >= 202011)
  352. #pragma omp declare variant(fftw_plan_dft_c2r_omp_offload) \
  353. match(construct = {dispatch}, device = {arch(gen)}) \
  354. adjust_args(need_device_ptr : in, out) \
  355. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  356. #endif
  357. #pragma omp declare variant(fftw_plan_dft_c2r_omp_offload) \
  358. match(construct = {target variant dispatch}, device = {arch(gen)})
  359. extern fftw_plan fftw_plan_dft_c2r(int rank, const int* n, fftw_complex* in,
  360. double* out, unsigned flags);
  361. #if (_OPENMP >= 202011)
  362. #pragma omp declare variant(fftw_plan_guru_dft_c2r_omp_offload) \
  363. match(construct = {dispatch}, device = {arch(gen)}) \
  364. adjust_args(need_device_ptr : in, out) \
  365. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  366. #endif
  367. #pragma omp declare variant(fftw_plan_guru_dft_c2r_omp_offload) \
  368. match(construct = {target variant dispatch}, device = {arch(gen)})
  369. extern fftw_plan fftw_plan_guru_dft_c2r(int rank, const fftw_iodim* dims,
  370. int howmany_rank,
  371. const fftw_iodim* howmany_dims,
  372. fftw_complex* in, double* out,
  373. unsigned flags);
  374. #if (_OPENMP >= 202011)
  375. #pragma omp declare variant(fftw_plan_many_dft_c2r_omp_offload) \
  376. match(construct = {dispatch}, device = {arch(gen)}) \
  377. adjust_args(need_device_ptr : in, out) \
  378. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  379. #endif
  380. #pragma omp declare variant(fftw_plan_many_dft_c2r_omp_offload) \
  381. match(construct = {target variant dispatch}, device = {arch(gen)})
  382. extern fftw_plan fftw_plan_many_dft_c2r(int rank, const int* n, int howmany,
  383. fftw_complex* in,
  384. const int* inembed, int istride,
  385. int idist, double* out,
  386. const int* onembed, int ostride,
  387. int odist, unsigned flags);
  // Double precision real-to-complex planner variants. For each routine: a
  // `dispatch` variant guarded by _OPENMP >= 202011 (with adjust_args on
  // in/out and an appended interop argument), an unconditional
  // `target variant dispatch` variant, then the prototype of the standard
  // routine the pragmas attach to.
  388. #if (_OPENMP >= 202011)
  389. #pragma omp declare variant(fftw_plan_dft_r2c_1d_omp_offload) \
  390. match(construct = {dispatch}, device = {arch(gen)}) \
  391. adjust_args(need_device_ptr : in, out) \
  392. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  393. #endif
  394. // r2c (section marker; the dispatch pragma just above already belongs to fftw_plan_dft_r2c_1d)
  395. #pragma omp declare variant(fftw_plan_dft_r2c_1d_omp_offload) \
  396. match(construct = {target variant dispatch}, device = {arch(gen)})
  397. extern fftw_plan fftw_plan_dft_r2c_1d(int n, double* in, fftw_complex* out,
  398. unsigned flags);
  399. #if (_OPENMP >= 202011)
  400. #pragma omp declare variant(fftw_plan_dft_r2c_2d_omp_offload) \
  401. match(construct = {dispatch}, device = {arch(gen)}) \
  402. adjust_args(need_device_ptr : in, out) \
  403. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  404. #endif
  405. #pragma omp declare variant(fftw_plan_dft_r2c_2d_omp_offload) \
  406. match(construct = {target variant dispatch}, device = {arch(gen)})
  407. extern fftw_plan fftw_plan_dft_r2c_2d(int nx, int ny, double* in,
  408. fftw_complex* out, unsigned flags);
  409. #if (_OPENMP >= 202011)
  410. #pragma omp declare variant(fftw_plan_dft_r2c_3d_omp_offload) \
  411. match(construct = {dispatch}, device = {arch(gen)}) \
  412. adjust_args(need_device_ptr : in, out) \
  413. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  414. #endif
  415. #pragma omp declare variant(fftw_plan_dft_r2c_3d_omp_offload) \
  416. match(construct = {target variant dispatch}, device = {arch(gen)})
  417. extern fftw_plan fftw_plan_dft_r2c_3d(int nx, int ny, int nz, double* in,
  418. fftw_complex* out, unsigned flags);
  419. #if (_OPENMP >= 202011)
  420. #pragma omp declare variant(fftw_plan_dft_r2c_omp_offload) \
  421. match(construct = {dispatch}, device = {arch(gen)}) \
  422. adjust_args(need_device_ptr : in, out) \
  423. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  424. #endif
  425. #pragma omp declare variant(fftw_plan_dft_r2c_omp_offload) \
  426. match(construct = {target variant dispatch}, device = {arch(gen)})
  427. extern fftw_plan fftw_plan_dft_r2c(int rank, const int* n, double* in,
  428. fftw_complex* out, unsigned flags);
  429. #if (_OPENMP >= 202011)
  430. #pragma omp declare variant(fftw_plan_guru_dft_r2c_omp_offload) \
  431. match(construct = {dispatch}, device = {arch(gen)}) \
  432. adjust_args(need_device_ptr : in, out) \
  433. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  434. #endif
  435. #pragma omp declare variant(fftw_plan_guru_dft_r2c_omp_offload) \
  436. match(construct = {target variant dispatch}, device = {arch(gen)})
  437. extern fftw_plan fftw_plan_guru_dft_r2c(int rank, const fftw_iodim* dims,
  438. int howmany_rank,
  439. const fftw_iodim* howmany_dims,
  440. double* in, fftw_complex* out,
  441. unsigned flags);
  442. #if (_OPENMP >= 202011)
  443. #pragma omp declare variant(fftw_plan_many_dft_r2c_omp_offload) \
  444. match(construct = {dispatch}, device = {arch(gen)}) \
  445. adjust_args(need_device_ptr : in, out) \
  446. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  447. #endif
  448. #pragma omp declare variant(fftw_plan_many_dft_r2c_omp_offload) \
  449. match(construct = {target variant dispatch}, device = {arch(gen)})
  450. extern fftw_plan fftw_plan_many_dft_r2c(int rank, const int* n, int howmany,
  451. double* in, const int* inembed,
  452. int istride, int idist,
  453. fftw_complex* out,
  454. const int* onembed, int ostride,
  455. int odist, unsigned flags);
  // Double precision execute routine variants. Same pragma pattern as the
  // planners: a `dispatch` variant guarded by _OPENMP >= 202011, then an
  // unconditional `target variant dispatch` variant, then the standard
  // prototype. fftw_execute has no in/out parameters, so its dispatch variant
  // carries only append_args; the _dft, _dft_r2c and _dft_c2r forms also apply
  // adjust_args(need_device_ptr) to in and out.
  456. #if (_OPENMP >= 202011)
  457. #pragma omp declare variant(fftw_execute_omp_offload) \
  458. match(construct = {dispatch}, device = {arch(gen)}) \
  459. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  460. #endif
  461. #pragma omp declare variant(fftw_execute_omp_offload) \
  462. match(construct = {target variant dispatch}, device = {arch(gen)})
  463. extern void fftw_execute(const fftw_plan plan);
  464. #if (_OPENMP >= 202011)
  465. #pragma omp declare variant(fftw_execute_dft_omp_offload) \
  466. match(construct = {dispatch}, device = {arch(gen)}) \
  467. adjust_args(need_device_ptr : in, out) \
  468. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  469. #endif
  470. #pragma omp declare variant(fftw_execute_dft_omp_offload) \
  471. match(construct = {target variant dispatch}, device = {arch(gen)})
  472. extern void fftw_execute_dft(const fftw_plan plan, fftw_complex* in,
  473. fftw_complex* out);
  474. #if (_OPENMP >= 202011)
  475. #pragma omp declare variant(fftw_execute_dft_r2c_omp_offload) \
  476. match(construct = {dispatch}, device = {arch(gen)}) \
  477. adjust_args(need_device_ptr : in, out) \
  478. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  479. #endif
  480. #pragma omp declare variant(fftw_execute_dft_r2c_omp_offload) \
  481. match(construct = {target variant dispatch}, device = {arch(gen)})
  482. extern void fftw_execute_dft_r2c(const fftw_plan plan, double* in,
  483. fftw_complex* out);
  484. #if (_OPENMP >= 202011)
  485. #pragma omp declare variant(fftw_execute_dft_c2r_omp_offload) \
  486. match(construct = {dispatch}, device = {arch(gen)}) \
  487. adjust_args(need_device_ptr : in, out) \
  488. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  489. #endif
  490. #pragma omp declare variant(fftw_execute_dft_c2r_omp_offload) \
  491. match(construct = {target variant dispatch}, device = {arch(gen)})
  492. extern void fftw_execute_dft_c2r(const fftw_plan plan, fftw_complex* in,
  493. double* out);
  494. // Single precision variant function declarations for the standard fftwf
  495. // routines
  // Every routine below follows the same three-part pattern:
  //  1. Only when _OPENMP >= 202011 (OpenMP 5.1): the _omp_offload function is
  //     registered as the variant for the `dispatch` construct on arch(gen),
  //     with adjust_args(need_device_ptr : in, out) and an interop argument
  //     appended through append_args.
  //  2. Unconditionally: the same function is registered as the variant for
  //     the `target variant dispatch` construct on arch(gen).
  //  3. The extern prototype of the standard fftwf_ routine that both pragmas
  //     attach to; its `in`/`out` parameter names are referenced by adjust_args.
  // This group covers the complex-to-complex planners.
  496. #if (_OPENMP >= 202011)
  497. #pragma omp declare variant(fftwf_plan_dft_1d_omp_offload) \
  498. match(construct = {dispatch}, device = {arch(gen)}) \
  499. adjust_args(need_device_ptr : in, out) \
  500. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  501. #endif
  502. #pragma omp declare variant(fftwf_plan_dft_1d_omp_offload) \
  503. match(construct = {target variant dispatch}, device = {arch(gen)})
  504. extern fftwf_plan fftwf_plan_dft_1d(int n0, fftwf_complex* in,
  505. fftwf_complex* out, int sign,
  506. unsigned flags);
  507. #if (_OPENMP >= 202011)
  508. #pragma omp declare variant(fftwf_plan_dft_2d_omp_offload) \
  509. match(construct = {dispatch}, device = {arch(gen)}) \
  510. adjust_args(need_device_ptr : in, out) \
  511. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  512. #endif
  513. #pragma omp declare variant(fftwf_plan_dft_2d_omp_offload) \
  514. match(construct = {target variant dispatch}, device = {arch(gen)})
  515. extern fftwf_plan fftwf_plan_dft_2d(int n0, int n1, fftwf_complex* in,
  516. fftwf_complex* out, int sign,
  517. unsigned flags);
  518. #if (_OPENMP >= 202011)
  519. #pragma omp declare variant(fftwf_plan_dft_3d_omp_offload) \
  520. match(construct = {dispatch}, device = {arch(gen)}) \
  521. adjust_args(need_device_ptr : in, out) \
  522. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  523. #endif
  524. #pragma omp declare variant(fftwf_plan_dft_3d_omp_offload) \
  525. match(construct = {target variant dispatch}, device = {arch(gen)})
  526. extern fftwf_plan fftwf_plan_dft_3d(int n0, int n1, int n2,
  527. fftwf_complex* in, fftwf_complex* out,
  528. int sign, unsigned flags);
  529. #if (_OPENMP >= 202011)
  530. #pragma omp declare variant(fftwf_plan_dft_omp_offload) \
  531. match(construct = {dispatch}, device = {arch(gen)}) \
  532. adjust_args(need_device_ptr : in, out) \
  533. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  534. #endif
  535. #pragma omp declare variant(fftwf_plan_dft_omp_offload) \
  536. match(construct = {target variant dispatch}, device = {arch(gen)})
  537. extern fftwf_plan fftwf_plan_dft(int rank, const int* n, fftwf_complex* in,
  538. fftwf_complex* out, int sign,
  539. unsigned flags);
  540. #if (_OPENMP >= 202011)
  541. #pragma omp declare variant(fftwf_plan_many_dft_omp_offload) \
  542. match(construct = {dispatch}, device = {arch(gen)}) \
  543. adjust_args(need_device_ptr : in, out) \
  544. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  545. #endif
  546. #pragma omp declare variant(fftwf_plan_many_dft_omp_offload) \
  547. match(construct = {target variant dispatch}, device = {arch(gen)})
  548. extern fftwf_plan fftwf_plan_many_dft(int rank, const int* n, int howmany,
  549. fftwf_complex* in, const int* inembed,
  550. int istride, int idist,
  551. fftwf_complex* out,
  552. const int* onembed, int ostride,
  553. int odist, int sign, unsigned flags);
  554. #if (_OPENMP >= 202011)
  555. #pragma omp declare variant(fftwf_plan_guru_dft_omp_offload) \
  556. match(construct = {dispatch}, device = {arch(gen)}) \
  557. adjust_args(need_device_ptr : in, out) \
  558. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  559. #endif
  560. #pragma omp declare variant(fftwf_plan_guru_dft_omp_offload) \
  561. match(construct = {target variant dispatch}, device = {arch(gen)})
  562. extern fftwf_plan fftwf_plan_guru_dft(int rank, const fftwf_iodim* dims,
  563. int howmany_rank,
  564. const fftwf_iodim* howmany_dims,
  565. fftwf_complex* in, fftwf_complex* out,
  566. int sign, unsigned flags);
  567. #if (_OPENMP >= 202011)
  568. #pragma omp declare variant(fftwf_plan_dft_c2r_1d_omp_offload) \
  569. match(construct = {dispatch}, device = {arch(gen)}) \
  570. adjust_args(need_device_ptr : in, out) \
  571. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  572. #endif
  573. // c2r
  574. #pragma omp declare variant(fftwf_plan_dft_c2r_1d_omp_offload) \
  575. match(construct = {target variant dispatch}, device = {arch(gen)})
  576. extern fftwf_plan fftwf_plan_dft_c2r_1d(int n, fftwf_complex* in,
  577. float* out, unsigned flags);
  578. #if (_OPENMP >= 202011)
  579. #pragma omp declare variant(fftwf_plan_dft_c2r_2d_omp_offload) \
  580. match(construct = {dispatch}, device = {arch(gen)}) \
  581. adjust_args(need_device_ptr : in, out) \
  582. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  583. #endif
  584. #pragma omp declare variant(fftwf_plan_dft_c2r_2d_omp_offload) \
  585. match(construct = {target variant dispatch}, device = {arch(gen)})
  586. extern fftwf_plan fftwf_plan_dft_c2r_2d(int nx, int ny, fftwf_complex* in,
  587. float* out, unsigned flags);
  // fftwf_plan_dft_c2r_3d (fftwf_complex* in -> float* out, sizes nx, ny, nz)
  // and its offload variant fftwf_plan_dft_c2r_3d_omp_offload for arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // Parameter meanings presumably follow the FFTW3 API; not defined here.
  588. #if (_OPENMP >= 202011)
  589. #pragma omp declare variant(fftwf_plan_dft_c2r_3d_omp_offload) \
  590. match(construct = {dispatch}, device = {arch(gen)}) \
  591. adjust_args(need_device_ptr : in, out) \
  592. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  593. #endif
  594. #pragma omp declare variant(fftwf_plan_dft_c2r_3d_omp_offload) \
  595. match(construct = {target variant dispatch}, device = {arch(gen)})
  596. extern fftwf_plan fftwf_plan_dft_c2r_3d(int nx, int ny, int nz,
  597. fftwf_complex* in, float* out,
  598. unsigned flags);
  // fftwf_plan_dft_c2r (rank + `const int* n` size array; fftwf_complex* in
  // -> float* out) and its offload variant fftwf_plan_dft_c2r_omp_offload for
  // arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //    Only `in` and `out` are listed in adjust_args; `n` is not.
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // Parameter meanings presumably follow the FFTW3 API; not defined here.
  599. #if (_OPENMP >= 202011)
  600. #pragma omp declare variant(fftwf_plan_dft_c2r_omp_offload) \
  601. match(construct = {dispatch}, device = {arch(gen)}) \
  602. adjust_args(need_device_ptr : in, out) \
  603. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  604. #endif
  605. #pragma omp declare variant(fftwf_plan_dft_c2r_omp_offload) \
  606. match(construct = {target variant dispatch}, device = {arch(gen)})
  607. extern fftwf_plan fftwf_plan_dft_c2r(int rank, const int* n,
  608. fftwf_complex* in, float* out,
  609. unsigned flags);
  // fftwf_plan_guru_dft_c2r (fftwf_iodim-described dims and howmany_dims;
  // fftwf_complex* in -> float* out) and its offload variant
  // fftwf_plan_guru_dft_c2r_omp_offload for arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //    The `dims`/`howmany_dims` pointers are not listed in adjust_args.
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // Parameter meanings presumably follow the FFTW3 guru API; not defined here.
  610. #if (_OPENMP >= 202011)
  611. #pragma omp declare variant(fftwf_plan_guru_dft_c2r_omp_offload) \
  612. match(construct = {dispatch}, device = {arch(gen)}) \
  613. adjust_args(need_device_ptr : in, out) \
  614. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  615. #endif
  616. #pragma omp declare variant(fftwf_plan_guru_dft_c2r_omp_offload) \
  617. match(construct = {target variant dispatch}, device = {arch(gen)})
  618. extern fftwf_plan fftwf_plan_guru_dft_c2r(int rank, const fftwf_iodim* dims,
  619. int howmany_rank,
  620. const fftwf_iodim* howmany_dims,
  621. fftwf_complex* in, float* out,
  622. unsigned flags);
  // fftwf_plan_many_dft_c2r (howmany transforms with inembed/istride/idist
  // and onembed/ostride/odist layout arguments; fftwf_complex* in ->
  // float* out) and its offload variant fftwf_plan_many_dft_c2r_omp_offload
  // for arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //    `n`, `inembed` and `onembed` are not listed in adjust_args.
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // Parameter meanings presumably follow the FFTW3 advanced API; not defined
  // here.
  623. #if (_OPENMP >= 202011)
  624. #pragma omp declare variant(fftwf_plan_many_dft_c2r_omp_offload) \
  625. match(construct = {dispatch}, device = {arch(gen)}) \
  626. adjust_args(need_device_ptr : in, out) \
  627. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  628. #endif
  629. #pragma omp declare variant(fftwf_plan_many_dft_c2r_omp_offload) \
  630. match(construct = {target variant dispatch}, device = {arch(gen)})
  631. extern fftwf_plan fftwf_plan_many_dft_c2r(int rank, const int* n,
  632. int howmany, fftwf_complex* in,
  633. const int* inembed, int istride,
  634. int idist, float* out,
  635. const int* onembed, int ostride,
  636. int odist, unsigned flags);
  // fftwf_plan_dft_r2c_1d (float* in -> fftwf_complex* out) and its offload
  // variant fftwf_plan_dft_r2c_1d_omp_offload, selected for device arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches the
  //    `dispatch` construct, requests `in` and `out` as device pointers
  //    (adjust_args) and appends an interop argument that prefers "sycl",
  //    then "level_zero", with targetsync (append_args).
  //  - Unguarded directive: matches `target variant dispatch` and carries no
  //    adjust_args/append_args clauses.
  //    NOTE(review): `target variant dispatch` looks like a vendor-specific
  //    construct name -- confirm against the compiler documentation.
  // Parameter meanings presumably follow the FFTW3 API; not defined here.
  637. #if (_OPENMP >= 202011)
  638. #pragma omp declare variant(fftwf_plan_dft_r2c_1d_omp_offload) \
  639. match(construct = {dispatch}, device = {arch(gen)}) \
  640. adjust_args(need_device_ptr : in, out) \
  641. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  642. #endif
  643. // r2c: section marker -- the planners from here on take float* input and fftwf_complex* output
  644. #pragma omp declare variant(fftwf_plan_dft_r2c_1d_omp_offload) \
  645. match(construct = {target variant dispatch}, device = {arch(gen)})
  646. extern fftwf_plan fftwf_plan_dft_r2c_1d(int n, float* in,
  647. fftwf_complex* out, unsigned flags);
  // fftwf_plan_dft_r2c_2d (float* in -> fftwf_complex* out, sizes nx, ny) and
  // its offload variant fftwf_plan_dft_r2c_2d_omp_offload for arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // Parameter meanings presumably follow the FFTW3 API; not defined here.
  648. #if (_OPENMP >= 202011)
  649. #pragma omp declare variant(fftwf_plan_dft_r2c_2d_omp_offload) \
  650. match(construct = {dispatch}, device = {arch(gen)}) \
  651. adjust_args(need_device_ptr : in, out) \
  652. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  653. #endif
  654. #pragma omp declare variant(fftwf_plan_dft_r2c_2d_omp_offload) \
  655. match(construct = {target variant dispatch}, device = {arch(gen)})
  656. extern fftwf_plan fftwf_plan_dft_r2c_2d(int nx, int ny, float* in,
  657. fftwf_complex* out, unsigned flags);
  // fftwf_plan_dft_r2c_3d (float* in -> fftwf_complex* out, sizes nx, ny, nz)
  // and its offload variant fftwf_plan_dft_r2c_3d_omp_offload for arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // Parameter meanings presumably follow the FFTW3 API; not defined here.
  658. #if (_OPENMP >= 202011)
  659. #pragma omp declare variant(fftwf_plan_dft_r2c_3d_omp_offload) \
  660. match(construct = {dispatch}, device = {arch(gen)}) \
  661. adjust_args(need_device_ptr : in, out) \
  662. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  663. #endif
  664. #pragma omp declare variant(fftwf_plan_dft_r2c_3d_omp_offload) \
  665. match(construct = {target variant dispatch}, device = {arch(gen)})
  666. extern fftwf_plan fftwf_plan_dft_r2c_3d(int nx, int ny, int nz, float* in,
  667. fftwf_complex* out, unsigned flags);
  // fftwf_plan_dft_r2c (rank + `const int* n` size array; float* in ->
  // fftwf_complex* out) and its offload variant fftwf_plan_dft_r2c_omp_offload
  // for arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //    Only `in` and `out` are listed in adjust_args; `n` is not.
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // Parameter meanings presumably follow the FFTW3 API; not defined here.
  668. #if (_OPENMP >= 202011)
  669. #pragma omp declare variant(fftwf_plan_dft_r2c_omp_offload) \
  670. match(construct = {dispatch}, device = {arch(gen)}) \
  671. adjust_args(need_device_ptr : in, out) \
  672. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  673. #endif
  674. #pragma omp declare variant(fftwf_plan_dft_r2c_omp_offload) \
  675. match(construct = {target variant dispatch}, device = {arch(gen)})
  676. extern fftwf_plan fftwf_plan_dft_r2c(int rank, const int* n, float* in,
  677. fftwf_complex* out, unsigned flags);
  // fftwf_plan_guru_dft_r2c (fftwf_iodim-described dims and howmany_dims;
  // float* in -> fftwf_complex* out) and its offload variant
  // fftwf_plan_guru_dft_r2c_omp_offload for arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //    The `dims`/`howmany_dims` pointers are not listed in adjust_args.
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // Parameter meanings presumably follow the FFTW3 guru API; not defined here.
  678. #if (_OPENMP >= 202011)
  679. #pragma omp declare variant(fftwf_plan_guru_dft_r2c_omp_offload) \
  680. match(construct = {dispatch}, device = {arch(gen)}) \
  681. adjust_args(need_device_ptr : in, out) \
  682. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  683. #endif
  684. #pragma omp declare variant(fftwf_plan_guru_dft_r2c_omp_offload) \
  685. match(construct = {target variant dispatch}, device = {arch(gen)})
  686. extern fftwf_plan fftwf_plan_guru_dft_r2c(int rank, const fftwf_iodim* dims,
  687. int howmany_rank,
  688. const fftwf_iodim* howmany_dims,
  689. float* in, fftwf_complex* out,
  690. unsigned flags);
  // fftwf_plan_many_dft_r2c (howmany transforms with inembed/istride/idist
  // and onembed/ostride/odist layout arguments; float* in ->
  // fftwf_complex* out) and its offload variant
  // fftwf_plan_many_dft_r2c_omp_offload for arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //    `n`, `inembed` and `onembed` are not listed in adjust_args.
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // Parameter meanings presumably follow the FFTW3 advanced API; not defined
  // here.
  691. #if (_OPENMP >= 202011)
  692. #pragma omp declare variant(fftwf_plan_many_dft_r2c_omp_offload) \
  693. match(construct = {dispatch}, device = {arch(gen)}) \
  694. adjust_args(need_device_ptr : in, out) \
  695. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  696. #endif
  697. #pragma omp declare variant(fftwf_plan_many_dft_r2c_omp_offload) \
  698. match(construct = {target variant dispatch}, device = {arch(gen)})
  699. extern fftwf_plan fftwf_plan_many_dft_r2c(int rank, const int* n,
  700. int howmany, float* in,
  701. const int* inembed, int istride,
  702. int idist, fftwf_complex* out,
  703. const int* onembed, int ostride,
  704. int odist, unsigned flags);
  // fftwf_execute(plan) and its offload variant fftwf_execute_omp_offload for
  // device arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches the
  //    `dispatch` construct and appends an interop argument (prefers "sycl",
  //    then "level_zero"; targetsync). There is no adjust_args clause here:
  //    the prototype's only parameter is `plan`.
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // NOTE(review): execution semantics presumably follow FFTW3's
  // fftwf_execute; nothing in this header defines them.
  705. #if (_OPENMP >= 202011)
  706. #pragma omp declare variant(fftwf_execute_omp_offload) \
  707. match(construct = {dispatch}, device = {arch(gen)}) \
  708. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  709. #endif
  710. #pragma omp declare variant(fftwf_execute_omp_offload) \
  711. match(construct = {target variant dispatch}, device = {arch(gen)})
  712. extern void fftwf_execute(const fftwf_plan plan);
  // fftwf_execute_dft(plan, fftwf_complex* in, fftwf_complex* out) and its
  // offload variant fftwf_execute_dft_omp_offload for device arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // NOTE(review): presumably the FFTW3 new-array execute for complex DFT
  // plans -- confirm against the FFTW manual.
  713. #if (_OPENMP >= 202011)
  714. #pragma omp declare variant(fftwf_execute_dft_omp_offload) \
  715. match(construct = {dispatch}, device = {arch(gen)}) \
  716. adjust_args(need_device_ptr : in, out) \
  717. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  718. #endif
  719. #pragma omp declare variant(fftwf_execute_dft_omp_offload) \
  720. match(construct = {target variant dispatch}, device = {arch(gen)})
  721. extern void fftwf_execute_dft(const fftwf_plan plan, fftwf_complex* in,
  722. fftwf_complex* out);
  // fftwf_execute_dft_r2c(plan, float* in, fftwf_complex* out) and its
  // offload variant fftwf_execute_dft_r2c_omp_offload for device arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // NOTE(review): presumably the FFTW3 new-array execute for real-to-complex
  // plans -- confirm against the FFTW manual.
  723. #if (_OPENMP >= 202011)
  724. #pragma omp declare variant(fftwf_execute_dft_r2c_omp_offload) \
  725. match(construct = {dispatch}, device = {arch(gen)}) \
  726. adjust_args(need_device_ptr : in, out) \
  727. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  728. #endif
  729. #pragma omp declare variant(fftwf_execute_dft_r2c_omp_offload) \
  730. match(construct = {target variant dispatch}, device = {arch(gen)})
  731. extern void fftwf_execute_dft_r2c(const fftwf_plan plan, float* in,
  732. fftwf_complex* out);
  // fftwf_execute_dft_c2r(plan, fftwf_complex* in, float* out) and its
  // offload variant fftwf_execute_dft_c2r_omp_offload for device arch(gen).
  //  - Guarded directive (_OPENMP >= 202011, i.e. OpenMP 5.1): matches
  //    `dispatch`, requests `in`/`out` as device pointers and appends an
  //    interop argument (prefers "sycl", then "level_zero"; targetsync).
  //  - Unguarded directive: matches `target variant dispatch`, no extra
  //    clauses.
  // NOTE(review): presumably the FFTW3 new-array execute for complex-to-real
  // plans -- confirm against the FFTW manual.
  733. #if (_OPENMP >= 202011)
  734. #pragma omp declare variant(fftwf_execute_dft_c2r_omp_offload) \
  735. match(construct = {dispatch}, device = {arch(gen)}) \
  736. adjust_args(need_device_ptr : in, out) \
  737. append_args(interop(prefer_type("sycl","level_zero"),targetsync))
  738. #endif
  739. #pragma omp declare variant(fftwf_execute_dft_c2r_omp_offload) \
  740. match(construct = {target variant dispatch}, device = {arch(gen)})
  741. extern void fftwf_execute_dft_c2r(const fftwf_plan plan, fftwf_complex* in,
  742. float* out);
  743. #ifdef __cplusplus
  744. }
  745. #endif // __cplusplus
  746. #endif