bfloat16_float.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. // This file is part of Eigen, a lightweight C++ template library
  2. // for linear algebra.
  3. //
  4. // This Source Code Form is subject to the terms of the Mozilla
  5. // Public License v. 2.0. If a copy of the MPL was not distributed
  6. // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
  #include <math.h>

  #include <cstring>
  #include <memory>
  #include <sstream>

  #include "main.h"
  #include <Eigen/src/Core/arch/Default/BFloat16.h>
  // Asserts that the raw 16-bit pattern stored in `h` equals `bits`
  // (both sides are compared as numext::uint16_t).
  #define VERIFY_BFLOAT16_BITS_EQUAL(h, bits)                   \
    VERIFY_IS_EQUAL((numext::bit_cast<numext::uint16_t>(h)),   \
                    (static_cast<numext::uint16_t>(bits)))

  // Make sure it's possible to forward declare Eigen::bfloat16
  namespace Eigen {
  struct bfloat16;
  }  // namespace Eigen

  using Eigen::bfloat16;
  // Assembles a 32-bit pattern from its fields and returns it reinterpreted as
  // a float.
  //
  //   sign          - shifted to bit 31
  //   exponent      - shifted to bit 23
  //   high_mantissa - shifted to bit 16 (the upper mantissa bits)
  //   low_mantissa  - occupies the low bits unshifted
  //
  // The fields are added together without masking, so each argument is
  // expected to fit within its field; an oversized value carries into the
  // neighbouring field.
  float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa,
                      uint32_t low_mantissa) {
    const uint32_t src =
        (sign << 31) + (exponent << 23) + (high_mantissa << 16) + low_mantissa;
    float dest;
    // Copy the bytes rather than casting pointers, which would break aliasing rules.
    std::memcpy(&dest, &src, sizeof(dest));
    return dest;
  }
  27. void test_truncate(float input, float expected_truncation, float expected_rounding){
  28. bfloat16 truncated = Eigen::bfloat16_impl::truncate_to_bfloat16(input);
  29. bfloat16 rounded = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(input);
  30. if ((numext::isnan)(input)){
  31. VERIFY((numext::isnan)(static_cast<float>(truncated)) || (numext::isinf)(static_cast<float>(truncated)));
  32. VERIFY((numext::isnan)(static_cast<float>(rounded)) || (numext::isinf)(static_cast<float>(rounded)));
  33. return;
  34. }
  35. VERIFY_IS_EQUAL(expected_truncation, static_cast<float>(truncated));
  36. VERIFY_IS_EQUAL(expected_rounding, static_cast<float>(rounded));
  37. }
  38. template<typename T>
  39. void test_roundtrip() {
  40. // Representable T round trip via bfloat16
  41. VERIFY_IS_EQUAL((internal::cast<bfloat16,T>(internal::cast<T,bfloat16>(-std::numeric_limits<T>::infinity()))), -std::numeric_limits<T>::infinity());
  42. VERIFY_IS_EQUAL((internal::cast<bfloat16,T>(internal::cast<T,bfloat16>(std::numeric_limits<T>::infinity()))), std::numeric_limits<T>::infinity());
  43. VERIFY_IS_EQUAL((internal::cast<bfloat16,T>(internal::cast<T,bfloat16>(T(-1.0)))), T(-1.0));
  44. VERIFY_IS_EQUAL((internal::cast<bfloat16,T>(internal::cast<T,bfloat16>(T(-0.5)))), T(-0.5));
  45. VERIFY_IS_EQUAL((internal::cast<bfloat16,T>(internal::cast<T,bfloat16>(T(-0.0)))), T(-0.0));
  46. VERIFY_IS_EQUAL((internal::cast<bfloat16,T>(internal::cast<T,bfloat16>(T(1.0)))), T(1.0));
  47. VERIFY_IS_EQUAL((internal::cast<bfloat16,T>(internal::cast<T,bfloat16>(T(0.5)))), T(0.5));
  48. VERIFY_IS_EQUAL((internal::cast<bfloat16,T>(internal::cast<T,bfloat16>(T(0.0)))), T(0.0));
  49. }
  50. void test_conversion()
  51. {
  52. using Eigen::bfloat16_impl::__bfloat16_raw;
  53. // Round-trip casts
  54. VERIFY_IS_EQUAL(
  55. numext::bit_cast<bfloat16>(numext::bit_cast<numext::uint16_t>(bfloat16(1.0f))),
  56. bfloat16(1.0f));
  57. VERIFY_IS_EQUAL(
  58. numext::bit_cast<bfloat16>(numext::bit_cast<numext::uint16_t>(bfloat16(0.5f))),
  59. bfloat16(0.5f));
  60. VERIFY_IS_EQUAL(
  61. numext::bit_cast<bfloat16>(numext::bit_cast<numext::uint16_t>(bfloat16(-0.33333f))),
  62. bfloat16(-0.33333f));
  63. VERIFY_IS_EQUAL(
  64. numext::bit_cast<bfloat16>(numext::bit_cast<numext::uint16_t>(bfloat16(0.0f))),
  65. bfloat16(0.0f));
  66. // Conversion from float.
  67. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(1.0f), 0x3f80);
  68. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f), 0x3f00);
  69. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.33333f), 0x3eab);
  70. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3.38e38f), 0x7f7e);
  71. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3.40e38f), 0x7f80); // Becomes infinity.
  72. // Verify round-to-nearest-even behavior.
  73. float val1 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c00)));
  74. float val2 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c01)));
  75. float val3 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c02)));
  76. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f * (val1 + val2)), 0x3c00);
  77. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f * (val2 + val3)), 0x3c02);
  78. // Conversion from int.
  79. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(-1), 0xbf80);
  80. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0), 0x0000);
  81. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(1), 0x3f80);
  82. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(2), 0x4000);
  83. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3), 0x4040);
  84. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(12), 0x4140);
  85. // Conversion from bool.
  86. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(false), 0x0000);
  87. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(true), 0x3f80);
  88. // Conversion to bool
  89. VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(3)), true);
  90. VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(0.33333f)), true);
  91. VERIFY_IS_EQUAL(bfloat16(-0.0), false);
  92. VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(0.0)), false);
  93. // Explicit conversion to float.
  94. VERIFY_IS_EQUAL(static_cast<float>(bfloat16(__bfloat16_raw(0x0000))), 0.0f);
  95. VERIFY_IS_EQUAL(static_cast<float>(bfloat16(__bfloat16_raw(0x3f80))), 1.0f);
  96. // Implicit conversion to float
  97. VERIFY_IS_EQUAL(bfloat16(__bfloat16_raw(0x0000)), 0.0f);
  98. VERIFY_IS_EQUAL(bfloat16(__bfloat16_raw(0x3f80)), 1.0f);
  99. // Zero representations
  100. VERIFY_IS_EQUAL(bfloat16(0.0f), bfloat16(0.0f));
  101. VERIFY_IS_EQUAL(bfloat16(-0.0f), bfloat16(0.0f));
  102. VERIFY_IS_EQUAL(bfloat16(-0.0f), bfloat16(-0.0f));
  103. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.0f), 0x0000);
  104. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(-0.0f), 0x8000);
  105. // Flush denormals to zero
  106. for (float denorm = -std::numeric_limits<float>::denorm_min();
  107. denorm < std::numeric_limits<float>::denorm_min();
  108. denorm = nextafterf(denorm, 1.0f)) {
  109. bfloat16 bf_trunc = Eigen::bfloat16_impl::truncate_to_bfloat16(denorm);
  110. VERIFY_IS_EQUAL(static_cast<float>(bf_trunc), 0.0f);
  111. // Implicit conversion of denormls to bool is correct
  112. VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(denorm)), false);
  113. VERIFY_IS_EQUAL(bfloat16(denorm), false);
  114. if (std::signbit(denorm)) {
  115. VERIFY_BFLOAT16_BITS_EQUAL(bf_trunc, 0x8000);
  116. } else {
  117. VERIFY_BFLOAT16_BITS_EQUAL(bf_trunc, 0x0000);
  118. }
  119. bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(denorm);
  120. VERIFY_IS_EQUAL(static_cast<float>(bf_round), 0.0f);
  121. if (std::signbit(denorm)) {
  122. VERIFY_BFLOAT16_BITS_EQUAL(bf_round, 0x8000);
  123. } else {
  124. VERIFY_BFLOAT16_BITS_EQUAL(bf_round, 0x0000);
  125. }
  126. }
  127. // Default is zero
  128. VERIFY_IS_EQUAL(static_cast<float>(bfloat16()), 0.0f);
  129. // Representable floats round trip via bfloat16
  130. test_roundtrip<float>();
  131. test_roundtrip<double>();
  132. test_roundtrip<std::complex<float> >();
  133. test_roundtrip<std::complex<double> >();
  134. // Truncate test
  135. test_truncate(
  136. BinaryToFloat(0, 0x80, 0x48, 0xf5c3),
  137. BinaryToFloat(0, 0x80, 0x48, 0x0000),
  138. BinaryToFloat(0, 0x80, 0x49, 0x0000));
  139. test_truncate(
  140. BinaryToFloat(1, 0x80, 0x48, 0xf5c3),
  141. BinaryToFloat(1, 0x80, 0x48, 0x0000),
  142. BinaryToFloat(1, 0x80, 0x49, 0x0000));
  143. test_truncate(
  144. BinaryToFloat(0, 0x80, 0x48, 0x8000),
  145. BinaryToFloat(0, 0x80, 0x48, 0x0000),
  146. BinaryToFloat(0, 0x80, 0x48, 0x0000));
  147. test_truncate(
  148. BinaryToFloat(0, 0xff, 0x00, 0x0001),
  149. BinaryToFloat(0, 0xff, 0x40, 0x0000),
  150. BinaryToFloat(0, 0xff, 0x40, 0x0000));
  151. test_truncate(
  152. BinaryToFloat(0, 0xff, 0x7f, 0xffff),
  153. BinaryToFloat(0, 0xff, 0x40, 0x0000),
  154. BinaryToFloat(0, 0xff, 0x40, 0x0000));
  155. test_truncate(
  156. BinaryToFloat(1, 0x80, 0x48, 0xc000),
  157. BinaryToFloat(1, 0x80, 0x48, 0x0000),
  158. BinaryToFloat(1, 0x80, 0x49, 0x0000));
  159. test_truncate(
  160. BinaryToFloat(0, 0x80, 0x48, 0x0000),
  161. BinaryToFloat(0, 0x80, 0x48, 0x0000),
  162. BinaryToFloat(0, 0x80, 0x48, 0x0000));
  163. test_truncate(
  164. BinaryToFloat(0, 0x80, 0x48, 0x4000),
  165. BinaryToFloat(0, 0x80, 0x48, 0x0000),
  166. BinaryToFloat(0, 0x80, 0x48, 0x0000));
  167. test_truncate(
  168. BinaryToFloat(0, 0x80, 0x48, 0x8000),
  169. BinaryToFloat(0, 0x80, 0x48, 0x0000),
  170. BinaryToFloat(0, 0x80, 0x48, 0x0000));
  171. test_truncate(
  172. BinaryToFloat(0, 0x00, 0x48, 0x8000),
  173. BinaryToFloat(0, 0x00, 0x00, 0x0000),
  174. BinaryToFloat(0, 0x00, 0x00, 0x0000));
  175. test_truncate(
  176. BinaryToFloat(0, 0x00, 0x7f, 0xc000),
  177. BinaryToFloat(0, 0x00, 0x00, 0x0000),
  178. BinaryToFloat(0, 0x00, 0x00, 0x0000));
  179. // Conversion
  180. Array<float,1,100> a;
  181. for (int i = 0; i < 100; i++) a(i) = i + 1.25;
  182. Array<bfloat16,1,100> b = a.cast<bfloat16>();
  183. Array<float,1,100> c = b.cast<float>();
  184. for (int i = 0; i < 100; ++i) {
  185. VERIFY_LE(numext::abs(c(i) - a(i)), a(i) / 128);
  186. }
  187. // Epsilon
  188. VERIFY_LE(1.0f, static_cast<float>((std::numeric_limits<bfloat16>::epsilon)() + bfloat16(1.0f)));
  189. VERIFY_IS_EQUAL(1.0f, static_cast<float>((std::numeric_limits<bfloat16>::epsilon)() / bfloat16(2.0f) + bfloat16(1.0f)));
  190. // Negate
  191. VERIFY_IS_EQUAL(static_cast<float>(-bfloat16(3.0f)), -3.0f);
  192. VERIFY_IS_EQUAL(static_cast<float>(-bfloat16(-4.5f)), 4.5f);
  193. #if !EIGEN_COMP_MSVC
  194. // Visual Studio errors out on divisions by 0
  195. VERIFY((numext::isnan)(static_cast<float>(bfloat16(0.0 / 0.0))));
  196. VERIFY((numext::isinf)(static_cast<float>(bfloat16(1.0 / 0.0))));
  197. VERIFY((numext::isinf)(static_cast<float>(bfloat16(-1.0 / 0.0))));
  198. // Visual Studio errors out on divisions by 0
  199. VERIFY((numext::isnan)(bfloat16(0.0 / 0.0)));
  200. VERIFY((numext::isinf)(bfloat16(1.0 / 0.0)));
  201. VERIFY((numext::isinf)(bfloat16(-1.0 / 0.0)));
  202. #endif
  203. // NaNs and infinities.
  204. VERIFY(!(numext::isinf)(static_cast<float>(bfloat16(3.38e38f)))); // Largest finite number.
  205. VERIFY(!(numext::isnan)(static_cast<float>(bfloat16(0.0f))));
  206. VERIFY((numext::isinf)(static_cast<float>(bfloat16(__bfloat16_raw(0xff80)))));
  207. VERIFY((numext::isnan)(static_cast<float>(bfloat16(__bfloat16_raw(0xffc0)))));
  208. VERIFY((numext::isinf)(static_cast<float>(bfloat16(__bfloat16_raw(0x7f80)))));
  209. VERIFY((numext::isnan)(static_cast<float>(bfloat16(__bfloat16_raw(0x7fc0)))));
  210. // Exactly same checks as above, just directly on the bfloat16 representation.
  211. VERIFY(!(numext::isinf)(bfloat16(__bfloat16_raw(0x7bff))));
  212. VERIFY(!(numext::isnan)(bfloat16(__bfloat16_raw(0x0000))));
  213. VERIFY((numext::isinf)(bfloat16(__bfloat16_raw(0xff80))));
  214. VERIFY((numext::isnan)(bfloat16(__bfloat16_raw(0xffc0))));
  215. VERIFY((numext::isinf)(bfloat16(__bfloat16_raw(0x7f80))));
  216. VERIFY((numext::isnan)(bfloat16(__bfloat16_raw(0x7fc0))));
  217. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(BinaryToFloat(0x0, 0xff, 0x40, 0x0)), 0x7fc0);
  218. VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(BinaryToFloat(0x1, 0xff, 0x40, 0x0)), 0xffc0);
  219. VERIFY_BFLOAT16_BITS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
  220. BinaryToFloat(0x0, 0xff, 0x40, 0x0)),
  221. 0x7fc0);
  222. VERIFY_BFLOAT16_BITS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
  223. BinaryToFloat(0x1, 0xff, 0x40, 0x0)),
  224. 0xffc0);
  225. }
  226. void test_numtraits()
  227. {
  228. std::cout << "epsilon = " << NumTraits<bfloat16>::epsilon() << " (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::epsilon()) << ")" << std::endl;
  229. std::cout << "highest = " << NumTraits<bfloat16>::highest() << " (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::highest()) << ")" << std::endl;
  230. std::cout << "lowest = " << NumTraits<bfloat16>::lowest() << " (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::lowest()) << ")" << std::endl;
  231. std::cout << "min = " << (std::numeric_limits<bfloat16>::min)() << " (0x" << std::hex << numext::bit_cast<numext::uint16_t>((std::numeric_limits<bfloat16>::min)()) << ")" << std::endl;
  232. std::cout << "denorm min = " << (std::numeric_limits<bfloat16>::denorm_min)() << " (0x" << std::hex << numext::bit_cast<numext::uint16_t>((std::numeric_limits<bfloat16>::denorm_min)()) << ")" << std::endl;
  233. std::cout << "infinity = " << NumTraits<bfloat16>::infinity() << " (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::infinity()) << ")" << std::endl;
  234. std::cout << "quiet nan = " << NumTraits<bfloat16>::quiet_NaN() << " (0x" << std::hex << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::quiet_NaN()) << ")" << std::endl;
  235. std::cout << "signaling nan = " << std::numeric_limits<bfloat16>::signaling_NaN() << " (0x" << std::hex << numext::bit_cast<numext::uint16_t>(std::numeric_limits<bfloat16>::signaling_NaN()) << ")" << std::endl;
  236. VERIFY(NumTraits<bfloat16>::IsSigned);
  237. VERIFY_IS_EQUAL(
  238. numext::bit_cast<numext::uint16_t>(std::numeric_limits<bfloat16>::infinity()),
  239. numext::bit_cast<numext::uint16_t>(bfloat16(std::numeric_limits<float>::infinity())) );
  240. // There is no guarantee that casting a 32-bit NaN to bfloat16 has a precise
  241. // bit pattern. We test that it is in fact a NaN, then test the signaling
  242. // bit (msb of significand is 1 for quiet, 0 for signaling).
  243. const numext::uint16_t BFLOAT16_QUIET_BIT = 0x0040;
  244. VERIFY(
  245. (numext::isnan)(std::numeric_limits<bfloat16>::quiet_NaN())
  246. && (numext::isnan)(bfloat16(std::numeric_limits<float>::quiet_NaN()))
  247. && ((numext::bit_cast<numext::uint16_t>(std::numeric_limits<bfloat16>::quiet_NaN()) & BFLOAT16_QUIET_BIT) > 0)
  248. && ((numext::bit_cast<numext::uint16_t>(bfloat16(std::numeric_limits<float>::quiet_NaN())) & BFLOAT16_QUIET_BIT) > 0) );
  249. // After a cast to bfloat16, a signaling NaN may become non-signaling. Thus,
  250. // we check that both are NaN, and that only the `numeric_limits` version is
  251. // signaling.
  252. VERIFY(
  253. (numext::isnan)(std::numeric_limits<bfloat16>::signaling_NaN())
  254. && (numext::isnan)(bfloat16(std::numeric_limits<float>::signaling_NaN()))
  255. && ((numext::bit_cast<numext::uint16_t>(std::numeric_limits<bfloat16>::signaling_NaN()) & BFLOAT16_QUIET_BIT) == 0) );
  256. VERIFY( (std::numeric_limits<bfloat16>::min)() > bfloat16(0.f) );
  257. VERIFY( (std::numeric_limits<bfloat16>::denorm_min)() > bfloat16(0.f) );
  258. VERIFY_IS_EQUAL( (std::numeric_limits<bfloat16>::denorm_min)()/bfloat16(2), bfloat16(0.f) );
  259. }
  260. void test_arithmetic()
  261. {
  262. VERIFY_IS_EQUAL(static_cast<float>(bfloat16(2) + bfloat16(2)), 4);
  263. VERIFY_IS_EQUAL(static_cast<float>(bfloat16(2) + bfloat16(-2)), 0);
  264. VERIFY_IS_APPROX(static_cast<float>(bfloat16(0.33333f) + bfloat16(0.66667f)), 1.0f);
  265. VERIFY_IS_EQUAL(static_cast<float>(bfloat16(2.0f) * bfloat16(-5.5f)), -11.0f);
  266. VERIFY_IS_APPROX(static_cast<float>(bfloat16(1.0f) / bfloat16(3.0f)), 0.3339f);
  267. VERIFY_IS_EQUAL(static_cast<float>(-bfloat16(4096.0f)), -4096.0f);
  268. VERIFY_IS_EQUAL(static_cast<float>(-bfloat16(-4096.0f)), 4096.0f);
  269. }
  270. void test_comparison()
  271. {
  272. VERIFY(bfloat16(1.0f) > bfloat16(0.5f));
  273. VERIFY(bfloat16(0.5f) < bfloat16(1.0f));
  274. VERIFY(!(bfloat16(1.0f) < bfloat16(0.5f)));
  275. VERIFY(!(bfloat16(0.5f) > bfloat16(1.0f)));
  276. VERIFY(!(bfloat16(4.0f) > bfloat16(4.0f)));
  277. VERIFY(!(bfloat16(4.0f) < bfloat16(4.0f)));
  278. VERIFY(!(bfloat16(0.0f) < bfloat16(-0.0f)));
  279. VERIFY(!(bfloat16(-0.0f) < bfloat16(0.0f)));
  280. VERIFY(!(bfloat16(0.0f) > bfloat16(-0.0f)));
  281. VERIFY(!(bfloat16(-0.0f) > bfloat16(0.0f)));
  282. VERIFY(bfloat16(0.2f) > bfloat16(-1.0f));
  283. VERIFY(bfloat16(-1.0f) < bfloat16(0.2f));
  284. VERIFY(bfloat16(-16.0f) < bfloat16(-15.0f));
  285. VERIFY(bfloat16(1.0f) == bfloat16(1.0f));
  286. VERIFY(bfloat16(1.0f) != bfloat16(2.0f));
  287. // Comparisons with NaNs and infinities.
  288. #if !EIGEN_COMP_MSVC
  289. // Visual Studio errors out on divisions by 0
  290. VERIFY(!(bfloat16(0.0 / 0.0) == bfloat16(0.0 / 0.0)));
  291. VERIFY(bfloat16(0.0 / 0.0) != bfloat16(0.0 / 0.0));
  292. VERIFY(!(bfloat16(1.0) == bfloat16(0.0 / 0.0)));
  293. VERIFY(!(bfloat16(1.0) < bfloat16(0.0 / 0.0)));
  294. VERIFY(!(bfloat16(1.0) > bfloat16(0.0 / 0.0)));
  295. VERIFY(bfloat16(1.0) != bfloat16(0.0 / 0.0));
  296. VERIFY(bfloat16(1.0) < bfloat16(1.0 / 0.0));
  297. VERIFY(bfloat16(1.0) > bfloat16(-1.0 / 0.0));
  298. #endif
  299. }
  300. void test_basic_functions()
  301. {
  302. VERIFY_IS_EQUAL(static_cast<float>(numext::abs(bfloat16(3.5f))), 3.5f);
  303. VERIFY_IS_EQUAL(static_cast<float>(abs(bfloat16(3.5f))), 3.5f);
  304. VERIFY_IS_EQUAL(static_cast<float>(numext::abs(bfloat16(-3.5f))), 3.5f);
  305. VERIFY_IS_EQUAL(static_cast<float>(abs(bfloat16(-3.5f))), 3.5f);
  306. VERIFY_IS_EQUAL(static_cast<float>(numext::floor(bfloat16(3.5f))), 3.0f);
  307. VERIFY_IS_EQUAL(static_cast<float>(floor(bfloat16(3.5f))), 3.0f);
  308. VERIFY_IS_EQUAL(static_cast<float>(numext::floor(bfloat16(-3.5f))), -4.0f);
  309. VERIFY_IS_EQUAL(static_cast<float>(floor(bfloat16(-3.5f))), -4.0f);
  310. VERIFY_IS_EQUAL(static_cast<float>(numext::ceil(bfloat16(3.5f))), 4.0f);
  311. VERIFY_IS_EQUAL(static_cast<float>(ceil(bfloat16(3.5f))), 4.0f);
  312. VERIFY_IS_EQUAL(static_cast<float>(numext::ceil(bfloat16(-3.5f))), -3.0f);
  313. VERIFY_IS_EQUAL(static_cast<float>(ceil(bfloat16(-3.5f))), -3.0f);
  314. VERIFY_IS_APPROX(static_cast<float>(numext::sqrt(bfloat16(0.0f))), 0.0f);
  315. VERIFY_IS_APPROX(static_cast<float>(sqrt(bfloat16(0.0f))), 0.0f);
  316. VERIFY_IS_APPROX(static_cast<float>(numext::sqrt(bfloat16(4.0f))), 2.0f);
  317. VERIFY_IS_APPROX(static_cast<float>(sqrt(bfloat16(4.0f))), 2.0f);
  318. VERIFY_IS_APPROX(static_cast<float>(numext::pow(bfloat16(0.0f), bfloat16(1.0f))), 0.0f);
  319. VERIFY_IS_APPROX(static_cast<float>(pow(bfloat16(0.0f), bfloat16(1.0f))), 0.0f);
  320. VERIFY_IS_APPROX(static_cast<float>(numext::pow(bfloat16(2.0f), bfloat16(2.0f))), 4.0f);
  321. VERIFY_IS_APPROX(static_cast<float>(pow(bfloat16(2.0f), bfloat16(2.0f))), 4.0f);
  322. VERIFY_IS_EQUAL(static_cast<float>(numext::exp(bfloat16(0.0f))), 1.0f);
  323. VERIFY_IS_EQUAL(static_cast<float>(exp(bfloat16(0.0f))), 1.0f);
  324. VERIFY_IS_APPROX(static_cast<float>(numext::exp(bfloat16(EIGEN_PI))), 20.f + static_cast<float>(EIGEN_PI));
  325. VERIFY_IS_APPROX(static_cast<float>(exp(bfloat16(EIGEN_PI))), 20.f + static_cast<float>(EIGEN_PI));
  326. VERIFY_IS_EQUAL(static_cast<float>(numext::expm1(bfloat16(0.0f))), 0.0f);
  327. VERIFY_IS_EQUAL(static_cast<float>(expm1(bfloat16(0.0f))), 0.0f);
  328. VERIFY_IS_APPROX(static_cast<float>(numext::expm1(bfloat16(2.0f))), 6.375f);
  329. VERIFY_IS_APPROX(static_cast<float>(expm1(bfloat16(2.0f))), 6.375f);
  330. VERIFY_IS_EQUAL(static_cast<float>(numext::log(bfloat16(1.0f))), 0.0f);
  331. VERIFY_IS_EQUAL(static_cast<float>(log(bfloat16(1.0f))), 0.0f);
  332. VERIFY_IS_APPROX(static_cast<float>(numext::log(bfloat16(10.0f))), 2.296875f);
  333. VERIFY_IS_APPROX(static_cast<float>(log(bfloat16(10.0f))), 2.296875f);
  334. VERIFY_IS_EQUAL(static_cast<float>(numext::log1p(bfloat16(0.0f))), 0.0f);
  335. VERIFY_IS_EQUAL(static_cast<float>(log1p(bfloat16(0.0f))), 0.0f);
  336. VERIFY_IS_APPROX(static_cast<float>(numext::log1p(bfloat16(10.0f))), 2.390625f);
  337. VERIFY_IS_APPROX(static_cast<float>(log1p(bfloat16(10.0f))), 2.390625f);
  338. }
  339. void test_trigonometric_functions()
  340. {
  341. VERIFY_IS_APPROX(numext::cos(bfloat16(0.0f)), bfloat16(cosf(0.0f)));
  342. VERIFY_IS_APPROX(cos(bfloat16(0.0f)), bfloat16(cosf(0.0f)));
  343. VERIFY_IS_APPROX(numext::cos(bfloat16(EIGEN_PI)), bfloat16(cosf(EIGEN_PI)));
  344. // VERIFY_IS_APPROX(numext::cos(bfloat16(EIGEN_PI/2)), bfloat16(cosf(EIGEN_PI/2)));
  345. // VERIFY_IS_APPROX(numext::cos(bfloat16(3*EIGEN_PI/2)), bfloat16(cosf(3*EIGEN_PI/2)));
  346. VERIFY_IS_APPROX(numext::cos(bfloat16(3.5f)), bfloat16(cosf(3.5f)));
  347. VERIFY_IS_APPROX(numext::sin(bfloat16(0.0f)), bfloat16(sinf(0.0f)));
  348. VERIFY_IS_APPROX(sin(bfloat16(0.0f)), bfloat16(sinf(0.0f)));
  349. // VERIFY_IS_APPROX(numext::sin(bfloat16(EIGEN_PI)), bfloat16(sinf(EIGEN_PI)));
  350. VERIFY_IS_APPROX(numext::sin(bfloat16(EIGEN_PI/2)), bfloat16(sinf(EIGEN_PI/2)));
  351. VERIFY_IS_APPROX(numext::sin(bfloat16(3*EIGEN_PI/2)), bfloat16(sinf(3*EIGEN_PI/2)));
  352. VERIFY_IS_APPROX(numext::sin(bfloat16(3.5f)), bfloat16(sinf(3.5f)));
  353. VERIFY_IS_APPROX(numext::tan(bfloat16(0.0f)), bfloat16(tanf(0.0f)));
  354. VERIFY_IS_APPROX(tan(bfloat16(0.0f)), bfloat16(tanf(0.0f)));
  355. // VERIFY_IS_APPROX(numext::tan(bfloat16(EIGEN_PI)), bfloat16(tanf(EIGEN_PI)));
  356. // VERIFY_IS_APPROX(numext::tan(bfloat16(EIGEN_PI/2)), bfloat16(tanf(EIGEN_PI/2)));
  357. // VERIFY_IS_APPROX(numext::tan(bfloat16(3*EIGEN_PI/2)), bfloat16(tanf(3*EIGEN_PI/2)));
  358. VERIFY_IS_APPROX(numext::tan(bfloat16(3.5f)), bfloat16(tanf(3.5f)));
  359. }
  360. void test_array()
  361. {
  362. typedef Array<bfloat16,1,Dynamic> ArrayXh;
  363. Index size = internal::random<Index>(1,10);
  364. Index i = internal::random<Index>(0,size-1);
  365. ArrayXh a1 = ArrayXh::Random(size), a2 = ArrayXh::Random(size);
  366. VERIFY_IS_APPROX( a1+a1, bfloat16(2)*a1 );
  367. VERIFY( (a1.abs() >= bfloat16(0)).all() );
  368. VERIFY_IS_APPROX( (a1*a1).sqrt(), a1.abs() );
  369. VERIFY( ((a1.min)(a2) <= (a1.max)(a2)).all() );
  370. a1(i) = bfloat16(-10.);
  371. VERIFY_IS_EQUAL( a1.minCoeff(), bfloat16(-10.) );
  372. a1(i) = bfloat16(10.);
  373. VERIFY_IS_EQUAL( a1.maxCoeff(), bfloat16(10.) );
  374. std::stringstream ss;
  375. ss << a1;
  376. }
  377. void test_product()
  378. {
  379. typedef Matrix<bfloat16,Dynamic,Dynamic> MatrixXh;
  380. Index rows = internal::random<Index>(1,EIGEN_TEST_MAX_SIZE);
  381. Index cols = internal::random<Index>(1,EIGEN_TEST_MAX_SIZE);
  382. Index depth = internal::random<Index>(1,EIGEN_TEST_MAX_SIZE);
  383. MatrixXh Ah = MatrixXh::Random(rows,depth);
  384. MatrixXh Bh = MatrixXh::Random(depth,cols);
  385. MatrixXh Ch = MatrixXh::Random(rows,cols);
  386. MatrixXf Af = Ah.cast<float>();
  387. MatrixXf Bf = Bh.cast<float>();
  388. MatrixXf Cf = Ch.cast<float>();
  389. VERIFY_IS_APPROX(Ch.noalias()+=Ah*Bh, (Cf.noalias()+=Af*Bf).cast<bfloat16>());
  390. }
  391. EIGEN_DECLARE_TEST(bfloat16_float)
  392. {
  393. CALL_SUBTEST(test_numtraits());
  394. for(int i = 0; i < g_repeat; i++) {
  395. CALL_SUBTEST(test_conversion());
  396. CALL_SUBTEST(test_arithmetic());
  397. CALL_SUBTEST(test_comparison());
  398. CALL_SUBTEST(test_basic_functions());
  399. CALL_SUBTEST(test_trigonometric_functions());
  400. CALL_SUBTEST(test_array());
  401. CALL_SUBTEST(test_product());
  402. }
  403. }