// ribbon_impl.h

//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <cmath>

#include "port/port.h"  // for PREFETCH
#include "util/fastrange.h"
#include "util/ribbon_alg.h"

namespace ROCKSDB_NAMESPACE {

namespace ribbon {

// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly)
//
// ribbon_impl.h: templated (parameterized) standard implementations
//
// Ribbon is a Perfect Hash Static Function construction useful as a compact
// static Bloom filter alternative. See ribbon_alg.h for core algorithms
// and core design details.
//
// TODO: more details on trade-offs and practical issues.
//
// APIs for configuring Ribbon are in ribbon_config.h

// Ribbon implementations in this file take these parameters, which must be
// provided in a class/struct type with members expressed in this concept:

// concept TypesAndSettings {
//   // See RibbonTypes and *Hasher in ribbon_alg.h, except here we have
//   // the added constraint that Hash be equivalent to either uint32_t or
//   // uint64_t.
//   typename Hash;
//   typename CoeffRow;
//   typename ResultRow;
//   typename Index;
//   typename Key;
//   static constexpr bool kFirstCoeffAlwaysOne;
//
//   // An unsigned integer type for identifying a hash seed, typically
//   // uint32_t or uint64_t. Importantly, this is the amount of data
//   // stored in memory for identifying a raw seed. See StandardHasher.
//   typename Seed;
//
//   // When true, the PHSF implements a static filter, expecting just
//   // keys as inputs for construction. When false, implements a general
//   // PHSF and expects std::pair<Key, ResultRow> as inputs for
//   // construction.
//   static constexpr bool kIsFilter;
//
//   // When true, enables a special "homogeneous" filter implementation that
//   // is slightly faster to construct, and never fails to construct though
//   // FP rate can quickly explode in cases where the corresponding
//   // non-homogeneous filter would fail (or nearly fail?) to construct.
//   // For smaller filters, you can configure with ConstructionFailureChance
//   // smaller than desired FP rate to largely counteract this effect.
//   // TODO: configuring Homogeneous Ribbon for arbitrarily large filters
//   // based on data from OptimizeHomogAtScale
//   static constexpr bool kHomogeneous;
//
//   // When true, adds a tiny bit more hashing logic on queries and
//   // construction to improve utilization at the beginning and end of
//   // the structure. Recommended when CoeffRow is only 64 bits (or
//   // less), so typical num_starts < 10k. Although this is compatible
//   // with kHomogeneous, the competing space vs. time priorities might
//   // not be useful.
//   static constexpr bool kUseSmash;
//
//   // When true, allows number of "starts" to be zero, for best support
//   // of the "no keys to add" case by always returning false for filter
//   // queries. (This is distinct from the "keys added but no space for
//   // any data" case, in which a filter always returns true.) The cost of
//   // supporting this is a conditional branch (probably predictable) in
//   // queries.
//   static constexpr bool kAllowZeroStarts;
//
//   // A seedable stock hash function on Keys. All bits of Hash must
//   // be reasonably high quality. XXH functions recommended, but
//   // Murmur, City, Farm, etc. also work.
//   static Hash HashFn(const Key &, Seed raw_seed);
// };
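//
// For illustration only (not part of this header), a filter configuration
// satisfying this concept might look like the sketch below. The Key type and
// the hash call inside HashFn are placeholders/assumptions; any seedable,
// high-quality 64-bit hash of the application's key type works.
//
//   struct ExampleFilterTypesAndSettings {
//     using CoeffRow = uint64_t;  // 64 coefficients per key
//     using ResultRow = uint8_t;  // up to 8 solution columns (filter bits/key)
//     using Index = uint32_t;
//     using Hash = uint64_t;
//     using Seed = uint32_t;
//     using Key = std::string;    // placeholder key type
//     static constexpr bool kIsFilter = true;
//     static constexpr bool kHomogeneous = false;
//     static constexpr bool kFirstCoeffAlwaysOne = true;
//     static constexpr bool kUseSmash = false;
//     static constexpr bool kAllowZeroStarts = true;
//     static Hash HashFn(const Key& key, Seed raw_seed) {
//       // Placeholder: substitute any seedable stock hash (e.g. an XXH
//       // variant) over the key bytes.
//       return SomeSeedableHash64(key.data(), key.size(), raw_seed);
//     }
//   };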
// A bit of a hack to automatically construct the type for
// AddInput based on a constexpr bool.
template <typename Key, typename ResultRow, bool IsFilter>
struct AddInputSelector {
  // For general PHSF, not filter
  using T = std::pair<Key, ResultRow>;
};

template <typename Key, typename ResultRow>
struct AddInputSelector<Key, ResultRow, true /*IsFilter*/> {
  // For Filter
  using T = Key;
};

// To avoid writing 'typename' everywhere that we use types like 'Index'
#define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings)                  \
  using CoeffRow = typename TypesAndSettings::CoeffRow;                     \
  using ResultRow = typename TypesAndSettings::ResultRow;                   \
  using Index = typename TypesAndSettings::Index;                           \
  using Hash = typename TypesAndSettings::Hash;                             \
  using Key = typename TypesAndSettings::Key;                               \
  using Seed = typename TypesAndSettings::Seed;                             \
                                                                            \
  /* Some more additions */                                                 \
  using QueryInput = Key;                                                   \
  using AddInput = typename ROCKSDB_NAMESPACE::ribbon::AddInputSelector<    \
      Key, ResultRow, TypesAndSettings::kIsFilter>::T;                      \
  static constexpr auto kCoeffBits =                                        \
      static_cast<Index>(sizeof(CoeffRow) * 8U);                            \
                                                                            \
  /* Export to algorithm */                                                 \
  static constexpr bool kFirstCoeffAlwaysOne =                              \
      TypesAndSettings::kFirstCoeffAlwaysOne;                               \
                                                                            \
  static_assert(sizeof(CoeffRow) + sizeof(ResultRow) + sizeof(Index) +      \
                        sizeof(Hash) + sizeof(Key) + sizeof(Seed) +         \
                        sizeof(QueryInput) + sizeof(AddInput) + kCoeffBits +\
                        kFirstCoeffAlwaysOne >                              \
                    0,                                                      \
                "avoid unused warnings, semicolon expected after macro call")

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4309)  // cast truncating constant
#pragma warning(disable : 4307)  // arithmetic constant overflow
#endif
// StandardHasher: A standard implementation of concepts RibbonTypes,
// PhsfQueryHasher, FilterQueryHasher, and BandingHasher from ribbon_alg.h.
//
// This implementation should be suitable for almost all practical purposes
// as it "behaves" across a wide range of settings, with little room left
// for improvement. The key functionality in this hasher is generating
// CoeffRows, starts, and (for filters) ResultRows, which could be ~150
// bits of data or more, from a modest hash of 64 or even just 32 bits, with
// enough uniformity and bitwise independence to be close to "the best you
// can do" with available hash information in terms of FP rate and
// compactness. (64 bits recommended and sufficient for PHSF practical
// purposes.)
//
// Another feature of this hasher is a minimal "premixing" of seeds before
// they are provided to TypesAndSettings::HashFn in case that function does
// not provide sufficiently independent hashes when iterating merely
// sequentially on seeds. (This for example works around a problem with the
// preview version 0.7.2 of XXH3 used in RocksDB, a.k.a. XXPH3 or Hash64, and
// MurmurHash1 used in RocksDB, a.k.a. Hash.) We say this pre-mixing step
// translates "ordinal seeds," which we iterate sequentially to find a
// solution, into "raw seeds," with many more bits changing for each
// iteration. The translation is an easily reversible lightweight mixing,
// not suitable for hashing on its own. An advantage of this approach is that
// StandardHasher can store just the raw seed (e.g. 64 bits) for fast query
// times, while from the application perspective, we can limit to a small
// number of ordinal seeds (e.g. 64 in 6 bits) for saving in metadata.
//
// The default constructor initializes the seed to ordinal seed zero, which
// is equal to raw seed zero.
//
template <class TypesAndSettings>
class StandardHasher {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  inline Hash GetHash(const Key& key) const {
    return TypesAndSettings::HashFn(key, raw_seed_);
  }
  // For when AddInput == pair<Key, ResultRow> (kIsFilter == false)
  inline Hash GetHash(const std::pair<Key, ResultRow>& bi) const {
    return GetHash(bi.first);
  }
  inline Index GetStart(Hash h, Index num_starts) const {
    // This is "critical path" code because it's required before memory
    // lookup.
    //
    // FastRange gives us a fast and effective mapping from h to the
    // appropriate range. This depends most, sometimes exclusively, on
    // upper bits of h.
    //
    if (TypesAndSettings::kUseSmash) {
      // Extra logic to "smash" entries at beginning and end, for
      // better utilization. For example, without smash and with
      // kFirstCoeffAlwaysOne, there's about a 30% chance that the
      // first slot in the banding will be unused, and worse without
      // kFirstCoeffAlwaysOne. The ending slots are even less utilized
      // without smash.
      //
      // But since this only affects roughly kCoeffBits of the slots,
      // it's usually small enough to be ignorable (less computation in
      // this function) when number of slots is roughly 10k or larger.
      //
      // The best values for these smash weights might depend on how
      // densely you're packing entries, and also kCoeffBits, but this
      // seems to work well for roughly 95% success probability.
      //
      constexpr Index kFrontSmash = kCoeffBits / 4;
      constexpr Index kBackSmash = kCoeffBits / 4;
      Index start = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash);
      start = std::max(start, kFrontSmash);
      start -= kFrontSmash;
      start = std::min(start, num_starts - 1);
      return start;
    } else {
      // For query speed, we allow small number of initial and final
      // entries to be under-utilized.
      // NOTE: This call statically enforces that Hash is equivalent to
      // either uint32_t or uint64_t.
      return FastRangeGeneric(h, num_starts);
    }
  }
  inline CoeffRow GetCoeffRow(Hash h) const {
    // This is not so much "critical path" code because it can be done in
    // parallel (instruction level) with memory lookup.
    //
    // When we might have many entries squeezed into a single start,
    // we need reasonably good remixing for CoeffRow.
    if (TypesAndSettings::kUseSmash) {
      // Reasonably good, reasonably fast, reasonably general.
      // Probably not 1:1 but probably close enough.
      Unsigned128 a = Multiply64to128(h, kAltCoeffFactor1);
      Unsigned128 b = Multiply64to128(h, kAltCoeffFactor2);
      auto cr = static_cast<CoeffRow>(b ^ (a << 64) ^ (a >> 64));

      // Now ensure the value is non-zero
      if (kFirstCoeffAlwaysOne) {
        cr |= 1;
      } else {
        // Still have to ensure some bit is non-zero
        cr |= (cr == 0) ? 1 : 0;
      }
      return cr;
    }
    // If not kUseSmash, we ensure we're not squeezing many entries into a
    // single start, in part by ensuring num_starts > num_slots / 2. Thus,
    // here we do not need good remixing for CoeffRow, but just enough that
    //  (a) every bit is reasonably independent from Start.
    //  (b) every Hash-length bit subsequence of the CoeffRow has full or
    //      nearly full entropy from h.
    //  (c) if nontrivial bit subsequences within are correlated, it needs
    //      to be more complicated than exact copy or bitwise not (at least
    //      without kFirstCoeffAlwaysOne), or else there seems to be a kind
    //      of correlated clustering effect.
    //  (d) the CoeffRow is not zero, so that no one input on its own can
    //      doom construction success. (Preferably a mix of 1's and 0's if
    //      satisfying above.)

    // First, establish sufficient bitwise independence from Start, with
    // multiplication by a large random prime.
    // Note that we cast to Hash because if we use product bits beyond
    // original input size, that's going to correlate with Start (FastRange)
    // even with a (likely) different multiplier here.
    Hash a = h * kCoeffAndResultFactor;

    static_assert(
        sizeof(Hash) == sizeof(uint64_t) || sizeof(Hash) == sizeof(uint32_t),
        "Supported sizes");
    // If that's big enough, we're done. If not, we have to expand it,
    // maybe up to 4x size.
    uint64_t b;
    if (sizeof(Hash) < sizeof(uint64_t)) {
      // Almost-trivial hash expansion (OK - see above), favoring roughly
      // equal number of 1's and 0's in result
      b = (uint64_t{a} << 32) ^ (a ^ kCoeffXor32);
    } else {
      b = a;
    }
    static_assert(sizeof(CoeffRow) <= sizeof(Unsigned128), "Supported sizes");
    Unsigned128 c;
    if (sizeof(uint64_t) < sizeof(CoeffRow)) {
      // Almost-trivial hash expansion (OK - see above), favoring roughly
      // equal number of 1's and 0's in result
      c = (Unsigned128{b} << 64) ^ (b ^ kCoeffXor64);
    } else {
      c = b;
    }
    auto cr = static_cast<CoeffRow>(c);

    // Now ensure the value is non-zero
    if (kFirstCoeffAlwaysOne) {
      cr |= 1;
    } else if (sizeof(CoeffRow) == sizeof(Hash)) {
      // Still have to ensure some bit is non-zero
      cr |= (cr == 0) ? 1 : 0;
    } else {
      // (We did trivial expansion with constant xor, which ensures some
      // bits are non-zero.)
    }
    return cr;
  }
  inline ResultRow GetResultRowMask() const {
    // TODO: will be used with InterleavedSolutionStorage?
    // For now, all bits set (note: might be a small type so might need to
    // narrow after promotion)
    return static_cast<ResultRow>(~ResultRow{0});
  }
  inline ResultRow GetResultRowFromHash(Hash h) const {
    if (TypesAndSettings::kIsFilter && !TypesAndSettings::kHomogeneous) {
      // This is not so much "critical path" code because it can be done in
      // parallel (instruction level) with memory lookup.
      //
      // ResultRow bits only need to be independent from CoeffRow bits if
      // many entries might have the same start location, where "many" is
      // comparable to number of hash bits or kCoeffBits. If !kUseSmash
      // and num_starts > kCoeffBits, it is safe and efficient to draw from
      // the same bits computed for CoeffRow, which are reasonably
      // independent from Start. (Inlining and common subexpression
      // elimination with GetCoeffRow should make this
      // a single shared multiplication in generated code when !kUseSmash.)
      Hash a = h * kCoeffAndResultFactor;

      // The bits here that are *most* independent of Start are the highest
      // order bits (as in Knuth multiplicative hash). To make those the
      // most preferred for use in the result row, we do a bswap here.
      auto rr = static_cast<ResultRow>(EndianSwapValue(a));
      return rr & GetResultRowMask();
    } else {
      // Must be zero
      return 0;
    }
  }
  // For when AddInput == Key (kIsFilter == true)
  inline ResultRow GetResultRowFromInput(const Key&) const {
    // Must be zero
    return 0;
  }
  // For when AddInput == pair<Key, ResultRow> (kIsFilter == false)
  inline ResultRow GetResultRowFromInput(
      const std::pair<Key, ResultRow>& bi) const {
    // Simple extraction
    return bi.second;
  }

  // Seed tracking APIs - see class comment
  void SetRawSeed(Seed seed) { raw_seed_ = seed; }
  Seed GetRawSeed() { return raw_seed_; }
  void SetOrdinalSeed(Seed count) {
    // A simple, reversible mixing of any size (whole bytes) up to 64 bits.
    // This allows casting the raw seed to any smaller size we use for
    // ordinal seeds without risk of duplicate raw seeds for unique ordinal
    // seeds.

    // Seed type might be smaller than numerical promotion size, but Hash
    // should be at least that size, so we use Hash as intermediate type.
    static_assert(sizeof(Seed) <= sizeof(Hash),
                  "Hash must be at least size of Seed");

    // Multiply by a large random prime (one-to-one for any prefix of bits)
    Hash tmp = count * kToRawSeedFactor;
    // Within-byte one-to-one mixing
    static_assert((kSeedMixMask & (kSeedMixMask >> kSeedMixShift)) == 0,
                  "Illegal mask+shift");
    tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift;
    raw_seed_ = static_cast<Seed>(tmp);

    // dynamic verification
    assert(GetOrdinalSeed() == count);
  }
  Seed GetOrdinalSeed() {
    Hash tmp = raw_seed_;
    // Within-byte one-to-one mixing (its own inverse)
    tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift;
    // Multiply by 64-bit multiplicative inverse
    static_assert(kToRawSeedFactor * kFromRawSeedFactor == Hash{1},
                  "Must be inverses");
    return static_cast<Seed>(tmp * kFromRawSeedFactor);
  }

 protected:
  // For expanding hash:
  // large random prime
  static constexpr Hash kCoeffAndResultFactor =
      static_cast<Hash>(0xc28f82822b650bedULL);
  static constexpr uint64_t kAltCoeffFactor1 = 0x876f170be4f1fcb9U;
  static constexpr uint64_t kAltCoeffFactor2 = 0xf0433a4aecda4c5fU;
  // random-ish data
  static constexpr uint32_t kCoeffXor32 = 0xa6293635U;
  static constexpr uint64_t kCoeffXor64 = 0xc367844a6e52731dU;
  // For pre-mixing seeds
  static constexpr Hash kSeedMixMask = static_cast<Hash>(0xf0f0f0f0f0f0f0f0ULL);
  static constexpr unsigned kSeedMixShift = 4U;
  static constexpr Hash kToRawSeedFactor =
      static_cast<Hash>(0xc78219a23eeadd03ULL);
  static constexpr Hash kFromRawSeedFactor =
      static_cast<Hash>(0xfe1a137d14b475abULL);

  // See class description
  Seed raw_seed_ = 0;
};
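
// For illustration of the ordinal/raw seed translation above: a hypothetical
// caller that persists only a small ordinal seed but hashes with the full raw
// seed might do something like the following sketch (MyTypesAndSettings is a
// placeholder, not provided by this header):
//
//   StandardHasher<MyTypesAndSettings> hasher;
//   hasher.SetOrdinalSeed(3);              // e.g. the 3rd attempted seed
//   Seed raw = hasher.GetRawSeed();        // well-mixed; used for hashing
//   assert(hasher.GetOrdinalSeed() == 3);  // translation is reversible
//   // Persist only the ordinal seed (e.g. 6 bits of metadata); a fresh
//   // hasher can reconstruct the raw seed later via SetOrdinalSeed(3).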
// StandardRehasher (and StandardRehasherAdapter): A variant of
// StandardHasher that uses the same type for keys as for hashes.
// This is primarily intended for building a Ribbon filter
// from existing hashes without going back to original inputs in
// order to apply a different seed. This hasher seeds a 1-to-1 mixing
// transformation to apply a seed to an existing hash. (Untested for
// hash-sized keys that are not already uniformly distributed.) This
// transformation builds on the seed pre-mixing done in StandardHasher.
//
// Testing suggests essentially no degradation of solution success rate
// vs. going back to original inputs when changing hash seeds. For example:
// Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys
// is about 1.10 for both StandardHasher and StandardRehasher.
//
// StandardRehasher is not really recommended for general PHSFs (not
// filters) because a collision in the original hash could prevent
// construction despite re-seeding the Rehasher. (Such collisions
// do not interfere with filter construction.)
//
// concept RehasherTypesAndSettings: like TypesAndSettings but
// does not require Key or HashFn.
template <class RehasherTypesAndSettings>
class StandardRehasherAdapter : public RehasherTypesAndSettings {
 public:
  using Hash = typename RehasherTypesAndSettings::Hash;
  using Key = Hash;
  using Seed = typename RehasherTypesAndSettings::Seed;

  static Hash HashFn(const Hash& input, Seed raw_seed) {
    // Note: raw_seed is already lightly pre-mixed, and this multiplication
    // by a large prime is sufficient mixing (low-to-high bits) on top of
    // that for good FastRange results, which depends primarily on highest
    // bits. (The hashed CoeffRow and ResultRow are less sensitive to
    // mixing than Start.)
    //
    // Also note: did consider adding ^ (input >> some) before the
    // multiplication, but doesn't appear to be necessary.
    return (input ^ raw_seed) * kRehashFactor;
  }

 private:
  static constexpr Hash kRehashFactor =
      static_cast<Hash>(0x6193d459236a3a0dULL);
};

// See comment on StandardRehasherAdapter
template <class RehasherTypesAndSettings>
using StandardRehasher =
    StandardHasher<StandardRehasherAdapter<RehasherTypesAndSettings>>;

#ifdef _MSC_VER
#pragma warning(pop)
#endif

// Especially with smaller hashes (e.g. 32 bit), there can be noticeable
// false positives due to collisions in the Hash returned by GetHash.
// This function returns the expected FP rate due to those collisions,
// which can be added to the expected FP rate from the underlying data
// structure. (Note: technically, a + b is only a good approximation of
// 1-(1-a)(1-b) == a + b - a*b, if a and b are much closer to 0 than to 1.)
// The number of entries added can be a double here in case it's an
// average.
template <class Hasher, typename Numerical>
double ExpectedCollisionFpRate(const Hasher& hasher, Numerical added) {
  // Standardize on the 'double' specialization
  return ExpectedCollisionFpRate(hasher, 1.0 * added);
}
template <class Hasher>
double ExpectedCollisionFpRate(const Hasher& /*hasher*/, double added) {
  // Technically, there could be overlap among the added, but ignoring that
  // is typically close enough.
  return added / std::pow(256.0, sizeof(typename Hasher::Hash));
}
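// For example (informally): adding one million keys with a 32-bit Hash
// contributes roughly 1e6 / 2^32 ~= 2.3e-4 (about 0.02%) of additional FP
// rate from hash collisions, while with a 64-bit Hash the same million keys
// contribute only about 1e6 / 2^64 ~= 5.4e-14, which is negligible next to
// typical filter FP rates.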
// StandardBanding: a canonical implementation of BandingStorage and
// BacktrackStorage, with convenience API for banding (solving with on-the-fly
// Gaussian elimination) with and without backtracking.
template <class TypesAndSettings>
class StandardBanding : public StandardHasher<TypesAndSettings> {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  StandardBanding(Index num_slots = 0, Index backtrack_size = 0) {
    Reset(num_slots, backtrack_size);
  }

  void Reset(Index num_slots, Index backtrack_size = 0) {
    if (num_slots == 0) {
      // Unusual (TypesAndSettings::kAllowZeroStarts) or "uninitialized"
      num_starts_ = 0;
    } else {
      // Normal
      assert(num_slots >= kCoeffBits);
      if (num_slots > num_slots_allocated_) {
        coeff_rows_.reset(new CoeffRow[num_slots]());
        if (!TypesAndSettings::kHomogeneous) {
          // Note: don't strictly have to zero-init result_rows,
          // except possible information leakage, etc ;)
          result_rows_.reset(new ResultRow[num_slots]());
        }
        num_slots_allocated_ = num_slots;
      } else {
        for (Index i = 0; i < num_slots; ++i) {
          coeff_rows_[i] = 0;
          if (!TypesAndSettings::kHomogeneous) {
            // Note: don't strictly have to zero-init result_rows,
            // except possible information leakage, etc ;)
            result_rows_[i] = 0;
          }
        }
      }
      num_starts_ = num_slots - kCoeffBits + 1;
    }
    EnsureBacktrackSize(backtrack_size);
  }

  void EnsureBacktrackSize(Index backtrack_size) {
    if (backtrack_size > backtrack_size_) {
      backtrack_.reset(new Index[backtrack_size]);
      backtrack_size_ = backtrack_size;
    }
  }

  // ********************************************************************
  // From concept BandingStorage

  inline bool UsePrefetch() const {
    // A rough guesstimate of when prefetching during construction pays off.
    // TODO: verify/validate
    return num_starts_ > 1500;
  }
  inline void Prefetch(Index i) const {
    PREFETCH(&coeff_rows_[i], 1 /* rw */, 1 /* locality */);
    if (!TypesAndSettings::kHomogeneous) {
      PREFETCH(&result_rows_[i], 1 /* rw */, 1 /* locality */);
    }
  }
  inline void LoadRow(Index i, CoeffRow* cr, ResultRow* rr,
                      bool for_back_subst) const {
    *cr = coeff_rows_[i];
    if (TypesAndSettings::kHomogeneous) {
      if (for_back_subst && *cr == 0) {
        // Cheap pseudorandom data to fill unconstrained solution rows
        *rr = static_cast<ResultRow>(i * 0x9E3779B185EBCA87ULL);
      } else {
        *rr = 0;
      }
    } else {
      *rr = result_rows_[i];
    }
  }
  inline void StoreRow(Index i, CoeffRow cr, ResultRow rr) {
    coeff_rows_[i] = cr;
    if (TypesAndSettings::kHomogeneous) {
      assert(rr == 0);
    } else {
      result_rows_[i] = rr;
    }
  }
  inline Index GetNumStarts() const { return num_starts_; }

  // from concept BacktrackStorage, for when backtracking is used
  inline bool UseBacktrack() const { return true; }
  inline void BacktrackPut(Index i, Index to_save) { backtrack_[i] = to_save; }
  inline Index BacktrackGet(Index i) const { return backtrack_[i]; }

  // ********************************************************************
  // Some useful API, still somewhat low level. Here an input is
  // a Key for filters, or std::pair<Key, ResultRow> for general PHSF.

  // Adds a range of inputs to the banding, returning true if successful.
  // False means that none, or only some, of the inputs may have been added,
  // so it's best to Reset this banding before any further use.
  //
  // Adding can fail even before all the "slots" are completely "full".
  //
  template <typename InputIterator>
  bool AddRange(InputIterator begin, InputIterator end) {
    assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Can't add any in this case.
      return begin == end;
    }
    // Normal
    return BandingAddRange(this, *this, begin, end);
  }

  // Adds a range of inputs to the banding, returning true if successful,
  // or if unsuccessful, rolls back to state before this call and returns
  // false. Caller guarantees that the number of inputs in this batch
  // does not exceed `backtrack_size` provided to Reset.
  //
  // Adding can fail even before all the "slots" are completely "full".
  //
  template <typename InputIterator>
  bool AddRangeOrRollBack(InputIterator begin, InputIterator end) {
    assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Can't add any in this case.
      return begin == end;
    }
    // else Normal
    return BandingAddRange(this, this, *this, begin, end);
  }

  // Adds a single input to the banding, returning true if successful.
  // If unsuccessful, returns false and banding state is unchanged.
  //
  // Adding can fail even before all the "slots" are completely "full".
  //
  bool Add(const AddInput& input) {
    // Pointer can act as iterator
    return AddRange(&input, &input + 1);
  }

  // Return the number of "occupied" rows (with non-zero coefficients stored).
  Index GetOccupiedCount() const {
    Index count = 0;
    if (num_starts_ > 0) {
      const Index num_slots = num_starts_ + kCoeffBits - 1;
      for (Index i = 0; i < num_slots; ++i) {
        if (coeff_rows_[i] != 0) {
          ++count;
        }
      }
    }
    return count;
  }

  // Returns whether a row is "occupied" in the banding (non-zero
  // coefficients stored). (Only recommended for debug/test)
  bool IsOccupied(Index i) { return coeff_rows_[i] != 0; }

  // ********************************************************************
  // High-level API

  // Iteratively (a) resets the structure for `num_slots`, (b) attempts
  // to add the range of inputs, and (c) if unsuccessful, chooses the next
  // hash seed, until either successful or unsuccessful with all the
  // allowed seeds. Returns true if successful. In that case, use
  // GetOrdinalSeed() or GetRawSeed() to get the successful seed.
  //
  // The allowed sequence of hash seeds is determined by
  // `starting_ordinal_seed`, the first ordinal seed to be attempted
  // (see StandardHasher), and `ordinal_seed_mask`, a bit mask (power of
  // two minus one) for the range of ordinal seeds to consider. The
  // max number of seeds considered will be ordinal_seed_mask + 1.
  // For filters we suggest `starting_ordinal_seed` be chosen randomly
  // or round-robin, to minimize false positive correlations between keys.
  //
  // If unsuccessful, how best to continue is going to be application
  // specific. It should be possible to choose parameters such that
  // failure is extremely unlikely, using a maximum number of seeds
  // around 32 to 64. (TODO: APIs to help choose parameters.) One option
  // for fallback in constructing a filter is to construct a Bloom filter
  // instead. Increasing num_slots is an option, but should not be used
  // often unless construction maximum latency is a concern (rather than
  // average running time of construction). Instead, choose parameters
  // appropriately and trust that seeds are independent. (Also,
  // increasing num_slots without changing hash seed would have a
  // significant correlation in success, rather than independence.)
  template <typename InputIterator>
  bool ResetAndFindSeedToSolve(Index num_slots, InputIterator begin,
                               InputIterator end,
                               Seed starting_ordinal_seed = 0U,
                               Seed ordinal_seed_mask = 63U) {
    // power of 2 minus 1
    assert((ordinal_seed_mask & (ordinal_seed_mask + 1)) == 0);
    // starting seed is within mask
    assert((starting_ordinal_seed & ordinal_seed_mask) ==
           starting_ordinal_seed);
    starting_ordinal_seed &= ordinal_seed_mask;  // if not debug

    Seed cur_ordinal_seed = starting_ordinal_seed;
    do {
      StandardHasher<TypesAndSettings>::SetOrdinalSeed(cur_ordinal_seed);
      Reset(num_slots);
      bool success = AddRange(begin, end);
      if (success) {
        return true;
      }
      cur_ordinal_seed = (cur_ordinal_seed + 1) & ordinal_seed_mask;
    } while (cur_ordinal_seed != starting_ordinal_seed);
    // Reached limit by circling around
    return false;
  }

  static std::size_t EstimateMemoryUsage(uint32_t num_slots) {
    std::size_t bytes_coeff_rows = num_slots * sizeof(CoeffRow);
    std::size_t bytes_result_rows = num_slots * sizeof(ResultRow);
    std::size_t bytes_backtrack = 0;
    std::size_t bytes_banding =
        bytes_coeff_rows + bytes_result_rows + bytes_backtrack;
    return bytes_banding;
  }

 protected:
  // TODO: explore combining in a struct
  std::unique_ptr<CoeffRow[]> coeff_rows_;
  std::unique_ptr<ResultRow[]> result_rows_;
  // We generally store "starts" instead of slots for speed of GetStart(),
  // as in StandardHasher.
  Index num_starts_ = 0;
  Index num_slots_allocated_ = 0;
  std::unique_ptr<Index[]> backtrack_;
  Index backtrack_size_ = 0;
};
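
// For illustration, typical filter construction with this class is a sketch
// like the following (MyTypesAndSettings, the key container, and the slot
// count are placeholders; num_slots is assumed to already satisfy the chosen
// solution storage's RoundUpNumSlots, and sizing guidance lives in
// ribbon_config.h):
//
//   StandardBanding<MyTypesAndSettings> banding;
//   std::vector<Key> keys = ...;
//   Index num_slots = ...;  // chosen per ribbon_config.h / desired overhead
//   if (banding.ResetAndFindSeedToSolve(num_slots, keys.begin(),
//                                       keys.end())) {
//     // Success: record banding.GetOrdinalSeed() in metadata, then
//     // back-substitute into a solution storage (see classes below).
//   } else {
//     // Extremely unlikely with appropriate parameters; fall back,
//     // e.g. to a Bloom filter.
//   }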
// Implements concept SimpleSolutionStorage, mostly for demonstration
// purposes. This is "in memory" only because it does not handle byte
// ordering issues for serialization.
template <class TypesAndSettings>
class InMemSimpleSolution {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  void PrepareForNumStarts(Index num_starts) {
    if (TypesAndSettings::kAllowZeroStarts && num_starts == 0) {
      // Unusual
      num_starts_ = 0;
    } else {
      // Normal
      const Index num_slots = num_starts + kCoeffBits - 1;
      assert(num_slots >= kCoeffBits);
      if (num_slots > num_slots_allocated_) {
        // Do not need to init the memory
        solution_rows_.reset(new ResultRow[num_slots]);
        num_slots_allocated_ = num_slots;
      }
      num_starts_ = num_starts;
    }
  }

  Index GetNumStarts() const { return num_starts_; }

  ResultRow Load(Index slot_num) const { return solution_rows_[slot_num]; }

  void Store(Index slot_num, ResultRow solution_row) {
    solution_rows_[slot_num] = solution_row;
  }

  // ********************************************************************
  // High-level API

  template <typename BandingStorage>
  void BackSubstFrom(const BandingStorage& bs) {
    if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) {
      // Unusual
      PrepareForNumStarts(0);
    } else {
      // Normal
      SimpleBackSubst(this, bs);
    }
  }

  template <typename PhsfQueryHasher>
  ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const {
    // assert(!TypesAndSettings::kIsFilter);  Can be useful in testing
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual
      return 0;
    } else {
      // Normal
      return SimplePhsfQuery(input, hasher, *this);
    }
  }

  template <typename FilterQueryHasher>
  bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Zero starts presumes no keys added -> always false
      return false;
    } else {
      // Normal, or upper_num_columns_ == 0 means "no space for data" and
      // thus will always return true.
      return SimpleFilterQuery(input, hasher, *this);
    }
  }

  double ExpectedFpRate() const {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual, but we don't have FPs if we always return false.
      return 0.0;
    }
    // else Normal

    // Each result (solution) bit (column) cuts FP rate in half
    return std::pow(0.5, 8U * sizeof(ResultRow));
  }

  // ********************************************************************
  // Static high-level API

  // Round up to a number of slots supported by this structure. Note that
  // this must be taken into account for the banding if this solution
  // layout/storage is to be used.
  static Index RoundUpNumSlots(Index num_slots) {
    // Must be at least kCoeffBits for at least one start.
    // Or, if not using smash, even more, because the hashing is not
    // equipped for stacking up so many entries on a single start location.
    auto min_slots = kCoeffBits * (TypesAndSettings::kUseSmash ? 1 : 2);
    return std::max(num_slots, static_cast<Index>(min_slots));
  }

 protected:
  // We generally store "starts" instead of slots for speed of GetStart(),
  // as in StandardHasher.
  Index num_starts_ = 0;
  Index num_slots_allocated_ = 0;
  std::unique_ptr<ResultRow[]> solution_rows_;
};
// Implements concept InterleavedSolutionStorage always using little-endian
// byte order, so easy for serialization/deserialization. This implementation
// fully supports fractional bits per key, where any number of segments
// (number of bytes multiple of sizeof(CoeffRow)) can be used with any number
// of slots that is a multiple of kCoeffBits.
//
// The structure is passed an externally allocated/de-allocated byte buffer
// that is optionally pre-populated (from storage) for answering queries,
// or can be populated by BackSubstFrom.
//
template <class TypesAndSettings>
class SerializableInterleavedSolution {
 public:
  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);

  // Does not take ownership of `data` but uses it (up to `data_len` bytes)
  // throughout lifetime
  SerializableInterleavedSolution(char* data, size_t data_len)
      : data_(data), data_len_(data_len) {}

  void PrepareForNumStarts(Index num_starts) {
    assert(num_starts == 0 || (num_starts % kCoeffBits == 1));
    num_starts_ = num_starts;

    InternalConfigure();
  }

  Index GetNumStarts() const { return num_starts_; }

  Index GetNumBlocks() const {
    const Index num_slots = num_starts_ + kCoeffBits - 1;
    return num_slots / kCoeffBits;
  }

  Index GetUpperNumColumns() const { return upper_num_columns_; }

  Index GetUpperStartBlock() const { return upper_start_block_; }

  Index GetNumSegments() const {
    return static_cast<Index>(data_len_ / sizeof(CoeffRow));
  }

  CoeffRow LoadSegment(Index segment_num) const {
    assert(data_ != nullptr);  // suppress clang analyzer report
    return DecodeFixedGeneric<CoeffRow>(data_ + segment_num * sizeof(CoeffRow));
  }
  void StoreSegment(Index segment_num, CoeffRow val) {
    assert(data_ != nullptr);  // suppress clang analyzer report
    EncodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow), val);
  }
  void PrefetchSegmentRange(Index begin_segment_num,
                            Index end_segment_num) const {
    if (end_segment_num == begin_segment_num) {
      // Nothing to do
      return;
    }
    char* cur = data_ + begin_segment_num * sizeof(CoeffRow);
    char* last = data_ + (end_segment_num - 1) * sizeof(CoeffRow);
    while (cur < last) {
      PREFETCH(cur, 0 /* rw */, 1 /* locality */);
      cur += CACHE_LINE_SIZE;
    }
    PREFETCH(last, 0 /* rw */, 1 /* locality */);
  }

  // ********************************************************************
  // High-level API

  void ConfigureForNumBlocks(Index num_blocks) {
    if (num_blocks == 0) {
      PrepareForNumStarts(0);
    } else {
      PrepareForNumStarts(num_blocks * kCoeffBits - kCoeffBits + 1);
    }
  }

  void ConfigureForNumSlots(Index num_slots) {
    assert(num_slots % kCoeffBits == 0);
    ConfigureForNumBlocks(num_slots / kCoeffBits);
  }

  template <typename BandingStorage>
  void BackSubstFrom(const BandingStorage& bs) {
    if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) {
      // Unusual
      PrepareForNumStarts(0);
    } else {
      // Normal
      InterleavedBackSubst(this, bs);
    }
  }

  template <typename PhsfQueryHasher>
  ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const {
    // assert(!TypesAndSettings::kIsFilter);  Can be useful in testing
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual
      return 0;
    } else {
      // Normal
      // NOTE: not using a struct to encourage compiler optimization
      Hash hash;
      Index segment_num;
      Index num_columns;
      Index start_bit;
      InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num,
                              &num_columns, &start_bit);
      return InterleavedPhsfQuery(hash, segment_num, num_columns, start_bit,
                                  hasher, *this);
    }
  }

  template <typename FilterQueryHasher>
  bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Zero starts presumes no keys added -> always false
      return false;
    } else {
      // Normal, or upper_num_columns_ == 0 means "no space for data" and
      // thus will always return true.
      // NOTE: not using a struct to encourage compiler optimization
      Hash hash;
      Index segment_num;
      Index num_columns;
      Index start_bit;
      InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num,
                              &num_columns, &start_bit);
      return InterleavedFilterQuery(hash, segment_num, num_columns, start_bit,
                                    hasher, *this);
    }
  }

  double ExpectedFpRate() const {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
      // Unusual. Zero starts presumes no keys added -> always false
      return 0.0;
    }
    // else Normal

    // Note: Ignoring smash setting; still close enough in that case
    double lower_portion =
        (upper_start_block_ * 1.0 * kCoeffBits) / num_starts_;

    // Each result (solution) bit (column) cuts FP rate in half. Weight that
    // for upper and lower number of bits (columns).
    return lower_portion * std::pow(0.5, upper_num_columns_ - 1) +
           (1.0 - lower_portion) * std::pow(0.5, upper_num_columns_);
  }
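  // For example (informally): with upper_num_columns_ == 7 and a quarter of
  // the structure still on the lower 6 columns (lower_portion == 0.25), the
  // estimate above is 0.25 * 2^-6 + 0.75 * 2^-7 ~= 0.0098, i.e. roughly a
  // 1% FP rate from about 6.75 solution bits (columns) per slot.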
  // ********************************************************************
  // Static high-level API

  // Round up to a number of slots supported by this structure. Note that
  // this must be taken into account for the banding if this solution
  // layout/storage is to be used.
  static Index RoundUpNumSlots(Index num_slots) {
    // Must be multiple of kCoeffBits
    Index corrected = (num_slots + kCoeffBits - 1) / kCoeffBits * kCoeffBits;
    // Do not use num_starts==1 unless kUseSmash, because the hashing
    // might not be equipped for stacking up so many entries on a
    // single start location.
    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
      corrected += kCoeffBits;
    }
    return corrected;
  }

  // Round down to a number of slots supported by this structure. Note that
  // this must be taken into account for the banding if this solution
  // layout/storage is to be used.
  static Index RoundDownNumSlots(Index num_slots) {
    // Must be multiple of kCoeffBits
    Index corrected = num_slots / kCoeffBits * kCoeffBits;
    // Do not use num_starts==1 unless kUseSmash, because the hashing
    // might not be equipped for stacking up so many entries on a
    // single start location.
    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
      corrected = 0;
    }
    return corrected;
  }

  // Compute the number of bytes for a given number of slots and desired
  // FP rate. Since the desired FP rate might not be exactly achievable,
  // rounding_bias32==0 means to always round toward lower FP rate
  // than desired (more bytes); rounding_bias32==max uint32_t means always
  // round toward higher FP rate than desired (fewer bytes); other values
  // act as a proportional threshold or bias between the two.
  static size_t GetBytesForFpRate(Index num_slots, double desired_fp_rate,
                                  uint32_t rounding_bias32) {
    return InternalGetBytesForFpRate(num_slots, desired_fp_rate,
                                     1.0 / desired_fp_rate, rounding_bias32);
  }

  // The same, but specifying desired accuracy as 1.0 / FP rate, or
  // one_in_fp_rate. E.g. desired_one_in_fp_rate=100 means 1% FP rate.
  static size_t GetBytesForOneInFpRate(Index num_slots,
                                       double desired_one_in_fp_rate,
                                       uint32_t rounding_bias32) {
    return InternalGetBytesForFpRate(num_slots, 1.0 / desired_one_in_fp_rate,
                                     desired_one_in_fp_rate, rounding_bias32);
  }

 protected:
  static size_t InternalGetBytesForFpRate(Index num_slots,
                                          double desired_fp_rate,
                                          double desired_one_in_fp_rate,
                                          uint32_t rounding_bias32) {
    assert(TypesAndSettings::kIsFilter);
    if (TypesAndSettings::kAllowZeroStarts) {
      if (num_slots == 0) {
        // Unusual. Zero starts presumes no keys added -> always false (no FPs)
        return 0U;
      }
    } else {
      assert(num_slots > 0);
    }
    // Must be rounded up already.
    assert(RoundUpNumSlots(num_slots) == num_slots);

    if (desired_one_in_fp_rate > 1.0 && desired_fp_rate < 1.0) {
      // Typical: less than 100% FP rate
      if (desired_one_in_fp_rate <= static_cast<ResultRow>(-1)) {
        // Typical: Less than maximum result row entropy
        ResultRow rounded = static_cast<ResultRow>(desired_one_in_fp_rate);
        int lower_columns = FloorLog2(rounded);
        double lower_columns_fp_rate = std::pow(2.0, -lower_columns);
        double upper_columns_fp_rate = std::pow(2.0, -(lower_columns + 1));
        // Floating point don't let me down!
        assert(lower_columns_fp_rate >= desired_fp_rate);
        assert(upper_columns_fp_rate <= desired_fp_rate);

        double lower_portion = (desired_fp_rate - upper_columns_fp_rate) /
                               (lower_columns_fp_rate - upper_columns_fp_rate);
        // Floating point don't let me down!
        assert(lower_portion >= 0.0);
        assert(lower_portion <= 1.0);

        double rounding_bias = (rounding_bias32 + 0.5) / double{0x100000000};
        assert(rounding_bias > 0.0);
        assert(rounding_bias < 1.0);

        // Note: Ignoring smash setting; still close enough in that case
        Index num_starts = num_slots - kCoeffBits + 1;
        // Lower upper_start_block means lower FP rate (higher accuracy)
        Index upper_start_block = static_cast<Index>(
            (lower_portion * num_starts + rounding_bias) / kCoeffBits);
        Index num_blocks = num_slots / kCoeffBits;
        assert(upper_start_block < num_blocks);

        // Start by assuming all blocks use lower number of columns
        Index num_segments = num_blocks * static_cast<Index>(lower_columns);
        // Correct by 1 each for blocks using upper number of columns
        num_segments += (num_blocks - upper_start_block);
        // Total bytes
        return num_segments * sizeof(CoeffRow);
      } else {
        // one_in_fp_rate too big, thus requested FP rate is smaller than
        // supported. Use max number of columns for minimum supported FP rate.
        return num_slots * sizeof(ResultRow);
      }
    } else {
      // Effectively asking for 100% FP rate, or NaN etc.
      if (TypesAndSettings::kAllowZeroStarts) {
        // Zero segments
        return 0U;
      } else {
        // One segment (minimum size, maximizing FP rate)
        return sizeof(CoeffRow);
      }
    }
  }

  void InternalConfigure() {
    const Index num_blocks = GetNumBlocks();
    Index num_segments = GetNumSegments();

    if (num_blocks == 0) {
      // Exceptional
      upper_num_columns_ = 0;
      upper_start_block_ = 0;
    } else {
      // Normal
      upper_num_columns_ =
          (num_segments + /*round up*/ num_blocks - 1) / num_blocks;
      upper_start_block_ = upper_num_columns_ * num_blocks - num_segments;
      // Unless that's more columns than supported by ResultRow data type
      if (upper_num_columns_ > 8U * sizeof(ResultRow)) {
        // Use maximum columns (there will be space unused)
        upper_num_columns_ = static_cast<Index>(8U * sizeof(ResultRow));
        upper_start_block_ = 0;
        num_segments = num_blocks * upper_num_columns_;
      }
    }
    // Update data_len_ for correct rounding and/or unused space
    // NOTE: unused space stays gone if we PrepareForNumStarts again.
    // We are prioritizing minimizing the number of fields over making
    // the "unused space" feature work well.
    data_len_ = num_segments * sizeof(CoeffRow);
  }

  char* const data_;
  size_t data_len_;

  Index num_starts_ = 0;
  Index upper_num_columns_ = 0;
  Index upper_start_block_ = 0;
};
}  // namespace ribbon

}  // namespace ROCKSDB_NAMESPACE

// For convenience working with templates
#define IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings)                            \
  using Hasher = ROCKSDB_NAMESPACE::ribbon::StandardHasher<TypesAndSettings>; \
  using Banding =                                                             \
      ROCKSDB_NAMESPACE::ribbon::StandardBanding<TypesAndSettings>;           \
  using SimpleSoln =                                                          \
      ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution<TypesAndSettings>;       \
  using InterleavedSoln =                                                     \
      ROCKSDB_NAMESPACE::ribbon::SerializableInterleavedSolution<             \
          TypesAndSettings>;                                                  \
  static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) +       \
                        sizeof(InterleavedSoln) >                             \
                    0,                                                        \
                "avoid unused warnings, semicolon expected after macro call")