aligned_buffer.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <algorithm>
  11. #include <cassert>
  12. #include "port/malloc.h"
  13. #include "port/port.h"
  14. #include "rocksdb/file_system.h"
  15. namespace ROCKSDB_NAMESPACE {
  16. // This file contains utilities to handle the alignment of pages and buffers.
  17. // Truncate to a multiple of page_size, which is also a page boundary. This
  18. // helps to figuring out the right alignment.
  19. // Example:
  20. // TruncateToPageBoundary(4096, 5000) => 4096
  21. // TruncateToPageBoundary((4096, 10000) => 8192
  22. inline size_t TruncateToPageBoundary(size_t page_size, size_t s) {
  23. s -= (s & (page_size - 1));
  24. assert((s % page_size) == 0);
  25. return s;
  26. }
  27. // Round up x to a multiple of y.
  28. // Example:
  29. // Roundup(13, 5) => 15
  30. // Roundup(201, 16) => 208
  31. inline size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; }
  32. // Round down x to a multiple of y.
  33. // Example:
  34. // Rounddown(13, 5) => 10
  35. // Rounddown(201, 16) => 192
  36. inline size_t Rounddown(size_t x, size_t y) { return (x / y) * y; }
  37. // AlignedBuffer manages a buffer by taking alignment into consideration, and
  38. // aligns the buffer start and end positions. It is mainly used for direct I/O,
  39. // though it can be used other purposes as well.
  40. // It also supports expanding the managed buffer, and copying whole or part of
  41. // the data from old buffer into the new expanded buffer. Such a copy especially
  42. // helps in cases avoiding an IO to re-fetch the data from disk.
  43. //
  44. // Example:
  45. // AlignedBuffer buf;
  46. // buf.Alignment(alignment);
  47. // buf.AllocateNewBuffer(user_requested_buf_size);
  48. // ...
  49. // buf.AllocateNewBuffer(2*user_requested_buf_size, /*copy_data*/ true,
  50. // copy_offset, copy_len);
  51. class AlignedBuffer {
  52. size_t alignment_;
  53. FSAllocationPtr buf_;
  54. size_t capacity_;
  55. size_t cursize_;
  56. char* bufstart_;
  57. public:
  58. AlignedBuffer()
  59. : alignment_(), capacity_(0), cursize_(0), bufstart_(nullptr) {}
  60. AlignedBuffer(AlignedBuffer&& o) noexcept { *this = std::move(o); }
  61. AlignedBuffer& operator=(AlignedBuffer&& o) noexcept {
  62. alignment_ = std::move(o.alignment_);
  63. buf_ = std::move(o.buf_);
  64. capacity_ = std::move(o.capacity_);
  65. cursize_ = std::move(o.cursize_);
  66. bufstart_ = std::move(o.bufstart_);
  67. return *this;
  68. }
  69. AlignedBuffer(const AlignedBuffer&) = delete;
  70. AlignedBuffer& operator=(const AlignedBuffer&) = delete;
  71. static bool isAligned(const void* ptr, size_t alignment) {
  72. return reinterpret_cast<uintptr_t>(ptr) % alignment == 0;
  73. }
  74. static bool isAligned(size_t n, size_t alignment) {
  75. return n % alignment == 0;
  76. }
  77. size_t Alignment() const { return alignment_; }
  78. size_t Capacity() const { return capacity_; }
  79. size_t CurrentSize() const { return cursize_; }
  80. const char* BufferStart() const { return bufstart_; }
  81. char* BufferStart() { return bufstart_; }
  82. void Clear() { cursize_ = 0; }
  83. FSAllocationPtr Release() {
  84. cursize_ = 0;
  85. capacity_ = 0;
  86. bufstart_ = nullptr;
  87. return std::move(buf_);
  88. }
  89. void Alignment(size_t alignment) {
  90. assert(alignment > 0);
  91. assert((alignment & (alignment - 1)) == 0);
  92. alignment_ = alignment;
  93. }
  94. // Points the buffer to the result without allocating extra
  95. // memory or performing any data copies. Takes ownership of the
  96. // FSAllocationPtr. This method is called when we want to reuse the buffer
  97. // provided by the file system
  98. void SetBuffer(Slice& result, FSAllocationPtr new_buf) {
  99. alignment_ = 1;
  100. capacity_ = result.size();
  101. cursize_ = result.size();
  102. buf_ = std::move(new_buf);
  103. assert(buf_.get() != nullptr);
  104. // Note: bufstart_ must point to result.data() and not new_buf, which can
  105. // point to any arbitrary object
  106. bufstart_ = const_cast<char*>(result.data());
  107. }
  108. // Allocates a new buffer and sets the start position to the first aligned
  109. // byte.
  110. //
  111. // requested_capacity: requested new buffer capacity. This capacity will be
  112. // rounded up based on alignment.
  113. // copy_data: Copy data from old buffer to new buffer. If copy_offset and
  114. // copy_len are not passed in and the new requested capacity is bigger
  115. // than the existing buffer's capacity, the data in the exising buffer is
  116. // fully copied over to the new buffer.
  117. // copy_offset: Copy data from this offset in old buffer.
  118. // copy_len: Number of bytes to copy.
  119. //
  120. // The function does nothing if the new requested_capacity is smaller than
  121. // the current buffer capacity and copy_data is true i.e. the old buffer is
  122. // retained as is.
  123. void AllocateNewBuffer(size_t requested_capacity, bool copy_data = false,
  124. uint64_t copy_offset = 0, size_t copy_len = 0) {
  125. assert(alignment_ > 0);
  126. assert((alignment_ & (alignment_ - 1)) == 0);
  127. copy_len = copy_len > 0 ? copy_len : cursize_;
  128. if (copy_data && requested_capacity < copy_len) {
  129. // If we are downsizing to a capacity that is smaller than the current
  130. // data in the buffer -- Ignore the request.
  131. return;
  132. }
  133. size_t new_capacity = Roundup(requested_capacity, alignment_);
  134. char* new_buf = new char[new_capacity + alignment_];
  135. char* new_bufstart = reinterpret_cast<char*>(
  136. (reinterpret_cast<uintptr_t>(new_buf) + (alignment_ - 1)) &
  137. ~static_cast<uintptr_t>(alignment_ - 1));
  138. if (copy_data) {
  139. assert(bufstart_ + copy_offset + copy_len <= bufstart_ + cursize_);
  140. memcpy(new_bufstart, bufstart_ + copy_offset, copy_len);
  141. cursize_ = copy_len;
  142. } else {
  143. cursize_ = 0;
  144. }
  145. bufstart_ = new_bufstart;
  146. capacity_ = new_capacity;
  147. // buf_ is a FSAllocationPtr which takes in a deleter
  148. // we can just wrap the regular default delete that would have been called
  149. buf_ = std::unique_ptr<void, std::function<void(void*)>>(
  150. static_cast<void*>(new_buf),
  151. [](void* p) { delete[] static_cast<char*>(p); });
  152. }
  153. // Append to the buffer.
  154. //
  155. // src : source to copy the data from.
  156. // append_size : number of bytes to copy from src.
  157. // Returns the number of bytes appended.
  158. //
  159. // If append_size is more than the remaining buffer size only the
  160. // remaining-size worth of bytes are copied.
  161. size_t Append(const char* src, size_t append_size) {
  162. size_t buffer_remaining = capacity_ - cursize_;
  163. size_t to_copy = std::min(append_size, buffer_remaining);
  164. if (to_copy > 0) {
  165. memcpy(bufstart_ + cursize_, src, to_copy);
  166. cursize_ += to_copy;
  167. }
  168. return to_copy;
  169. }
  170. // Read from the buffer.
  171. //
  172. // dest : destination buffer to copy the data to.
  173. // offset : the buffer offset to start reading from.
  174. // read_size : the number of bytes to copy from the buffer to dest.
  175. // Returns the number of bytes read/copied to dest.
  176. size_t Read(char* dest, size_t offset, size_t read_size) const {
  177. assert(offset < cursize_);
  178. size_t to_read = 0;
  179. if (offset < cursize_) {
  180. to_read = std::min(cursize_ - offset, read_size);
  181. }
  182. if (to_read > 0) {
  183. memcpy(dest, bufstart_ + offset, to_read);
  184. }
  185. return to_read;
  186. }
  187. // Pad to the end of alignment with "padding"
  188. void PadToAlignmentWith(int padding) {
  189. size_t total_size = Roundup(cursize_, alignment_);
  190. size_t pad_size = total_size - cursize_;
  191. if (pad_size > 0) {
  192. assert((pad_size + cursize_) <= capacity_);
  193. memset(bufstart_ + cursize_, padding, pad_size);
  194. cursize_ += pad_size;
  195. }
  196. }
  197. void PadWith(size_t pad_size, int padding) {
  198. assert((pad_size + cursize_) <= capacity_);
  199. memset(bufstart_ + cursize_, padding, pad_size);
  200. cursize_ += pad_size;
  201. }
  202. // After a partial flush move the tail to the beginning of the buffer.
  203. void RefitTail(size_t tail_offset, size_t tail_size) {
  204. if (tail_size > 0) {
  205. memmove(bufstart_, bufstart_ + tail_offset, tail_size);
  206. }
  207. cursize_ = tail_size;
  208. }
  209. // Returns a place to start appending.
  210. // WARNING: Note that it is possible to write past the end of the buffer if
  211. // the buffer is modified without using the write APIs or encapsulation
  212. // offered by AlignedBuffer. It is up to the user to guard against such
  213. // errors.
  214. char* Destination() { return bufstart_ + cursize_; }
  215. void Size(size_t cursize) { cursize_ = cursize; }
  216. };
  217. // Related to std::string but more easily avoids zeroing out a buffer that's
  218. // going to be overwritten anyway.
  219. class GrowableBuffer {
  220. public:
  221. GrowableBuffer() : capacity_(0) {}
  222. ~GrowableBuffer() { free(data_); }
  223. // No copies
  224. GrowableBuffer(const GrowableBuffer&) = delete;
  225. GrowableBuffer& operator=(const GrowableBuffer&) = delete;
  226. // Movable
  227. GrowableBuffer(GrowableBuffer&& other) noexcept
  228. : data_(other.data_), size_(other.size_), capacity_(other.capacity_) {
  229. other.data_ = nullptr;
  230. other.size_ = 0;
  231. other.capacity_ = 0;
  232. }
  233. GrowableBuffer& operator=(GrowableBuffer&& other) noexcept {
  234. if (this == &other) {
  235. return *this;
  236. }
  237. free(data_);
  238. data_ = other.data_;
  239. size_ = other.size_;
  240. capacity_ = other.capacity_;
  241. other.data_ = nullptr;
  242. other.size_ = 0;
  243. other.capacity_ = 0;
  244. return *this;
  245. }
  246. char* data() { return data_; }
  247. const char* data() const { return data_; }
  248. size_t size() const { return size_; }
  249. size_t& MutableSize() { return size_; }
  250. bool empty() const { return size_ == 0; }
  251. void Reset() { size_ = 0; }
  252. void ResetForSize(size_t new_size) {
  253. if (new_size > capacity_) {
  254. free(data_);
  255. size_t new_capacity = std::max(capacity_ * 2, new_size);
  256. new_capacity = std::max(size_t{64}, new_capacity);
  257. data_ = static_cast<char*>(malloc(new_capacity));
  258. #ifdef ROCKSDB_MALLOC_USABLE_SIZE
  259. capacity_ = malloc_usable_size(data_);
  260. #else
  261. capacity_ = new_capacity;
  262. #endif
  263. // Warm the memory in CPU cache
  264. for (size_t i = 0; i < new_capacity; i += CACHE_LINE_SIZE) {
  265. data_[i] = 1;
  266. }
  267. }
  268. size_ = new_size;
  269. }
  270. Slice AsSlice() const { return Slice(data_, size_); }
  271. operator Slice() const { return AsSlice(); }
  272. private:
  273. char* data_ = nullptr;
  274. size_t size_ = 0;
  275. size_t capacity_;
  276. };
  277. } // namespace ROCKSDB_NAMESPACE