io_win.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <stdint.h>
  11. #include <mutex>
  12. #include <string>
  13. #include "rocksdb/status.h"
  14. #include "rocksdb/env.h"
  15. #include "util/aligned_buffer.h"
  16. #include <windows.h>
  17. namespace ROCKSDB_NAMESPACE {
  18. namespace port {
  19. std::string GetWindowsErrSz(DWORD err);
  20. inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
  21. return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
  22. ? Status::NoSpace(context, GetWindowsErrSz(err))
  23. : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
  24. ? Status::PathNotFound(context, GetWindowsErrSz(err))
  25. : Status::IOError(context, GetWindowsErrSz(err));
  26. }
  27. inline Status IOErrorFromLastWindowsError(const std::string& context) {
  28. return IOErrorFromWindowsError(context, GetLastError());
  29. }
  30. inline Status IOError(const std::string& context, int err_number) {
  31. return (err_number == ENOSPC)
  32. ? Status::NoSpace(context, strerror(err_number))
  33. : (err_number == ENOENT)
  34. ? Status::PathNotFound(context, strerror(err_number))
  35. : Status::IOError(context, strerror(err_number));
  36. }
  37. class WinFileData;
  38. Status pwrite(const WinFileData* file_data, const Slice& data,
  39. uint64_t offset, size_t& bytes_written);
  40. Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
  41. uint64_t offset, size_t& bytes_read);
  42. Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
  43. Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
  44. size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
  45. class WinFileData {
  46. protected:
  47. const std::string filename_;
  48. HANDLE hFile_;
  49. // If true, the I/O issued would be direct I/O which the buffer
  50. // will need to be aligned (not sure there is a guarantee that the buffer
  51. // passed in is aligned).
  52. const bool use_direct_io_;
  53. public:
  54. // We want this class be usable both for inheritance (prive
  55. // or protected) and for containment so __ctor and __dtor public
  56. WinFileData(const std::string& filename, HANDLE hFile, bool direct_io)
  57. : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {}
  58. virtual ~WinFileData() { this->CloseFile(); }
  59. bool CloseFile() {
  60. bool result = true;
  61. if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
  62. result = ::CloseHandle(hFile_);
  63. assert(result);
  64. hFile_ = NULL;
  65. }
  66. return result;
  67. }
  68. const std::string& GetName() const { return filename_; }
  69. HANDLE GetFileHandle() const { return hFile_; }
  70. bool use_direct_io() const { return use_direct_io_; }
  71. WinFileData(const WinFileData&) = delete;
  72. WinFileData& operator=(const WinFileData&) = delete;
  73. };
  74. class WinSequentialFile : protected WinFileData, public SequentialFile {
  75. // Override for behavior change when creating a custom env
  76. virtual Status PositionedReadInternal(char* src, size_t numBytes,
  77. uint64_t offset, size_t& bytes_read) const;
  78. public:
  79. WinSequentialFile(const std::string& fname, HANDLE f,
  80. const EnvOptions& options);
  81. ~WinSequentialFile();
  82. WinSequentialFile(const WinSequentialFile&) = delete;
  83. WinSequentialFile& operator=(const WinSequentialFile&) = delete;
  84. virtual Status Read(size_t n, Slice* result, char* scratch) override;
  85. virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
  86. char* scratch) override;
  87. virtual Status Skip(uint64_t n) override;
  88. virtual Status InvalidateCache(size_t offset, size_t length) override;
  89. virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
  90. };
  91. // mmap() based random-access
  92. class WinMmapReadableFile : private WinFileData, public RandomAccessFile {
  93. HANDLE hMap_;
  94. const void* mapped_region_;
  95. const size_t length_;
  96. public:
  97. // mapped_region_[0,length-1] contains the mmapped contents of the file.
  98. WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
  99. const void* mapped_region, size_t length);
  100. ~WinMmapReadableFile();
  101. WinMmapReadableFile(const WinMmapReadableFile&) = delete;
  102. WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
  103. virtual Status Read(uint64_t offset, size_t n, Slice* result,
  104. char* scratch) const override;
  105. virtual Status InvalidateCache(size_t offset, size_t length) override;
  106. virtual size_t GetUniqueId(char* id, size_t max_size) const override;
  107. };
  108. // We preallocate and use memcpy to append new
  109. // data to the file. This is safe since we either properly close the
  110. // file before reading from it, or for log files, the reading code
  111. // knows enough to skip zero suffixes.
  112. class WinMmapFile : private WinFileData, public WritableFile {
  113. private:
  114. HANDLE hMap_;
  115. const size_t page_size_; // We flush the mapping view in page_size
  116. // increments. We may decide if this is a memory
  117. // page size or SSD page size
  118. const size_t
  119. allocation_granularity_; // View must start at such a granularity
  120. size_t reserved_size_; // Preallocated size
  121. size_t mapping_size_; // The max size of the mapping object
  122. // we want to guess the final file size to minimize the remapping
  123. size_t view_size_; // How much memory to map into a view at a time
  124. char* mapped_begin_; // Must begin at the file offset that is aligned with
  125. // allocation_granularity_
  126. char* mapped_end_;
  127. char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_])
  128. char* last_sync_; // Where have we synced up to
  129. uint64_t file_offset_; // Offset of mapped_begin_ in file
  130. // Do we have unsynced writes?
  131. bool pending_sync_;
  132. // Can only truncate or reserve to a sector size aligned if
  133. // used on files that are opened with Unbuffered I/O
  134. Status TruncateFile(uint64_t toSize);
  135. Status UnmapCurrentRegion();
  136. Status MapNewRegion();
  137. virtual Status PreallocateInternal(uint64_t spaceToReserve);
  138. public:
  139. WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
  140. size_t allocation_granularity, const EnvOptions& options);
  141. ~WinMmapFile();
  142. WinMmapFile(const WinMmapFile&) = delete;
  143. WinMmapFile& operator=(const WinMmapFile&) = delete;
  144. virtual Status Append(const Slice& data) override;
  145. // Means Close() will properly take care of truncate
  146. // and it does not need any additional information
  147. virtual Status Truncate(uint64_t size) override;
  148. virtual Status Close() override;
  149. virtual Status Flush() override;
  150. // Flush only data
  151. virtual Status Sync() override;
  152. /**
  153. * Flush data as well as metadata to stable storage.
  154. */
  155. virtual Status Fsync() override;
  156. /**
  157. * Get the size of valid data in the file. This will not match the
  158. * size that is returned from the filesystem because we use mmap
  159. * to extend file by map_size every time.
  160. */
  161. virtual uint64_t GetFileSize() override;
  162. virtual Status InvalidateCache(size_t offset, size_t length) override;
  163. virtual Status Allocate(uint64_t offset, uint64_t len) override;
  164. virtual size_t GetUniqueId(char* id, size_t max_size) const override;
  165. };
  166. class WinRandomAccessImpl {
  167. protected:
  168. WinFileData* file_base_;
  169. size_t alignment_;
  170. // Override for behavior change when creating a custom env
  171. virtual Status PositionedReadInternal(char* src, size_t numBytes,
  172. uint64_t offset, size_t& bytes_read) const;
  173. WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
  174. const EnvOptions& options);
  175. virtual ~WinRandomAccessImpl() {}
  176. Status ReadImpl(uint64_t offset, size_t n, Slice* result,
  177. char* scratch) const;
  178. size_t GetAlignment() const { return alignment_; }
  179. public:
  180. WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
  181. WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
  182. };
  183. // pread() based random-access
  184. class WinRandomAccessFile
  185. : private WinFileData,
  186. protected WinRandomAccessImpl, // Want to be able to override
  187. // PositionedReadInternal
  188. public RandomAccessFile {
  189. public:
  190. WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
  191. const EnvOptions& options);
  192. ~WinRandomAccessFile();
  193. virtual Status Read(uint64_t offset, size_t n, Slice* result,
  194. char* scratch) const override;
  195. virtual size_t GetUniqueId(char* id, size_t max_size) const override;
  196. virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
  197. virtual Status InvalidateCache(size_t offset, size_t length) override;
  198. virtual size_t GetRequiredBufferAlignment() const override;
  199. };
  200. // This is a sequential write class. It has been mimicked (as others) after
  201. // the original Posix class. We add support for unbuffered I/O on windows as
  202. // well
  203. // we utilize the original buffer as an alignment buffer to write directly to
  204. // file with no buffering.
  205. // No buffering requires that the provided buffer is aligned to the physical
  206. // sector size (SSD page size) and
  207. // that all SetFilePointer() operations to occur with such an alignment.
  208. // We thus always write in sector/page size increments to the drive and leave
  209. // the tail for the next write OR for Close() at which point we pad with zeros.
  210. // No padding is required for
  211. // buffered access.
  212. class WinWritableImpl {
  213. protected:
  214. WinFileData* file_data_;
  215. const uint64_t alignment_;
  216. uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND
  217. uint64_t reservedsize_; // how far we have reserved space
  218. virtual Status PreallocateInternal(uint64_t spaceToReserve);
  219. WinWritableImpl(WinFileData* file_data, size_t alignment);
  220. ~WinWritableImpl() {}
  221. uint64_t GetAlignement() const { return alignment_; }
  222. Status AppendImpl(const Slice& data);
  223. // Requires that the data is aligned as specified by
  224. // GetRequiredBufferAlignment()
  225. Status PositionedAppendImpl(const Slice& data, uint64_t offset);
  226. Status TruncateImpl(uint64_t size);
  227. Status CloseImpl();
  228. Status SyncImpl();
  229. uint64_t GetFileNextWriteOffset() {
  230. // Double accounting now here with WritableFileWriter
  231. // and this size will be wrong when unbuffered access is used
  232. // but tests implement their own writable files and do not use
  233. // WritableFileWrapper
  234. // so we need to squeeze a square peg through
  235. // a round hole here.
  236. return next_write_offset_;
  237. }
  238. Status AllocateImpl(uint64_t offset, uint64_t len);
  239. public:
  240. WinWritableImpl(const WinWritableImpl&) = delete;
  241. WinWritableImpl& operator=(const WinWritableImpl&) = delete;
  242. };
  243. class WinWritableFile : private WinFileData,
  244. protected WinWritableImpl,
  245. public WritableFile {
  246. public:
  247. WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
  248. size_t capacity, const EnvOptions& options);
  249. ~WinWritableFile();
  250. virtual Status Append(const Slice& data) override;
  251. // Requires that the data is aligned as specified by
  252. // GetRequiredBufferAlignment()
  253. virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
  254. // Need to implement this so the file is truncated correctly
  255. // when buffered and unbuffered mode
  256. virtual Status Truncate(uint64_t size) override;
  257. virtual Status Close() override;
  258. // write out the cached data to the OS cache
  259. // This is now taken care of the WritableFileWriter
  260. virtual Status Flush() override;
  261. virtual Status Sync() override;
  262. virtual Status Fsync() override;
  263. virtual bool IsSyncThreadSafe() const override;
  264. // Indicates if the class makes use of direct I/O
  265. // Use PositionedAppend
  266. virtual bool use_direct_io() const override;
  267. virtual size_t GetRequiredBufferAlignment() const override;
  268. virtual uint64_t GetFileSize() override;
  269. virtual Status Allocate(uint64_t offset, uint64_t len) override;
  270. virtual size_t GetUniqueId(char* id, size_t max_size) const override;
  271. };
  272. class WinRandomRWFile : private WinFileData,
  273. protected WinRandomAccessImpl,
  274. protected WinWritableImpl,
  275. public RandomRWFile {
  276. public:
  277. WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
  278. const EnvOptions& options);
  279. ~WinRandomRWFile() {}
  280. // Indicates if the class makes use of direct I/O
  281. // If false you must pass aligned buffer to Write()
  282. virtual bool use_direct_io() const override;
  283. // Use the returned alignment value to allocate aligned
  284. // buffer for Write() when use_direct_io() returns true
  285. virtual size_t GetRequiredBufferAlignment() const override;
  286. // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
  287. // Pass aligned buffer when use_direct_io() returns true.
  288. virtual Status Write(uint64_t offset, const Slice& data) override;
  289. // Read up to `n` bytes starting from offset `offset` and store them in
  290. // result, provided `scratch` size should be at least `n`.
  291. // Returns Status::OK() on success.
  292. virtual Status Read(uint64_t offset, size_t n, Slice* result,
  293. char* scratch) const override;
  294. virtual Status Flush() override;
  295. virtual Status Sync() override;
  296. virtual Status Fsync() { return Sync(); }
  297. virtual Status Close() override;
  298. };
  299. class WinMemoryMappedBuffer : public MemoryMappedFileBuffer {
  300. private:
  301. HANDLE file_handle_;
  302. HANDLE map_handle_;
  303. public:
  304. WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base, size_t size) :
  305. MemoryMappedFileBuffer(base, size),
  306. file_handle_(file_handle),
  307. map_handle_(map_handle) {}
  308. ~WinMemoryMappedBuffer() override;
  309. };
  310. class WinDirectory : public Directory {
  311. HANDLE handle_;
  312. public:
  313. explicit WinDirectory(HANDLE h) noexcept : handle_(h) {
  314. assert(handle_ != INVALID_HANDLE_VALUE);
  315. }
  316. ~WinDirectory() {
  317. ::CloseHandle(handle_);
  318. }
  319. virtual Status Fsync() override;
  320. size_t GetUniqueId(char* id, size_t max_size) const override;
  321. };
  322. class WinFileLock : public FileLock {
  323. public:
  324. explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
  325. assert(hFile != NULL);
  326. assert(hFile != INVALID_HANDLE_VALUE);
  327. }
  328. ~WinFileLock();
  329. private:
  330. HANDLE hFile_;
  331. };
  332. }
  333. } // namespace ROCKSDB_NAMESPACE