io_win.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <stdint.h>
  11. #include <windows.h>
  12. #include <mutex>
  13. #include <string>
  14. #include "rocksdb/file_system.h"
  15. #include "rocksdb/status.h"
  16. #include "util/aligned_buffer.h"
  17. #include "util/string_util.h"
  18. namespace ROCKSDB_NAMESPACE {
  19. namespace port {
  20. std::string GetWindowsErrSz(DWORD err);
  21. inline IOStatus IOErrorFromWindowsError(const std::string& context, DWORD err) {
  22. return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
  23. ? IOStatus::NoSpace(context, GetWindowsErrSz(err))
  24. : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
  25. ? IOStatus::PathNotFound(context, GetWindowsErrSz(err))
  26. : IOStatus::IOError(context, GetWindowsErrSz(err));
  27. }
  28. inline IOStatus IOErrorFromLastWindowsError(const std::string& context) {
  29. return IOErrorFromWindowsError(context, GetLastError());
  30. }
  31. inline IOStatus IOError(const std::string& context, int err_number) {
  32. return (err_number == ENOSPC)
  33. ? IOStatus::NoSpace(context, errnoStr(err_number).c_str())
  34. : (err_number == ENOENT)
  35. ? IOStatus::PathNotFound(context, errnoStr(err_number).c_str())
  36. : IOStatus::IOError(context, errnoStr(err_number).c_str());
  37. }
  38. class WinFileData;
  39. IOStatus pwrite(const WinFileData* file_data, const Slice& data,
  40. uint64_t offset, size_t& bytes_written);
  41. IOStatus pread(const WinFileData* file_data, char* src, size_t num_bytes,
  42. uint64_t offset, size_t& bytes_read);
  43. IOStatus fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
  44. IOStatus ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
  45. size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
  46. class WinFileData {
  47. protected:
  48. const std::string filename_;
  49. HANDLE hFile_;
  50. // If true, the I/O issued would be direct I/O which the buffer
  51. // will need to be aligned (not sure there is a guarantee that the buffer
  52. // passed in is aligned).
  53. const bool use_direct_io_;
  54. const size_t sector_size_;
  55. public:
  56. // We want this class be usable both for inheritance (prive
  57. // or protected) and for containment so __ctor and __dtor public
  58. WinFileData(const std::string& filename, HANDLE hFile, bool direct_io);
  59. virtual ~WinFileData() { this->CloseFile(); }
  60. bool CloseFile() {
  61. bool result = true;
  62. if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
  63. result = ::CloseHandle(hFile_);
  64. assert(result);
  65. hFile_ = NULL;
  66. }
  67. return result;
  68. }
  69. const std::string& GetName() const { return filename_; }
  70. HANDLE GetFileHandle() const { return hFile_; }
  71. bool use_direct_io() const { return use_direct_io_; }
  72. size_t GetSectorSize() const { return sector_size_; }
  73. bool IsSectorAligned(const size_t off) const;
  74. WinFileData(const WinFileData&) = delete;
  75. WinFileData& operator=(const WinFileData&) = delete;
  76. };
  77. class WinSequentialFile : protected WinFileData, public FSSequentialFile {
  78. // Override for behavior change when creating a custom env
  79. virtual IOStatus PositionedReadInternal(char* src, size_t numBytes,
  80. uint64_t offset,
  81. size_t& bytes_read) const;
  82. public:
  83. WinSequentialFile(const std::string& fname, HANDLE f,
  84. const FileOptions& options);
  85. ~WinSequentialFile();
  86. WinSequentialFile(const WinSequentialFile&) = delete;
  87. WinSequentialFile& operator=(const WinSequentialFile&) = delete;
  88. IOStatus Read(size_t n, const IOOptions& options, Slice* result,
  89. char* scratch, IODebugContext* dbg) override;
  90. IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
  91. Slice* result, char* scratch,
  92. IODebugContext* dbg) override;
  93. IOStatus Skip(uint64_t n) override;
  94. IOStatus InvalidateCache(size_t offset, size_t length) override;
  95. bool use_direct_io() const override { return WinFileData::use_direct_io(); }
  96. };
  97. // mmap() based random-access
  98. class WinMmapReadableFile : private WinFileData, public FSRandomAccessFile {
  99. HANDLE hMap_;
  100. const void* mapped_region_;
  101. const size_t length_;
  102. public:
  103. // mapped_region_[0,length-1] contains the mmapped contents of the file.
  104. WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
  105. const void* mapped_region, size_t length);
  106. ~WinMmapReadableFile();
  107. WinMmapReadableFile(const WinMmapReadableFile&) = delete;
  108. WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
  109. IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
  110. Slice* result, char* scratch,
  111. IODebugContext* dbg) const override;
  112. IOStatus InvalidateCache(size_t offset, size_t length) override;
  113. size_t GetUniqueId(char* id, size_t max_size) const override;
  114. IOStatus GetFileSize(uint64_t* file_size) override;
  115. };
  116. // We preallocate and use memcpy to append new
  117. // data to the file. This is safe since we either properly close the
  118. // file before reading from it, or for log files, the reading code
  119. // knows enough to skip zero suffixes.
  120. class WinMmapFile : private WinFileData, public FSWritableFile {
  121. private:
  122. HANDLE hMap_;
  123. const size_t page_size_; // We flush the mapping view in page_size
  124. // increments. We may decide if this is a memory
  125. // page size or SSD page size
  126. const size_t
  127. allocation_granularity_; // View must start at such a granularity
  128. size_t reserved_size_; // Preallocated size
  129. size_t mapping_size_; // The max size of the mapping object
  130. // we want to guess the final file size to minimize the remapping
  131. size_t view_size_; // How much memory to map into a view at a time
  132. char* mapped_begin_; // Must begin at the file offset that is aligned with
  133. // allocation_granularity_
  134. char* mapped_end_;
  135. char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_])
  136. char* last_sync_; // Where have we synced up to
  137. uint64_t file_offset_; // Offset of mapped_begin_ in file
  138. // Do we have unsynced writes?
  139. bool pending_sync_;
  140. // Can only truncate or reserve to a sector size aligned if
  141. // used on files that are opened with Unbuffered I/O
  142. IOStatus TruncateFile(uint64_t toSize);
  143. IOStatus UnmapCurrentRegion();
  144. IOStatus MapNewRegion(const IOOptions& options, IODebugContext* dbg);
  145. virtual IOStatus PreallocateInternal(uint64_t spaceToReserve);
  146. public:
  147. WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
  148. size_t allocation_granularity, const FileOptions& options);
  149. ~WinMmapFile();
  150. WinMmapFile(const WinMmapFile&) = delete;
  151. WinMmapFile& operator=(const WinMmapFile&) = delete;
  152. IOStatus Append(const Slice& data, const IOOptions& options,
  153. IODebugContext* dbg) override;
  154. IOStatus Append(const Slice& data, const IOOptions& opts,
  155. const DataVerificationInfo& /* verification_info */,
  156. IODebugContext* dbg) override {
  157. return Append(data, opts, dbg);
  158. }
  159. // Means Close() will properly take care of truncate
  160. // and it does not need any additional information
  161. IOStatus Truncate(uint64_t size, const IOOptions& options,
  162. IODebugContext* dbg) override;
  163. IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
  164. IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
  165. // Flush only data
  166. IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
  167. /**
  168. * Flush data as well as metadata to stable storage.
  169. */
  170. IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
  171. /**
  172. * Get the size of valid data in the file. This will not match the
  173. * size that is returned from the filesystem because we use mmap
  174. * to extend file by map_size every time.
  175. */
  176. uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
  177. IOStatus InvalidateCache(size_t offset, size_t length) override;
  178. IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
  179. IODebugContext* dbg) override;
  180. size_t GetUniqueId(char* id, size_t max_size) const override;
  181. };
  182. class WinRandomAccessImpl {
  183. protected:
  184. WinFileData* file_base_;
  185. size_t alignment_;
  186. // Override for behavior change when creating a custom env
  187. virtual IOStatus PositionedReadInternal(char* src, size_t numBytes,
  188. uint64_t offset,
  189. size_t& bytes_read) const;
  190. WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
  191. const FileOptions& options);
  192. virtual ~WinRandomAccessImpl() {}
  193. IOStatus ReadImpl(uint64_t offset, size_t n, Slice* result,
  194. char* scratch) const;
  195. size_t GetAlignment() const { return alignment_; }
  196. public:
  197. WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
  198. WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
  199. };
  200. // pread() based random-access
  201. class WinRandomAccessFile
  202. : private WinFileData,
  203. protected WinRandomAccessImpl, // Want to be able to override
  204. // PositionedReadInternal
  205. public FSRandomAccessFile {
  206. public:
  207. WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
  208. const FileOptions& options);
  209. ~WinRandomAccessFile();
  210. IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
  211. Slice* result, char* scratch,
  212. IODebugContext* dbg) const override;
  213. size_t GetUniqueId(char* id, size_t max_size) const override;
  214. bool use_direct_io() const override { return WinFileData::use_direct_io(); }
  215. IOStatus InvalidateCache(size_t offset, size_t length) override;
  216. size_t GetRequiredBufferAlignment() const override;
  217. IOStatus GetFileSize(uint64_t* file_size) override;
  218. };
  219. // This is a sequential write class. It has been mimicked (as others) after
  220. // the original Posix class. We add support for unbuffered I/O on windows as
  221. // well
  222. // we utilize the original buffer as an alignment buffer to write directly to
  223. // file with no buffering.
  224. // No buffering requires that the provided buffer is aligned to the physical
  225. // sector size (SSD page size) and
  226. // that all SetFilePointer() operations to occur with such an alignment.
  227. // We thus always write in sector/page size increments to the drive and leave
  228. // the tail for the next write OR for Close() at which point we pad with zeros.
  229. // No padding is required for
  230. // buffered access.
  231. class WinWritableImpl {
  232. protected:
  233. WinFileData* file_data_;
  234. const uint64_t alignment_;
  235. uint64_t
  236. next_write_offset_; // Needed because Windows does not support O_APPEND
  237. uint64_t reservedsize_; // how far we have reserved space
  238. virtual IOStatus PreallocateInternal(uint64_t spaceToReserve);
  239. WinWritableImpl(WinFileData* file_data, size_t alignment);
  240. ~WinWritableImpl() {}
  241. uint64_t GetAlignment() const { return alignment_; }
  242. IOStatus AppendImpl(const Slice& data);
  243. // Requires that the data is aligned as specified by
  244. // GetRequiredBufferAlignment()
  245. IOStatus PositionedAppendImpl(const Slice& data, uint64_t offset);
  246. IOStatus TruncateImpl(uint64_t size);
  247. IOStatus CloseImpl();
  248. IOStatus SyncImpl(const IOOptions& options, IODebugContext* dbg);
  249. uint64_t GetFileNextWriteOffset() {
  250. // Double accounting now here with WritableFileWriter
  251. // and this size will be wrong when unbuffered access is used
  252. // but tests implement their own writable files and do not use
  253. // WritableFileWrapper
  254. // so we need to squeeze a square peg through
  255. // a round hole here.
  256. return next_write_offset_;
  257. }
  258. IOStatus AllocateImpl(uint64_t offset, uint64_t len);
  259. public:
  260. WinWritableImpl(const WinWritableImpl&) = delete;
  261. WinWritableImpl& operator=(const WinWritableImpl&) = delete;
  262. };
  263. class WinWritableFile : private WinFileData,
  264. protected WinWritableImpl,
  265. public FSWritableFile {
  266. public:
  267. WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
  268. size_t capacity, const FileOptions& options);
  269. ~WinWritableFile();
  270. IOStatus Append(const Slice& data, const IOOptions& options,
  271. IODebugContext* dbg) override;
  272. IOStatus Append(const Slice& data, const IOOptions& opts,
  273. const DataVerificationInfo& /* verification_info */,
  274. IODebugContext* dbg) override {
  275. return Append(data, opts, dbg);
  276. }
  277. // Requires that the data is aligned as specified by
  278. // GetRequiredBufferAlignment()
  279. IOStatus PositionedAppend(const Slice& data, uint64_t offset,
  280. const IOOptions& options,
  281. IODebugContext* dbg) override;
  282. IOStatus PositionedAppend(const Slice& data, uint64_t offset,
  283. const IOOptions& opts,
  284. const DataVerificationInfo& /* verification_info */,
  285. IODebugContext* dbg) override {
  286. return PositionedAppend(data, offset, opts, dbg);
  287. }
  288. // Need to implement this so the file is truncated correctly
  289. // when buffered and unbuffered mode
  290. IOStatus Truncate(uint64_t size, const IOOptions& options,
  291. IODebugContext* dbg) override;
  292. IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
  293. // write out the cached data to the OS cache
  294. // This is now taken care of the WritableFileWriter
  295. IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
  296. IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
  297. IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
  298. bool IsSyncThreadSafe() const override;
  299. // Indicates if the class makes use of direct I/O
  300. // Use PositionedAppend
  301. bool use_direct_io() const override;
  302. size_t GetRequiredBufferAlignment() const override;
  303. uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
  304. IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
  305. IODebugContext* dbg) override;
  306. size_t GetUniqueId(char* id, size_t max_size) const override;
  307. };
  308. class WinRandomRWFile : private WinFileData,
  309. protected WinRandomAccessImpl,
  310. protected WinWritableImpl,
  311. public FSRandomRWFile {
  312. public:
  313. WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
  314. const FileOptions& options);
  315. ~WinRandomRWFile() {}
  316. // Indicates if the class makes use of direct I/O
  317. // If false you must pass aligned buffer to Write()
  318. bool use_direct_io() const override;
  319. // Use the returned alignment value to allocate aligned
  320. // buffer for Write() when use_direct_io() returns true
  321. size_t GetRequiredBufferAlignment() const override;
  322. // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
  323. // Pass aligned buffer when use_direct_io() returns true.
  324. IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options,
  325. IODebugContext* dbg) override;
  326. // Read up to `n` bytes starting from offset `offset` and store them in
  327. // result, provided `scratch` size should be at least `n`.
  328. // Returns Status::OK() on success.
  329. IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
  330. Slice* result, char* scratch,
  331. IODebugContext* dbg) const override;
  332. IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
  333. IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
  334. IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
  335. return Sync(options, dbg);
  336. }
  337. IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
  338. };
  339. class WinMemoryMappedBuffer : public MemoryMappedFileBuffer {
  340. private:
  341. HANDLE file_handle_;
  342. HANDLE map_handle_;
  343. public:
  344. WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base,
  345. size_t size)
  346. : MemoryMappedFileBuffer(base, size),
  347. file_handle_(file_handle),
  348. map_handle_(map_handle) {}
  349. ~WinMemoryMappedBuffer() override;
  350. };
  351. class WinDirectory : public FSDirectory {
  352. const std::string filename_;
  353. HANDLE handle_;
  354. public:
  355. explicit WinDirectory(const std::string& filename, HANDLE h) noexcept
  356. : filename_(filename), handle_(h) {
  357. assert(handle_ != INVALID_HANDLE_VALUE);
  358. }
  359. ~WinDirectory() {
  360. if (handle_ != NULL) {
  361. IOStatus s = WinDirectory::Close(IOOptions(), nullptr);
  362. s.PermitUncheckedError();
  363. }
  364. }
  365. const std::string& GetName() const { return filename_; }
  366. IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
  367. IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
  368. size_t GetUniqueId(char* id, size_t max_size) const override;
  369. };
  370. class WinFileLock : public FileLock {
  371. public:
  372. explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
  373. assert(hFile != NULL);
  374. assert(hFile != INVALID_HANDLE_VALUE);
  375. }
  376. ~WinFileLock();
  377. private:
  378. HANDLE hFile_;
  379. };
  380. } // namespace port
  381. } // namespace ROCKSDB_NAMESPACE