| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #if defined(OS_WIN)
- #include "port/win/io_win.h"
- #include "env_win.h"
- #include "monitoring/iostats_context_imp.h"
- #include "test_util/sync_point.h"
- #include "util/aligned_buffer.h"
- #include "util/coding.h"
- namespace ROCKSDB_NAMESPACE {
- namespace port {
- /*
- * DirectIOHelper
- */
- namespace {
- const size_t kSectorSize = 512;
- inline bool IsPowerOfTwo(const size_t alignment) {
- return ((alignment) & (alignment - 1)) == 0;
- }
- inline bool IsAligned(size_t alignment, const void* ptr) {
- return ((uintptr_t(ptr)) & (alignment - 1)) == 0;
- }
- } // namespace
- std::string GetWindowsErrSz(DWORD err) {
- std::string Err;
- LPSTR lpMsgBuf = nullptr;
- FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL, err,
- 0, // Default language
- reinterpret_cast<LPSTR>(&lpMsgBuf), 0, NULL);
- if (lpMsgBuf) {
- Err = lpMsgBuf;
- LocalFree(lpMsgBuf);
- }
- return Err;
- }
- // We preserve the original name of this interface to denote the original idea
- // behind it.
- // All reads happen by a specified offset and pwrite interface does not change
- // the position of the file pointer. Judging from the man page and errno it does
- // execute
- // lseek atomically to return the position of the file back where it was.
- // WriteFile() does not
- // have this capability. Therefore, for both pread and pwrite the pointer is
- // advanced to the next position
- // which is fine for writes because they are (should be) sequential.
- // Because all the reads/writes happen by the specified offset, the caller in
- // theory should not
- // rely on the current file offset.
- IOStatus pwrite(const WinFileData* file_data, const Slice& data,
- uint64_t offset, size_t& bytes_written) {
- IOStatus s;
- bytes_written = 0;
- size_t num_bytes = data.size();
- if (num_bytes > std::numeric_limits<DWORD>::max()) {
- // May happen in 64-bit builds where size_t is 64-bits but
- // long is still 32-bit, but that's the API here at the moment
- return IOStatus::InvalidArgument(
- "num_bytes is too large for a single write: " + file_data->GetName());
- }
- OVERLAPPED overlapped = {0};
- ULARGE_INTEGER offsetUnion;
- offsetUnion.QuadPart = offset;
- overlapped.Offset = offsetUnion.LowPart;
- overlapped.OffsetHigh = offsetUnion.HighPart;
- DWORD bytesWritten = 0;
- if (FALSE == WriteFile(file_data->GetFileHandle(), data.data(),
- static_cast<DWORD>(num_bytes), &bytesWritten,
- &overlapped)) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError("WriteFile failed: " + file_data->GetName(),
- lastError);
- } else {
- bytes_written = bytesWritten;
- }
- return s;
- }
- // See comments for pwrite above
- IOStatus pread(const WinFileData* file_data, char* src, size_t num_bytes,
- uint64_t offset, size_t& bytes_read) {
- IOStatus s;
- bytes_read = 0;
- if (num_bytes > std::numeric_limits<DWORD>::max()) {
- return IOStatus::InvalidArgument(
- "num_bytes is too large for a single read: " + file_data->GetName());
- }
- OVERLAPPED overlapped = {0};
- ULARGE_INTEGER offsetUnion;
- offsetUnion.QuadPart = offset;
- overlapped.Offset = offsetUnion.LowPart;
- overlapped.OffsetHigh = offsetUnion.HighPart;
- DWORD bytesRead = 0;
- if (FALSE == ReadFile(file_data->GetFileHandle(), src,
- static_cast<DWORD>(num_bytes), &bytesRead,
- &overlapped)) {
- auto lastError = GetLastError();
- // EOF is OK with zero bytes read
- if (lastError != ERROR_HANDLE_EOF) {
- s = IOErrorFromWindowsError("ReadFile failed: " + file_data->GetName(),
- lastError);
- }
- } else {
- bytes_read = bytesRead;
- }
- return s;
- }
- // SetFileInformationByHandle() is capable of fast pre-allocates.
- // However, this does not change the file end position unless the file is
- // truncated and the pre-allocated space is not considered filled with zeros.
- IOStatus fallocate(const std::string& filename, HANDLE hFile,
- uint64_t to_size) {
- IOStatus status;
- FILE_ALLOCATION_INFO alloc_info;
- alloc_info.AllocationSize.QuadPart = to_size;
- if (!SetFileInformationByHandle(hFile, FileAllocationInfo, &alloc_info,
- sizeof(FILE_ALLOCATION_INFO))) {
- auto lastError = GetLastError();
- status = IOErrorFromWindowsError(
- "Failed to pre-allocate space: " + filename, lastError);
- }
- return status;
- }
- IOStatus ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize) {
- IOStatus status;
- FILE_END_OF_FILE_INFO end_of_file;
- end_of_file.EndOfFile.QuadPart = toSize;
- if (!SetFileInformationByHandle(hFile, FileEndOfFileInfo, &end_of_file,
- sizeof(FILE_END_OF_FILE_INFO))) {
- auto lastError = GetLastError();
- status = IOErrorFromWindowsError("Failed to Set end of file: " + filename,
- lastError);
- }
- return status;
- }
- size_t GetUniqueIdFromFile(HANDLE /*hFile*/, char* /*id*/,
- size_t /*max_size*/) {
- // Returning 0 is safe as it causes the table reader to generate a unique ID.
- // This is suboptimal for performance as it prevents multiple table readers
- // for the same file from sharing cached blocks. For example, if users have
- // a low value for `max_open_files`, there can be many table readers opened
- // for the same file.
- //
- // TODO: this is a temporarily solution as it is safe but not optimal for
- // performance. For more details see discussion in
- // https://github.com/facebook/rocksdb/pull/5844.
- return 0;
- }
- WinFileData::WinFileData(const std::string& filename, HANDLE hFile,
- bool direct_io)
- : filename_(filename),
- hFile_(hFile),
- use_direct_io_(direct_io),
- sector_size_(WinFileSystem::GetSectorSize(filename)) {}
- bool WinFileData::IsSectorAligned(const size_t off) const {
- return (off & (sector_size_ - 1)) == 0;
- }
- ////////////////////////////////////////////////////////////////////////////////////////////////////
- // WinMmapReadableFile
- WinMmapReadableFile::WinMmapReadableFile(const std::string& fileName,
- HANDLE hFile, HANDLE hMap,
- const void* mapped_region,
- size_t length)
- : WinFileData(fileName, hFile, false /* use_direct_io */),
- hMap_(hMap),
- mapped_region_(mapped_region),
- length_(length) {}
- WinMmapReadableFile::~WinMmapReadableFile() {
- BOOL ret __attribute__((__unused__));
- ret = ::UnmapViewOfFile(mapped_region_);
- assert(ret);
- ret = ::CloseHandle(hMap_);
- assert(ret);
- }
- IOStatus WinMmapReadableFile::Read(uint64_t offset, size_t n,
- const IOOptions& /*options*/, Slice* result,
- char* scratch,
- IODebugContext* /*dbg*/) const {
- IOStatus s;
- if (offset > length_) {
- *result = Slice();
- return IOError(filename_, EINVAL);
- } else if (offset + n > length_) {
- n = length_ - static_cast<size_t>(offset);
- }
- *result = Slice(static_cast<const char*>(mapped_region_) + offset, n);
- return s;
- }
- IOStatus WinMmapReadableFile::InvalidateCache(size_t offset, size_t length) {
- return IOStatus::OK();
- }
- size_t WinMmapReadableFile::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(hFile_, id, max_size);
- }
- IOStatus WinMmapReadableFile::GetFileSize(uint64_t* size) {
- LARGE_INTEGER fileSize;
- if (GetFileSizeEx(hFile_, &fileSize)) {
- *size = fileSize.QuadPart;
- return IOStatus::OK();
- } else {
- return IOStatus::IOError("Failed to get file size", filename_);
- }
- }
- ///////////////////////////////////////////////////////////////////////////////
- /// WinMmapFile
- // Can only truncate or reserve to a sector size aligned if
- // used on files that are opened with Unbuffered I/O
- IOStatus WinMmapFile::TruncateFile(uint64_t toSize) {
- return ftruncate(filename_, hFile_, toSize);
- }
- IOStatus WinMmapFile::UnmapCurrentRegion() {
- IOStatus status;
- if (mapped_begin_ != nullptr) {
- if (!::UnmapViewOfFile(mapped_begin_)) {
- status = IOErrorFromWindowsError(
- "Failed to unmap file view: " + filename_, GetLastError());
- }
- // Move on to the next portion of the file
- file_offset_ += view_size_;
- // UnmapView automatically sends data to disk but not the metadata
- // which is good and provides some equivalent of fdatasync() on Linux
- // therefore, we donot need separate flag for metadata
- mapped_begin_ = nullptr;
- mapped_end_ = nullptr;
- dst_ = nullptr;
- last_sync_ = nullptr;
- pending_sync_ = false;
- }
- return status;
- }
- IOStatus WinMmapFile::MapNewRegion(const IOOptions& options,
- IODebugContext* dbg) {
- IOStatus status;
- assert(mapped_begin_ == nullptr);
- size_t minDiskSize = static_cast<size_t>(file_offset_) + view_size_;
- if (minDiskSize > reserved_size_) {
- status = Allocate(file_offset_, view_size_, options, dbg);
- if (!status.ok()) {
- return status;
- }
- }
- // Need to remap
- if (hMap_ == NULL || reserved_size_ > mapping_size_) {
- if (hMap_ != NULL) {
- // Unmap the previous one
- BOOL ret __attribute__((__unused__));
- ret = ::CloseHandle(hMap_);
- assert(ret);
- hMap_ = NULL;
- }
- ULARGE_INTEGER mappingSize;
- mappingSize.QuadPart = reserved_size_;
- hMap_ = CreateFileMappingA(
- hFile_,
- NULL, // Security attributes
- PAGE_READWRITE, // There is not a write only mode for mapping
- mappingSize.HighPart, // Enable mapping the whole file but the actual
- // amount mapped is determined by MapViewOfFile
- mappingSize.LowPart,
- NULL); // Mapping name
- if (NULL == hMap_) {
- return IOErrorFromWindowsError(
- "WindowsMmapFile failed to create file mapping for: " + filename_,
- GetLastError());
- }
- mapping_size_ = reserved_size_;
- }
- ULARGE_INTEGER offset;
- offset.QuadPart = file_offset_;
- // View must begin at the granularity aligned offset
- mapped_begin_ =
- static_cast<char*>(MapViewOfFileEx(hMap_, FILE_MAP_WRITE, offset.HighPart,
- offset.LowPart, view_size_, NULL));
- if (!mapped_begin_) {
- status = IOErrorFromWindowsError(
- "WindowsMmapFile failed to map file view: " + filename_,
- GetLastError());
- } else {
- mapped_end_ = mapped_begin_ + view_size_;
- dst_ = mapped_begin_;
- last_sync_ = mapped_begin_;
- pending_sync_ = false;
- }
- return status;
- }
- IOStatus WinMmapFile::PreallocateInternal(uint64_t spaceToReserve) {
- return fallocate(filename_, hFile_, spaceToReserve);
- }
- WinMmapFile::WinMmapFile(const std::string& fname, HANDLE hFile,
- size_t page_size, size_t allocation_granularity,
- const FileOptions& options)
- : WinFileData(fname, hFile, false),
- FSWritableFile(options),
- hMap_(NULL),
- page_size_(page_size),
- allocation_granularity_(allocation_granularity),
- reserved_size_(0),
- mapping_size_(0),
- view_size_(0),
- mapped_begin_(nullptr),
- mapped_end_(nullptr),
- dst_(nullptr),
- last_sync_(nullptr),
- file_offset_(0),
- pending_sync_(false) {
- // Allocation granularity must be obtained from GetSystemInfo() and must be
- // a power of two.
- assert(allocation_granularity > 0);
- assert((allocation_granularity & (allocation_granularity - 1)) == 0);
- assert(page_size > 0);
- assert((page_size & (page_size - 1)) == 0);
- // Only for memory mapped writes
- assert(options.use_mmap_writes);
- // View size must be both the multiple of allocation_granularity AND the
- // page size and the granularity is usually a multiple of a page size.
- const size_t viewSize =
- 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode
- view_size_ = Roundup(viewSize, allocation_granularity_);
- }
- WinMmapFile::~WinMmapFile() {
- if (hFile_) {
- this->Close(IOOptions(), nullptr);
- }
- }
- IOStatus WinMmapFile::Append(const Slice& data, const IOOptions& options,
- IODebugContext* dbg) {
- const char* src = data.data();
- size_t left = data.size();
- while (left > 0) {
- assert(mapped_begin_ <= dst_);
- size_t avail = mapped_end_ - dst_;
- if (avail == 0) {
- IOStatus s = UnmapCurrentRegion();
- if (s.ok()) {
- s = MapNewRegion(options, dbg);
- }
- if (!s.ok()) {
- return s;
- }
- } else {
- size_t n = std::min(left, avail);
- memcpy(dst_, src, n);
- dst_ += n;
- src += n;
- left -= n;
- pending_sync_ = true;
- }
- }
- // Now make sure that the last partial page is padded with zeros if needed
- size_t bytesToPad = Roundup(size_t(dst_), page_size_) - size_t(dst_);
- if (bytesToPad > 0) {
- memset(dst_, 0, bytesToPad);
- }
- return IOStatus::OK();
- }
- // Means Close() will properly take care of truncate
- // and it does not need any additional information
- IOStatus WinMmapFile::Truncate(uint64_t size, const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return IOStatus::OK();
- }
- IOStatus WinMmapFile::Close(const IOOptions& options, IODebugContext* dbg) {
- IOStatus s;
- assert(NULL != hFile_);
- // We truncate to the precise size so no
- // uninitialized data at the end. SetEndOfFile
- // which we use does not write zeros and it is good.
- uint64_t targetSize = GetFileSize(options, dbg);
- if (mapped_begin_ != nullptr) {
- // Sync before unmapping to make sure everything
- // is on disk and there is not a lazy writing
- // so we are deterministic with the tests
- Sync(options, dbg);
- s = UnmapCurrentRegion();
- }
- if (NULL != hMap_) {
- BOOL ret = ::CloseHandle(hMap_);
- if (!ret && s.ok()) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "Failed to Close mapping for file: " + filename_, lastError);
- }
- hMap_ = NULL;
- }
- if (hFile_ != NULL) {
- TruncateFile(targetSize);
- BOOL ret = ::CloseHandle(hFile_);
- hFile_ = NULL;
- if (!ret && s.ok()) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "Failed to close file map handle: " + filename_, lastError);
- }
- }
- return s;
- }
- IOStatus WinMmapFile::Flush(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return IOStatus::OK();
- }
- // Flush only data
- IOStatus WinMmapFile::Sync(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- IOStatus s;
- // Some writes occurred since last sync
- if (dst_ > last_sync_) {
- assert(mapped_begin_);
- assert(dst_);
- assert(dst_ > mapped_begin_);
- assert(dst_ < mapped_end_);
- size_t page_begin =
- TruncateToPageBoundary(page_size_, last_sync_ - mapped_begin_);
- size_t page_end =
- TruncateToPageBoundary(page_size_, dst_ - mapped_begin_ - 1);
- // Flush only the amount of that is a multiple of pages
- if (!::FlushViewOfFile(mapped_begin_ + page_begin,
- (page_end - page_begin) + page_size_)) {
- s = IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_,
- GetLastError());
- } else {
- last_sync_ = dst_;
- }
- }
- return s;
- }
- /**
- * Flush data as well as metadata to stable storage.
- */
- IOStatus WinMmapFile::Fsync(const IOOptions& options, IODebugContext* dbg) {
- IOStatus s = Sync(options, dbg);
- // Flush metadata
- if (s.ok() && pending_sync_) {
- if (!::FlushFileBuffers(hFile_)) {
- s = IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_,
- GetLastError());
- }
- pending_sync_ = false;
- }
- return s;
- }
- /**
- * Get the size of valid data in the file. This will not match the
- * size that is returned from the filesystem because we use mmap
- * to extend file by map_size every time.
- */
- uint64_t WinMmapFile::GetFileSize(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- size_t used = dst_ - mapped_begin_;
- return file_offset_ + used;
- }
- IOStatus WinMmapFile::InvalidateCache(size_t offset, size_t length) {
- return IOStatus::OK();
- }
- IOStatus WinMmapFile::Allocate(uint64_t offset, uint64_t len,
- const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- IOStatus status;
- TEST_KILL_RANDOM("WinMmapFile::Allocate");
- // Make sure that we reserve an aligned amount of space
- // since the reservation block size is driven outside so we want
- // to check if we are ok with reservation here
- size_t spaceToReserve =
- Roundup(static_cast<size_t>(offset + len), view_size_);
- // Nothing to do
- if (spaceToReserve <= reserved_size_) {
- return status;
- }
- IOSTATS_TIMER_GUARD(allocate_nanos);
- status = PreallocateInternal(spaceToReserve);
- if (status.ok()) {
- reserved_size_ = spaceToReserve;
- }
- return status;
- }
- size_t WinMmapFile::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(hFile_, id, max_size);
- }
- //////////////////////////////////////////////////////////////////////////////////
- // WinSequentialFile
- WinSequentialFile::WinSequentialFile(const std::string& fname, HANDLE f,
- const FileOptions& options)
- : WinFileData(fname, f, options.use_direct_reads) {}
- WinSequentialFile::~WinSequentialFile() {
- assert(hFile_ != INVALID_HANDLE_VALUE);
- }
- IOStatus WinSequentialFile::Read(size_t n, const IOOptions& /*opts*/,
- Slice* result, char* scratch,
- IODebugContext* /*dbg*/) {
- IOStatus s;
- size_t r = 0;
- assert(result != nullptr);
- if (WinFileData::use_direct_io()) {
- return IOStatus::NotSupported("Read() does not support direct_io");
- }
- // Windows ReadFile API accepts a DWORD.
- // While it is possible to read in a loop if n is too big
- // it is an unlikely case.
- if (n > std::numeric_limits<DWORD>::max()) {
- return IOStatus::InvalidArgument("n is too big for a single ReadFile: " +
- filename_);
- }
- DWORD bytesToRead =
- static_cast<DWORD>(n); // cast is safe due to the check above
- DWORD bytesRead = 0;
- BOOL ret = ReadFile(hFile_, scratch, bytesToRead, &bytesRead, NULL);
- if (ret != FALSE) {
- r = bytesRead;
- } else {
- auto lastError = GetLastError();
- if (lastError != ERROR_HANDLE_EOF) {
- s = IOErrorFromWindowsError("ReadFile failed: " + filename_, lastError);
- }
- }
- *result = Slice(scratch, r);
- return s;
- }
- IOStatus WinSequentialFile::PositionedReadInternal(char* src, size_t numBytes,
- uint64_t offset,
- size_t& bytes_read) const {
- return pread(this, src, numBytes, offset, bytes_read);
- }
- IOStatus WinSequentialFile::PositionedRead(uint64_t offset, size_t n,
- const IOOptions& /*opts*/,
- Slice* result, char* scratch,
- IODebugContext* /*dbg*/) {
- if (!WinFileData::use_direct_io()) {
- return IOStatus::NotSupported("This function is only used for direct_io");
- }
- assert(IsSectorAligned(static_cast<size_t>(offset)));
- assert(IsSectorAligned(static_cast<size_t>(n)));
- size_t bytes_read = 0; // out param
- IOStatus s = PositionedReadInternal(scratch, static_cast<size_t>(n), offset,
- bytes_read);
- *result = Slice(scratch, bytes_read);
- return s;
- }
- IOStatus WinSequentialFile::Skip(uint64_t n) {
- // Can't handle more than signed max as SetFilePointerEx accepts a signed
- // 64-bit integer. As such it is a highly unlikley case to have n so large.
- if (n > static_cast<uint64_t>(std::numeric_limits<LONGLONG>::max())) {
- return IOStatus::InvalidArgument(
- "n is too large for a single SetFilePointerEx() call" + filename_);
- }
- LARGE_INTEGER li;
- li.QuadPart = static_cast<LONGLONG>(n); // cast is safe due to the check
- // above
- BOOL ret = SetFilePointerEx(hFile_, li, NULL, FILE_CURRENT);
- if (ret == FALSE) {
- auto lastError = GetLastError();
- return IOErrorFromWindowsError("Skip SetFilePointerEx():" + filename_,
- lastError);
- }
- return IOStatus::OK();
- }
- IOStatus WinSequentialFile::InvalidateCache(size_t offset, size_t length) {
- return IOStatus::OK();
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////
- /// WinRandomAccessBase
- inline IOStatus WinRandomAccessImpl::PositionedReadInternal(
- char* src, size_t numBytes, uint64_t offset, size_t& bytes_read) const {
- return pread(file_base_, src, numBytes, offset, bytes_read);
- }
- inline WinRandomAccessImpl::WinRandomAccessImpl(WinFileData* file_base,
- size_t alignment,
- const FileOptions& options)
- : file_base_(file_base),
- alignment_(std::max(alignment, file_base->GetSectorSize())) {
- assert(!options.use_mmap_reads);
- }
- inline IOStatus WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n,
- Slice* result,
- char* scratch) const {
- // Check buffer alignment
- if (file_base_->use_direct_io()) {
- assert(file_base_->IsSectorAligned(static_cast<size_t>(offset)));
- assert(IsAligned(alignment_, scratch));
- }
- if (n == 0) {
- *result = Slice(scratch, 0);
- return IOStatus::OK();
- }
- size_t bytes_read = 0;
- IOStatus s = PositionedReadInternal(scratch, n, offset, bytes_read);
- *result = Slice(scratch, bytes_read);
- return s;
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- /// WinRandomAccessFile
- WinRandomAccessFile::WinRandomAccessFile(const std::string& fname, HANDLE hFile,
- size_t alignment,
- const FileOptions& options)
- : WinFileData(fname, hFile, options.use_direct_reads),
- WinRandomAccessImpl(this, alignment, options) {}
- WinRandomAccessFile::~WinRandomAccessFile() {}
- IOStatus WinRandomAccessFile::Read(uint64_t offset, size_t n,
- const IOOptions& /*options*/, Slice* result,
- char* scratch,
- IODebugContext* /*dbg*/) const {
- return ReadImpl(offset, n, result, scratch);
- }
- IOStatus WinRandomAccessFile::InvalidateCache(size_t offset, size_t length) {
- return IOStatus::OK();
- }
- size_t WinRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(GetFileHandle(), id, max_size);
- }
- size_t WinRandomAccessFile::GetRequiredBufferAlignment() const {
- return GetAlignment();
- }
- IOStatus WinRandomAccessFile::GetFileSize(uint64_t* size) {
- LARGE_INTEGER fileSize;
- if (GetFileSizeEx(hFile_, &fileSize)) {
- *size = fileSize.QuadPart;
- return IOStatus::OK();
- } else {
- return IOStatus::IOError("Failed to get file size", filename_);
- }
- }
- /////////////////////////////////////////////////////////////////////////////
- // WinWritableImpl
- //
- inline IOStatus WinWritableImpl::PreallocateInternal(uint64_t spaceToReserve) {
- return fallocate(file_data_->GetName(), file_data_->GetFileHandle(),
- spaceToReserve);
- }
- inline WinWritableImpl::WinWritableImpl(WinFileData* file_data,
- size_t alignment)
- : file_data_(file_data),
- alignment_(std::max(alignment, file_data->GetSectorSize())),
- next_write_offset_(0),
- reservedsize_(0) {
- // Query current position in case ReopenWritableFile is called
- // This position is only important for buffered writes
- // for unbuffered writes we explicitely specify the position.
- LARGE_INTEGER zero_move;
- zero_move.QuadPart = 0; // Do not move
- LARGE_INTEGER pos;
- pos.QuadPart = 0;
- BOOL ret = SetFilePointerEx(file_data_->GetFileHandle(), zero_move, &pos,
- FILE_CURRENT);
- // Querying no supped to fail
- if (ret != 0) {
- next_write_offset_ = pos.QuadPart;
- } else {
- assert(false);
- }
- }
- inline IOStatus WinWritableImpl::AppendImpl(const Slice& data) {
- IOStatus s;
- if (data.size() > std::numeric_limits<DWORD>::max()) {
- return IOStatus::InvalidArgument("data is too long for a single write" +
- file_data_->GetName());
- }
- size_t bytes_written = 0; // out param
- if (file_data_->use_direct_io()) {
- // With no offset specified we are appending
- // to the end of the file
- assert(file_data_->IsSectorAligned(next_write_offset_));
- assert(file_data_->IsSectorAligned(data.size()));
- assert(IsAligned(static_cast<size_t>(GetAlignment()), data.data()));
- s = pwrite(file_data_, data, next_write_offset_, bytes_written);
- } else {
- DWORD bytesWritten = 0;
- if (!WriteFile(file_data_->GetFileHandle(), data.data(),
- static_cast<DWORD>(data.size()), &bytesWritten, NULL)) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "Failed to WriteFile: " + file_data_->GetName(), lastError);
- } else {
- bytes_written = bytesWritten;
- }
- }
- if (s.ok()) {
- if (bytes_written == data.size()) {
- // This matters for direct_io cases where
- // we rely on the fact that next_write_offset_
- // is sector aligned
- next_write_offset_ += bytes_written;
- } else {
- s = IOStatus::IOError("Failed to write all bytes: " +
- file_data_->GetName());
- }
- }
- return s;
- }
- inline IOStatus WinWritableImpl::PositionedAppendImpl(const Slice& data,
- uint64_t offset) {
- if (file_data_->use_direct_io()) {
- assert(file_data_->IsSectorAligned(static_cast<size_t>(offset)));
- assert(file_data_->IsSectorAligned(data.size()));
- assert(IsAligned(static_cast<size_t>(GetAlignment()), data.data()));
- }
- size_t bytes_written = 0;
- IOStatus s = pwrite(file_data_, data, offset, bytes_written);
- if (s.ok()) {
- if (bytes_written == data.size()) {
- // For sequential write this would be simple
- // size extension by data.size()
- uint64_t write_end = offset + bytes_written;
- if (write_end >= next_write_offset_) {
- next_write_offset_ = write_end;
- }
- } else {
- s = IOStatus::IOError("Failed to write all of the requested data: " +
- file_data_->GetName());
- }
- }
- return s;
- }
- inline IOStatus WinWritableImpl::TruncateImpl(uint64_t size) {
- // It is tempting to check for the size for sector alignment
- // but truncation may come at the end and there is not a requirement
- // for this to be sector aligned so long as we do not attempt to write
- // after that. The interface docs state that the behavior is undefined
- // in that case.
- IOStatus s =
- ftruncate(file_data_->GetName(), file_data_->GetFileHandle(), size);
- if (s.ok()) {
- next_write_offset_ = size;
- }
- return s;
- }
- inline IOStatus WinWritableImpl::CloseImpl() {
- IOStatus s;
- auto hFile = file_data_->GetFileHandle();
- assert(INVALID_HANDLE_VALUE != hFile);
- if (!::FlushFileBuffers(hFile)) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "FlushFileBuffers failed at Close() for: " + file_data_->GetName(),
- lastError);
- }
- if (!file_data_->CloseFile() && s.ok()) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "CloseHandle failed for: " + file_data_->GetName(), lastError);
- }
- return s;
- }
- inline IOStatus WinWritableImpl::SyncImpl(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- IOStatus s;
- if (!::FlushFileBuffers(file_data_->GetFileHandle())) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "FlushFileBuffers failed at Sync() for: " + file_data_->GetName(),
- lastError);
- }
- return s;
- }
- inline IOStatus WinWritableImpl::AllocateImpl(uint64_t offset, uint64_t len) {
- IOStatus status;
- TEST_KILL_RANDOM("WinWritableFile::Allocate");
- // Make sure that we reserve an aligned amount of space
- // since the reservation block size is driven outside so we want
- // to check if we are ok with reservation here
- size_t spaceToReserve = Roundup(static_cast<size_t>(offset + len),
- static_cast<size_t>(alignment_));
- // Nothing to do
- if (spaceToReserve <= reservedsize_) {
- return status;
- }
- IOSTATS_TIMER_GUARD(allocate_nanos);
- status = PreallocateInternal(spaceToReserve);
- if (status.ok()) {
- reservedsize_ = spaceToReserve;
- }
- return status;
- }
- ////////////////////////////////////////////////////////////////////////////////
- /// WinWritableFile
- WinWritableFile::WinWritableFile(const std::string& fname, HANDLE hFile,
- size_t alignment, size_t /* capacity */,
- const FileOptions& options)
- : WinFileData(fname, hFile, options.use_direct_writes),
- WinWritableImpl(this, alignment),
- FSWritableFile(options) {
- assert(!options.use_mmap_writes);
- }
- WinWritableFile::~WinWritableFile() {}
- // Indicates if the class makes use of direct I/O
- bool WinWritableFile::use_direct_io() const {
- return WinFileData::use_direct_io();
- }
- size_t WinWritableFile::GetRequiredBufferAlignment() const {
- return static_cast<size_t>(GetAlignment());
- }
- IOStatus WinWritableFile::Append(const Slice& data,
- const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return AppendImpl(data);
- }
- IOStatus WinWritableFile::PositionedAppend(const Slice& data, uint64_t offset,
- const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return PositionedAppendImpl(data, offset);
- }
- // Need to implement this so the file is truncated correctly
- // when buffered and unbuffered mode
- IOStatus WinWritableFile::Truncate(uint64_t size, const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return TruncateImpl(size);
- }
- IOStatus WinWritableFile::Close(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return CloseImpl();
- }
- // write out the cached data to the OS cache
- // This is now taken care of the WritableFileWriter
- IOStatus WinWritableFile::Flush(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return IOStatus::OK();
- }
- IOStatus WinWritableFile::Sync(const IOOptions& options, IODebugContext* dbg) {
- return SyncImpl(options, dbg);
- }
- IOStatus WinWritableFile::Fsync(const IOOptions& options, IODebugContext* dbg) {
- return SyncImpl(options, dbg);
- }
- bool WinWritableFile::IsSyncThreadSafe() const { return true; }
- uint64_t WinWritableFile::GetFileSize(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return GetFileNextWriteOffset();
- }
- IOStatus WinWritableFile::Allocate(uint64_t offset, uint64_t len,
- const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return AllocateImpl(offset, len);
- }
- size_t WinWritableFile::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(GetFileHandle(), id, max_size);
- }
- /////////////////////////////////////////////////////////////////////////
- /// WinRandomRWFile
- WinRandomRWFile::WinRandomRWFile(const std::string& fname, HANDLE hFile,
- size_t alignment, const FileOptions& options)
- : WinFileData(fname, hFile,
- options.use_direct_reads && options.use_direct_writes),
- WinRandomAccessImpl(this, alignment, options),
- WinWritableImpl(this, alignment) {}
- bool WinRandomRWFile::use_direct_io() const {
- return WinFileData::use_direct_io();
- }
- size_t WinRandomRWFile::GetRequiredBufferAlignment() const {
- assert(WinRandomAccessImpl::GetAlignment() ==
- WinWritableImpl::GetAlignment());
- return static_cast<size_t>(WinRandomAccessImpl::GetAlignment());
- }
- IOStatus WinRandomRWFile::Write(uint64_t offset, const Slice& data,
- const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return PositionedAppendImpl(data, offset);
- }
- IOStatus WinRandomRWFile::Read(uint64_t offset, size_t n,
- const IOOptions& /*options*/, Slice* result,
- char* scratch, IODebugContext* /*dbg*/) const {
- return ReadImpl(offset, n, result, scratch);
- }
- IOStatus WinRandomRWFile::Flush(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return IOStatus::OK();
- }
- IOStatus WinRandomRWFile::Sync(const IOOptions& options, IODebugContext* dbg) {
- return SyncImpl(options, dbg);
- }
- IOStatus WinRandomRWFile::Close(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return CloseImpl();
- }
- //////////////////////////////////////////////////////////////////////////
- /// WinMemoryMappedBufer
- WinMemoryMappedBuffer::~WinMemoryMappedBuffer() {
- BOOL ret
- #if defined(_MSC_VER)
- = FALSE;
- #else
- __attribute__((__unused__));
- #endif
- if (base_ != nullptr) {
- ret = ::UnmapViewOfFile(base_);
- assert(ret);
- base_ = nullptr;
- }
- if (map_handle_ != NULL && map_handle_ != INVALID_HANDLE_VALUE) {
- ret = ::CloseHandle(map_handle_);
- assert(ret);
- map_handle_ = NULL;
- }
- if (file_handle_ != NULL && file_handle_ != INVALID_HANDLE_VALUE) {
- ret = ::CloseHandle(file_handle_);
- assert(ret);
- file_handle_ = NULL;
- }
- }
- //////////////////////////////////////////////////////////////////////////
- /// WinDirectory
- IOStatus WinDirectory::Fsync(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- return IOStatus::OK();
- }
- IOStatus WinDirectory::Close(const IOOptions& /*options*/,
- IODebugContext* /*dbg*/) {
- IOStatus s = IOStatus::OK();
- BOOL ret __attribute__((__unused__));
- if (handle_ != INVALID_HANDLE_VALUE) {
- ret = ::CloseHandle(handle_);
- if (!ret) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError("Directory closes failed for : " + GetName(),
- lastError);
- }
- handle_ = NULL;
- }
- return s;
- }
- size_t WinDirectory::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(handle_, id, max_size);
- }
- //////////////////////////////////////////////////////////////////////////
- /// WinFileLock
- WinFileLock::~WinFileLock() {
- BOOL ret __attribute__((__unused__));
- ret = ::CloseHandle(hFile_);
- assert(ret);
- }
- } // namespace port
- } // namespace ROCKSDB_NAMESPACE
- #endif
|