| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #include "port/win/io_win.h"
- #include "monitoring/iostats_context_imp.h"
- #include "test_util/sync_point.h"
- #include "util/aligned_buffer.h"
- #include "util/coding.h"
- namespace ROCKSDB_NAMESPACE {
- namespace port {
- /*
- * DirectIOHelper
- */
namespace {

// Sector size used by the alignment checks for unbuffered (direct) I/O.
const size_t kSectorSize = 512;

// Returns true iff `alignment` is a non-zero power of two.
// Zero is rejected explicitly: (0 & (0 - 1)) == 0 would otherwise classify
// it as a power of two.
inline bool IsPowerOfTwo(const size_t alignment) {
  return alignment != 0 && (alignment & (alignment - 1)) == 0;
}

// Returns true iff `off` is a multiple of kSectorSize.
inline bool IsSectorAligned(const size_t off) {
  return (off & (kSectorSize - 1)) == 0;
}

// Returns true iff the address `ptr` is a multiple of `alignment`.
// The mask arithmetic is only meaningful when `alignment` is a power of two.
inline bool IsAligned(size_t alignment, const void* ptr) {
  return (reinterpret_cast<uintptr_t>(ptr) & (alignment - 1)) == 0;
}

}  // namespace
- std::string GetWindowsErrSz(DWORD err) {
- LPSTR lpMsgBuf;
- FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL, err,
- 0, // Default language
- reinterpret_cast<LPSTR>(&lpMsgBuf), 0, NULL);
- std::string Err = lpMsgBuf;
- LocalFree(lpMsgBuf);
- return Err;
- }
// We preserve the original name of this interface to denote the original idea
// behind it.
// All reads happen at a specified offset, and the POSIX pwrite interface does
// not change the position of the file pointer. Judging from the man page and
// errno, it executes lseek atomically to return the file position to where it
// was. WriteFile() does not have this capability. Therefore, for both pread
// and pwrite the pointer is advanced to the next position, which is fine for
// writes because they are (or should be) sequential.
// Because all reads/writes happen at the specified offset, the caller should
// not, in theory, rely on the current file offset.
- Status pwrite(const WinFileData* file_data, const Slice& data,
- uint64_t offset, size_t& bytes_written) {
- Status s;
- bytes_written = 0;
- size_t num_bytes = data.size();
- if (num_bytes > std::numeric_limits<DWORD>::max()) {
- // May happen in 64-bit builds where size_t is 64-bits but
- // long is still 32-bit, but that's the API here at the moment
- return Status::InvalidArgument("num_bytes is too large for a single write: " +
- file_data->GetName());
- }
- OVERLAPPED overlapped = { 0 };
- ULARGE_INTEGER offsetUnion;
- offsetUnion.QuadPart = offset;
- overlapped.Offset = offsetUnion.LowPart;
- overlapped.OffsetHigh = offsetUnion.HighPart;
- DWORD bytesWritten = 0;
- if (FALSE == WriteFile(file_data->GetFileHandle(), data.data(), static_cast<DWORD>(num_bytes),
- &bytesWritten, &overlapped)) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError("WriteFile failed: " + file_data->GetName(),
- lastError);
- } else {
- bytes_written = bytesWritten;
- }
- return s;
- }
- // See comments for pwrite above
- Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
- uint64_t offset, size_t& bytes_read) {
- Status s;
- bytes_read = 0;
- if (num_bytes > std::numeric_limits<DWORD>::max()) {
- return Status::InvalidArgument("num_bytes is too large for a single read: " +
- file_data->GetName());
- }
- OVERLAPPED overlapped = { 0 };
- ULARGE_INTEGER offsetUnion;
- offsetUnion.QuadPart = offset;
- overlapped.Offset = offsetUnion.LowPart;
- overlapped.OffsetHigh = offsetUnion.HighPart;
- DWORD bytesRead = 0;
- if (FALSE == ReadFile(file_data->GetFileHandle(), src, static_cast<DWORD>(num_bytes),
- &bytesRead, &overlapped)) {
- auto lastError = GetLastError();
- // EOF is OK with zero bytes read
- if (lastError != ERROR_HANDLE_EOF) {
- s = IOErrorFromWindowsError("ReadFile failed: " + file_data->GetName(),
- lastError);
- }
- } else {
- bytes_read = bytesRead;
- }
- return s;
- }
- // SetFileInformationByHandle() is capable of fast pre-allocates.
- // However, this does not change the file end position unless the file is
- // truncated and the pre-allocated space is not considered filled with zeros.
- Status fallocate(const std::string& filename, HANDLE hFile,
- uint64_t to_size) {
- Status status;
- FILE_ALLOCATION_INFO alloc_info;
- alloc_info.AllocationSize.QuadPart = to_size;
- if (!SetFileInformationByHandle(hFile, FileAllocationInfo, &alloc_info,
- sizeof(FILE_ALLOCATION_INFO))) {
- auto lastError = GetLastError();
- status = IOErrorFromWindowsError(
- "Failed to pre-allocate space: " + filename, lastError);
- }
- return status;
- }
- Status ftruncate(const std::string& filename, HANDLE hFile,
- uint64_t toSize) {
- Status status;
- FILE_END_OF_FILE_INFO end_of_file;
- end_of_file.EndOfFile.QuadPart = toSize;
- if (!SetFileInformationByHandle(hFile, FileEndOfFileInfo, &end_of_file,
- sizeof(FILE_END_OF_FILE_INFO))) {
- auto lastError = GetLastError();
- status = IOErrorFromWindowsError("Failed to Set end of file: " + filename,
- lastError);
- }
- return status;
- }
- size_t GetUniqueIdFromFile(HANDLE /*hFile*/, char* /*id*/,
- size_t /*max_size*/) {
- // Returning 0 is safe as it causes the table reader to generate a unique ID.
- // This is suboptimal for performance as it prevents multiple table readers
- // for the same file from sharing cached blocks. For example, if users have
- // a low value for `max_open_files`, there can be many table readers opened
- // for the same file.
- //
- // TODO: this is a temporarily solution as it is safe but not optimal for
- // performance. For more details see discussion in
- // https://github.com/facebook/rocksdb/pull/5844.
- return 0;
- }
- ////////////////////////////////////////////////////////////////////////////////////////////////////
- // WinMmapReadableFile
- WinMmapReadableFile::WinMmapReadableFile(const std::string& fileName,
- HANDLE hFile, HANDLE hMap,
- const void* mapped_region,
- size_t length)
- : WinFileData(fileName, hFile, false /* use_direct_io */),
- hMap_(hMap),
- mapped_region_(mapped_region),
- length_(length) {}
- WinMmapReadableFile::~WinMmapReadableFile() {
- BOOL ret __attribute__((__unused__));
- ret = ::UnmapViewOfFile(mapped_region_);
- assert(ret);
- ret = ::CloseHandle(hMap_);
- assert(ret);
- }
- Status WinMmapReadableFile::Read(uint64_t offset, size_t n, Slice* result,
- char* scratch) const {
- Status s;
- if (offset > length_) {
- *result = Slice();
- return IOError(filename_, EINVAL);
- } else if (offset + n > length_) {
- n = length_ - static_cast<size_t>(offset);
- }
- *result =
- Slice(reinterpret_cast<const char*>(mapped_region_)+offset, n);
- return s;
- }
- Status WinMmapReadableFile::InvalidateCache(size_t offset, size_t length) {
- return Status::OK();
- }
- size_t WinMmapReadableFile::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(hFile_, id, max_size);
- }
- ///////////////////////////////////////////////////////////////////////////////
- /// WinMmapFile
- // Can only truncate or reserve to a sector size aligned if
- // used on files that are opened with Unbuffered I/O
- Status WinMmapFile::TruncateFile(uint64_t toSize) {
- return ftruncate(filename_, hFile_, toSize);
- }
- Status WinMmapFile::UnmapCurrentRegion() {
- Status status;
- if (mapped_begin_ != nullptr) {
- if (!::UnmapViewOfFile(mapped_begin_)) {
- status = IOErrorFromWindowsError(
- "Failed to unmap file view: " + filename_, GetLastError());
- }
- // Move on to the next portion of the file
- file_offset_ += view_size_;
- // UnmapView automatically sends data to disk but not the metadata
- // which is good and provides some equivalent of fdatasync() on Linux
- // therefore, we donot need separate flag for metadata
- mapped_begin_ = nullptr;
- mapped_end_ = nullptr;
- dst_ = nullptr;
- last_sync_ = nullptr;
- pending_sync_ = false;
- }
- return status;
- }
- Status WinMmapFile::MapNewRegion() {
- Status status;
- assert(mapped_begin_ == nullptr);
- size_t minDiskSize = static_cast<size_t>(file_offset_) + view_size_;
- if (minDiskSize > reserved_size_) {
- status = Allocate(file_offset_, view_size_);
- if (!status.ok()) {
- return status;
- }
- }
- // Need to remap
- if (hMap_ == NULL || reserved_size_ > mapping_size_) {
- if (hMap_ != NULL) {
- // Unmap the previous one
- BOOL ret __attribute__((__unused__));
- ret = ::CloseHandle(hMap_);
- assert(ret);
- hMap_ = NULL;
- }
- ULARGE_INTEGER mappingSize;
- mappingSize.QuadPart = reserved_size_;
- hMap_ = CreateFileMappingA(
- hFile_,
- NULL, // Security attributes
- PAGE_READWRITE, // There is not a write only mode for mapping
- mappingSize.HighPart, // Enable mapping the whole file but the actual
- // amount mapped is determined by MapViewOfFile
- mappingSize.LowPart,
- NULL); // Mapping name
- if (NULL == hMap_) {
- return IOErrorFromWindowsError(
- "WindowsMmapFile failed to create file mapping for: " + filename_,
- GetLastError());
- }
- mapping_size_ = reserved_size_;
- }
- ULARGE_INTEGER offset;
- offset.QuadPart = file_offset_;
- // View must begin at the granularity aligned offset
- mapped_begin_ = reinterpret_cast<char*>(
- MapViewOfFileEx(hMap_, FILE_MAP_WRITE, offset.HighPart, offset.LowPart,
- view_size_, NULL));
- if (!mapped_begin_) {
- status = IOErrorFromWindowsError(
- "WindowsMmapFile failed to map file view: " + filename_,
- GetLastError());
- } else {
- mapped_end_ = mapped_begin_ + view_size_;
- dst_ = mapped_begin_;
- last_sync_ = mapped_begin_;
- pending_sync_ = false;
- }
- return status;
- }
- Status WinMmapFile::PreallocateInternal(uint64_t spaceToReserve) {
- return fallocate(filename_, hFile_, spaceToReserve);
- }
- WinMmapFile::WinMmapFile(const std::string& fname, HANDLE hFile,
- size_t page_size, size_t allocation_granularity,
- const EnvOptions& options)
- : WinFileData(fname, hFile, false),
- WritableFile(options),
- hMap_(NULL),
- page_size_(page_size),
- allocation_granularity_(allocation_granularity),
- reserved_size_(0),
- mapping_size_(0),
- view_size_(0),
- mapped_begin_(nullptr),
- mapped_end_(nullptr),
- dst_(nullptr),
- last_sync_(nullptr),
- file_offset_(0),
- pending_sync_(false) {
- // Allocation granularity must be obtained from GetSystemInfo() and must be
- // a power of two.
- assert(allocation_granularity > 0);
- assert((allocation_granularity & (allocation_granularity - 1)) == 0);
- assert(page_size > 0);
- assert((page_size & (page_size - 1)) == 0);
- // Only for memory mapped writes
- assert(options.use_mmap_writes);
- // View size must be both the multiple of allocation_granularity AND the
- // page size and the granularity is usually a multiple of a page size.
- const size_t viewSize = 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode
- view_size_ = Roundup(viewSize, allocation_granularity_);
- }
- WinMmapFile::~WinMmapFile() {
- if (hFile_) {
- this->Close();
- }
- }
- Status WinMmapFile::Append(const Slice& data) {
- const char* src = data.data();
- size_t left = data.size();
- while (left > 0) {
- assert(mapped_begin_ <= dst_);
- size_t avail = mapped_end_ - dst_;
- if (avail == 0) {
- Status s = UnmapCurrentRegion();
- if (s.ok()) {
- s = MapNewRegion();
- }
- if (!s.ok()) {
- return s;
- }
- } else {
- size_t n = std::min(left, avail);
- memcpy(dst_, src, n);
- dst_ += n;
- src += n;
- left -= n;
- pending_sync_ = true;
- }
- }
- // Now make sure that the last partial page is padded with zeros if needed
- size_t bytesToPad = Roundup(size_t(dst_), page_size_) - size_t(dst_);
- if (bytesToPad > 0) {
- memset(dst_, 0, bytesToPad);
- }
- return Status::OK();
- }
- // Means Close() will properly take care of truncate
- // and it does not need any additional information
- Status WinMmapFile::Truncate(uint64_t size) {
- return Status::OK();
- }
- Status WinMmapFile::Close() {
- Status s;
- assert(NULL != hFile_);
- // We truncate to the precise size so no
- // uninitialized data at the end. SetEndOfFile
- // which we use does not write zeros and it is good.
- uint64_t targetSize = GetFileSize();
- if (mapped_begin_ != nullptr) {
- // Sync before unmapping to make sure everything
- // is on disk and there is not a lazy writing
- // so we are deterministic with the tests
- Sync();
- s = UnmapCurrentRegion();
- }
- if (NULL != hMap_) {
- BOOL ret = ::CloseHandle(hMap_);
- if (!ret && s.ok()) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "Failed to Close mapping for file: " + filename_, lastError);
- }
- hMap_ = NULL;
- }
- if (hFile_ != NULL) {
- TruncateFile(targetSize);
- BOOL ret = ::CloseHandle(hFile_);
- hFile_ = NULL;
- if (!ret && s.ok()) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "Failed to close file map handle: " + filename_, lastError);
- }
- }
- return s;
- }
- Status WinMmapFile::Flush() { return Status::OK(); }
- // Flush only data
- Status WinMmapFile::Sync() {
- Status s;
- // Some writes occurred since last sync
- if (dst_ > last_sync_) {
- assert(mapped_begin_);
- assert(dst_);
- assert(dst_ > mapped_begin_);
- assert(dst_ < mapped_end_);
- size_t page_begin =
- TruncateToPageBoundary(page_size_, last_sync_ - mapped_begin_);
- size_t page_end =
- TruncateToPageBoundary(page_size_, dst_ - mapped_begin_ - 1);
- // Flush only the amount of that is a multiple of pages
- if (!::FlushViewOfFile(mapped_begin_ + page_begin,
- (page_end - page_begin) + page_size_)) {
- s = IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_,
- GetLastError());
- } else {
- last_sync_ = dst_;
- }
- }
- return s;
- }
- /**
- * Flush data as well as metadata to stable storage.
- */
- Status WinMmapFile::Fsync() {
- Status s = Sync();
- // Flush metadata
- if (s.ok() && pending_sync_) {
- if (!::FlushFileBuffers(hFile_)) {
- s = IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_,
- GetLastError());
- }
- pending_sync_ = false;
- }
- return s;
- }
- /**
- * Get the size of valid data in the file. This will not match the
- * size that is returned from the filesystem because we use mmap
- * to extend file by map_size every time.
- */
- uint64_t WinMmapFile::GetFileSize() {
- size_t used = dst_ - mapped_begin_;
- return file_offset_ + used;
- }
- Status WinMmapFile::InvalidateCache(size_t offset, size_t length) {
- return Status::OK();
- }
- Status WinMmapFile::Allocate(uint64_t offset, uint64_t len) {
- Status status;
- TEST_KILL_RANDOM("WinMmapFile::Allocate", rocksdb_kill_odds);
- // Make sure that we reserve an aligned amount of space
- // since the reservation block size is driven outside so we want
- // to check if we are ok with reservation here
- size_t spaceToReserve = Roundup(static_cast<size_t>(offset + len), view_size_);
- // Nothing to do
- if (spaceToReserve <= reserved_size_) {
- return status;
- }
- IOSTATS_TIMER_GUARD(allocate_nanos);
- status = PreallocateInternal(spaceToReserve);
- if (status.ok()) {
- reserved_size_ = spaceToReserve;
- }
- return status;
- }
- size_t WinMmapFile::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(hFile_, id, max_size);
- }
- //////////////////////////////////////////////////////////////////////////////////
- // WinSequentialFile
- WinSequentialFile::WinSequentialFile(const std::string& fname, HANDLE f,
- const EnvOptions& options)
- : WinFileData(fname, f, options.use_direct_reads) {}
- WinSequentialFile::~WinSequentialFile() {
- assert(hFile_ != INVALID_HANDLE_VALUE);
- }
- Status WinSequentialFile::Read(size_t n, Slice* result, char* scratch) {
- Status s;
- size_t r = 0;
- assert(result != nullptr);
- if (WinFileData::use_direct_io()) {
- return Status::NotSupported("Read() does not support direct_io");
- }
- // Windows ReadFile API accepts a DWORD.
- // While it is possible to read in a loop if n is too big
- // it is an unlikely case.
- if (n > std::numeric_limits<DWORD>::max()) {
- return Status::InvalidArgument("n is too big for a single ReadFile: "
- + filename_);
- }
- DWORD bytesToRead = static_cast<DWORD>(n); //cast is safe due to the check above
- DWORD bytesRead = 0;
- BOOL ret = ReadFile(hFile_, scratch, bytesToRead, &bytesRead, NULL);
- if (ret != FALSE) {
- r = bytesRead;
- } else {
- auto lastError = GetLastError();
- if (lastError != ERROR_HANDLE_EOF) {
- s = IOErrorFromWindowsError("ReadFile failed: " + filename_,
- lastError);
- }
- }
- *result = Slice(scratch, r);
- return s;
- }
- Status WinSequentialFile::PositionedReadInternal(char* src, size_t numBytes,
- uint64_t offset, size_t& bytes_read) const {
- return pread(this, src, numBytes, offset, bytes_read);
- }
- Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* result,
- char* scratch) {
- Status s;
- if (!WinFileData::use_direct_io()) {
- return Status::NotSupported("This function is only used for direct_io");
- }
- if (!IsSectorAligned(static_cast<size_t>(offset)) ||
- !IsSectorAligned(n)) {
- return Status::InvalidArgument(
- "WinSequentialFile::PositionedRead: offset is not properly aligned");
- }
- size_t bytes_read = 0; // out param
- s = PositionedReadInternal(scratch, static_cast<size_t>(n), offset, bytes_read);
- *result = Slice(scratch, bytes_read);
- return s;
- }
- Status WinSequentialFile::Skip(uint64_t n) {
- // Can't handle more than signed max as SetFilePointerEx accepts a signed 64-bit
- // integer. As such it is a highly unlikley case to have n so large.
- if (n > static_cast<uint64_t>(std::numeric_limits<LONGLONG>::max())) {
- return Status::InvalidArgument("n is too large for a single SetFilePointerEx() call" +
- filename_);
- }
- LARGE_INTEGER li;
- li.QuadPart = static_cast<LONGLONG>(n); //cast is safe due to the check above
- BOOL ret = SetFilePointerEx(hFile_, li, NULL, FILE_CURRENT);
- if (ret == FALSE) {
- auto lastError = GetLastError();
- return IOErrorFromWindowsError("Skip SetFilePointerEx():" + filename_,
- lastError);
- }
- return Status::OK();
- }
- Status WinSequentialFile::InvalidateCache(size_t offset, size_t length) {
- return Status::OK();
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////
/// WinRandomAccessImpl
- inline
- Status WinRandomAccessImpl::PositionedReadInternal(char* src,
- size_t numBytes,
- uint64_t offset,
- size_t& bytes_read) const {
- return pread(file_base_, src, numBytes, offset, bytes_read);
- }
- inline
- WinRandomAccessImpl::WinRandomAccessImpl(WinFileData* file_base,
- size_t alignment,
- const EnvOptions& options) :
- file_base_(file_base),
- alignment_(alignment) {
- assert(!options.use_mmap_reads);
- }
- inline
- Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result,
- char* scratch) const {
- Status s;
- // Check buffer alignment
- if (file_base_->use_direct_io()) {
- if (!IsSectorAligned(static_cast<size_t>(offset)) ||
- !IsAligned(alignment_, scratch)) {
- return Status::InvalidArgument(
- "WinRandomAccessImpl::ReadImpl: offset or scratch is not properly aligned");
- }
- }
- if (n == 0) {
- *result = Slice(scratch, 0);
- return s;
- }
- size_t bytes_read = 0;
- s = PositionedReadInternal(scratch, n, offset, bytes_read);
- *result = Slice(scratch, bytes_read);
- return s;
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- /// WinRandomAccessFile
- WinRandomAccessFile::WinRandomAccessFile(const std::string& fname, HANDLE hFile,
- size_t alignment,
- const EnvOptions& options)
- : WinFileData(fname, hFile, options.use_direct_reads),
- WinRandomAccessImpl(this, alignment, options) {}
- WinRandomAccessFile::~WinRandomAccessFile() {
- }
- Status WinRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result,
- char* scratch) const {
- return ReadImpl(offset, n, result, scratch);
- }
- Status WinRandomAccessFile::InvalidateCache(size_t offset, size_t length) {
- return Status::OK();
- }
- size_t WinRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(GetFileHandle(), id, max_size);
- }
- size_t WinRandomAccessFile::GetRequiredBufferAlignment() const {
- return GetAlignment();
- }
- /////////////////////////////////////////////////////////////////////////////
- // WinWritableImpl
- //
- inline
- Status WinWritableImpl::PreallocateInternal(uint64_t spaceToReserve) {
- return fallocate(file_data_->GetName(), file_data_->GetFileHandle(), spaceToReserve);
- }
- inline
- WinWritableImpl::WinWritableImpl(WinFileData* file_data, size_t alignment)
- : file_data_(file_data),
- alignment_(alignment),
- next_write_offset_(0),
- reservedsize_(0) {
- // Query current position in case ReopenWritableFile is called
- // This position is only important for buffered writes
- // for unbuffered writes we explicitely specify the position.
- LARGE_INTEGER zero_move;
- zero_move.QuadPart = 0; // Do not move
- LARGE_INTEGER pos;
- pos.QuadPart = 0;
- BOOL ret = SetFilePointerEx(file_data_->GetFileHandle(), zero_move, &pos,
- FILE_CURRENT);
- // Querying no supped to fail
- if (ret != 0) {
- next_write_offset_ = pos.QuadPart;
- } else {
- assert(false);
- }
- }
- inline
- Status WinWritableImpl::AppendImpl(const Slice& data) {
- Status s;
- if (data.size() > std::numeric_limits<DWORD>::max()) {
- return Status::InvalidArgument("data is too long for a single write" +
- file_data_->GetName());
- }
- size_t bytes_written = 0; // out param
- if (file_data_->use_direct_io()) {
- // With no offset specified we are appending
- // to the end of the file
- assert(IsSectorAligned(next_write_offset_));
- if (!IsSectorAligned(data.size()) ||
- !IsAligned(static_cast<size_t>(GetAlignement()), data.data())) {
- s = Status::InvalidArgument(
- "WriteData must be page aligned, size must be sector aligned");
- } else {
- s = pwrite(file_data_, data, next_write_offset_, bytes_written);
- }
- } else {
- DWORD bytesWritten = 0;
- if (!WriteFile(file_data_->GetFileHandle(), data.data(),
- static_cast<DWORD>(data.size()), &bytesWritten, NULL)) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "Failed to WriteFile: " + file_data_->GetName(),
- lastError);
- } else {
- bytes_written = bytesWritten;
- }
- }
- if(s.ok()) {
- if (bytes_written == data.size()) {
- // This matters for direct_io cases where
- // we rely on the fact that next_write_offset_
- // is sector aligned
- next_write_offset_ += bytes_written;
- } else {
- s = Status::IOError("Failed to write all bytes: " +
- file_data_->GetName());
- }
- }
- return s;
- }
- inline
- Status WinWritableImpl::PositionedAppendImpl(const Slice& data, uint64_t offset) {
- if(file_data_->use_direct_io()) {
- if (!IsSectorAligned(static_cast<size_t>(offset)) ||
- !IsSectorAligned(data.size()) ||
- !IsAligned(static_cast<size_t>(GetAlignement()), data.data())) {
- return Status::InvalidArgument(
- "Data and offset must be page aligned, size must be sector aligned");
- }
- }
- size_t bytes_written = 0;
- Status s = pwrite(file_data_, data, offset, bytes_written);
- if(s.ok()) {
- if (bytes_written == data.size()) {
- // For sequential write this would be simple
- // size extension by data.size()
- uint64_t write_end = offset + bytes_written;
- if (write_end >= next_write_offset_) {
- next_write_offset_ = write_end;
- }
- } else {
- s = Status::IOError("Failed to write all of the requested data: " +
- file_data_->GetName());
- }
- }
- return s;
- }
- inline
- Status WinWritableImpl::TruncateImpl(uint64_t size) {
- // It is tempting to check for the size for sector alignment
- // but truncation may come at the end and there is not a requirement
- // for this to be sector aligned so long as we do not attempt to write
- // after that. The interface docs state that the behavior is undefined
- // in that case.
- Status s = ftruncate(file_data_->GetName(), file_data_->GetFileHandle(),
- size);
- if (s.ok()) {
- next_write_offset_ = size;
- }
- return s;
- }
- inline
- Status WinWritableImpl::CloseImpl() {
- Status s;
- auto hFile = file_data_->GetFileHandle();
- assert(INVALID_HANDLE_VALUE != hFile);
- if (!::FlushFileBuffers(hFile)) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError("FlushFileBuffers failed at Close() for: " +
- file_data_->GetName(),
- lastError);
- }
- if(!file_data_->CloseFile() && s.ok()) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError("CloseHandle failed for: " + file_data_->GetName(),
- lastError);
- }
- return s;
- }
- inline
- Status WinWritableImpl::SyncImpl() {
- Status s;
- if (!::FlushFileBuffers (file_data_->GetFileHandle())) {
- auto lastError = GetLastError();
- s = IOErrorFromWindowsError(
- "FlushFileBuffers failed at Sync() for: " + file_data_->GetName(), lastError);
- }
- return s;
- }
- inline
- Status WinWritableImpl::AllocateImpl(uint64_t offset, uint64_t len) {
- Status status;
- TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds);
- // Make sure that we reserve an aligned amount of space
- // since the reservation block size is driven outside so we want
- // to check if we are ok with reservation here
- size_t spaceToReserve = Roundup(static_cast<size_t>(offset + len), static_cast<size_t>(alignment_));
- // Nothing to do
- if (spaceToReserve <= reservedsize_) {
- return status;
- }
- IOSTATS_TIMER_GUARD(allocate_nanos);
- status = PreallocateInternal(spaceToReserve);
- if (status.ok()) {
- reservedsize_ = spaceToReserve;
- }
- return status;
- }
- ////////////////////////////////////////////////////////////////////////////////
- /// WinWritableFile
- WinWritableFile::WinWritableFile(const std::string& fname, HANDLE hFile,
- size_t alignment, size_t /* capacity */,
- const EnvOptions& options)
- : WinFileData(fname, hFile, options.use_direct_writes),
- WinWritableImpl(this, alignment),
- WritableFile(options) {
- assert(!options.use_mmap_writes);
- }
- WinWritableFile::~WinWritableFile() {
- }
- // Indicates if the class makes use of direct I/O
- bool WinWritableFile::use_direct_io() const { return WinFileData::use_direct_io(); }
- size_t WinWritableFile::GetRequiredBufferAlignment() const {
- return static_cast<size_t>(GetAlignement());
- }
- Status WinWritableFile::Append(const Slice& data) {
- return AppendImpl(data);
- }
- Status WinWritableFile::PositionedAppend(const Slice& data, uint64_t offset) {
- return PositionedAppendImpl(data, offset);
- }
- // Need to implement this so the file is truncated correctly
- // when buffered and unbuffered mode
- Status WinWritableFile::Truncate(uint64_t size) {
- return TruncateImpl(size);
- }
- Status WinWritableFile::Close() {
- return CloseImpl();
- }
// Write out the cached data to the OS cache.
// This is now taken care of by the WritableFileWriter.
- Status WinWritableFile::Flush() {
- return Status::OK();
- }
- Status WinWritableFile::Sync() {
- return SyncImpl();
- }
- Status WinWritableFile::Fsync() { return SyncImpl(); }
- bool WinWritableFile::IsSyncThreadSafe() const { return true; }
- uint64_t WinWritableFile::GetFileSize() {
- return GetFileNextWriteOffset();
- }
- Status WinWritableFile::Allocate(uint64_t offset, uint64_t len) {
- return AllocateImpl(offset, len);
- }
- size_t WinWritableFile::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(GetFileHandle(), id, max_size);
- }
- /////////////////////////////////////////////////////////////////////////
- /// WinRandomRWFile
- WinRandomRWFile::WinRandomRWFile(const std::string& fname, HANDLE hFile,
- size_t alignment, const EnvOptions& options)
- : WinFileData(fname, hFile,
- options.use_direct_reads && options.use_direct_writes),
- WinRandomAccessImpl(this, alignment, options),
- WinWritableImpl(this, alignment) {}
- bool WinRandomRWFile::use_direct_io() const { return WinFileData::use_direct_io(); }
- size_t WinRandomRWFile::GetRequiredBufferAlignment() const {
- return static_cast<size_t>(GetAlignement());
- }
- Status WinRandomRWFile::Write(uint64_t offset, const Slice & data) {
- return PositionedAppendImpl(data, offset);
- }
- Status WinRandomRWFile::Read(uint64_t offset, size_t n, Slice* result,
- char* scratch) const {
- return ReadImpl(offset, n, result, scratch);
- }
- Status WinRandomRWFile::Flush() {
- return Status::OK();
- }
- Status WinRandomRWFile::Sync() {
- return SyncImpl();
- }
- Status WinRandomRWFile::Close() {
- return CloseImpl();
- }
- //////////////////////////////////////////////////////////////////////////
/// WinMemoryMappedBuffer
- WinMemoryMappedBuffer::~WinMemoryMappedBuffer() {
- BOOL ret
- #if defined(_MSC_VER)
- = FALSE;
- #else
- __attribute__((__unused__));
- #endif
- if (base_ != nullptr) {
- ret = ::UnmapViewOfFile(base_);
- assert(ret);
- base_ = nullptr;
- }
- if (map_handle_ != NULL && map_handle_ != INVALID_HANDLE_VALUE) {
- ret = ::CloseHandle(map_handle_);
- assert(ret);
- map_handle_ = NULL;
- }
- if (file_handle_ != NULL && file_handle_ != INVALID_HANDLE_VALUE) {
- ret = ::CloseHandle(file_handle_);
- assert(ret);
- file_handle_ = NULL;
- }
- }
- //////////////////////////////////////////////////////////////////////////
- /// WinDirectory
- Status WinDirectory::Fsync() { return Status::OK(); }
- size_t WinDirectory::GetUniqueId(char* id, size_t max_size) const {
- return GetUniqueIdFromFile(handle_, id, max_size);
- }
- //////////////////////////////////////////////////////////////////////////
- /// WinFileLock
- WinFileLock::~WinFileLock() {
- BOOL ret __attribute__((__unused__));
- ret = ::CloseHandle(hFile_);
- assert(ret);
- }
- }
- } // namespace ROCKSDB_NAMESPACE
|