From 95b021b9831163079c3c293088aa8d364ad0b1d7 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 27 Aug 2025 15:04:15 -0700 Subject: [PATCH] [CAS] Improve MappedFileRegionBumpPtr Improve MappedFileRegionBumpPtr so it can handle being opened with different capacities. Mismatching different capacities and header offsets will result in error on creation. --- clang/test/CAS/daemon-cas-recovery.c | 2 +- clang/test/CAS/depscan-cas-log.c | 6 +- clang/test/CAS/validate-once.c | 2 +- clang/tools/driver/cc1depscanProtocol.cpp | 1 + .../llvm/CAS/MappedFileRegionBumpPtr.h | 45 ++-- llvm/lib/CAS/MappedFileRegionBumpPtr.cpp | 240 +++++++++++------ llvm/lib/CAS/OnDiskGraphDB.cpp | 2 +- llvm/lib/CAS/OnDiskHashMappedTrie.cpp | 10 +- llvm/lib/CAS/OnDiskKeyValueDB.cpp | 4 +- llvm/test/CAS/logging.test | 26 +- llvm/test/CAS/validate-if-needed.test | 8 +- llvm/test/tools/llvm-cas/validation.test | 4 +- llvm/tools/llvm-cas-test/llvm-cas-test.cpp | 2 +- llvm/unittests/CAS/CMakeLists.txt | 1 + llvm/unittests/CAS/ProgramTest.cpp | 251 ++++++++++++++++++ 15 files changed, 477 insertions(+), 127 deletions(-) create mode 100644 llvm/unittests/CAS/ProgramTest.cpp diff --git a/clang/test/CAS/daemon-cas-recovery.c b/clang/test/CAS/daemon-cas-recovery.c index 8198d7b2556f6..ca337acc5efba 100644 --- a/clang/test/CAS/daemon-cas-recovery.c +++ b/clang/test/CAS/daemon-cas-recovery.c @@ -4,7 +4,7 @@ /// Construct a malformed CAS to recovery from. // RUN: echo "abc" | llvm-cas --cas %t/cas --make-blob --data - -// RUN: rm %t/cas/v1.1/v9.data +// RUN: rm %t/cas/v1.1/v10.data // RUN: not llvm-cas --cas %t/cas --validate --check-hash // RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_FORCE_VALIDATION=1 %clang-cache \ diff --git a/clang/test/CAS/depscan-cas-log.c b/clang/test/CAS/depscan-cas-log.c index 088ebd1d99c58..cd134b777f6e3 100644 --- a/clang/test/CAS/depscan-cas-log.c +++ b/clang/test/CAS/depscan-cas-log.c @@ -10,11 +10,11 @@ // RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas // RUN: FileCheck %s --input-file %t/cas/v1.log -// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v9.index' +// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index' // CHECK: [[PID1]] {{[0-9]*}}: create subtrie // Even a minimal compilation involves at least 9 records for the cache key. // CHECK-COUNT-9: [[PID1]] {{[0-9]*}}: create record -// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v9.index' -// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}v9.index' +// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index' +// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}v{{[0-9]+}}.index' diff --git a/clang/test/CAS/validate-once.c b/clang/test/CAS/validate-once.c index 152c812cb976b..faadebe5ad019 100644 --- a/clang/test/CAS/validate-once.c +++ b/clang/test/CAS/validate-once.c @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: llvm-cas --cas %t/cas --ingest %s -// RUN: mv %t/cas/v1.1/v9.data %t/cas/v1.1/v9.data.bak +// RUN: mv %t/cas/v1.1/v10.data %t/cas/v1.1/v10.data.bak // RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \ // RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \ diff --git a/clang/tools/driver/cc1depscanProtocol.cpp b/clang/tools/driver/cc1depscanProtocol.cpp index 83a1556b1abae..c1d65ae012f96 100644 --- a/clang/tools/driver/cc1depscanProtocol.cpp +++ b/clang/tools/driver/cc1depscanProtocol.cpp @@ -190,6 +190,7 @@ Expected ScanDaemon::launchDaemon(StringRef BasePath, static constexpr const char *PassThroughEnv[] = { "LLVM_CAS_LOG", "LLVM_CAS_DISABLE_VALIDATION", + "LLVM_CAS_MAX_MAPPING_SIZE", }; SmallVector EnvP; for (const char *Name : PassThroughEnv) diff --git a/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h b/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h index 17f78225e30a2..8e30818d8378d 100644 --- a/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h +++ b/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h @@ -5,11 +5,16 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +/// \file +/// This file declares interface for MappedFileRegionBumpPtr, a bump pointer +/// allocator, backed by a memory-mapped file. +/// +//===----------------------------------------------------------------------===// #ifndef LLVM_CAS_MAPPEDFILEREGIONBUMPPTR_H #define LLVM_CAS_MAPPEDFILEREGIONBUMPPTR_H -#include "llvm/Config/llvm-config.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/FileSystem.h" #include @@ -18,7 +23,7 @@ namespace llvm::cas { namespace ondisk { class OnDiskCASLogger; -} +} // namespace ondisk /// Allocator for an owned mapped file region that supports thread-safe and /// process-safe bump pointer allocation. @@ -31,33 +36,37 @@ class OnDiskCASLogger; /// Process-safe. Uses file locks when resizing the file during initialization /// and destruction. /// -/// Thread-safe, assuming all threads use the same instance to talk to a given -/// file/mapping. Unsafe to have multiple instances talking to the same file -/// in the same process since file locks will misbehave. Clients should -/// coordinate (somehow). -/// -/// \note Currently we allocate the whole file without sparseness on Windows. +/// Thread-safe. Requires OS support thread-safe file lock. /// /// Provides 8-byte alignment for all allocations. class MappedFileRegionBumpPtr { public: using RegionT = sys::fs::mapped_file_region; + /// Header for MappedFileRegionBumpPtr. It can be configured to be located + /// at any location within the file and the allocation will be appended after + /// the header. + struct Header { + std::atomic BumpPtr; + std::atomic AllocatedSize; + }; + /// Create a \c MappedFileRegionBumpPtr. /// /// \param Path the path to open the mapped region. /// \param Capacity the maximum size for the mapped file region. - /// \param BumpPtrOffset the offset at which to store the bump pointer. + /// \param HeaderOffset the offset at which to store the header. This is so + /// that information can be stored before the header, like a file magic. /// \param NewFileConstructor is for constructing new files. It has exclusive /// access to the file. Must call \c initializeBumpPtr. static Expected - create(const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset, + create(const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset, std::shared_ptr Logger, function_ref NewFileConstructor); - /// Finish initializing the bump pointer. Must be called by - /// \c NewFileConstructor. - void initializeBumpPtr(int64_t BumpPtrOffset); + /// Finish initializing the header. Must be called by \c NewFileConstructor. + /// \c HeaderOffset passed should match the value passed to \c create. + void initializeHeader(uint64_t HeaderOffset); /// Minimum alignment for allocations, currently hardcoded to 8B. static constexpr Align getAlign() { @@ -103,20 +112,18 @@ class MappedFileRegionBumpPtr { std::swap(H, RHS.H); std::swap(Path, RHS.Path); std::swap(FD, RHS.FD); - std::swap(SharedLockFD, RHS.SharedLockFD); + std::swap(SupportFD, RHS.SupportFD); std::swap(Logger, RHS.Logger); } private: - struct Header { - std::atomic BumpPtr; - std::atomic AllocatedSize; - }; RegionT Region; Header *H = nullptr; std::string Path; + // File descriptor for the main storage file. std::optional FD; - std::optional SharedLockFD; + // File descriptor for the file used as config and reader/writer lock. + std::optional SupportFD; std::shared_ptr Logger = nullptr; }; diff --git a/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp b/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp index a6d081f79fc22..0ea0f2b2f17ed 100644 --- a/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp +++ b/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp @@ -1,11 +1,11 @@ -//===- MappedFileRegionBumpPtr.cpp ------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/// \file +/// \file Implements MappedFileRegionBumpPtr. /// /// A bump pointer allocator, backed by a memory-mapped file. /// @@ -20,12 +20,14 @@ /// and across multiple processes without locking for every read. Our current /// implementation strategy is: /// -/// 1. Use \c ftruncate (\c sys::fs::resize_file) to grow the file to its max -/// size (typically several GB). Many modern filesystems will create a sparse -/// file, so that the trailing unused pages do not take space on disk. -/// 2. Call \c mmap (\c sys::fs::mapped_file_region) +/// 1. Use \c sys::fs::resize_file_sparse to grow the file to its max size +/// (typically several GB). If the file system doesn't support sparse file, +/// this may return a fully allocated file. +/// 2. Call \c sys::fs::mapped_file_region to map the entire file. /// 3. [Automatic as part of 2.] -/// 4. [Automatic as part of 2.] +/// 4. If supported, use \c fallocate or similiar APIs to ensure the file system +/// storage for the sparse file so we won't end up with partial file if the +/// disk is out of space. /// /// Additionally, we attempt to resize the file to its actual data size when /// closing the mapping, if this is the only concurrent instance. This is done @@ -35,10 +37,9 @@ /// which typically loses sparseness. These mitigations only work while the file /// is not in use. /// -/// FIXME: we assume that all concurrent users of the file will use the same -/// value for Capacity. Otherwise a process with a larger capacity can write -/// data that is "out of bounds" for processes with smaller capacity. Currently -/// this is true in the CAS. +/// The capacity and the header offset is determined by the first user of the +/// MappedFileRegionBumpPtr instance and any future mismatched value from the +/// original will result in error on creation. /// /// To support resizing, we use two separate file locks: /// 1. We use a shared reader lock on a ".shared" file until destruction. @@ -53,8 +54,10 @@ #include "llvm/CAS/MappedFileRegionBumpPtr.h" #include "OnDiskCommon.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CAS/OnDiskCASLogger.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/EndianStream.h" #if LLVM_ON_UNIX #include @@ -82,12 +85,21 @@ struct FileLockRAII { ~FileLockRAII() { consumeError(unlock()); } Error lock(sys::fs::LockKind LK) { + assert(!Locked && "already locked"); if (std::error_code EC = lockFileThreadSafe(FD, LK)) return createFileError(Path, EC); Locked = LK; return Error::success(); } + Error switchLock(sys::fs::LockKind LK) { + assert(Locked && "not locked"); + if (auto E = unlock()) + return E; + + return lock(LK); + } + Error unlock() { if (Locked) { Locked = std::nullopt; @@ -96,6 +108,23 @@ struct FileLockRAII { } return Error::success(); } + + // Return true if succeed to lock the file exclusively. + bool tryLockExclusive() { + assert(!Locked && "can only try to lock if not locked"); + if (tryLockFileThreadSafe(FD) == std::error_code()) { + Locked = sys::fs::LockKind::Exclusive; + return true; + } + + return false; + } + + // Release the lock so it will not be unlocked on destruction. + void release() { + Locked = std::nullopt; + FD = -1; + } }; struct FileSizeInfo { @@ -107,9 +136,15 @@ struct FileSizeInfo { } // end anonymous namespace Expected MappedFileRegionBumpPtr::create( - const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset, + const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset, std::shared_ptr Logger, function_ref NewFileConstructor) { + uint64_t MinCapacity = HeaderOffset + sizeof(Header); + if (Capacity < MinCapacity) + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "capacity is too small to hold MappedFileRegionBumpPtr"); + MappedFileRegionBumpPtr Result; Result.Path = Path.str(); Result.Logger = std::move(Logger); @@ -120,21 +155,80 @@ Expected MappedFileRegionBumpPtr::create( return createFileError(Path, EC); Result.FD = FD; - // Open the shared lock file. See file comment for details of locking scheme. - SmallString<128> SharedLockPath(Result.Path); - SharedLockPath.append(".shared"); - int SharedLockFD; + // Open the support file. See file comment for details of locking scheme. + SmallString<128> SupportFilePath(Result.Path); + SupportFilePath.append(".shared"); + int SupportFD; if (std::error_code EC = sys::fs::openFileForReadWrite( - SharedLockPath, SharedLockFD, sys::fs::CD_OpenAlways, + SupportFilePath, SupportFD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) - return createFileError(SharedLockPath, EC); - Result.SharedLockFD = SharedLockFD; + return createFileError(SupportFilePath, EC); + Result.SupportFD = SupportFD; + + sys::fs::file_status SupportFileStatus; + bool FreshConfig = false; + while (true) { + if (auto EC = sys::fs::status(SupportFD, SupportFileStatus)) + return createFileError(SupportFilePath, EC); + if (SupportFileStatus.getSize() != 0) + break; + + // Try exclusive lock and initialize the file. + FileLockRAII SupportFileLock(std::string(SupportFilePath), SupportFD); + if (SupportFileLock.tryLockExclusive()) { + // Check size again and exit if it is initialized. + if (auto EC = sys::fs::status(SupportFD, SupportFileStatus)) + return createFileError(SupportFilePath, EC); + if (SupportFileStatus.getSize() != 0) + break; + + // Write capacity and header offset to file. + llvm::raw_fd_ostream OS(SupportFD, /*shouldClose=*/false, + /*unbuffered=*/true); + support::endian::write(OS, Capacity, endianness::little); + support::endian::write(OS, HeaderOffset, endianness::little); + + FreshConfig = true; + break; + } + } - // Take shared/reader lock that will be held until we close the file; unlocked - // by destroyImpl. - if (std::error_code EC = - lockFileThreadSafe(SharedLockFD, sys::fs::LockKind::Shared)) - return createFileError(Path, EC); + // Take shared/reader lock that will be held until we close the file; + // unlocked by destroyImpl if construction is successful. + FileLockRAII SupportFileLock(std::string(SupportFilePath), SupportFD); + if (auto E = SupportFileLock.lock(sys::fs::LockKind::Shared)) + return std::move(E); + + if (!FreshConfig) { + // Read the configuration and error out if the configuration is different. + SmallVector ConfigBuffer; + sys::fs::file_t SupportFile = sys::fs::convertFDToNativeFile(SupportFD); + if (auto E = sys::fs::readNativeFileToEOF(SupportFile, ConfigBuffer)) + return std::move(E); + + if (ConfigBuffer.size() != 2 * sizeof(uint64_t)) + return createStringError( + std::make_error_code(std::errc::invalid_argument), + SupportFilePath + " does not have correct size"); + + uint64_t ExpectedCapacity = + support::endian::read( + ConfigBuffer.data()); + uint64_t ExpectedOffset = + support::endian::read( + ConfigBuffer.data() + sizeof(uint64_t)); + + // If the capacity doesn't match, use the existing capacity instead. + if (ExpectedCapacity != Capacity) + Capacity = ExpectedCapacity; + + if (ExpectedOffset != HeaderOffset) + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "specified header offset (" + utostr(HeaderOffset) + + ") does not match existing config (" + utostr(ExpectedOffset) + + ")"); + } // Take shared/reader lock for initialization. FileLockRAII InitLock(Result.Path, FD); @@ -146,35 +240,26 @@ Expected MappedFileRegionBumpPtr::create( if (!FileSize) return createFileError(Result.Path, FileSize.getError()); + // If the size is smaller than the capacity, we need to initialize the file. + // It maybe empty, or may have been shrunk during a previous close. if (FileSize->Size < Capacity) { // Lock the file exclusively so only one process will do the initialization. - if (Error E = InitLock.unlock()) - return std::move(E); - if (Error E = InitLock.lock(sys::fs::LockKind::Exclusive)) + if (Error E = InitLock.switchLock(sys::fs::LockKind::Exclusive)) return std::move(E); // Retrieve the current size now that we have exclusive access. FileSize = FileSizeInfo::get(File); if (!FileSize) - return createFileError(Result.Path, FileSize.getError()); + return createFileError(Result.Path, FileSize.getError()); } - // At this point either the file is still under-sized, or we have the size for - // the completely initialized file. - + // If the size is still smaller than capacity, we need to resize the file. if (FileSize->Size < Capacity) { - // We are initializing the file; it may be empty, or may have been shrunk - // during a previous close. - // FIXME: Detect a case where someone opened it with a smaller capacity. assert(InitLock.Locked == sys::fs::LockKind::Exclusive); if (std::error_code EC = sys::fs::resize_file_sparse(FD, Capacity)) return createFileError(Result.Path, EC); - if (Result.Logger) Result.Logger->log_MappedFileRegionBumpPtr_resizeFile( Result.Path, FileSize->Size, Capacity); - } else { - // Someone else initialized it. - Capacity = FileSize->Size; } // Create the mapped region. @@ -187,25 +272,29 @@ Expected MappedFileRegionBumpPtr::create( Result.Region = std::move(Map); } - if (FileSize->Size == 0) { + if (FileSize->Size < MinCapacity) { assert(InitLock.Locked == sys::fs::LockKind::Exclusive); - // We are creating a new file; run the constructor. + // If we need to fully initialize the file, call NewFileConstructor. if (Error E = NewFileConstructor(Result)) return std::move(E); - } else { - Result.initializeBumpPtr(BumpPtrOffset); - } - - if (FileSize->Size < Capacity && FileSize->AllocatedSize < Capacity) { - // We are initializing the file; sync the allocated size in case it - // changed when truncating or during construction. + } else + Result.initializeHeader(HeaderOffset); + + if (InitLock.Locked == sys::fs::LockKind::Exclusive) { + // If holding an exclusive lock, we might have resized the file and + // performed some read/write to the file. Query the file size again to make + // sure everything is up-to-date. Otherwise, FileSize info is already + // up-to-date. FileSize = FileSizeInfo::get(File); if (!FileSize) return createFileError(Result.Path, FileSize.getError()); - assert(InitLock.Locked == sys::fs::LockKind::Exclusive); - Result.H->AllocatedSize.exchange(FileSize->AllocatedSize); } + Result.H->AllocatedSize.exchange(FileSize->AllocatedSize); + + // Release the shared lock from RAII so it can be closed in destoryImpl(). + SupportFileLock.release(); + return Result; } @@ -214,22 +303,23 @@ void MappedFileRegionBumpPtr::destroyImpl() { return; // Drop the shared lock indicating we are no longer accessing the file. - if (SharedLockFD) - (void)unlockFileThreadSafe(*SharedLockFD); + if (SupportFD) + (void)unlockFileThreadSafe(*SupportFD); // Attempt to truncate the file if we can get exclusive access. Ignore any // errors. if (H) { - assert(SharedLockFD && "Must have shared lock file open"); - if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) { + assert(SupportFD && "Must have shared lock file open"); + if (tryLockFileThreadSafe(*SupportFD) == std::error_code()) { size_t Size = size(); size_t Capacity = capacity(); // sync to file system to make sure all contents are up-to-date. (void)Region.sync(); + // unmap the file before resizing since that is the requirement for + // some platforms. Region.unmap(); (void)sys::fs::resize_file(*FD, Size); - (void)unlockFileThreadSafe(*SharedLockFD); - + (void)unlockFileThreadSafe(*SupportFD); if (Logger) Logger->log_MappedFileRegionBumpPtr_resizeFile(Path, Capacity, Size); } @@ -245,26 +335,25 @@ void MappedFileRegionBumpPtr::destroyImpl() { // Close the file and shared lock. Close(FD); - Close(SharedLockFD); + Close(SupportFD); if (Logger) Logger->log_MappedFileRegionBumpPtr_close(Path); } -void MappedFileRegionBumpPtr::initializeBumpPtr(int64_t BumpPtrOffset) { +void MappedFileRegionBumpPtr::initializeHeader(uint64_t HeaderOffset) { assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t"); - int64_t BumpPtrEndOffset = BumpPtrOffset + sizeof(decltype(*H)); - assert(BumpPtrEndOffset <= (int64_t)capacity() && + uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H)); + assert(HeaderEndOffset <= capacity() && "Expected end offset to be pre-allocated"); - assert(isAligned(Align::Of(), BumpPtrOffset) && + assert(isAligned(Align::Of(), HeaderOffset) && "Expected end offset to be aligned"); - H = reinterpret_cast(data() + BumpPtrOffset); - - int64_t ExistingValue = 0; - if (!H->BumpPtr.compare_exchange_strong(ExistingValue, BumpPtrEndOffset)) - assert(ExistingValue >= BumpPtrEndOffset && - "Expected 0, or past the end of the BumpPtr itself"); + H = reinterpret_cast(data() + HeaderOffset); + uint64_t ExistingValue = 0; + if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset)) + assert(ExistingValue >= HeaderEndOffset && + "Expected 0, or past the end of the header itself"); if (Logger) Logger->log_MappedFileRegionBumpPtr_create(Path, *FD, data(), capacity(), size()); @@ -277,16 +366,16 @@ static Error createAllocatorOutOfSpaceError() { Expected MappedFileRegionBumpPtr::allocateOffset(uint64_t AllocSize) { AllocSize = alignTo(AllocSize, getAlign()); - int64_t OldEnd = H->BumpPtr.fetch_add(AllocSize); - int64_t NewEnd = OldEnd + AllocSize; - if (LLVM_UNLIKELY(NewEnd > (int64_t)capacity())) { + uint64_t OldEnd = H->BumpPtr.fetch_add(AllocSize); + uint64_t NewEnd = OldEnd + AllocSize; + if (LLVM_UNLIKELY(NewEnd > capacity())) { // Return the allocation. If the start already passed the end, that means // some other concurrent allocations already consumed all the capacity. // There is no need to return the original value. If the start was not // passed the end, current allocation certainly bumped it passed the end. // All other allocation afterwards must have failed and current allocation // is in charge of return the allocation back to a valid value. - if (OldEnd <= (int64_t)capacity()) + if (OldEnd <= capacity()) (void)H->BumpPtr.exchange(OldEnd); if (Logger) @@ -296,12 +385,13 @@ Expected MappedFileRegionBumpPtr::allocateOffset(uint64_t AllocSize) { return createAllocatorOutOfSpaceError(); } - int64_t DiskSize = H->AllocatedSize; + uint64_t DiskSize = H->AllocatedSize; if (LLVM_UNLIKELY(NewEnd > DiskSize)) { - int64_t NewSize; + uint64_t NewSize; // The minimum increment is a page, but allocate more to amortize the cost. - constexpr int64_t Increment = 1 * 1024 * 1024; // 1 MB - if (Error E = preallocateFileTail(*FD, DiskSize, DiskSize + Increment).moveInto(NewSize)) + constexpr uint64_t Increment = 1 * 1024 * 1024; // 1 MB + if (Error E = preallocateFileTail(*FD, DiskSize, DiskSize + Increment) + .moveInto(NewSize)) return std::move(E); assert(NewSize >= DiskSize + Increment); // FIXME: on Darwin this can under-count the size if there is a race to diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 99c9dd5f681c6..ec5eed0fa820a 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -81,7 +81,7 @@ static constexpr StringLiteral DataPoolTableName = "llvm.cas.data"; static constexpr StringLiteral IndexFile = "index"; static constexpr StringLiteral DataPoolFile = "data"; -static constexpr StringLiteral FilePrefix = "v9."; +static constexpr StringLiteral FilePrefix = "v10."; static constexpr StringLiteral FileSuffixData = ".data"; static constexpr StringLiteral FileSuffixLeaf = ".leaf"; static constexpr StringLiteral FileSuffixLeaf0 = ".leaf+0"; diff --git a/llvm/lib/CAS/OnDiskHashMappedTrie.cpp b/llvm/lib/CAS/OnDiskHashMappedTrie.cpp index 5ca85e0b0e39b..bd9f03ed86d5e 100644 --- a/llvm/lib/CAS/OnDiskHashMappedTrie.cpp +++ b/llvm/lib/CAS/OnDiskHashMappedTrie.cpp @@ -123,7 +123,7 @@ class DatabaseFile { uint64_t Magic; uint64_t Version; std::atomic RootTableOffset; - std::atomic BumpPtr; + MappedFileRegionBumpPtr::Header MappedFileHeader; }; const Header &getHeader() { return *H; } @@ -185,8 +185,8 @@ DatabaseFile::create(const Twine &Path, uint64_t Capacity, return createTableConfigError(std::errc::argument_out_of_domain, Path.str(), "datafile", "Allocator too small for header"); - (void)new (Alloc.data()) Header{getMagic(), getVersion(), {0}, {0}}; - Alloc.initializeBumpPtr(offsetof(Header, BumpPtr)); + (void)new (Alloc.data()) Header{getMagic(), getVersion(), {0}, {}}; + Alloc.initializeHeader(offsetof(Header, MappedFileHeader)); DatabaseFile DB(Alloc); return NewDBConstructor(DB); }; @@ -194,7 +194,7 @@ DatabaseFile::create(const Twine &Path, uint64_t Capacity, // Get or create the file. MappedFileRegionBumpPtr Alloc; if (Error E = MappedFileRegionBumpPtr::create( - Path, Capacity, offsetof(Header, BumpPtr), + Path, Capacity, offsetof(Header, MappedFileHeader), std::move(Logger), NewFileConstructor) .moveInto(Alloc)) return std::move(E); @@ -264,7 +264,7 @@ Error DatabaseFile::validate(MappedFileRegion &Region) { "database: wrong version"); // Check the bump-ptr, which should point past the header. - if (H->BumpPtr.load() < (int64_t)sizeof(Header)) + if (H->MappedFileHeader.BumpPtr.load() < (int64_t)sizeof(Header)) return createStringError(std::errc::invalid_argument, "database: corrupt bump-ptr"); diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp index ba248281c414a..cca966fc40dd1 100644 --- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -19,7 +19,7 @@ using namespace llvm::cas; using namespace llvm::cas::ondisk; static constexpr StringLiteral ActionCacheFile = "actions"; -static constexpr StringLiteral FilePrefix = "v4."; +static constexpr StringLiteral FilePrefix = "v5."; Expected> OnDiskKeyValueDB::put(ArrayRef Key, ArrayRef Value) { @@ -102,4 +102,4 @@ Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { return CheckValue(Offset, Record.Data); return Error::success(); }); -} \ No newline at end of file +} diff --git a/llvm/test/CAS/logging.test b/llvm/test/CAS/logging.test index b4159869c7b92..47de3c7d0fcd1 100644 --- a/llvm/test/CAS/logging.test +++ b/llvm/test/CAS/logging.test @@ -8,29 +8,29 @@ RUN: FileCheck %s --input-file %t/cas/v1.log RUN: FileCheck %s --input-file %t/cas/v1.log --check-prefix=STANDALONE -// CHECK: resize mapped file '{{.*}}v9.index' -// CHECK: mmap '{{.*}}v9.index' [[INDEX:0x[0-9a-f]+]] -// CHECK: resize mapped file '{{.*}}v9.data' -// CHECK: mmap '{{.*}}v9.data' [[DATA:0x[0-9a-f]+]] -// CHECK: resize mapped file '{{.*}}v4.actions' -// CHECK: mmap '{{.*}}v4.actions' [[ACTIONS:0x[0-9a-f]+]] +// CHECK: resize mapped file '{{.*}}v{{[0-9]+}}.index' +// CHECK: mmap '{{.*}}v{{[0-9]+}}.index' [[INDEX:0x[0-9a-f]+]] +// CHECK: resize mapped file '{{.*}}v{{[0-9]+}}.data' +// CHECK: mmap '{{.*}}v{{[0-9]+}}.data' [[DATA:0x[0-9a-f]+]] +// CHECK: resize mapped file '{{.*}}v{{[0-9]+}}.actions' +// CHECK: mmap '{{.*}}v{{[0-9]+}}.actions' [[ACTIONS:0x[0-9a-f]+]] // store input/a contents into the datapool // CHECK: create record region=[[INDEX]] offset=[[INPUT_A_OFF:0x[0-9a-f]+]] hash=9b096cd140f119 // CHECK: cmpxcgh subtrie region=[[INDEX]] offset={{.*}} slot={{.*}} expected=0x0 new=[[INPUT_A_OFF]] prev=0x0 // CHECK: alloc [[DATA]] -// CHECK: resize mapped file '{{.*}}v4.actions' -// CHECK: close mmap '{{.*}}v4.actions' -// CHECK: resize mapped file '{{.*}}v9.data' -// CHECK: close mmap '{{.*}}v9.data' -// CHECK: resize mapped file '{{.*}}v9.index' -// CHECK: close mmap '{{.*}}v9.index' +// CHECK: resize mapped file '{{.*}}v{{[0-9]+}}.actions' +// CHECK: close mmap '{{.*}}v{{[0-9]+}}.actions' +// CHECK: resize mapped file '{{.*}}v{{[0-9]+}}.data' +// CHECK: close mmap '{{.*}}v{{[0-9]+}}.data' +// CHECK: resize mapped file '{{.*}}v{{[0-9]+}}.index' +// CHECK: close mmap '{{.*}}v{{[0-9]+}}.index' // CHECK: validate-if-needed '{{.*}}cas' boot=[[BOOT:[0-9]+]] last-valid=0 check-hash=1 allow-recovery=0 force=0 llvm-cas={{.*}}llvm-cas // CHECK: validate-if-needed '{{.*}}cas' boot=[[BOOT]] last-valid=[[BOOT]] check-hash=0 allow-recovery=1 force=1 llvm-cas={{.*}}llvm-cas -// STANDALONE: standalone file create '[[PATH:.*v9.[0-9a-f]*.leaf]].[[SUFFIX:[0-9a-f]*]]' +// STANDALONE: standalone file create '[[PATH:.*v[0-9]+.[0-9a-f]*.leaf]].[[SUFFIX:[0-9a-f]*]]' // STANDALONE: standalone file rename '[[PATH]].[[SUFFIX]]' to '[[PATH]]' //--- input/a diff --git a/llvm/test/CAS/validate-if-needed.test b/llvm/test/CAS/validate-if-needed.test index 0e8c28a23a36e..cb52cc4840ffc 100644 --- a/llvm/test/CAS/validate-if-needed.test +++ b/llvm/test/CAS/validate-if-needed.test @@ -1,6 +1,6 @@ RUN: rm -rf %t && mkdir %t RUN: llvm-cas --cas %t/cas --ingest %S/Inputs > %t/cas.id -RUN: mv %t/cas/v1.1/v9.data %t/cas/v1.1/v9.data.bak +RUN: mv %t/cas/v1.1/v10.data %t/cas/v1.1/v10.data.bak # INVALID: bad record # VALID: validated successfully @@ -12,7 +12,7 @@ RUN: not llvm-cas --cas %t/cas --validate-if-needed 2>&1 | FileCheck %s -check-p RUN: not llvm-cas --cas %t/cas --validate-if-needed 2>&1 | FileCheck %s -check-prefix=INVALID # Validation happens once per boot. -RUN: mv %t/cas/v1.1/v9.data.bak %t/cas/v1.1/v9.data +RUN: mv %t/cas/v1.1/v10.data.bak %t/cas/v1.1/v10.data RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=VALID RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED # Wrong timestamp triggers re-validation. @@ -20,7 +20,7 @@ RUN: echo '123' > %t/cas/v1.validation RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=VALID RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED # Skipped validation does not catch errors. -RUN: mv %t/cas/v1.1/v9.data %t/cas/v1.1/v9.data.bak +RUN: mv %t/cas/v1.1/v10.data %t/cas/v1.1/v10.data.bak RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED # Unless forced. @@ -33,7 +33,7 @@ RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery | FileCheck %s RUN: llvm-cas --cas %t/cas --validate-if-needed --force | FileCheck %s -check-prefix=VALID RUN: rm -rf %t/cas/v1.1 RUN: cp -r %t/cas/corrupt.0.v1.1 %t/cas/v1.1 -RUN: mv %t/cas/v1.1/v9.data %t/cas/v1.1/v9.data.bak +RUN: mv %t/cas/v1.1/v10.data %t/cas/v1.1/v10.data.bak RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery --force | FileCheck %s -check-prefix=RECOVERED RUN: ls %t/cas/corrupt.1.v1.1 diff --git a/llvm/test/tools/llvm-cas/validation.test b/llvm/test/tools/llvm-cas/validation.test index 6faed5293098c..6b662eae64dfc 100644 --- a/llvm/test/tools/llvm-cas/validation.test +++ b/llvm/test/tools/llvm-cas/validation.test @@ -12,7 +12,7 @@ RUN: llvm-cas --cas %t/cas --ingest %S/Inputs > %t/cas.id RUN: llvm-cas --cas %t/cas --validate RUN: llvm-cas --cas %t/cas --validate --check-hash -RUN: rm %t/cas/v1.1/v9.data +RUN: rm %t/cas/v1.1/v10.data RUN: not llvm-cas --cas %t/cas --validate RUN: not llvm-cas --cas %t/cas --validate --check-hash @@ -28,5 +28,5 @@ RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid RUN: llvm-cas --cas %t/ac --validate # Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last # allocated record, leaving it invalid. -RUN: truncate -s -40 %t/ac/v1.1/v4.actions +RUN: truncate -s -40 %t/ac/v1.1/v5.actions RUN: not llvm-cas --cas %t/ac --validate diff --git a/llvm/tools/llvm-cas-test/llvm-cas-test.cpp b/llvm/tools/llvm-cas-test/llvm-cas-test.cpp index ed9062320956d..9954e16069249 100644 --- a/llvm/tools/llvm-cas-test/llvm-cas-test.cpp +++ b/llvm/tools/llvm-cas-test/llvm-cas-test.cpp @@ -285,7 +285,7 @@ static int checkLockFiles() { ExitOnError ExitOnErr("llvm-cas-test: check-lock-files: "); SmallString<128> DataPoolPath(CASPath); - sys::path::append(DataPoolPath, "v1.1/v9.data"); + sys::path::append(DataPoolPath, "v1.1/v10.data"); auto OpenCASAndGetDataPoolSize = [&]() -> Expected { auto Result = createOnDiskUnifiedCASDatabases(CASPath); diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt index c3cca20cbb069..f18ea6b6c5361 100644 --- a/llvm/unittests/CAS/CMakeLists.txt +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -32,6 +32,7 @@ add_llvm_unittest(CASTests OnDiskHashMappedTrieTest.cpp OnDiskKeyValueDBTest.cpp PluginCASTest.cpp + ProgramTest.cpp ThreadSafeAllocatorTest.cpp TreeSchemaTest.cpp UnifiedOnDiskCacheTest.cpp diff --git a/llvm/unittests/CAS/ProgramTest.cpp b/llvm/unittests/CAS/ProgramTest.cpp new file mode 100644 index 0000000000000..c220234a52648 --- /dev/null +++ b/llvm/unittests/CAS/ProgramTest.cpp @@ -0,0 +1,251 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Program.h" +#include "llvm/CAS/MappedFileRegionBumpPtr.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/ExponentialBackoff.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" +#if defined(__APPLE__) +#include +#elif !defined(_MSC_VER) +// Forward declare environ in case it's not provided by stdlib.h. +extern char **environ; +#endif + +using namespace llvm; +using namespace llvm::cas; + +extern const char *TestMainArgv0; +static char ProgramID = 0; + +class CASProgramTest : public testing::Test { + std::vector EnvTable; + std::vector EnvStorage; + +protected: + void SetUp() override { + auto EnvP = [] { +#if defined(_WIN32) + _wgetenv(L"TMP"); // Populate _wenviron, initially is null + return _wenviron; +#elif defined(__APPLE__) + return *_NSGetEnviron(); +#else + return environ; +#endif + }(); + ASSERT_TRUE(EnvP); + + auto prepareEnvVar = [this](decltype(*EnvP) Var) -> StringRef { +#if defined(_WIN32) + // On Windows convert UTF16 encoded variable to UTF8 + auto Len = wcslen(Var); + ArrayRef Ref{reinterpret_cast(Var), + Len * sizeof(*Var)}; + EnvStorage.emplace_back(); + auto convStatus = llvm::convertUTF16ToUTF8String(Ref, EnvStorage.back()); + EXPECT_TRUE(convStatus); + return EnvStorage.back(); +#else + (void)this; + return StringRef(Var); +#endif + }; + + while (*EnvP != nullptr) { + auto S = prepareEnvVar(*EnvP); + if (!StringRef(S).starts_with("GTEST_")) + EnvTable.emplace_back(S); + ++EnvP; + } + } + + void TearDown() override { + EnvTable.clear(); + EnvStorage.clear(); + } + + void addEnvVar(StringRef Var) { EnvTable.emplace_back(Var); } + + ArrayRef getEnviron() const { return EnvTable; } +}; + +#if LLVM_ENABLE_ONDISK_CAS + +TEST_F(CASProgramTest, MappedFileRegionBumpPtrTest) { + auto TestAllocator = [](StringRef Path) { + auto NewFileConstructor = [&](MappedFileRegionBumpPtr &Alloc) -> Error { + Alloc.initializeHeader(0); + return Error::success(); + }; + + std::optional Alloc; + ASSERT_THAT_ERROR( + MappedFileRegionBumpPtr::create(Path, /*Capacity=*/10 * 1024 * 1024, + /*HeaderOffset=*/0, /*Logger=*/nullptr, + NewFileConstructor) + .moveInto(Alloc), + Succeeded()); + + std::vector AllocatedPtr; + AllocatedPtr.resize(100); + DefaultThreadPool Threads; + for (unsigned I = 0; I < 100; ++I) { + Threads.async( + [&](unsigned Idx) { + // Allocate a buffer that is larger than needed so allocator hits + // additional pages for test coverage. + unsigned *P = (unsigned *)cantFail(Alloc->allocate(100)); + *P = Idx; + AllocatedPtr[Idx] = P; + }, + I); + } + + Threads.wait(); + for (unsigned I = 0; I < 100; ++I) + EXPECT_EQ(*AllocatedPtr[I], I); + }; + + if (const char *File = getenv("LLVM_CAS_TEST_MAPPED_FILE_REGION")) { + TestAllocator(File); + exit(0); + } + + SmallString<128> FilePath; + sys::fs::createUniqueDirectory("MappedFileRegionBumpPtr", FilePath); + sys::path::append(FilePath, "allocation-file"); + + std::string Executable = + sys::fs::getMainExecutable(TestMainArgv0, &ProgramID); + StringRef Argv[] = { + Executable, "--gtest_filter=CASProgramTest.MappedFileRegionBumpPtrTest"}; + + // Add LLVM_PROGRAM_TEST_LOCKED_FILE to the environment of the child. + std::string EnvVar = "LLVM_CAS_TEST_MAPPED_FILE_REGION="; + EnvVar += FilePath.str(); + addEnvVar(EnvVar); + + std::string Error; + bool ExecutionFailed; + sys::ProcessInfo PI = sys::ExecuteNoWait(Executable, Argv, getEnviron(), {}, + 0, &Error, &ExecutionFailed); + TestAllocator(FilePath); + + ASSERT_FALSE(ExecutionFailed) << Error; + ASSERT_NE(PI.Pid, sys::ProcessInfo::InvalidPid) << "Invalid process id"; + PI = llvm::sys::Wait(PI, /*SecondsToWait=*/5, &Error); + ASSERT_TRUE(PI.ReturnCode == 0); + ASSERT_TRUE(Error.empty()); + + // Clean up after both processes finish testing. + sys::fs::remove(FilePath); + sys::fs::remove_directories(sys::path::parent_path(FilePath)); +} + +TEST_F(CASProgramTest, MappedFileRegionBumpPtrSizeTest) { + using namespace std::chrono_literals; + auto NewFileConstructor = [&](MappedFileRegionBumpPtr &Alloc) -> Error { + Alloc.initializeHeader(0); + return Error::success(); + }; + + if (const char *File = getenv("LLVM_CAS_TEST_MAPPED_FILE_REGION")) { + ExponentialBackoff Backoff(5s); + do { + if (sys::fs::exists(File)) { + break; + } + } while (Backoff.waitForNextAttempt()); + + std::optional Alloc; + ASSERT_THAT_ERROR(MappedFileRegionBumpPtr::create(File, /*Capacity=*/1024, + /*HeaderOffset=*/0, + /*Logger=*/nullptr, + NewFileConstructor) + .moveInto(Alloc), + Succeeded()); + ASSERT_TRUE(Alloc->capacity() == 2048); + + Alloc.reset(); + ASSERT_THAT_ERROR(MappedFileRegionBumpPtr::create(File, /*Capacity=*/4096, + /*HeaderOffset=*/0, + /*Logger=*/nullptr, + NewFileConstructor) + .moveInto(Alloc), + Succeeded()); + ASSERT_TRUE(Alloc->capacity() == 2048); + Alloc.reset(); + + ASSERT_THAT_ERROR( + MappedFileRegionBumpPtr::create(File, /*Capacity=*/2048, + /*HeaderOffset=*/32, + /*Logger=*/nullptr, NewFileConstructor) + .moveInto(Alloc), + FailedWithMessage( + "specified header offset (32) does not match existing config (0)")); + + ASSERT_THAT_ERROR(MappedFileRegionBumpPtr::create(File, /*Capacity=*/2048, + /*HeaderOffset=*/0, + /*Logger=*/nullptr, + NewFileConstructor) + .moveInto(Alloc), + Succeeded()); + + exit(0); + } + + SmallString<128> FilePath; + sys::fs::createUniqueDirectory("MappedFileRegionBumpPtr", FilePath); + sys::path::append(FilePath, "allocation-file"); + + std::string Executable = + sys::fs::getMainExecutable(TestMainArgv0, &ProgramID); + StringRef Argv[] = { + Executable, + "--gtest_filter=CASProgramTest.MappedFileRegionBumpPtrSizeTest"}; + + // Add LLVM_PROGRAM_TEST_LOCKED_FILE to the environment of the child. + std::string EnvVar = "LLVM_CAS_TEST_MAPPED_FILE_REGION="; + EnvVar += FilePath.str(); + addEnvVar(EnvVar); + + std::optional Alloc; + ASSERT_THAT_ERROR(MappedFileRegionBumpPtr::create(FilePath, /*Capacity=*/2048, + /*HeaderOffset=*/0, + /*Logger=*/nullptr, + NewFileConstructor) + .moveInto(Alloc), + Succeeded()); + + std::string Error; + bool ExecutionFailed; + sys::ProcessInfo PI = sys::ExecuteNoWait(Executable, Argv, getEnviron(), {}, + 0, &Error, &ExecutionFailed); + + ASSERT_FALSE(ExecutionFailed) << Error; + ASSERT_NE(PI.Pid, sys::ProcessInfo::InvalidPid) << "Invalid process id"; + PI = llvm::sys::Wait(PI, /*SecondsToWait=*/100, &Error); + ASSERT_TRUE(PI.ReturnCode == 0); + ASSERT_TRUE(Error.empty()); + + // Size is still the requested 2048. + ASSERT_TRUE(Alloc->capacity() == 2048); + + // Clean up after both processes finish testing. + sys::fs::remove(FilePath); + sys::fs::remove_directories(sys::path::parent_path(FilePath)); +} + +#endif // LLVM_ENABLE_ONDISK_CAS