Skip to content

[🍒6.2][llvm][cas] Prevent corruption on ENOSPC on sparse filesystems #10921

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: swift/release/6.2
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions clang/test/CAS/depscan-cas-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas
// RUN: FileCheck %s --input-file %t/cas/v1.log

// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index'
// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v9.index'
// CHECK: [[PID1]] {{[0-9]*}}: create subtrie

// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index'
// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v9.index'
// Even a minimal compilation involves at least 9 records for the cache key.
// CHECK-COUNT-9: [[PID2]] {{[0-9]*}}: create record

// CHECK: [[PID1]] {{[0-9]*}}: close mmap '{{.*}}v8.index'
// CHECK: [[PID1]] {{[0-9]*}}: close mmap '{{.*}}v9.index'
2 changes: 1 addition & 1 deletion clang/test/CAS/validate-once.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: rm -rf %t

// RUN: llvm-cas --cas %t/cas --ingest %s
// RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak
// RUN: mv %t/cas/v1.1/v9.data %t/cas/v1.1/v9.data.bak

// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \
// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \
Expand Down
1 change: 1 addition & 0 deletions llvm/cmake/config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ check_symbol_exists(malloc_zone_statistics malloc/malloc.h
HAVE_MALLOC_ZONE_STATISTICS)
check_symbol_exists(getrlimit "sys/types.h;sys/time.h;sys/resource.h" HAVE_GETRLIMIT)
check_symbol_exists(posix_spawn spawn.h HAVE_POSIX_SPAWN)
check_symbol_exists(posix_fallocate fcntl.h HAVE_POSIX_FALLOCATE)
check_symbol_exists(pread unistd.h HAVE_PREAD)
check_symbol_exists(sbrk unistd.h HAVE_SBRK)
check_symbol_exists(strerror_r string.h HAVE_STRERROR_R)
Expand Down
10 changes: 7 additions & 3 deletions llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class MappedFileRegionBumpPtr {
Expected<int64_t> allocateOffset(uint64_t AllocSize);

char *data() const { return Region.data(); }
uint64_t size() const { return *BumpPtr; }
uint64_t size() const { return H->BumpPtr; }
uint64_t capacity() const { return Region.size(); }

RegionT &getRegion() { return Region; }
Expand All @@ -100,16 +100,20 @@ class MappedFileRegionBumpPtr {
void destroyImpl();
void moveImpl(MappedFileRegionBumpPtr &RHS) {
std::swap(Region, RHS.Region);
std::swap(BumpPtr, RHS.BumpPtr);
std::swap(H, RHS.H);
std::swap(Path, RHS.Path);
std::swap(FD, RHS.FD);
std::swap(SharedLockFD, RHS.SharedLockFD);
std::swap(Logger, RHS.Logger);
}

private:
/// Bookkeeping record that lives inside the mapped region itself, at the
/// offset handed to initializeBumpPtr().
struct Header {
/// Offset one past the last byte handed out by allocateOffset(); this is the
/// value reported by size().
std::atomic<int64_t> BumpPtr;
/// Number of bytes of the file recorded as already allocated on disk.
/// allocateOffset() preallocates more of the file and raises this value
/// before returning an offset that lies beyond it.
std::atomic<int64_t> AllocatedSize;
};
RegionT Region;
std::atomic<int64_t> *BumpPtr = nullptr;
Header *H = nullptr;
std::string Path;
std::optional<int> FD;
std::optional<int> SharedLockFD;
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Config/config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@
/* Define to 1 if you have the `posix_spawn' function. */
#cmakedefine HAVE_POSIX_SPAWN ${HAVE_POSIX_SPAWN}

/* Define to 1 if you have the `posix_fallocate' function. */
#cmakedefine HAVE_POSIX_FALLOCATE ${HAVE_POSIX_FALLOCATE}

/* Define to 1 if you have the `pread' function. */
#cmakedefine HAVE_PREAD ${HAVE_PREAD}

Expand Down
102 changes: 85 additions & 17 deletions llvm/lib/CAS/MappedFileRegionBumpPtr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,19 @@
#include "llvm/CAS/MappedFileRegionBumpPtr.h"
#include "OnDiskCommon.h"
#include "llvm/CAS/OnDiskCASLogger.h"
#include "llvm/Support/Compiler.h"

#if LLVM_ON_UNIX
#include <sys/stat.h>
#if __has_include(<sys/param.h>)
#include <sys/param.h>
#endif
#ifdef DEV_BSIZE
#define MAPPED_FILE_BSIZE DEV_BSIZE
#elif __linux__
#define MAPPED_FILE_BSIZE 512
#endif
#endif

using namespace llvm;
using namespace llvm::cas;
Expand Down Expand Up @@ -85,6 +98,13 @@ struct FileLockRAII {
return Error::success();
}
};

/// Size information about an open file, as reported by FileSizeInfo::get().
struct FileSizeInfo {
/// Logical size of the file in bytes.
uint64_t Size;
/// Bytes of disk storage reported as allocated to the file. On platforms
/// where this cannot be queried it is set equal to Size.
uint64_t AllocatedSize;

/// Query the size information for \p File, or return the error code of the
/// failed status query.
static ErrorOr<FileSizeInfo> get(sys::fs::file_t File);
};
} // end anonymous namespace

Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
Expand Down Expand Up @@ -123,39 +143,41 @@ Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
return std::move(E);

sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
sys::fs::file_status Status;
if (std::error_code EC = sys::fs::status(File, Status))
return createFileError(Result.Path, EC);
auto FileSize = FileSizeInfo::get(File);
if (!FileSize)
return createFileError(Result.Path, FileSize.getError());

if (Status.getSize() < Capacity) {
if (FileSize->Size < Capacity) {
// Lock the file exclusively so only one process will do the initialization.
if (Error E = InitLock.unlock())
return std::move(E);
if (Error E = InitLock.lock(FileLockRAII::Exclusive))
return std::move(E);
// Retrieve the current size now that we have exclusive access.
if (std::error_code EC = sys::fs::status(File, Status))
return createFileError(Result.Path, EC);
FileSize = FileSizeInfo::get(File);
if (!FileSize)
return createFileError(Result.Path, FileSize.getError());
}

// At this point either the file is still under-sized, or we have the size for
// the completely initialized file.

if (Status.getSize() < Capacity) {
if (FileSize->Size < Capacity) {
// We are initializing the file; it may be empty, or may have been shrunk
// during a previous close.
// FIXME: Detect a case where someone opened it with a smaller capacity.
// FIXME: On Windows we should use FSCTL_SET_SPARSE and FSCTL_SET_ZERO_DATA
// to make this a sparse region, if supported.
assert(InitLock.Locked == FileLockRAII::Exclusive);
if (std::error_code EC = sys::fs::resize_file(FD, Capacity))
return createFileError(Result.Path, EC);

if (Result.Logger)
Result.Logger->log_MappedFileRegionBumpPtr_resizeFile(
Result.Path, Status.getSize(), Capacity);
Result.Path, FileSize->Size, Capacity);
} else {
// Someone else initialized it.
Capacity = Status.getSize();
Capacity = FileSize->Size;
}

// Create the mapped region.
Expand All @@ -168,14 +190,25 @@ Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
Result.Region = std::move(Map);
}

if (Status.getSize() == 0) {
if (FileSize->Size == 0) {
assert(InitLock.Locked == FileLockRAII::Exclusive);
// We are creating a new file; run the constructor.
if (Error E = NewFileConstructor(Result))
return std::move(E);
} else {
Result.initializeBumpPtr(BumpPtrOffset);
}

if (FileSize->Size < Capacity && FileSize->AllocatedSize < Capacity) {
// We are initializing the file; sync the allocated size in case it
// changed when truncating or during construction.
FileSize = FileSizeInfo::get(File);
if (!FileSize)
return createFileError(Result.Path, FileSize.getError());
assert(InitLock.Locked == FileLockRAII::Exclusive);
Result.H->AllocatedSize.exchange(FileSize->AllocatedSize);
}

return Result;
}

Expand All @@ -189,7 +222,7 @@ void MappedFileRegionBumpPtr::destroyImpl() {

// Attempt to truncate the file if we can get exclusive access. Ignore any
// errors.
if (BumpPtr) {
if (H) {
assert(SharedLockFD && "Must have shared lock file open");
if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
size_t Size = size();
Expand Down Expand Up @@ -223,15 +256,15 @@ void MappedFileRegionBumpPtr::destroyImpl() {

void MappedFileRegionBumpPtr::initializeBumpPtr(int64_t BumpPtrOffset) {
assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t");
int64_t BumpPtrEndOffset = BumpPtrOffset + sizeof(decltype(*BumpPtr));
int64_t BumpPtrEndOffset = BumpPtrOffset + sizeof(decltype(*H));
assert(BumpPtrEndOffset <= (int64_t)capacity() &&
"Expected end offset to be pre-allocated");
assert(isAligned(Align::Of<decltype(*BumpPtr)>(), BumpPtrOffset) &&
assert(isAligned(Align::Of<decltype(*H)>(), BumpPtrOffset) &&
"Expected end offset to be aligned");
BumpPtr = reinterpret_cast<decltype(BumpPtr)>(data() + BumpPtrOffset);
H = reinterpret_cast<decltype(H)>(data() + BumpPtrOffset);

int64_t ExistingValue = 0;
if (!BumpPtr->compare_exchange_strong(ExistingValue, BumpPtrEndOffset))
if (!H->BumpPtr.compare_exchange_strong(ExistingValue, BumpPtrEndOffset))
assert(ExistingValue >= BumpPtrEndOffset &&
"Expected 0, or past the end of the BumpPtr itself");

Expand All @@ -247,7 +280,7 @@ static Error createAllocatorOutOfSpaceError() {

Expected<int64_t> MappedFileRegionBumpPtr::allocateOffset(uint64_t AllocSize) {
AllocSize = alignTo(AllocSize, getAlign());
int64_t OldEnd = BumpPtr->fetch_add(AllocSize);
int64_t OldEnd = H->BumpPtr.fetch_add(AllocSize);
int64_t NewEnd = OldEnd + AllocSize;
if (LLVM_UNLIKELY(NewEnd > (int64_t)capacity())) {
// Return the allocation. If the start already passed the end, that means
Expand All @@ -257,7 +290,7 @@ Expected<int64_t> MappedFileRegionBumpPtr::allocateOffset(uint64_t AllocSize) {
// All other allocations afterwards must have failed, and the current allocation
// is in charge of returning the allocation back to a valid value.
if (OldEnd <= (int64_t)capacity())
(void)BumpPtr->exchange(OldEnd);
(void)H->BumpPtr.exchange(OldEnd);

if (Logger)
Logger->log_MappedFileRegionBumpPtr_oom(Path, capacity(), OldEnd,
Expand All @@ -266,8 +299,43 @@ Expected<int64_t> MappedFileRegionBumpPtr::allocateOffset(uint64_t AllocSize) {
return createAllocatorOutOfSpaceError();
}

int64_t DiskSize = H->AllocatedSize;
if (LLVM_UNLIKELY(NewEnd > DiskSize)) {
int64_t NewSize;
// The minimum increment is a page, but allocate more to amortize the cost.
constexpr int64_t Increment = 1 * 1024 * 1024; // 1 MB
if (Error E = preallocateFileTail(*FD, DiskSize, DiskSize + Increment).moveInto(NewSize))
return std::move(E);
assert(NewSize >= DiskSize + Increment);
// FIXME: on Darwin this can under-count the size if there is a race to
// preallocate disk, because the semantics of F_PREALLOCATE are to add bytes
// to the end of the file, not to allocate up to a fixed size.
// Any discrepancy will be resolved the next time the file is truncated and
// then reopened.
while (DiskSize < NewSize)
H->AllocatedSize.compare_exchange_strong(DiskSize, NewSize);
}

if (Logger)
Logger->log_MappedFileRegionBumpPtr_allocate(data(), OldEnd, AllocSize);

return OldEnd;
}

/// Query the logical size and the on-disk allocated size of \p File.
///
/// \returns a FileSizeInfo on success, or the error code of the failed status
/// query.
ErrorOr<FileSizeInfo> FileSizeInfo::get(sys::fs::file_t File) {
#if LLVM_ON_UNIX && defined(MAPPED_FILE_BSIZE)
  struct stat Status;
  if (::fstat(File, &Status) != 0)
    return errnoAsErrorCode();
  // st_blocks is the number of blocks actually allocated to the file, counted
  // in MAPPED_FILE_BSIZE-byte units. Do not use st_blksize here: that is only
  // the preferred I/O block size and says nothing about how much of a sparse
  // file is backed by disk.
  uint64_t AllocatedSize = uint64_t(Status.st_blocks) * MAPPED_FILE_BSIZE;
  return FileSizeInfo{uint64_t(Status.st_size), AllocatedSize};
#else
  // Fallback: assume the file is fully allocated. Note: this may result in
  // data loss on out-of-space.
  sys::fs::file_status Status;
  if (std::error_code EC = sys::fs::status(File, Status))
    return EC;
  return FileSizeInfo{Status.getSize(), Status.getSize()};
#endif
}
35 changes: 34 additions & 1 deletion llvm/lib/CAS/OnDiskCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "OnDiskCommon.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Process.h"
#include <mutex>
Expand All @@ -23,6 +24,10 @@
#endif
#endif

#if __has_include(<fcntl.h>)
#include <fcntl.h>
#endif

using namespace llvm;

static uint64_t OnDiskCASMaxMappingSize = 0;
Expand Down Expand Up @@ -107,4 +112,32 @@ cas::ondisk::tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout,
#else
return make_error_code(std::errc::no_lock_available);
#endif
}
}

/// Reserve on-disk storage for the tail of \p FD so that running out of space
/// is reported here rather than when the mapped bytes are written.
///
/// \param FD file descriptor of the file to grow.
/// \param CurrentSize number of bytes already allocated.
/// \param NewSize number of bytes that should be allocated afterwards.
/// \returns the resulting allocated size (at least \p NewSize when growing),
/// or an \c Error if the allocation failed. Filesystems that report
/// "not supported" are treated as success.
Expected<size_t> cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize,
                                                  size_t NewSize) {
  // Nothing to reserve. This also keeps the unsigned subtractions below from
  // wrapping around to a huge length.
  if (NewSize <= CurrentSize)
    return CurrentSize;

  auto CreateError = [&](std::error_code EC) -> Expected<size_t> {
    if (EC == std::errc::not_supported)
      // Ignore ENOTSUP in case the filesystem cannot preallocate.
      return NewSize;
    return createStringError(EC,
                             "failed to allocate to CAS file: " + EC.message());
  };
#if defined(HAVE_POSIX_FALLOCATE)
  // posix_fallocate() returns the error number directly and does not set
  // errno, so build the error code from its return value.
  if (int Err = posix_fallocate(FD, CurrentSize, NewSize - CurrentSize))
    return CreateError(std::error_code(Err, std::generic_category()));
  return NewSize;
#elif defined(__APPLE__)
  fstore_t FAlloc;
  FAlloc.fst_flags = F_ALLOCATEALL | F_ALLOCATEPERSIST;
  FAlloc.fst_posmode = F_PEOFPOSMODE;
  FAlloc.fst_offset = 0;
  FAlloc.fst_length = NewSize - CurrentSize;
  FAlloc.fst_bytesalloc = 0;
  // fcntl() reports failure through errno.
  if (fcntl(FD, F_PREALLOCATE, &FAlloc))
    return CreateError(errnoAsErrorCode());
  assert(CurrentSize + FAlloc.fst_bytesalloc >= NewSize);
  return CurrentSize + FAlloc.fst_bytesalloc;
#else
  return NewSize; // Pretend it worked.
#endif
}
8 changes: 8 additions & 0 deletions llvm/lib/CAS/OnDiskCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ std::error_code tryLockFileThreadSafe(
int FD, std::chrono::milliseconds Timeout = std::chrono::milliseconds(0),
bool Exclusive = true);

/// Allocate space for the file \p FD on disk, if the filesystem supports it.
///
/// On filesystems that support this operation, this ensures errors such as
/// \c std::errc::no_space_on_device are detected before we write data.
///
/// \returns the new size of the file, or an \c Error.
Expected<size_t> preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize);

} // namespace llvm::cas::ondisk

#endif // LLVM_LIB_CAS_ONDISKCOMMON_H
5 changes: 4 additions & 1 deletion llvm/lib/CAS/OnDiskGraphDB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ static constexpr StringLiteral DataPoolTableName = "llvm.cas.data";
static constexpr StringLiteral IndexFile = "index";
static constexpr StringLiteral DataPoolFile = "data";

static constexpr StringLiteral FilePrefix = "v8.";
static constexpr StringLiteral FilePrefix = "v9.";
static constexpr StringLiteral FileSuffixData = ".data";
static constexpr StringLiteral FileSuffixLeaf = ".leaf";
static constexpr StringLiteral FileSuffixLeaf0 = ".leaf+0";
Expand Down Expand Up @@ -1311,6 +1311,9 @@ OnDiskGraphDB::createTempFile(StringRef FinalPath, uint64_t Size) {
if (!File)
return File.takeError();

if (Error E = preallocateFileTail(File->FD, 0, Size).takeError())
return createFileError(File->TmpName, std::move(E));

if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size))
return createFileError(File->TmpName, EC);

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CAS/OnDiskKeyValueDB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ using namespace llvm::cas;
using namespace llvm::cas::ondisk;

static constexpr StringLiteral ActionCacheFile = "actions";
static constexpr StringLiteral FilePrefix = "v3.";
static constexpr StringLiteral FilePrefix = "v4.";

Expected<ArrayRef<char>> OnDiskKeyValueDB::put(ArrayRef<uint8_t> Key,
ArrayRef<char> Value) {
Expand Down
Loading