Skip to content

[CAS] gmodule support for caching build #11026

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: next
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions clang/include/clang/Basic/ASTSourceDescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,23 @@ class ASTSourceDescriptor {
StringRef Path;
StringRef ASTFile;
ASTFileSignature Signature;
StringRef CASID;
Module *ClangModule = nullptr;

public:
ASTSourceDescriptor() = default;
ASTSourceDescriptor(StringRef Name, StringRef Path, StringRef ASTFile,
ASTFileSignature Signature)
ASTFileSignature Signature, StringRef CASID)
: PCHModuleName(std::move(Name)), Path(std::move(Path)),
ASTFile(std::move(ASTFile)), Signature(Signature) {}
ASTFile(std::move(ASTFile)), Signature(Signature), CASID(CASID) {}
ASTSourceDescriptor(Module &M);

std::string getModuleName() const;
StringRef getPath() const { return Path; }
StringRef getASTFile() const { return ASTFile; }
ASTFileSignature getSignature() const { return Signature; }
Module *getModuleOrNull() const { return ClangModule; }
/// Returns the CASID recorded for this AST source; empty if none was set.
StringRef getCASID() const { return CASID; }
};

} // namespace clang
Expand Down
6 changes: 4 additions & 2 deletions clang/include/clang/Basic/DiagnosticCASKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ def err_cas_depscan_daemon_connection: Error<
def err_cas_depscan_failed: Error<
"CAS-based dependency scan failed: %0">, DefaultFatal;
def err_cas_store: Error<"failed to store to CAS: %0">, DefaultFatal;
def err_cas_unloadable_module : Error<
"module file '%0' not found: unloadable module cache key %1: %2">, DefaultFatal;
def err_cas_unloadable_module
: Error<"module file '%0' not found: unloadable %select{casid|module cache "
"key}1 %2: %3">,
DefaultFatal;
def err_cas_missing_module : Error<
"module file '%0' not found: missing module cache key %1: %2">, DefaultFatal;
def err_cas_missing_root_id : Error<
Expand Down
10 changes: 10 additions & 0 deletions clang/include/clang/Basic/Module.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,9 @@ class alignas(8) Module {
/// The \c ActionCache key for this module, if any.
std::optional<std::string> ModuleCacheKey;

/// The \c CASID for the loaded module, otherwise empty.
std::string CASID;

/// The top-level headers associated with this module.
llvm::SmallSetVector<FileEntryRef, 2> TopHeaders;

Expand Down Expand Up @@ -773,6 +776,13 @@ class alignas(8) Module {
getTopLevelModule()->ModuleCacheKey = std::move(Key);
}

/// Retrieve the \c CASID stored on this module's top-level module; empty if
/// none has been set.
StringRef getCASID() const { return getTopLevelModule()->CASID; }

/// Record the \c CASID on this module's top-level module.
///
/// Asserts that any CASID already recorded equals \p ID, i.e. the CASID may
/// be set repeatedly to the same value but must not be changed.
void setCASID(std::string ID) {
assert(getCASID().empty() || getCASID() == ID);
getTopLevelModule()->CASID = std::move(ID);
}

/// Retrieve the umbrella directory as written.
std::optional<DirectoryName> getUmbrellaDirAsWritten() const {
if (const auto *Dir = std::get_if<DirectoryEntryRef>(&Umbrella))
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Frontend/CompilerInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -999,8 +999,8 @@ class CompilerInstance : public ModuleLoader {
/// "-fmodule-file-cache-key", or an imported pcm file. Used in diagnostics.
///
/// \returns true on failure.
bool addCachedModuleFile(StringRef Path, StringRef CacheKey,
StringRef Provider);
bool addCachedModuleFile(StringRef Path, StringRef CASID, StringRef Provider,
bool IsKey);

ModuleCache &getModuleCache() const { return *ModCache; }

Expand Down
10 changes: 10 additions & 0 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,14 @@ class ASTReaderListener {
return false;
}

/// Called for each module CASID.
///
/// The default implementation accepts every CASID.
///
/// \returns true to indicate the module for this CASID cannot be loaded.
virtual bool readModuleCASID(StringRef ModuleName, StringRef Filename,
StringRef CASID) {
return false;
}

/// Indicates that a particular module file extension has been read.
virtual void readModuleFileExtension(
const ModuleFileExtensionMetadata &Metadata) {}
Expand Down Expand Up @@ -342,6 +350,8 @@ class ChainedASTReaderListener : public ASTReaderListener {
bool readIncludeTreeID(StringRef ID, bool Complain) override;
bool readModuleCacheKey(StringRef ModuleName, StringRef Filename,
StringRef CacheKey) override;
bool readModuleCASID(StringRef ModuleName, StringRef Filename,
StringRef CASID) override;
void readModuleFileExtension(
const ModuleFileExtensionMetadata &Metadata) override;
};
Expand Down
11 changes: 10 additions & 1 deletion clang/include/clang/Serialization/InMemoryModuleCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> {
struct PCM {
std::unique_ptr<llvm::MemoryBuffer> Buffer;

std::string CASID;

/// Track whether this PCM is known to be good (either built or
/// successfully imported by a CompilerInstance/ASTReader using this
/// cache).
Expand All @@ -38,6 +40,9 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> {
PCM() = default;
PCM(std::unique_ptr<llvm::MemoryBuffer> Buffer)
: Buffer(std::move(Buffer)) {}

PCM(std::unique_ptr<llvm::MemoryBuffer> Buffer, llvm::StringRef CASID)
: Buffer(std::move(Buffer)), CASID(CASID.str()) {}
};

/// Cache of buffers.
Expand All @@ -64,7 +69,8 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> {
/// \post state is Tentative
/// \return a reference to the buffer as a convenience.
llvm::MemoryBuffer &addPCM(llvm::StringRef Filename,
std::unique_ptr<llvm::MemoryBuffer> Buffer);
std::unique_ptr<llvm::MemoryBuffer> Buffer,
llvm::StringRef CASID = "");

/// Store a just-built PCM under the Filename.
///
Expand All @@ -90,6 +96,9 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> {
/// Get a pointer to the PCM buffer for \p Filename if it exists; else
/// nullptr.
llvm::MemoryBuffer *lookupPCM(llvm::StringRef Filename) const;

/// Get the PCM entry for \p Filename if it exists; else nullptr.
const PCM *lookup(llvm::StringRef Filename) const;

/// Check whether the PCM is final and has been shown to work.
///
/// \return true iff state is Final.
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Serialization/ModuleFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ class ModuleFile {
/// The \c ActionCache key for this module, or empty.
std::string ModuleCacheKey;

/// The \c CASID for the module, or empty.
std::string CASID;

/// The CAS filesystem root ID for implicit modules built with the dependency
/// scanner, or empty.
std::string CASFileSystemRootID;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/ASTSourceDescriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ ASTSourceDescriptor::ASTSourceDescriptor(Module &M)
Path = M.Directory->getName();
if (auto File = M.getASTFile())
ASTFile = File->getName();
CASID = M.getCASID();
}

std::string ASTSourceDescriptor::getModuleName() const {
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/CGDebugInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3354,6 +3354,11 @@ llvm::DIModule *CGDebugInfo::getOrCreateModuleRef(ASTSourceDescriptor Mod,
PCM = Mod.getPath();
}
llvm::sys::path::append(PCM, Mod.getASTFile());

// FIXME: Prefer CASID if it exists.
if (!Mod.getCASID().empty())
PCM = Mod.getCASID();

DIB.createCompileUnit(
TheCU->getSourceLanguage(),
// TODO: Support "Source" from external AST providers?
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ class PCHContainerGenerator : public ASTConsumer {
// Prepare CGDebugInfo to emit debug info for a clang module.
auto *DI = Builder->getModuleDebugInfo();
StringRef ModuleName = llvm::sys::path::filename(MainFileName);
DI->setPCHDescriptor(
{ModuleName, "", OutputFileName, ASTFileSignature::createDISentinel()});
DI->setPCHDescriptor({ModuleName, "", OutputFileName,
ASTFileSignature::createDISentinel(), /*CASID=*/""});
DI->setModuleMap(MMap);
}

Expand Down
102 changes: 77 additions & 25 deletions clang/lib/Frontend/CompilerInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,8 @@ class CompileCacheASTReaderHelper : public ASTReaderListener {
bool readIncludeTreeID(StringRef ID, bool Complain) override;
bool readModuleCacheKey(StringRef ModuleName, StringRef Filename,
StringRef CacheKey) override;
bool readModuleCASID(StringRef ModuleName, StringRef Filename,
StringRef CASID) override;

private:
bool checkCASID(bool Complain, StringRef RootID, unsigned ParseDiagID,
Expand Down Expand Up @@ -2589,25 +2591,51 @@ void CompilerInstance::setExternalSemaSource(
ExternalSemaSrc = std::move(ESS);
}

static bool addCachedModuleFileToInMemoryCache(
StringRef Path, StringRef CacheKey, StringRef Provider,
cas::ObjectStore &CAS, cas::ActionCache &Cache,
ModuleCache &ModCache, DiagnosticsEngine &Diags) {
/// Materialize the CAS object \p Object and register its contents as the PCM
/// for \p Path in the in-memory module cache.
///
/// If the cache already holds an entry for \p Path whose non-empty CASID
/// differs from the one being added, this is an error. If the entry already
/// has a buffer (and no conflicting CASID), nothing is added.
///
/// \param Path     module file path used as the in-memory cache key.
/// \param CAS      object store that \p Object is loaded from.
/// \param Object   reference to the module file contents in \p CAS.
/// \param ModCache module cache whose in-memory PCM cache is updated.
/// \param Diags    engine used to report failures.
///
/// \returns true on failure (after emitting a diagnostic), false otherwise.
static bool addCachedModuleFileToInMemoryCache(StringRef Path,
                                               cas::ObjectStore &CAS,
                                               cas::ObjectRef Object,
                                               ModuleCache &ModCache,
                                               DiagnosticsEngine &Diags) {
  // FIXME: We wait to materialize each module file before proceeding, which
  // introduces latency for a network CAS. Instead we should collect all the
  // module keys and materialize them concurrently using \c getProxyFuture, for
  // better network utilization.
  auto OutputProxy = CAS.getProxy(Object);
  if (!OutputProxy) {
    Diags.Report(diag::err_cas_unloadable_module)
        << Path << 0 << CAS.getID(Object).toString() << OutputProxy.takeError();
    return true;
  }

  auto PCMID = OutputProxy->getID().toString();
  auto &InMemCache = ModCache.getInMemoryModuleCache();
  if (const auto *PCM = InMemCache.lookup(Path)) {
    // The same path must not map to two different CAS objects. Report the
    // conflict instead of failing silently, so the caller's failure is always
    // accompanied by a diagnostic.
    if (!PCM->CASID.empty() && PCM->CASID != PCMID) {
      Diags.Report(diag::err_cas_unloadable_module)
          << Path << 0 << PCMID
          << ("conflicts with already loaded casid " + PCM->CASID);
      return true;
    }

    // Already present with a buffer: nothing to add.
    if (InMemCache.lookupPCM(Path))
      return false;
  }

  InMemCache.addPCM(Path, OutputProxy->getMemoryBuffer(), PCMID);
  return false;
}

static bool addCachedModuleFileToInMemoryCacheFromKey(
StringRef Path, StringRef CacheKey, StringRef Provider,
cas::ObjectStore &CAS, cas::ActionCache &Cache, ModuleCache &ModCache,
DiagnosticsEngine &Diags) {
auto ID = CAS.parseID(CacheKey);
if (!ID) {
Diags.Report(diag::err_cas_unloadable_module)
<< Path << CacheKey << ID.takeError();
<< Path << 1 << CacheKey << ID.takeError();
return true;
}

auto Value = Cache.get(*ID);
if (!Value) {
Diags.Report(diag::err_cas_unloadable_module)
<< Path << CacheKey << Value.takeError();
<< Path << 1 << CacheKey << Value.takeError();
return true;
}
if (!*Value) {
Expand All @@ -2628,7 +2656,7 @@ static bool addCachedModuleFileToInMemoryCache(
auto ValueRef = CAS.getReference(**Value);
if (!ValueRef) {
Diags.Report(diag::err_cas_unloadable_module)
<< Path << CacheKey << "result module cannot be loaded from CAS";
<< Path << 1 << CacheKey << "result module cannot be loaded from CAS";

return true;
}
Expand All @@ -2637,44 +2665,68 @@ static bool addCachedModuleFileToInMemoryCache(
cas::CompileJobResultSchema Schema(CAS);
if (llvm::Error E = Schema.load(*ValueRef).moveInto(Result)) {
Diags.Report(diag::err_cas_unloadable_module)
<< Path << CacheKey << std::move(E);
<< Path << 1 << CacheKey << std::move(E);
return true;
}
auto Output =
Result->getOutput(cas::CompileJobCacheResult::OutputKind::MainOutput);
if (!Output)
llvm::report_fatal_error("missing main output");
// FIXME: We wait to materialize each module file before proceeding, which
// introduces latency for a network CAS. Instead we should collect all the
// module keys and materialize them concurrently using \c getProxyFuture, for
// better network utilization.
auto OutputProxy = CAS.getProxy(Output->Object);
if (!OutputProxy) {

return addCachedModuleFileToInMemoryCache(Path, CAS, Output->Object, ModCache,
Diags);
}

/// Load the module file identified directly by \p CASID from \p CAS and
/// register it for \p Filename in the in-memory module cache.
///
/// \param Filename module file path used as the in-memory cache key.
/// \param CAS      object store to look the module up in.
/// \param CASID    printed CAS ID of the module file contents.
/// \param ModCache module cache whose in-memory PCM cache is updated.
/// \param Diags    engine used to report failures.
///
/// \returns true on failure (after emitting a diagnostic), false otherwise.
static bool addCachedModuleFileToInMemoryCacheFromID(StringRef Filename,
                                                     cas::ObjectStore &CAS,
                                                     StringRef CASID,
                                                     ModuleCache &ModCache,
                                                     DiagnosticsEngine &Diags) {
  auto ID = CAS.parseID(CASID);
  if (!ID) {
    Diags.Report(diag::err_cas_unloadable_module)
        << Filename << 0 << CASID << ID.takeError();
    return true;
  }

  auto ModuleRef = CAS.getReference(*ID);
  if (!ModuleRef) {
    // %select index 0 == "casid": the value reported here is a CASID, not a
    // module cache key.
    Diags.Report(diag::err_cas_unloadable_module)
        << Filename << 0 << CASID << "does not exist in CAS";
    return true;
  }

  return addCachedModuleFileToInMemoryCache(Filename, CAS, *ModuleRef, ModCache,
                                            Diags);
}

bool CompilerInstance::addCachedModuleFile(StringRef Path, StringRef CacheKey,
StringRef Provider) {
return addCachedModuleFileToInMemoryCache(
Path, CacheKey, Provider, getOrCreateObjectStore(),
getOrCreateActionCache(), getModuleCache(), getDiagnostics());
/// Make the module file \p Path available from the CAS by adding it to the
/// in-memory module cache.
///
/// When \p IsKey is true, \p CASID is handed to the cache-key loader together
/// with \p Provider and the action cache; otherwise it is handed to the
/// direct-CASID loader, which uses neither \p Provider nor the action cache.
///
/// \returns true on failure.
bool CompilerInstance::addCachedModuleFile(StringRef Path, StringRef CASID,
                                           StringRef Provider, bool IsKey) {
  // Direct CASID: load the module object straight from the object store.
  if (!IsKey)
    return addCachedModuleFileToInMemoryCacheFromID(
        Path, getOrCreateObjectStore(), CASID, getModuleCache(),
        getDiagnostics());

  // Cache key: resolve it through the action cache first.
  return addCachedModuleFileToInMemoryCacheFromKey(
      Path, CASID, Provider, getOrCreateObjectStore(), getOrCreateActionCache(),
      getModuleCache(), getDiagnostics());
}

bool CompileCacheASTReaderHelper::readModuleCacheKey(StringRef ModuleName,
StringRef Filename,
StringRef CacheKey) {
// FIXME: add name/path of the importing module?
return addCachedModuleFileToInMemoryCache(
return addCachedModuleFileToInMemoryCacheFromKey(
Filename, CacheKey, "imported module", CAS, Cache, ModCache, Diags);
}

/// Listener hook for a module CASID: forwards \p CASID and \p Filename to
/// addCachedModuleFileToInMemoryCacheFromID so the module is added to the
/// in-memory module cache. \p ModuleName is not used here.
///
/// \returns the result of that call (true on failure).
bool CompileCacheASTReaderHelper::readModuleCASID(StringRef ModuleName,
StringRef Filename,
StringRef CASID) {
return addCachedModuleFileToInMemoryCacheFromID(Filename, CAS, CASID,
ModCache, Diags);
}

/// Verify that ID is in the CAS. Otherwise the module cache probably was
/// created with a different CAS.
bool CompileCacheASTReaderHelper::checkCASID(bool Complain, StringRef RootID,
Expand Down
18 changes: 15 additions & 3 deletions clang/lib/Frontend/FrontendAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,8 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
// Provide any modules from the action cache.
for (const auto &KeyPair : CI.getFrontendOpts().ModuleCacheKeys)
if (CI.addCachedModuleFile(KeyPair.first, KeyPair.second,
"-fmodule-file-cache-key"))
"-fmodule-file-cache-key",
/*IsKey=*/true))
return false;


Expand Down Expand Up @@ -1322,9 +1323,10 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
DeserialListener, DeleteDeserialListener);
DeleteDeserialListener = true;
}
std::string PCHCASID;
if (!CI.getPreprocessorOpts().ImplicitPCHInclude.empty() ||
IncludeTreePCH) {
StringRef PCHPath;
std::string PCHPath;
DisableValidationForModuleKind DisableValidation;
std::unique_ptr<llvm::MemoryBuffer> PCHBuffer;
if (IncludeTreePCH) {
Expand All @@ -1333,7 +1335,11 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
if (llvm::Error E =
IncludeTreePCH->getMemoryBuffer().moveInto(PCHBuffer))
return reportError(std::move(E));
PCHPath = PCHBuffer->getBufferIdentifier();
auto PCHFile = IncludeTreePCH->getContents();
if (!PCHFile)
return reportError(PCHFile.takeError());
PCHCASID = PCHFile->getID().toString();
PCHPath = PCHCASID;
} else {
PCHPath = CI.getPreprocessorOpts().ImplicitPCHInclude;
DisableValidation =
Expand All @@ -1345,6 +1351,12 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
DeserialListener, DeleteDeserialListener, std::move(PCHBuffer));
if (!CI.getASTContext().getExternalSource())
return false;

// After loading PCH, set its CASID for content.
if (!PCHCASID.empty()) {
auto &MF = CI.getASTReader()->getModuleManager().getPrimaryModule();
MF.CASID = PCHCASID;
}
}
// If modules are enabled, create the AST reader before creating
// any builtins, so that all declarations know that they might be
Expand Down
Loading