From b027b6594dc24113ea41c72cb5ae278c26993791 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Mon, 22 Dec 2025 17:41:41 -0800 Subject: [PATCH] [MemProf] Fix reporting with -memprof-matching-cold-threshold With the -memprof-matching-cold-threshold option, we hint as cold allocations where the fraction of cold bytes is at least the given threshold. However, we were incorrectly reporting all of the allocation's contexts and bytes as hinted cold. Fix this to report the non-cold contexts as ignored. To do this, refactor out some existing reporting, and also keep track of the original allocation type for each context in the Trie along with its ContextTotalSize information. Most of the changes are the change to this array's type and name. --- .../include/llvm/Analysis/MemoryProfileInfo.h | 15 ++-- llvm/lib/Analysis/MemoryProfileInfo.cpp | 74 ++++++++++++------- llvm/test/Transforms/PGOProfile/memprof.ll | 4 +- 3 files changed, 60 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index ba4010bd8f50c..dbc7a163bfd71 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -75,8 +75,9 @@ class CallStackTrie { // If the user has requested reporting of hinted sizes, keep track of the // associated full stack id and profiled sizes. Can have more than one // after trimming (e.g. when building from metadata). This is only placed on - // the last (root-most) trie node for each allocation context. - std::vector ContextSizeInfo; + // the last (root-most) trie node for each allocation context. Also + // track the original allocation type of the context. + std::vector> ContextInfo; // Map of caller stack id to the corresponding child Trie node. std::map Callers; CallStackTrieNode(AllocationType Type) @@ -118,10 +119,12 @@ class CallStackTrie { delete Node; } - // Recursively build up a complete list of context size information from the - // trie nodes reached form the given Node, for hint size reporting. - void collectContextSizeInfo(CallStackTrieNode *Node, - std::vector &ContextSizeInfo); + // Recursively build up a complete list of context information from the + // trie nodes reached form the given Node, including each context's + // ContextTotalSize and AllocationType, for hint size reporting. + void collectContextInfo( + CallStackTrieNode *Node, + std::vector> &ContextInfo); // Recursively convert hot allocation types to notcold, since we don't // actually do any cloning for hot contexts, to facilitate more aggressive diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp index b09f4ed78ca7e..bb1a103721740 100644 --- a/llvm/lib/Analysis/MemoryProfileInfo.cpp +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -181,7 +181,13 @@ void CallStackTrie::addCallStack( Curr = New; } assert(Curr); - llvm::append_range(Curr->ContextSizeInfo, ContextSizeInfo); + // Append all of the ContextSizeInfo, along with their original AllocType. + llvm::append_range( + Curr->ContextInfo, + llvm::map_range( + ContextSizeInfo, [AllocType](const ContextTotalSize &CTS) { + return std::pair(CTS, AllocType); + })); } void CallStackTrie::addCallStack(MDNode *MIB) { @@ -214,18 +220,18 @@ void CallStackTrie::addCallStack(MDNode *MIB) { addCallStack(getMIBAllocType(MIB), CallStack, std::move(ContextSizeInfo)); } -static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, - AllocationType AllocType, - ArrayRef ContextSizeInfo, - const uint64_t MaxColdSize, - bool BuiltFromExistingMetadata, - uint64_t &TotalBytes, uint64_t &ColdBytes) { +static MDNode * +createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, + AllocationType AllocType, + ArrayRef> ContextInfo, + const uint64_t MaxColdSize, bool BuiltFromExistingMetadata, + uint64_t &TotalBytes, uint64_t &ColdBytes) { SmallVector MIBPayload( {buildCallstackMetadata(MIBCallStack, Ctx)}); MIBPayload.push_back( MDString::get(Ctx, getAllocTypeAttributeString(AllocType))); - if (ContextSizeInfo.empty()) { + if (ContextInfo.empty()) { // The profile matcher should have provided context size info if there was a // MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully // handle a user-provided percent larger than 100. However, we may not have @@ -234,7 +240,8 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, return MDNode::get(Ctx, MIBPayload); } - for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { + for (const auto &[CSI, AT] : ContextInfo) { + const auto &[FullStackId, TotalSize] = CSI; TotalBytes += TotalSize; bool LargeColdContext = false; if (AllocType == AllocationType::Cold) { @@ -267,11 +274,12 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, return MDNode::get(Ctx, MIBPayload); } -void CallStackTrie::collectContextSizeInfo( - CallStackTrieNode *Node, std::vector &ContextSizeInfo) { - llvm::append_range(ContextSizeInfo, Node->ContextSizeInfo); +void CallStackTrie::collectContextInfo( + CallStackTrieNode *Node, + std::vector> &ContextInfo) { + llvm::append_range(ContextInfo, Node->ContextInfo); for (auto &Caller : Node->Callers) - collectContextSizeInfo(Caller.second, ContextSizeInfo); + collectContextInfo(Caller.second, ContextInfo); } void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) { @@ -283,6 +291,17 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) { convertHotToNotCold(Caller.second); } +// Helper to emit messages for non-cold contexts that are ignored for various +// reasons when reporting of hinted bytes is enabled. +static void emitIgnoredNonColdContextMessage(StringRef Tag, + uint64_t FullStackId, + StringRef Extra, + uint64_t TotalSize) { + errs() << "MemProf hinting: Total size for " << Tag + << " non-cold full allocation context hash " << FullStackId << Extra + << ": " << TotalSize << "\n"; +} + // Copy over some or all of NewMIBNodes to the SavedMIBNodes vector, depending // on options that enable filtering out some NotCold contexts. static void saveFilteredNewMIBNodes(std::vector &NewMIBNodes, @@ -321,9 +340,7 @@ static void saveFilteredNewMIBNodes(std::vector &NewMIBNodes, uint64_t TS = mdconst::dyn_extract(ContextSizePair->getOperand(1)) ->getZExtValue(); - errs() << "MemProf hinting: Total size for " << Tag - << " non-cold full allocation context hash " << FullStackId - << Extra << ": " << TS << "\n"; + emitIgnoredNonColdContextMessage(Tag, FullStackId, Extra, TS); } }; @@ -430,10 +447,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, // Trim context below the first node in a prefix with a single alloc type. // Add an MIB record for the current call stack prefix. if (hasSingleAllocType(Node->AllocTypes)) { - std::vector ContextSizeInfo; - collectContextSizeInfo(Node, ContextSizeInfo); + std::vector> ContextInfo; + collectContextInfo(Node, ContextInfo); MIBNodes.push_back(createMIBNode( - Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo, + Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextInfo, MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes)); return true; } @@ -486,10 +503,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, // non-cold allocation type. if (!CalleeHasAmbiguousCallerContext) return false; - std::vector ContextSizeInfo; - collectContextSizeInfo(Node, ContextSizeInfo); + std::vector> ContextInfo; + collectContextInfo(Node, ContextInfo); MIBNodes.push_back(createMIBNode( - Ctx, MIBCallStack, AllocationType::NotCold, ContextSizeInfo, MaxColdSize, + Ctx, MIBCallStack, AllocationType::NotCold, ContextInfo, MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes)); return true; } @@ -503,9 +520,16 @@ void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, removeAnyExistingAmbiguousAttribute(CI); CI->addFnAttr(A); if (MemProfReportHintedSizes) { - std::vector ContextSizeInfo; - collectContextSizeInfo(Alloc, ContextSizeInfo); - for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { + std::vector> ContextInfo; + collectContextInfo(Alloc, ContextInfo); + for (const auto &[CSI, OrigAT] : ContextInfo) { + const auto &[FullStackId, TotalSize] = CSI; + // If the original alloc type is not the one being applied as the hint, + // report that we ignored this context. + if (AT != OrigAT) { + emitIgnoredNonColdContextMessage("ignored", FullStackId, "", TotalSize); + continue; + } errs() << "MemProf hinting: Total size for full allocation context hash " << FullStackId << " and " << Descriptor << " alloc type " << getAllocTypeAttributeString(AT) << ": " << TotalSize << "\n"; diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index f6a89a8ceb86a..a1f0f1d403c8f 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -400,10 +400,10 @@ for.end: ; preds = %for.cond ;; with the full allocation context hash, type, and size in bytes. ; TOTALSIZESTHRESH60: Total size for full allocation context hash 8525406123785421946 and dominant alloc type cold: 10 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 11714230664165068698 and dominant alloc type cold: 10 -; TOTALSIZESTHRESH60: Total size for full allocation context hash 5725971306423925017 and dominant alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for ignored non-cold full allocation context hash 5725971306423925017: 10 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 16342802530253093571 and dominant alloc type cold: 10 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 18254812774972004394 and dominant alloc type cold: 10 -; TOTALSIZESTHRESH60: Total size for full allocation context hash 1093248920606587996 and dominant alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for ignored non-cold full allocation context hash 1093248920606587996: 10 ; TOTALSIZESSINGLE: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10 ; REMARKSINGLE: remark: memprof.cc:25:13: call in function main marked with memprof allocation attribute notcold ; TOTALSIZESSINGLE: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10