diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index ba4010bd8f50c..dbc7a163bfd71 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -75,8 +75,9 @@ class CallStackTrie { // If the user has requested reporting of hinted sizes, keep track of the // associated full stack id and profiled sizes. Can have more than one // after trimming (e.g. when building from metadata). This is only placed on - // the last (root-most) trie node for each allocation context. - std::vector ContextSizeInfo; + // the last (root-most) trie node for each allocation context. Also + // track the original allocation type of the context. + std::vector> ContextInfo; // Map of caller stack id to the corresponding child Trie node. std::map Callers; CallStackTrieNode(AllocationType Type) @@ -118,10 +119,12 @@ class CallStackTrie { delete Node; } - // Recursively build up a complete list of context size information from the - // trie nodes reached form the given Node, for hint size reporting. - void collectContextSizeInfo(CallStackTrieNode *Node, - std::vector &ContextSizeInfo); + // Recursively build up a complete list of context information from the + // trie nodes reached form the given Node, including each context's + // ContextTotalSize and AllocationType, for hint size reporting. + void collectContextInfo( + CallStackTrieNode *Node, + std::vector> &ContextInfo); // Recursively convert hot allocation types to notcold, since we don't // actually do any cloning for hot contexts, to facilitate more aggressive diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp index b09f4ed78ca7e..bb1a103721740 100644 --- a/llvm/lib/Analysis/MemoryProfileInfo.cpp +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -181,7 +181,13 @@ void CallStackTrie::addCallStack( Curr = New; } assert(Curr); - llvm::append_range(Curr->ContextSizeInfo, ContextSizeInfo); + // Append all of the ContextSizeInfo, along with their original AllocType. + llvm::append_range( + Curr->ContextInfo, + llvm::map_range( + ContextSizeInfo, [AllocType](const ContextTotalSize &CTS) { + return std::pair(CTS, AllocType); + })); } void CallStackTrie::addCallStack(MDNode *MIB) { @@ -214,18 +220,18 @@ void CallStackTrie::addCallStack(MDNode *MIB) { addCallStack(getMIBAllocType(MIB), CallStack, std::move(ContextSizeInfo)); } -static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, - AllocationType AllocType, - ArrayRef ContextSizeInfo, - const uint64_t MaxColdSize, - bool BuiltFromExistingMetadata, - uint64_t &TotalBytes, uint64_t &ColdBytes) { +static MDNode * +createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, + AllocationType AllocType, + ArrayRef> ContextInfo, + const uint64_t MaxColdSize, bool BuiltFromExistingMetadata, + uint64_t &TotalBytes, uint64_t &ColdBytes) { SmallVector MIBPayload( {buildCallstackMetadata(MIBCallStack, Ctx)}); MIBPayload.push_back( MDString::get(Ctx, getAllocTypeAttributeString(AllocType))); - if (ContextSizeInfo.empty()) { + if (ContextInfo.empty()) { // The profile matcher should have provided context size info if there was a // MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully // handle a user-provided percent larger than 100. However, we may not have @@ -234,7 +240,8 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, return MDNode::get(Ctx, MIBPayload); } - for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { + for (const auto &[CSI, AT] : ContextInfo) { + const auto &[FullStackId, TotalSize] = CSI; TotalBytes += TotalSize; bool LargeColdContext = false; if (AllocType == AllocationType::Cold) { @@ -267,11 +274,12 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, return MDNode::get(Ctx, MIBPayload); } -void CallStackTrie::collectContextSizeInfo( - CallStackTrieNode *Node, std::vector &ContextSizeInfo) { - llvm::append_range(ContextSizeInfo, Node->ContextSizeInfo); +void CallStackTrie::collectContextInfo( + CallStackTrieNode *Node, + std::vector> &ContextInfo) { + llvm::append_range(ContextInfo, Node->ContextInfo); for (auto &Caller : Node->Callers) - collectContextSizeInfo(Caller.second, ContextSizeInfo); + collectContextInfo(Caller.second, ContextInfo); } void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) { @@ -283,6 +291,17 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) { convertHotToNotCold(Caller.second); } +// Helper to emit messages for non-cold contexts that are ignored for various +// reasons when reporting of hinted bytes is enabled. +static void emitIgnoredNonColdContextMessage(StringRef Tag, + uint64_t FullStackId, + StringRef Extra, + uint64_t TotalSize) { + errs() << "MemProf hinting: Total size for " << Tag + << " non-cold full allocation context hash " << FullStackId << Extra + << ": " << TotalSize << "\n"; +} + // Copy over some or all of NewMIBNodes to the SavedMIBNodes vector, depending // on options that enable filtering out some NotCold contexts. static void saveFilteredNewMIBNodes(std::vector &NewMIBNodes, @@ -321,9 +340,7 @@ static void saveFilteredNewMIBNodes(std::vector &NewMIBNodes, uint64_t TS = mdconst::dyn_extract(ContextSizePair->getOperand(1)) ->getZExtValue(); - errs() << "MemProf hinting: Total size for " << Tag - << " non-cold full allocation context hash " << FullStackId - << Extra << ": " << TS << "\n"; + emitIgnoredNonColdContextMessage(Tag, FullStackId, Extra, TS); } }; @@ -430,10 +447,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, // Trim context below the first node in a prefix with a single alloc type. // Add an MIB record for the current call stack prefix. if (hasSingleAllocType(Node->AllocTypes)) { - std::vector ContextSizeInfo; - collectContextSizeInfo(Node, ContextSizeInfo); + std::vector> ContextInfo; + collectContextInfo(Node, ContextInfo); MIBNodes.push_back(createMIBNode( - Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo, + Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextInfo, MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes)); return true; } @@ -486,10 +503,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, // non-cold allocation type. if (!CalleeHasAmbiguousCallerContext) return false; - std::vector ContextSizeInfo; - collectContextSizeInfo(Node, ContextSizeInfo); + std::vector> ContextInfo; + collectContextInfo(Node, ContextInfo); MIBNodes.push_back(createMIBNode( - Ctx, MIBCallStack, AllocationType::NotCold, ContextSizeInfo, MaxColdSize, + Ctx, MIBCallStack, AllocationType::NotCold, ContextInfo, MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes)); return true; } @@ -503,9 +520,16 @@ void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, removeAnyExistingAmbiguousAttribute(CI); CI->addFnAttr(A); if (MemProfReportHintedSizes) { - std::vector ContextSizeInfo; - collectContextSizeInfo(Alloc, ContextSizeInfo); - for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { + std::vector> ContextInfo; + collectContextInfo(Alloc, ContextInfo); + for (const auto &[CSI, OrigAT] : ContextInfo) { + const auto &[FullStackId, TotalSize] = CSI; + // If the original alloc type is not the one being applied as the hint, + // report that we ignored this context. + if (AT != OrigAT) { + emitIgnoredNonColdContextMessage("ignored", FullStackId, "", TotalSize); + continue; + } errs() << "MemProf hinting: Total size for full allocation context hash " << FullStackId << " and " << Descriptor << " alloc type " << getAllocTypeAttributeString(AT) << ": " << TotalSize << "\n"; diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index f6a89a8ceb86a..a1f0f1d403c8f 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -400,10 +400,10 @@ for.end: ; preds = %for.cond ;; with the full allocation context hash, type, and size in bytes. ; TOTALSIZESTHRESH60: Total size for full allocation context hash 8525406123785421946 and dominant alloc type cold: 10 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 11714230664165068698 and dominant alloc type cold: 10 -; TOTALSIZESTHRESH60: Total size for full allocation context hash 5725971306423925017 and dominant alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for ignored non-cold full allocation context hash 5725971306423925017: 10 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 16342802530253093571 and dominant alloc type cold: 10 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 18254812774972004394 and dominant alloc type cold: 10 -; TOTALSIZESTHRESH60: Total size for full allocation context hash 1093248920606587996 and dominant alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for ignored non-cold full allocation context hash 1093248920606587996: 10 ; TOTALSIZESSINGLE: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10 ; REMARKSINGLE: remark: memprof.cc:25:13: call in function main marked with memprof allocation attribute notcold ; TOTALSIZESSINGLE: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10