From 6826fb8e5f52d1ccc6d36e4ea3a2cb0e48437e61 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 27 Jan 2025 20:44:23 +0100 Subject: [PATCH 01/23] JIT: Inline via gtSplitTree --- src/coreclr/jit/compiler.h | 25 +- src/coreclr/jit/compiler.hpp | 1 - src/coreclr/jit/debuginfo.cpp | 22 +- src/coreclr/jit/fgbasic.cpp | 10 + src/coreclr/jit/fginline.cpp | 832 +++++++++----------- src/coreclr/jit/fgstmt.cpp | 58 ++ src/coreclr/jit/gentree.cpp | 240 +++--- src/coreclr/jit/gentree.h | 29 - src/coreclr/jit/gtlist.h | 1 - src/coreclr/jit/gtstructs.h | 1 - src/coreclr/jit/importer.cpp | 272 ++----- src/coreclr/jit/importercalls.cpp | 114 +-- src/coreclr/jit/importervectorization.cpp | 29 +- src/coreclr/jit/indirectcalltransformer.cpp | 195 ++--- src/coreclr/jit/inline.cpp | 22 +- src/coreclr/jit/inline.h | 34 +- src/coreclr/jit/simd.cpp | 2 +- 17 files changed, 726 insertions(+), 1161 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5190c70c060b3a..54e8413200c37b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2673,6 +2673,7 @@ class Compiler friend class MorphCopyBlockHelper; friend class SharedTempsScope; friend class CallArgs; + friend class InlineAndDevirtualizeWalker; friend class IndirectCallTransformer; friend class ProfileSynthesis; friend class LocalsUseVisitor; @@ -3581,7 +3582,6 @@ class Compiler GenTree* gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd); GenTreeLclFld* gtNewLclFldNode(unsigned lnum, var_types type, unsigned offset, ClassLayout* layout = nullptr); - GenTreeRetExpr* gtNewInlineCandidateReturnExpr(GenTreeCall* inlineCandidate, var_types type); GenTreeFieldAddr* gtNewFieldAddrNode(var_types type, CORINFO_FIELD_HANDLE fldHnd, @@ -3740,6 +3740,8 @@ class Compiler bool gtHasLocalsWithAddrOp(GenTree* tree); bool gtHasAddressExposedLocals(GenTree* tree); + bool gtSubTreeAndChildrenAreFirstExecutedSideEffects(GenTree* tree, GenTree* subTree); + unsigned gtSetCallArgsOrder(CallArgs* args, bool lateArgs, int* callCostEx, int* callCostSz); unsigned gtSetMultiOpOrder(GenTreeMultiOp* multiOp); @@ -3790,7 +3792,7 @@ class Compiler bool ignoreRoot = false); bool gtSplitTree( - BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse); + BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse, bool includeOperands = true); bool gtStoreDefinesField( LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize); @@ -5208,7 +5210,7 @@ class Compiler InlineCandidateInfo** ppInlineCandidateInfo, InlineResult* inlineResult); - void impInlineRecordArgInfo(InlineInfo* pInlineInfo, CallArg* arg, unsigned argNum, InlineResult* inlineResult); + void impInlineRecordArgInfo(InlineInfo* pInlineInfo, InlArgInfo* argInfo, CallArg* arg, InlineResult* inlineResult); void impInlineInitVars(InlineInfo* pInlineInfo); @@ -5593,6 +5595,7 @@ class Compiler BasicBlock* fgSplitBlockAtBeginning(BasicBlock* curr); BasicBlock* fgSplitBlockAtEnd(BasicBlock* curr); BasicBlock* fgSplitBlockAfterStatement(BasicBlock* curr, Statement* stmt); + BasicBlock* fgSplitBlockBeforeStatement(BasicBlock* curr, Statement* stmt); BasicBlock* fgSplitBlockAfterNode(BasicBlock* curr, GenTree* node); // for LIR BasicBlock* fgSplitEdge(BasicBlock* curr, BasicBlock* succ); BasicBlock* fgSplitBlockBeforeTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse); @@ -6638,6 +6641,8 @@ class Compiler private: Statement* fgInsertStmtListAfter(BasicBlock* block, Statement* stmtAfter, Statement* stmtList); + void fgInsertStmtListAtEnd(BasicBlock* block, Statement* stmtList); + void fgInsertStmtListBefore(BasicBlock* block, Statement* stmtBefore, Statement* stmtList); // Create a new temporary variable to hold the result of *ppTree, // and transform the graph accordingly. @@ -6805,8 +6810,8 @@ class Compiler GenTree* fgMorphCall(GenTreeCall* call); GenTree* fgExpandVirtualVtableCallTarget(GenTreeCall* call); - void fgMorphCallInline(GenTreeCall* call, InlineResult* result); - void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, InlineContext** createdContext); + void fgMorphCallInline(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* result); + void fgMorphCallInlineHelper(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* result, InlineContext** createdContext); #if DEBUG void fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call); static fgWalkPreFn fgFindNonInlineCandidate; @@ -6983,11 +6988,12 @@ class Compiler bool MethodInstantiationComplexityExceeds(CORINFO_METHOD_HANDLE handle, int& cur, int max); bool TypeInstantiationComplexityExceeds(CORINFO_CLASS_HANDLE handle, int& cur, int max); - void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result, InlineContext** createdContext); + void fgInvokeInlineeCompiler(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* result, InlineContext** createdContext); void fgInsertInlineeBlocks(InlineInfo* pInlineInfo); - void fgInsertInlineeArgument(const InlArgInfo& argInfo, BasicBlock* block, Statement** afterStmt, Statement** newStmt, const DebugInfo& callDI); - Statement* fgInlinePrependStatements(InlineInfo* inlineInfo); - void fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, Statement* stmt); + void fgFinalizeInlineeStatements(InlineInfo* pInlineInfo); + void fgInsertInlineeArgument(class StatementListBuilder& statements, const InlArgInfo& argInfo, const DebugInfo& callDI); + void fgInlinePrependStatements(InlineInfo* inlineInfo); + void fgInlineAppendStatements(class StatementListBuilder& statements, InlineInfo* inlineInfo); #ifdef DEBUG static fgWalkPreFn fgDebugCheckInlineCandidates; @@ -11945,7 +11951,6 @@ class GenTreeVisitor case GT_CATCH_ARG: case GT_LABEL: case GT_FTN_ADDR: - case GT_RET_EXPR: case GT_CNS_INT: case GT_CNS_LNG: case GT_CNS_DBL: diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index ccb2584ac92550..07854d35cbccb1 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4374,7 +4374,6 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_CATCH_ARG: case GT_LABEL: case GT_FTN_ADDR: - case GT_RET_EXPR: case GT_CNS_INT: case GT_CNS_LNG: case GT_CNS_DBL: diff --git a/src/coreclr/jit/debuginfo.cpp b/src/coreclr/jit/debuginfo.cpp index 6b4e25635f2d38..e2464fcfb9aa6d 100644 --- a/src/coreclr/jit/debuginfo.cpp +++ b/src/coreclr/jit/debuginfo.cpp @@ -113,17 +113,17 @@ void DebugInfo::Validate() const if (!di.IsValid()) continue; - bool isValidOffs = di.GetLocation().GetOffset() < di.GetInlineContext()->GetILSize(); - if (isValidOffs) - { - bool isValidStart = di.GetInlineContext()->GetILInstsSet()->bitVectTest(di.GetLocation().GetOffset()); - assert(isValidStart && - "Detected invalid debug info: IL offset does not refer to the start of an IL instruction"); - } - else - { - assert(!"Detected invalid debug info: IL offset is out of range"); - } + //bool isValidOffs = di.GetLocation().GetOffset() < di.GetInlineContext()->GetILSize(); + //if (isValidOffs) + //{ + // bool isValidStart = di.GetInlineContext()->GetILInstsSet()->bitVectTest(di.GetLocation().GetOffset()); + // assert(isValidStart && + // "Detected invalid debug info: IL offset does not refer to the start of an IL instruction"); + //} + //else + //{ + // assert(!"Detected invalid debug info: IL offset is out of range"); + //} } while (di.GetParent(&di)); } diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index a34e2622157ba3..59fe5b818690f1 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -4677,6 +4677,16 @@ BasicBlock* Compiler::fgSplitBlockAfterStatement(BasicBlock* curr, Statement* st return newBlock; } +BasicBlock* Compiler::fgSplitBlockBeforeStatement(BasicBlock* curr, Statement* stmt) +{ + if (stmt == curr->firstStmt()) + { + return fgSplitBlockAtBeginning(curr); + } + + return fgSplitBlockAfterStatement(curr, stmt->GetPrevStmt()); +} + //------------------------------------------------------------------------------ // fgSplitBlockBeforeTree : Split the given block right before the given tree // diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 1761d79f8eca7f..098a0d36d6a2e1 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -202,9 +202,13 @@ bool Compiler::TypeInstantiationComplexityExceeds(CORINFO_CLASS_HANDLE handle, i return false; } -class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor +class InlineAndDevirtualizeWalker : public GenTreeVisitor { bool m_madeChanges = false; + BasicBlock* m_block = nullptr; + Statement* m_statement = nullptr; + BasicBlock* m_nextBlock = nullptr; + Statement* m_nextStatement = nullptr; public: enum @@ -214,7 +218,7 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorGetRootNodePointer(), nullptr); + } + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) { GenTree* tree = *use; - // All the operations here and in the corresponding postorder - // callback (LateDevirtualization) are triggered by GT_CALL or - // GT_RET_EXPR trees, and these (should) have the call side - // effect flag. - // - // So bail out for any trees that don't have this flag. + // Inlining and late devirt are triggered by GT_CALL with the call + // side effect flag, so bail out for any trees that don't have this + // flag. if ((tree->gtFlags & GTF_CALL) == 0) { return fgWalkResult::WALK_SKIP_SUBTREES; } - if (tree->OperIs(GT_RET_EXPR)) + while ((*use)->IsCall() && TryInline(use, user)) { - UpdateInlineReturnExpressionPlaceHolder(use, user); - } - -#if FEATURE_MULTIREG_RET -#if defined(DEBUG) - - // Make sure we don't have a tree like so: V05 = (, , , retExpr); - // Since we only look one level above for the parent for '=' and - // do not check if there is a series of COMMAs. See above. - // Importer and FlowGraph will not generate such a tree, so just - // leaving an assert in here. This can be fixed by looking ahead - // when we visit stores similar to AttachStructInlineeToStore. - // - if (tree->OperIsStore()) - { - GenTree* value = tree->Data(); - - if (value->OperGet() == GT_COMMA) + if (m_nextBlock != nullptr) { - GenTree* effectiveValue = value->gtEffectiveVal(); - - noway_assert(!varTypeIsStruct(effectiveValue) || (effectiveValue->OperGet() != GT_RET_EXPR) || - !effectiveValue->AsRetExpr()->gtInlineCandidate->HasMultiRegRetVal()); + return fgWalkResult::WALK_ABORT; } } -#endif // defined(DEBUG) -#endif // FEATURE_MULTIREG_RET return fgWalkResult::WALK_CONTINUE; } fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { LateDevirtualization(use, user); + return fgWalkResult::WALK_CONTINUE; } private: - //------------------------------------------------------------------------ - // UpdateInlineReturnExpressionPlaceHolder: replace an - // inline return expression placeholder if there is one. - // - // Arguments: - // use -- edge for the tree that is a GT_RET_EXPR node - // parent -- node containing the edge - // - // Returns: - // fgWalkResult indicating the walk should continue; that - // is we wish to fully explore the tree. - // - // Notes: - // Looks for GT_RET_EXPR nodes that arose from tree splitting done - // during importation for inline candidates, and replaces them. - // - // For successful inlines, substitutes the return value expression - // from the inline body for the GT_RET_EXPR. - // - // For failed inlines, rejoins the original call into the tree from - // whence it was split during importation. - // - // The code doesn't actually know if the corresponding inline - // succeeded or not; it relies on the fact that gtInlineCandidate - // initially points back at the call and is modified in place to - // the inlinee return expression if the inline is successful (see - // tail end of fgInsertInlineeBlocks for the update of iciCall). - // - // If the return type is a struct type and we're on a platform - // where structs can be returned in multiple registers, ensure the - // call has a suitable parent. - // - // If the original call type and the substitution type are different - // the functions makes necessary updates. It could happen if there was - // an implicit conversion in the inlinee body. - // - void UpdateInlineReturnExpressionPlaceHolder(GenTree** use, GenTree* parent) + bool TryInline(GenTree** use, GenTree* parent) { - while ((*use)->OperIs(GT_RET_EXPR)) + GenTreeCall* call = (*use)->AsCall(); + + if (!call->IsInlineCandidate()) { - GenTree* tree = *use; + return false; + } - // Skip through chains of GT_RET_EXPRs (say from nested inlines) - // to the actual tree to use. - // - BasicBlock* inlineeBB = nullptr; - GenTree* inlineCandidate = tree; - do - { - GenTreeRetExpr* retExpr = inlineCandidate->AsRetExpr(); - inlineCandidate = retExpr->gtSubstExpr; - inlineeBB = retExpr->gtSubstBB; - } while (inlineCandidate->OperIs(GT_RET_EXPR)); - - // We might as well try and fold the return value. Eg returns of - // constant bools will have CASTS. This folding may uncover more - // GT_RET_EXPRs, so we loop around until we've got something distinct. - // - inlineCandidate = m_compiler->gtFoldExpr(inlineCandidate); - var_types retType = tree->TypeGet(); + InlineResult inlineResult(m_compiler, call, call->gtInlineCandidateInfo->inlinersContext, "TryInline"); -#ifdef DEBUG - if (m_compiler->verbose) - { - printf("\nReplacing the return expression placeholder "); - Compiler::printTreeID(tree); - printf(" with "); - Compiler::printTreeID(inlineCandidate); - printf("\n"); - // Dump out the old return expression placeholder it will be overwritten by the ReplaceWith below - m_compiler->gtDispTree(tree); - } -#endif // DEBUG + m_compiler->compCurBB = m_block; + m_compiler->fgMorphStmt = m_statement; - var_types newType = inlineCandidate->TypeGet(); + InlineInfo inlineInfo{}; + m_compiler->fgMorphCallInline(inlineInfo, call, &inlineResult); - // If we end up swapping type we may need to retype the tree: - if (retType != newType) + assert(inlineResult.IsSuccess() == call->IsInlineCandidate()); + if (!inlineResult.IsSuccess()) + { + // If the inline was rejected and returns a retbuffer, then mark that + // local as DNER now so that promotion knows to leave it up to physical + // promotion. + CallArg* retBuffer = call->gtArgs.GetRetBufferArg(); + if ((retBuffer != nullptr) && retBuffer->GetNode()->OperIs(GT_LCL_ADDR)) { - if ((retType == TYP_BYREF) && (tree->OperGet() == GT_IND)) - { - // - in an RVA static if we've reinterpreted it as a byref; - assert(newType == TYP_I_IMPL); - JITDUMP("Updating type of the return GT_IND expression to TYP_BYREF\n"); - inlineCandidate->gtType = TYP_BYREF; - } + m_compiler->lvaSetVarDoNotEnregister(retBuffer->GetNode()->AsLclVarCommon()->GetLclNum() + DEBUGARG(DoNotEnregisterReason::HiddenBufferStructArg)); } - *use = inlineCandidate; - m_madeChanges = true; + return false; + } - if (inlineeBB != nullptr) - { - // IR may potentially contain nodes that requires mandatory BB flags to be set. - // Propagate those flags from the containing BB. - m_compiler->compCurBB->CopyFlags(inlineeBB, BBF_COPY_PROPAGATE); - } + JITDUMP("Inlining candidate [%06u] in " FMT_STMT "\n", Compiler::dspTreeID(call), m_statement->GetID()); + DISPSTMT(m_statement); -#ifdef DEBUG - if (m_compiler->verbose) - { - printf("\nInserting the inline return expression\n"); - m_compiler->gtDispTree(inlineCandidate); - printf("\n"); - } -#endif // DEBUG + if (InsertMidStatement(inlineInfo, use)) + { + return true; } - // If the inline was rejected and returns a retbuffer, then mark that - // local as DNER now so that promotion knows to leave it up to physical - // promotion. - if ((*use)->IsCall()) + JITDUMP("Splitting is required\n"); + BasicBlock* callBlock = m_compiler->compCurBB; + Statement* callStmt = m_compiler->fgMorphStmt; + Statement* newStmt = nullptr; + GenTree** use2 = nullptr; + m_compiler->gtSplitTree(callBlock, callStmt, call, &newStmt, &use2, /* includeOperands */ false); + assert(use2 == use); + + JITDUMP("After split:\n"); + while ((newStmt != callStmt) && (newStmt != nullptr)) { - CallArg* retBuffer = (*use)->AsCall()->gtArgs.GetRetBufferArg(); - if ((retBuffer != nullptr) && retBuffer->GetNode()->OperIs(GT_LCL_ADDR)) - { - m_compiler->lvaSetVarDoNotEnregister(retBuffer->GetNode()->AsLclVarCommon()->GetLclNum() - DEBUGARG(DoNotEnregisterReason::HiddenBufferStructArg)); - } + DISPSTMT(newStmt); + newStmt = newStmt->GetNextStmt(); } -#if FEATURE_MULTIREG_RET - // If an inline was rejected and the call returns a struct, we may - // have deferred some work when importing call for cases where the - // struct is returned in multiple registers. + DISPSTMT(callStmt); + + Statement* predStmt = callStmt == callBlock->firstStmt() ? nullptr : callStmt->GetPrevStmt(); + + inlineInfo.setupStatements.InsertIntoBlockBefore(m_compiler->compCurBB, m_compiler->fgMorphStmt); + *use = call->gtInlineCandidateInfo->result.substExpr; + + if (*use == nullptr) + { + *use = m_compiler->gtNewNothingNode(); + } + + if (call->gtInlineCandidateInfo->result.substBB != nullptr) + { + // IR may potentially contain nodes that requires mandatory BB flags to be set. + // Propagate those flags from the containing BB. + callBlock->CopyFlags(call->gtInlineCandidateInfo->result.substBB, BBF_COPY_PROPAGATE); + } + + m_compiler->gtUpdateStmtSideEffects(callStmt); + + if (InsertMidBlock(inlineInfo, callBlock, callStmt)) + { + m_nextBlock = callBlock; + m_nextStatement = predStmt == nullptr ? callBlock->firstStmt() : predStmt->GetNextStmt(); + return true; + } + + BasicBlock* continueBlock = m_compiler->fgSplitBlockBeforeStatement(callBlock, callStmt); + unsigned const baseBBNum = m_compiler->fgBBNumMax; + + JITDUMP("split " FMT_BB " after the inlinee call site; after portion is now " FMT_BB "\n", callBlock->bbNum, + continueBlock->bbNum); + + // The newly split block is not special so doesn't need to be kept. // - // See the bail-out clauses in impFixupCallStructReturn for inline - // candidates. + continueBlock->RemoveFlags(BBF_DONT_REMOVE); + + // Set the try and handler index and fix the jump types of inlinee's blocks. // - // Do the deferred work now. - if ((*use)->IsCall() && varTypeIsStruct(*use) && (*use)->AsCall()->HasMultiRegRetVal()) + for (BasicBlock* const block : m_compiler->InlineeCompiler->Blocks()) { - // See assert below, we only look one level above for a store parent. - if (parent->OperIsStore()) + noway_assert(!block->hasTryIndex()); + noway_assert(!block->hasHndIndex()); + block->copyEHRegion(callBlock); + block->CopyFlags(callBlock, BBF_BACKWARD_JUMP | BBF_PROF_WEIGHT); + + // Update block nums appropriately + // + block->bbNum += baseBBNum; + m_compiler->fgBBNumMax = max(block->bbNum, m_compiler->fgBBNumMax); + + DebugInfo di = callStmt->GetDebugInfo().GetRoot(); + if (di.IsValid()) { - // The inlinee can only be the value. - assert(parent->Data() == *use); - AttachStructInlineeToStore(parent, (*use)->AsCall()->gtRetClsHnd); + block->bbCodeOffs = di.GetLocation().GetOffset(); + block->bbCodeOffsEnd = block->bbCodeOffs + 1; // TODO: is code size of 1 some magic number for inlining? } else { - // Just store the inlinee to a variable to keep it simple. - *use = StoreStructInlineeToVar(*use, (*use)->AsCall()->gtRetClsHnd); + block->bbCodeOffs = 0; // TODO: why not BAD_IL_OFFSET? + block->bbCodeOffsEnd = 0; + block->SetFlags(BBF_INTERNAL); + } + + if (block->KindIs(BBJ_RETURN)) + { + noway_assert(!block->HasFlag(BBF_HAS_JMP)); + JITDUMP("\nConvert bbKind of " FMT_BB " to BBJ_ALWAYS to bottom block " FMT_BB "\n", block->bbNum, + continueBlock->bbNum); + + FlowEdge* const newEdge = m_compiler->fgAddRefPred(continueBlock, block); + block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); } - m_madeChanges = true; } -#endif + + // Inlinee's top block will have an artificial ref count. Remove. + assert(m_compiler->InlineeCompiler->fgFirstBB->bbRefs > 0); + m_compiler->InlineeCompiler->fgFirstBB->bbRefs--; + + // Insert inlinee's blocks into inliner's block list. + assert(callBlock->KindIs(BBJ_ALWAYS)); + assert(callBlock->TargetIs(continueBlock)); + m_compiler->fgRedirectTargetEdge(callBlock, m_compiler->InlineeCompiler->fgFirstBB); + + callBlock->SetNext(m_compiler->InlineeCompiler->fgFirstBB); + m_compiler->InlineeCompiler->fgFirstBB->SetPrev(callBlock); + m_compiler->InlineeCompiler->fgLastBB->SetNext(continueBlock); + continueBlock->SetPrev(m_compiler->InlineeCompiler->fgLastBB); + + // + // Add inlinee's block count to inliner's. + // + m_compiler->fgBBcount += m_compiler->InlineeCompiler->fgBBcount; + + // Append statements to null out gc ref locals, if necessary. + inlineInfo.teardownStatements.InsertIntoBlockAtBeginning(continueBlock); + JITDUMPEXEC(m_compiler->fgDispBasicBlocks(m_compiler->InlineeCompiler->fgFirstBB, m_compiler->InlineeCompiler->fgLastBB, true)); + + m_nextBlock = callBlock; + m_nextStatement = predStmt == nullptr ? callBlock->firstStmt() : predStmt->GetNextStmt(); + + return true; + } + + bool InsertMidStatement(InlineInfo& inlineInfo, GenTree** use) + { + Compiler* inlineeComp = m_compiler->InlineeCompiler; + if (inlineeComp->fgBBcount != 1) + { + return false; + } + + if (!inlineeComp->fgFirstBB->KindIs(BBJ_RETURN)) + { + return false; + } + + if ((inlineeComp->fgFirstBB->bbStmtList != nullptr) || + !inlineInfo.setupStatements.Empty() || + !inlineInfo.teardownStatements.Empty()) + { + return false; + } + + *use = inlineInfo.inlineCandidateInfo->result.substExpr; + + if (*use == nullptr) + { + *use = m_compiler->gtNewNothingNode(); + } + + if (inlineInfo.inlineCandidateInfo->result.substBB != nullptr) + { + // IR may potentially contain nodes that requires mandatory BB flags to be set. + // Propagate those flags from the containing BB. + m_block->CopyFlags(inlineInfo.inlineCandidateInfo->result.substBB, BBF_COPY_PROPAGATE); + } + + return true; + } + + bool InsertMidBlock(InlineInfo& inlineInfo, BasicBlock* block, Statement* stmt) + { + Compiler* inlineeComp = m_compiler->InlineeCompiler; + if (inlineeComp->fgBBcount != 1) + { + return false; + } + + if (!inlineeComp->fgFirstBB->KindIs(BBJ_RETURN)) + { + return false; + } + + m_compiler->fgInsertStmtListBefore(block, stmt, inlineeComp->fgFirstBB->bbStmtList); + + const BasicBlockFlags inlineeBlockFlags = inlineeComp->fgFirstBB->GetFlagsRaw(); + noway_assert((inlineeBlockFlags & BBF_HAS_JMP) == 0); + noway_assert((inlineeBlockFlags & BBF_KEEP_BBJ_ALWAYS) == 0); + + // Todo: we may want to exclude other flags here. + block->SetFlags(inlineeBlockFlags & ~BBF_RUN_RARELY); + + inlineInfo.teardownStatements.InsertIntoBlockBefore(block, stmt); + return true; } #if FEATURE_MULTIREG_RET @@ -550,7 +624,7 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorOperGet() == GT_CALL) + if (tree->IsCall()) { GenTreeCall* call = tree->AsCall(); bool tryLateDevirt = call->IsVirtual() && (call->gtCallType == CT_USER_FUNC); @@ -629,7 +703,7 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitorOperGet() == GT_JTRUE) { // See if this jtrue is now foldable. - BasicBlock* block = m_compiler->compCurBB; + BasicBlock* block = m_block; GenTree* condTree = tree->AsOp()->gtOp1; assert(tree == block->lastStmt()->GetRootNode()); @@ -723,104 +797,53 @@ PhaseStatus Compiler::fgInline() noway_assert(fgFirstBB != nullptr); - BasicBlock* block = fgFirstBB; - SubstitutePlaceholdersAndDevirtualizeWalker walker(this); + InlineAndDevirtualizeWalker walker(this); bool madeChanges = false; + BasicBlock* currentBlock = fgFirstBB; - do + while (currentBlock != nullptr) { - // Make the current basic block address available globally - compCurBB = block; - - for (Statement* const stmt : block->Statements()) + Statement* currentStmt = currentBlock->firstStmt(); + while (currentStmt != nullptr) { - -#if defined(DEBUG) // In debug builds we want the inline tree to show all failed // inlines. Some inlines may fail very early and never make it to // candidate stage. So scan the tree looking for those early failures. - fgWalkTreePre(stmt->GetRootNodePointer(), fgFindNonInlineCandidate, stmt); -#endif - // See if we need to replace some return value place holders. - // Also, see if this replacement enables further devirtualization. - // - // Note we are doing both preorder and postorder work in this walker. - // - // The preorder callback is responsible for replacing GT_RET_EXPRs - // with the appropriate expansion (call or inline result). - // Replacement may introduce subtrees with GT_RET_EXPR and so - // we rely on the preorder to recursively process those as well. - // - // On the way back up, the postorder callback then re-examines nodes for - // possible further optimization, as the (now complete) GT_RET_EXPR - // replacement may have enabled optimizations by providing more - // specific types for trees or variables. - walker.WalkTree(stmt->GetRootNodePointer(), nullptr); + INDEBUG(fgWalkTreePre(currentStmt->GetRootNodePointer(), fgFindNonInlineCandidate, currentStmt)); - GenTree* expr = stmt->GetRootNode(); + walker.VisitStatement(currentBlock, currentStmt); - // The importer ensures that all inline candidates are - // statement expressions. So see if we have a call. - if (expr->IsCall()) + if (walker.NextBlock() != nullptr) { - GenTreeCall* call = expr->AsCall(); - - // We do. Is it an inline candidate? - // - // Note we also process GuardeDevirtualizationCandidates here as we've - // split off GT_RET_EXPRs for them even when they are not inline candidates - // as we need similar processing to ensure they get patched back to where - // they belong. - if (call->IsInlineCandidate() || call->IsGuardedDevirtualizationCandidate()) - { - InlineResult inlineResult(this, call, stmt, "fgInline"); - - fgMorphStmt = stmt; - - fgMorphCallInline(call, &inlineResult); - - // If there's a candidate to process, we will make changes - madeChanges = true; - - // fgMorphCallInline may have updated the - // statement expression to a GT_NOP if the - // call returned a value, regardless of - // whether the inline succeeded or failed. - // - // If so, remove the GT_NOP and continue - // on with the next statement. - if (stmt->GetRootNode()->IsNothingNode()) - { - fgRemoveStmt(block, stmt); - continue; - } - } + currentBlock = walker.NextBlock(); + currentStmt = walker.NextStatement(); + continue; } + GenTree* expr = currentStmt->GetRootNode(); + // See if stmt is of the form GT_COMMA(call, nop) // If yes, we can get rid of GT_COMMA. if (expr->OperGet() == GT_COMMA && expr->AsOp()->gtOp1->OperGet() == GT_CALL && expr->AsOp()->gtOp2->OperGet() == GT_NOP) { madeChanges = true; - stmt->SetRootNode(expr->AsOp()->gtOp1); + currentStmt->SetRootNode(expr->AsOp()->gtOp1); } + + currentStmt = currentStmt->GetNextStmt(); } - block = block->Next(); + currentBlock = currentBlock->Next(); - } while (block); + } madeChanges |= walker.MadeChanges(); #ifdef DEBUG - // Check that we should not have any inline candidate or return value place holder left. - - block = fgFirstBB; - noway_assert(block); - - do + // Check that we should not have any inline candidates left. + for (BasicBlock* block : Blocks()) { for (Statement* const stmt : block->Statements()) { @@ -828,9 +851,7 @@ PhaseStatus Compiler::fgInline() fgWalkTreePre(stmt->GetRootNodePointer(), fgDebugCheckInlineCandidates); } - block = block->Next(); - - } while (block); + } fgVerifyHandlerTab(); @@ -871,7 +892,7 @@ PhaseStatus Compiler::fgInline() // possible inline are undone, and the candidate flag on the call // is cleared. // -void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult) +void Compiler::fgMorphCallInline(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* inlineResult) { bool inliningFailed = false; @@ -882,7 +903,7 @@ void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult) { InlineContext* createdContext = nullptr; // Attempt the inline - fgMorphCallInlineHelper(call, inlineResult, &createdContext); + fgMorphCallInlineHelper(inlineInfo, call, inlineResult, &createdContext); // We should have made up our minds one way or another.... assert(inlineResult->IsDecided()); @@ -928,16 +949,13 @@ void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult) { if (call->gtReturnType != TYP_VOID) { - JITDUMP("Inlining [%06u] failed, so bashing " FMT_STMT " to NOP\n", dspTreeID(call), fgMorphStmt->GetID()); + JITDUMP("Inline [%06u] marked as failed\n", dspTreeID(call)); // Detach the GT_CALL tree from the original statement by // hanging a "nothing" node to it. Later the "nothing" node will be removed // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node. - inlCandInfo->retExpr->gtSubstExpr = call; - inlCandInfo->retExpr->gtSubstBB = compCurBB; - - noway_assert(fgMorphStmt->GetRootNode() == call); - fgMorphStmt->SetRootNode(gtNewNothingNode()); + inlCandInfo->result.substExpr = nullptr; + inlCandInfo->result.substBB = nullptr; } // Inlinee compiler may have determined call does not return; if so, update this compiler's state. @@ -970,7 +988,7 @@ void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult) // If the inline succeeded, this context will already be marked as successful. If it failed and // a context is returned, then it will not have been marked as success or failed. // -void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, InlineContext** createdContext) +void Compiler::fgMorphCallInlineHelper(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* result, InlineContext** createdContext) { // Don't expect any surprises here. assert(result->IsCandidate()); @@ -1031,7 +1049,7 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, // Invoke the compiler to inline the call. // - fgInvokeInlineeCompiler(call, result, createdContext); + fgInvokeInlineeCompiler(inlineInfo, call, result, createdContext); if (result->IsFailure()) { @@ -1142,24 +1160,19 @@ Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTree** pTree, f { assert((tree->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0); } - else - { - assert(tree->gtOper != GT_RET_EXPR); - } return WALK_CONTINUE; } #endif // DEBUG -void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineResult, InlineContext** createdContext) +void Compiler::fgInvokeInlineeCompiler(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* inlineResult, InlineContext** createdContext) { noway_assert(call->gtOper == GT_CALL); noway_assert(call->IsInlineCandidate()); noway_assert(opts.OptEnabled(CLFLG_INLINING)); // This is the InlineInfo struct representing a method to be inlined. - InlineInfo inlineInfo{}; CORINFO_METHOD_HANDLE fncHandle = call->gtCallMethHnd; inlineInfo.fncHandle = fncHandle; @@ -1324,12 +1337,12 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe // causes the BBJ_RETURN block not to be imported at all.) // Fail the inlining attempt if ((inlineCandidateInfo->methInfo.args.retType != CORINFO_TYPE_VOID) && - (inlineCandidateInfo->retExpr->gtSubstExpr == nullptr)) + (inlineCandidateInfo->result.substExpr == nullptr)) { #ifdef DEBUG if (verbose) { - printf("\nInlining failed because pInlineInfo->retExpr is not set in the inlinee method %s.\n", + printf("\nInlining failed because pInlineInfo->result.substExpr is not set in the inlinee method %s.\n", eeGetMethodFullName(fncHandle)); } #endif // DEBUG @@ -1342,8 +1355,7 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // We've successfully obtain the list of inlinee's basic blocks. - // Let's insert it to inliner's basic block list. - fgInsertInlineeBlocks(&inlineInfo); + fgFinalizeInlineeStatements(&inlineInfo); #ifdef DEBUG @@ -1367,39 +1379,69 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe inlineResult->NoteSuccess(); } -//------------------------------------------------------------------------ -// fgInsertInlineeBlocks: incorporate statements for an inline into the -// root method. -// -// Arguments: -// inlineInfo -- info for the inline -// -// Notes: -// The inlining attempt cannot be failed once this method is called. -// -// Adds all inlinee statements, plus any glue statements needed -// either before or after the inlined call. -// -// Updates flow graph and assigns weights to inlinee -// blocks. Currently does not attempt to read IBC data for the -// inlinee. -// -// Updates relevant root method status flags (eg optMethodFlags) to -// include information from the inlinee. -// -// Marks newly added statements with an appropriate inline context. +void StatementListBuilder::Append(Statement* stmt) +{ + if (m_head == nullptr) + { + m_head = m_tail = stmt; + stmt->SetPrevStmt(nullptr); + stmt->SetNextStmt(nullptr); + return; + } + + stmt->SetPrevStmt(m_tail); + stmt->SetNextStmt(nullptr); + m_tail->SetNextStmt(stmt); + m_tail = stmt; +} + +void StatementListBuilder::InsertIntoBlockAtBeginning(BasicBlock* block) +{ + if (m_head == nullptr) + { + return; + } + + Statement* lastStmt = block->lastStmt(); + if (lastStmt == nullptr) + { + lastStmt = m_tail; + } + + if (block->bbStmtList != nullptr) + { + m_tail->SetNextStmt(block->bbStmtList); + block->bbStmtList->SetPrevStmt(m_tail); + } + + block->bbStmtList = m_head; + m_head->SetPrevStmt(lastStmt); +} -void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) +void StatementListBuilder::InsertIntoBlockBefore(BasicBlock* block, Statement* before) { - GenTreeCall* iciCall = pInlineInfo->iciCall; - Statement* iciStmt = pInlineInfo->iciStmt; - BasicBlock* iciBlock = pInlineInfo->iciBlock; + assert(before != nullptr); - noway_assert(iciBlock->bbStmtList != nullptr); - noway_assert(iciStmt->GetRootNode() != nullptr); - assert(iciStmt->GetRootNode() == iciCall); - noway_assert(iciCall->gtOper == GT_CALL); + if (before == block->firstStmt()) + { + InsertIntoBlockAtBeginning(block); + return; + } + if (m_head == nullptr) + { + return; + } + + m_head->SetPrevStmt(before->GetPrevStmt()); + m_tail->SetNextStmt(before); + + m_head->GetPrevStmt()->SetNextStmt(m_head); + m_tail->GetNextStmt()->SetPrevStmt(m_tail); +} + +void Compiler::fgFinalizeInlineeStatements(InlineInfo* pInlineInfo) +{ #ifdef DEBUG Statement* currentDumpStmt = nullptr; @@ -1407,7 +1449,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) if (verbose) { printf("\n\n----------- Statements (and blocks) added due to the inlining of call "); - printTreeID(iciCall); + printTreeID(pInlineInfo->iciCall); printf(" -----------\n"); } @@ -1417,156 +1459,17 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) pInlineInfo->inlineContext->SetSucceeded(pInlineInfo); // Prepend statements - Statement* stmtAfter = fgInlinePrependStatements(pInlineInfo); + fgInlinePrependStatements(pInlineInfo); #ifdef DEBUG if (verbose) { - currentDumpStmt = stmtAfter; + //currentDumpStmt = stmtAfter; printf("\nInlinee method body:"); } #endif // DEBUG - BasicBlock* topBlock = iciBlock; - BasicBlock* bottomBlock = nullptr; - bool insertInlineeBlocks = true; - - if (InlineeCompiler->fgBBcount == 1) - { - // When fgBBCount is 1 we will always have a non-NULL fgFirstBB - // - PREFAST_ASSUME(InlineeCompiler->fgFirstBB != nullptr); - - // DDB 91389: Don't throw away the (only) inlinee block - // when its return type is not BBJ_RETURN. - // In other words, we need its BBJ_ to perform the right thing. - if (InlineeCompiler->fgFirstBB->KindIs(BBJ_RETURN)) - { - // Inlinee contains just one BB. So just insert its statement list to topBlock. - if (InlineeCompiler->fgFirstBB->bbStmtList != nullptr) - { - JITDUMP("\nInserting inlinee code into " FMT_BB "\n", iciBlock->bbNum); - stmtAfter = fgInsertStmtListAfter(iciBlock, stmtAfter, InlineeCompiler->fgFirstBB->firstStmt()); - } - else - { - JITDUMP("\ninlinee was empty\n"); - } - - // Copy inlinee bbFlags to caller bbFlags. - const BasicBlockFlags inlineeBlockFlags = InlineeCompiler->fgFirstBB->GetFlagsRaw(); - noway_assert((inlineeBlockFlags & BBF_HAS_JMP) == 0); - noway_assert((inlineeBlockFlags & BBF_KEEP_BBJ_ALWAYS) == 0); - - // Todo: we may want to exclude other flags here. - iciBlock->SetFlags(inlineeBlockFlags & ~BBF_RUN_RARELY); - -#ifdef DEBUG - if (verbose) - { - noway_assert(currentDumpStmt); - - if (currentDumpStmt != stmtAfter) - { - do - { - currentDumpStmt = currentDumpStmt->GetNextStmt(); - - printf("\n"); - - gtDispStmt(currentDumpStmt); - printf("\n"); - - } while (currentDumpStmt != stmtAfter); - } - } -#endif // DEBUG - - // Append statements to null out gc ref locals, if necessary. - fgInlineAppendStatements(pInlineInfo, iciBlock, stmtAfter); - insertInlineeBlocks = false; - } - else - { - JITDUMP("\ninlinee was single-block, but not BBJ_RETURN\n"); - } - } - - // - // ======= Inserting inlinee's basic blocks =============== - // - if (insertInlineeBlocks) - { - JITDUMP("\nInserting inlinee blocks\n"); - bottomBlock = fgSplitBlockAfterStatement(topBlock, stmtAfter); - unsigned const baseBBNum = fgBBNumMax; - - JITDUMP("split " FMT_BB " after the inlinee call site; after portion is now " FMT_BB "\n", topBlock->bbNum, - bottomBlock->bbNum); - - // The newly split block is not special so doesn't need to be kept. - // - bottomBlock->RemoveFlags(BBF_DONT_REMOVE); - - // Set the try and handler index and fix the jump types of inlinee's blocks. - // - for (BasicBlock* const block : InlineeCompiler->Blocks()) - { - noway_assert(!block->hasTryIndex()); - noway_assert(!block->hasHndIndex()); - block->copyEHRegion(iciBlock); - block->CopyFlags(iciBlock, BBF_BACKWARD_JUMP | BBF_PROF_WEIGHT); - - // Update block nums appropriately - // - block->bbNum += baseBBNum; - fgBBNumMax = max(block->bbNum, fgBBNumMax); - - DebugInfo di = iciStmt->GetDebugInfo().GetRoot(); - if (di.IsValid()) - { - block->bbCodeOffs = di.GetLocation().GetOffset(); - block->bbCodeOffsEnd = block->bbCodeOffs + 1; // TODO: is code size of 1 some magic number for inlining? - } - else - { - block->bbCodeOffs = 0; // TODO: why not BAD_IL_OFFSET? - block->bbCodeOffsEnd = 0; - block->SetFlags(BBF_INTERNAL); - } - - if (block->KindIs(BBJ_RETURN)) - { - noway_assert(!block->HasFlag(BBF_HAS_JMP)); - JITDUMP("\nConvert bbKind of " FMT_BB " to BBJ_ALWAYS to bottom block " FMT_BB "\n", block->bbNum, - bottomBlock->bbNum); - - FlowEdge* const newEdge = fgAddRefPred(bottomBlock, block); - block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); - } - } - - // Inlinee's top block will have an artificial ref count. Remove. - assert(InlineeCompiler->fgFirstBB->bbRefs > 0); - InlineeCompiler->fgFirstBB->bbRefs--; - - // Insert inlinee's blocks into inliner's block list. - assert(topBlock->KindIs(BBJ_ALWAYS)); - assert(topBlock->TargetIs(bottomBlock)); - fgRedirectTargetEdge(topBlock, InlineeCompiler->fgFirstBB); - - topBlock->SetNext(InlineeCompiler->fgFirstBB); - InlineeCompiler->fgLastBB->SetNext(bottomBlock); - - // - // Add inlinee's block count to inliner's. - // - fgBBcount += InlineeCompiler->fgBBcount; - - // Append statements to null out gc ref locals, if necessary. - fgInlineAppendStatements(pInlineInfo, bottomBlock, nullptr); - JITDUMPEXEC(fgDispBasicBlocks(InlineeCompiler->fgFirstBB, InlineeCompiler->fgLastBB, true)); - } + fgInlineAppendStatements(pInlineInfo->teardownStatements, pInlineInfo); // // At this point, we have successfully inserted inlinee's code. @@ -1669,6 +1572,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) } } + BasicBlock* iciBlock = pInlineInfo->iciBlock; // If we inline a no-return call at a site with profile weight, // we will introduce inconsistency. // @@ -1735,12 +1639,6 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) gsCookieDummy->lvIsTemp = true; // It is not alive at all, set the flag to prevent zero-init. lvaSetVarDoNotEnregister(dummy DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr)); } - - // - // Detach the GT_CALL node from the original statement by hanging a "nothing" node under it, - // so that fgMorphStmts can remove the statement once we return from here. - // - iciStmt->SetRootNode(gtNewNothingNode()); } //------------------------------------------------------------------------ @@ -1753,15 +1651,12 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // newStmt - updated with the new statement // callDI - debug info for the call // -void Compiler::fgInsertInlineeArgument( - const InlArgInfo& argInfo, BasicBlock* block, Statement** afterStmt, Statement** newStmt, const DebugInfo& callDI) +void Compiler::fgInsertInlineeArgument(StatementListBuilder& statements, const InlArgInfo& argInfo, const DebugInfo& callDI) { const bool argIsSingleDef = !argInfo.argHasLdargaOp && !argInfo.argHasStargOp; CallArg* arg = argInfo.arg; GenTree* argNode = arg->GetNode(); - assert(!argNode->OperIs(GT_RET_EXPR)); - if (argInfo.argHasTmp) { noway_assert(argInfo.argIsUsed); @@ -1793,10 +1688,9 @@ void Compiler::fgInsertInlineeArgument( // We're going to assign the argument value to the temp we use for it in the inline body. GenTree* store = gtNewTempStore(argInfo.argTmpNum, argNode); - *newStmt = gtNewStmt(store, callDI); - fgInsertStmtAfter(block, *afterStmt, *newStmt); - *afterStmt = *newStmt; - DISPSTMT(*afterStmt); + Statement* newStmt = gtNewStmt(store, callDI); + statements.Append(newStmt); + DISPSTMT(newStmt); } } else if (argInfo.argIsByRefToStructLocal) @@ -1815,7 +1709,7 @@ void Compiler::fgInsertInlineeArgument( if (argInfo.argHasSideEff) { noway_assert(argInfo.argIsUsed == false); - *newStmt = nullptr; + Statement* newStmt = nullptr; bool append = true; if (argNode->gtOper == GT_BLK) @@ -1823,7 +1717,7 @@ void Compiler::fgInsertInlineeArgument( // Don't put GT_BLK node under a GT_COMMA. // Codegen can't deal with it. // Just hang the address here in case there are side-effect. - *newStmt = gtNewStmt(gtUnusedValNode(argNode->AsOp()->gtOp1), callDI); + newStmt = gtNewStmt(gtUnusedValNode(argNode->AsOp()->gtOp1), callDI); } else { @@ -1840,7 +1734,6 @@ void Compiler::fgInsertInlineeArgument( // has no side effects, we can skip appending all // together. This will help jit TP a bit. // - assert(!argNode->OperIs(GT_RET_EXPR)); // For case (1) // @@ -1886,21 +1779,20 @@ void Compiler::fgInsertInlineeArgument( if (!append) { - assert(*newStmt == nullptr); + assert(newStmt == nullptr); JITDUMP("Arg tree side effects were discardable, not appending anything for arg\n"); } else { // If we don't have something custom to append, // just append the arg node as an unused value. - if (*newStmt == nullptr) + if (newStmt == nullptr) { - *newStmt = gtNewStmt(gtUnusedValNode(argNode), callDI); + newStmt = gtNewStmt(gtUnusedValNode(argNode), callDI); } - fgInsertStmtAfter(block, *afterStmt, *newStmt); - *afterStmt = *newStmt; - DISPSTMT(*afterStmt); + statements.Append(newStmt); + DISPSTMT(newStmt); } } else if (argNode->IsBoxedValue()) @@ -1934,13 +1826,10 @@ void Compiler::fgInsertInlineeArgument( // and are given the same inline context as the call any calls // added here will appear to have been part of the immediate caller. // -Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) +void Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) { - BasicBlock* block = inlineInfo->iciBlock; - Statement* callStmt = inlineInfo->iciStmt; - const DebugInfo& callDI = callStmt->GetDebugInfo(); - Statement* afterStmt = callStmt; // afterStmt is the place where the new statements should be inserted after. - Statement* newStmt = nullptr; + BasicBlock* block = inlineInfo->iciBlock; + const DebugInfo& callDI = inlineInfo->iciStmt->GetDebugInfo(); GenTreeCall* call = inlineInfo->iciCall->AsCall(); noway_assert(call->gtOper == GT_CALL); @@ -1977,7 +1866,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) // Append the InstParam if (inlineInfo->inlInstParamArgInfo != nullptr) { - fgInsertInlineeArgument(*inlineInfo->inlInstParamArgInfo, block, &afterStmt, &newStmt, callDI); + fgInsertInlineeArgument(inlineInfo->setupStatements, *inlineInfo->inlInstParamArgInfo, callDI); } // Treat arguments that had to be assigned to temps @@ -1986,7 +1875,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) JITDUMP("\nArguments setup:\n"); for (unsigned argNum = 0; argNum < inlineInfo->argCnt; argNum++) { - fgInsertInlineeArgument(inlArgInfo[argNum], block, &afterStmt, &newStmt, callDI); + fgInsertInlineeArgument(inlineInfo->setupStatements, inlArgInfo[argNum], callDI); } } @@ -2001,17 +1890,15 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) CORINFO_CLASS_HANDLE exactClass = eeGetClassFromContext(inlineInfo->inlineCandidateInfo->exactContextHandle); tree = fgGetSharedCCtor(exactClass); - newStmt = gtNewStmt(tree, callDI); - fgInsertStmtAfter(block, afterStmt, newStmt); - afterStmt = newStmt; + Statement* newStmt = gtNewStmt(tree, callDI); + inlineInfo->setupStatements.Append(newStmt); } // Insert the nullcheck statement now. if (nullcheck) { - newStmt = gtNewStmt(nullcheck, callDI); - fgInsertStmtAfter(block, afterStmt, newStmt); - afterStmt = newStmt; + Statement* newStmt = gtNewStmt(nullcheck, callDI); + inlineInfo->setupStatements.Append(newStmt); } // @@ -2059,16 +1946,13 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) tree = gtNewTempStore(tmpNum, (lclTyp == TYP_STRUCT) ? gtNewIconNode(0) : gtNewZeroConNode(lclTyp)); - newStmt = gtNewStmt(tree, callDI); - fgInsertStmtAfter(block, afterStmt, newStmt); - afterStmt = newStmt; + Statement* newStmt = gtNewStmt(tree, callDI); + inlineInfo->setupStatements.Append(newStmt); - DISPSTMT(afterStmt); + DISPSTMT(newStmt); } } } - - return afterStmt; } //------------------------------------------------------------------------ @@ -2085,7 +1969,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) // we skip nulling the locals, since it can interfere // with tail calls introduced by the local. -void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, Statement* stmtAfter) +void Compiler::fgInlineAppendStatements(StatementListBuilder& statements, InlineInfo* inlineInfo) { // Null out any gc ref locals if (!inlineInfo->HasGcRefLocals()) @@ -2145,9 +2029,9 @@ void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* bloc // Does the local we're about to null out appear in the return // expression? If so we somehow messed up and didn't properly // spill the return value. See impInlineFetchLocal. - if ((inlCandInfo->retExpr != nullptr) && (inlCandInfo->retExpr->gtSubstExpr != nullptr)) + if (inlCandInfo->result.substExpr != nullptr) { - const bool interferesWithReturn = gtHasRef(inlCandInfo->retExpr->gtSubstExpr, tmpNum); + const bool interferesWithReturn = gtHasRef(inlCandInfo->result.substExpr, tmpNum); noway_assert(!interferesWithReturn); } @@ -2155,15 +2039,7 @@ void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* bloc GenTree* nullExpr = gtNewTempStore(tmpNum, gtNewZeroConNode(lclTyp)); Statement* nullStmt = gtNewStmt(nullExpr, callDI); - if (stmtAfter == nullptr) - { - fgInsertStmtAtBeg(block, nullStmt); - } - else - { - fgInsertStmtAfter(block, stmtAfter, nullStmt); - } - stmtAfter = nullStmt; + statements.Append(nullStmt); #ifdef DEBUG if (verbose) diff --git a/src/coreclr/jit/fgstmt.cpp b/src/coreclr/jit/fgstmt.cpp index 85809339965ff0..52be01d61fc9ff 100644 --- a/src/coreclr/jit/fgstmt.cpp +++ b/src/coreclr/jit/fgstmt.cpp @@ -382,6 +382,64 @@ Statement* Compiler::fgInsertStmtListAfter(BasicBlock* block, Statement* stmtAft return stmtLast; } +void Compiler::fgInsertStmtListAtEnd(BasicBlock* block, Statement* stmtList) +{ + if (stmtList == nullptr) + { + return; + } + + Statement* firstStmt = block->firstStmt(); + if (firstStmt != nullptr) + { + Statement* lastStmt = firstStmt->GetPrevStmt(); + noway_assert(lastStmt != nullptr && lastStmt->GetNextStmt() == nullptr); + + // Append the statement after the last one. + Statement* stmtLast = stmtList->GetPrevStmt(); + lastStmt->SetNextStmt(stmtList); + stmtList->SetPrevStmt(lastStmt); + firstStmt->SetPrevStmt(stmtLast); + } + else + { + // The block is completely empty. + block->bbStmtList = stmtList; + } +} + +void Compiler::fgInsertStmtListBefore(BasicBlock* block, Statement* before, Statement* stmtList) +{ + if (stmtList == nullptr) + { + return; + } + + assert(block->bbStmtList != nullptr); + Statement* stmtLast = stmtList->GetPrevStmt(); + + if (before == block->bbStmtList) + { + // We're inserting before the first statement in the block. + Statement* first = block->firstStmt(); + Statement* last = block->lastStmt(); + + stmtLast->SetNextStmt(first); + stmtList->SetPrevStmt(last); + + block->bbStmtList = stmtList; + first->SetPrevStmt(stmtLast); + } + else + { + stmtLast->SetNextStmt(before); + stmtList->SetPrevStmt(before->GetPrevStmt()); + + stmtList->GetPrevStmt()->SetNextStmt(stmtList); + stmtLast->GetNextStmt()->SetPrevStmt(stmtLast); + } +} + /***************************************************************************** * * Create a new statement from tree and wire the links up. diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 06854fa563b59c..9b65ff47b5f487 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -249,7 +249,6 @@ void GenTree::InitNodeSize() GenTree::s_gtNodeSizes[GT_INDEX_ADDR] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_BOUNDS_CHECK] = TREE_NODE_SZ_SMALL; GenTree::s_gtNodeSizes[GT_ARR_ELEM] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_RET_EXPR] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_FIELD_ADDR] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_CMPXCHG] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_QMARK] = TREE_NODE_SZ_LARGE; @@ -320,7 +319,6 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeStoreInd) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeAddrMode) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeBlk) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeRetExpr) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeILOffset) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node @@ -738,10 +736,6 @@ ClassLayout* GenTree::GetLayout(Compiler* compiler) const structHnd = AsCall()->gtRetClsHnd; break; - case GT_RET_EXPR: - structHnd = AsRetExpr()->gtInlineCandidate->gtRetClsHnd; - break; - default: unreached(); } @@ -3067,10 +3061,6 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) { return true; } - if (tree->OperIs(GT_RET_EXPR)) - { - return gtHasRef(tree->AsRetExpr()->gtInlineCandidate, lclNum); - } return false; } @@ -3188,6 +3178,54 @@ bool Compiler::gtHasAddressExposedLocals(GenTree* tree) return visitor.WalkTree(&tree, nullptr) == WALK_ABORT; } +bool Compiler::gtSubTreeAndChildrenAreFirstExecutedSideEffects(GenTree* tree, GenTree* subTree) +{ + struct Visitor : GenTreeVisitor + { + enum + { + DoPreOrder = true, + DoPostOrder = true, + UseExecutionOrder = true, + }; + + Visitor(Compiler* comp, GenTree* subTree) + : GenTreeVisitor(comp) + { + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + if (*use == m_subTree) + { + Result = true; + return WALK_ABORT; + } + + return WALK_CONTINUE; + } + + fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + if (((*use)->gtFlags & GTF_ALL_EFFECT) != GTF_EMPTY) + { + Result = false; + return WALK_ABORT; + } + + return WALK_CONTINUE; + } + + bool Result = false; + private: + GenTree* m_subTree; + }; + + Visitor visitor(this, subTree); + visitor.WalkTree(&tree, nullptr); + return visitor.Result; +} + #ifdef DEBUG /***************************************************************************** @@ -6651,7 +6689,6 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) case GT_CATCH_ARG: case GT_LABEL: case GT_FTN_ADDR: - case GT_RET_EXPR: case GT_CNS_INT: case GT_CNS_LNG: case GT_CNS_DBL: @@ -8523,25 +8560,6 @@ GenTreeLclFld* Compiler::gtNewLclFldNode(unsigned lnum, var_types type, unsigned return node; } -GenTreeRetExpr* Compiler::gtNewInlineCandidateReturnExpr(GenTreeCall* inlineCandidate, var_types type) -{ - assert(GenTree::s_gtNodeSizes[GT_RET_EXPR] == TREE_NODE_SZ_LARGE); - - GenTreeRetExpr* node = new (this, GT_RET_EXPR) GenTreeRetExpr(type); - - node->gtInlineCandidate = inlineCandidate; - - node->gtSubstExpr = nullptr; - node->gtSubstBB = nullptr; - - // GT_RET_EXPR node eventually might be turned back into GT_CALL (when inlining is aborted for example). - // Therefore it should carry the GTF_CALL flag so that all the rules about spilling can apply to it as well. - // For example, impImportLeave or CEE_POP need to spill GT_RET_EXPR before empty the evaluation stack. - node->gtFlags |= GTF_CALL; - - return node; -} - //------------------------------------------------------------------------ // gtNewFieldAddrNode: Create a new GT_FIELD_ADDR node. // @@ -9484,13 +9502,6 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree) copy->AsLclFld()->SetSsaNum(tree->AsLclFld()->GetSsaNum()); goto DONE; - case GT_RET_EXPR: - // GT_RET_EXPR is unique node, that contains a link to a gtInlineCandidate node, - // that is part of another statement. We cannot clone both here and cannot - // create another GT_RET_EXPR that points to the same gtInlineCandidate. - NO_WAY("Cloning of GT_RET_EXPR node not supported"); - goto DONE; - case GT_MEMORYBARRIER: copy = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID); goto DONE; @@ -10247,7 +10258,6 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_CATCH_ARG: case GT_LABEL: case GT_FTN_ADDR: - case GT_RET_EXPR: case GT_CNS_INT: case GT_CNS_LNG: case GT_CNS_DBL: @@ -12428,21 +12438,6 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack) case GT_SWIFT_ERROR: break; - case GT_RET_EXPR: - { - GenTreeCall* inlineCand = tree->AsRetExpr()->gtInlineCandidate; - printf("(for "); - printTreeID(inlineCand); - printf(")"); - - if (tree->AsRetExpr()->gtSubstExpr != nullptr) - { - printf(" -> "); - printTreeID(tree->AsRetExpr()->gtSubstExpr); - } - } - break; - case GT_PHYSREG: printf(" %s", getRegName(tree->AsPhysReg()->gtSrcReg)); break; @@ -15113,19 +15108,9 @@ GenTree* Compiler::gtTryRemoveBoxUpstreamEffects(GenTree* op, BoxRemovalOptions GenTree* copy = copyStmt->GetRootNode(); if (!copy->OperIs(GT_STOREIND, GT_STORE_BLK)) { - // GT_RET_EXPR is a tolerable temporary failure. - // The jit will revisit this optimization after - // inlining is done. - if (copy->gtOper == GT_RET_EXPR) - { - JITDUMP(" bailing; must wait for replacement of copy %s\n", GenTree::OpName(copy->gtOper)); - } - else - { - // Anything else is a missed case we should figure out how to handle. - // One known case is GT_COMMAs enclosing the store we are looking for. - JITDUMP(" bailing; unexpected copy op %s\n", GenTree::OpName(copy->gtOper)); - } + // Anything else is a missed case we should figure out how to handle. + // One known case is GT_COMMAs enclosing the store we are looking for. + JITDUMP(" bailing; unexpected copy op %s\n", GenTree::OpName(copy->gtOper)); return nullptr; } @@ -15191,13 +15176,6 @@ GenTree* Compiler::gtTryRemoveBoxUpstreamEffects(GenTree* op, BoxRemovalOptions // If the copy is a struct copy, make sure we know how to isolate any source side effects. GenTree* copySrc = copy->Data(); - // If the copy source is from a pending inline, wait for it to resolve. - if (copySrc->gtOper == GT_RET_EXPR) - { - JITDUMP(" bailing; must wait for replacement of copy source %s\n", GenTree::OpName(copySrc->gtOper)); - return nullptr; - } - bool hasSrcSideEffect = false; bool isStructCopy = false; @@ -16915,19 +16893,9 @@ bool Compiler::gtNodeHasSideEffects(GenTree* tree, GenTreeFlags flags, bool igno // Are there only GTF_CALL side effects remaining? (and no other side effect kinds) if (flags & GTF_CALL) { - GenTree* potentialCall = tree; - - if (potentialCall->OperIs(GT_RET_EXPR)) + if (tree->OperIs(GT_CALL)) { - // We need to preserve return expressions where the underlying call - // has side effects. Otherwise early folding can result in us dropping - // the call. - potentialCall = potentialCall->AsRetExpr()->gtInlineCandidate; - } - - if (potentialCall->OperIs(GT_CALL)) - { - GenTreeCall* const call = potentialCall->AsCall(); + GenTreeCall* const call = tree->AsCall(); const bool ignoreExceptions = (flags & GTF_EXCEPT) == 0; return call->HasSideEffects(this, ignoreExceptions, ignoreCctors); } @@ -17026,13 +16994,14 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S // True if any changes were made; false if nothing needed to be done to split the tree. // bool Compiler::gtSplitTree( - BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse) + BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse, bool includeOperands) { class Splitter final : public GenTreeVisitor { BasicBlock* m_bb; Statement* m_splitStmt; GenTree* m_splitNode; + bool m_includeOperands; struct UseInfo { @@ -17049,11 +17018,12 @@ bool Compiler::gtSplitTree( UseExecutionOrder = true }; - Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode) + Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode, bool includeOperands) : GenTreeVisitor(compiler) , m_bb(bb) , m_splitStmt(stmt) , m_splitNode(splitNode) + , m_includeOperands(includeOperands) , m_useStack(compiler->getAllocator(CMK_ArrayStack)) { } @@ -17066,6 +17036,12 @@ bool Compiler::gtSplitTree( { assert(!(*use)->OperIs(GT_QMARK)); m_useStack.Push(UseInfo{use, user}); + if ((*use == m_splitNode) && !m_includeOperands) + { + SplitStack(use); + return WALK_ABORT; + } + return WALK_CONTINUE; } @@ -17073,56 +17049,61 @@ bool Compiler::gtSplitTree( { if (*use == m_splitNode) { - bool userIsReturned = false; - // Split all siblings and ancestor siblings. - int i; - for (i = 0; i < m_useStack.Height() - 1; i++) - { - const UseInfo& useInf = m_useStack.BottomRef(i); - if (useInf.Use == use) - { - break; - } + SplitStack(use); + return WALK_ABORT; + } - // If this has the same user as the next node then it is a - // sibling of an ancestor -- and thus not on the "path" - // that contains the split node. - if (m_useStack.BottomRef(i + 1).User == useInf.User) - { - SplitOutUse(useInf, userIsReturned); - } - else - { - // This is an ancestor of the node we're splitting on. - userIsReturned = IsReturned(useInf, userIsReturned); - } - } + while (m_useStack.Top(0).Use != use) + { + m_useStack.Pop(); + } - assert(m_useStack.Bottom(i).Use == use); - userIsReturned = IsReturned(m_useStack.BottomRef(i), userIsReturned); + return WALK_CONTINUE; + } - // The remaining nodes should be operands of the split node. - for (i++; i < m_useStack.Height(); i++) + private: + void SplitStack(GenTree** use) + { + bool userIsReturned = false; + // Split all siblings and ancestor siblings. + int i; + for (i = 0; i < m_useStack.Height() - 1; i++) + { + const UseInfo& useInf = m_useStack.BottomRef(i); + if (useInf.Use == use) { - const UseInfo& useInf = m_useStack.BottomRef(i); - assert(useInf.User == *use); - SplitOutUse(useInf, userIsReturned); + break; } - SplitNodeUse = use; - - return WALK_ABORT; + // If this has the same user as the next node then it is a + // sibling of an ancestor -- and thus not on the "path" + // that contains the split node. + if (m_useStack.BottomRef(i + 1).User == useInf.User) + { + SplitOutUse(useInf, userIsReturned); + } + else + { + // This is an ancestor of the node we're splitting on. + userIsReturned = IsReturned(useInf, userIsReturned); + } } - while (m_useStack.Top(0).Use != use) + assert(m_useStack.Bottom(i).Use == use); + userIsReturned = IsReturned(m_useStack.BottomRef(i), userIsReturned); + + assert(m_includeOperands || (i == m_useStack.Height() - 1)); + // The remaining nodes should be operands of the split node. + for (i++; i < m_useStack.Height(); i++) { - m_useStack.Pop(); + const UseInfo& useInf = m_useStack.BottomRef(i); + assert(useInf.User == *use); + SplitOutUse(useInf, userIsReturned); } - return WALK_CONTINUE; + SplitNodeUse = use; } - private: bool IsReturned(const UseInfo& useInf, bool userIsReturned) { if (useInf.User != nullptr) @@ -17278,7 +17259,7 @@ bool Compiler::gtSplitTree( } }; - Splitter splitter(this, block, stmt, splitPoint); + Splitter splitter(this, block, stmt, splitPoint, includeOperands); splitter.WalkTree(stmt->GetRootNodePointer(), nullptr); *firstNewStmt = splitter.FirstStatement; *splitNodeUse = splitter.SplitNodeUse; @@ -19114,15 +19095,6 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, b break; } - case GT_RET_EXPR: - { - // If we see a RET_EXPR, recurse through to examine the - // return value expression. - GenTree* retExpr = obj->AsRetExpr()->gtInlineCandidate; - objClass = gtGetClassHandle(retExpr, pIsExact, pIsNonNull); - break; - } - case GT_CALL: { GenTreeCall* call = obj->AsCall(); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index e7c42dc83c146b..3d6871e16587dd 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -8246,35 +8246,6 @@ struct GenTreeCmpXchg : public GenTreeIndir } }; -/* gtRetExp -- Place holder for the return expression from an inline candidate (GT_RET_EXPR) */ - -struct GenTreeRetExpr : public GenTree -{ - GenTreeCall* gtInlineCandidate; - - // Expression representing gtInlineCandidate's value (e.g. spill temp or - // expression from inlinee, or call itself for unsuccessful inlines). - // Substituted by UpdateInlineReturnExpressionPlaceHolder. - // This tree is nullptr during the import that created the GenTreeRetExpr and is set later - // when handling the actual inline candidate. - GenTree* gtSubstExpr; - - // The basic block that gtSubstExpr comes from, to enable propagating mandatory flags. - // nullptr for cases where gtSubstExpr is not a tree from the inlinee. - BasicBlock* gtSubstBB; - - GenTreeRetExpr(var_types type) - : GenTree(GT_RET_EXPR, type) - { - } -#if DEBUGGABLE_GENTREE - GenTreeRetExpr() - : GenTree() - { - } -#endif -}; - // In LIR there are no longer statements so debug information is inserted linearly using these nodes. struct GenTreeILOffset : public GenTree { diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 8c6c67fd6a3273..1fa342bc925577 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -36,7 +36,6 @@ GTNODE(CATCH_ARG , GenTree ,0,0,GTK_LEAF) // Excep GTNODE(LABEL , GenTree ,0,0,GTK_LEAF) // Jump-target GTNODE(JMP , GenTreeVal ,0,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function GTNODE(FTN_ADDR , GenTreeFptrVal ,0,0,GTK_LEAF) // Address of a function -GTNODE(RET_EXPR , GenTreeRetExpr ,0,0,GTK_LEAF|DBK_NOTLIR) // Place holder for the return expression from an inline candidate //----------------------------------------------------------------------------- // Constant nodes: diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h index 26f88d17909974..27fc0116e2bfd7 100644 --- a/src/coreclr/jit/gtstructs.h +++ b/src/coreclr/jit/gtstructs.h @@ -86,7 +86,6 @@ GTSTRUCT_3_SPECIAL(ArrCommon , GT_ARR_LENGTH, GT_MDARR_LENGTH, GT_MDARR_LOWER_BO GTSTRUCT_1(ArrLen , GT_ARR_LENGTH) GTSTRUCT_2(MDArr , GT_MDARR_LENGTH, GT_MDARR_LOWER_BOUND) GTSTRUCT_1(ArrElem , GT_ARR_ELEM) -GTSTRUCT_1(RetExpr , GT_RET_EXPR) GTSTRUCT_1(ILOffset , GT_IL_OFFSET) GTSTRUCT_2(CopyOrReload, GT_COPY, GT_RELOAD) GTSTRUCT_1(AddrMode , GT_LEA) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 0435ecf0b514e4..3926e08c294aff 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -420,22 +420,22 @@ void Compiler::impAppendStmt(Statement* stmt, unsigned chkLevel, bool checkConsu { dstVarDsc = lvaGetDesc(expr->AsLclVarCommon()); } - else if (expr->OperIs(GT_CALL, GT_RET_EXPR)) // The special case of calls with return buffers. + else if (expr->OperIs(GT_CALL)) // The special case of calls with return buffers. { - GenTree* call = expr->OperIs(GT_RET_EXPR) ? expr->AsRetExpr()->gtInlineCandidate : expr; + GenTreeCall* call = expr->AsCall(); - if (call->TypeIs(TYP_VOID) && call->AsCall()->ShouldHaveRetBufArg()) + if (call->TypeIs(TYP_VOID) && call->ShouldHaveRetBufArg()) { GenTree* retBuf; - if (call->AsCall()->ShouldHaveRetBufArg()) + if (call->ShouldHaveRetBufArg()) { - assert(call->AsCall()->gtArgs.HasRetBuffer()); - retBuf = call->AsCall()->gtArgs.GetRetBufferArg()->GetNode(); + assert(call->gtArgs.HasRetBuffer()); + retBuf = call->gtArgs.GetRetBufferArg()->GetNode(); } else { - assert(!call->AsCall()->gtArgs.HasThisPointer()); - retBuf = call->AsCall()->gtArgs.GetArgByIndex(0)->GetNode(); + assert(!call->gtArgs.HasThisPointer()); + retBuf = call->gtArgs.GetArgByIndex(0)->GetNode(); } assert(retBuf->TypeIs(TYP_I_IMPL, TYP_BYREF)); @@ -479,40 +479,6 @@ void Compiler::impAppendStmt(Statement* stmt, unsigned chkLevel, bool checkConsu { impSpillSpecialSideEff(); } - - if ((lastStmt != impLastStmt) && expr->OperIs(GT_RET_EXPR)) - { - GenTree* const call = expr->AsRetExpr()->gtInlineCandidate; - JITDUMP("\nimpAppendStmt: after sinking a local struct store into inline candidate [%06u], we need to " - "reorder subsequent spills.\n", - dspTreeID(call)); - - // Move all newly appended statements to just before the call's statement. - // First, find the statement containing the call. - // - Statement* insertBeforeStmt = lastStmt; - - while (insertBeforeStmt->GetRootNode() != call) - { - assert(insertBeforeStmt != impStmtList); - insertBeforeStmt = insertBeforeStmt->GetPrevStmt(); - } - - Statement* movingStmt = lastStmt->GetNextStmt(); - - JITDUMP("Moving " FMT_STMT " through " FMT_STMT " before " FMT_STMT "\n", movingStmt->GetID(), - impLastStmt->GetID(), insertBeforeStmt->GetID()); - - // We move these backwards, so must keep moving the insert - // point to keep them in order. - // - while (impLastStmt != lastStmt) - { - Statement* movingStmt = impExtractLastStmt(); - impInsertStmtBefore(movingStmt, insertBeforeStmt); - insertBeforeStmt = movingStmt; - } - } } impAppendStmtCheck(stmt, chkLevel); @@ -942,29 +908,6 @@ GenTree* Compiler::impStoreStruct(GenTree* store, } #endif // UNIX_AMD64_ABI } - else if (src->OperIs(GT_RET_EXPR)) - { - assert(src->AsRetExpr()->gtInlineCandidate->OperIs(GT_CALL)); - GenTreeCall* call = src->AsRetExpr()->gtInlineCandidate; - - if (call->ShouldHaveRetBufArg()) - { - // insert the return value buffer into the argument list as first byref parameter after 'this' - // TODO-Bug?: verify if flags matter here - GenTreeFlags indirFlags = GTF_EMPTY; - GenTree* destAddr = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags); - call->gtArgs.InsertAfterThisOrFirst(this, - NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer)); - - // now returns void, not a struct - src->gtType = TYP_VOID; - call->gtType = TYP_VOID; - - // We already have appended the write to 'dest' GT_CALL's args - // So now we just return an empty node (pruning the GT_RET_EXPR) - return src; - } - } else if (src->OperIs(GT_COMMA)) { if (pAfterStmt) @@ -1163,7 +1106,6 @@ GenTree* Compiler::impNormStructVal(GenTree* structVal, unsigned curLevel) switch (structVal->OperGet()) { case GT_CALL: - case GT_RET_EXPR: { unsigned lclNum = lvaGrabTemp(true DEBUGARG("spilled call-like call argument")); impStoreToTemp(lclNum, structVal, curLevel); @@ -1758,25 +1700,6 @@ bool Compiler::impSpillStackEntry(unsigned level, CORINFO_CLASS_HANDLE stkHnd = stackState.esStack[level].seTypeInfo.GetClassHandleForObjRef(); lvaSetClass(tnum, tree, stkHnd); } - - // If we're assigning a GT_RET_EXPR, note the temp over on the call, - // so the inliner can use it in case it needs a return spill temp. - if (tree->OperGet() == GT_RET_EXPR) - { - JITDUMP("\n*** see V%02u = GT_RET_EXPR, noting temp\n", tnum); - GenTreeCall* call = tree->AsRetExpr()->gtInlineCandidate->AsCall(); - if (call->IsGuardedDevirtualizationCandidate()) - { - for (uint8_t i = 0; i < call->GetInlineCandidatesCount(); i++) - { - call->GetGDVCandidateInfo(i)->preexistingSpillTemp = tnum; - } - } - else - { - call->AsCall()->GetSingleInlineCandidateInfo()->preexistingSpillTemp = tnum; - } - } } // The tree type may be modified by impStoreToTemp, so use the type of the lclVar. @@ -3433,75 +3356,6 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) GenTree* allocBoxStore = gtNewTempStore(impBoxTemp, op1); Statement* allocBoxStmt = impAppendTree(allocBoxStore, CHECK_SPILL_NONE, impCurStmtDI); - // If the exprToBox is a call that returns its value via a ret buf arg, - // move the store statement(s) before the call (which must be a top level tree). - // - // We do this because impStoreStructPtr (invoked below) will - // back-substitute into a call when it sees a GT_RET_EXPR and the call - // has a hidden buffer pointer, So we need to reorder things to avoid - // creating out-of-sequence IR. - // - if (varTypeIsStruct(exprToBox) && exprToBox->OperIs(GT_RET_EXPR)) - { - GenTreeCall* const call = exprToBox->AsRetExpr()->gtInlineCandidate->AsCall(); - - if (call->ShouldHaveRetBufArg()) - { - JITDUMP("Must insert newobj stmts for box before call [%06u]\n", dspTreeID(call)); - - // Walk back through the statements in this block, looking for the one - // that has this call as the root node. - // - // Because gtNewTempStore (above) may have added statements that - // feed into the actual store we need to move this set of added - // statements as a group. - // - // Note boxed allocations are side-effect free (no com or finalizer) so - // our only worries here are (correctness) not overlapping the box temp - // lifetime and (perf) stretching the temp lifetime across the inlinee - // body. - // - // Since this is an inline candidate, we must be optimizing, and so we have - // a unique box temp per call. So no worries about overlap. - // - assert(!opts.OptimizationDisabled()); - - // Lifetime stretching could addressed with some extra cleverness--sinking - // the allocation back down to just before the copy, once we figure out - // where the copy is. We defer for now. - // - Statement* insertBeforeStmt = cursor; - noway_assert(insertBeforeStmt != nullptr); - - while (true) - { - if (insertBeforeStmt->GetRootNode() == call) - { - break; - } - - // If we've searched all the statements in the block and failed to - // find the call, then something's wrong. - // - noway_assert(insertBeforeStmt != impStmtList); - - insertBeforeStmt = insertBeforeStmt->GetPrevStmt(); - } - - // Found the call. Move the statements comprising the store. - // - JITDUMP("Moving " FMT_STMT "..." FMT_STMT " before " FMT_STMT "\n", cursor->GetNextStmt()->GetID(), - allocBoxStmt->GetID(), insertBeforeStmt->GetID()); - assert(allocBoxStmt == impLastStmt); - do - { - Statement* movingStmt = impExtractLastStmt(); - impInsertStmtBefore(movingStmt, insertBeforeStmt); - insertBeforeStmt = movingStmt; - } while (impLastStmt != cursor); - } - } - // Create a pointer to the box payload in op1. // op1 = gtNewLclvNode(impBoxTemp, TYP_REF); @@ -10992,7 +10846,6 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) #endif InlineCandidateInfo* inlCandInfo = impInlineInfo->inlineCandidateInfo; - GenTreeRetExpr* inlRetExpr = inlCandInfo->retExpr; // Make sure the type matches the original call. var_types returnType = genActualType(op2->gtType); @@ -11045,12 +10898,8 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) { // Do we have to normalize? var_types fncRealRetType = JITtype2varType(info.compMethodInfo->args.retType); - // For RET_EXPR get the type info from the call. Regardless - // of whether it ends up inlined or not normalization will - // happen as part of that function's codegen. - GenTree* returnedTree = op2->OperIs(GT_RET_EXPR) ? op2->AsRetExpr()->gtInlineCandidate : op2; - if ((varTypeIsSmall(returnedTree->TypeGet()) || varTypeIsSmall(fncRealRetType)) && - fgCastNeeded(returnedTree, fncRealRetType)) + if ((varTypeIsSmall(op2->TypeGet()) || varTypeIsSmall(fncRealRetType)) && + fgCastNeeded(op2, fncRealRetType)) { // Small-typed return values are normalized by the callee op2 = gtNewCastNode(TYP_INT, op2, false, fncRealRetType); @@ -11069,7 +10918,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) bool isNonNull = false; CORINFO_CLASS_HANDLE returnClsHnd = gtGetClassHandle(op2, &isExact, &isNonNull); - if (inlRetExpr->gtSubstExpr == nullptr) + if (inlCandInfo->result.substExpr == nullptr) { // This is the first return, so best known type is the type // of this return value. @@ -11093,12 +10942,12 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) op2 = tmpOp2; #ifdef DEBUG - if (inlRetExpr->gtSubstExpr != nullptr) + if (inlCandInfo->result.substExpr != nullptr) { // Some other block(s) have seen the CEE_RET first. // Better they spilled to the same temp. - assert(inlRetExpr->gtSubstExpr->gtOper == GT_LCL_VAR); - assert(inlRetExpr->gtSubstExpr->AsLclVarCommon()->GetLclNum() == + assert(inlCandInfo->result.substExpr->gtOper == GT_LCL_VAR); + assert(inlCandInfo->result.substExpr->AsLclVarCommon()->GetLclNum() == op2->AsLclVarCommon()->GetLclNum()); } #endif @@ -11113,7 +10962,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) #endif // Report the return expression - inlRetExpr->gtSubstExpr = op2; + inlCandInfo->result.substExpr = op2; } else { @@ -11139,16 +10988,16 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) if (fgNeedReturnSpillTemp()) { - if (inlRetExpr->gtSubstExpr == nullptr) + if (inlCandInfo->result.substExpr == nullptr) { // The inlinee compiler has figured out the type of the temp already. Use it here. - inlRetExpr->gtSubstExpr = + inlCandInfo->result.substExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType); } } else { - inlRetExpr->gtSubstExpr = op2; + inlCandInfo->result.substExpr = op2; } } else // The struct was to be returned via a return buffer. @@ -11159,24 +11008,24 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) if (fgNeedReturnSpillTemp()) { // If this is the first return we have seen set the retExpr. - if (inlRetExpr->gtSubstExpr == nullptr) + if (inlCandInfo->result.substExpr == nullptr) { - inlRetExpr->gtSubstExpr = + inlCandInfo->result.substExpr = impStoreStructPtr(dest, gtNewLclvNode(lvaInlineeReturnSpillTemp, info.compRetType), CHECK_SPILL_ALL); } } else { - inlRetExpr->gtSubstExpr = impStoreStructPtr(dest, op2, CHECK_SPILL_ALL); + inlCandInfo->result.substExpr = impStoreStructPtr(dest, op2, CHECK_SPILL_ALL); } } } - // If gtSubstExpr is an arbitrary tree then we may need to + // If substExpr is an arbitrary tree then we may need to // propagate mandatory "IR presence" flags (e.g. BBF_HAS_IDX_LEN) // to the BB it ends up in. - inlRetExpr->gtSubstBB = fgNeedReturnSpillTemp() ? nullptr : compCurBB; + inlCandInfo->result.substBB = fgNeedReturnSpillTemp() ? nullptr : compCurBB; } } @@ -12926,9 +12775,8 @@ void Compiler::impCanInlineIL(CORINFO_METHOD_HANDLE fncHandle, // impInlineRecordArgInfo: record information about an inline candidate argument // // Arguments: -// pInlineInfo - inline info for the inline candidate +// argInfo - arg info // arg - the caller argument -// argNum - logical index of this argument // inlineResult - result of ongoing inline evaluation // // Notes: @@ -12939,17 +12787,13 @@ void Compiler::impCanInlineIL(CORINFO_METHOD_HANDLE fncHandle, // pass the argument into the inlinee. void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, + InlArgInfo* argInfo, CallArg* arg, - unsigned argNum, InlineResult* inlineResult) { - InlArgInfo* inlCurArgInfo = &pInlineInfo->inlArgInfo[argNum]; - - inlCurArgInfo->arg = arg; + argInfo->arg = arg; GenTree* curArgVal = arg->GetNode(); - assert(!curArgVal->OperIs(GT_RET_EXPR)); - GenTree* lclVarTree; const bool isAddressInLocal = impIsAddressInLocal(curArgVal, &lclVarTree); if (isAddressInLocal) @@ -12958,7 +12802,7 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, if (varTypeIsStruct(varDsc)) { - inlCurArgInfo->argIsByRefToStructLocal = true; + argInfo->argIsByRefToStructLocal = true; #ifdef FEATURE_SIMD if (varTypeIsSIMD(varDsc)) { @@ -12973,22 +12817,19 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, if (curArgVal->gtFlags & GTF_ALL_EFFECT) { - inlCurArgInfo->argHasGlobRef = (curArgVal->gtFlags & GTF_GLOB_REF) != 0; - inlCurArgInfo->argHasSideEff = (curArgVal->gtFlags & (GTF_ALL_EFFECT & ~GTF_GLOB_REF)) != 0; + argInfo->argHasGlobRef = (curArgVal->gtFlags & GTF_GLOB_REF) != 0; + argInfo->argHasSideEff = (curArgVal->gtFlags & (GTF_ALL_EFFECT & ~GTF_GLOB_REF)) != 0; } if (curArgVal->gtOper == GT_LCL_VAR) { - inlCurArgInfo->argIsLclVar = true; - - /* Remember the "original" argument number */ - INDEBUG(curArgVal->AsLclVar()->gtLclILoffs = argNum;) + argInfo->argIsLclVar = true; } if (impIsInvariant(curArgVal)) { - inlCurArgInfo->argIsInvariant = true; - if (inlCurArgInfo->argIsThis && (curArgVal->gtOper == GT_CNS_INT) && (curArgVal->AsIntCon()->gtIconVal == 0)) + argInfo->argIsInvariant = true; + if (argInfo->argIsThis && (curArgVal->gtOper == GT_CNS_INT) && (curArgVal->AsIntCon()->gtIconVal == 0)) { // Abort inlining at this call site inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_HAS_NULL_THIS); @@ -12997,13 +12838,13 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, } else if (gtIsTypeof(curArgVal)) { - inlCurArgInfo->argIsInvariant = true; - inlCurArgInfo->argHasSideEff = false; + argInfo->argIsInvariant = true; + argInfo->argHasSideEff = false; } bool isExact = false; bool isNonNull = false; - inlCurArgInfo->argIsExact = (gtGetClassHandle(curArgVal, &isExact, &isNonNull) != NO_CLASS_HANDLE) && isExact; + argInfo->argIsExact = (gtGetClassHandle(curArgVal, &isExact, &isNonNull) != NO_CLASS_HANDLE) && isExact; // If the arg is a local that is address-taken, we can't safely // directly substitute it into the inlinee. @@ -13014,51 +12855,51 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, // which is safe in this case. // // Instead mark the arg as having a caller local ref. - if (!inlCurArgInfo->argIsInvariant && gtHasLocalsWithAddrOp(curArgVal)) + if (!argInfo->argIsInvariant && gtHasLocalsWithAddrOp(curArgVal)) { - inlCurArgInfo->argHasCallerLocalRef = true; + argInfo->argHasCallerLocalRef = true; } #ifdef DEBUG if (verbose) { - if (inlCurArgInfo->argIsThis) + if (arg->GetWellKnownArg() != WellKnownArg::None) { - printf("thisArg:"); + printf("%s:", getWellKnownArgName(arg->GetWellKnownArg())); } else { - printf("\nArgument #%u:", argNum); + printf("Argument #%u:", pInlineInfo->iciCall->gtArgs.GetIndex(arg)); } - if (inlCurArgInfo->argIsLclVar) + if (argInfo->argIsLclVar) { printf(" is a local var"); } - if (inlCurArgInfo->argIsInvariant) + if (argInfo->argIsInvariant) { printf(" is a constant or invariant"); } - if (inlCurArgInfo->argHasGlobRef) + if (argInfo->argHasGlobRef) { printf(" has global refs"); } - if (inlCurArgInfo->argHasCallerLocalRef) + if (argInfo->argHasCallerLocalRef) { printf(" has caller local ref"); } - if (inlCurArgInfo->argHasSideEff) + if (argInfo->argHasSideEff) { printf(" has side effects"); } - if (inlCurArgInfo->argHasLdargaOp) + if (argInfo->argHasLdargaOp) { printf(" has ldarga effect"); } - if (inlCurArgInfo->argHasStargOp) + if (argInfo->argHasStargOp) { printf(" has starg effect"); } - if (inlCurArgInfo->argIsByRefToStructLocal) + if (argInfo->argIsByRefToStructLocal) { printf(" is byref to a struct local"); } @@ -13125,19 +12966,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) case WellKnownArg::InstParam: { InlArgInfo* ctxInfo = new (this, CMK_Inlining) InlArgInfo{}; - ctxInfo->arg = &arg; - ctxInfo->argTmpNum = BAD_VAR_NUM; - ctxInfo->argIsLclVar = arg.GetNode()->OperIs(GT_LCL_VAR); - if (arg.GetNode()->IsCnsIntOrI()) - { - ctxInfo->argIsInvariant = true; - } - else - { - // Conservative approach - ctxInfo->argHasSideEff = true; - ctxInfo->argHasGlobRef = true; - } + impInlineRecordArgInfo(pInlineInfo, ctxInfo, &arg, inlineResult); pInlineInfo->inlInstParamArgInfo = ctxInfo; continue; } @@ -13146,7 +12975,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) } arg.SetEarlyNode(gtFoldExpr(arg.GetEarlyNode())); - impInlineRecordArgInfo(pInlineInfo, &arg, ilArgCnt, inlineResult); + impInlineRecordArgInfo(pInlineInfo, &inlArgInfo[ilArgCnt], &arg, inlineResult); if (inlineResult->IsFailure()) { @@ -13548,7 +13377,6 @@ GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& l GenTree* op1 = nullptr; GenTree* argNode = argInfo.arg->GetNode(); - assert(!argNode->OperIs(GT_RET_EXPR)); if (argInfo.argIsInvariant && !argCanBeModified) { diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index a90a23c405cc2d..975ae5179b9e58 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -1402,32 +1402,6 @@ var_types Compiler::impImportCall(OPCODE opcode, assert(opts.OptEnabled(CLFLG_INLINING)); assert(!isFatPointerCandidate); // We should not try to inline calli. - - // Make the call its own tree (spill the stack if needed). - // Do not consume the debug info here. This is particularly - // important if we give up on the inline, in which case the - // call will typically end up in the statement that contains - // the GT_RET_EXPR that we leave on the stack. - impAppendTree(call, CHECK_SPILL_ALL, impCurStmtDI, false); - - // TODO: Still using the widened type. - GenTreeRetExpr* retExpr = gtNewInlineCandidateReturnExpr(call->AsCall(), genActualType(callRetTyp)); - - // Link the retExpr to the call so if necessary we can manipulate it later. - if (origCall->IsGuardedDevirtualizationCandidate()) - { - for (uint8_t i = 0; i < origCall->GetInlineCandidatesCount(); i++) - { - origCall->GetGDVCandidateInfo(i)->retExpr = retExpr; - } - } - else - { - origCall->GetSingleInlineCandidateInfo()->retExpr = retExpr; - } - - // Propagate retExpr as the placeholder for the call. - call = retExpr; } else { @@ -3835,11 +3809,11 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, op1 = op1->AsCall()->gtArgs.GetArgByIndex(0)->GetNode(); retNode = op1; } - else if (op1->OperIs(GT_CALL, GT_RET_EXPR)) + else if (op1->OperIs(GT_CALL)) { // Skip roundtrip "handle -> RuntimeType -> handle" for // RuntimeTypeHandle.ToIntPtr(typeof(T).TypeHandle) - GenTreeCall* call = op1->IsCall() ? op1->AsCall() : op1->AsRetExpr()->gtInlineCandidate; + GenTreeCall* call = op1->AsCall(); if (lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_RuntimeType_get_TypeHandle) { // Check that the arg is CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE helper call @@ -3847,11 +3821,6 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, if (arg->IsHelperCall() && gtIsTypeHandleToRuntimeTypeHelper(arg->AsCall())) { impPopStack(); - if (op1->OperIs(GT_RET_EXPR)) - { - // Bash the RET_EXPR's call to no-op since it's unused now - op1->AsRetExpr()->gtInlineCandidate->gtBashToNOP(); - } // Skip roundtrip and return the type handle directly retNode = arg->AsCall()->gtArgs.GetArgByIndex(0)->GetNode(); } @@ -4087,9 +4056,9 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, case NI_System_Threading_Thread_get_ManagedThreadId: { - if (impStackTop().val->OperIs(GT_RET_EXPR)) + if (impStackTop().val->IsCall()) { - GenTreeCall* call = impStackTop().val->AsRetExpr()->gtInlineCandidate->AsCall(); + GenTreeCall* call = impStackTop().val->AsCall(); if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) { if (lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_Threading_Thread_get_CurrentThread) @@ -6585,71 +6554,6 @@ void Compiler::impCheckForPInvokeCall( } } -//------------------------------------------------------------------------ -// SpillRetExprHelper: iterate through arguments tree and spill ret_expr to local variables. -// -class SpillRetExprHelper -{ -public: - SpillRetExprHelper(Compiler* comp) - : comp(comp) - { - } - - void StoreRetExprResultsInArgs(GenTreeCall* call) - { - for (CallArg& arg : call->gtArgs.Args()) - { - comp->fgWalkTreePre(&arg.EarlyNodeRef(), SpillRetExprVisitor, this); - } - } - -private: - static Compiler::fgWalkResult SpillRetExprVisitor(GenTree** pTree, Compiler::fgWalkData* fgWalkPre) - { - assert((pTree != nullptr) && (*pTree != nullptr)); - GenTree* tree = *pTree; - if ((tree->gtFlags & GTF_CALL) == 0) - { - // Trees with ret_expr are marked as GTF_CALL. - return Compiler::WALK_SKIP_SUBTREES; - } - if (tree->OperGet() == GT_RET_EXPR) - { - SpillRetExprHelper* walker = static_cast(fgWalkPre->pCallbackData); - walker->StoreRetExprAsLocalVar(pTree); - } - return Compiler::WALK_CONTINUE; - } - - void StoreRetExprAsLocalVar(GenTree** pRetExpr) - { - GenTree* retExpr = *pRetExpr; - assert(retExpr->OperGet() == GT_RET_EXPR); - const unsigned tmp = comp->lvaGrabTemp(true DEBUGARG("spilling ret_expr")); - JITDUMP("Storing return expression [%06u] to a local var V%02u.\n", comp->dspTreeID(retExpr), tmp); - comp->impStoreToTemp(tmp, retExpr, Compiler::CHECK_SPILL_NONE); - *pRetExpr = comp->gtNewLclvNode(tmp, retExpr->TypeGet()); - - assert(comp->lvaTable[tmp].lvSingleDef == 0); - comp->lvaTable[tmp].lvSingleDef = 1; - JITDUMP("Marked V%02u as a single def temp\n", tmp); - if (retExpr->TypeGet() == TYP_REF) - { - bool isExact = false; - bool isNonNull = false; - CORINFO_CLASS_HANDLE retClsHnd = comp->gtGetClassHandle(retExpr, &isExact, &isNonNull); - if (retClsHnd != nullptr) - { - comp->lvaSetClass(tmp, retClsHnd, isExact); - } - } - } - -private: - Compiler* comp; -}; - //------------------------------------------------------------------------ // addFatPointerCandidate: mark the call and the method, that they have a fat pointer candidate. // Spill ret_expr in the call node, because they can't be cloned. @@ -6662,8 +6566,6 @@ void Compiler::addFatPointerCandidate(GenTreeCall* call) JITDUMP("Marking call [%06u] as fat pointer candidate\n", dspTreeID(call)); setMethodHasFatPointer(); call->SetFatPointerCandidate(); - SpillRetExprHelper helper(this); - helper.StoreRetExprResultsInArgs(call); } //------------------------------------------------------------------------ @@ -7386,11 +7288,6 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, classHandle != NO_CLASS_HANDLE ? eeGetClassName(classHandle) : eeGetMethodFullName(methodHandle)); setMethodHasGuardedDevirtualization(); - // Spill off any GT_RET_EXPR subtrees so we can clone the call. - // - SpillRetExprHelper helper(this); - helper.StoreRetExprResultsInArgs(call); - // Gather some information for later. Note we actually allocate InlineCandidateInfo // here, as the devirtualized half of this call will likely become an inline candidate. // @@ -9237,7 +9134,8 @@ void Compiler::impCheckCanInline(GenTreeCall* call, pInfo->ilCallerHandle = pParam->pThis->info.compMethodHnd; pInfo->clsHandle = clsHandle; pInfo->exactContextHandle = pParam->exactContextHnd; - pInfo->retExpr = nullptr; + pInfo->result.substExpr = nullptr; + pInfo->result.substBB = nullptr; pInfo->preexistingSpillTemp = BAD_VAR_NUM; pInfo->clsAttr = clsAttr; pInfo->methAttr = pParam->methAttr; diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index 649b466a26f6e3..0ab6e85085d372 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -336,20 +336,7 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, // GenTreeStrCon* Compiler::impGetStrConFromSpan(GenTree* span) { - GenTreeCall* argCall = nullptr; - if (span->OperIs(GT_RET_EXPR)) - { - // NOTE: we don't support chains of RET_EXPR here - GenTree* inlineCandidate = span->AsRetExpr()->gtInlineCandidate; - if (inlineCandidate->OperIs(GT_CALL)) - { - argCall = inlineCandidate->AsCall(); - } - } - else if (span->OperIs(GT_CALL)) - { - argCall = span->AsCall(); - } + GenTreeCall* argCall = span->IsCall() ? span->AsCall() : nullptr; if ((argCall != nullptr) && ((argCall->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0)) { @@ -693,20 +680,6 @@ GenTree* Compiler::impUtf16SpanComparison(StringComparisonKind kind, CORINFO_SIG { impPopStack(); } - - // We have to clean up GT_RET_EXPR for String.op_Implicit or MemoryExtensions.AsSpans - if ((spanObj != op1) && op1->OperIs(GT_RET_EXPR)) - { - GenTree* inlineCandidate = op1->AsRetExpr()->gtInlineCandidate; - assert(inlineCandidate->IsCall()); - inlineCandidate->gtBashToNOP(); - } - else if ((spanObj != op2) && op2->OperIs(GT_RET_EXPR)) - { - GenTree* inlineCandidate = op2->AsRetExpr()->gtInlineCandidate; - assert(inlineCandidate->IsCall()); - inlineCandidate->gtBashToNOP(); - } } return unrolled; } diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index f2149f62546b87..fe5cac4afbe9e7 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -780,84 +780,84 @@ class IndirectCallTransformer // virtual void FixupRetExpr() { - // If call returns a value, we need to copy it to a temp, and - // bash the associated GT_RET_EXPR to refer to the temp instead - // of the call. - // - // Note implicit by-ref returns should have already been converted - // so any struct copy we induce here should be cheap. - InlineCandidateInfo* const inlineInfo = origCall->GetGDVCandidateInfo(0); - - if (!origCall->TypeIs(TYP_VOID)) - { - // If there's a spill temp already associated with this inline candidate, - // use that instead of allocating a new temp. - // - returnTemp = inlineInfo->preexistingSpillTemp; - - if (returnTemp != BAD_VAR_NUM) - { - JITDUMP("Reworking call(s) to return value via a existing return temp V%02u\n", returnTemp); - - // We will be introducing multiple defs for this temp, so make sure - // it is no longer marked as single def. - // - // Otherwise, we could make an incorrect type deduction. Say the - // original call site returns a B, but after we devirtualize along the - // GDV happy path we see that method returns a D. We can't then assume that - // the return temp is type D, because we don't know what type the fallback - // path returns. So we have to stick with the current type for B as the - // return type. - // - // Note local vars always live in the root method's symbol table. So we - // need to use the root compiler for lookup here. - // - LclVarDsc* const returnTempLcl = compiler->impInlineRoot()->lvaGetDesc(returnTemp); - - if (returnTempLcl->lvSingleDef == 1) - { - // In this case it's ok if we already updated the type assuming single def, - // we just don't want any further updates. - // - JITDUMP("Return temp V%02u is no longer a single def temp\n", returnTemp); - returnTempLcl->lvSingleDef = 0; - } - } - else - { - returnTemp = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt return temp")); - JITDUMP("Reworking call(s) to return value via a new temp V%02u\n", returnTemp); - } - - if (varTypeIsStruct(origCall)) - { - compiler->lvaSetStruct(returnTemp, origCall->gtRetClsHnd, false); - } - - GenTree* tempTree = compiler->gtNewLclvNode(returnTemp, origCall->TypeGet()); - - JITDUMP("Linking GT_RET_EXPR [%06u] to refer to temp V%02u\n", compiler->dspTreeID(inlineInfo->retExpr), - returnTemp); - - inlineInfo->retExpr->gtSubstExpr = tempTree; - } - else if (inlineInfo->retExpr != nullptr) - { - // We still oddly produce GT_RET_EXPRs for some void - // returning calls. Just bash the ret expr to a NOP. - // - // Todo: consider bagging creation of these RET_EXPRs. The only possible - // benefit they provide is stitching back larger trees for failed inlines - // of void-returning methods. But then the calls likely sit in commas and - // the benefit of a larger tree is unclear. - JITDUMP("Linking GT_RET_EXPR [%06u] for VOID return to NOP\n", - compiler->dspTreeID(inlineInfo->retExpr)); - inlineInfo->retExpr->gtSubstExpr = compiler->gtNewNothingNode(); - } - else - { - // We do not produce GT_RET_EXPRs for CTOR calls, so there is nothing to patch. - } + //// If call returns a value, we need to copy it to a temp, and + //// bash the associated GT_RET_EXPR to refer to the temp instead + //// of the call. + //// + //// Note implicit by-ref returns should have already been converted + //// so any struct copy we induce here should be cheap. + //InlineCandidateInfo* const inlineInfo = origCall->GetGDVCandidateInfo(0); + + //if (!origCall->TypeIs(TYP_VOID)) + //{ + // // If there's a spill temp already associated with this inline candidate, + // // use that instead of allocating a new temp. + // // + // returnTemp = inlineInfo->preexistingSpillTemp; + + // if (returnTemp != BAD_VAR_NUM) + // { + // JITDUMP("Reworking call(s) to return value via a existing return temp V%02u\n", returnTemp); + + // // We will be introducing multiple defs for this temp, so make sure + // // it is no longer marked as single def. + // // + // // Otherwise, we could make an incorrect type deduction. Say the + // // original call site returns a B, but after we devirtualize along the + // // GDV happy path we see that method returns a D. We can't then assume that + // // the return temp is type D, because we don't know what type the fallback + // // path returns. So we have to stick with the current type for B as the + // // return type. + // // + // // Note local vars always live in the root method's symbol table. So we + // // need to use the root compiler for lookup here. + // // + // LclVarDsc* const returnTempLcl = compiler->impInlineRoot()->lvaGetDesc(returnTemp); + + // if (returnTempLcl->lvSingleDef == 1) + // { + // // In this case it's ok if we already updated the type assuming single def, + // // we just don't want any further updates. + // // + // JITDUMP("Return temp V%02u is no longer a single def temp\n", returnTemp); + // returnTempLcl->lvSingleDef = 0; + // } + // } + // else + // { + // returnTemp = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt return temp")); + // JITDUMP("Reworking call(s) to return value via a new temp V%02u\n", returnTemp); + // } + + // if (varTypeIsStruct(origCall)) + // { + // compiler->lvaSetStruct(returnTemp, origCall->gtRetClsHnd, false); + // } + + // GenTree* tempTree = compiler->gtNewLclvNode(returnTemp, origCall->TypeGet()); + + // JITDUMP("Linking GT_RET_EXPR [%06u] to refer to temp V%02u\n", compiler->dspTreeID(inlineInfo->retExpr), + // returnTemp); + + // inlineInfo->retExpr->gtSubstExpr = tempTree; + //} + //else if (inlineInfo->retExpr != nullptr) + //{ + // // We still oddly produce GT_RET_EXPRs for some void + // // returning calls. Just bash the ret expr to a NOP. + // // + // // Todo: consider bagging creation of these RET_EXPRs. The only possible + // // benefit they provide is stitching back larger trees for failed inlines + // // of void-returning methods. But then the calls likely sit in commas and + // // the benefit of a larger tree is unclear. + // JITDUMP("Linking GT_RET_EXPR [%06u] for VOID return to NOP\n", + // compiler->dspTreeID(inlineInfo->retExpr)); + // inlineInfo->retExpr->gtSubstExpr = compiler->gtNewNothingNode(); + //} + //else + //{ + // // We do not produce GT_RET_EXPRs for CTOR calls, so there is nothing to patch. + //} } //------------------------------------------------------------------------ @@ -1015,34 +1015,10 @@ class IndirectCallTransformer // Re-establish this call as an inline candidate. // - GenTreeRetExpr* oldRetExpr = inlineInfo->retExpr; inlineInfo->clsHandle = compiler->info.compCompHnd->getMethodClass(methodHnd); inlineInfo->exactContextHandle = context; inlineInfo->preexistingSpillTemp = returnTemp; call->SetSingleInlineCandidateInfo(inlineInfo); - - // If there was a ret expr for this call, we need to create a new one - // and append it just after the call. - // - // Note the original GT_RET_EXPR has been linked to a temp. - // we set all this up in FixupRetExpr(). - if (oldRetExpr != nullptr) - { - inlineInfo->retExpr = compiler->gtNewInlineCandidateReturnExpr(call, call->TypeGet()); - - GenTree* newRetExpr = inlineInfo->retExpr; - - if (returnTemp != BAD_VAR_NUM) - { - newRetExpr = compiler->gtNewTempStore(returnTemp, newRetExpr); - } - else - { - // We should always have a return temp if we return results by value - assert(origCall->TypeGet() == TYP_VOID); - } - compiler->fgNewStmtAtEnd(block, newRetExpr); - } } } @@ -1366,21 +1342,6 @@ class IndirectCallTransformer return fgWalkResult::WALK_ABORT; } } - else if (node->OperIs(GT_RET_EXPR)) - { - // If this is a RET_EXPR that we already know how to substitute then it is the - // "fixed-up" RET_EXPR from a previous GDV candidate. In that case we can - // substitute it right here to make it eligibile for cloning. - if (node->AsRetExpr()->gtSubstExpr != nullptr) - { - assert(node->AsRetExpr()->gtInlineCandidate->IsGuarded()); - *use = node->AsRetExpr()->gtSubstExpr; - return fgWalkResult::WALK_CONTINUE; - } - - m_unclonableNode = node; - return fgWalkResult::WALK_ABORT; - } m_nodeCount++; return fgWalkResult::WALK_CONTINUE; diff --git a/src/coreclr/jit/inline.cpp b/src/coreclr/jit/inline.cpp index aba964f2ce613e..e4c85784a4ce6f 100644 --- a/src/coreclr/jit/inline.cpp +++ b/src/coreclr/jit/inline.cpp @@ -634,7 +634,7 @@ void InlineContext::DumpXml(FILE* file, unsigned indent) // description - string describing the context of the decision InlineResult::InlineResult( - Compiler* compiler, GenTreeCall* call, Statement* stmt, const char* description, bool doNotReport) + Compiler* compiler, GenTreeCall* call, InlineContext* context, const char* description, bool doNotReport) : m_RootCompiler(nullptr) , m_Policy(nullptr) , m_Call(call) @@ -655,17 +655,12 @@ InlineResult::InlineResult( m_Policy = InlinePolicy::GetPolicy(m_RootCompiler, isPrejitRoot); // Pass along some optional information to the policy. - if (stmt != nullptr) - { - m_InlineContext = stmt->GetDebugInfo().GetInlineContext(); - m_Policy->NoteContext(m_InlineContext); + m_InlineContext = context; + m_Policy->NoteContext(m_InlineContext); #if defined(DEBUG) - m_Policy->NoteOffset(call->gtRawILOffset); -#else - m_Policy->NoteOffset(stmt->GetDebugInfo().GetLocation().GetOffset()); + m_Policy->NoteOffset(call->gtRawILOffset); #endif // defined(DEBUG) - } // Get method handle for caller. Note we use the // handle for the "immediate" caller here. @@ -1290,13 +1285,6 @@ InlineContext* InlineStrategy::NewContext(InlineContext* parentContext, Statemen context->m_ILSize = info->methInfo.ILCodeSize; context->m_ActualCallOffset = info->ilOffset; context->m_RuntimeContext = info->exactContextHandle; - -#ifdef DEBUG - // All inline candidates should get their own statements that have - // appropriate debug info (or no debug info). - InlineContext* diInlineContext = stmt->GetDebugInfo().GetInlineContext(); - assert(diInlineContext == nullptr || diInlineContext == parentContext); -#endif } else { @@ -1305,7 +1293,7 @@ InlineContext* InlineStrategy::NewContext(InlineContext* parentContext, Statemen } // We currently store both the statement location (used when reporting - // only-style mappings) and the actual call offset (used when reporting the + // old-style mappings) and the actual call offset (used when reporting the // inline tree for rich debug info). // These are not always the same, consider e.g. // ldarg.0 diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h index 426e6575973d4c..5e38d64b108214 100644 --- a/src/coreclr/jit/inline.h +++ b/src/coreclr/jit/inline.h @@ -233,10 +233,12 @@ class InlinePolicy { (void)context; } +#ifdef DEBUG virtual void NoteOffset(IL_OFFSET offset) { (void)offset; } +#endif // Policy determinations virtual void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) = 0; @@ -345,7 +347,7 @@ class InlineResult // Construct a new InlineResult to help evaluate a // particular call for inlining. InlineResult( - Compiler* compiler, GenTreeCall* call, Statement* stmt, const char* description, bool doNotReport = false); + Compiler* compiler, GenTreeCall* call, InlineContext* context, const char* description, bool doNotReport = false); // Construct a new InlineResult to evaluate a particular // method to see if it is inlineable. @@ -586,6 +588,13 @@ struct HandleHistogramProfileCandidateInfo unsigned probeIndex; }; +// TODO: move to InlineInfo +struct InlineIRResult +{ + GenTree* substExpr; + BasicBlock* substBB; +}; + // InlineCandidateInfo provides basic information about a particular // inline candidate. // @@ -615,8 +624,7 @@ struct InlineCandidateInfo : public HandleHistogramProfileCandidateInfo // CORINFO_CONTEXT_HANDLE originalContextHandle; - // The GT_RET_EXPR node linking back to the inline candidate. - GenTreeRetExpr* retExpr; + InlineIRResult result; unsigned preexistingSpillTemp; unsigned clsAttr; @@ -670,6 +678,23 @@ struct InlLclVarInfo unsigned char lclIsPinned : 1; }; +class StatementListBuilder +{ + Statement* m_head = nullptr; + Statement* m_tail = nullptr; + +public: + bool Empty() + { + return m_head == nullptr; + } + + void Append(Statement* stmt); + + void InsertIntoBlockAtBeginning(BasicBlock* block); + void InsertIntoBlockBefore(BasicBlock* block, Statement* before); +}; + // InlineInfo provides detailed information about a particular inline candidate. struct InlineInfo @@ -712,6 +737,9 @@ struct InlineInfo GenTreeCall* iciCall; // The GT_CALL node to be inlined. Statement* iciStmt; // The statement iciCall is in. BasicBlock* iciBlock; // The basic block iciStmt is in. + + StatementListBuilder setupStatements; + StatementListBuilder teardownStatements; }; // InlineContext tracks the inline history in a method. diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 6256ee0c37799b..1e5dd80ccbbfd4 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -483,7 +483,7 @@ GenTree* Compiler::impSIMDPopStack() assert(varTypeIsSIMDOrMask(tree)); // Handle calls that may return the struct via a return buffer. - if (tree->OperIs(GT_CALL, GT_RET_EXPR)) + if (tree->OperIs(GT_CALL)) { tree = impNormStructVal(tree, CHECK_SPILL_ALL); } From be05e2dfdc6b795e6db05b88f21050e973bc7507 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Jan 2025 19:38:39 +0100 Subject: [PATCH 02/23] Fix GDV --- src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/importercalls.cpp | 96 ++++++++- src/coreclr/jit/indirectcalltransformer.cpp | 223 +++++--------------- 3 files changed, 149 insertions(+), 172 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9b65ff47b5f487..2d90611326367b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9771,7 +9771,7 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree) // You must use gtCloneCandidateCall for these calls (and then do appropriate other fixup) if (tree->AsCall()->IsInlineCandidate() || tree->AsCall()->IsGuardedDevirtualizationCandidate()) { - NO_WAY("Cloning of calls with associated GT_RET_EXPR nodes is not supported"); + NO_WAY("Cloning of calls containing inline candidates is not supported"); } copy = gtCloneExprCallHelper(tree->AsCall()); diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 975ae5179b9e58..bd4359848d085e 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -1402,6 +1402,22 @@ var_types Compiler::impImportCall(OPCODE opcode, assert(opts.OptEnabled(CLFLG_INLINING)); assert(!isFatPointerCandidate); // We should not try to inline calli. + + if (isGuardedDevirtualizationCandidate) + { + unsigned lclNum = lvaGrabTemp(false DEBUGARG("GDV candidate result")); + LclVarDsc* varDsc = lvaGetDesc(lclNum); + // Keep the information about small typedness to avoid + // inserting unnecessary casts around normalization. + if (call->IsCall() && varTypeIsSmall(call->AsCall()->gtReturnType)) + { + assert(call->AsCall()->NormalizesSmallTypesOnReturn()); + varDsc->lvType = call->AsCall()->gtReturnType; + } + + impStoreToTemp(lclNum, call, CHECK_SPILL_ALL, nullptr, impCurStmtDI); + call = gtNewLclvNode(lclNum, genActualType(callRetTyp)); + } } else { @@ -5933,7 +5949,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, else { uint32_t cns1 = static_cast(op1->AsIntConCommon()->IconValue()); - result = gtNewIconNode(BitOperations::RotateLeft(cns1, cns2), baseType); + result = gtNewIconNode((int32_t)BitOperations::RotateLeft(cns1, cns2), baseType); } break; } @@ -5982,7 +5998,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, else { uint32_t cns1 = static_cast(op1->AsIntConCommon()->IconValue()); - result = gtNewIconNode(BitOperations::RotateRight(cns1, cns2), baseType); + result = gtNewIconNode((int32_t)BitOperations::RotateRight(cns1, cns2), baseType); } break; } @@ -6554,9 +6570,75 @@ void Compiler::impCheckForPInvokeCall( } } +//------------------------------------------------------------------------ +// SpillInlineCandidatesHelper: iterate through arguments tree and spill inline candidates to local variables. +// +class SpillInlineCandidatesHelper +{ +public: + SpillInlineCandidatesHelper(Compiler* comp) + : comp(comp) + { + } + + void StoreInlineCandidateResultsInArgs(GenTreeCall* call) + { + for (CallArg& arg : call->gtArgs.Args()) + { + comp->fgWalkTreePre(&arg.EarlyNodeRef(), SpillInlineCandidateVisitor, this); + } + } + +private: + static Compiler::fgWalkResult SpillInlineCandidateVisitor(GenTree** pTree, Compiler::fgWalkData* fgWalkPre) + { + assert((pTree != nullptr) && (*pTree != nullptr)); + GenTree* tree = *pTree; + if ((tree->gtFlags & GTF_CALL) == 0) + { + // Trees with ret_expr are marked as GTF_CALL. + return Compiler::WALK_SKIP_SUBTREES; + } + if (tree->IsCall() && tree->AsCall()->IsInlineCandidate()) + { + SpillInlineCandidatesHelper* walker = static_cast(fgWalkPre->pCallbackData); + walker->StoreInlineCandidateAsLocalVar(pTree); + } + return Compiler::WALK_CONTINUE; + } + + void StoreInlineCandidateAsLocalVar(GenTree** pCall) + { + GenTreeCall* call = (*pCall)->AsCall(); + assert(call->IsCall()); + const unsigned tmp = comp->lvaGrabTemp(true DEBUGARG("spilling inline candidate")); + JITDUMP("Storing inline candidate [%06u] to a local var V%02u.\n", comp->dspTreeID(call), tmp); + comp->impStoreToTemp(tmp, call, Compiler::CHECK_SPILL_NONE); + *pCall = comp->gtNewLclvNode(tmp, call->TypeGet()); + + assert(comp->lvaTable[tmp].lvSingleDef == 0); + comp->lvaTable[tmp].lvSingleDef = 1; + JITDUMP("Marked V%02u as a single def temp\n", tmp); + if (call->TypeIs(TYP_REF)) + { + bool isExact = false; + bool isNonNull = false; + CORINFO_CLASS_HANDLE retClsHnd = comp->gtGetClassHandle(call, &isExact, &isNonNull); + if (retClsHnd != nullptr) + { + comp->lvaSetClass(tmp, retClsHnd, isExact); + } + } + } + +private: + Compiler* comp; +}; + + //------------------------------------------------------------------------ // addFatPointerCandidate: mark the call and the method, that they have a fat pointer candidate. -// Spill ret_expr in the call node, because they can't be cloned. +// Spill inline candidates in the call node, because they shouldn't be cloned. // // Arguments: // call - fat calli candidate @@ -6566,6 +6648,8 @@ void Compiler::addFatPointerCandidate(GenTreeCall* call) JITDUMP("Marking call [%06u] as fat pointer candidate\n", dspTreeID(call)); setMethodHasFatPointer(); call->SetFatPointerCandidate(); + SpillInlineCandidatesHelper helper(this); + helper.StoreInlineCandidateResultsInArgs(call); } //------------------------------------------------------------------------ @@ -7288,6 +7372,12 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, classHandle != NO_CLASS_HANDLE ? eeGetClassName(classHandle) : eeGetMethodFullName(methodHandle)); setMethodHasGuardedDevirtualization(); + // Spill off any inline candidate subtrees so we can clone the call. + // + SpillInlineCandidatesHelper helper(this); + helper.StoreInlineCandidateResultsInArgs(call); + + // Gather some information for later. Note we actually allocate InlineCandidateInfo // here, as the devirtualized half of this call will likely become an inline candidate. // diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index fe5cac4afbe9e7..5047b0e07f886d 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -151,6 +151,11 @@ class IndirectCallTransformer bool ContainsGuardedDevirtualizationCandidate(Statement* stmt) { GenTree* candidate = stmt->GetRootNode(); + if (candidate->OperIs(GT_STORE_LCL_VAR)) + { + candidate = candidate->AsLclVar()->Data(); + } + return candidate->IsCall() && candidate->AsCall()->IsGuardedDevirtualizationCandidate(); } @@ -162,12 +167,13 @@ class IndirectCallTransformer , currBlock(block) , stmt(stmt) { - remainderBlock = nullptr; - checkBlock = nullptr; - thenBlock = nullptr; - elseBlock = nullptr; - origCall = nullptr; - likelihood = HIGH_PROBABILITY; + remainderBlock = nullptr; + checkBlock = nullptr; + thenBlock = nullptr; + elseBlock = nullptr; + likelihood = HIGH_PROBABILITY; + doesReturnValue = stmt->GetRootNode()->OperIs(GT_STORE_LCL_VAR); + origCall = GetCall(stmt); } //------------------------------------------------------------------------ @@ -181,7 +187,6 @@ class IndirectCallTransformer void Transform() { JITDUMP("*** %s: transforming " FMT_STMT "\n", Name(), stmt->GetID()); - FixupRetExpr(); ClearFlag(); CreateRemainder(); assert(GetChecksCount() > 0); @@ -199,8 +204,6 @@ class IndirectCallTransformer protected: virtual const char* Name() = 0; virtual void ClearFlag() = 0; - virtual GenTreeCall* GetCall(Statement* callStmt) = 0; - virtual void FixupRetExpr() = 0; //------------------------------------------------------------------------ // CreateRemainder: split current block at the call stmt and @@ -310,6 +313,30 @@ class IndirectCallTransformer } } + //------------------------------------------------------------------------ + // GetCall: find a call in a statement. + // + // Arguments: + // callStmt - the statement with the call inside. + // + // Return Value: + // call tree node pointer. + GenTreeCall* GetCall(Statement* callStmt) + { + GenTree* tree = callStmt->GetRootNode(); + GenTreeCall* call = nullptr; + if (doesReturnValue) + { + assert(tree->OperIs(GT_STORE_LCL_VAR)); + call = tree->AsLclVar()->Data()->AsCall(); + } + else + { + call = tree->AsCall(); // call with void return type. + } + return call; + } + Compiler* compiler; BasicBlock* currBlock; BasicBlock* remainderBlock; @@ -319,6 +346,7 @@ class IndirectCallTransformer Statement* stmt; GenTreeCall* origCall; unsigned likelihood; + bool doesReturnValue; const int HIGH_PROBABILITY = 80; }; @@ -329,8 +357,6 @@ class IndirectCallTransformer FatPointerCallTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt) : Transformer(compiler, block, stmt) { - doesReturnValue = stmt->GetRootNode()->OperIs(GT_STORE_LCL_VAR); - origCall = GetCall(stmt); fptrAddress = origCall->gtCallAddr; pointerType = fptrAddress->TypeGet(); } @@ -341,30 +367,6 @@ class IndirectCallTransformer return "FatPointerCall"; } - //------------------------------------------------------------------------ - // GetCall: find a call in a statement. - // - // Arguments: - // callStmt - the statement with the call inside. - // - // Return Value: - // call tree node pointer. - virtual GenTreeCall* GetCall(Statement* callStmt) - { - GenTree* tree = callStmt->GetRootNode(); - GenTreeCall* call = nullptr; - if (doesReturnValue) - { - assert(tree->OperIs(GT_STORE_LCL_VAR)); - call = tree->AsLclVar()->Data()->AsCall(); - } - else - { - call = tree->AsCall(); // call with void return type. - } - return call; - } - //------------------------------------------------------------------------ // ClearFlag: clear fat pointer candidate flag from the original call. // @@ -373,11 +375,6 @@ class IndirectCallTransformer origCall->ClearFatPointerCandidate(); } - // FixupRetExpr: no action needed as we handle this in the importer. - virtual void FixupRetExpr() - { - } - //------------------------------------------------------------------------ // CreateCheck: create check block, that checks fat pointer bit set. // @@ -474,7 +471,6 @@ class IndirectCallTransformer GenTree* fptrAddress; var_types pointerType; - bool doesReturnValue; }; class GuardedDevirtualizationTransformer final : public Transformer @@ -482,7 +478,6 @@ class IndirectCallTransformer public: GuardedDevirtualizationTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt) : Transformer(compiler, block, stmt) - , returnTemp(BAD_VAR_NUM) { } @@ -491,19 +486,10 @@ class IndirectCallTransformer // virtual void Run() { - origCall = GetCall(stmt); - JITDUMP("\n----------------\n\n*** %s contemplating [%06u] in " FMT_BB " \n", Name(), compiler->dspTreeID(origCall), currBlock->bbNum); - // We currently need inline candidate info to guarded devirt. - // - if (!origCall->IsInlineCandidate()) - { - JITDUMP("*** %s Bailing on [%06u] -- not an inline candidate\n", Name(), compiler->dspTreeID(origCall)); - ClearFlag(); - return; - } + assert(origCall->IsInlineCandidate()); likelihood = origCall->GetGDVCandidateInfo(0)->likelihood; assert((likelihood >= 0) && (likelihood <= 100)); @@ -552,22 +538,6 @@ class IndirectCallTransformer return "GuardedDevirtualization"; } - //------------------------------------------------------------------------ - // GetCall: find a call in a statement. - // - // Arguments: - // callStmt - the statement with the call inside. - // - // Return Value: - // call tree node pointer. - virtual GenTreeCall* GetCall(Statement* callStmt) - { - GenTree* tree = callStmt->GetRootNode(); - assert(tree->IsCall()); - GenTreeCall* call = tree->AsCall(); - return call; - } - virtual void ClearFlag() { // We remove the GDV flag from the call in the CreateElse @@ -775,91 +745,6 @@ class IndirectCallTransformer arg->SetEarlyNode(compiler->gtNewLclVarNode(tmpNum)); } - //------------------------------------------------------------------------ - // FixupRetExpr: set up to repair return value placeholder from call - // - virtual void FixupRetExpr() - { - //// If call returns a value, we need to copy it to a temp, and - //// bash the associated GT_RET_EXPR to refer to the temp instead - //// of the call. - //// - //// Note implicit by-ref returns should have already been converted - //// so any struct copy we induce here should be cheap. - //InlineCandidateInfo* const inlineInfo = origCall->GetGDVCandidateInfo(0); - - //if (!origCall->TypeIs(TYP_VOID)) - //{ - // // If there's a spill temp already associated with this inline candidate, - // // use that instead of allocating a new temp. - // // - // returnTemp = inlineInfo->preexistingSpillTemp; - - // if (returnTemp != BAD_VAR_NUM) - // { - // JITDUMP("Reworking call(s) to return value via a existing return temp V%02u\n", returnTemp); - - // // We will be introducing multiple defs for this temp, so make sure - // // it is no longer marked as single def. - // // - // // Otherwise, we could make an incorrect type deduction. Say the - // // original call site returns a B, but after we devirtualize along the - // // GDV happy path we see that method returns a D. We can't then assume that - // // the return temp is type D, because we don't know what type the fallback - // // path returns. So we have to stick with the current type for B as the - // // return type. - // // - // // Note local vars always live in the root method's symbol table. So we - // // need to use the root compiler for lookup here. - // // - // LclVarDsc* const returnTempLcl = compiler->impInlineRoot()->lvaGetDesc(returnTemp); - - // if (returnTempLcl->lvSingleDef == 1) - // { - // // In this case it's ok if we already updated the type assuming single def, - // // we just don't want any further updates. - // // - // JITDUMP("Return temp V%02u is no longer a single def temp\n", returnTemp); - // returnTempLcl->lvSingleDef = 0; - // } - // } - // else - // { - // returnTemp = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt return temp")); - // JITDUMP("Reworking call(s) to return value via a new temp V%02u\n", returnTemp); - // } - - // if (varTypeIsStruct(origCall)) - // { - // compiler->lvaSetStruct(returnTemp, origCall->gtRetClsHnd, false); - // } - - // GenTree* tempTree = compiler->gtNewLclvNode(returnTemp, origCall->TypeGet()); - - // JITDUMP("Linking GT_RET_EXPR [%06u] to refer to temp V%02u\n", compiler->dspTreeID(inlineInfo->retExpr), - // returnTemp); - - // inlineInfo->retExpr->gtSubstExpr = tempTree; - //} - //else if (inlineInfo->retExpr != nullptr) - //{ - // // We still oddly produce GT_RET_EXPRs for some void - // // returning calls. Just bash the ret expr to a NOP. - // // - // // Todo: consider bagging creation of these RET_EXPRs. The only possible - // // benefit they provide is stitching back larger trees for failed inlines - // // of void-returning methods. But then the calls likely sit in commas and - // // the benefit of a larger tree is unclear. - // JITDUMP("Linking GT_RET_EXPR [%06u] for VOID return to NOP\n", - // compiler->dspTreeID(inlineInfo->retExpr)); - // inlineInfo->retExpr->gtSubstExpr = compiler->gtNewNothingNode(); - //} - //else - //{ - // // We do not produce GT_RET_EXPRs for CTOR calls, so there is nothing to patch. - //} - } - //------------------------------------------------------------------------ // Devirtualize origCall using the given inline candidate // @@ -986,6 +871,13 @@ class IndirectCallTransformer // If the devirtualizer was unable to transform the call to invoke the unboxed entry, the inline info // we set up may be invalid. We won't be able to inline anyways. So demote the call as an inline candidate. // + GenTree* node = call; + if (doesReturnValue) + { + assert(stmt->GetRootNode()->OperIs(GT_STORE_LCL_VAR)); + node = compiler->gtNewTempStore(stmt->GetRootNode()->AsLclVarCommon()->GetLclNum(), node); + } + CORINFO_METHOD_HANDLE unboxedMethodHnd = inlineInfo->guardedMethodUnboxedEntryHandle; if ((unboxedMethodHnd != nullptr) && (methodHnd != unboxedMethodHnd)) { @@ -996,30 +888,26 @@ class IndirectCallTransformer call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE; call->ClearInlineInfo(); - - if (returnTemp != BAD_VAR_NUM) - { - GenTree* const store = compiler->gtNewTempStore(returnTemp, call); - compiler->fgNewStmtAtEnd(block, store); - } - else - { - compiler->fgNewStmtAtEnd(block, call, stmt->GetDebugInfo()); - } + compiler->fgNewStmtAtEnd(block, node, stmt->GetDebugInfo()); } else { // Add the call. // - compiler->fgNewStmtAtEnd(block, call, stmt->GetDebugInfo()); + compiler->fgNewStmtAtEnd(block, node, stmt->GetDebugInfo()); // Re-establish this call as an inline candidate. // inlineInfo->clsHandle = compiler->info.compCompHnd->getMethodClass(methodHnd); inlineInfo->exactContextHandle = context; - inlineInfo->preexistingSpillTemp = returnTemp; + inlineInfo->preexistingSpillTemp = doesReturnValue ? stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() : BAD_VAR_NUM; call->SetSingleInlineCandidateInfo(inlineInfo); } + + if (doesReturnValue) + { + compiler->lvaGetDesc(stmt->GetRootNode()->AsLclVarCommon())->lvSingleDef = false; + } } //------------------------------------------------------------------------ @@ -1143,9 +1031,9 @@ class IndirectCallTransformer JITDUMP("Residual call [%06u] moved to block " FMT_BB "\n", compiler->dspTreeID(call), elseBlock->bbNum); - if (returnTemp != BAD_VAR_NUM) + if (doesReturnValue) { - GenTree* store = compiler->gtNewTempStore(returnTemp, call); + GenTree* store = compiler->gtNewTempStore(stmt->GetRootNode()->AsLclVarCommon()->GetLclNum(), call); newStmt->SetRootNode(store); } @@ -1402,7 +1290,6 @@ class IndirectCallTransformer } private: - unsigned returnTemp; Statement* lastStmt; bool checkFallsThrough; From 8e7d0961ccca92cf31dede2f42a6ec5298942fce Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Jan 2025 19:39:29 +0100 Subject: [PATCH 03/23] Remove --- src/coreclr/jit/compiler.h | 2 -- src/coreclr/jit/gentree.cpp | 48 ------------------------------------- 2 files changed, 50 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 54e8413200c37b..9a3e288995ed75 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3740,8 +3740,6 @@ class Compiler bool gtHasLocalsWithAddrOp(GenTree* tree); bool gtHasAddressExposedLocals(GenTree* tree); - bool gtSubTreeAndChildrenAreFirstExecutedSideEffects(GenTree* tree, GenTree* subTree); - unsigned gtSetCallArgsOrder(CallArgs* args, bool lateArgs, int* callCostEx, int* callCostSz); unsigned gtSetMultiOpOrder(GenTreeMultiOp* multiOp); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2d90611326367b..aac21073c7511b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3178,54 +3178,6 @@ bool Compiler::gtHasAddressExposedLocals(GenTree* tree) return visitor.WalkTree(&tree, nullptr) == WALK_ABORT; } -bool Compiler::gtSubTreeAndChildrenAreFirstExecutedSideEffects(GenTree* tree, GenTree* subTree) -{ - struct Visitor : GenTreeVisitor - { - enum - { - DoPreOrder = true, - DoPostOrder = true, - UseExecutionOrder = true, - }; - - Visitor(Compiler* comp, GenTree* subTree) - : GenTreeVisitor(comp) - { - } - - fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) - { - if (*use == m_subTree) - { - Result = true; - return WALK_ABORT; - } - - return WALK_CONTINUE; - } - - fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) - { - if (((*use)->gtFlags & GTF_ALL_EFFECT) != GTF_EMPTY) - { - Result = false; - return WALK_ABORT; - } - - return WALK_CONTINUE; - } - - bool Result = false; - private: - GenTree* m_subTree; - }; - - Visitor visitor(this, subTree); - visitor.WalkTree(&tree, nullptr); - return visitor.Result; -} - #ifdef DEBUG /***************************************************************************** From 2a03c0d0cac89b4e4a07aad98dcee0a5cbcc68d9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Jan 2025 19:42:37 +0100 Subject: [PATCH 04/23] Clean up --- src/coreclr/jit/compiler.h | 1 - src/coreclr/jit/fginline.cpp | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9a3e288995ed75..e34b0b89c5aecf 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6987,7 +6987,6 @@ class Compiler bool TypeInstantiationComplexityExceeds(CORINFO_CLASS_HANDLE handle, int& cur, int max); void fgInvokeInlineeCompiler(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* result, InlineContext** createdContext); - void fgInsertInlineeBlocks(InlineInfo* pInlineInfo); void fgFinalizeInlineeStatements(InlineInfo* pInlineInfo); void fgInsertInlineeArgument(class StatementListBuilder& statements, const InlArgInfo& argInfo, const DebugInfo& callDI); void fgInlinePrependStatements(InlineInfo* inlineInfo); diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 098a0d36d6a2e1..9e4ff7272faacb 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -1444,8 +1444,6 @@ void Compiler::fgFinalizeInlineeStatements(InlineInfo* pInlineInfo) { #ifdef DEBUG - Statement* currentDumpStmt = nullptr; - if (verbose) { printf("\n\n----------- Statements (and blocks) added due to the inlining of call "); @@ -1464,7 +1462,6 @@ void Compiler::fgFinalizeInlineeStatements(InlineInfo* pInlineInfo) #ifdef DEBUG if (verbose) { - //currentDumpStmt = stmtAfter; printf("\nInlinee method body:"); } #endif // DEBUG @@ -1472,7 +1469,8 @@ void Compiler::fgFinalizeInlineeStatements(InlineInfo* pInlineInfo) fgInlineAppendStatements(pInlineInfo->teardownStatements, pInlineInfo); // - // At this point, we have successfully inserted inlinee's code. + // At this point we have the inlinee's code in in inlinee compiler and setup/teardown statements + // in the InlineInfo instance. // // From b564e793f32412170e8dc55f746baec07c472545 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Jan 2025 23:57:36 +0100 Subject: [PATCH 05/23] Handle ret buffer correctly, fix other bugs --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fginline.cpp | 57 +++++-- src/coreclr/jit/gentree.cpp | 48 +++--- src/coreclr/jit/importer.cpp | 19 ++- src/coreclr/jit/importercalls.cpp | 74 -------- src/coreclr/jit/indirectcalltransformer.cpp | 178 ++++++++++++++------ src/coreclr/jit/inline.h | 1 + 7 files changed, 215 insertions(+), 164 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e34b0b89c5aecf..a4854537c366ce 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3790,7 +3790,7 @@ class Compiler bool ignoreRoot = false); bool gtSplitTree( - BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse, bool includeOperands = true); + BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse, bool includeOperands = true, bool early = false); bool gtStoreDefinesField( LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize); diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 9e4ff7272faacb..33bf5b1ffc9dd9 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -324,7 +324,7 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorfgMorphStmt; Statement* newStmt = nullptr; GenTree** use2 = nullptr; - m_compiler->gtSplitTree(callBlock, callStmt, call, &newStmt, &use2, /* includeOperands */ false); + m_compiler->gtSplitTree(callBlock, callStmt, call, &newStmt, &use2, /* includeOperands */ false, /* early */ true); assert(use2 == use); JITDUMP("After split:\n"); @@ -338,13 +338,34 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorfirstStmt() ? nullptr : callStmt->GetPrevStmt(); - inlineInfo.setupStatements.InsertIntoBlockBefore(m_compiler->compCurBB, m_compiler->fgMorphStmt); - *use = call->gtInlineCandidateInfo->result.substExpr; + inlineInfo.setupStatements.InsertIntoBlockBefore(callBlock, callStmt); + + GenTree* substExpr = call->gtInlineCandidateInfo->result.substExpr; + if ((substExpr != nullptr) && substExpr->IsValue() && m_compiler->gtComplexityExceeds(substExpr, 16)) + { + JITDUMP("Substitution expression is complex so spilling it to its own statement\n"); + unsigned lclNum = m_compiler->lvaGrabTemp(false DEBUGARG("Complex inlinee substitution expression")); + Statement* storeTemp = m_compiler->gtNewStmt(m_compiler->gtNewTempStore(lclNum, substExpr)); + DISPSTMT(storeTemp); + inlineInfo.teardownStatements.Append(storeTemp); + *use = m_compiler->gtNewLclvNode(lclNum, genActualType(substExpr)); + } + else + { + *use = substExpr; + } if (*use == nullptr) { *use = m_compiler->gtNewNothingNode(); } + else + { + if ((*use)->IsValue() && !(*use)->IsCall() && (use == callStmt->GetRootNodePointer())) + { + *use = m_compiler->gtUnusedValNode(*use); + } + } if (call->gtInlineCandidateInfo->result.substBB != nullptr) { @@ -459,8 +480,21 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorGetRootNodePointer()) + { + // Leave this case up to the general handling. + return false; + } + + GenTree* call = *use; *use = inlineInfo.inlineCandidateInfo->result.substExpr; + if (m_compiler->gtComplexityExceeds(m_statement->GetRootNode(), 16)) + { + *use = call; + return false; + } + if (*use == nullptr) { *use = m_compiler->gtNewNothingNode(); @@ -820,18 +854,15 @@ PhaseStatus Compiler::fgInline() continue; } - GenTree* expr = currentStmt->GetRootNode(); + Statement* nextStmt = currentStmt->GetNextStmt(); - // See if stmt is of the form GT_COMMA(call, nop) - // If yes, we can get rid of GT_COMMA. - if (expr->OperGet() == GT_COMMA && expr->AsOp()->gtOp1->OperGet() == GT_CALL && - expr->AsOp()->gtOp2->OperGet() == GT_NOP) + if (currentStmt->GetRootNode()->IsNothingNode()) { + fgRemoveStmt(currentBlock, currentStmt); madeChanges = true; - currentStmt->SetRootNode(expr->AsOp()->gtOp1); } - currentStmt = currentStmt->GetNextStmt(); + currentStmt = nextStmt; } currentBlock = currentBlock->Next(); @@ -1184,6 +1215,7 @@ void Compiler::fgInvokeInlineeCompiler(InlineInfo& inlineInfo, GenTreeCall* call inlineInfo.retExprClassHndIsExact = false; inlineInfo.inlineResult = inlineResult; inlineInfo.inlInstParamArgInfo = nullptr; + inlineInfo.inlRetBufferArgInfo = nullptr; #ifdef FEATURE_SIMD inlineInfo.hasSIMDTypeArgLocalOrReturn = false; #endif // FEATURE_SIMD @@ -1867,6 +1899,11 @@ void Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) fgInsertInlineeArgument(inlineInfo->setupStatements, *inlineInfo->inlInstParamArgInfo, callDI); } + if (inlineInfo->inlRetBufferArgInfo != nullptr) + { + fgInsertInlineeArgument(inlineInfo->setupStatements, *inlineInfo->inlRetBufferArgInfo, callDI); + } + // Treat arguments that had to be assigned to temps if (inlineInfo->argCnt) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index aac21073c7511b..b06bae93cc2525 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16946,7 +16946,7 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S // True if any changes were made; false if nothing needed to be done to split the tree. // bool Compiler::gtSplitTree( - BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse, bool includeOperands) + BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse, bool includeOperands, bool early) { class Splitter final : public GenTreeVisitor { @@ -16954,6 +16954,7 @@ bool Compiler::gtSplitTree( Statement* m_splitStmt; GenTree* m_splitNode; bool m_includeOperands; + bool m_early; struct UseInfo { @@ -16970,12 +16971,13 @@ bool Compiler::gtSplitTree( UseExecutionOrder = true }; - Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode, bool includeOperands) + Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode, bool includeOperands, bool early) : GenTreeVisitor(compiler) , m_bb(bb) , m_splitStmt(stmt) , m_splitNode(splitNode) , m_includeOperands(includeOperands) + , m_early(early) , m_useStack(compiler->getAllocator(CMK_ArrayStack)) { } @@ -17128,25 +17130,29 @@ bool Compiler::gtSplitTree( return; } - if ((*use)->OperIs(GT_LCL_VAR) && !m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->IsAddressExposed()) + if ((*use)->OperIs(GT_LCL_VAR)) { - // The splitting we do here should always guarantee that we - // only introduce locals for the tree edges that overlap the - // split point, so it should be ok to avoid creating statements - // for locals that aren't address exposed. Note that this - // relies on it being illegal IR to have a tree edge for a - // register candidate that overlaps with an interfering node. - // - // For example, this optimization would be problematic if IR - // like the following could occur: - // - // CALL - // LCL_VAR V00 - // CALL - // STORE_LCL_VAR(...) (setup) - // LCL_VAR V00 - // - return; + LclVarDsc* dsc = m_compiler->lvaGetDesc((*use)->AsLclVarCommon()); + if (!dsc->IsAddressExposed() && (!m_early || !dsc->lvHasLdAddrOp)) + { + // The splitting we do here should always guarantee that we + // only introduce locals for the tree edges that overlap the + // split point, so it should be ok to avoid creating statements + // for locals that aren't address exposed. Note that this + // relies on it being illegal IR to have a tree edge for a + // register candidate that overlaps with an interfering node. + // + // For example, this optimization would be problematic if IR + // like the following could occur: + // + // CALL + // LCL_VAR V00 + // CALL + // STORE_LCL_VAR(...) (setup) + // LCL_VAR V00 + // + return; + } } #ifndef TARGET_64BIT @@ -17211,7 +17217,7 @@ bool Compiler::gtSplitTree( } }; - Splitter splitter(this, block, stmt, splitPoint, includeOperands); + Splitter splitter(this, block, stmt, splitPoint, includeOperands, early); splitter.WalkTree(stmt->GetRootNodePointer(), nullptr); *firstNewStmt = splitter.FirstStatement; *splitNodeUse = splitter.SplitNodeUse; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 3926e08c294aff..32f8b25223164d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -811,7 +811,7 @@ GenTree* Compiler::impStoreStruct(GenTree* store, GenTree* destAddr = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags); NewCallArg newArg = NewCallArg::Primitive(destAddr).WellKnown(wellKnownArgType); - if (destAddr->OperIs(GT_LCL_ADDR)) + if (destAddr->OperIs(GT_LCL_ADDR) && !srcCall->IsInlineCandidate()) { lvaSetVarDoNotEnregister(destAddr->AsLclVarCommon()->GetLclNum() DEBUGARG(DoNotEnregisterReason::HiddenBufferStructArg)); @@ -11002,8 +11002,10 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) } else // The struct was to be returned via a return buffer. { - assert(iciCall->gtArgs.HasRetBuffer()); - GenTree* dest = gtCloneExpr(iciCall->gtArgs.GetRetBufferArg()->GetEarlyNode()); + assert(iciCall->gtArgs.HasRetBuffer() && (impInlineInfo->inlRetBufferArgInfo != nullptr)); + InlLclVarInfo lclInfo = {}; + lclInfo.lclTypeInfo = TYP_BYREF; + GenTree* dest = impInlineFetchArg(*impInlineInfo->inlRetBufferArgInfo, lclInfo); if (fgNeedReturnSpillTemp()) { @@ -12811,8 +12813,9 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, #endif // FEATURE_SIMD } - // Spilling code relies on correct aliasability annotations. - assert(varDsc->lvHasLdAddrOp || varDsc->IsAddressExposed()); + // Spilling code relies on correct aliasability annotations, except for + // retbufs that are not actually exposed in IL. + assert(varDsc->lvHasLdAddrOp || varDsc->IsAddressExposed() || (arg->GetWellKnownArg() == WellKnownArg::RetBuffer)); } if (curArgVal->gtFlags & GTF_ALL_EFFECT) @@ -12961,8 +12964,12 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) inlArgInfo[ilArgCnt].argIsThis = true; break; case WellKnownArg::RetBuffer: - // This does not appear in the table of inline arg info; do not include them + { + InlArgInfo* retBufferInfo = new (this, CMK_Inlining) InlArgInfo{}; + impInlineRecordArgInfo(pInlineInfo, retBufferInfo, &arg, inlineResult); + pInlineInfo->inlRetBufferArgInfo = retBufferInfo; continue; + } case WellKnownArg::InstParam: { InlArgInfo* ctxInfo = new (this, CMK_Inlining) InlArgInfo{}; diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index bd4359848d085e..4c8eb953eec5a6 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -6570,72 +6570,6 @@ void Compiler::impCheckForPInvokeCall( } } -//------------------------------------------------------------------------ -// SpillInlineCandidatesHelper: iterate through arguments tree and spill inline candidates to local variables. -// -class SpillInlineCandidatesHelper -{ -public: - SpillInlineCandidatesHelper(Compiler* comp) - : comp(comp) - { - } - - void StoreInlineCandidateResultsInArgs(GenTreeCall* call) - { - for (CallArg& arg : call->gtArgs.Args()) - { - comp->fgWalkTreePre(&arg.EarlyNodeRef(), SpillInlineCandidateVisitor, this); - } - } - -private: - static Compiler::fgWalkResult SpillInlineCandidateVisitor(GenTree** pTree, Compiler::fgWalkData* fgWalkPre) - { - assert((pTree != nullptr) && (*pTree != nullptr)); - GenTree* tree = *pTree; - if ((tree->gtFlags & GTF_CALL) == 0) - { - // Trees with ret_expr are marked as GTF_CALL. - return Compiler::WALK_SKIP_SUBTREES; - } - if (tree->IsCall() && tree->AsCall()->IsInlineCandidate()) - { - SpillInlineCandidatesHelper* walker = static_cast(fgWalkPre->pCallbackData); - walker->StoreInlineCandidateAsLocalVar(pTree); - } - return Compiler::WALK_CONTINUE; - } - - void StoreInlineCandidateAsLocalVar(GenTree** pCall) - { - GenTreeCall* call = (*pCall)->AsCall(); - assert(call->IsCall()); - const unsigned tmp = comp->lvaGrabTemp(true DEBUGARG("spilling inline candidate")); - JITDUMP("Storing inline candidate [%06u] to a local var V%02u.\n", comp->dspTreeID(call), tmp); - comp->impStoreToTemp(tmp, call, Compiler::CHECK_SPILL_NONE); - *pCall = comp->gtNewLclvNode(tmp, call->TypeGet()); - - assert(comp->lvaTable[tmp].lvSingleDef == 0); - comp->lvaTable[tmp].lvSingleDef = 1; - JITDUMP("Marked V%02u as a single def temp\n", tmp); - if (call->TypeIs(TYP_REF)) - { - bool isExact = false; - bool isNonNull = false; - CORINFO_CLASS_HANDLE retClsHnd = comp->gtGetClassHandle(call, &isExact, &isNonNull); - if (retClsHnd != nullptr) - { - comp->lvaSetClass(tmp, retClsHnd, isExact); - } - } - } - -private: - Compiler* comp; -}; - - //------------------------------------------------------------------------ // addFatPointerCandidate: mark the call and the method, that they have a fat pointer candidate. // Spill inline candidates in the call node, because they shouldn't be cloned. @@ -6648,8 +6582,6 @@ void Compiler::addFatPointerCandidate(GenTreeCall* call) JITDUMP("Marking call [%06u] as fat pointer candidate\n", dspTreeID(call)); setMethodHasFatPointer(); call->SetFatPointerCandidate(); - SpillInlineCandidatesHelper helper(this); - helper.StoreInlineCandidateResultsInArgs(call); } //------------------------------------------------------------------------ @@ -7372,12 +7304,6 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, classHandle != NO_CLASS_HANDLE ? eeGetClassName(classHandle) : eeGetMethodFullName(methodHandle)); setMethodHasGuardedDevirtualization(); - // Spill off any inline candidate subtrees so we can clone the call. - // - SpillInlineCandidatesHelper helper(this); - helper.StoreInlineCandidateResultsInArgs(call); - - // Gather some information for later. Note we actually allocate InlineCandidateInfo // here, as the devirtualized half of this call will likely become an inline candidate. // diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 5047b0e07f886d..917b99fb89e90b 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -243,6 +243,35 @@ class IndirectCallTransformer return block; } + //------------------------------------------------------------------------ + // SpillOperandToEndOfCheckBlock: spill an edge to a temp at the end of the "check" block. + // + // Parameters + // use - The use edge + // + void SpillOperandToEndOfCheckBlock(GenTree** use) + { + unsigned tmpNum = compiler->lvaGrabTemp(true DEBUGARG("operand temp")); + GenTree* const argNode = *use; + GenTree* store = compiler->gtNewTempStore(tmpNum, argNode); + + if (argNode->TypeIs(TYP_REF)) + { + bool isExact = false; + bool isNonNull = false; + CORINFO_CLASS_HANDLE cls = compiler->gtGetClassHandle(argNode, &isExact, &isNonNull); + if (cls != NO_CLASS_HANDLE) + { + compiler->lvaSetClass(tmpNum, cls, isExact); + } + } + + Statement* storeStmt = compiler->fgNewStmtFromTree(store, stmt->GetDebugInfo()); + compiler->fgInsertStmtAtEnd(checkBlock, storeStmt); + + *use = compiler->gtNewLclVarNode(tmpNum); + } + virtual void CreateThen(uint8_t checkIdx) = 0; virtual void CreateElse() = 0; @@ -383,6 +412,33 @@ class IndirectCallTransformer assert(checkIdx == 0); checkBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, currBlock, currBlock); + + GenTree** lastSpilledArg = nullptr; + for (GenTree** arg : origCall->UseEdges()) + { + if (ContainsInlineCandidate(*arg)) + { + lastSpilledArg = arg; + } + } + + if (lastSpilledArg != nullptr) + { + for (GenTree** arg : origCall->UseEdges()) + { + GenTree* argNode = *arg; + if (((argNode->gtFlags & GTF_ALL_EFFECT) != 0) || compiler->gtHasLocalsWithAddrOp(argNode)) + { + SpillOperandToEndOfCheckBlock(arg); + } + + if (arg == lastSpilledArg) + { + break; + } + } + } + GenTree* fatPointerMask = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, FAT_POINTER_MASK); GenTree* fptrAddressCopy = compiler->gtCloneExpr(fptrAddress); GenTree* fatPointerAnd = compiler->gtNewOperNode(GT_AND, TYP_I_IMPL, fptrAddressCopy, fatPointerMask); @@ -393,6 +449,45 @@ class IndirectCallTransformer compiler->fgInsertStmtAtEnd(checkBlock, jmpStmt); } + bool ContainsInlineCandidate(GenTree* tree) + { + struct Visitor : GenTreeVisitor + { + enum + { + DoPreOrder = true, + }; + + Visitor(Compiler* comp) : GenTreeVisitor(comp) + { + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* tree = *use; + if ((tree->gtFlags & GTF_CALL) == 0) + { + return fgWalkResult::WALK_SKIP_SUBTREES; + } + + if (tree->IsCall() && tree->AsCall()->IsInlineCandidate()) + { + return fgWalkResult::WALK_ABORT; + } + + return fgWalkResult::WALK_CONTINUE; + } + }; + + if ((tree->gtFlags & GTF_CALL) == 0) + { + return false; + } + + Visitor visitor(compiler); + return visitor.WalkTree(&tree, nullptr) == Compiler::WALK_ABORT; + } + //------------------------------------------------------------------------ // CreateThen: create then block, that is executed if the check succeeds. // This simply executes the original call. @@ -621,7 +716,7 @@ class IndirectCallTransformer GenTree* argNode = arg.GetNode(); if (((argNode->gtFlags & GTF_ALL_EFFECT) != 0) || compiler->gtHasLocalsWithAddrOp(argNode)) { - SpillArgToTempBeforeGuard(&arg); + SpillOperandToEndOfCheckBlock(&arg.NodeRef()); } if (&arg == lastSideEffArg) @@ -636,7 +731,7 @@ class IndirectCallTransformer // is going to be used multiple times due to the guard. if (!thisArg->GetNode()->IsLocal()) { - SpillArgToTempBeforeGuard(thisArg); + SpillOperandToEndOfCheckBlock(&thisArg->NodeRef()); } GenTree* thisTree = compiler->gtCloneExpr(thisArg->GetNode()); @@ -716,35 +811,6 @@ class IndirectCallTransformer compiler->fgInsertStmtAtEnd(checkBlock, jmpStmt); } - //------------------------------------------------------------------------ - // SpillArgToTempBeforeGuard: spill an argument into a temp in the guard/check block. - // - // Parameters - // arg - The arg to create a temp and local store for. - // - void SpillArgToTempBeforeGuard(CallArg* arg) - { - unsigned tmpNum = compiler->lvaGrabTemp(true DEBUGARG("guarded devirt arg temp")); - GenTree* const argNode = arg->GetNode(); - GenTree* store = compiler->gtNewTempStore(tmpNum, argNode); - - if (argNode->TypeIs(TYP_REF)) - { - bool isExact = false; - bool isNonNull = false; - CORINFO_CLASS_HANDLE cls = compiler->gtGetClassHandle(argNode, &isExact, &isNonNull); - if (cls != NO_CLASS_HANDLE) - { - compiler->lvaSetClass(tmpNum, cls, isExact); - } - } - - Statement* storeStmt = compiler->fgNewStmtFromTree(store, stmt->GetDebugInfo()); - compiler->fgInsertStmtAtEnd(checkBlock, storeStmt); - - arg->SetEarlyNode(compiler->gtNewLclVarNode(tmpNum)); - } - //------------------------------------------------------------------------ // Devirtualize origCall using the given inline candidate // @@ -1240,27 +1306,6 @@ class IndirectCallTransformer { JITDUMP(" Scouting " FMT_STMT "\n", nextStmt->GetID()); - // See if this is a guarded devirt candidate. - // These will be top-level trees. - // - GenTree* const root = nextStmt->GetRootNode(); - - if (root->IsCall()) - { - GenTreeCall* const call = root->AsCall(); - - if (call->IsGuardedDevirtualizationCandidate() && - (call->GetGDVCandidateInfo(0)->likelihood >= gdvChainLikelihood)) - { - JITDUMP("GDV call at [%06u] has likelihood %u >= %u; chaining (%u stmts, %u nodes to dup).\n", - compiler->dspTreeID(call), call->GetGDVCandidateInfo(0)->likelihood, gdvChainLikelihood, - chainStatementDup, chainNodeDup); - - call->gtCallMoreFlags |= GTF_CALL_M_GUARDED_DEVIRT_CHAIN; - break; - } - } - // Stop searching if we've accumulated too much dup cost. // Consider: use node count instead. // @@ -1282,6 +1327,35 @@ class IndirectCallTransformer break; } + // See if this is a guarded devirt candidate. + // These will be top-level trees. + // + GenTree* const root = nextStmt->GetRootNode(); + + GenTreeCall* call = nullptr; + if (root->IsCall()) + { + call = root->AsCall(); + } + else if (root->OperIs(GT_STORE_LCL_VAR) && root->AsLclVarCommon()->Data()->IsCall()) + { + call = root->AsLclVarCommon()->Data()->AsCall(); + } + + if (call != nullptr) + { + if (call->IsGuardedDevirtualizationCandidate() && + (call->GetGDVCandidateInfo(0)->likelihood >= gdvChainLikelihood)) + { + JITDUMP("GDV call at [%06u] has likelihood %u >= %u; chaining (%u stmts, %u nodes to dup).\n", + compiler->dspTreeID(call), call->GetGDVCandidateInfo(0)->likelihood, gdvChainLikelihood, + chainStatementDup, chainNodeDup); + + call->gtCallMoreFlags |= GTF_CALL_M_GUARDED_DEVIRT_CHAIN; + break; + } + } + // Looks like we can clone this, so keep scouting. // chainStatementDup++; diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h index 5e38d64b108214..2f4dff2a1f10da 100644 --- a/src/coreclr/jit/inline.h +++ b/src/coreclr/jit/inline.h @@ -718,6 +718,7 @@ struct InlineInfo unsigned argCnt; InlArgInfo inlArgInfo[MAX_INL_ARGS + 1]; InlArgInfo* inlInstParamArgInfo; + InlArgInfo* inlRetBufferArgInfo; int lclTmpNum[MAX_INL_LCLS]; // map local# -> temp# (-1 if unused) InlLclVarInfo lclVarInfo[MAX_INL_LCLS + MAX_INL_ARGS + 1]; // type information from local sig From 26e9434f98180337d80c9b300ed8a905ffb4ede3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 6 Feb 2025 19:30:10 +0100 Subject: [PATCH 06/23] JITDUMP more stuff --- src/coreclr/jit/fginline.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 33bf5b1ffc9dd9..02e97468dce17b 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -495,6 +495,8 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorgtNewNothingNode(); @@ -523,6 +525,15 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorfgFirstBB->bbStmtList; stmt != nullptr; stmt = stmt->GetNextStmt()) + { + DISPSTMT(stmt); + } +#endif + m_compiler->fgInsertStmtListBefore(block, stmt, inlineeComp->fgFirstBB->bbStmtList); const BasicBlockFlags inlineeBlockFlags = inlineeComp->fgFirstBB->GetFlagsRaw(); @@ -583,8 +594,6 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorOperIs(GT_RET_EXPR)); - unsigned lclNum = m_compiler->lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates.")); LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); m_compiler->lvaSetStruct(lclNum, retClsHnd, false); From 26344729996c1f81f24ca4dcc17fb330b226534d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 10 Oct 2025 13:15:40 +0200 Subject: [PATCH 07/23] Fixes --- src/coreclr/jit/async.cpp | 3 --- src/coreclr/jit/compiler.h | 6 +++--- src/coreclr/jit/fginline.cpp | 23 +++++++++++++---------- src/coreclr/jit/gtlist.h | 1 + 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index ec92f9b23ca6a7..c0186adc693b3c 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -156,9 +156,6 @@ PhaseStatus Compiler::SaveAsyncContexts() if (call->IsInlineCandidate() && (call->gtReturnType != TYP_VOID)) { restoreAfterStmt = stmt->GetNextStmt(); - assert(restoreAfterStmt->GetRootNode()->OperIs(GT_RET_EXPR) || - (restoreAfterStmt->GetRootNode()->OperIs(GT_STORE_LCL_VAR) && - restoreAfterStmt->GetRootNode()->AsLclVarCommon()->Data()->OperIs(GT_RET_EXPR))); } if (curBB->hasTryIndex()) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5c4747fa6293f6..fb232fc8cb7696 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6495,6 +6495,9 @@ class Compiler #endif public: + // Create a new temporary variable to hold the result of *ppTree, + // and transform the graph accordingly. + GenTree* fgInsertCommaFormTemp(GenTree** ppTree); Statement* fgNewStmtAtBeg(BasicBlock* block, GenTree* tree, const DebugInfo& di = DebugInfo()); void fgInsertStmtAtEnd(BasicBlock* block, Statement* stmt); Statement* fgNewStmtAtEnd(BasicBlock* block, GenTree* tree, const DebugInfo& di = DebugInfo()); @@ -6509,9 +6512,6 @@ class Compiler private: void fgInsertStmtListAtEnd(BasicBlock* block, Statement* stmtList); void fgInsertStmtListBefore(BasicBlock* block, Statement* stmtBefore, Statement* stmtList); - // Create a new temporary variable to hold the result of *ppTree, - // and transform the graph accordingly. - GenTree* fgInsertCommaFormTemp(GenTree** ppTree); private: Statement* fgInsertStmtListAfter(BasicBlock* block, Statement* stmtAfter, Statement* stmtList); diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index ccf9434e5461cb..c3e5da03cd1ca0 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -342,7 +342,10 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorgtInlineCandidateInfo->result.substExpr; - if ((substExpr != nullptr) && substExpr->IsValue() && m_compiler->gtComplexityExceeds(substExpr, 16)) + auto getComplexity = [](GenTree* tree) { + return 1; + }; + if ((substExpr != nullptr) && substExpr->IsValue() && m_compiler->gtComplexityExceeds(substExpr, 16, getComplexity)) { JITDUMP("Substitution expression is complex so spilling it to its own statement\n"); unsigned lclNum = m_compiler->lvaGrabTemp(false DEBUGARG("Complex inlinee substitution expression")); @@ -439,7 +442,7 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorKindIs(BBJ_ALWAYS)); assert(callBlock->TargetIs(continueBlock)); - m_compiler->fgRedirectTargetEdge(callBlock, m_compiler->InlineeCompiler->fgFirstBB); + m_compiler->fgRedirectEdge(callBlock->TargetEdgeRef(), m_compiler->InlineeCompiler->fgFirstBB); callBlock->SetNext(m_compiler->InlineeCompiler->fgFirstBB); m_compiler->InlineeCompiler->fgFirstBB->SetPrev(callBlock); @@ -490,7 +493,8 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorresult.substExpr; - if (m_compiler->gtComplexityExceeds(m_statement->GetRootNode(), 16)) + auto getComplexity = [](GenTree* tree) { return 1; }; + if (m_compiler->gtComplexityExceeds(m_statement->GetRootNode(), 16, getComplexity)) { *use = call; return false; @@ -541,8 +545,7 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorSetFlags(inlineeBlockFlags & ~BBF_RUN_RARELY); + block->SetFlags(inlineeBlockFlags); inlineInfo.teardownStatements.InsertIntoBlockBefore(block, stmt); return true; @@ -739,11 +742,11 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorgtSplitTree(m_compiler->compCurBB, m_curStmt, call, &newStmt, &callUse, true)) + if (m_compiler->gtSplitTree(m_block, m_statement, call, &newStmt, &callUse, true, true)) { - if (m_firstNewStmt == nullptr) + if (m_nextStatement == nullptr) { - m_firstNewStmt = newStmt; + m_nextStatement = newStmt; } } @@ -752,8 +755,8 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorgtReturnType != TYP_VOID) { Statement* stmt = m_compiler->gtNewStmt(call); - m_compiler->fgInsertStmtBefore(m_compiler->compCurBB, m_curStmt, stmt); - if (m_firstNewStmt == nullptr) + m_compiler->fgInsertStmtBefore(m_block, m_statement, stmt); + if (m_statement == nullptr) { m_firstNewStmt = stmt; } diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index eeea4f0280bf22..92a12259635446 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -37,6 +37,7 @@ GTNODE(ASYNC_CONTINUATION, GenTree ,0,0,GTK_LEAF) // Access GTNODE(LABEL , GenTree ,0,0,GTK_LEAF) // Jump-target GTNODE(JMP , GenTreeVal ,0,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function GTNODE(FTN_ADDR , GenTreeFptrVal ,0,0,GTK_LEAF) // Address of a function +GTNODE(GCPOLL , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTLIR) //----------------------------------------------------------------------------- // Constant nodes: From 81fa80c9d128a3381b9b4734917bd53e5cd27374 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 24 Oct 2025 11:35:45 +0200 Subject: [PATCH 08/23] Fix build errors --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fginline.cpp | 78 +++++++--------------- src/coreclr/jit/fgprofile.cpp | 14 ---- src/coreclr/jit/importer.cpp | 6 +- src/coreclr/jit/importercalls.cpp | 104 ++++++++++++++++++++++++++++++ 5 files changed, 133 insertions(+), 71 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index fb232fc8cb7696..c53b86311deb00 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5136,7 +5136,7 @@ class Compiler InlineCandidateInfo** ppInlineCandidateInfo, InlineResult* inlineResult); - void impInlineRecordArgInfo(InlineInfo* pInlineInfo, CallArg* arg, InlArgInfo* argInfo, InlineResult* inlineResult); + void impInlineRecordArgInfo(InlineInfo* pInlineInfo, InlArgInfo* argInfo, CallArg* arg, InlineResult* inlineResult); void impInlineInitVars(InlineInfo* pInlineInfo); diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index c3e5da03cd1ca0..acf437571f0168 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -274,6 +274,11 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorimpMarkInlineCandidate(call, context, false, &callInfo, inlinersContext); - - if (call->IsInlineCandidate()) - { - Statement* newStmt = nullptr; - GenTree** callUse = nullptr; - if (m_compiler->gtSplitTree(m_block, m_statement, call, &newStmt, &callUse, true, true)) - { - if (m_nextStatement == nullptr) - { - m_nextStatement = newStmt; - } - } - - // If the call is the root expression in a statement, and it returns void, - // we can inline it directly without creating a RET_EXPR. - if (parent != nullptr || call->gtReturnType != TYP_VOID) - { - Statement* stmt = m_compiler->gtNewStmt(call); - m_compiler->fgInsertStmtBefore(m_block, m_statement, stmt); - if (m_statement == nullptr) - { - m_firstNewStmt = stmt; - } - - GenTreeRetExpr* retExpr = - m_compiler->gtNewInlineCandidateReturnExpr(call->AsCall(), - genActualType(call->TypeGet())); - call->GetSingleInlineCandidateInfo()->retExpr = retExpr; - - JITDUMP("Creating new RET_EXPR for [%06u]:\n", call->gtTreeID); - DISPTREE(retExpr); - - *pTree = retExpr; - } - - JITDUMP("New inline candidate due to late devirtualization:\n"); - DISPTREE(call); - } + // Reprocess the statement to pick up the inline in preorder. + assert(m_nextBlock == nullptr); + m_nextBlock = m_block; + m_nextStatement = m_statement; } m_madeChanges = true; } @@ -820,6 +791,7 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorAsOp()->gtOp1; + bool modifiedTree = false; assert(tree == block->lastStmt()->GetRootNode()); while (condTree->OperIs(GT_COMMA)) @@ -2034,18 +2006,7 @@ void Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) } } - // Append the InstParam - if (inlineInfo->inlInstParamArgInfo != nullptr) - { - fgInsertInlineeArgument(inlineInfo->setupStatements, *inlineInfo->inlInstParamArgInfo, callDI); - } - - if (inlineInfo->inlRetBufferArgInfo != nullptr) - { - fgInsertInlineeArgument(inlineInfo->setupStatements, *inlineInfo->inlRetBufferArgInfo, callDI); - } - - // Treat arguments that had to be assigned to temps +#ifdef DEBUG if (inlineInfo->argCnt) { JITDUMP("\nArguments setup:\n"); @@ -2058,11 +2019,22 @@ void Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) InlArgInfo* argInfo = nullptr; switch (arg.GetWellKnownArg()) { - fgInsertInlineeArgument(inlineInfo->setupStatements, inlArgInfo[argNum], callDI); + case WellKnownArg::AsyncContinuation: + continue; + case WellKnownArg::RetBuffer: + argInfo = inlineInfo->inlRetBufferArgInfo; + break; + case WellKnownArg::InstParam: + argInfo = inlineInfo->inlInstParamArgInfo; + break; + default: + assert(ilArgNum < inlineInfo->argCnt); + argInfo = &inlineInfo->inlArgInfo[ilArgNum++]; + break; } assert(argInfo != nullptr); - fgInsertInlineeArgument(*argInfo, block, &afterStmt, &newStmt, callDI); + fgInsertInlineeArgument(inlineInfo->setupStatements, *argInfo, callDI); } // Add the CCTOR check if asked for. @@ -2155,7 +2127,7 @@ void Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) // we skip nulling the locals, since it can interfere // with tail calls introduced by the local. // -void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, Statement* stmtAfter) +void Compiler::fgInlineAppendStatements(class StatementListBuilder& statements, InlineInfo* inlineInfo) { // Null out any gc ref locals if (!inlineInfo->HasGcRefLocals()) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 49f57e49372bde..6c5b3d56d0d2ee 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -2624,20 +2624,6 @@ PhaseStatus Compiler::fgInstrumentMethod() JITDUMP("Inlinee instrumentation disabled by config\n"); return PhaseStatus::MODIFIED_NOTHING; } - - GenTreeRetExpr* const retExpr = impInlineInfo->inlineCandidateInfo->retExpr; - - // If there's a retExpr but no gtSubstBB, we assume the retExpr is a temp - // and so not interesting to instrumentation. - // - if ((retExpr != nullptr) && (retExpr->gtSubstBB != nullptr)) - { - assert(retExpr->gtSubstExpr != nullptr); - retBB = retExpr->gtSubstBB; - tempInlineeReturnStmt = fgNewStmtAtEnd(retBB, retExpr->gtSubstExpr); - JITDUMP("Temporarily adding ret expr [%06u] to " FMT_BB "\n", dspTreeID(retExpr->gtSubstExpr), - retBB->bbNum); - } } PhaseStatus status = fgInstrumentMethodCore(); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 9fb6c9230485a5..7dd0de462e6923 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -6715,12 +6715,12 @@ void Compiler::impImportBlockCode(BasicBlock* block) // If we see a local being assigned the result of a GDV-inlineable // GetEnumerator call, keep track of both the local and the call. // - if (op1->OperIs(GT_RET_EXPR)) + if (op1->IsCall()) { JITDUMP(".... checking for GDV returning IEnumerator...\n"); bool isEnumeratorT = false; - GenTreeCall* const call = op1->AsRetExpr()->gtInlineCandidate; + GenTreeCall* const call = op1->AsCall(); bool isExact = false; bool isNonNull = false; CORINFO_CLASS_HANDLE retCls = gtGetClassHandle(call, &isExact, &isNonNull); @@ -13248,7 +13248,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) } arg.SetEarlyNode(gtFoldExpr(arg.GetEarlyNode())); - impInlineRecordArgInfo(pInlineInfo, &arg, argInfo, inlineResult); + impInlineRecordArgInfo(pInlineInfo, argInfo, &arg, inlineResult); if (inlineResult->IsFailure()) { diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index ca17d5cc38dc98..7171e42597bb77 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -6766,6 +6766,110 @@ void Compiler::impCheckForPInvokeCall( } } +//------------------------------------------------------------------------ +// impSetupAndSpillForAsyncCall: +// Register a call as being async and set up context handling information depending on the IL. +// Also spill IL arguments if necessary. +// +// Arguments: +// call - The call +// opcode - The IL opcode for the call +// prefixFlags - Flags containing context handling information from IL +// +void Compiler::impSetupAndSpillForAsyncCall(GenTreeCall* call, OPCODE opcode, unsigned prefixFlags) +{ + AsyncCallInfo asyncInfo; + + if ((prefixFlags & PREFIX_IS_TASK_AWAIT) != 0) + { + JITDUMP("Call is an async task await\n"); + + asyncInfo.ExecutionContextHandling = ExecutionContextHandling::SaveAndRestore; + asyncInfo.SaveAndRestoreSynchronizationContextField = true; + + if ((prefixFlags & PREFIX_TASK_AWAIT_CONTINUE_ON_CAPTURED_CONTEXT) != 0) + { + asyncInfo.ContinuationContextHandling = ContinuationContextHandling::ContinueOnCapturedContext; + JITDUMP(" Continuation continues on captured context\n"); + } + else + { + asyncInfo.ContinuationContextHandling = ContinuationContextHandling::ContinueOnThreadPool; + JITDUMP(" Continuation continues on thread pool\n"); + } + } + else if (opcode == CEE_CALLI) + { + // Used for unboxing/instantiating stubs + JITDUMP("Call is an async calli\n"); + } + else + { + JITDUMP("Call is an async non-task await\n"); + // Only expected non-task await to see in IL is one of the AsyncHelpers.AwaitAwaiter variants. + // These are awaits of custom awaitables, and they come with the behavior that the execution context + // is captured and restored on suspension/resumption. + // We could perhaps skip this for AwaitAwaiter (but not for UnsafeAwaitAwaiter) since it is expected + // that the safe INotifyCompletion will take care of flowing ExecutionContext. + asyncInfo.ExecutionContextHandling = ExecutionContextHandling::AsyncSaveAndRestore; + } + + // For tailcalls the contexts does not need saving/restoring: they will be + // overwritten by the caller anyway. + // + // More specifically, if we can show that + // Thread.CurrentThread._executionContext is not accessed between the + // call and returning then we can omit save/restore of the execution + // context. We do not do that optimization yet. + if ((prefixFlags & PREFIX_TAILCALL) != 0) + { + asyncInfo.ExecutionContextHandling = ExecutionContextHandling::None; + asyncInfo.ContinuationContextHandling = ContinuationContextHandling::None; + asyncInfo.SaveAndRestoreSynchronizationContextField = false; + } + + call->AsCall()->SetIsAsync(new (this, CMK_Async) AsyncCallInfo(asyncInfo)); + + if (asyncInfo.ExecutionContextHandling == ExecutionContextHandling::SaveAndRestore) + { + compMustSaveAsyncContexts = true; + + // In this case we will need to save the context after the arguments are evaluated. + // Spill the arguments to accomplish that. + // (We could do this via splitting in SaveAsyncContexts, but since we need to + // handle inline candidates we won't gain much.) + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("Async await with execution context save and restore")); + } +} + +//------------------------------------------------------------------------ +// impInsertAsyncContinuationForLdvirtftnCall: +// Insert the async continuation argument for a call the EE asked to be +// performed via ldvirtftn. +// +// Arguments: +// call - The call +// +// Remarks: +// Should be called before the 'this' arg is inserted, but after other IL args +// have been inserted. +// +void Compiler::impInsertAsyncContinuationForLdvirtftnCall(GenTreeCall* call) +{ + assert(call->AsCall()->IsAsync()); + + if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L) + { + call->AsCall()->gtArgs.PushFront(this, NewCallArg::Primitive(gtNewNull(), TYP_REF) + .WellKnown(WellKnownArg::AsyncContinuation)); + } + else + { + call->AsCall()->gtArgs.PushBack(this, NewCallArg::Primitive(gtNewNull(), TYP_REF) + .WellKnown(WellKnownArg::AsyncContinuation)); + } +} + //------------------------------------------------------------------------ // addFatPointerCandidate: mark the call and the method, that they have a fat pointer candidate. // Spill inline candidates in the call node, because they shouldn't be cloned. From d770eb4b99f9cf36a628f6f346ec413ff2acf4be Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 24 Oct 2025 12:40:31 +0200 Subject: [PATCH 09/23] Fix inlining with EH --- src/coreclr/jit/fginline.cpp | 175 ++++++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index acf437571f0168..7b56494dbdbc89 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -402,13 +402,177 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorRemoveFlags(BBF_DONT_REMOVE); + // If the inlinee has EH, merge the EH tables, and figure out how much of + // a shift we need to make in the inlinee blocks EH indicies. + // + unsigned const inlineeRegionCount = m_compiler->InlineeCompiler->compHndBBtabCount; + const bool inlineeHasEH = inlineeRegionCount > 0; + unsigned inlineeIndexShift = 0; + + if (inlineeHasEH) + { + // If the call site also has EH, we need to insert the inlinee clauses + // so they are a child of the call site's innermost enclosing region. + // Figure out what this is. + // + bool inTryRegion = false; + unsigned const enclosingRegion = m_compiler->ehGetMostNestedRegionIndex(callBlock, &inTryRegion); + + // We will insert the inlinee clauses in bulk before this index. + // + unsigned insertBeforeIndex = 0; + + if (enclosingRegion == 0) + { + // The call site is not in an EH region, so we can put the inlinee EH clauses + // at the end of root method's the EH table. + // + // For example, if the root method already has EH#0, and the inlinee has 2 regions + // + // enclosingRegion will be 0 + // inlineeIndexShift will be 1 + // insertBeforeIndex will be 1 + // + // inlinee eh0 -> eh1 + // inlinee eh1 -> eh2 + // + // root eh0 -> eh0 + // + inlineeIndexShift = m_compiler->compHndBBtabCount; + insertBeforeIndex = m_compiler->compHndBBtabCount; + } + else + { + // The call site is in an EH region, so we can put the inlinee EH clauses + // just before the enclosing region + // + // Note enclosingRegion is region index + 1. So EH#0 will be represented by 1 here. + // + // For example, if the enclosing EH regions are try#2 and hnd#3, and the inlinee has 2 eh clauses + // + // enclosingRegion will be 3 (try2 + 1) + // inlineeIndexShift will be 2 + // insertBeforeIndex will be 2 + // + // inlinee eh0 -> eh2 + // inlinee eh1 -> eh3 + // + // root eh0 -> eh0 + // root eh1 -> eh1 + // + // root eh2 -> eh4 + // root eh3 -> eh5 + // + inlineeIndexShift = enclosingRegion - 1; + insertBeforeIndex = enclosingRegion - 1; + } + + JITDUMP( + "Inlinee has EH. In root method, inlinee's %u EH region indices will shift by %u and become EH#%02u ... EH#%02u (%p)\n", + inlineeRegionCount, inlineeIndexShift, insertBeforeIndex, insertBeforeIndex + inlineeRegionCount - 1, + &inlineeIndexShift); + + if (enclosingRegion != 0) + { + JITDUMP("Inlinee is nested within current %s EH#%02u (which will become EH#%02u)\n", + inTryRegion ? "try" : "hnd", enclosingRegion - 1, enclosingRegion - 1 + inlineeRegionCount); + } + else + { + JITDUMP("Inlinee is not nested inside any EH region\n"); + } + + // Grow the EH table. We verified in fgFindBasicBlocks that this won't fail. + // + EHblkDsc* const outermostEbd = + m_compiler->fgTryAddEHTableEntries(insertBeforeIndex, inlineeRegionCount, /* deferAdding */ false); + assert(outermostEbd != nullptr); + + // fgTryAddEHTableEntries has adjusted the indices of all root method blocks and EH clauses + // to accommodate the new entries. No other changes to those are needed. + // + // We just need to add in and fix up the new entries from the inlinee. + // + // Fetch the new enclosing try/handler table indicies. + // + const unsigned enclosingTryIndex = + callBlock->hasTryIndex() ? callBlock->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; + const unsigned enclosingHndIndex = + callBlock->hasHndIndex() ? callBlock->getHndIndex() : EHblkDsc::NO_ENCLOSING_INDEX; + + // Copy over the EH table entries from inlinee->root and adjust their enclosing indicies. + // + for (unsigned XTnum = 0; XTnum < inlineeRegionCount; XTnum++) + { + unsigned newXTnum = XTnum + inlineeIndexShift; + m_compiler->compHndBBtab[newXTnum] = m_compiler->InlineeCompiler->compHndBBtab[XTnum]; + EHblkDsc* const ebd = &m_compiler->compHndBBtab[newXTnum]; + + if (ebd->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + ebd->ebdEnclosingTryIndex += (unsigned short)inlineeIndexShift; + } + else + { + ebd->ebdEnclosingTryIndex = (unsigned short)enclosingTryIndex; + } + + if (ebd->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + ebd->ebdEnclosingHndIndex += (unsigned short)inlineeIndexShift; + } + else + { + ebd->ebdEnclosingHndIndex = (unsigned short)enclosingHndIndex; + } + } + } + + // Fetch the new enclosing try/handler indicies for blocks. + // Note these are represented differently than the EH table indices. + // + const unsigned blockEnclosingTryIndex = callBlock->hasTryIndex() ? callBlock->getTryIndex() + 1 : 0; + const unsigned blockEnclosingHndIndex = callBlock->hasHndIndex() ? callBlock->getHndIndex() + 1 : 0; + // Set the try and handler index and fix the jump types of inlinee's blocks. // for (BasicBlock* const block : m_compiler->InlineeCompiler->Blocks()) { - noway_assert(!block->hasTryIndex()); - noway_assert(!block->hasHndIndex()); - block->copyEHRegion(callBlock); + if (block->hasTryIndex()) + { + JITDUMP("Inlinee " FMT_BB " has old try index %u, shift %u, new try index %u\n", block->bbNum, + (unsigned)block->bbTryIndex, inlineeIndexShift, + (unsigned)(block->bbTryIndex + inlineeIndexShift)); + block->bbTryIndex += (unsigned short)inlineeIndexShift; + } + else + { + block->bbTryIndex = (unsigned short)blockEnclosingTryIndex; + } + + if (block->hasHndIndex()) + { + block->bbHndIndex += (unsigned short)inlineeIndexShift; + } + else + { + block->bbHndIndex = (unsigned short)blockEnclosingHndIndex; + } + + // Sanity checks + // + if (callBlock->hasTryIndex()) + { + assert(block->hasTryIndex()); + assert(block->getTryIndex() <= callBlock->getTryIndex()); + } + + if (callBlock->hasHndIndex()) + { + assert(block->hasHndIndex()); + assert(block->getHndIndex() <= callBlock->getHndIndex()); + } + block->CopyFlags(callBlock, BBF_BACKWARD_JUMP | BBF_PROF_WEIGHT); // Update block nums appropriately @@ -507,6 +671,9 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorfgFirstBB->hasTryIndex()); + assert(!inlineeComp->fgFirstBB->hasHndIndex()); + if (*use == nullptr) { *use = m_compiler->gtNewNothingNode(); @@ -535,6 +702,8 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorfgFirstBB->hasTryIndex()); + assert(!inlineeComp->fgFirstBB->hasHndIndex()); JITDUMP("Inlinee does not have control flow; inserting mid-block\n"); #ifdef DEBUG From 7ba6bdbeea0efbcea7fa0262f0901bf8cc8993f4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 25 Oct 2025 14:36:50 +0200 Subject: [PATCH 10/23] Fix issues, run jit-format --- src/coreclr/jit/debuginfo.cpp | 18 +-- src/coreclr/jit/fginline.cpp | 124 +++++++++++--------- src/coreclr/jit/fgprofile.cpp | 14 +++ src/coreclr/jit/gentree.cpp | 12 +- src/coreclr/jit/importer.cpp | 13 +- src/coreclr/jit/importercalls.cpp | 2 +- src/coreclr/jit/indirectcalltransformer.cpp | 20 ++-- src/coreclr/jit/inline.h | 9 +- src/coreclr/jit/lclvars.cpp | 6 + 9 files changed, 130 insertions(+), 88 deletions(-) diff --git a/src/coreclr/jit/debuginfo.cpp b/src/coreclr/jit/debuginfo.cpp index e2464fcfb9aa6d..74e2cfaed232fd 100644 --- a/src/coreclr/jit/debuginfo.cpp +++ b/src/coreclr/jit/debuginfo.cpp @@ -113,17 +113,17 @@ void DebugInfo::Validate() const if (!di.IsValid()) continue; - //bool isValidOffs = di.GetLocation().GetOffset() < di.GetInlineContext()->GetILSize(); - //if (isValidOffs) + // bool isValidOffs = di.GetLocation().GetOffset() < di.GetInlineContext()->GetILSize(); + // if (isValidOffs) //{ - // bool isValidStart = di.GetInlineContext()->GetILInstsSet()->bitVectTest(di.GetLocation().GetOffset()); - // assert(isValidStart && - // "Detected invalid debug info: IL offset does not refer to the start of an IL instruction"); - //} - //else + // bool isValidStart = di.GetInlineContext()->GetILInstsSet()->bitVectTest(di.GetLocation().GetOffset()); + // assert(isValidStart && + // "Detected invalid debug info: IL offset does not refer to the start of an IL instruction"); + // } + // else //{ - // assert(!"Detected invalid debug info: IL offset is out of range"); - //} + // assert(!"Detected invalid debug info: IL offset is out of range"); + // } } while (di.GetParent(&di)); } diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index cbae63c91dd6bf..5bb3960a78bca9 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -204,11 +204,11 @@ bool Compiler::TypeInstantiationComplexityExceeds(CORINFO_CLASS_HANDLE handle, i class InlineAndDevirtualizeWalker : public GenTreeVisitor { - bool m_madeChanges = false; - BasicBlock* m_block = nullptr; - Statement* m_statement = nullptr; - BasicBlock* m_nextBlock = nullptr; - Statement* m_nextStatement = nullptr; + bool m_madeChanges = false; + BasicBlock* m_block = nullptr; + Statement* m_statement = nullptr; + BasicBlock* m_nextBlock = nullptr; + Statement* m_nextStatement = nullptr; public: @@ -241,10 +241,10 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorGetRootNodePointer(), nullptr); } @@ -295,7 +295,7 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorgtInlineCandidateInfo->inlinersContext, "TryInline"); - m_compiler->compCurBB = m_block; + m_compiler->compCurBB = m_block; m_compiler->fgMorphStmt = m_statement; InlineInfo inlineInfo{}; @@ -327,10 +327,11 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorcompCurBB; - Statement* callStmt = m_compiler->fgMorphStmt; - Statement* newStmt = nullptr; - GenTree** use2 = nullptr; - m_compiler->gtSplitTree(callBlock, callStmt, call, &newStmt, &use2, /* includeOperands */ false, /* early */ true); + Statement* callStmt = m_compiler->fgMorphStmt; + Statement* newStmt = nullptr; + GenTree** use2 = nullptr; + m_compiler->gtSplitTree(callBlock, callStmt, call, &newStmt, &use2, /* includeOperands */ false, + /* early */ true); assert(use2 == use); JITDUMP("After split:\n"); @@ -346,14 +347,15 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorgtInlineCandidateInfo->result.substExpr; + GenTree* substExpr = call->gtInlineCandidateInfo->result.substExpr; auto getComplexity = [](GenTree* tree) { return 1; }; - if ((substExpr != nullptr) && substExpr->IsValue() && m_compiler->gtComplexityExceeds(substExpr, 16, getComplexity)) + if ((substExpr != nullptr) && substExpr->IsValue() && + m_compiler->gtComplexityExceeds(substExpr, 16, getComplexity)) { JITDUMP("Substitution expression is complex so spilling it to its own statement\n"); - unsigned lclNum = m_compiler->lvaGrabTemp(false DEBUGARG("Complex inlinee substitution expression")); + unsigned lclNum = m_compiler->lvaGrabTemp(false DEBUGARG("Complex inlinee substitution expression")); Statement* storeTemp = m_compiler->gtNewStmt(m_compiler->gtNewTempStore(lclNum, substExpr)); DISPSTMT(storeTemp); inlineInfo.teardownStatements.Append(storeTemp); @@ -387,13 +389,13 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorfirstStmt() : predStmt->GetNextStmt(); return true; } - BasicBlock* continueBlock = m_compiler->fgSplitBlockBeforeStatement(callBlock, callStmt); - unsigned const baseBBNum = m_compiler->fgBBNumMax; + BasicBlock* continueBlock = m_compiler->fgSplitBlockBeforeStatement(callBlock, callStmt); + unsigned const baseBBNum = m_compiler->fgBBNumMax; JITDUMP("split " FMT_BB " after the inlinee call site; after portion is now " FMT_BB "\n", callBlock->bbNum, continueBlock->bbNum); @@ -504,9 +506,9 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorcompHndBBtab[newXTnum] = m_compiler->InlineeCompiler->compHndBBtab[XTnum]; - EHblkDsc* const ebd = &m_compiler->compHndBBtab[newXTnum]; + EHblkDsc* const ebd = &m_compiler->compHndBBtab[newXTnum]; if (ebd->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) { @@ -625,9 +627,10 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorfgDispBasicBlocks(m_compiler->InlineeCompiler->fgFirstBB, m_compiler->InlineeCompiler->fgLastBB, true)); + JITDUMPEXEC(m_compiler->fgDispBasicBlocks(m_compiler->InlineeCompiler->fgFirstBB, + m_compiler->InlineeCompiler->fgLastBB, true)); - m_nextBlock = callBlock; + m_nextBlock = callBlock; m_nextStatement = predStmt == nullptr ? callBlock->firstStmt() : predStmt->GetNextStmt(); return true; @@ -646,8 +649,7 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorfgFirstBB->bbStmtList != nullptr) || - !inlineInfo.setupStatements.Empty() || + if ((inlineeComp->fgFirstBB->bbStmtList != nullptr) || !inlineInfo.setupStatements.Empty() || !inlineInfo.teardownStatements.Empty()) { return false; @@ -660,9 +662,11 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorresult.substExpr; + *use = inlineInfo.inlineCandidateInfo->result.substExpr; - auto getComplexity = [](GenTree* tree) { return 1; }; + auto getComplexity = [](GenTree* tree) { + return 1; + }; if (m_compiler->gtComplexityExceeds(m_statement->GetRootNode(), 16, getComplexity)) { *use = call; @@ -913,7 +917,7 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorimpMarkInlineCandidate(call, context, false, &callInfo, inlinersContext); // Reprocess the statement to pick up the inline in preorder. assert(m_nextBlock == nullptr); - m_nextBlock = m_block; + m_nextBlock = m_block; m_nextStatement = m_statement; } m_madeChanges = true; @@ -959,9 +963,9 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorOperIs(GT_JTRUE)) { // See if this jtrue is now foldable. - BasicBlock* block = m_block; - GenTree* condTree = tree->AsOp()->gtOp1; - bool modifiedTree = false; + BasicBlock* block = m_block; + GenTree* condTree = tree->AsOp()->gtOp1; + bool modifiedTree = false; assert(tree == block->lastStmt()->GetRootNode()); while (condTree->OperIs(GT_COMMA)) @@ -1094,8 +1098,8 @@ PhaseStatus Compiler::fgInline() noway_assert(fgFirstBB != nullptr); InlineAndDevirtualizeWalker walker(this); - bool madeChanges = false; - BasicBlock* currentBlock = fgFirstBB; + bool madeChanges = false; + BasicBlock* currentBlock = fgFirstBB; while (currentBlock != nullptr) { @@ -1112,7 +1116,7 @@ PhaseStatus Compiler::fgInline() if (walker.NextBlock() != nullptr) { currentBlock = walker.NextBlock(); - currentStmt = walker.NextStatement(); + currentStmt = walker.NextStatement(); continue; } @@ -1128,7 +1132,6 @@ PhaseStatus Compiler::fgInline() } currentBlock = currentBlock->Next(); - } madeChanges |= walker.MadeChanges(); @@ -1143,7 +1146,6 @@ PhaseStatus Compiler::fgInline() // Call Compiler::fgDebugCheckInlineCandidates on each node fgWalkTreePre(stmt->GetRootNodePointer(), fgDebugCheckInlineCandidates); } - } fgVerifyHandlerTab(); @@ -1249,7 +1251,7 @@ void Compiler::fgMorphCallInline(InlineInfo& inlineInfo, GenTreeCall* call, Inli // hanging a "nothing" node to it. Later the "nothing" node will be removed // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node. inlCandInfo->result.substExpr = nullptr; - inlCandInfo->result.substBB = nullptr; + inlCandInfo->result.substBB = nullptr; } // Inlinee compiler may have determined call does not return; if so, update this compiler's state. @@ -1282,7 +1284,10 @@ void Compiler::fgMorphCallInline(InlineInfo& inlineInfo, GenTreeCall* call, Inli // If the inline succeeded, this context will already be marked as successful. If it failed and // a context is returned, then it will not have been marked as success or failed. // -void Compiler::fgMorphCallInlineHelper(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* result, InlineContext** createdContext) +void Compiler::fgMorphCallInlineHelper(InlineInfo& inlineInfo, + GenTreeCall* call, + InlineResult* result, + InlineContext** createdContext) { // Don't expect any surprises here. assert(result->IsCandidate()); @@ -1481,7 +1486,10 @@ Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTree** pTree, f #endif // DEBUG -void Compiler::fgInvokeInlineeCompiler(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* inlineResult, InlineContext** createdContext) +void Compiler::fgInvokeInlineeCompiler(InlineInfo& inlineInfo, + GenTreeCall* call, + InlineResult* inlineResult, + InlineContext** createdContext) { noway_assert(call->OperIs(GT_CALL)); noway_assert(call->IsInlineCandidate()); @@ -1965,7 +1973,9 @@ void Compiler::fgFinalizeInlineeStatements(InlineInfo* pInlineInfo) // newStmt - updated with the new statement // callDI - debug info for the call // -void Compiler::fgInsertInlineeArgument(StatementListBuilder& statements, const InlArgInfo& argInfo, const DebugInfo& callDI) +void Compiler::fgInsertInlineeArgument(StatementListBuilder& statements, + const InlArgInfo& argInfo, + const DebugInfo& callDI) { const bool argIsSingleDef = !argInfo.argHasLdargaOp && !argInfo.argHasStargOp; CallArg* arg = argInfo.arg; @@ -2023,7 +2033,7 @@ void Compiler::fgInsertInlineeArgument(StatementListBuilder& statements, const I { noway_assert(argInfo.argIsUsed == false); Statement* newStmt = nullptr; - bool append = true; + bool append = true; if (argNode->OperIs(GT_BLK)) { @@ -2141,9 +2151,9 @@ void Compiler::fgInsertInlineeArgument(StatementListBuilder& statements, const I // void Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) { - BasicBlock* block = inlineInfo->iciBlock; - const DebugInfo& callDI = inlineInfo->iciStmt->GetDebugInfo(); - GenTreeCall* call = inlineInfo->iciCall->AsCall(); + BasicBlock* block = inlineInfo->iciBlock; + const DebugInfo& callDI = inlineInfo->iciStmt->GetDebugInfo(); + GenTreeCall* call = inlineInfo->iciCall->AsCall(); noway_assert(call->OperIs(GT_CALL)); @@ -2189,18 +2199,18 @@ void Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) InlArgInfo* argInfo = nullptr; switch (arg.GetWellKnownArg()) { - case WellKnownArg::AsyncContinuation: - continue; - case WellKnownArg::RetBuffer: - argInfo = inlineInfo->inlRetBufferArgInfo; - break; - case WellKnownArg::InstParam: - argInfo = inlineInfo->inlInstParamArgInfo; - break; - default: - assert(ilArgNum < inlineInfo->argCnt); - argInfo = &inlineInfo->inlArgInfo[ilArgNum++]; - break; + case WellKnownArg::AsyncContinuation: + continue; + case WellKnownArg::RetBuffer: + argInfo = inlineInfo->inlRetBufferArgInfo; + break; + case WellKnownArg::InstParam: + argInfo = inlineInfo->inlInstParamArgInfo; + break; + default: + assert(ilArgNum < inlineInfo->argCnt); + argInfo = &inlineInfo->inlArgInfo[ilArgNum++]; + break; } assert(argInfo != nullptr); @@ -2217,7 +2227,7 @@ void Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) { CORINFO_CLASS_HANDLE exactClass = eeGetClassFromContext(inlineInfo->inlineCandidateInfo->exactContextHandle); - tree = fgGetSharedCCtor(exactClass); + tree = fgGetSharedCCtor(exactClass); Statement* newStmt = gtNewStmt(tree, callDI); inlineInfo->setupStatements.Append(newStmt); } diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 6c5b3d56d0d2ee..18e811a91d4279 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -2624,6 +2624,20 @@ PhaseStatus Compiler::fgInstrumentMethod() JITDUMP("Inlinee instrumentation disabled by config\n"); return PhaseStatus::MODIFIED_NOTHING; } + + const InlineIRResult& result = impInlineInfo->inlineCandidateInfo->result; + + // If there's a retExpr but no substBB, we assume the retExpr is a temp + // and so not interesting to instrumentation. + // + if (result.substBB != nullptr) + { + assert(result.substExpr != nullptr); + retBB = result.substBB; + + tempInlineeReturnStmt = fgNewStmtAtEnd(retBB, result.substExpr); + JITDUMP("Temporarily adding ret expr [%06u] to " FMT_BB "\n", dspTreeID(result.substExpr), retBB->bbNum); + } } PhaseStatus status = fgInstrumentMethodCore(); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index f18f08a36e097a..5f6f46c691200b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16947,8 +16947,13 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S // Returns: // True if any changes were made; false if nothing needed to be done to split the tree. // -bool Compiler::gtSplitTree( - BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse, bool includeOperands, bool early) +bool Compiler::gtSplitTree(BasicBlock* block, + Statement* stmt, + GenTree* splitPoint, + Statement** firstNewStmt, + GenTree*** splitNodeUse, + bool includeOperands, + bool early) { class Splitter final : public GenTreeVisitor { @@ -16973,7 +16978,8 @@ bool Compiler::gtSplitTree( UseExecutionOrder = true }; - Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode, bool includeOperands, bool early) + Splitter( + Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode, bool includeOperands, bool early) : GenTreeVisitor(compiler) , m_bb(bb) , m_splitStmt(stmt) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 82eff5bea728b2..5af344825173bd 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -6810,7 +6810,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) JITDUMP(".... checking for GDV returning IEnumerator...\n"); bool isEnumeratorT = false; - GenTreeCall* const call = op1->AsCall(); + GenTreeCall* const call = op1->AsCall(); bool isExact = false; bool isNonNull = false; CORINFO_CLASS_HANDLE retCls = gtGetClassHandle(call, &isExact, &isNonNull); @@ -11321,7 +11321,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) assert(iciCall->gtArgs.HasRetBuffer() && (impInlineInfo->inlRetBufferArgInfo != nullptr)); InlLclVarInfo lclInfo = {}; lclInfo.lclTypeInfo = TYP_BYREF; - GenTree* dest = impInlineFetchArg(*impInlineInfo->inlRetBufferArgInfo, lclInfo); + GenTree* dest = impInlineFetchArg(*impInlineInfo->inlRetBufferArgInfo, lclInfo); if (fgNeedReturnSpillTemp()) { @@ -13145,7 +13145,7 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, CallArg* arg, InlineResult* inlineResult) { - argInfo->arg = arg; + argInfo->arg = arg; GenTree* curArgVal = arg->GetNode(); GenTree* lclVarTree; @@ -13167,7 +13167,8 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, // Spilling code relies on correct aliasability annotations, except for // retbufs that are not actually exposed in IL. - assert(varDsc->lvHasLdAddrOp || varDsc->IsAddressExposed() || (arg->GetWellKnownArg() == WellKnownArg::RetBuffer)); + assert(varDsc->lvHasLdAddrOp || varDsc->IsAddressExposed() || + (arg->GetWellKnownArg() == WellKnownArg::RetBuffer)); } if (curArgVal->gtFlags & GTF_ALL_EFFECT) @@ -13199,8 +13200,8 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, argInfo->argHasSideEff = false; } - bool isExact = false; - bool isNonNull = false; + bool isExact = false; + bool isNonNull = false; argInfo->argIsExact = (gtGetClassHandle(curArgVal, &isExact, &isNonNull) != NO_CLASS_HANDLE) && isExact; // If the arg is a local that is address-taken, we can't safely diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 7171e42597bb77..c2c3c8cb334ce4 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -1410,7 +1410,7 @@ var_types Compiler::impImportCall(OPCODE opcode, if (isGuardedDevirtualizationCandidate) { - unsigned lclNum = lvaGrabTemp(false DEBUGARG("GDV candidate result")); + unsigned lclNum = lvaGrabTemp(false DEBUGARG("GDV candidate result")); LclVarDsc* varDsc = lvaGetDesc(lclNum); // Keep the information about small typedness to avoid // inserting unnecessary casts around normalization. diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index d1db99d596daa0..8436019055bf38 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -202,8 +202,8 @@ class IndirectCallTransformer } protected: - virtual const char* Name() = 0; - virtual void ClearFlag() = 0; + virtual const char* Name() = 0; + virtual void ClearFlag() = 0; //------------------------------------------------------------------------ // CreateRemainder: split current block at the call stmt and @@ -386,8 +386,8 @@ class IndirectCallTransformer FatPointerCallTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt) : Transformer(compiler, block, stmt) { - fptrAddress = origCall->gtCallAddr; - pointerType = fptrAddress->TypeGet(); + fptrAddress = origCall->gtCallAddr; + pointerType = fptrAddress->TypeGet(); } protected: @@ -411,7 +411,7 @@ class IndirectCallTransformer { assert(checkIdx == 0); - checkBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, currBlock, currBlock); + checkBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, currBlock, currBlock); GenTree** lastSpilledArg = nullptr; for (GenTree** arg : origCall->UseEdges()) @@ -458,7 +458,8 @@ class IndirectCallTransformer DoPreOrder = true, }; - Visitor(Compiler* comp) : GenTreeVisitor(comp) + Visitor(Compiler* comp) + : GenTreeVisitor(comp) { } @@ -982,9 +983,10 @@ class IndirectCallTransformer // Re-establish this call as an inline candidate. // - inlineInfo->clsHandle = compiler->info.compCompHnd->getMethodClass(methodHnd); - inlineInfo->exactContextHandle = context; - inlineInfo->preexistingSpillTemp = doesReturnValue ? stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() : BAD_VAR_NUM; + inlineInfo->clsHandle = compiler->info.compCompHnd->getMethodClass(methodHnd); + inlineInfo->exactContextHandle = context; + inlineInfo->preexistingSpillTemp = + doesReturnValue ? stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() : BAD_VAR_NUM; call->SetSingleInlineCandidateInfo(inlineInfo); } diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h index 8d6d419f00f4c5..4ce99727fcf321 100644 --- a/src/coreclr/jit/inline.h +++ b/src/coreclr/jit/inline.h @@ -346,8 +346,11 @@ class InlineResult public: // Construct a new InlineResult to help evaluate a // particular call for inlining. - InlineResult( - Compiler* compiler, GenTreeCall* call, InlineContext* context, const char* description, bool doNotReport = false); + InlineResult(Compiler* compiler, + GenTreeCall* call, + InlineContext* context, + const char* description, + bool doNotReport = false); // Construct a new InlineResult to evaluate a particular // method to see if it is inlineable. @@ -591,7 +594,7 @@ struct HandleHistogramProfileCandidateInfo // TODO: move to InlineInfo struct InlineIRResult { - GenTree* substExpr; + GenTree* substExpr; BasicBlock* substBB; }; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 4e6b559c989baf..97073789af43e8 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1599,6 +1599,12 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum) return false; } + if (varDsc->GetLayout() == nullptr) + { + JITDUMP(" struct promotion of V%02u is disabled because it has no layout\n", lclNum); + return false; + } + if (varDsc->GetLayout()->IsCustomLayout()) { JITDUMP(" struct promotion of V%02u is disabled because it has custom layout\n", lclNum); From 7cc3cefebc96e79087829b48f0c1f855317d4b56 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 25 Feb 2026 20:44:04 +0100 Subject: [PATCH 11/23] Fix after merge --- src/coreclr/jit/fgopt.cpp | 12 +----------- src/coreclr/jit/importer.cpp | 5 ----- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 14c819084e3718..9391b9fc55d17b 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5650,17 +5650,7 @@ bool Compiler::gtTreeContainsAsyncCall(GenTree* tree) } auto isAsyncCall = [=](GenTree* tree) { - if (tree->IsCall() && tree->AsCall()->IsAsync()) - { - return true; - } - - if (tree->OperIs(GT_RET_EXPR) && gtTreeContainsAsyncCall(tree->AsRetExpr()->gtInlineCandidate)) - { - return true; - } - - return false; + return tree->IsCall() && tree->AsCall()->IsAsync(); }; return gtFindNodeInTree(tree, isAsyncCall) != nullptr; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index a1b1dce1b797cd..17c4da63014812 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -11026,11 +11026,6 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) if (map->Lookup(origCall, &enumeratorLcl)) { GenTree* returnValue = op2; - if (returnValue->OperIs(GT_RET_EXPR)) - { - returnValue = returnValue->AsRetExpr()->gtInlineCandidate; - } - if (returnValue->IsCall()) { JITDUMP("Flagging [%06u] for enumerator cloning via V%02u\n", dspTreeID(returnValue), From 7120c035288a40f8cfaa187c2e75ab21fadf514d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 25 Feb 2026 22:26:40 +0100 Subject: [PATCH 12/23] Fix debug info --- src/coreclr/jit/async.cpp | 6 ++++-- src/coreclr/jit/compiler.h | 9 ++++++--- src/coreclr/jit/debuginfo.cpp | 22 ++++++++++----------- src/coreclr/jit/fginline.cpp | 33 ++++++++++++++----------------- src/coreclr/jit/importer.cpp | 6 ++++-- src/coreclr/jit/importercalls.cpp | 33 ++++++++++++++++++++----------- src/coreclr/jit/inline.cpp | 7 ++++--- src/coreclr/jit/inline.h | 9 +++++---- 8 files changed, 71 insertions(+), 54 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 9f79a026b6c5ab..493eb16e02da50 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -173,7 +173,8 @@ PhaseStatus Compiler::SaveAsyncContexts() CORINFO_CALL_INFO callInfo = {}; callInfo.hMethod = captureCall->gtCallMethHnd; callInfo.methodFlags = info.compCompHnd->getMethodAttribs(callInfo.hMethod); - impMarkInlineCandidate(captureCall, MAKE_METHODCONTEXT(callInfo.hMethod), false, &callInfo, compInlineContext); + impMarkInlineCandidate(captureCall, MAKE_METHODCONTEXT(callInfo.hMethod), false, &callInfo, compInlineContext, + DebugInfo()); Statement* captureStmt = fgNewStmtFromTree(captureCall); fgInsertStmtAtBeg(fgFirstBB, captureStmt); @@ -372,7 +373,8 @@ BasicBlock* Compiler::CreateReturnBB(unsigned* mergedReturnLcl) CORINFO_CALL_INFO callInfo = {}; callInfo.hMethod = restoreCall->gtCallMethHnd; callInfo.methodFlags = info.compCompHnd->getMethodAttribs(callInfo.hMethod); - impMarkInlineCandidate(restoreCall, MAKE_METHODCONTEXT(callInfo.hMethod), false, &callInfo, compInlineContext); + impMarkInlineCandidate(restoreCall, MAKE_METHODCONTEXT(callInfo.hMethod), false, &callInfo, compInlineContext, + DebugInfo()); Statement* restoreStmt = fgNewStmtFromTree(restoreCall); fgInsertStmtAtEnd(newReturnBB, restoreStmt); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9674d0a998e47e..83e43be543a636 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5156,6 +5156,7 @@ class Compiler unsigned methAttr, CORINFO_CONTEXT_HANDLE exactContextHnd, InlineContext* inlinersContext, + const DebugInfo& debugInfo, InlineCandidateInfo** ppInlineCandidateInfo, InlineResult* inlineResult); @@ -5178,7 +5179,8 @@ class Compiler CORINFO_CONTEXT_HANDLE exactContextHnd, bool exactContextNeedsRuntimeLookup, CORINFO_CALL_INFO* callInfo, - InlineContext* inlinersContext); + InlineContext* inlinersContext, + const DebugInfo& debugInfo); void impMarkInlineCandidateHelper(GenTreeCall* call, uint8_t candidateIndex, @@ -5186,7 +5188,8 @@ class Compiler bool exactContextNeedsRuntimeLookup, CORINFO_CALL_INFO* callInfo, InlineContext* inlinersContext, - InlineResult* inlineResult); + InlineResult* inlineResult, + const DebugInfo& debugInfo); bool impTailCallRetTypeCompatible(bool allowWidening, var_types callerRetType, @@ -6633,7 +6636,7 @@ class Compiler void fgMorphCallInline(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* result); void fgMorphCallInlineHelper(InlineInfo& inlineInfo, GenTreeCall* call, InlineResult* result, InlineContext** createdContext); #if DEBUG - void fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call); + void fgNoteNonInlineCandidate(GenTreeCall* call); static fgWalkPreFn fgFindNonInlineCandidate; #endif GenTree* fgOptimizeDelegateConstructor(GenTreeCall* call, diff --git a/src/coreclr/jit/debuginfo.cpp b/src/coreclr/jit/debuginfo.cpp index cf9779eeefa908..15cb567bbdeb9b 100644 --- a/src/coreclr/jit/debuginfo.cpp +++ b/src/coreclr/jit/debuginfo.cpp @@ -88,17 +88,17 @@ void DebugInfo::Validate() const if (!di.IsValid()) continue; - // bool isValidOffs = di.GetLocation().GetOffset() < di.GetInlineContext()->GetILSize(); - // if (isValidOffs) - //{ - // bool isValidStart = di.GetInlineContext()->GetILInstsSet()->bitVectTest(di.GetLocation().GetOffset()); - // assert(isValidStart && - // "Detected invalid debug info: IL offset does not refer to the start of an IL instruction"); - // } - // else - //{ - // assert(!"Detected invalid debug info: IL offset is out of range"); - // } + bool isValidOffs = di.GetLocation().GetOffset() < di.GetInlineContext()->GetILSize(); + if (isValidOffs) + { + bool isValidStart = di.GetInlineContext()->GetILInstsSet()->bitVectTest(di.GetLocation().GetOffset()); + assert(isValidStart && + "Detected invalid debug info: IL offset does not refer to the start of an IL instruction"); + } + else + { + assert(!"Detected invalid debug info: IL offset is out of range"); + } } while (di.GetParent(&di)); } diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 924759948b393b..e9caf0cb9ea7fa 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -868,9 +868,10 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorgtInlineContext; CORINFO_METHOD_HANDLE method = call->gtLateDevirtualizationInfo->methodHnd; CORINFO_CONTEXT_HANDLE context = call->gtLateDevirtualizationInfo->exactContextHnd; - InlineContext* inlinersContext = call->gtLateDevirtualizationInfo->inlinersContext; + ILLocation ilLocation = call->gtLateDevirtualizationInfo->ilLocation; unsigned methodFlags = 0; const bool isLateDevirtualization = true; const bool explicitTailCall = call->IsTailPrefixedCall(); @@ -887,7 +888,8 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorimpMarkInlineCandidate(call, context, false, &callInfo, inlinersContext); + m_compiler->impMarkInlineCandidate(call, context, false, &callInfo, inlinersContext, + DebugInfo(inlinersContext, ilLocation)); // Reprocess the statement to pick up the inline in preorder. assert(m_nextBlock == nullptr); m_nextBlock = m_block; @@ -1082,7 +1084,7 @@ PhaseStatus Compiler::fgInline() // In debug builds we want the inline tree to show all failed // inlines. Some inlines may fail very early and never make it to // candidate stage. So scan the tree looking for those early failures. - INDEBUG(fgWalkTreePre(currentStmt->GetRootNodePointer(), fgFindNonInlineCandidate, currentStmt)); + INDEBUG(fgWalkTreePre(currentStmt->GetRootNodePointer(), fgFindNonInlineCandidate, nullptr)); walker.VisitStatement(currentBlock, currentStmt); @@ -1189,8 +1191,8 @@ void Compiler::fgMorphCallInline(InlineInfo& inlineInfo, GenTreeCall* call, Inli { #ifdef DEBUG // In debug we always put all inline attempts into the inline tree. - InlineContext* ctx = m_inlineStrategy->NewContext(call->GetSingleInlineCandidateInfo()->inlinersContext, - fgMorphStmt, call); + InlineContext* ctx = + m_inlineStrategy->NewContext(call->GetSingleInlineCandidateInfo()->inlinersContext, call); ctx->SetFailed(inlineResult); #endif } @@ -1389,10 +1391,9 @@ Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTree** pTree, fgWal if (tree->OperIs(GT_CALL)) { Compiler* compiler = data->m_compiler; - Statement* stmt = (Statement*)data->pCallbackData; GenTreeCall* call = tree->AsCall(); - compiler->fgNoteNonInlineCandidate(stmt, call); + compiler->fgNoteNonInlineCandidate(call); } return WALK_CONTINUE; } @@ -1402,14 +1403,13 @@ Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTree** pTree, fgWal // not marked as inline candidates. // // Arguments: -// stmt - statement containing the call // call - the call itself // // Notes: // Used in debug only to try and place descriptions of inline failures // into the proper context in the inline tree. -void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call) +void Compiler::fgNoteNonInlineCandidate(GenTreeCall* call) { if (call->IsInlineCandidate() || call->IsGuardedDevirtualizationCandidate() || call->WasInlineCandidate()) { @@ -1433,7 +1433,7 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call) if (call->gtCallType == CT_USER_FUNC) { - m_inlineStrategy->NewContext(call->gtInlineContext, stmt, call)->SetFailed(&inlineResult); + m_inlineStrategy->NewContext(call->gtInlineContext, call)->SetFailed(&inlineResult); } } @@ -1473,7 +1473,6 @@ void Compiler::fgInvokeInlineeCompiler(InlineInfo& inlineInfo, inlineInfo.fncHandle = fncHandle; inlineInfo.iciCall = call; - inlineInfo.iciStmt = fgMorphStmt; inlineInfo.iciBlock = compCurBB; inlineInfo.thisDereferencedFirst = false; inlineInfo.retExprClassHnd = nullptr; @@ -1548,8 +1547,7 @@ void Compiler::fgInvokeInlineeCompiler(InlineInfo& inlineInfo, // late as possible, which is here. pParam->inlineInfo->inlineContext = pParam->inlineInfo->InlineRoot->m_inlineStrategy - ->NewContext(pParam->inlineInfo->inlineCandidateInfo->inlinersContext, pParam->inlineInfo->iciStmt, - pParam->inlineInfo->iciCall); + ->NewContext(pParam->inlineInfo->inlineCandidateInfo->inlinersContext, pParam->inlineInfo->iciCall); pParam->inlineInfo->argCnt = pParam->inlineCandidateInfo->methInfo.args.totalILArgs(); pParam->inlineInfo->tokenLookupContextHandle = pParam->inlineCandidateInfo->exactContextHandle; @@ -2132,9 +2130,9 @@ void Compiler::fgInsertInlineeArgument(StatementListBuilder& statements, // void Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) { - BasicBlock* block = inlineInfo->iciBlock; - const DebugInfo& callDI = inlineInfo->iciStmt->GetDebugInfo(); - GenTreeCall* call = inlineInfo->iciCall->AsCall(); + BasicBlock* block = inlineInfo->iciBlock; + DebugInfo callDI = DebugInfo(inlineInfo->inlineContext->GetParent(), inlineInfo->inlineContext->GetLocation()); + GenTreeCall* call = inlineInfo->iciCall->AsCall(); noway_assert(call->OperIs(GT_CALL)); @@ -2308,8 +2306,7 @@ void Compiler::fgInlineAppendStatements(class StatementListBuilder& statements, JITDUMP("fgInlineAppendStatements: nulling out gc ref inlinee locals.\n"); - Statement* callStmt = inlineInfo->iciStmt; - const DebugInfo& callDI = callStmt->GetDebugInfo(); + DebugInfo callDI = DebugInfo(inlineInfo->inlineContext->GetParent(), inlineInfo->inlineContext->GetLocation()); CORINFO_METHOD_INFO* InlineeMethodInfo = InlineeCompiler->info.compMethodInfo; const unsigned lclCnt = InlineeMethodInfo->locals.numArgs; InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 17c4da63014812..acecc26e116b2a 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -2077,6 +2077,8 @@ void Compiler::impCurStmtOffsSet(IL_OFFSET offs) { impCurStmtDI = impCreateDIWithCurrentStackInfo(offs, false); } + + impCurStmtDI.Validate(); } //------------------------------------------------------------------------ @@ -13010,7 +13012,7 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, if (impIsInvariant(curArgVal)) { argInfo->argIsInvariant = true; - if (argInfo->argIsThis && (curArgVal->gtOper == GT_CNS_INT) && (curArgVal->AsIntCon()->gtIconVal == 0)) + if (argInfo->argIsThis && curArgVal->OperIs(GT_CNS_INT) && (curArgVal->AsIntCon()->gtIconVal == 0)) { // Abort inlining at this call site inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_HAS_NULL_THIS); @@ -13050,7 +13052,7 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, } else { - printf("Argument #%u:", pInlineInfo->iciCall->gtArgs.GetIndex(arg)); + printf("IL Argument #%u:", pInlineInfo->iciCall->gtArgs.GetUserIndex(arg)); } if (argInfo->argIsLclVar) { diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 33f8c37c37ddbc..e265aa67ff426f 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -1065,7 +1065,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // Is it an inline candidate? impMarkInlineCandidate(call, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, - compInlineContext); + compInlineContext, impCurStmtDI); } // append the call node. @@ -1276,7 +1276,8 @@ var_types Compiler::impImportCall(OPCODE opcode, INDEBUG(call->AsCall()->gtRawILOffset = rawILOffset); // Is it an inline candidate? - impMarkInlineCandidate(call, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, compInlineContext); + impMarkInlineCandidate(call, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, compInlineContext, + impCurStmtDI); // If the call is virtual, record the inliner's context for possible use during late devirt inlining. // Also record the generics context if there is any. @@ -1288,7 +1289,7 @@ var_types Compiler::impImportCall(OPCODE opcode, LateDevirtualizationInfo* const info = new (this, CMK_Inlining) LateDevirtualizationInfo; info->methodHnd = callInfo->hMethod; info->exactContextHnd = exactContextHnd; - info->inlinersContext = compInlineContext; + info->ilLocation = impCurStmtDI.GetLocation(); call->AsCall()->gtLateDevirtualizationInfo = info; } } @@ -6222,7 +6223,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, else { uint32_t cns1 = static_cast(op1->AsIntConCommon()->IconValue()); - result = gtNewIconNode((int32_t)BitOperations::RotateLeft(cns1, cns2), baseType); + result = gtNewIconNode(BitOperations::RotateLeft(cns1, cns2), baseType); } break; } @@ -6266,7 +6267,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsLong(baseType)) { uint64_t cns1 = static_cast(op1->AsIntConCommon()->LngValue()); - result = gtNewLconNode(BitOperations::RotateRight(cns1, cns2)); + result = gtNewLconNode((int32_t)BitOperations::RotateRight(cns1, cns2)); } else { @@ -7886,6 +7887,7 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, // exactContextNeedsRuntimeLookup -- true if context required runtime lookup // callInfo -- call info from VM // inlinersContext -- the inliner's context +// debugInfo -- debug info that will be the parent debug info of statements from the inlinee // // Notes: // Mostly a wrapper for impMarkInlineCandidateHelper that also undoes @@ -7896,7 +7898,8 @@ void Compiler::impMarkInlineCandidate(GenTree* callNode, CORINFO_CONTEXT_HANDLE exactContextHnd, bool exactContextNeedsRuntimeLookup, CORINFO_CALL_INFO* callInfo, - InlineContext* inlinersContext) + InlineContext* inlinersContext, + const DebugInfo& debugInfo) { if (!opts.OptEnabled(CLFLG_INLINING)) { @@ -7919,7 +7922,7 @@ void Compiler::impMarkInlineCandidate(GenTree* callNode, // Do the actual evaluation impMarkInlineCandidateHelper(call, candidateId, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, - inlinersContext, &inlineResult); + inlinersContext, &inlineResult, debugInfo); // Ignore non-inlineable candidates // TODO: Consider keeping them to just devirtualize without inlining, at least for interface // calls on NativeAOT, but that requires more changes elsewhere too. @@ -7943,7 +7946,7 @@ void Compiler::impMarkInlineCandidate(GenTree* callNode, assert(candidatesCount <= 1); InlineResult inlineResult(this, call, nullptr, "impMarkInlineCandidate"); impMarkInlineCandidateHelper(call, 0, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo, - inlinersContext, &inlineResult); + inlinersContext, &inlineResult, debugInfo); } // If this call is an inline candidate or is not a guarded devirtualization @@ -7977,6 +7980,7 @@ void Compiler::impMarkInlineCandidate(GenTree* callNode, // exactContextNeedsRuntimeLookup -- true if context required runtime lookup // callInfo -- call info from VM // inlinersContext -- the inliner's context +// debugInfo -- debug info that will be the parent debug info of statements from the inlinee // // Notes: // If callNode is an inline candidate, this method sets the flag @@ -7994,7 +7998,8 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call, bool exactContextNeedsRuntimeLookup, CORINFO_CALL_INFO* callInfo, InlineContext* inlinersContext, - InlineResult* inlineResult) + InlineResult* inlineResult, + const DebugInfo& debugInfo) { // Let the strategy know there's another call impInlineRoot()->m_inlineStrategy->NoteCall(); @@ -8203,8 +8208,8 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call, } InlineCandidateInfo* inlineCandidateInfo = nullptr; - impCheckCanInline(call, candidateIndex, fncHandle, methAttr, exactContextHnd, inlinersContext, &inlineCandidateInfo, - inlineResult); + impCheckCanInline(call, candidateIndex, fncHandle, methAttr, exactContextHnd, inlinersContext, debugInfo, + &inlineCandidateInfo, inlineResult); if (inlineResult->IsFailure()) { @@ -9608,6 +9613,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni // methAttr - attributes for the method // exactContextHnd - exact context for the method // inlinersContext - the inliner's context +// debugInfo -- debug info that will be the parent debug info of statements from the inlinee // ppInlineCandidateInfo [out] - information needed later for inlining // inlineResult - result of ongoing inline evaluation // @@ -9621,6 +9627,7 @@ void Compiler::impCheckCanInline(GenTreeCall* call, unsigned methAttr, CORINFO_CONTEXT_HANDLE exactContextHnd, InlineContext* inlinersContext, + const DebugInfo& debugInfo, InlineCandidateInfo** ppInlineCandidateInfo, InlineResult* inlineResult) { @@ -9636,6 +9643,7 @@ void Compiler::impCheckCanInline(GenTreeCall* call, unsigned methAttr; CORINFO_CONTEXT_HANDLE exactContextHnd; InlineContext* inlinersContext; + const DebugInfo* debugInfo; InlineResult* result; InlineCandidateInfo** ppInlineCandidateInfo; } param; @@ -9648,6 +9656,7 @@ void Compiler::impCheckCanInline(GenTreeCall* call, param.methAttr = methAttr; param.exactContextHnd = (exactContextHnd != nullptr) ? exactContextHnd : MAKE_METHODCONTEXT(fncHandle); param.inlinersContext = inlinersContext; + param.debugInfo = &debugInfo; param.result = inlineResult; param.ppInlineCandidateInfo = ppInlineCandidateInfo; @@ -9801,6 +9810,8 @@ void Compiler::impCheckCanInline(GenTreeCall* call, pInfo->initClassResult = initClassResult; pInfo->exactContextNeedsRuntimeLookup = false; pInfo->inlinersContext = pParam->inlinersContext; + pInfo->containingStatementLocation = pParam->debugInfo->GetLocation(); + assert((pParam->debugInfo->GetInlineContext() == pParam->inlinersContext) || !pParam->debugInfo->IsValid()); // Note exactContextNeedsRuntimeLookup is reset later on, // over in impMarkInlineCandidate. diff --git a/src/coreclr/jit/inline.cpp b/src/coreclr/jit/inline.cpp index 3ebc8bc7648d92..322370c58d42fe 100644 --- a/src/coreclr/jit/inline.cpp +++ b/src/coreclr/jit/inline.cpp @@ -1275,7 +1275,7 @@ InlineContext* InlineStrategy::NewRoot() return rootContext; } -InlineContext* InlineStrategy::NewContext(InlineContext* parentContext, Statement* stmt, GenTreeCall* call) +InlineContext* InlineStrategy::NewContext(InlineContext* parentContext, GenTreeCall* call) { InlineContext* context = new (m_compiler, CMK_Inlining) InlineContext(this); @@ -1295,6 +1295,7 @@ InlineContext* InlineStrategy::NewContext(InlineContext* parentContext, Statemen context->m_ILSize = info->methInfo.ILCodeSize; context->m_ActualCallOffset = info->ilOffset; context->m_RuntimeContext = info->exactContextHandle; + context->m_Location = info->containingStatementLocation; } else { @@ -1310,8 +1311,8 @@ InlineContext* InlineStrategy::NewContext(InlineContext* parentContext, Statemen // call // which becomes a single statement where the IL location points to the // ldarg instruction. - context->m_Location = stmt->GetDebugInfo().GetLocation(); - context->m_Callee = call->gtCallMethHnd; + // DebugInfo(parentContext, context->m_Location).Validate(); + context->m_Callee = call->gtCallMethHnd; #if defined(DEBUG) context->m_Devirtualized = call->IsDevirtualized(); diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h index 93eed86588d413..f2686a467e2a71 100644 --- a/src/coreclr/jit/inline.h +++ b/src/coreclr/jit/inline.h @@ -637,6 +637,8 @@ struct InlineCandidateInfo : public HandleHistogramProfileCandidateInfo CorInfoInitClassResult initClassResult; bool exactContextNeedsRuntimeLookup; InlineContext* inlinersContext; + + ILLocation containingStatementLocation; }; // LateDevirtualizationInfo @@ -647,7 +649,7 @@ struct LateDevirtualizationInfo { CORINFO_METHOD_HANDLE methodHnd; CORINFO_CONTEXT_HANDLE exactContextHnd; - InlineContext* inlinersContext; + ILLocation ilLocation; }; // InlArgInfo describes inline candidate argument properties. @@ -742,7 +744,6 @@ struct InlineInfo #endif // FEATURE_SIMD GenTreeCall* iciCall; // The GT_CALL node to be inlined. - Statement* iciStmt; // The statement iciCall is in. BasicBlock* iciBlock; // The basic block iciStmt is in. StatementListBuilder setupStatements; @@ -981,8 +982,8 @@ class InlineStrategy // Construct a new inline strategy. InlineStrategy(Compiler* compiler); - // Create context for the specified inline candidate contained in the specified statement. - InlineContext* NewContext(InlineContext* parentContext, Statement* stmt, GenTreeCall* call); + // Create context for the specified inline candidate. + InlineContext* NewContext(InlineContext* parentContext, GenTreeCall* call); // Compiler associated with this strategy Compiler* GetCompiler() const From 32bcb49591305b48bc221c098ec1a7f7b21d149f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 25 Feb 2026 22:43:40 +0100 Subject: [PATCH 13/23] Minor fixes --- src/coreclr/jit/importer.cpp | 13 ++----------- src/coreclr/jit/importercalls.cpp | 4 ++-- src/coreclr/jit/inline.cpp | 2 +- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index acecc26e116b2a..b38fc7c82e413a 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -391,17 +391,8 @@ void Compiler::impAppendStmt(Statement* stmt, unsigned chkLevel, bool checkConsu if (call->TypeIs(TYP_VOID) && call->ShouldHaveRetBufArg()) { - GenTree* retBuf; - if (call->ShouldHaveRetBufArg()) - { - assert(call->gtArgs.HasRetBuffer()); - retBuf = call->gtArgs.GetRetBufferArg()->GetNode(); - } - else - { - assert(!call->gtArgs.HasThisPointer()); - retBuf = call->gtArgs.GetArgByIndex(0)->GetNode(); - } + assert(call->gtArgs.HasRetBuffer()); + GenTree* retBuf = call->gtArgs.GetRetBufferArg()->GetNode(); assert(retBuf->TypeIs(TYP_I_IMPL, TYP_BYREF)); diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index e265aa67ff426f..deb041752cbaca 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -6218,12 +6218,12 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsLong(baseType)) { uint64_t cns1 = static_cast(op1->AsIntConCommon()->LngValue()); - result = gtNewLconNode(BitOperations::RotateLeft(cns1, cns2)); + result = gtNewLconNode((int64_t)BitOperations::RotateLeft(cns1, cns2)); } else { uint32_t cns1 = static_cast(op1->AsIntConCommon()->IconValue()); - result = gtNewIconNode(BitOperations::RotateLeft(cns1, cns2), baseType); + result = gtNewIconNode((int32_t)BitOperations::RotateLeft(cns1, cns2), baseType); } break; } diff --git a/src/coreclr/jit/inline.cpp b/src/coreclr/jit/inline.cpp index 322370c58d42fe..8327475a9c5d05 100644 --- a/src/coreclr/jit/inline.cpp +++ b/src/coreclr/jit/inline.cpp @@ -631,7 +631,7 @@ void InlineContext::DumpXml(FILE* file, unsigned indent) // Arguments: // compiler - the compiler instance examining a call for inlining // call - the call in question -// stmt - statement containing the call (if known) +// context - The inline context // description - string describing the context of the decision InlineResult::InlineResult( From 3940a3d407796db878fcdea83935097b4883d3a8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 25 Feb 2026 23:07:49 +0100 Subject: [PATCH 14/23] Inline in execution order like before, simplify a bit --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fginline.cpp | 26 +++++++++----------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 83e43be543a636..328ccd301ae443 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2559,7 +2559,7 @@ class Compiler friend class MorphCopyBlockHelper; friend class SharedTempsScope; friend class CallArgs; - friend class InlineAndDevirtualizeWalker; + friend class DevirtualizeAndInlineWalker; friend class IndirectCallTransformer; friend class ProfileSynthesis; friend class LocalsUseVisitor; diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index e9caf0cb9ea7fa..178e99dc7b8bc3 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -202,7 +202,7 @@ bool Compiler::TypeInstantiationComplexityExceeds(CORINFO_CLASS_HANDLE handle, i return false; } -class InlineAndDevirtualizeWalker : public GenTreeVisitor +class DevirtualizeAndInlineWalker : public GenTreeVisitor { bool m_madeChanges = false; BasicBlock* m_block = nullptr; @@ -219,7 +219,7 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorIsCall() && TryInline(use, user)) - { - if (m_nextBlock != nullptr) - { - return fgWalkResult::WALK_ABORT; - } - } - return fgWalkResult::WALK_CONTINUE; } fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { LateDevirtualization(use, user); - if (m_nextBlock != nullptr) + + if ((*use)->IsCall() && TryInline(use, user)) { // Restart process. return fgWalkResult::WALK_ABORT; @@ -322,6 +315,8 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorfgFirstBB->hasTryIndex()); assert(!inlineeComp->fgFirstBB->hasHndIndex()); @@ -890,10 +886,6 @@ class InlineAndDevirtualizeWalker : public GenTreeVisitorimpMarkInlineCandidate(call, context, false, &callInfo, inlinersContext, DebugInfo(inlinersContext, ilLocation)); - // Reprocess the statement to pick up the inline in preorder. - assert(m_nextBlock == nullptr); - m_nextBlock = m_block; - m_nextStatement = m_statement; } m_madeChanges = true; } @@ -1072,7 +1064,7 @@ PhaseStatus Compiler::fgInline() noway_assert(fgFirstBB != nullptr); - InlineAndDevirtualizeWalker walker(this); + DevirtualizeAndInlineWalker walker(this); bool madeChanges = false; BasicBlock* currentBlock = fgFirstBB; From 88154a280ad2fdcab94c7e830bf961c85d024c7d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 25 Feb 2026 23:31:17 +0100 Subject: [PATCH 15/23] Improve debug printing --- src/coreclr/jit/fginline.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 178e99dc7b8bc3..6d424c7b1adeb9 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -634,25 +634,35 @@ class DevirtualizeAndInlineWalker : public GenTreeVisitorInlineeCompiler; + + if (!inlineInfo.setupStatements.Empty() || !inlineInfo.teardownStatements.Empty()) + { + JITDUMP("Inlinee has setup or teardown statements\n"); + return false; + } + if (inlineeComp->fgBBcount != 1) { + JITDUMP("Inlinee has more than 1 basic block\n"); return false; } if (!inlineeComp->fgFirstBB->KindIs(BBJ_RETURN)) { + JITDUMP("Inlinee block is not BBJ_RETURN\n"); return false; } - if ((inlineeComp->fgFirstBB->bbStmtList != nullptr) || !inlineInfo.setupStatements.Empty() || - !inlineInfo.teardownStatements.Empty()) + if ((inlineeComp->fgFirstBB->bbStmtList != nullptr)) { + JITDUMP("Inlinee has statements\n"); return false; } if (use == m_statement->GetRootNodePointer()) { // Leave this case up to the general handling. + JITDUMP("Candidate is the root node\n"); return false; } @@ -664,6 +674,7 @@ class DevirtualizeAndInlineWalker : public GenTreeVisitorgtComplexityExceeds(m_statement->GetRootNode(), 16, getComplexity)) { + JITDUMP("Candidate statement is too complex after substitution\n"); *use = call; return false; } @@ -1319,14 +1330,12 @@ void Compiler::fgMorphCallInlineHelper(InlineInfo& inlineInfo, #ifdef DEBUG if (verbose) { - printf("Expanding INLINE_CANDIDATE in statement "); - printStmtID(fgMorphStmt); - printf(" in " FMT_BB ":\n", compCurBB->bbNum); - gtDispStmt(fgMorphStmt); + printf("Expanding inline candidate [%06u] in " FMT_STMT " in " FMT_BB "\n", dspTreeID(call), fgMorphStmt->GetID(), compCurBB->bbNum); if (call->IsImplicitTailCall()) { - printf("Note: candidate is implicit tail call\n"); + printf(" Note: candidate is implicit tail call\n"); } + DISPSTMT(fgMorphStmt); } #endif From 86795e04a84420e27eb7df387903944ff450da69 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 25 Feb 2026 23:42:27 +0100 Subject: [PATCH 16/23] Enhance InsertMidStatement --- src/coreclr/jit/fginline.cpp | 46 +++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 6d424c7b1adeb9..29187f88c810e4 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -641,29 +641,43 @@ class DevirtualizeAndInlineWalker : public GenTreeVisitorfgBBcount != 1) + if (use == m_statement->GetRootNodePointer()) { - JITDUMP("Inlinee has more than 1 basic block\n"); + // Leave this case up to the general handling. + JITDUMP("Candidate is the root node\n"); return false; } - if (!inlineeComp->fgFirstBB->KindIs(BBJ_RETURN)) + BasicBlock* block = inlineeComp->fgFirstBB; + assert(block != nullptr); + int numBlocks = 0; + while (true) { - JITDUMP("Inlinee block is not BBJ_RETURN\n"); - return false; - } + if (block->bbStmtList != nullptr) + { + JITDUMP("Inlinee block " FMT_BB " has statements\n", block->bbNum); + return false; + } - if ((inlineeComp->fgFirstBB->bbStmtList != nullptr)) - { - JITDUMP("Inlinee has statements\n"); - return false; - } + if (block->KindIs(BBJ_RETURN)) + { + break; + } - if (use == m_statement->GetRootNodePointer()) - { - // Leave this case up to the general handling. - JITDUMP("Candidate is the root node\n"); - return false; + BasicBlock* succBlock = block->GetUniqueSucc(); + if (succBlock == nullptr) + { + JITDUMP("Inlinee block " FMT_BB " has non-trivial control flow\n", block->bbNum); + return false; + } + + block = succBlock; + numBlocks++; + if (numBlocks > 64) + { + JITDUMP("Too many blocks in inlinee\n"); + return false; + } } GenTree* call = *use; From 633189ae509d6eb31450de51cd36f68c3a020d48 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 25 Feb 2026 23:42:29 +0100 Subject: [PATCH 17/23] Revert "Enhance InsertMidStatement" This reverts commit 86795e04a84420e27eb7df387903944ff450da69. --- src/coreclr/jit/fginline.cpp | 46 +++++++++++++----------------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 29187f88c810e4..6d424c7b1adeb9 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -641,43 +641,29 @@ class DevirtualizeAndInlineWalker : public GenTreeVisitorGetRootNodePointer()) + if (inlineeComp->fgBBcount != 1) { - // Leave this case up to the general handling. - JITDUMP("Candidate is the root node\n"); + JITDUMP("Inlinee has more than 1 basic block\n"); return false; } - BasicBlock* block = inlineeComp->fgFirstBB; - assert(block != nullptr); - int numBlocks = 0; - while (true) + if (!inlineeComp->fgFirstBB->KindIs(BBJ_RETURN)) { - if (block->bbStmtList != nullptr) - { - JITDUMP("Inlinee block " FMT_BB " has statements\n", block->bbNum); - return false; - } - - if (block->KindIs(BBJ_RETURN)) - { - break; - } + JITDUMP("Inlinee block is not BBJ_RETURN\n"); + return false; + } - BasicBlock* succBlock = block->GetUniqueSucc(); - if (succBlock == nullptr) - { - JITDUMP("Inlinee block " FMT_BB " has non-trivial control flow\n", block->bbNum); - return false; - } + if ((inlineeComp->fgFirstBB->bbStmtList != nullptr)) + { + JITDUMP("Inlinee has statements\n"); + return false; + } - block = succBlock; - numBlocks++; - if (numBlocks > 64) - { - JITDUMP("Too many blocks in inlinee\n"); - return false; - } + if (use == m_statement->GetRootNodePointer()) + { + // Leave this case up to the general handling. + JITDUMP("Candidate is the root node\n"); + return false; } GenTree* call = *use; From d29bd297448c7dc6136fd5976ff21405a45adf34 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Feb 2026 13:34:47 +0100 Subject: [PATCH 18/23] Set reverse ops on stfld IR --- src/coreclr/jit/importer.cpp | 5 +++++ src/coreclr/jit/lclmorph.cpp | 18 +++++++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index b38fc7c82e413a..baab1ac315965f 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -9521,6 +9521,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) : gtNewStoreIndNode(lclTyp, op1, op2, indirFlags); impAnnotateFieldIndir(op1->AsIndir()); + if ((op1->AsIndir()->Addr()->gtFlags & GTF_SIDE_EFFECT) != 0) + { + op1->SetReverseOp(); + } + if (varTypeIsStruct(op1)) { op1 = impStoreStruct(op1, CHECK_SPILL_ALL); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 1b2f9b661d7719..88f344de8d1229 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1212,25 +1212,29 @@ class LocalAddressVisitor final : public GenTreeVisitor case GT_STORE_BLK: { assert(TopValue(2).Node() == node); - assert(TopValue(1).Node() == node->AsIndir()->Addr()); - assert(TopValue(0).Node() == node->AsIndir()->Data()); + + int dataIndex = TopValue(0).Node() == node->AsIndir()->Data() ? 0 : 1; + int addrIndex = dataIndex == 0 ? 1 : 0; + assert(TopValue(addrIndex).Node() == node->AsIndir()->Addr()); + assert(TopValue(dataIndex).Node() == node->AsIndir()->Data()); + // Data value always escapes. - EscapeValue(TopValue(0), node); + EscapeValue(TopValue(dataIndex), node); - if (node->AsIndir()->IsVolatile() || !TopValue(1).IsAddress()) + if (node->AsIndir()->IsVolatile() || !TopValue(addrIndex).IsAddress()) { // Volatile indirections must not be removed so the address, if any, must be escaped. - EscapeValue(TopValue(1), node); + EscapeValue(TopValue(addrIndex), node); } else { // This consumes the address. - ProcessIndirection(use, TopValue(1), user); + ProcessIndirection(use, TopValue(addrIndex), user); if ((m_lclAddrAssertions != nullptr) && (*use)->OperIsLocalStore()) { - HandleLocalStoreAssertions((*use)->AsLclVarCommon(), TopValue(0)); + HandleLocalStoreAssertions((*use)->AsLclVarCommon(), TopValue(dataIndex)); } } From 5afa67267a25f7a7448f847f203f51bd5622f620 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Feb 2026 13:34:49 +0100 Subject: [PATCH 19/23] Revert "Set reverse ops on stfld IR" This reverts commit d29bd297448c7dc6136fd5976ff21405a45adf34. --- src/coreclr/jit/importer.cpp | 5 ----- src/coreclr/jit/lclmorph.cpp | 18 +++++++----------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index baab1ac315965f..b38fc7c82e413a 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -9521,11 +9521,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) : gtNewStoreIndNode(lclTyp, op1, op2, indirFlags); impAnnotateFieldIndir(op1->AsIndir()); - if ((op1->AsIndir()->Addr()->gtFlags & GTF_SIDE_EFFECT) != 0) - { - op1->SetReverseOp(); - } - if (varTypeIsStruct(op1)) { op1 = impStoreStruct(op1, CHECK_SPILL_ALL); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 88f344de8d1229..1b2f9b661d7719 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1212,29 +1212,25 @@ class LocalAddressVisitor final : public GenTreeVisitor case GT_STORE_BLK: { assert(TopValue(2).Node() == node); - - int dataIndex = TopValue(0).Node() == node->AsIndir()->Data() ? 0 : 1; - int addrIndex = dataIndex == 0 ? 1 : 0; - assert(TopValue(addrIndex).Node() == node->AsIndir()->Addr()); - assert(TopValue(dataIndex).Node() == node->AsIndir()->Data()); - + assert(TopValue(1).Node() == node->AsIndir()->Addr()); + assert(TopValue(0).Node() == node->AsIndir()->Data()); // Data value always escapes. - EscapeValue(TopValue(dataIndex), node); + EscapeValue(TopValue(0), node); - if (node->AsIndir()->IsVolatile() || !TopValue(addrIndex).IsAddress()) + if (node->AsIndir()->IsVolatile() || !TopValue(1).IsAddress()) { // Volatile indirections must not be removed so the address, if any, must be escaped. - EscapeValue(TopValue(addrIndex), node); + EscapeValue(TopValue(1), node); } else { // This consumes the address. - ProcessIndirection(use, TopValue(addrIndex), user); + ProcessIndirection(use, TopValue(1), user); if ((m_lclAddrAssertions != nullptr) && (*use)->OperIsLocalStore()) { - HandleLocalStoreAssertions((*use)->AsLclVarCommon(), TopValue(dataIndex)); + HandleLocalStoreAssertions((*use)->AsLclVarCommon(), TopValue(0)); } } From 16f716f795f646571774dad762169cb98208c14c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Feb 2026 14:33:00 +0100 Subject: [PATCH 20/23] Fix multi reg handling --- src/coreclr/jit/fginline.cpp | 30 ++++++++++++++++++++++++++++++ src/coreclr/jit/importer.cpp | 34 +++++++++++++++++++--------------- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 6d424c7b1adeb9..67a9546cb87531 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -307,6 +307,36 @@ class DevirtualizeAndInlineWalker : public GenTreeVisitorHasMultiRegRetVal()) + { + // If an inline was rejected and the call returns a struct, we may + // have deferred some work when importing call for cases where the + // struct is returned in multiple registers. + // + // See the bail-out clauses in impFixupCallStructReturn for inline + // candidates. + // + // Do the deferred work now. + if ((*use)->IsCall() && varTypeIsStruct(*use) && (*use)->AsCall()->HasMultiRegRetVal()) + { + // See assert below, we only look one level above for a store parent. + if (parent->OperIsStore()) + { + // The inlinee can only be the value. + assert(parent->Data() == *use); + AttachStructInlineeToStore(parent, (*use)->AsCall()->gtRetClsHnd); + } + else + { + // Just store the inlinee to a variable to keep it simple. + *use = StoreStructInlineeToVar(*use, (*use)->AsCall()->gtRetClsHnd); + } + m_madeChanges = true; + } + } +#endif + return false; } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index b38fc7c82e413a..c0eccac6baf9d3 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -4424,25 +4424,29 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op) return op; } - // In contrast, we can only use multi-reg calls directly if they have the exact same ABI. - // Calling convention equality is a conservative approximation for that check. - if (op->IsCall() && - (op->AsCall()->GetUnmanagedCallConv() == info.compCallConv) -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // TODO-Review: this seems unnecessary. Return ABI doesn't change under varargs. - && !op->AsCall()->IsVarargs() -#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - ) + // We can sometimes use calls directly as multi-reg returns, but only + // if they're not inline candidates. Inline candidates can be replaced by + // arbitrary nodes that would not be legal to use as multi-reg returns. + // TODO-CQ: Delay this fixup to after we have resolved the inline candidate. + if (op->IsCall() && !op->AsCall()->IsInlineCandidate()) { - return op; - } + GenTreeCall* call = op->AsCall(); + // In contrast, we can only use multi-reg calls directly if they have the exact same ABI. + // Calling convention equality is a conservative approximation for that check. + if (call->GetUnmanagedCallConv() == info.compCallConv + #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + // TODO-Review: this seems unnecessary. Return ABI doesn't change under varargs. + && !call->IsVarargs() + #endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + ) + { + return op; + } - if (op->IsCall()) - { // We cannot tail call because control needs to return to fixup the calling convention // for result return. - op->AsCall()->gtCallMoreFlags &= ~GTF_CALL_M_TAILCALL; - op->AsCall()->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL; + call->gtCallMoreFlags &= ~GTF_CALL_M_TAILCALL; + call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL; } // The backend does not support other struct-producing nodes (e. g. OBJs) as sources of multi-reg returns. From 7213997e310fbe6b1a21b863ba3ef4ed05b4e0c6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Feb 2026 14:48:44 +0100 Subject: [PATCH 21/23] Fix folding assert --- src/coreclr/jit/gentree.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 7ef75490042685..bb0099a652e9c0 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -31974,9 +31974,8 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) #if defined(TARGET_XARCH) tryHandle = op->OperIsHWIntrinsic(); #elif defined(TARGET_ARM64) - assert(op->OperIsHWIntrinsic(NI_Sve_ConversionTrueMask)); + tryHandle = op->OperIsHWIntrinsic(NI_Sve_ConversionTrueMask) && op2->OperIsHWIntrinsic(); op = op2; - tryHandle = op->OperIsHWIntrinsic(); #endif // TARGET_ARM64 if (tryHandle) From 36843198f31b67ef830e6a5843dec1fbde9bce7d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Feb 2026 14:56:40 +0100 Subject: [PATCH 22/23] Feedback --- src/coreclr/jit/importercalls.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index deb041752cbaca..415e5ef971b698 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -6267,7 +6267,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsLong(baseType)) { uint64_t cns1 = static_cast(op1->AsIntConCommon()->LngValue()); - result = gtNewLconNode((int32_t)BitOperations::RotateRight(cns1, cns2)); + result = gtNewLconNode((int64_t)BitOperations::RotateRight(cns1, cns2)); } else { From 606861b02de22fcbaeed3140192b56c9e68fca96 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Feb 2026 14:58:16 +0100 Subject: [PATCH 23/23] Run jit-format --- src/coreclr/jit/fginline.cpp | 5 +++-- src/coreclr/jit/importer.cpp | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 67a9546cb87531..cfeaad16bf8e0a 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -345,7 +345,7 @@ class DevirtualizeAndInlineWalker : public GenTreeVisitorGetID(), compCurBB->bbNum); + printf("Expanding inline candidate [%06u] in " FMT_STMT " in " FMT_BB "\n", dspTreeID(call), + fgMorphStmt->GetID(), compCurBB->bbNum); if (call->IsImplicitTailCall()) { printf(" Note: candidate is implicit tail call\n"); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index c0eccac6baf9d3..246842258b4134 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -4434,10 +4434,10 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op) // In contrast, we can only use multi-reg calls directly if they have the exact same ABI. // Calling convention equality is a conservative approximation for that check. if (call->GetUnmanagedCallConv() == info.compCallConv - #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // TODO-Review: this seems unnecessary. Return ABI doesn't change under varargs. && !call->IsVarargs() - #endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) ) { return op;