From b56925cc9d9c5a5a60ee57a5132ce3d6c23118c0 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 19 Jun 2026 18:43:07 -0700 Subject: [PATCH] JIT: fix edge likelihoods for loop cloning We were over-estimating the edge likelihoods leading to the fast clone, causing profiles to inflate. The old code was not aware of the total number of checks that need to pass to reach the fast clone, so each edge's likelihood was slightly too high. This shows up prominently when we clone a set of nested loops. Fix by counting how many conditional branches we must pass through before reaching the fast clone, and use that to compute the proper likelihoods. --- src/coreclr/jit/loopcloning.cpp | 36 ++++++++++++++++++++++++++------- src/coreclr/jit/loopcloning.h | 5 ++++- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 48d4c65bb973a0..010bd270fb4da0 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -900,10 +900,12 @@ void LoopCloneContext::SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* in BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* comp, JitExpandArrayStack& conds, BasicBlock* slowPreheader, - BasicBlock* insertAfter) + BasicBlock* insertAfter, + unsigned totalCondsInChain) { noway_assert(conds.Size() > 0); assert(slowPreheader != nullptr); + assert(totalCondsInChain >= conds.Size()); // For now assume high likelihood for the fast path, // uniformly spread across the gating branches. @@ -911,12 +913,15 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* // For "normal" cloning this is probably ok. For GDV cloning this // may be inaccurate. We should key off the type test likelihood(s). // + // `totalCondsInChain` counts cond blocks across all calls for one cloning op, + // so the chain's cumulative fast-path probability is fastPathWeightScaleFactor. + // const weight_t fastLikelihood = fastPathWeightScaleFactor; - // N = conds.Size() branches must all be true to execute the fast loop. - // Use the N'th root.... + // totalCondsInChain branches must all be true to execute the fast loop. + // Use the N'th root. // - const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)conds.Size()); + const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)totalCondsInChain); for (unsigned i = 0; i < conds.Size(); ++i) { @@ -2207,6 +2212,22 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* contex JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loop->GetIndex()); assert(slowPreheader != nullptr); + // Count all cond blocks the chain will install (block conditions + cloning conditions), + // so CondToStmtInBlock can size per-block likelihoods against the full chain length. + // + unsigned totalCondsInChain = 0; + if (context->HasBlockConditions(loop->GetIndex())) + { + JitExpandArrayStack*>* const levelCond = + context->GetBlockConditions(loop->GetIndex()); + for (unsigned i = 0; i < levelCond->Size(); ++i) + { + totalCondsInChain += (*levelCond)[i]->Size(); + } + } + totalCondsInChain += context->GetConditions(loop->GetIndex())->Size(); + assert(totalCondsInChain > 0); + if (context->HasBlockConditions(loop->GetIndex())) { JitExpandArrayStack*>* levelCond = @@ -2215,7 +2236,8 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* contex { JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loop->GetIndex(), i); DBEXEC(verbose, context->PrintBlockLevelConditions(i, (*levelCond)[i])); - insertAfter = context->CondToStmtInBlock(this, *((*levelCond)[i]), slowPreheader, insertAfter); + insertAfter = + context->CondToStmtInBlock(this, *((*levelCond)[i]), slowPreheader, insertAfter, totalCondsInChain); } } @@ -2223,8 +2245,8 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* contex JITDUMP("Adding loop " FMT_LP " cloning conditions\n ", loop->GetIndex()); DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); - insertAfter = - context->CondToStmtInBlock(this, *(context->GetConditions(loop->GetIndex())), slowPreheader, insertAfter); + insertAfter = context->CondToStmtInBlock(this, *(context->GetConditions(loop->GetIndex())), slowPreheader, + insertAfter, totalCondsInChain); return insertAfter; } diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 576fc49ab483b4..e80c8aafb9cd02 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -1020,10 +1020,13 @@ struct LoopCloneContext void SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info); // Evaluate conditions into a JTRUE stmt and put it in a new block after `insertAfter`. + // `totalCondsInChain` is the combined cond block count across all calls for one cloning op, + // used to make the chain's cumulative fast-path probability match `fastPathWeightScaleFactor`. BasicBlock* CondToStmtInBlock(Compiler* comp, JitExpandArrayStack& conds, BasicBlock* slowHead, - BasicBlock* insertAfter); + BasicBlock* insertAfter, + unsigned totalCondsInChain); // Get all the optimization information for loop "loopNum"; this information is held in "optInfo" array. // If NULL this allocates the optInfo[loopNum] array for "loopNum".