diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 48d4c65bb973a0..010bd270fb4da0 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -900,10 +900,12 @@ void LoopCloneContext::SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* in BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* comp, JitExpandArrayStack& conds, BasicBlock* slowPreheader, - BasicBlock* insertAfter) + BasicBlock* insertAfter, + unsigned totalCondsInChain) { noway_assert(conds.Size() > 0); assert(slowPreheader != nullptr); + assert(totalCondsInChain >= conds.Size()); // For now assume high likelihood for the fast path, // uniformly spread across the gating branches. @@ -911,12 +913,15 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* // For "normal" cloning this is probably ok. For GDV cloning this // may be inaccurate. We should key off the type test likelihood(s). // + // `totalCondsInChain` counts cond blocks across all calls for one cloning op, + // so the chain's cumulative fast-path probability is fastPathWeightScaleFactor. + // const weight_t fastLikelihood = fastPathWeightScaleFactor; - // N = conds.Size() branches must all be true to execute the fast loop. - // Use the N'th root.... + // totalCondsInChain branches must all be true to execute the fast loop. + // Use the N'th root. // - const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)conds.Size()); + const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)totalCondsInChain); for (unsigned i = 0; i < conds.Size(); ++i) { @@ -2207,6 +2212,22 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* contex JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loop->GetIndex()); assert(slowPreheader != nullptr); + // Count all cond blocks the chain will install (block conditions + cloning conditions), + // so CondToStmtInBlock can size per-block likelihoods against the full chain length. + // + unsigned totalCondsInChain = 0; + if (context->HasBlockConditions(loop->GetIndex())) + { + JitExpandArrayStack*>* const levelCond = + context->GetBlockConditions(loop->GetIndex()); + for (unsigned i = 0; i < levelCond->Size(); ++i) + { + totalCondsInChain += (*levelCond)[i]->Size(); + } + } + totalCondsInChain += context->GetConditions(loop->GetIndex())->Size(); + assert(totalCondsInChain > 0); + if (context->HasBlockConditions(loop->GetIndex())) { JitExpandArrayStack*>* levelCond = @@ -2215,7 +2236,8 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* contex { JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loop->GetIndex(), i); DBEXEC(verbose, context->PrintBlockLevelConditions(i, (*levelCond)[i])); - insertAfter = context->CondToStmtInBlock(this, *((*levelCond)[i]), slowPreheader, insertAfter); + insertAfter = + context->CondToStmtInBlock(this, *((*levelCond)[i]), slowPreheader, insertAfter, totalCondsInChain); } } @@ -2223,8 +2245,8 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* contex JITDUMP("Adding loop " FMT_LP " cloning conditions\n ", loop->GetIndex()); DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); - insertAfter = - context->CondToStmtInBlock(this, *(context->GetConditions(loop->GetIndex())), slowPreheader, insertAfter); + insertAfter = context->CondToStmtInBlock(this, *(context->GetConditions(loop->GetIndex())), slowPreheader, + insertAfter, totalCondsInChain); return insertAfter; } diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 576fc49ab483b4..e80c8aafb9cd02 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -1020,10 +1020,13 @@ struct LoopCloneContext void SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info); // Evaluate conditions into a JTRUE stmt and put it in a new block after `insertAfter`. + // `totalCondsInChain` is the combined cond block count across all calls for one cloning op, + // used to make the chain's cumulative fast-path probability match `fastPathWeightScaleFactor`. BasicBlock* CondToStmtInBlock(Compiler* comp, JitExpandArrayStack& conds, BasicBlock* slowHead, - BasicBlock* insertAfter); + BasicBlock* insertAfter, + unsigned totalCondsInChain); // Get all the optimization information for loop "loopNum"; this information is held in "optInfo" array. // If NULL this allocates the optInfo[loopNum] array for "loopNum".