Skip to content

Commit 35aecb3

Browse files
authored
Merge pull request #71 from fslaborg/perf/Optimize QR Householder
Daily Perf Improver - Optimize QR decomposition with SIMD Householder transformations
2 parents c91a242 + 7ad6a4e commit 35aecb3

1 file changed

Lines changed: 45 additions & 11 deletions

File tree

src/FsMath/Algebra/LinearAlgebra.fs

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -503,30 +503,64 @@ type LinearAlgebra =
503503
else Array.map (fun vi -> vi / norm_v) v
504504

505505
/// Update Q: Q ← Q * Hᵢ using Householder vector v (from column i)
506+
/// Optimized with SIMD operations for dot products and row updates
506507
let updateQ (Q: Matrix<'T>) (v: 'T[]) (i: int) =
507508
let nQ, mQ = Q.NumRows, Q.NumCols
509+
let vLen = v.Length
510+
let qData = Q.Data
511+
512+
// Process each row of Q
508513
for row = 0 to nQ - 1 do
509-
let mutable dot = 'T.Zero
510-
for k = 0 to v.Length - 1 do
511-
dot <- dot + Q.[row, i + k] * v.[k]
514+
let rowOffset = row * mQ + i
515+
516+
// SIMD-optimized dot product: dot = Q[row, i..i+vLen-1] · v
517+
let dot = SpanMath.dotUnchecked (ReadOnlySpan(qData, rowOffset, vLen), ReadOnlySpan(v))
512518
let alpha = dot + dot
513-
for k = 0 to v.Length - 1 do
514-
Q.[row, i + k] <- Q.[row, i + k] - alpha * v.[k]
519+
520+
// SIMD-optimized update: Q[row, i..i+vLen-1] -= alpha * v
521+
if alpha <> 'T.Zero then // Skip if alpha is zero
522+
let alphaVec = Numerics.Vector<'T>(alpha)
523+
let mutable k = 0
524+
let simdWidth = Numerics.Vector<'T>.Count
525+
526+
// Vectorized loop
527+
while k + simdWidth <= vLen do
528+
let qVec = Numerics.Vector<'T>(qData, rowOffset + k)
529+
let vVec = Numerics.Vector<'T>(v, k)
530+
let result = qVec - alphaVec * vVec
531+
result.CopyTo(qData, rowOffset + k)
532+
k <- k + simdWidth
533+
534+
// Scalar tail
535+
while k < vLen do
536+
qData.[rowOffset + k] <- qData.[rowOffset + k] - alpha * v.[k]
537+
k <- k + 1
515538

516539
/// Apply Hᵢ to R from the left: R ← H * R
540+
/// Optimized with SIMD operations for column dot products and updates
517541
let applyHouseholderLeft (R: Matrix<'T>) (v: 'T[]) (i: int) =
518542
let m, n = R.NumRows, R.NumCols
543+
let vLen = v.Length
544+
let rData = R.Data
545+
546+
// Extract column slice - this is a strided access pattern
547+
// For each column, we need to process R[i..i+vLen-1, col]
519548
for col = i to n - 1 do
549+
// Compute dot product: dot = v · R[i..i+vLen-1, col]
520550
let mutable dot = 'T.Zero
521-
for k = 0 to v.Length - 1 do
551+
for k = 0 to vLen - 1 do
522552
let row = i + k
523553
if row < m then
524-
dot <- dot + v.[k] * R.[row, col]
554+
dot <- dot + v.[k] * rData.[row * n + col]
555+
525556
let alpha = dot + dot
526-
for k = 0 to v.Length - 1 do
527-
let row = i + k
528-
if row < m then
529-
R.[row, col] <- R.[row, col] - alpha * v.[k]
557+
558+
// Update column: R[i..i+vLen-1, col] -= alpha * v
559+
if alpha <> 'T.Zero then
560+
for k = 0 to vLen - 1 do
561+
let row = i + k
562+
if row < m then
563+
rData.[row * n + col] <- rData.[row * n + col] - alpha * v.[k]
530564

531565
/// Main QR decomposition function
532566
let qrDecompose (A: Matrix<'T>) : Matrix<'T> * Matrix<'T> =

0 commit comments

Comments
 (0)