@@ -503,30 +503,64 @@ type LinearAlgebra =
503503 else Array.map ( fun vi -> vi / norm_ v) v
504504
505505 /// Update Q: Q ← Q * Hᵢ using Householder vector v (from column i)
506+ /// Optimized with SIMD operations for dot products and row updates
506507 let updateQ ( Q : Matrix < 'T >) ( v : 'T []) ( i : int ) =
507508 let nQ , mQ = Q.NumRows, Q.NumCols
509+ let vLen = v.Length
510+ let qData = Q.Data
511+
512+ // Process each row of Q
508513 for row = 0 to nQ - 1 do
509- let mutable dot = 'T.Zero
510- for k = 0 to v.Length - 1 do
511- dot <- dot + Q.[ row, i + k] * v.[ k]
514+ let rowOffset = row * mQ + i
515+
516+ // SIMD-optimized dot product: dot = Q[row, i..i+vLen-1] · v
517+ let dot = SpanMath.dotUnchecked ( ReadOnlySpan( qData, rowOffset, vLen), ReadOnlySpan( v))
512518 let alpha = dot + dot
513- for k = 0 to v.Length - 1 do
514- Q.[ row, i + k] <- Q.[ row, i + k] - alpha * v.[ k]
519+
520+ // SIMD-optimized update: Q[row, i..i+vLen-1] -= alpha * v
521+ if alpha <> 'T.Zero then // Skip if alpha is zero
522+ let alphaVec = Numerics.Vector< 'T>( alpha)
523+ let mutable k = 0
524+ let simdWidth = Numerics.Vector< 'T>. Count
525+
526+ // Vectorized loop
527+ while k + simdWidth <= vLen do
528+ let qVec = Numerics.Vector< 'T>( qData, rowOffset + k)
529+ let vVec = Numerics.Vector< 'T>( v, k)
530+ let result = qVec - alphaVec * vVec
531+ result.CopyTo( qData, rowOffset + k)
532+ k <- k + simdWidth
533+
534+ // Scalar tail
535+ while k < vLen do
536+ qData.[ rowOffset + k] <- qData.[ rowOffset + k] - alpha * v.[ k]
537+ k <- k + 1
515538
516539 /// Apply Hᵢ to R from the left: R ← H * R
540+ /// Optimized with SIMD operations for column dot products and updates
517541 let applyHouseholderLeft ( R : Matrix < 'T >) ( v : 'T []) ( i : int ) =
518542 let m , n = R.NumRows, R.NumCols
543+ let vLen = v.Length
544+ let rData = R.Data
545+
546+ // Extract column slice - this is a strided access pattern
547+ // For each column, we need to process R[i..i+vLen-1, col]
519548 for col = i to n - 1 do
549+ // Compute dot product: dot = v · R[i..i+vLen-1, col]
520550 let mutable dot = 'T.Zero
521- for k = 0 to v.Length - 1 do
551+ for k = 0 to vLen - 1 do
522552 let row = i + k
523553 if row < m then
524- dot <- dot + v.[ k] * R.[ row, col]
554+ dot <- dot + v.[ k] * rData.[ row * n + col]
555+
525556 let alpha = dot + dot
526- for k = 0 to v.Length - 1 do
527- let row = i + k
528- if row < m then
529- R.[ row, col] <- R.[ row, col] - alpha * v.[ k]
557+
558+ // Update column: R[i..i+vLen-1, col] -= alpha * v
559+ if alpha <> 'T.Zero then
560+ for k = 0 to vLen - 1 do
561+ let row = i + k
562+ if row < m then
563+ rData.[ row * n + col] <- rData.[ row * n + col] - alpha * v.[ k]
530564
531565 /// Main QR decomposition function
532566 let qrDecompose ( A : Matrix < 'T >) : Matrix < 'T > * Matrix < 'T > =
0 commit comments