Skip to content

Commit 3cc242c

Browse files
authored
Merge pull request #11 from rameel/reduce-unsafe
Reduce Unsafe usage
2 parents 25143cc + 1ba4cda commit 3cc242c

8 files changed

Lines changed: 169 additions & 254 deletions

File tree

src/Ramstack.Parsing/Collections/ArrayBuilder.cs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,20 @@ namespace Ramstack.Parsing.Collections;
33
/// <summary>
44
/// Represents an array builder for elements of type <typeparamref name="T"/>.
55
/// </summary>
6+
/// <remarks>
7+
/// This structure is optimized for performance and relies on proper initialization.
8+
///
9+
/// Always initialize instances using one of the available constructors with the <c>new</c> keyword,
10+
/// including the parameterless constructor: <c>new ArrayBuilder&lt;T>()</c>.
11+
///
12+
/// Do not use <c>default(ArrayBuilder&lt;T>)</c> or similar patterns, as this results
13+
/// in an uninitialized instance with an invalid internal state,
14+
/// causing a <see cref="NullReferenceException"/> when methods are called.
15+
///
16+
/// This design choice was made to avoid unnecessary overhead and maintain control over
17+
/// internal state initialization in modern C# versions where parameterless constructors
18+
/// in structs are supported.
19+
/// </remarks>
620
/// <typeparam name="T">The type of elements stored in the array builder.</typeparam>
721
[DebuggerDisplay("Count = {Count}")]
822
[DebuggerTypeProxy(typeof(ArrayBuilderDebugView<>))]
@@ -33,12 +47,10 @@ public readonly ref T this[int index]
3347
get
3448
{
3549
var array = _array;
36-
if ((uint)index >= (uint)_count)
50+
if ((uint)index >= (uint)_count || (uint)index >= (uint)array.Length)
3751
ThrowHelper.ThrowArgumentOutOfRangeException();
3852

39-
return ref Unsafe.Add(
40-
ref MemoryMarshal.GetArrayDataReference(array),
41-
(nint)(uint)index);
53+
return ref array[index];
4254
}
4355
}
4456

src/Ramstack.Parsing/Literal.String.cs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -128,16 +128,26 @@ private static int TryParseImpl(ReadOnlySpan<char> s, out TResult? value)
128128
}
129129
else
130130
{
131-
if (index + 5 >= s.Length)
131+
var p = s.Slice(index);
132+
if (p.Length < 6)
132133
goto FAIL;
133134

134-
ref var r = ref Unsafe.AsRef(in s[index]);
135-
136-
var v1 = (nint)Unsafe.Add(ref r, 2);
137-
var v2 = (nint)Unsafe.Add(ref r, 3);
138-
var v3 = (nint)Unsafe.Add(ref r, 4);
139-
var v4 = (nint)Unsafe.Add(ref r, 5);
140-
135+
var v1 = (nint)p[2];
136+
var v2 = (nint)p[3];
137+
var v3 = (nint)p[4];
138+
var v4 = (nint)p[5];
139+
140+
// We use Unsafe-methods here to index into the HexTable without bounds checks.
141+
//
142+
// The check 'if ((uint)(v1 | v2 | v3 | v4) <= 127)' guarantees that all four values
143+
// are within the range 0..127.
144+
//
145+
// This makes it safe to access HexTable[v1], HexTable[v2], HexTable[v3], and HexTable[v4]
146+
// without additional bounds checks, because HexTable has exactly 128 elements.
147+
// By using Unsafe.Add, we avoid the overhead of safety checks and improve performance.
148+
//
149+
// Once the JIT compiler can properly recognize and optimize this pattern, we may be able
150+
// to replace these unsafe calls with safe indexed access without performance loss.
141151
if ((uint)(v1 | v2 | v3 | v4) > 127)
142152
goto FAIL;
143153

src/Ramstack.Parsing/Literal.UnicodeEscapeSequence.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,17 @@ public override bool TryParse(ref ParseContext context, out T value)
5151
var v3 = (nint)s[4];
5252
var v4 = (nint)s[5];
5353

54+
// We use Unsafe-methods here to index into the HexTable without bounds checks.
55+
//
56+
// The check 'if ((uint)(v1 | v2 | v3 | v4) <= 127)' guarantees that all four values
57+
// are within the range 0..127.
58+
//
59+
// This makes it safe to access HexTable[v1], HexTable[v2], HexTable[v3], and HexTable[v4]
60+
// without additional bounds checks, because HexTable has exactly 128 elements.
61+
// By using Unsafe.Add, we avoid the overhead of safety checks and improve performance.
62+
//
63+
// Once the JIT compiler can properly recognize and optimize this pattern, we may be able
64+
// to replace these unsafe calls with safe indexed access without performance loss.
5465
if ((uint)(v1 | v2 | v3 | v4) <= 127)
5566
{
5667
ref var table = ref MemoryMarshal.GetReference(HexTable);

src/Ramstack.Parsing/ParseContext.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ public readonly ReadOnlySpan<char> Remaining
3232
var source = _source;
3333
var length = source.Length - _position;
3434

35+
Debug.Assert(_source[_position..].Length == length);
36+
3537
ref var reference = ref Unsafe.Add(
3638
ref MemoryMarshal.GetReference(source),
3739
(nint)(uint)_position);

src/Ramstack.Parsing/Parser.Repeat.cs

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,8 @@ public override bool TryParse(ref ParseContext context, [NotNullWhen(true)] out
451451
while (s.Length != 0)
452452
{
453453
var index = _searcher.IndexOfAnyExcept(s);
454-
if (index >= 0)
454+
455+
if ((uint)index < (uint)s.Length)
455456
{
456457
count += index;
457458

@@ -461,13 +462,21 @@ public override bool TryParse(ref ParseContext context, [NotNullWhen(true)] out
461462
index++;
462463
count++;
463464

464-
Debug.Assert((uint)index <= (uint)s.Length);
465-
466-
s = MemoryMarshal.CreateReadOnlySpan(
467-
ref Unsafe.Add(ref MemoryMarshal.GetReference(s), (nint)(uint)index),
468-
s.Length - index);
465+
// At this point, "index" is guaranteed to be within [0..s.Length].
466+
// While the Slice method performs its own bounds check and would throw if out of range,
467+
// our explicit check helps the JIT recognize that the call to Slice is safe.
468+
// This allows the JIT to eliminate the internal bounds check
469+
// and avoid generating unnecessary exception-handling code inside Slice.
470+
//
471+
// In other words, the check would happen anyway, but by placing it here explicitly,
472+
// we eliminate the need for exception code inside Slice itself.
473+
//
474+
// Even though logically (uint)index < (uint)s.Length implies (uint)index <= (uint)s.Length,
475+
// the JIT cannot currently prove this identity and therefore cannot optimize away
476+
// the redundant check in Slice.
469477

470-
// s = s.Slice(index);
478+
if ((uint)index <= (uint)s.Length)
479+
s = s.Slice(index);
471480
}
472481
else
473482
{
@@ -535,7 +544,7 @@ public override bool TryParse(ref ParseContext context, out Unit value)
535544
while (s.Length != 0)
536545
{
537546
var index = _searcher.IndexOfAnyExcept(s);
538-
if (index >= 0)
547+
if ((uint)index < (uint)s.Length)
539548
{
540549
count += index;
541550

@@ -545,13 +554,21 @@ public override bool TryParse(ref ParseContext context, out Unit value)
545554
index++;
546555
count++;
547556

548-
Debug.Assert((uint)index <= (uint)s.Length);
549-
550-
s = MemoryMarshal.CreateReadOnlySpan(
551-
ref Unsafe.Add(ref MemoryMarshal.GetReference(s), (nint)(uint)index),
552-
s.Length - index);
557+
// At this point, "index" is guaranteed to be within [0..s.Length].
558+
// While the Slice method performs its own bounds check and would throw if out of range,
559+
// our explicit check helps the JIT recognize that the call to Slice is safe.
560+
// This allows the JIT to eliminate the internal bounds check
561+
// and avoid generating unnecessary exception-handling code inside Slice.
562+
//
563+
// In other words, the check would happen anyway, but by placing it here explicitly,
564+
// we eliminate the need for exception code inside Slice itself.
565+
//
566+
// Even though logically (uint)index < (uint)s.Length implies (uint)index <= (uint)s.Length,
567+
// the JIT cannot currently prove this identity and therefore cannot optimize away
568+
// the redundant check in Slice.
553569

554-
// s = s.Slice(index);
570+
if ((uint)index <= (uint)s.Length)
571+
s = s.Slice(index);
555572
}
556573
else
557574
{

0 commit comments

Comments
 (0)