@@ -308,12 +308,18 @@ public final class EmailSyntaxValidator {
308308 . union ( CharacterSet ( charactersIn: Unicode . Scalar ( 0x2066 ) !... Unicode . Scalar ( 0x2069 ) !) ) // LRI, RLI, FSI, PDI
309309 private static let deprecatedFormatChars : CharacterSet = CharacterSet ( charactersIn: Unicode . Scalar ( 0x206A ) !... Unicode . Scalar ( 0x206F ) !) // Deprecated formatting
310310 private static let bmpPrivateUseChars : CharacterSet = CharacterSet ( charactersIn: Unicode . Scalar ( 0xE000 ) !... Unicode . Scalar ( 0xF8FF ) !) // BMP Private Use Area
// BMP noncharacters: the contiguous run U+FDD0–U+FDEF and the plane-end pair U+FFFE/U+FFFF.
// The Unicode Standard (§23.7, Noncharacters) permanently reserves these code points for
// internal use and does not recommend them for open interchange of text.
// U+FDD0–U+FDEF lie above the BMP Private Use Area (U+E000–U+F8FF), so subtracting that
// range does not remove them; they need this explicit exclusion set.
// NOTE(review): only the BMP noncharacters are listed here; the U+nFFFE/U+nFFFF pairs that
// end each supplementary plane are not part of this set — confirm they are rejected elsewhere.
private static let unicodeNonCharacters: CharacterSet = {
    var reserved = CharacterSet()
    // U+FDD0–U+FDEF: 32 permanently reserved noncharacters
    reserved.insert(charactersIn: Unicode.Scalar(0xFDD0)! ... Unicode.Scalar(0xFDEF)!)
    // U+FFFE, U+FFFF: the last two code points of the BMP
    reserved.insert(charactersIn: Unicode.Scalar(0xFFFE)! ... Unicode.Scalar(0xFFFF)!)
    return reserved
}()
311317 // Invisible and zero-width format characters that produce no visible glyph.
312318 // Allowing them enables creating visually-identical but distinct email addresses (spoofing).
313319 private static let zeroWidthAndInvisibleChars : CharacterSet =
314320 CharacterSet ( charactersIn: Unicode . Scalar ( 0x00AD ) !... Unicode . Scalar ( 0x00AD ) !) // U+00AD Soft Hyphen
315321 . union ( CharacterSet ( charactersIn: Unicode . Scalar ( 0x200B ) !... Unicode . Scalar ( 0x200D ) !) ) // U+200B ZWS, U+200C ZWNJ, U+200D ZWJ
316- . union ( CharacterSet ( charactersIn: Unicode . Scalar ( 0x2060 ) !... Unicode . Scalar ( 0x2064 ) !) ) // U+2060 Word Joiner, U+2061-U+2064 invisible math operators
322+ . union ( CharacterSet ( charactersIn: Unicode . Scalar ( 0x2060 ) !... Unicode . Scalar ( 0x2065 ) !) ) // U+2060 Word Joiner, U+2061-U+2064 invisible math operators, U+2065 reserved
317323 . union ( CharacterSet ( charactersIn: Unicode . Scalar ( 0xFEFF ) !... Unicode . Scalar ( 0xFEFF ) !) ) // U+FEFF BOM / Zero Width No-Break Space
318324 . union ( CharacterSet ( charactersIn: Unicode . Scalar ( 0x2028 ) !... Unicode . Scalar ( 0x2029 ) !) ) // U+2028 Line Separator, U+2029 Paragraph Separator
319325 . union ( CharacterSet ( charactersIn: Unicode . Scalar ( 0xFE00 ) !... Unicode . Scalar ( 0xFE0F ) !) ) // U+FE00-U+FE0F Variation Selectors (invisible combiners, spoofing)
@@ -350,6 +356,7 @@ public final class EmailSyntaxValidator {
350356 . subtracting ( deprecatedFormatChars) // Exclude deprecated format characters
351357 . subtracting ( bmpPrivateUseChars) // Exclude BMP Private Use Area (U+E000-U+F8FF)
352358 . subtracting ( zeroWidthAndInvisibleChars) // Exclude invisible format characters (spoofing prevention)
359+ . subtracting ( unicodeNonCharacters) // Exclude permanently-reserved Unicode noncharacters (§23.7)
353360 . union ( supplementaryPlanes) // Supplementary planes (emoji, etc.) - MUST BE LAST (after subtractions)
354361
355362 // RFC 952/1123: domain labels are LDH (letters, digits, hyphens); Unicode letters are
@@ -380,6 +387,7 @@ public final class EmailSyntaxValidator {
380387 . subtracting ( deprecatedFormatChars) // Exclude deprecated format characters
381388 . subtracting ( bmpPrivateUseChars) // Exclude BMP Private Use Area (U+E000-U+F8FF)
382389 . subtracting ( zeroWidthAndInvisibleChars) // Exclude invisible format characters (spoofing prevention)
390+ . subtracting ( unicodeNonCharacters) // Exclude permanently-reserved Unicode noncharacters (§23.7)
383391 . union ( supplementaryPlanes) // Supplementary planes (emoji, etc.) - MUST BE LAST (after subtractions)
384392
385393 private static func extractDotAtom( _ candidate: String , compatibility: Compatibility ) -> String ? {
@@ -404,10 +412,12 @@ public final class EmailSyntaxValidator {
404412 // Re-check, with explicit scalar-value guards, the supplementary-plane ranges that are
405413 // excluded from allowedCharacterSet. Foundation's CharacterSet.contains() is reliable for
406414 // individual scalars; the guards are belt-and-suspenders for these security-sensitive ranges:
415+ // U+40000-U+DFFFF: Planes 4-13 (entirely unassigned in Unicode)
407416 // U+E0000-U+EFFFF: entire SSP (Tags block, unassigned gaps, VS Supplement)
408417 // U+F0000-U+10FFFF: Supplementary PUA-A/B
409418 && !label. unicodeScalars. contains ( where: {
410- ( $0. value >= 0xE0000 && $0. value <= 0x10FFFF ) // Entire SSP + PUA-A/B
419+ ( $0. value >= 0x40000 && $0. value <= 0xDFFFF ) // Planes 4-13 (entirely unassigned)
420+ || ( $0. value >= 0xE0000 && $0. value <= 0x10FFFF ) // Entire SSP + PUA-A/B
411421 } )
412422 } )
413423 else {
@@ -449,10 +459,13 @@ public final class EmailSyntaxValidator {
449459 guard !character. unicodeScalars. contains ( where: { s in
450460 s. value == 0x00AD || // U+00AD Soft Hyphen
451461 ( s. value >= 0x200B && s. value <= 0x200D ) || // U+200B-U+200D ZWS/ZWNJ/ZWJ
452- ( s. value >= 0x2060 && s. value <= 0x2064 ) || // U+2060-U+2064 invisible format chars
462+ ( s. value >= 0x2060 && s. value <= 0x2065 ) || // U+2060-U+2065 invisible/reserved format chars
453463 s. value == 0xFEFF || // U+FEFF BOM
454464 ( s. value >= 0xFE00 && s. value <= 0xFE0F ) || // U+FE00-U+FE0F Variation Selectors
455465 ( s. value == 0x2028 || s. value == 0x2029 ) || // U+2028 Line Sep, U+2029 Para Sep
466+ ( s. value >= 0xFDD0 && s. value <= 0xFDEF ) || // U+FDD0-U+FDEF Unicode noncharacters
467+ ( s. value == 0xFFFE || s. value == 0xFFFF ) || // U+FFFE/U+FFFF BMP noncharacters
468+ ( s. value >= 0x40000 && s. value <= 0xDFFFF ) || // Planes 4-13 (entirely unassigned)
456469 ( s. value >= 0xE0000 && s. value <= 0x10FFFF ) // Entire SSP (Tags, unassigned gaps, VS Sup) + PUA-A/B
457470 } ) else {
458471 return nil
@@ -470,7 +483,11 @@ public final class EmailSyntaxValidator {
470483
471484 if escaped {
472485 cleanedText. append ( character)
473- guard quotedPairSMTP. contains ( characterScalar) else {
486+ // RFC 5321 §4.1.2: quoted-pairSMTP = %d92 %d32-126 — a backslash plus exactly one ASCII scalar (space through "~").
487+ // A multi-scalar grapheme cluster (e.g. "e" + U+0301 combining acute) would otherwise have only its
488+ // first scalar checked against quotedPairSMTP, while the additional scalars go unchecked.
489+ guard character. unicodeScalars. count == 1 ,
490+ quotedPairSMTP. contains ( characterScalar) else {
474491 return nil
475492 }
476493 escaped = false
0 commit comments