Skip to content

Commit 63093a8

Browse files
ekscryptoclaude
andcommitted
Security: fix domain octet limits, block Variation Selectors, drop .inverted on supplementary-plane sets
RFC compliance (Bug 1): - Domain total-length check now uses .utf8.count ≤ 253 (was .count) - Per-label length check now uses .utf8.count ≤ 63 (was .count) - RFC 1035 §2.3.4 specifies octets; multi-byte Unicode labels could previously exceed the byte limit while passing the character-count check Spoofing prevention (Bug 2): - Add U+FE00-U+FE0F (Variation Selectors) to zeroWidthAndInvisibleChars so they are subtracted from atextUnicode/qtextUnicode character sets - Add explicit scalar guards for U+E0100-U+E01EF (Variation Selectors Supplement) in extractDotAtom and extractQuotedString - VS chars combine invisibly with adjacent base characters; "user\uFE01" renders identically to "user" — same spoofing risk as ZWJ/ZWNJ Foundation CharacterSet bug (Bug 3): - Replace .inverted usage on sets containing supplementary planes with per-scalar allSatisfy({ set.contains($0) }) throughout: extractDotAtom (disallowedCharacterSet → allowedCharacterSet), candidateForRfc2047 (rangeOfCharacter inverted check) - Foundation's CharacterSet bitmap inversion is unreliable for supplementary-plane scalars; direct containment queries are not Tests: add testDomainLabelUnicodeByteLengthEnforced, testTotalDomainUnicodeByteLengthEnforced, testVariationSelectorsRejectedInLocalPart (BMP + Supplement) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 28643ae commit 63093a8

2 files changed

Lines changed: 113 additions & 14 deletions

File tree

Sources/SwiftEmailValidator/EmailSyntaxValidator.swift

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,10 @@ public final class EmailSyntaxValidator {
197197
/// - Returns: Repackaged email string (may still fail SMTP validation) or nil if really nothing that could be done
198198
private static func candidateForRfc2047(_ candidate: String, compatibility: Compatibility) -> String? {
199199

200+
// Avoid .inverted on sets containing supplementary planes — use per-scalar containment.
200201
guard compatibility == .asciiWithUnicodeExtension,
201202
!candidate.hasPrefix("=?"),
202-
candidate.rangeOfCharacter(from: qtextUnicodeSMTPCharacterSet.inverted) == nil
203+
candidate.unicodeScalars.allSatisfy({ qtextUnicodeSMTPCharacterSet.contains($0) })
203204
else {
204205
// There are some unsupported ASCII characters which are invalid regardless of unicode or ASCII (newline, tabs, etc)
205206
return nil
@@ -235,16 +236,16 @@ public final class EmailSyntaxValidator {
235236
// RFC 5321: host must not be empty
236237
guard !candidate.isEmpty else { return nil }
237238

238-
// RFC 1035: total domain must be ≤253 chars
239-
guard candidate.count <= 253 else { return nil }
239+
// RFC 1035: total domain must be ≤253 octets
240+
guard candidate.utf8.count <= 253 else { return nil }
240241

241242
// Split without omitting empty subsequences so that consecutive dots (empty labels),
242243
// leading dots, and trailing dots are all caught by the per-label checks below.
243244
let labels = candidate.split(separator: ".", omittingEmptySubsequences: false)
244245
guard labels.allSatisfy({ label in
245246
let s = String(label)
246247
return s.count >= 1 // no empty labels (catches .., leading/trailing dot)
247-
&& s.count <= 63 // RFC 1035: each label ≤63 chars
248+
&& s.utf8.count <= 63 // RFC 1035: each label ≤63 octets
248249
&& !s.hasPrefix("-") // RFC 1123: no leading hyphen
249250
&& !s.hasSuffix("-") // RFC 1123: no trailing hyphen
250251
&& s.unicodeScalars.allSatisfy({ domainLabelCharacterSet.contains($0) })
@@ -305,6 +306,7 @@ public final class EmailSyntaxValidator {
305306
.union(CharacterSet(charactersIn: Unicode.Scalar(0x2060)!...Unicode.Scalar(0x2064)!)) // U+2060 Word Joiner, U+2061-U+2064 invisible math operators
306307
.union(CharacterSet(charactersIn: Unicode.Scalar(0xFEFF)!...Unicode.Scalar(0xFEFF)!)) // U+FEFF BOM / Zero Width No-Break Space
307308
.union(CharacterSet(charactersIn: Unicode.Scalar(0x2028)!...Unicode.Scalar(0x2029)!)) // U+2028 Line Separator, U+2029 Paragraph Separator
309+
.union(CharacterSet(charactersIn: Unicode.Scalar(0xFE00)!...Unicode.Scalar(0xFE0F)!)) // U+FE00-U+FE0F Variation Selectors (invisible combiners, spoofing)
308310

309311
// Note: CharacterSet.inverted doesn't properly include supplementary planes (U+10000+).
310312
// Using .inverted on an ASCII-range set also leaks supplementary scalars into the result on
@@ -369,17 +371,26 @@ public final class EmailSyntaxValidator {
369371
}
370372

371373
let dotAtom = candidate[..<atRange.lowerBound]
372-
let disallowedCharacterSet: CharacterSet = compatibility == .ascii ? atextCharacterSet.inverted : atextUnicodeCharacterSet.inverted
374+
// Avoid .inverted on sets containing supplementary planes — Foundation has a known bug
375+
// where .inverted doesn't correctly handle supplementary-plane bitmaps. Check membership
376+
// in the allowed set directly using per-scalar containment instead.
377+
let allowedCharacterSet: CharacterSet = compatibility == .ascii ? atextCharacterSet : atextUnicodeCharacterSet
373378
guard dotAtom.count > 0,
374379
dotAtom.utf8.count <= 64,
375380
!dotAtom.hasPrefix("."),
376381
!dotAtom.hasSuffix("."),
377382
dotAtom.components(separatedBy: ".").allSatisfy({ label in
378383
label.count > 0
379-
&& label.rangeOfCharacter(from: disallowedCharacterSet) == nil
380-
// Reject Unicode Tags block (U+E0000-U+E007F): deprecated invisible-text
381-
// characters included in supplementaryPlanes but unsafe for email.
382-
&& !label.unicodeScalars.contains(where: { $0.value >= 0xE0000 && $0.value <= 0xE007F })
384+
&& label.unicodeScalars.allSatisfy({ allowedCharacterSet.contains($0) })
385+
// Reject supplementary-plane ranges excluded from allowedCharacterSet via
386+
// explicit scalar guards (Foundation CharacterSet.contains() is reliable for
387+
// individual scalars, but belt-and-suspenders for these security-sensitive ranges):
388+
// U+E0000-U+E007F Unicode Tags block (deprecated invisible-text markup)
389+
// U+E0100-U+E01EF Variation Selectors Supplement (invisible combiners, spoofing)
390+
&& !label.unicodeScalars.contains(where: {
391+
($0.value >= 0xE0000 && $0.value <= 0xE007F)
392+
|| ($0.value >= 0xE0100 && $0.value <= 0xE01EF)
393+
})
383394
})
384395
else {
385396
return nil
@@ -418,11 +429,13 @@ public final class EmailSyntaxValidator {
418429
// security-excluded scalars that appear as combining elements would slip through.
419430
// Scan every scalar in the cluster explicitly to prevent this.
420431
guard !character.unicodeScalars.contains(where: { s in
421-
s.value == 0x00AD || // U+00AD Soft Hyphen
422-
(s.value >= 0x200B && s.value <= 0x200D) || // U+200B-U+200D ZWS/ZWNJ/ZWJ
423-
(s.value >= 0x2060 && s.value <= 0x2064) || // U+2060-U+2064 invisible format chars
424-
s.value == 0xFEFF || // U+FEFF BOM
425-
(s.value >= 0xE0000 && s.value <= 0xE007F) || // U+E0000-U+E007F Unicode Tags block
432+
s.value == 0x00AD || // U+00AD Soft Hyphen
433+
(s.value >= 0x200B && s.value <= 0x200D) || // U+200B-U+200D ZWS/ZWNJ/ZWJ
434+
(s.value >= 0x2060 && s.value <= 0x2064) || // U+2060-U+2064 invisible format chars
435+
s.value == 0xFEFF || // U+FEFF BOM
436+
(s.value >= 0xFE00 && s.value <= 0xFE0F) || // U+FE00-U+FE0F Variation Selectors
437+
(s.value >= 0xE0000 && s.value <= 0xE007F) || // U+E0000-U+E007F Unicode Tags block
438+
(s.value >= 0xE0100 && s.value <= 0xE01EF) || // U+E0100-U+E01EF Variation Selectors Supplement
426439
(s.value == 0x2028 || s.value == 0x2029) // U+2028 Line Sep, U+2029 Para Sep
427440
}) else {
428441
return nil

Tests/SwiftEmailValidatorTests/EmailSyntaxValidatorTests.swift

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,49 @@ final class EmailSyntaxValidatorTests: XCTestCase {
479479
"Domain with valid-length labels totaling < 253 chars should be accepted with permissive validator")
480480
}
481481

482+
func testDomainLabelUnicodeByteLengthEnforced() {
483+
// RFC 1035 §2.3.4: each label must be ≤63 *octets*.
484+
// A label composed of 32 two-byte characters is 32 characters (≤63) but 64 UTF-8 bytes (>63).
485+
// It must be rejected even though the character count is within the old (wrong) limit.
486+
let permissive: (String) -> Bool = { _ in true }
487+
488+
// "ñ" is U+00F1, 2 UTF-8 bytes. 32 × "ñ" = 32 chars / 64 bytes → label too long in octets.
489+
let twoByteChar = "ñ"
490+
XCTAssertEqual(twoByteChar.utf8.count, 2)
491+
let label64Bytes = String(repeating: twoByteChar, count: 32) // 32 chars, 64 bytes
492+
XCTAssertEqual(label64Bytes.count, 32)
493+
XCTAssertEqual(label64Bytes.utf8.count, 64)
494+
XCTAssertNil(EmailSyntaxValidator.mailbox(from: "user@\(label64Bytes).com", compatibility: .unicode, domainValidator: permissive),
495+
"Domain label with 64 UTF-8 bytes (32 two-byte chars) must be rejected per RFC 1035")
496+
497+
// 31 × "ñ" = 31 chars / 62 bytes → should be accepted (within both limits).
498+
let label62Bytes = String(repeating: twoByteChar, count: 31) // 31 chars, 62 bytes
499+
XCTAssertEqual(label62Bytes.utf8.count, 62)
500+
XCTAssertNotNil(EmailSyntaxValidator.mailbox(from: "user@\(label62Bytes).com", compatibility: .unicode, domainValidator: permissive),
501+
"Domain label with 62 UTF-8 bytes should be accepted")
502+
}
503+
504+
func testTotalDomainUnicodeByteLengthEnforced() {
505+
// RFC 1035 §2.3.4: total domain must be ≤253 *octets*.
506+
// Build a domain whose character count is ≤253 but whose UTF-8 byte count exceeds 253.
507+
// Each label: 31 × "ñ" (31 chars, 62 bytes); three labels + dots = 62+1+62+1+62 = 188 chars / bytes.
508+
// Add a fourth label of 30 two-byte chars: 188+1+60 = 249 chars / 188+1+60 = 249 bytes — ok.
509+
// Then push it over 253 bytes without going over 253 chars by using more two-byte chars.
510+
let permissive: (String) -> Bool = { _ in true }
511+
let twoByteChar = "ñ"
512+
513+
// Construct a domain that is 127 chars but 254 bytes.
514+
// Three labels of 31 "ñ" each = 31*3 + 2 dots = 95 chars / 62*3+2 = 188 bytes.
515+
// Add a 4th label of 33 "ñ" = 33 chars / 66 bytes → total 129 chars / 255 bytes → reject.
516+
let label31 = String(repeating: twoByteChar, count: 31) // 62 bytes, 31 chars
517+
let label33 = String(repeating: twoByteChar, count: 33) // 66 bytes, 33 chars
518+
let longByteDomain = "\(label31).\(label31).\(label31).\(label33)"
519+
XCTAssertLessThanOrEqual(longByteDomain.count, 253, "character count must be ≤253 to test the byte-count path")
520+
XCTAssertGreaterThan(longByteDomain.utf8.count, 253, "byte count must exceed 253 to exercise the fix")
521+
XCTAssertNil(EmailSyntaxValidator.mailbox(from: "u@\(longByteDomain)", compatibility: .unicode, domainValidator: permissive),
522+
"Domain exceeding 253 UTF-8 bytes must be rejected even if character count ≤253")
523+
}
524+
482525
func testVeryLongRFC2047EncodedString() {
483526
// RFC2047 has 76-character limit
484527
// Create a string that when encoded exceeds 76 chars
@@ -641,6 +684,49 @@ final class EmailSyntaxValidatorTests: XCTestCase {
641684
}
642685
}
643686

687+
func testVariationSelectorsRejectedInLocalPart() {
688+
// Variation Selectors (U+FE00-U+FE0F) and Variation Selectors Supplement (U+E0100-U+E01EF)
689+
// are invisible combining characters. They produce no glyph and render identically to their
690+
// base character in all common renderers, making "user\u{FE01}" visually indistinguishable
691+
// from "user" — the same spoofing risk as ZWJ/ZWNJ, which are already blocked.
692+
let permissive: (String) -> Bool = { _ in true }
693+
694+
// BMP Variation Selectors (U+FE00-U+FE0F) — spot-check first, middle, last
695+
let bmpVariationSelectors: [(String, String)] = [
696+
("\u{FE00}", "U+FE00 Variation Selector-1"),
697+
("\u{FE08}", "U+FE08 Variation Selector-9"),
698+
("\u{FE0F}", "U+FE0F Variation Selector-16"),
699+
]
700+
for (char, name) in bmpVariationSelectors {
701+
XCTAssertNil(
702+
EmailSyntaxValidator.mailbox(from: "user\(char)@site.com", compatibility: .unicode, domainValidator: permissive),
703+
"\(name) must be rejected in dot-atom local part (spoofing prevention)"
704+
)
705+
XCTAssertNil(
706+
EmailSyntaxValidator.mailbox(from: "\"user\(char)\"@site.com", compatibility: .unicode, domainValidator: permissive),
707+
"\(name) must be rejected in quoted-string local part (spoofing prevention)"
708+
)
709+
}
710+
711+
// Variation Selectors Supplement (U+E0100-U+E01EF) — spot-check first, middle, last
712+
let supplementVariationSelectors: [(Unicode.Scalar, String)] = [
713+
(Unicode.Scalar(0xE0100)!, "U+E0100 Variation Selector-17"),
714+
(Unicode.Scalar(0xE0140)!, "U+E0140 Variation Selector-81"),
715+
(Unicode.Scalar(0xE01EF)!, "U+E01EF Variation Selector-256"),
716+
]
717+
for (scalar, name) in supplementVariationSelectors {
718+
let char = String(scalar)
719+
XCTAssertNil(
720+
EmailSyntaxValidator.mailbox(from: "user\(char)@site.com", compatibility: .unicode, domainValidator: permissive),
721+
"\(name) must be rejected in dot-atom local part (spoofing prevention)"
722+
)
723+
XCTAssertNil(
724+
EmailSyntaxValidator.mailbox(from: "\"user\(char)\"@site.com", compatibility: .unicode, domainValidator: permissive),
725+
"\(name) must be rejected in quoted-string local part (spoofing prevention)"
726+
)
727+
}
728+
}
729+
644730
func testLineSeparatorCharactersRejectedInLocalPart() {
645731
// U+2028 (Line Separator) and U+2029 (Paragraph Separator) carry line-break
646732
// semantics in some runtimes and are not explicitly permitted by RFC 6531/6532.

0 commit comments

Comments
 (0)