Skip to content

Commit 8ef63b6

Browse files
ekscrypto and claude committed
Security: fix RFC 2047 75-char limit; block supplementary PUA in local parts
Bug 1 (RFC 2047 off-by-one): RFC 2047 §2 caps an encoded-word at 75 characters, including the charset, encoding, encoded-text, and delimiters. The length check used <= 76, allowing one character more than the spec permits. Changed to <= 75. Bug 2 (supplementary PUA spoofing): The BMP Private Use Area (U+E000-U+F8FF) was already blocked to prevent spoofing, but Supplementary PUA-A (U+F0000-U+FFFFD) and PUA-B (U+100000-U+10FFFD) were implicitly allowed via the supplementaryPlanes union. Added explicit scalar guards in extractDotAtom and extractQuotedString that reject the whole of planes 15 and 16 (U+F0000-U+10FFFF: both PUA ranges plus the four plane-final noncharacters), consistent with the BMP handling. Emoji and historic-script SMP characters remain accepted. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 63093a8 commit 8ef63b6

4 files changed

Lines changed: 68 additions & 4 deletions

File tree

Sources/SwiftEmailValidator/EmailSyntaxValidator.swift

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -388,8 +388,9 @@ public final class EmailSyntaxValidator {
388388
// U+E0000-U+E007F Unicode Tags block (deprecated invisible-text markup)
389389
// U+E0100-U+E01EF Variation Selectors Supplement (invisible combiners, spoofing)
390390
&& !label.unicodeScalars.contains(where: {
391-
($0.value >= 0xE0000 && $0.value <= 0xE007F)
392-
|| ($0.value >= 0xE0100 && $0.value <= 0xE01EF)
391+
($0.value >= 0xE0000 && $0.value <= 0xE007F) // Unicode Tags block
392+
|| ($0.value >= 0xE0100 && $0.value <= 0xE01EF) // Variation Selectors Supplement
393+
|| ($0.value >= 0xF0000 && $0.value <= 0x10FFFF) // Supplementary PUA-A/B
393394
})
394395
})
395396
else {
@@ -436,7 +437,8 @@ public final class EmailSyntaxValidator {
436437
(s.value >= 0xFE00 && s.value <= 0xFE0F) || // U+FE00-U+FE0F Variation Selectors
437438
(s.value >= 0xE0000 && s.value <= 0xE007F) || // U+E0000-U+E007F Unicode Tags block
438439
(s.value >= 0xE0100 && s.value <= 0xE01EF) || // U+E0100-U+E01EF Variation Selectors Supplement
439-
(s.value == 0x2028 || s.value == 0x2029) // U+2028 Line Sep, U+2029 Para Sep
440+
(s.value == 0x2028 || s.value == 0x2029) || // U+2028 Line Sep, U+2029 Para Sep
441+
(s.value >= 0xF0000 && s.value <= 0x10FFFF) // Supplementary PUA-A/B
440442
}) else {
441443
return nil
442444
}

Sources/SwiftEmailValidator/RFC2047Coder.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ public final class RFC2047Coder {
8080
/// - Quoted-Printable only supports ISO-8859-1 and ISO-8859-2 charsets
8181
public static func decode(_ encoded: String) -> String? {
8282

83-
guard encoded.count <= 76 else {
83+
guard encoded.count <= 75 else {
8484
return nil
8585
}
8686
let encodingComponents = match(regex: rfc2047regex, to: encoded)

Tests/SwiftEmailValidatorTests/EmailSyntaxValidatorTests.swift

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,4 +785,42 @@ final class EmailSyntaxValidatorTests: XCTestCase {
785785
"U+1F600 (emoji, SMP) just below Tags block should still be accepted"
786786
)
787787
}
788+
789+
// MARK: - Bug 2: Supplementary PUA (U+F0000-U+10FFFF)
790+
791+
func testSupplementaryPrivateUseAreaRejectedInLocalPart() {
792+
// Supplementary Private Use Area-A (U+F0000-U+FFFFF) and -B (U+100000-U+10FFFF)
793+
// carry the same spoofing risk as the BMP Private Use Area (U+E000-U+F8FF),
794+
// which is already blocked. Private-use characters have no standardised rendering
795+
// and can appear identical to common glyphs in custom fonts.
796+
let permissive: (String) -> Bool = { _ in true }
797+
let supplementaryPUAChars: [(Unicode.Scalar, String)] = [
798+
(Unicode.Scalar(0xF0000)!, "U+F0000 Supplementary PUA-A first"),
799+
(Unicode.Scalar(0xF0001)!, "U+F0001 Supplementary PUA-A"),
800+
(Unicode.Scalar(0xFFFFD)!, "U+FFFFD Supplementary PUA-A last valid"),
801+
(Unicode.Scalar(0x100000)!, "U+100000 Supplementary PUA-B first"),
802+
(Unicode.Scalar(0x100001)!, "U+100001 Supplementary PUA-B"),
803+
(Unicode.Scalar(0x10FFFD)!, "U+10FFFD Supplementary PUA-B last valid"),
804+
]
805+
for (scalar, name) in supplementaryPUAChars {
806+
let char = String(scalar)
807+
XCTAssertNil(
808+
EmailSyntaxValidator.mailbox(from: "user\(char)@site.com", compatibility: .unicode, domainValidator: permissive),
809+
"\(name) must be rejected in dot-atom local part (supplementary PUA spoofing prevention)"
810+
)
811+
XCTAssertNil(
812+
EmailSyntaxValidator.mailbox(from: "\"user\(char)\"@site.com", compatibility: .unicode, domainValidator: permissive),
813+
"\(name) must be rejected in quoted-string local part (supplementary PUA spoofing prevention)"
814+
)
815+
}
816+
// Confirm legitimate SMP characters (emoji, historic scripts) remain accepted
817+
XCTAssertNotNil(
818+
EmailSyntaxValidator.mailbox(from: "user\u{1F600}@site.com", compatibility: .unicode, domainValidator: permissive),
819+
"U+1F600 (emoji, SMP) must still be accepted"
820+
)
821+
XCTAssertNotNil(
822+
EmailSyntaxValidator.mailbox(from: "user\u{1D400}@site.com", compatibility: .unicode, domainValidator: permissive),
823+
"U+1D400 (Mathematical Bold A, SMP) must still be accepted"
824+
)
825+
}
788826
}

Tests/SwiftEmailValidatorTests/RFC2047CoderTests.swift

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,30 @@ final class RFC2047CoderTests: XCTestCase {
260260
XCTAssertNil(RFC2047Coder.decode("=?utf-8?b?dGVz dA?="), "Spaces in encoded text should cause failure or be handled per RFC")
261261
}
262262

263+
// MARK: - Bug 1: RFC 2047 75-character limit
264+
265+
func testDecode75CharLimitEnforced() {
266+
// RFC 2047 §2: "An 'encoded-word' may not be more than 75 characters long".
267+
// Overhead: "=?iso-8859-1?q?" (15) + "?=" (2) = 17 chars → 58 chars of content hits 75 exactly.
268+
let prefix = "=?iso-8859-1?q?"
269+
let suffix = "?="
270+
let overhead = prefix.count + suffix.count // 17
271+
272+
// Exactly 75 chars → accepted
273+
let content75 = String(repeating: "a", count: 75 - overhead)
274+
let encoded75 = prefix + content75 + suffix
275+
XCTAssertEqual(encoded75.count, 75)
276+
XCTAssertNotNil(RFC2047Coder.decode(encoded75),
277+
"Encoded word of exactly 75 chars must be accepted per RFC 2047 §2")
278+
279+
// 76 chars → must be rejected (was incorrectly accepted before the fix)
280+
let content76 = String(repeating: "a", count: 76 - overhead)
281+
let encoded76 = prefix + content76 + suffix
282+
XCTAssertEqual(encoded76.count, 76)
283+
XCTAssertNil(RFC2047Coder.decode(encoded76),
284+
"Encoded word of 76 chars must be rejected per RFC 2047 §2 (max is 75)")
285+
}
286+
263287
func testDecodeGreedyRegexNoExtraContent() {
264288
// The RFC2047 regex uses (.*) which is greedy. For "=?utf-8?b?aGVsbG8=?=extra?=",
265289
// the greedy match captures "aGVsbG8=?=extra" as the encoded text. The '?' character

0 commit comments

Comments
 (0)