Skip to content

Commit 4f1a1e3

Browse files
ekscrypto and claude committed
Fix: enforce ASCII-only domain labels in .ascii compatibility mode
In .ascii mode the entire email address must be ASCII — the local-part check already enforced this, but the domain label character set was not gated by compatibility, silently accepting Unicode U-labels such as 例え or 한국 even when the caller expected a fully ASCII address.

Added asciiDomainLabelCharacterSet (strict LDH: A-Z, a-z, 0-9, hyphen) and threaded extractionCompatibility through the private mailbox and extractHost helpers so that the appropriate set is selected per mode:

- .ascii → LDH only (Punycode xn--… passes naturally)
- .unicode / .asciiWithUnicodeExtension → existing domainLabelCharacterSet (Unicode U-labels allowed per RFC 5891)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 8ef63b6 commit 4f1a1e3

2 files changed

Lines changed: 46 additions & 8 deletions

File tree

Sources/SwiftEmailValidator/EmailSyntaxValidator.swift

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -165,15 +165,17 @@ public final class EmailSyntaxValidator {
165165
localPart: .dotAtom(dotAtom),
166166
originalCandidate: candidate,
167167
hostCandidate: String(smtpCandidate.dropFirst(dotAtom.count + 1)),
168+
compatibility: extractionCompatibility,
168169
allowAddressLiteral: allowAddressLiteral,
169170
domainValidator: domainValidator)
170171
}
171-
172+
172173
if let quotedString = extractQuotedString(smtpCandidate, compatibility: extractionCompatibility) {
173174
return mailbox(
174175
localPart: .quotedString(String(quotedString.cleaned)),
175176
originalCandidate: candidate,
176177
hostCandidate: String(smtpCandidate.dropFirst(quotedString.integral.count + 1)),
178+
compatibility: extractionCompatibility,
177179
allowAddressLiteral: allowAddressLiteral,
178180
domainValidator: domainValidator)
179181
}
@@ -215,19 +217,19 @@ public final class EmailSyntaxValidator {
215217
return RFC2047Coder.encode(candidate)
216218
}
217219

218-
private static func mailbox(localPart: Mailbox.LocalPart, originalCandidate: String, hostCandidate: String, allowAddressLiteral: Bool, domainValidator: (String) -> Bool) -> Mailbox? {
219-
220-
guard let host = extractHost(from: hostCandidate, allowAddressLiteral: allowAddressLiteral, domainValidator: domainValidator) else {
220+
private static func mailbox(localPart: Mailbox.LocalPart, originalCandidate: String, hostCandidate: String, compatibility: Compatibility, allowAddressLiteral: Bool, domainValidator: (String) -> Bool) -> Mailbox? {
221+
222+
guard let host = extractHost(from: hostCandidate, compatibility: compatibility, allowAddressLiteral: allowAddressLiteral, domainValidator: domainValidator) else {
221223
return nil
222224
}
223-
225+
224226
return Mailbox(
225227
email: originalCandidate,
226228
localPart: localPart,
227229
host: host)
228230
}
229-
230-
private static func extractHost(from candidate: String, allowAddressLiteral: Bool, domainValidator: (String) -> Bool) -> Mailbox.Host? {
231+
232+
private static func extractHost(from candidate: String, compatibility: Compatibility, allowAddressLiteral: Bool, domainValidator: (String) -> Bool) -> Mailbox.Host? {
231233

232234
if candidate.hasPrefix("[") {
233235
return extractHostLiteral(from: candidate, allowAddressLiteral: allowAddressLiteral)
@@ -239,6 +241,11 @@ public final class EmailSyntaxValidator {
239241
// RFC 1035: total domain must be ≤253 octets
240242
guard candidate.utf8.count <= 253 else { return nil }
241243

244+
// In .ascii mode use the strict LDH-only set (A-Z, a-z, 0-9, hyphen).
245+
// Punycode ACE labels (xn--…) are naturally LDH and pass without special handling.
246+
// In .unicode / .asciiWithUnicodeExtension modes allow Unicode U-labels per RFC 5891.
247+
let labelCharacterSet = compatibility == .ascii ? asciiDomainLabelCharacterSet : domainLabelCharacterSet
248+
242249
// Split without omitting empty subsequences so that consecutive dots (empty labels),
243250
// leading dots, and trailing dots are all caught by the per-label checks below.
244251
let labels = candidate.split(separator: ".", omittingEmptySubsequences: false)
@@ -248,7 +255,7 @@ public final class EmailSyntaxValidator {
248255
&& s.utf8.count <= 63 // RFC 1035: each label ≤63 octets
249256
&& !s.hasPrefix("-") // RFC 1123: no leading hyphen
250257
&& !s.hasSuffix("-") // RFC 1123: no trailing hyphen
251-
&& s.unicodeScalars.allSatisfy({ domainLabelCharacterSet.contains($0) })
258+
&& s.unicodeScalars.allSatisfy({ labelCharacterSet.contains($0) })
252259
}) else {
253260
return nil
254261
}
@@ -345,6 +352,13 @@ public final class EmailSyntaxValidator {
345352
private static let domainLabelCharacterSet: CharacterSet = CharacterSet.letters
346353
.union(CharacterSet(charactersIn: "0123456789-"))
347354

355+
// Strict ASCII LDH set used when compatibility == .ascii.
356+
// Punycode ACE labels (xn--…) are naturally LDH and pass this check without special handling.
357+
private static let asciiDomainLabelCharacterSet: CharacterSet = CharacterSet(charactersIn: alphaLowerRange)
358+
.union(CharacterSet(charactersIn: alphaUpperRange))
359+
.union(CharacterSet(charactersIn: digitRange))
360+
.union(CharacterSet(charactersIn: "-"))
361+
348362
private static let quotedPairSMTP: ClosedRange<Unicode.Scalar> = Unicode.Scalar(0x20)!...Unicode.Scalar(0x7E)!
349363
private static let qtextSMTP1: ClosedRange<Unicode.Scalar> = Unicode.Scalar(0x20)!...Unicode.Scalar(0x21)!
350364
private static let qtextSMTP2: ClosedRange<Unicode.Scalar> = Unicode.Scalar(0x23)!...Unicode.Scalar(0x5B)!

Tests/SwiftEmailValidatorTests/EmailSyntaxValidatorTests.swift

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,30 @@ final class EmailSyntaxValidatorTests: XCTestCase {
140140
XCTAssertNil(EmailSyntaxValidator.mailbox(from: "한@x.한국", compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode")
141141
XCTAssertNil(EmailSyntaxValidator.mailbox(from: "\"\"@x.한국", compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode")
142142
}
143+
144+
func testAsciiRejectsUnicodeDomain() {
145+
// In .ascii mode the domain must also be ASCII-only (LDH labels or Punycode).
146+
// A Unicode U-label like 例え or 한국 must be rejected even when the local part is ASCII.
147+
let permissive: (String) -> Bool = { _ in true }
148+
XCTAssertNil(
149+
EmailSyntaxValidator.mailbox(from: "user@例え.jp", compatibility: .ascii, domainValidator: permissive),
150+
"Unicode domain label must be rejected in .ascii mode"
151+
)
152+
XCTAssertNil(
153+
EmailSyntaxValidator.mailbox(from: "user@x.한국", compatibility: .ascii, domainValidator: permissive),
154+
"Unicode TLD must be rejected in .ascii mode"
155+
)
156+
// Punycode ACE form is LDH and must be accepted
157+
XCTAssertNotNil(
158+
EmailSyntaxValidator.mailbox(from: "user@xn--eckwd4c7c.jp", compatibility: .ascii, domainValidator: permissive),
159+
"Punycode ACE label must be accepted in .ascii mode (it is LDH)"
160+
)
161+
// Unicode domain must still be accepted in .unicode mode
162+
XCTAssertNotNil(
163+
EmailSyntaxValidator.mailbox(from: "user@例え.jp", compatibility: .unicode, domainValidator: permissive),
164+
"Unicode domain label must be accepted in .unicode mode"
165+
)
166+
}
143167

144168
func testUnicodeCompatibility() {
145169
XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한@x.한국", compatibility: .unicode)?.localPart, .dotAtom(""), "Unicode email addresses should be allowed in Unicode compatibility")

0 commit comments

Comments
 (0)