From b3de62e0a697b09d4d82cb23be71ee14063ac72c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20H=C3=BCbner?= Date: Wed, 22 Apr 2026 18:01:16 +0200 Subject: [PATCH 1/7] Implement stringByFoldingWithOptions --- Headers/Foundation/NSString.h | 8 + Source/NSString.m | 284 +++++++++++++++++++++++++++++++++- Tests/base/NSString/locale.m | 104 +++++++++++++ 3 files changed, 395 insertions(+), 1 deletion(-) diff --git a/Headers/Foundation/NSString.h b/Headers/Foundation/NSString.h index a518627b8d..535022e639 100644 --- a/Headers/Foundation/NSString.h +++ b/Headers/Foundation/NSString.h @@ -657,6 +657,14 @@ GS_EXPORT_CLASS - (NSString *) precomposedStringWithCanonicalMapping; #endif +#if OS_API_VERSION(MAC_OS_X_VERSION_10_5,GS_API_LATEST) +/** Returns a copy of the receiver suitable for comparison with the supplied + * options and locale. + */ +- (NSString *) stringByFoldingWithOptions: (NSStringCompareOptions)options + locale: (NSLocale *)locale; +#endif + // Converting String Contents into a Property List - (id) propertyList; - (NSDictionary*) propertyListFromStringsFileFormat; diff --git a/Source/NSString.m b/Source/NSString.m index 3cb0ed03e6..6a36b89bbd 100644 --- a/Source/NSString.m +++ b/Source/NSString.m @@ -116,6 +116,15 @@ #if defined(HAVE_UNICODE_UTYPES_H) # include #endif +#if defined(__has_include) +# if __has_include() +# include +# define GS_HAVE_ICU_UTRANS 1 +# endif +#endif +#ifndef GS_HAVE_ICU_UTRANS +# define GS_HAVE_ICU_UTRANS 0 +#endif #if defined(HAVE_ICU_H) # include #endif @@ -678,6 +687,222 @@ - (void) _setStringCollatorCache: (id)cache; } #endif // GS_USE_ICU +static NSString * +GSStringApplyTransliterator(const unichar *src, + NSUInteger srcLength, + const unichar *transId, + int32_t transIdLength) +{ + if (srcLength == 0) + { + return @""; + } + +#if (GS_USE_ICU == 1) && GS_HAVE_ICU_UTRANS + { + UErrorCode err = U_ZERO_ERROR; + UParseError parseError; + UTransliterator *trans; + unichar *dst; + unichar stackDst[100]; + BOOL dstOnStack = NO; + 
int32_t srcLen = (int32_t)srcLength; + int32_t capacity; + int32_t textLen; + int32_t limit; + NSString *result; + + trans = utrans_openU((const UChar *)transId, transIdLength, UTRANS_FORWARD, + NULL, 0, &parseError, &err); + if (U_FAILURE(err) || trans == NULL) + { + [NSException raise: NSCharacterConversionException + format: @"libicu transliterator open failed"]; + } + + capacity = srcLen + 16; + if (capacity < 32) + { + capacity = 32; + } + if ((NSUInteger)capacity * sizeof(unichar) <= 200) + { + dst = stackDst; + dstOnStack = YES; + } + else + { + dst = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), + capacity * sizeof(unichar)); + } + + /* A transliterator can increase output size beyond the input size + * (for example decomposition stages), so we retry with a larger + * destination buffer when ICU reports overflow. + */ + for (;;) + { + memcpy(dst, src, srcLen * sizeof(unichar)); + textLen = srcLen; + limit = textLen; + err = U_ZERO_ERROR; + utrans_transUChars(trans, (UChar *)dst, &textLen, capacity, 0, &limit, &err); + if (err == U_BUFFER_OVERFLOW_ERROR) + { + unichar *tmp; + + capacity = textLen + 16; + if (dstOnStack == YES) + { + dst = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), + capacity * sizeof(unichar)); + dstOnStack = NO; + } + else + { + tmp = (unichar *)NSZoneRealloc(NSDefaultMallocZone(), dst, + capacity * sizeof(unichar)); + dst = tmp; + } + continue; + } + if (U_FAILURE(err)) + { + if (dstOnStack == NO) + { + NSZoneFree(NSDefaultMallocZone(), dst); + } + utrans_close(trans); + [NSException raise: NSCharacterConversionException + format: @"libicu transliteration failed"]; + } + break; + } + + result = [NSString stringWithCharacters: dst length: textLen]; + if (dstOnStack == NO) + { + NSZoneFree(NSDefaultMallocZone(), dst); + } + utrans_close(trans); + return result; + } +#else + [NSException raise: NSInternalInconsistencyException + format: @"ICU transliterator support is required"]; + return nil; +#endif +} + +static NSString * 
+GSStringApplyTransliteratorToString(NSString *input, + const unichar *transId, + int32_t transIdLength) +{ + NSUInteger length = [input length]; + unichar *src; + NSString *result; + + if (length == 0) + { + return @""; + } + + src = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), length * sizeof(unichar)); + [input getCharacters: src range: NSMakeRange(0, length)]; + result = GSStringApplyTransliterator(src, length, transId, transIdLength); + NSZoneFree(NSDefaultMallocZone(), src); + + return result; +} + +static NSString * +GSStringApplyTransliteratorIdentifierToString(NSString *input, + NSString *transliteratorId) +{ + NSUInteger transIdLength = [transliteratorId length]; + unichar *transId; + NSString *result; + + transId = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), + transIdLength * sizeof(unichar)); + [transliteratorId getCharacters: transId range: NSMakeRange(0, transIdLength)]; + result = GSStringApplyTransliteratorToString(input, transId, (int32_t)transIdLength); + NSZoneFree(NSDefaultMallocZone(), transId); + + return result; +} + +#if (GS_USE_ICU == 1) && defined(HAVE_UNICODE_USTRING_H) +static NSString * +GSStringFoldCase(NSString *input, id locale) +{ + NSUInteger length = [input length]; + unichar *src; + unichar *dst; + int32_t newLength; + UErrorCode err; + const char *localeId = NULL; + NSString *result; + + if (length == 0) + { + return @""; + } + + if (locale == nil) + { + locale = [NSLocale systemLocale]; + } + else if ([locale isKindOfClass: [NSLocale class]] == NO) + { + locale = [NSLocale currentLocale]; + } + + if (locale != nil) + { + localeId = [[locale localeIdentifier] UTF8String]; + } + + src = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), length * sizeof(unichar)); + [input getCharacters: src range: NSMakeRange(0, length)]; + + err = U_ZERO_ERROR; + newLength = u_strToLower(NULL, 0, (const UChar *)src, (int32_t)length, + localeId, &err); + if (err != U_BUFFER_OVERFLOW_ERROR) + { + NSZoneFree(NSDefaultMallocZone(), src); + 
[NSException raise: NSCharacterConversionException + format: @"libicu case folding length check failed"]; + } + + dst = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), + newLength * sizeof(unichar)); + err = U_ZERO_ERROR; + u_strToLower((UChar *)dst, newLength, (const UChar *)src, + (int32_t)length, localeId, &err); + NSZoneFree(NSDefaultMallocZone(), src); + + if (U_FAILURE(err)) + { + NSZoneFree(NSDefaultMallocZone(), dst); + [NSException raise: NSCharacterConversionException + format: @"libicu case folding failed"]; + } + + result = [NSString stringWithCharacters: dst length: newLength]; + NSZoneFree(NSDefaultMallocZone(), dst); + return result; +} +#else +static NSString * +GSStringFoldCase(NSString *input, id locale) +{ + return [input lowercaseString]; +} +#endif + @implementation NSString // NSString itself is an abstract class which provides factory @@ -1968,6 +2193,64 @@ - (NSString *) decomposedStringWithCanonicalMapping return [self notImplemented: _cmd]; #endif } + +#if OS_API_VERSION(MAC_OS_X_VERSION_10_5,GS_API_LATEST) +- (NSString *) stringByFoldingWithOptions: (NSStringCompareOptions)options + locale: (NSLocale *)locale +{ + NSString *result = self; + static NSString * const widthTransliteratorId = @"Fullwidth-Halfwidth"; + static NSString * const diacriticTransliteratorId + = @"NFD; [:Nonspacing Mark:] Remove; NFC"; + static NSString * const widthDiacriticTransliteratorId + = @"Fullwidth-Halfwidth; NFD; [:Nonspacing Mark:] Remove; NFC"; + BOOL foldCase; + BOOL foldDiacritic; + BOOL foldWidth; + + if ([self length] == 0) + { + return @""; + } + + foldCase = ((options & NSCaseInsensitiveSearch) == NSCaseInsensitiveSearch); + foldDiacritic = ((options & NSDiacriticInsensitiveSearch) + == NSDiacriticInsensitiveSearch); + foldWidth = ((options & NSWidthInsensitiveSearch) == NSWidthInsensitiveSearch); + + if (foldCase == NO && foldDiacritic == NO && foldWidth == NO) + { + return IMMUTABLE(self); + } + + if (foldWidth == YES && foldDiacritic == YES) + { + 
result = GSStringApplyTransliteratorIdentifierToString( + result, widthDiacriticTransliteratorId); + foldWidth = NO; + foldDiacritic = NO; + } + + if (foldDiacritic == YES) + { + result = GSStringApplyTransliteratorIdentifierToString( + result, diacriticTransliteratorId); + } + + if (foldWidth == YES) + { + result = GSStringApplyTransliteratorIdentifierToString( + result, widthTransliteratorId); + } + + if (foldCase == YES) + { + result = GSStringFoldCase(result, locale); + } + + return IMMUTABLE(result); +} +#endif /** * Returns this string as an array of 16-bit unichar (unsigned @@ -6912,4 +7195,3 @@ - (void) setString: (NSString*)aString } @end - diff --git a/Tests/base/NSString/locale.m b/Tests/base/NSString/locale.m index 2aa9230dd8..b6cb57f13e 100644 --- a/Tests/base/NSString/locale.m +++ b/Tests/base/NSString/locale.m @@ -257,6 +257,102 @@ static void testDiacritics(void) PASS(compRes == NSOrderedSame, "expected 0 got %d", (int)compRes); } +#if OS_API_VERSION(MAC_OS_X_VERSION_10_5,GS_API_LATEST) +static void testFolding(void) +{ + const unichar eAcute = 0x00e9; + const unichar fullWidthA = 0xFF21; + const unichar dotlessI = 0x0131; + NSString *eAcuteStr = [[[NSString alloc] initWithCharacters: &eAcute + length: 1] autorelease]; + NSString *fullWidthAStr = [[[NSString alloc] initWithCharacters: &fullWidthA + length: 1] autorelease]; + NSString *dotlessIStr = [[[NSString alloc] initWithCharacters: &dotlessI + length: 1] autorelease]; + NSString *turkish = [[[NSLocale alloc] initWithLocaleIdentifier: @"tr_TR"] + autorelease]; + NSString *folded; + + folded = [@"HELLO" stringByFoldingWithOptions: NSCaseInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"hello", + "NSCaseInsensitiveSearch folds HELLO to hello"); + + folded = [eAcuteStr stringByFoldingWithOptions: NSDiacriticInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"e", + "NSDiacriticInsensitiveSearch folds e-acute to e"); + + folded = [fullWidthAStr stringByFoldingWithOptions: 
NSWidthInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"A", + "NSWidthInsensitiveSearch folds fullwidth A to ASCII A"); + + folded = [@"ABC123" + stringByFoldingWithOptions: NSWidthInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"ABC123", + "NSWidthInsensitiveSearch folds fullwidth letters and digits"); + + folded = [@"A\u0301" + stringByFoldingWithOptions: NSWidthInsensitiveSearch + | NSDiacriticInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"A", + "Width+diacritic folding removes acute after width fold"); + + folded = [@",.!ABC" + stringByFoldingWithOptions: NSWidthInsensitiveSearch + | NSCaseInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @",.!abc", + "NSWidthInsensitiveSearch and NSCaseInsensitiveSearch folds " + "fullwidth punctuation and uppercase letters"); + + folded = [@"Iİıi" + stringByFoldingWithOptions: NSCaseInsensitiveSearch + locale: turkish]; + PASS_EQUAL(folded, @"ıiıi", + "Turkish NSCaseInsensitiveSearch folds I-sequence correctly"); + + folded = [@"K" + stringByFoldingWithOptions: NSCaseInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"k", + "NSCaseInsensitiveSearch folds Kelvin sign to k"); + + folded = [@"ÅÇñöü" + stringByFoldingWithOptions: NSDiacriticInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"ACnou", + "NSDiacriticInsensitiveSearch folds multiple accented Latin letters"); + + folded = [@"e\u0301" + stringByFoldingWithOptions: NSDiacriticInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"e", + "NSDiacriticInsensitiveSearch folds decomposed acute sequence"); + + folded = [@"ÉAI" + stringByFoldingWithOptions: NSCaseInsensitiveSearch + | NSDiacriticInsensitiveSearch + | NSWidthInsensitiveSearch + locale: turkish]; + PASS_EQUAL(folded, @"eaı", + "combined folding applies case, diacritic, and width handling"); + + folded = [@"é" stringByFoldingWithOptions: NSWidthInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"é", + "NSWidthInsensitiveSearch does not remove diacritics"); + + folded 
= [@"ø" stringByFoldingWithOptions: NSDiacriticInsensitiveSearch + locale: nil]; + PASS_EQUAL(folded, @"ø", + "NSDiacriticInsensitiveSearch does not fold stroked o"); +} +#endif + int main() { START_SET("NSString + locale") @@ -270,6 +366,14 @@ int main() testEszett(); testLithuanian(); testDiacritics(); +#if OS_API_VERSION(MAC_OS_X_VERSION_10_5,GS_API_LATEST) + { + BOOL wasHopeful = testHopeful; + testHopeful = NO; + testFolding(); + testHopeful = wasHopeful; + } +#endif END_SET("NSString + locale") From 187b391fae3ccc095cc89151883f279efc6ca8c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20H=C3=BCbner?= Date: Wed, 22 Apr 2026 18:28:14 +0200 Subject: [PATCH 2/7] Update ChangeLog and add transliterator caching optimization --- ChangeLog | 9 +++ Source/NSString.m | 156 +++++++++++++++++++++++++++++++++++++--------- Source/NSThread.m | 12 ++++ 3 files changed, 149 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9ba77cf1e2..47f7966393 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2026-04-22 Hendrik Huebner + + * Headers/Foundation/NSString.h: + Declare -stringByFoldingWithOptions:locale:. + * Source/NSString.m: + Implement -stringByFoldingWithOptions:locale: using ICU transliteration. + * Tests/base/NSString/locale.m: + Add folding tests for case, diacritic, width and combined behavior. + 2026-04-08 Richard Frith-Macdonald * Source/NSDateFormatter.m: diff --git a/Source/NSString.m b/Source/NSString.m index 6a36b89bbd..d5ce3bceca 100644 --- a/Source/NSString.m +++ b/Source/NSString.m @@ -649,6 +649,8 @@ - (void) dealloc @interface NSThread (StringCollatorCache) - (id) _stringCollatorCache; - (void) _setStringCollatorCache: (id)cache; +- (id) _stringTransliteratorCache; +- (void) _setStringTransliteratorCache: (id)cache; @end // The locale parameter must not be nil at this point. 
@@ -687,11 +689,162 @@ - (void) _setStringCollatorCache: (id)cache;
 }
 #endif // GS_USE_ICU
 
+#if (GS_USE_ICU == 1) && GS_HAVE_ICU_UTRANS
+/* One cache slot: a transliterator ID and the ICU transliterator
+ * opened for it.  An unused slot has a nil ID.
+ */
+typedef struct
+{
+  NSString *transliteratorId;
+  UTransliterator *transliterator;
+} GSICUTransliteratorEntry;
+
+/* A small fixed-size cache of open ICU transliterators keyed by ID.
+ * GSICUCachedTransliterator() keeps one instance per thread (stored
+ * on the NSThread object); the cache itself does no locking.
+ */
+@interface GSICUTransliteratorCache : NSObject
+{
+  @public
+  GSICUTransliteratorEntry entries[4];
+  NSUInteger nextEviction;
+}
+- (UTransliterator *) transliteratorForId: (NSString *)transliteratorId;
+@end
+
+/* Opens a forward ICU transliterator for the given ID.  Raises
+ * NSCharacterConversionException if ICU cannot open it; otherwise the
+ * caller owns the result and must release it with utrans_close().
+ */
+static UTransliterator *
+GSICUCreateTransliterator(NSString *transliteratorId)
+{
+  NSUInteger transIdLength = [transliteratorId length];
+  unichar *transId;
+  UErrorCode err = U_ZERO_ERROR;
+  UParseError parseError;
+  UTransliterator *trans;
+
+  transId = (unichar *)NSZoneMalloc(NSDefaultMallocZone(),
+    transIdLength * sizeof(unichar));
+  [transliteratorId getCharacters: transId
+                            range: NSMakeRange(0, transIdLength)];
+  trans = utrans_openU((const UChar *)transId, (int32_t)transIdLength,
+    UTRANS_FORWARD, NULL, 0, &parseError, &err);
+  NSZoneFree(NSDefaultMallocZone(), transId);
+
+  if (U_FAILURE(err) || trans == NULL)
+    {
+      [NSException raise: NSCharacterConversionException
+                  format: @"libicu transliterator open failed"];
+    }
+  return trans;
+}
+
+@implementation GSICUTransliteratorCache
+/* Returns the transliterator for transliteratorId, opening and caching
+ * it on first use.  The cache owns the result; callers must not close
+ * it.  Raises NSCharacterConversionException if the transliterator
+ * cannot be opened, in which case the cache is left unchanged.
+ */
+- (UTransliterator *) transliteratorForId: (NSString *)transliteratorId
+{
+  const NSUInteger capacity = sizeof(entries) / sizeof(*entries);
+  NSUInteger slot = capacity;
+  NSUInteger i;
+  UTransliterator *trans;
+
+  /* We only cache a few transliterator IDs, so a simple linear search
+   * is enough.  Pointer equality is the fast path for the static IDs
+   * used in this file; the content comparison stops equal IDs held in
+   * distinct string objects from taking more than one slot.  While
+   * searching we also remember the first unused slot.
+   */
+  for (i = 0; i < capacity; i++)
+    {
+      NSString *cachedId = entries[i].transliteratorId;
+
+      if (cachedId == nil)
+        {
+          if (slot == capacity)
+            {
+              slot = i;
+            }
+        }
+      else if (cachedId == transliteratorId
+        || [cachedId isEqualToString: transliteratorId])
+        {
+          return entries[i].transliterator;
+        }
+    }
+
+  /* Open the new transliterator before modifying the cache: the helper
+   * raises on failure, and doing this first means an exception can
+   * never leave a slot holding an ID whose transliterator is missing
+   * or already closed.
+   */
+  trans = GSICUCreateTransliterator(transliteratorId);
+
+  if (slot == capacity)
+    {
+      /* The cache is full: evict the oldest entry (FIFO). */
+      slot = nextEviction;
+      nextEviction = (nextEviction + 1) % capacity;
+      if (entries[slot].transliterator != NULL)
+        {
+          utrans_close(entries[slot].transliterator);
+        }
+    }
+
+  /* ASSIGN releases the previous ID (if any) itself, so the old ID
+   * must not be released separately first.
+   */
+  ASSIGN(entries[slot].transliteratorId, transliteratorId);
+  entries[slot].transliterator = trans;
+  return trans;
+}
+
+- (void) dealloc
+{
+  NSUInteger i;
+
+  for (i = 0; i < sizeof(entries) / sizeof(*entries); i++)
+    {
+      RELEASE(entries[i].transliteratorId);
+      if (entries[i].transliterator != NULL)
+        {
+          utrans_close(entries[i].transliterator);
+        }
+    }
+  [super dealloc];
+}
+@end
+
+/* Returns the transliterator for transliteratorId from the calling
+ * thread's cache, creating that cache on first use.
+ */
+static UTransliterator *
+GSICUCachedTransliterator(NSString *transliteratorId)
+{
+  NSThread *current;
+  GSICUTransliteratorCache *cache;
+
+  current = [NSThread currentThread];
+  cache = [current _stringTransliteratorCache];
+  if (nil == cache)
+    {
+      cache = [[GSICUTransliteratorCache alloc] init];
+      [current _setStringTransliteratorCache: cache];
+      [cache release];
+    }
+  return [cache transliteratorForId: transliteratorId];
+}
+#endif
+
 static NSString *
 GSStringApplyTransliterator(const unichar *src,
                             NSUInteger srcLength,
-                            const unichar *transId,
-                            int32_t transIdLength)
+                            UTransliterator *trans)
 {
   if (srcLength == 0)
     {
@@ -701,8 +854,6 @@ - (void) _setStringCollatorCache: (id)cache;
 #if (GS_USE_ICU == 1) && GS_HAVE_ICU_UTRANS
   {
     UErrorCode err = U_ZERO_ERROR;
-    UParseError parseError;
-    UTransliterator *trans;
     unichar *dst;
     unichar stackDst[100];
     BOOL dstOnStack = NO;
@@ -712,14 +863,6 @@ - (void) _setStringCollatorCache: (id)cache;
     int32_t limit;
     NSString *result;
 
-    trans = utrans_openU((const UChar *)transId, transIdLength, UTRANS_FORWARD,
-                         NULL, 0, &parseError, &err);
-    if (U_FAILURE(err) || trans == NULL)
-      {
-        [NSException raise: NSCharacterConversionException
-                    format: @"libicu transliterator open failed"];
-      }
-
     capacity = srcLen + 16;
if (capacity < 32) { @@ -772,7 +878,6 @@ - (void) _setStringCollatorCache: (id)cache; { NSZoneFree(NSDefaultMallocZone(), dst); } - utrans_close(trans); [NSException raise: NSCharacterConversionException format: @"libicu transliteration failed"]; } @@ -784,7 +889,6 @@ - (void) _setStringCollatorCache: (id)cache; { NSZoneFree(NSDefaultMallocZone(), dst); } - utrans_close(trans); return result; } #else @@ -796,8 +900,7 @@ - (void) _setStringCollatorCache: (id)cache; static NSString * GSStringApplyTransliteratorToString(NSString *input, - const unichar *transId, - int32_t transIdLength) + UTransliterator *trans) { NSUInteger length = [input length]; unichar *src; @@ -810,7 +913,7 @@ - (void) _setStringCollatorCache: (id)cache; src = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), length * sizeof(unichar)); [input getCharacters: src range: NSMakeRange(0, length)]; - result = GSStringApplyTransliterator(src, length, transId, transIdLength); + result = GSStringApplyTransliterator(src, length, trans); NSZoneFree(NSDefaultMallocZone(), src); return result; @@ -820,17 +923,14 @@ - (void) _setStringCollatorCache: (id)cache; GSStringApplyTransliteratorIdentifierToString(NSString *input, NSString *transliteratorId) { - NSUInteger transIdLength = [transliteratorId length]; - unichar *transId; - NSString *result; - - transId = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), - transIdLength * sizeof(unichar)); - [transliteratorId getCharacters: transId range: NSMakeRange(0, transIdLength)]; - result = GSStringApplyTransliteratorToString(input, transId, (int32_t)transIdLength); - NSZoneFree(NSDefaultMallocZone(), transId); - - return result; +#if (GS_USE_ICU == 1) && GS_HAVE_ICU_UTRANS + return GSStringApplyTransliteratorToString(input, + GSICUCachedTransliterator(transliteratorId)); +#else + [NSException raise: NSInternalInconsistencyException + format: @"ICU transliterator support is required"]; + return nil; +#endif } #if (GS_USE_ICU == 1) && defined(HAVE_UNICODE_USTRING_H) diff 
--git a/Source/NSThread.m b/Source/NSThread.m index 4cf8f3fc15..3f4fd59275 100644 --- a/Source/NSThread.m +++ b/Source/NSThread.m @@ -97,6 +97,7 @@ int pthread_spin_destroy(pthread_spinlock_t *lock) #define EXPOSE_NSThread_IVARS 1 #define GS_NSThread_IVARS \ id _stringCollatorCache; \ + id _stringTransliteratorCache; \ BOOL _targetIsBlock; \ gs_thread_id_t _pthreadID; \ NSUInteger _threadID; \ @@ -171,6 +172,7 @@ int pthread_spin_destroy(pthread_spinlock_t *lock) #define lockInfo (internal->_lockInfo) #define targetIsBlock (internal->_targetIsBlock) #define stringCollatorCache (internal->_stringCollatorCache) +#define stringTransliteratorCache (internal->_stringTransliteratorCache) #if defined(HAVE_PTHREAD_MAIN_NP) @@ -1205,6 +1207,7 @@ - (void) dealloc DESTROY(_arg); DESTROY(_name); DESTROY(stringCollatorCache); + DESTROY(stringTransliteratorCache); if (_autorelease_vars.pool_cache != 0) { [NSAutoreleasePool _endThread: self]; @@ -1601,6 +1604,15 @@ - (void) _setStringCollatorCache: (id) cache ASSIGN(stringCollatorCache, cache); } +- (id) _stringTransliteratorCache +{ + return (id)stringTransliteratorCache; +} +- (void) _setStringTransliteratorCache: (id) cache +{ + ASSIGN(stringTransliteratorCache, cache); +} + @end From 7851ab0bd821d029c050811960f7c3e0c0f6ab48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20H=C3=BCbner?= Date: Wed, 22 Apr 2026 18:40:23 +0200 Subject: [PATCH 3/7] Cleanup --- Source/NSString.m | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/Source/NSString.m b/Source/NSString.m index d5ce3bceca..ae5c9df33f 100644 --- a/Source/NSString.m +++ b/Source/NSString.m @@ -116,10 +116,12 @@ #if defined(HAVE_UNICODE_UTYPES_H) # include #endif -#if defined(__has_include) -# if __has_include() -# include -# define GS_HAVE_ICU_UTRANS 1 +#if !defined(GS_HAVE_ICU_UTRANS) +# if defined(__has_include) +# if __has_include() +# include +# define GS_HAVE_ICU_UTRANS 1 +# endif # endif #endif #ifndef 
GS_HAVE_ICU_UTRANS @@ -807,7 +809,7 @@ - (void) dealloc static NSString * GSStringApplyTransliterator(const unichar *src, NSUInteger srcLength, - UTransliterator *trans) + void *transOpaque) { if (srcLength == 0) { @@ -816,6 +818,7 @@ - (void) dealloc #if (GS_USE_ICU == 1) && GS_HAVE_ICU_UTRANS { + UTransliterator *trans = (UTransliterator *)transOpaque; UErrorCode err = U_ZERO_ERROR; unichar *dst; unichar stackDst[100]; @@ -898,9 +901,10 @@ - (void) dealloc #endif } +#if (GS_USE_ICU == 1) && GS_HAVE_ICU_UTRANS static NSString * GSStringApplyTransliteratorToString(NSString *input, - UTransliterator *trans) + void *transOpaque) { NSUInteger length = [input length]; unichar *src; @@ -913,11 +917,12 @@ - (void) dealloc src = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), length * sizeof(unichar)); [input getCharacters: src range: NSMakeRange(0, length)]; - result = GSStringApplyTransliterator(src, length, trans); + result = GSStringApplyTransliterator(src, length, transOpaque); NSZoneFree(NSDefaultMallocZone(), src); return result; } +#endif static NSString * GSStringApplyTransliteratorIdentifierToString(NSString *input, @@ -935,7 +940,7 @@ - (void) dealloc #if (GS_USE_ICU == 1) && defined(HAVE_UNICODE_USTRING_H) static NSString * -GSStringFoldCase(NSString *input, id locale) +GSStringFoldCaseWithLocale(NSString *input, id locale) { NSUInteger length = [input length]; unichar *src; @@ -2323,6 +2328,13 @@ - (NSString *) stringByFoldingWithOptions: (NSStringCompareOptions)options return IMMUTABLE(self); } +#if !((GS_USE_ICU == 1) && GS_HAVE_ICU_UTRANS) + if (foldDiacritic == YES || foldWidth == YES) + { + return [self notImplemented: _cmd]; + } +#endif + if (foldWidth == YES && foldDiacritic == YES) { result = GSStringApplyTransliteratorIdentifierToString( @@ -2345,7 +2357,8 @@ - (NSString *) stringByFoldingWithOptions: (NSStringCompareOptions)options if (foldCase == YES) { - result = GSStringFoldCase(result, locale); + /* TODO: use `lowercaseStringWithLocale` once 
implemented. */ + result = GSStringFoldCaseWithLocale(result, locale); } return IMMUTABLE(result); From 4d2bedb137ef24836d6f850a34369c7802d2fb13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20H=C3=BCbner?= Date: Sun, 3 May 2026 23:53:16 +0200 Subject: [PATCH 4/7] Use libc malloc and fix MSVC build error --- Source/NSString.m | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/Source/NSString.m b/Source/NSString.m index 43e4f76374..b088572f4a 100644 --- a/Source/NSString.m +++ b/Source/NSString.m @@ -716,13 +716,12 @@ - (UTransliterator *) transliteratorForId: (NSString *)transliteratorId; UParseError parseError; UTransliterator *trans; - transId = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), - transIdLength * sizeof(unichar)); + transId = (unichar *)malloc(transIdLength * sizeof(unichar)); [transliteratorId getCharacters: transId range: NSMakeRange(0, transIdLength)]; trans = utrans_openU((const UChar *)transId, (int32_t)transIdLength, UTRANS_FORWARD, NULL, 0, &parseError, &err); - NSZoneFree(NSDefaultMallocZone(), transId); + free(transId); if (U_FAILURE(err) || trans == NULL) { @@ -841,8 +840,7 @@ - (void) dealloc } else { - dst = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), - capacity * sizeof(unichar)); + dst = (unichar *)malloc(capacity * sizeof(unichar)); } /* A transliterator can increase output size beyond the input size @@ -863,14 +861,12 @@ - (void) dealloc capacity = textLen + 16; if (dstOnStack == YES) { - dst = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), - capacity * sizeof(unichar)); + dst = (unichar *)malloc(capacity * sizeof(unichar)); dstOnStack = NO; } else { - tmp = (unichar *)NSZoneRealloc(NSDefaultMallocZone(), dst, - capacity * sizeof(unichar)); + tmp = (unichar *)realloc(dst, capacity * sizeof(unichar)); dst = tmp; } continue; @@ -879,7 +875,7 @@ - (void) dealloc { if (dstOnStack == NO) { - NSZoneFree(NSDefaultMallocZone(), dst); + free(dst); } [NSException raise: 
NSCharacterConversionException format: @"libicu transliteration failed"]; @@ -890,7 +886,7 @@ - (void) dealloc result = [NSString stringWithCharacters: dst length: textLen]; if (dstOnStack == NO) { - NSZoneFree(NSDefaultMallocZone(), dst); + free(dst); } return result; } @@ -915,10 +911,10 @@ - (void) dealloc return @""; } - src = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), length * sizeof(unichar)); + src = (unichar *)malloc(length * sizeof(unichar)); [input getCharacters: src range: NSMakeRange(0, length)]; result = GSStringApplyTransliterator(src, length, transOpaque); - NSZoneFree(NSDefaultMallocZone(), src); + free(src); return result; } @@ -969,7 +965,7 @@ - (void) dealloc localeId = [[locale localeIdentifier] UTF8String]; } - src = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), length * sizeof(unichar)); + src = (unichar *)malloc(length * sizeof(unichar)); [input getCharacters: src range: NSMakeRange(0, length)]; err = U_ZERO_ERROR; @@ -977,32 +973,31 @@ - (void) dealloc localeId, &err); if (err != U_BUFFER_OVERFLOW_ERROR) { - NSZoneFree(NSDefaultMallocZone(), src); + free(src); [NSException raise: NSCharacterConversionException format: @"libicu case folding length check failed"]; } - dst = (unichar *)NSZoneMalloc(NSDefaultMallocZone(), - newLength * sizeof(unichar)); + dst = (unichar *)malloc(newLength * sizeof(unichar)); err = U_ZERO_ERROR; u_strToLower((UChar *)dst, newLength, (const UChar *)src, (int32_t)length, localeId, &err); - NSZoneFree(NSDefaultMallocZone(), src); + free(src); if (U_FAILURE(err)) { - NSZoneFree(NSDefaultMallocZone(), dst); + free(dst); [NSException raise: NSCharacterConversionException format: @"libicu case folding failed"]; } result = [NSString stringWithCharacters: dst length: newLength]; - NSZoneFree(NSDefaultMallocZone(), dst); + free(dst); return result; } #else static NSString * -GSStringFoldCase(NSString *input, id locale) +GSStringFoldCaseWithLocale(NSString *input, id locale) { return [input lowercaseString]; } From 
4b3b5f5b603884c63b47e1c82b953b9d31608983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20H=C3=BCbner?= <117831077+HendrikHuebner@users.noreply.github.com> Date: Tue, 5 May 2026 10:23:18 +0200 Subject: [PATCH 5/7] Remove USTRING guard --- Source/NSString.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/NSString.m b/Source/NSString.m index b088572f4a..0f676e6fe9 100644 --- a/Source/NSString.m +++ b/Source/NSString.m @@ -934,7 +934,7 @@ - (void) dealloc #endif } -#if (GS_USE_ICU == 1) && defined(HAVE_UNICODE_USTRING_H) +#if (GS_USE_ICU == 1) static NSString * GSStringFoldCaseWithLocale(NSString *input, id locale) { From 07257cc537c0883aa00b59cfa43b818891a427ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20H=C3=BCbner?= Date: Wed, 20 May 2026 15:09:49 +0200 Subject: [PATCH 6/7] Make Windows tests with transliterators hopeful --- Tests/base/NSString/locale.m | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/Tests/base/NSString/locale.m b/Tests/base/NSString/locale.m index 92ba6e8672..a77a0b68a4 100644 --- a/Tests/base/NSString/locale.m +++ b/Tests/base/NSString/locale.m @@ -10,6 +10,18 @@ #define NSLOCALE_SUPPORTED 1 /* Assume Apple support */ #endif +#if !defined(GS_HAVE_ICU_UTRANS) +# if defined(__has_include) +# if __has_include() +# define GS_HAVE_ICU_UTRANS 1 +# else +# define GS_HAVE_ICU_UTRANS 0 +# endif +# else +# define GS_HAVE_ICU_UTRANS 0 +# endif +#endif + static void testBasic(void) { NSComparisonResult compRes; @@ -290,6 +302,7 @@ static void testFolding(void) PASS_EQUAL(folded, @"hello", "NSCaseInsensitiveSearch folds HELLO to hello"); +#if GS_HAVE_ICU_UTRANS folded = [eAcuteStr stringByFoldingWithOptions: NSDiacriticInsensitiveSearch locale: nil]; PASS_EQUAL(folded, @"e", @@ -359,9 +372,18 @@ static void testFolding(void) "NSWidthInsensitiveSearch does not remove diacritics"); folded = [@"ø" stringByFoldingWithOptions: NSDiacriticInsensitiveSearch - locale: nil]; + locale: nil]; 
PASS_EQUAL(folded, @"ø", "NSDiacriticInsensitiveSearch does not fold stroked o"); +#else + { + BOOL wasHopeful = testHopeful; + testHopeful = YES; + PASS(YES, "Skipping transliterator-dependent folding checks " + "(ICU transliterator support unavailable at compile time)"); + testHopeful = wasHopeful; + } +#endif } #endif From 349a730d33090549147c544704029a35e6735c8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20H=C3=BCbner?= Date: Wed, 20 May 2026 15:12:29 +0200 Subject: [PATCH 7/7] restart CI