From a44a3ee57f6f98cdea0967cf89772daa662ae997 Mon Sep 17 00:00:00 2001 From: Andreas Buhr Date: Tue, 31 Mar 2026 11:01:33 +0200 Subject: [PATCH] Performance optimization for case_fold There was one call to std::lower_bound for each space, for each parenthesis, for each newline, and for each bracket. These lookups are not required, because apart from the ASCII upper-case letters no code point below U+00B5 has a case-folding mapping. See https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt and https://www.fileformat.info/info/charset/UTF-32/list.htm --- include/boost/parser/detail/case_fold.hpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/boost/parser/detail/case_fold.hpp b/include/boost/parser/detail/case_fold.hpp index 076151dd..2c53b1c7 100644 --- a/include/boost/parser/detail/case_fold.hpp +++ b/include/boost/parser/detail/case_fold.hpp @@ -47,12 +47,15 @@ namespace boost::parser::detail { // One-byte fast path. if (cp < 0x100) { // ASCII letter fast path. - if (0x61 <= cp && cp <= 0x7a) { - *out++ = cp; - return out; - } else if (0x41 <= cp && cp <= 0x5a) { + if (0x41 <= cp && cp <= 0x5a) { + // upper case ASCII letters *out++ = cp + 0x20; return out; + } else if (cp < 0xb5) { + // nothing else is mapped before 0xb5 + // https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt + *out++ = cp; + return out; } else if (cp == 0x00DF) { // The lone multi-mapping below 0x100. *out++ = 0x0073;