@@ -1381,10 +1381,18 @@ impl<'a> Tokenizer<'a> {
13811381 return Ok ( Some ( Token :: HexStringLiteral ( s2) ) ) ;
13821382 }
13831383
1384- // match one period
1385- if let Some ( '.' ) = chars. peek ( ) {
1386- s. push ( '.' ) ;
1387- chars. next ( ) ;
1384+ // match one period. if we've just consumed an integer
1385+ // and the previous token is `.`, we're inside a ClickHouse
1386+ // tuple element access chain, and the trailing dot belongs
1387+ // to the chain, not to this number.
1388+ let in_tuple_chain = self . dialect . supports_tuple_element_access ( )
1389+ && prev_token == Some ( & Token :: Period )
1390+ && !s. is_empty ( ) ;
1391+ if !in_tuple_chain {
1392+ if let Some ( '.' ) = chars. peek ( ) {
1393+ s. push ( '.' ) ;
1394+ chars. next ( ) ;
1395+ }
13881396 }
13891397
13901398 // If the dialect supports identifiers that start with a numeric prefix
@@ -1398,6 +1406,26 @@ impl<'a> Tokenizer<'a> {
13981406 }
13991407 }
14001408
1409+ // ClickHouse-style positional tuple element access: emit `.` as a
1410+ // standalone Period when it follows the LHS of a chain (an
1411+ // identifier, `]`, `)`, or another integer already in the chain),
1412+ // so e.g. `arr[1].1` and `t.1.2` parse as `CompoundFieldAccess`
1413+ // instead of being fused into a decimal literal.
1414+ if s == "."
1415+ && self . dialect . supports_tuple_element_access ( )
1416+ && matches ! (
1417+ prev_token,
1418+ Some (
1419+ Token :: Word ( _)
1420+ | Token :: RBracket
1421+ | Token :: RParen
1422+ | Token :: Number ( _, _)
1423+ )
1424+ )
1425+ {
1426+ return Ok ( Some ( Token :: Period ) ) ;
1427+ }
1428+
14011429 // Consume fractional digits.
14021430 s += & peeking_next_take_while ( chars, |ch, next_ch| {
14031431 ch. is_ascii_digit ( ) || is_number_separator ( ch, next_ch)
@@ -4303,6 +4331,98 @@ mod tests {
43034331 ) ;
43044332 }
43054333
4334+ #[ test]
4335+ fn tokenize_clickhouse_tuple_element_access ( ) {
4336+ let dialects = all_dialects_where ( |dialect| dialect. supports_tuple_element_access ( ) ) ;
4337+
4338+ // Dialects that enable `supports_tuple_element_access()`: after a Word,
4339+ // RBracket, or RParen, `.<digit>` must split into `Period` plus a separate
4340+ // integer `Number` instead of fusing into one decimal-shaped `Number`.
4341+ dialects. tokenizes_to (
4342+ "t.1" ,
4343+ vec ! [
4344+ Token :: make_word( "t" , None ) ,
4345+ Token :: Period ,
4346+ Token :: Number ( "1" . to_string( ) , false ) ,
4347+ ] ,
4348+ ) ;
4349+
4350+ dialects. tokenizes_to (
4351+ "arr[1].2" ,
4352+ vec ! [
4353+ Token :: make_word( "arr" , None ) ,
4354+ Token :: LBracket ,
4355+ Token :: Number ( "1" . to_string( ) , false ) ,
4356+ Token :: RBracket ,
4357+ Token :: Period ,
4358+ Token :: Number ( "2" . to_string( ) , false ) ,
4359+ ] ,
4360+ ) ;
4361+
4362+ dialects. tokenizes_to (
4363+ "(1,2).2" ,
4364+ vec ! [
4365+ Token :: LParen ,
4366+ Token :: Number ( "1" . to_string( ) , false ) ,
4367+ Token :: Comma ,
4368+ Token :: Number ( "2" . to_string( ) , false ) ,
4369+ Token :: RParen ,
4370+ Token :: Period ,
4371+ Token :: Number ( "2" . to_string( ) , false ) ,
4372+ ] ,
4373+ ) ;
4374+
4375+ // Nested access `t.1.2` (tuple of tuples): the split must fire again on
4376+ // the second dot, and the integer between the two dots must not absorb
4377+ // that dot as the start of a decimal fraction.
4378+ dialects. tokenizes_to (
4379+ "t.1.2" ,
4380+ vec ! [
4381+ Token :: make_word( "t" , None ) ,
4382+ Token :: Period ,
4383+ Token :: Number ( "1" . to_string( ) , false ) ,
4384+ Token :: Period ,
4385+ Token :: Number ( "2" . to_string( ) , false ) ,
4386+ ] ,
4387+ ) ;
4388+
4389+ // Decimal literals must remain untouched: in `0.5` the dot continues a number
4390+ // that is not preceded by `.`, and in ` .5` the token before the dot is whitespace.
4391+ dialects. tokenizes_to (
4392+ "SELECT 0.5" ,
4393+ vec ! [
4394+ Token :: make_keyword( "SELECT" ) ,
4395+ Token :: Whitespace ( Whitespace :: Space ) ,
4396+ Token :: Number ( "0.5" . to_string( ) , false ) ,
4397+ ] ,
4398+ ) ;
4399+
4400+ dialects. tokenizes_to (
4401+ "SELECT .5" ,
4402+ vec ! [
4403+ Token :: make_keyword( "SELECT" ) ,
4404+ Token :: Whitespace ( Whitespace :: Space ) ,
4405+ Token :: Number ( ".5" . to_string( ) , false ) ,
4406+ ] ,
4407+ ) ;
4408+
4409+ // Regression guard using `GenericDialect`, expected here not to split: the
4410+ // dot and digit still fuse into a single decimal-shaped `Number` token (`.2`).
4411+ let tokens = Tokenizer :: new ( & GenericDialect { } , "arr[1].2" )
4412+ . tokenize ( )
4413+ . unwrap ( ) ;
4414+ assert_eq ! (
4415+ tokens,
4416+ vec![
4417+ Token :: make_word( "arr" , None ) ,
4418+ Token :: LBracket ,
4419+ Token :: Number ( "1" . to_string( ) , false ) ,
4420+ Token :: RBracket ,
4421+ Token :: Number ( ".2" . to_string( ) , false ) ,
4422+ ]
4423+ ) ;
4424+ }
4425+
43064426 #[ test]
43074427 fn tokenize_period_underscore ( ) {
43084428 let sql = String :: from ( "SELECT table._col" ) ;
0 commit comments