Skip to content

Commit db8e97a

Browse files
committed
Support clickhouse positional tuple element access (tup.1)
1 parent 7c78d13 commit db8e97a

5 files changed

Lines changed: 171 additions & 4 deletions

File tree

src/dialect/clickhouse.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ impl Dialect for ClickHouseDialect {
7272
true
7373
}
7474

75+
/// See <https://clickhouse.com/docs/sql-reference/functions/tuple-functions#tupleelement>
76+
fn supports_tuple_element_access(&self) -> bool {
77+
true
78+
}
79+
7580
// ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting
7681
// with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected.
7782
//

src/dialect/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,18 @@ pub trait Dialect: Debug + Any {
479479
false
480480
}
481481

482+
/// Returns true if the dialect supports ClickHouse-style positional tuple
483+
/// element access such as `tup.1`, `arr[1].2`, or `(1, 2, 3).3`. When
484+
/// enabled, the tokenizer emits a standalone `.` (instead of fusing it
485+
/// into a decimal literal) when the previous token can be the left-hand
486+
/// side of a tuple access — an identifier, `]`, `)`, or another integer
487+
/// already inside the chain.
488+
///
489+
/// See <https://clickhouse.com/docs/sql-reference/functions/tuple-functions#tupleelement>
490+
fn supports_tuple_element_access(&self) -> bool {
491+
false
492+
}
493+
482494
/// Returns true if the dialect supports numbers containing underscores, e.g. `10_000_000`
483495
fn supports_numeric_literal_underscores(&self) -> bool {
484496
false

src/parser/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,13 @@ impl<'a> Parser<'a> {
13371337
AttachedToken(next_token),
13381338
));
13391339
}
1340+
Token::Number(_, _) if self.dialect.supports_tuple_element_access() => {
1341+
// ClickHouse-style positional tuple access (`t.1`,
1342+
// `t.1.2`). Exit the wildcard-detection loop and let
1343+
// `parse_expr` handle the expression via the
1344+
// index-rewind fall-through below.
1345+
break;
1346+
}
13401347
_ => {
13411348
return self.expected("an identifier or a '*' after '.'", next_token);
13421349
}

src/tokenizer.rs

Lines changed: 124 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,10 +1381,18 @@ impl<'a> Tokenizer<'a> {
13811381
return Ok(Some(Token::HexStringLiteral(s2)));
13821382
}
13831383

1384-
// match one period
1385-
if let Some('.') = chars.peek() {
1386-
s.push('.');
1387-
chars.next();
1384+
// Match one period. If we've just consumed an integer
1385+
// and the previous token is `.`, we're inside a ClickHouse
1386+
// tuple element access chain, and the trailing dot belongs
1387+
// to the chain, not to this number.
1388+
let in_tuple_chain = self.dialect.supports_tuple_element_access()
1389+
&& prev_token == Some(&Token::Period)
1390+
&& !s.is_empty();
1391+
if !in_tuple_chain {
1392+
if let Some('.') = chars.peek() {
1393+
s.push('.');
1394+
chars.next();
1395+
}
13881396
}
13891397

13901398
// If the dialect supports identifiers that start with a numeric prefix
@@ -1398,6 +1406,26 @@ impl<'a> Tokenizer<'a> {
13981406
}
13991407
}
14001408

1409+
// ClickHouse-style positional tuple element access: emit `.` as a
1410+
// standalone Period when it follows the LHS of a chain (an
1411+
// identifier, `]`, `)`, or another integer already in the chain),
1412+
// so e.g. `arr[1].1` and `t.1.2` parse as `CompoundFieldAccess`
1413+
// instead of being fused into a decimal literal.
1414+
if s == "."
1415+
&& self.dialect.supports_tuple_element_access()
1416+
&& matches!(
1417+
prev_token,
1418+
Some(
1419+
Token::Word(_)
1420+
| Token::RBracket
1421+
| Token::RParen
1422+
| Token::Number(_, _)
1423+
)
1424+
)
1425+
{
1426+
return Ok(Some(Token::Period));
1427+
}
1428+
14011429
// Consume fractional digits.
14021430
s += &peeking_next_take_while(chars, |ch, next_ch| {
14031431
ch.is_ascii_digit() || is_number_separator(ch, next_ch)
@@ -4303,6 +4331,98 @@ mod tests {
43034331
);
43044332
}
43054333

4334+
#[test]
4335+
fn tokenize_clickhouse_tuple_element_access() {
4336+
let dialects = all_dialects_where(|dialect| dialect.supports_tuple_element_access());
4337+
4338+
// After a Word, RBracket, or RParen, `.<digit>` is split into `Period`
4339+
// and a separate integer `Number`, so the parser can build a
4340+
// CompoundFieldAccess instead of seeing a single decimal literal.
4341+
dialects.tokenizes_to(
4342+
"t.1",
4343+
vec![
4344+
Token::make_word("t", None),
4345+
Token::Period,
4346+
Token::Number("1".to_string(), false),
4347+
],
4348+
);
4349+
4350+
dialects.tokenizes_to(
4351+
"arr[1].2",
4352+
vec![
4353+
Token::make_word("arr", None),
4354+
Token::LBracket,
4355+
Token::Number("1".to_string(), false),
4356+
Token::RBracket,
4357+
Token::Period,
4358+
Token::Number("2".to_string(), false),
4359+
],
4360+
);
4361+
4362+
dialects.tokenizes_to(
4363+
"(1,2).2",
4364+
vec![
4365+
Token::LParen,
4366+
Token::Number("1".to_string(), false),
4367+
Token::Comma,
4368+
Token::Number("2".to_string(), false),
4369+
Token::RParen,
4370+
Token::Period,
4371+
Token::Number("2".to_string(), false),
4372+
],
4373+
);
4374+
4375+
// Nested access `tup.1.2` (Tuple of Tuple) — the rule must re-fire on
4376+
// the second dot, and the integer between the two dots must not eat
4377+
// the trailing dot as a decimal fraction.
4378+
dialects.tokenizes_to(
4379+
"t.1.2",
4380+
vec![
4381+
Token::make_word("t", None),
4382+
Token::Period,
4383+
Token::Number("1".to_string(), false),
4384+
Token::Period,
4385+
Token::Number("2".to_string(), false),
4386+
],
4387+
);
4388+
4389+
// Decimal literals must remain untouched: the previous token is
4390+
// whitespace in both cases below, which is never the LHS of an access chain.
4391+
dialects.tokenizes_to(
4392+
"SELECT 0.5",
4393+
vec![
4394+
Token::make_keyword("SELECT"),
4395+
Token::Whitespace(Whitespace::Space),
4396+
Token::Number("0.5".to_string(), false),
4397+
],
4398+
);
4399+
4400+
dialects.tokenizes_to(
4401+
"SELECT .5",
4402+
vec![
4403+
Token::make_keyword("SELECT"),
4404+
Token::Whitespace(Whitespace::Space),
4405+
Token::Number(".5".to_string(), false),
4406+
],
4407+
);
4408+
4409+
// Regression: dialects without the flag keep the old behavior. The
4410+
// dot and digit fuse into a single decimal-shaped Number token.
4411+
let tokens = Tokenizer::new(&GenericDialect {}, "arr[1].2")
4412+
.tokenize()
4413+
.unwrap();
4414+
assert_eq!(
4415+
tokens,
4416+
vec![
4417+
Token::make_word("arr", None),
4418+
Token::LBracket,
4419+
Token::Number("1".to_string(), false),
4420+
Token::RBracket,
4421+
Token::Number(".2".to_string(), false),
4422+
]
4423+
);
4424+
}
4425+
43064426
#[test]
43074427
fn tokenize_period_underscore() {
43084428
let sql = String::from("SELECT table._col");

tests/sqlparser_clickhouse.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,29 @@ fn parse_map_access_expr() {
110110
);
111111
}
112112

113+
#[test]
114+
fn parse_tuple_element_access() {
115+
// Single-level access on an array of tuples.
116+
let sql = "SELECT arr[1].1 FROM t";
117+
let select = clickhouse().verified_only_select(sql);
118+
assert_eq!(
119+
&Expr::CompoundFieldAccess {
120+
root: Box::new(Expr::Identifier(Ident::new("arr"))),
121+
access_chain: vec![
122+
AccessExpr::Subscript(Subscript::Index {
123+
index: Expr::value(Value::Number("1".parse().unwrap(), false)),
124+
}),
125+
AccessExpr::Dot(Expr::value(Value::Number("1".parse().unwrap(), false))),
126+
],
127+
},
128+
expr_from_projection(only(&select.projection))
129+
);
130+
131+
clickhouse().verified_stmt("SELECT t.1 FROM x");
132+
clickhouse().verified_stmt("SELECT (1, 2, 3).2");
133+
clickhouse().verified_stmt("SELECT arr[1].1.2 FROM t");
134+
}
135+
113136
#[test]
114137
fn parse_array_expr() {
115138
let sql = "SELECT ['1', '2'] FROM test";

0 commit comments

Comments
 (0)