Skip to content
Open
59 changes: 59 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,60 @@
],
"sqlState" : "22003"
},
"BIN_BY_ALIGN_TO_TYPE_MISMATCH" : {
"message" : [
"The ALIGN TO expression type <originType> must match the range column type <rangeType>."
],
"sqlState" : "42804"
},
"BIN_BY_COLUMN_NOT_FOUND" : {
"message" : [
"The column <columnName> referenced in BIN BY was not found in the input relation."
],
"sqlState" : "42703"
},
"BIN_BY_DISTRIBUTE_TYPE_MISMATCH" : {
"message" : [
"The DISTRIBUTE UNIFORM columns in BIN BY must be numeric or DAY-TIME INTERVAL. Column <columnName> has type <columnType>."
],
"sqlState" : "42804"
},
"BIN_BY_DUPLICATE_DISTRIBUTE_COLUMN" : {
"message" : [
"Column <columnName> appears multiple times in the BIN BY DISTRIBUTE UNIFORM clause."
],
"sqlState" : "42701"
},
"BIN_BY_INVALID_BIN_WIDTH" : {
"message" : [
"The BIN WIDTH expression must be a positive DAY-TIME INTERVAL. Got: <expr>."
],
"sqlState" : "42K09"
},
"BIN_BY_INVALID_RANGE" : {
"message" : [
"The input row to BIN BY has range_start (<rangeStart>) greater than range_end (<rangeEnd>). BIN BY requires range_start <= range_end."
],
"sqlState" : "22023"
},
"BIN_BY_MISSING_DISTRIBUTE" : {
"message" : [
"BIN BY requires at least one column in DISTRIBUTE UNIFORM (...)."
],
"sqlState" : "42601"
},
"BIN_BY_RANGE_TYPE_MISMATCH" : {
"message" : [
"The range columns in BIN BY must be TIMESTAMP or TIMESTAMP_NTZ. Column <columnName> has type <columnType>. Both range columns must have the same type."
],
"sqlState" : "42804"
},
"BIN_BY_REQUIRES_TOP_LEVEL_COLUMN" : {
"message" : [
"BIN BY requires a top-level column, but <columnName> is a nested or computed field. Alias it to a top-level column in a select before BIN BY."
],
"sqlState" : "42K09"
},
"CALL_ON_STREAMING_DATASET_UNSUPPORTED" : {
"message" : [
"The method <methodName> can not be called on streaming Dataset/DataFrame."
Expand Down Expand Up @@ -7799,6 +7853,11 @@
"The ANALYZE TABLE command does not support views."
]
},
"BIN_BY" : {
"message" : [
"Physical execution of BIN BY is not yet implemented."
]
},
"CATALOG_INTERFACE_METHOD" : {
"message" : [
"Catalog API <methodName> is not supported by <catalogClass>."
Expand Down
7 changes: 7 additions & 0 deletions docs/sql-ref-ansi-compliance.md
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ Below is a list of all the keywords in Spark SQL.
|ADD|non-reserved|non-reserved|non-reserved|
|AFTER|non-reserved|non-reserved|non-reserved|
|AGGREGATE|non-reserved|non-reserved|non-reserved|
|ALIGN|non-reserved|non-reserved|non-reserved|
|ALL|reserved|non-reserved|reserved|
|ALTER|non-reserved|non-reserved|reserved|
|ALWAYS|non-reserved|non-reserved|non-reserved|
Expand All @@ -433,8 +434,12 @@ Below is a list of all the keywords in Spark SQL.
|BERNOULLI|non-reserved|non-reserved|non-reserved|
|BETWEEN|non-reserved|non-reserved|reserved|
|BIGINT|non-reserved|non-reserved|reserved|
|BIN|non-reserved|non-reserved|non-reserved|
|BINARY|non-reserved|non-reserved|reserved|
|BINDING|non-reserved|non-reserved|non-reserved|
|BIN_DISTRIBUTE_RATIO|non-reserved|non-reserved|non-reserved|
|BIN_END|non-reserved|non-reserved|non-reserved|
|BIN_START|non-reserved|non-reserved|non-reserved|
|BOOLEAN|non-reserved|non-reserved|reserved|
|BOTH|reserved|non-reserved|reserved|
|BUCKET|non-reserved|non-reserved|non-reserved|
Expand Down Expand Up @@ -801,6 +806,7 @@ Below is a list of all the keywords in Spark SQL.
|UNARCHIVE|non-reserved|non-reserved|non-reserved|
|UNBOUNDED|non-reserved|non-reserved|non-reserved|
|UNCACHE|non-reserved|non-reserved|non-reserved|
|UNIFORM|non-reserved|non-reserved|non-reserved|
|UNION|reserved|strict-non-reserved|reserved|
|UNIQUE|reserved|non-reserved|reserved|
|UNKNOWN|reserved|non-reserved|reserved|
Expand Down Expand Up @@ -828,6 +834,7 @@ Below is a list of all the keywords in Spark SQL.
|WHEN|reserved|non-reserved|reserved|
|WHERE|reserved|non-reserved|reserved|
|WHILE|non-reserved|non-reserved|non-reserved|
|WIDTH|non-reserved|non-reserved|non-reserved|
|WINDOW|non-reserved|non-reserved|reserved|
|WITH|reserved|non-reserved|reserved|
|WITHIN|reserved|non-reserved|reserved|
Expand Down
8 changes: 4 additions & 4 deletions python/pyspark/sql/tvf.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,11 +551,11 @@ def sql_keywords(self) -> DataFrame:
Examples
--------
>>> spark.tvf.sql_keywords().show()
+-------------+--------+
| keyword|reserved|
+-------------+--------+
+----------+--------+
| keyword|reserved|
+----------+--------+
...
+-------------+--------+...
+----------+--------+...
"""
return self._fn("sql_keywords")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ BANG: '!';
ADD: 'ADD';
AFTER: 'AFTER';
AGGREGATE: 'AGGREGATE';
ALIGN: 'ALIGN';
ALL: 'ALL';
ALTER: 'ALTER';
ALWAYS: 'ALWAYS';
Expand All @@ -152,6 +153,10 @@ BEGIN: 'BEGIN';
BERNOULLI: 'BERNOULLI';
BETWEEN: 'BETWEEN';
BIGINT: 'BIGINT';
BIN: 'BIN';
BIN_DISTRIBUTE_RATIO: 'BIN_DISTRIBUTE_RATIO';
BIN_END: 'BIN_END';
BIN_START: 'BIN_START';
BINARY: 'BINARY';
BINDING: 'BINDING';
BOOLEAN: 'BOOLEAN';
Expand Down Expand Up @@ -519,6 +524,7 @@ TYPE: 'TYPE';
UNARCHIVE: 'UNARCHIVE';
UNBOUNDED: 'UNBOUNDED';
UNCACHE: 'UNCACHE';
UNIFORM: 'UNIFORM';
UNION: 'UNION';
UNIQUE: 'UNIQUE';
UNKNOWN: 'UNKNOWN';
Expand Down Expand Up @@ -546,6 +552,7 @@ WEEKS: 'WEEKS';
WHEN: 'WHEN';
WHERE: 'WHERE';
WHILE: 'WHILE';
WIDTH: 'WIDTH';
WINDOW: 'WINDOW';
WITH: 'WITH';
WITHIN: 'WITHIN';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ options { tokenVocab = SqlBaseLexer; }
la == ANTI || la == JOIN || la == UNION || la == EXCEPT ||
la == SETMINUS || la == INTERSECT || la == ORDER || la == CLUSTER ||
la == DISTRIBUTE || la == SORT || la == LIMIT || la == OFFSET ||
la == AGGREGATE || la == WINDOW || la == LATERAL;
la == AGGREGATE || la == WINDOW || la == LATERAL || la == BIN;
}
}

Expand Down Expand Up @@ -1029,6 +1029,20 @@ unpivotAlias
: AS? errorCapturingIdentifier
;

binByClause
: BIN BY LEFT_PAREN
RANGE rangeStart=multipartIdentifier TO rangeEnd=multipartIdentifier
BIN WIDTH binWidth=expression
(ALIGN TO origin=expression)?
DISTRIBUTE UNIFORM LEFT_PAREN
distributeCol+=multipartIdentifier
(COMMA distributeCol+=multipartIdentifier)* RIGHT_PAREN
(BIN_START AS binStartAlias=errorCapturingIdentifier)?
(BIN_END AS binEndAlias=errorCapturingIdentifier)?
(BIN_DISTRIBUTE_RATIO AS binRatioAlias=errorCapturingIdentifier)?
RIGHT_PAREN (AS? tblAlias=errorCapturingIdentifier)?
;

lateralView
: LATERAL VIEW (OUTER)? qualifiedName LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN tblName=identifier (AS? colName+=identifier (COMMA colName+=identifier)*)?
;
Expand All @@ -1050,6 +1064,7 @@ relationExtension
: joinRelation
| pivotClause
| unpivotClause
| binByClause
;

joinRelation
Expand Down Expand Up @@ -1904,6 +1919,7 @@ operatorPipeRightSide
// messages in the event that both are present (this is not allowed).
| pivotClause unpivotClause?
| unpivotClause pivotClause?
| binByClause
| sample
| joinRelation
| operator=(UNION | EXCEPT | SETMINUS | INTERSECT) setQuantifier? right=queryPrimary
Expand Down Expand Up @@ -1935,6 +1951,7 @@ ansiNonReserved
: ADD
| AFTER
| AGGREGATE
| ALIGN
| ALTER
| ALWAYS
| ANALYZE
Expand All @@ -1951,6 +1968,10 @@ ansiNonReserved
| BERNOULLI
| BETWEEN
| BIGINT
| BIN
| BIN_DISTRIBUTE_RATIO
| BIN_END
| BIN_START
| BINARY
| BINARY_HEX
| BINDING
Expand Down Expand Up @@ -2252,6 +2273,7 @@ ansiNonReserved
| UNARCHIVE
| UNBOUNDED
| UNCACHE
| UNIFORM
| UNLOCK
| UNPIVOT
| UNSET
Expand All @@ -2272,6 +2294,7 @@ ansiNonReserved
| WEEKS
| WHILE
| WATERMARK
| WIDTH
| WINDOW
| WITHOUT
| YEAR
Expand Down Expand Up @@ -2313,6 +2336,7 @@ nonReserved
: ADD
| AFTER
| AGGREGATE
| ALIGN
| ALL
| ALTER
| ALWAYS
Expand All @@ -2333,6 +2357,10 @@ nonReserved
| BERNOULLI
| BETWEEN
| BIGINT
| BIN
| BIN_DISTRIBUTE_RATIO
| BIN_END
| BIN_START
| BINARY
| BINARY_HEX
| BINDING
Expand Down Expand Up @@ -2688,6 +2716,7 @@ nonReserved
| UNARCHIVE
| UNBOUNDED
| UNCACHE
| UNIFORM
| UNIQUE
| UNKNOWN
| UNLOCK
Expand All @@ -2713,6 +2742,7 @@ nonReserved
| WHILE
| WHEN
| WHERE
| WIDTH
| WINDOW
| WITH
| WITHIN
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ class Analyzer(
ResolveGroupingAnalytics ::
ResolvePivot ::
ResolveUnpivot ::
ResolveBinBy ::
ResolveOrdinalInOrderByAndGroupBy ::
ExtractGenerator ::
ResolveGenerate ::
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,14 @@ object DeduplicateRelations extends Rule[LogicalPlan] {
_.generatorOutput.map(_.exprId.id), newGenerate =>
newGenerate.copy(generatorOutput = newGenerate.generatorOutput.map(_.newInstance())))

case b: BinBy =>
deduplicateAndRenew[BinBy](
existingRelations,
b,
_.producedAttributes.map(_.exprId.id).toSeq,
newBinBy => newBinBy.copy(appendedAttributes =
newBinBy.appendedAttributes.map(_.newInstance())))

case e: Expand =>
deduplicateAndRenew[Expand](
existingRelations,
Expand Down Expand Up @@ -459,6 +467,13 @@ object DeduplicateRelations extends Rule[LogicalPlan] {
newVersion.copyTagsFrom(oldVersion)
Seq((oldVersion, newVersion))

case oldVersion: BinBy
if oldVersion.producedAttributes.intersect(conflictingAttributes).nonEmpty =>
val newVersion = oldVersion.copy(
appendedAttributes = oldVersion.appendedAttributes.map(_.newInstance()))
newVersion.copyTagsFrom(oldVersion)
Seq((oldVersion, newVersion))

case oldVersion: Expand
if oldVersion.producedAttributes.intersect(conflictingAttributes).nonEmpty =>
val producedAttributes = oldVersion.producedAttributes
Expand Down
Loading