From 9c2c4415cb8994fdc1e26375f62d745c84add079 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Mon, 22 Jun 2026 16:46:58 -0400 Subject: [PATCH] Fix single-node labeled pattern expressions not filtering by label (#2443) A single-node labeled pattern used as a boolean expression -- e.g. `WHERE (a:Person)`, `WHERE EXISTS((a:Person))` -- was accepted but did not test the bound vertex's label. It desugars to an EXISTS sub-pattern, and make_path_join_quals() returned early for vertex-only patterns (list_length(entities) < 3), emitting no quals. With no edge to carry a correlation, the sub-pattern referenced nothing from the enclosing query, so the planner produced an uncorrelated one-time InitPlan that was trivially true whenever any vertex of that label existed -- the predicate matched every outer row. Emit an explicit label-id filter for a vertex-only pattern whose vertex carries a non-default label and whose variable is declared in an ancestor parse state (i.e. a correlated reference). make_qual() builds a name-based id reference that resolves to the outer variable, so the filter both correlates the sub-pattern to that variable and enforces the label. Freshly scanned, non-correlated vertices (no ancestor binding) are untouched, so plain MATCH (a:Person) and "does any X exist" EXISTS checks are unaffected. Add regression coverage in pattern_expression: WHERE (a:Person), WHERE NOT (a:Person), and EXISTS((a:Company)) against a graph with a non-Person vertex. All 41 regression tests pass. --- regress/expected/pattern_expression.out | 70 ++++++++++++++++++++++--- regress/sql/pattern_expression.sql | 49 ++++++++++++++--- src/backend/parser/cypher_clause.c | 50 +++++++++++++++++- 3 files changed, 154 insertions(+), 15 deletions(-) diff --git a/regress/expected/pattern_expression.out b/regress/expected/pattern_expression.out index 0494d49b9..93a02e3fa 100644 --- a/regress/expected/pattern_expression.out +++ b/regress/expected/pattern_expression.out @@ -320,12 +320,15 @@ $$) AS (result agtype); -- -- Single-node pattern on an already-bound variable: (a:Label) -- --- NOTE: this is an EXISTS existence check on the bound variable, NOT an --- openCypher label predicate. A matching label is therefore always true --- (the variable is already bound), and a *different* label is rejected by --- AGE's pre-existing "multiple labels for variable" restriction rather than --- evaluating to false. Both behaviours are captured here so any future change --- to single-node-pattern semantics is caught by this test. +-- NOTE: as of #2443 a single-node labeled pattern is a correlated label +-- predicate -- in WHERE / EXISTS it tests whether the bound vertex actually +-- has the label (see the WHERE (a:Person) / EXISTS((a:Company)) cases in the +-- #2443 section below). Here the variable is already bound to the SAME label, +-- so the predicate is trivially true (the label matches). A *different* label +-- on an already-bound variable is still rejected by AGE's pre-existing +-- "multiple labels for variable" restriction rather than evaluating to false; +-- that is an orthogonal limitation, captured here so any future change to +-- single-node-pattern semantics is caught by this test. SELECT * FROM cypher('pattern_expr', $$ MATCH (a:Person) RETURN a.name, (a:Person) @@ -439,16 +442,69 @@ $$) AS (name agtype); "Alice" (1 row) +-- +-- Single-node labeled pattern as a boolean (#2443) +-- +-- A bound vertex carrying a label, e.g. (a:Person), must test that vertex's +-- label rather than be trivially true. Add a non-Person vertex so the filter +-- is observable (every other vertex in this graph is a :Person). +SELECT * FROM cypher('pattern_expr', $$ + CREATE (:Company {name: 'Acme'}) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- bare single-node label predicate in WHERE: only the :Person vertices +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE (a:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Dave" +(4 rows) + +-- negated: only the non-Person vertex +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE NOT (a:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + name +-------- + "Acme" +(1 row) + +-- EXISTS() form of a single-node label predicate +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE EXISTS((a:Company)) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + name +-------- + "Acme" +(1 row) + -- -- Cleanup -- SELECT * FROM drop_graph('pattern_expr', true); -NOTICE: drop cascades to 5 other objects +NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to table pattern_expr._ag_label_vertex drop cascades to table pattern_expr._ag_label_edge drop cascades to table pattern_expr."Person" drop cascades to table pattern_expr."KNOWS" drop cascades to table pattern_expr."WORKS_WITH" +drop cascades to table pattern_expr."Company" NOTICE: graph "pattern_expr" has been dropped drop_graph ------------ diff --git a/regress/sql/pattern_expression.sql b/regress/sql/pattern_expression.sql index fff8476e5..9ded819ef 100644 --- a/regress/sql/pattern_expression.sql +++ b/regress/sql/pattern_expression.sql @@ -222,12 +222,15 @@ $$) AS (result agtype); -- -- Single-node pattern on an already-bound variable: (a:Label) -- --- NOTE: this is an EXISTS existence check on the bound variable, NOT an --- openCypher label predicate. A matching label is therefore always true --- (the variable is already bound), and a *different* label is rejected by --- AGE's pre-existing "multiple labels for variable" restriction rather than --- evaluating to false. Both behaviours are captured here so any future change --- to single-node-pattern semantics is caught by this test. +-- NOTE: as of #2443 a single-node labeled pattern is a correlated label +-- predicate -- in WHERE / EXISTS it tests whether the bound vertex actually +-- has the label (see the WHERE (a:Person) / EXISTS((a:Company)) cases in the +-- #2443 section below). Here the variable is already bound to the SAME label, +-- so the predicate is trivially true (the label matches). A *different* label +-- on an already-bound variable is still rejected by AGE's pre-existing +-- "multiple labels for variable" restriction rather than evaluating to false; +-- that is an orthogonal limitation, captured here so any future change to +-- single-node-pattern semantics is caught by this test. SELECT * FROM cypher('pattern_expr', $$ MATCH (a:Person) RETURN a.name, (a:Person) @@ -299,6 +302,40 @@ SELECT * FROM cypher('pattern_expr', $$ ORDER BY a.name $$) AS (name agtype); +-- +-- Single-node labeled pattern as a boolean (#2443) +-- +-- A bound vertex carrying a label, e.g. (a:Person), must test that vertex's +-- label rather than be trivially true. Add a non-Person vertex so the filter +-- is observable (every other vertex in this graph is a :Person). +SELECT * FROM cypher('pattern_expr', $$ + CREATE (:Company {name: 'Acme'}) +$$) AS (result agtype); + +-- bare single-node label predicate in WHERE: only the :Person vertices +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE (a:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + +-- negated: only the non-Person vertex +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE NOT (a:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + +-- EXISTS() form of a single-node label predicate +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE EXISTS((a:Company)) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + -- -- Cleanup -- diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 5ac9dea65..3a79a530d 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -5452,10 +5452,56 @@ static List *make_path_join_quals(cypher_parsestate *cpstate, List *entities) List *quals = NIL; List *join_quals; - /* for vertex only queries, there is no work to do */ + /* + * Vertex-only patterns have no edges, so the edge-driven correlation and + * label-filter logic below never runs. That is correct for a freshly + * scanned vertex -- its label comes from its label-table scan. But a + * vertex that refers to a variable from an ENCLOSING query -- e.g. the + * (a:Person) in MATCH (a) WHERE (a:Person) / EXISTS((a:Person)) -- is not + * scanned from its label table here. Without an explicit filter such a + * sub-pattern is uncorrelated and trivially true (the label is never + * tested). If the vertex carries a non-default label and its variable + * exists in an ancestor parse state, emit a label-id filter: make_qual + * builds a name-based id reference that resolves to the outer variable, + * which both correlates the sub-pattern to it and enforces the label. + */ if (list_length(entities) < 3) { - return NIL; + cypher_parsestate *parent_cpstate = + (cypher_parsestate *) cpstate->pstate.parentParseState; + ListCell *vlc; + + if (parent_cpstate != NULL) + { + foreach (vlc, entities) + { + transform_entity *ent = lfirst(vlc); + char *label; + char *name; + + if (ent->type != ENT_VERTEX) + { + continue; + } + + label = ent->entity.node->label; + name = ent->entity.node->name; + + if (label != NULL && !IS_DEFAULT_LABEL_VERTEX(label) && + name != NULL && + find_variable(parent_cpstate, name) != NULL) + { + Node *id_field = make_qual(cpstate, ent, "id"); + + quals = lappend(quals, + filter_vertices_on_label_id(cpstate, + id_field, + label)); + } + } + } + + return quals; } lc = list_head(entities);