From 72bd4b3904bade1ae3c797a97012dc1b104c35b1 Mon Sep 17 00:00:00 2001 From: JPeer264 Date: Sat, 20 Jun 2026 16:33:21 +0200 Subject: [PATCH] feat(core): Add db.query.summary to postgres integration --- packages/core/src/integrations/postgresjs.ts | 5 +- packages/core/src/utils/sql.ts | 133 ++++++ .../core/test/lib/utils/sql.ported.test.ts | 451 ++++++++++++++++++ packages/core/test/lib/utils/sql.test.ts | 206 ++++++++ 4 files changed, 794 insertions(+), 1 deletion(-) create mode 100644 packages/core/src/utils/sql.ts create mode 100644 packages/core/test/lib/utils/sql.ported.test.ts create mode 100644 packages/core/test/lib/utils/sql.test.ts diff --git a/packages/core/src/integrations/postgresjs.ts b/packages/core/src/integrations/postgresjs.ts index 93602d71a6e5..7e539f8e0995 100644 --- a/packages/core/src/integrations/postgresjs.ts +++ b/packages/core/src/integrations/postgresjs.ts @@ -7,6 +7,7 @@ import { SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from '../semanticAttributes'; import { SPAN_STATUS_ERROR, startSpanManual } from '../tracing'; import type { Span } from '../types/span'; import { debug } from '../utils/debug-logger'; +import { getSqlQuerySummary } from '../utils/sql'; import { getActiveSpan } from '../utils/spanUtils'; const SQL_OPERATION_REGEX = /^(SELECT|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER)/i; @@ -226,10 +227,11 @@ function _wrapSingleQueryHandle( const fullQuery = _reconstructQuery(query.strings); const sanitizedSqlQuery = _sanitizeSqlQuery(fullQuery); + const querySummary = getSqlQuerySummary(fullQuery); return startSpanManual( { - name: sanitizedSqlQuery || 'postgresjs.query', + name: querySummary || sanitizedSqlQuery || 'postgresjs.query', op: 'db', }, (span: Span) => { @@ -238,6 +240,7 @@ function _wrapSingleQueryHandle( span.setAttributes({ 'db.system.name': 'postgres', 'db.query.text': sanitizedSqlQuery, + 'db.query.summary': querySummary, }); const connectionContext = sqlInstance diff --git a/packages/core/src/utils/sql.ts 
/** Upper bound on the length of a generated summary. */
const MAX_SUMMARY_LENGTH = 255;

// A bare (unquoted) table identifier: everything up to whitespace, `(`, `,`, `;` or `)`.
const TABLE_NAME_CHARS = /[^\s(,;)]+/;
const TABLE_NAME = TABLE_NAME_CHARS.source;

// All statement regexes are anchored at the start of the query and expose the
// named groups `operation` and (where applicable) `table` / `command`.
const DDL_RE = new RegExp(
  `^\\s*(?<operation>(?:CREATE|DROP)\\s+(?:TABLE|INDEX)|ALTER\\s+TABLE)(?:\\s+IF\\s+(?:NOT\\s+)?EXISTS)?\\s+(?<table>${TABLE_NAME})`,
  'i',
);

const INSERT_RE = new RegExp(`^\\s*(?<operation>INSERT)\\s+INTO\\s+(?<table>${TABLE_NAME})`, 'i');
const UPDATE_RE = new RegExp(`^\\s*(?<operation>UPDATE)\\s+(?<table>${TABLE_NAME})`, 'i');
const DELETE_RE = new RegExp(`^\\s*(?<operation>DELETE)\\s+FROM\\s+(?<table>${TABLE_NAME})`, 'i');

// Optional leading `(` so that `(SELECT ...) UNION (SELECT ...)` is recognised as a SELECT.
const SELECT_RE = /^\s*\(?\s*(?<operation>SELECT)\b/i;

const PRAGMA_RE = /^\s*(?<operation>PRAGMA)\s+(?<command>\S+)/i;

// Statements summarised as `{operation} {table}` with nothing else appended.
const SINGLE_TARGET_RES = [DDL_RE, UPDATE_RE, DELETE_RE];

// Scanner used by `extractTableNames`. Three alternatives:
//   1. `FROM ` / `JOIN `                         -> a table (or subquery) follows; no capture group set
//   2. `( SELECT`                                -> group 1 holds the nested SELECT keyword
//   3. `UNION|INTERSECT|EXCEPT|MINUS [ALL] SELECT` -> group 2 holds the SELECT keyword
// The regex is global (stateful); `extractTableNames` resets `lastIndex` before each scan.
const TOKEN_RE = /\b(?:FROM|JOIN)\s+|\(\s*(SELECT)\b|\b(?:UNION|INTERSECT|EXCEPT|MINUS)\s+(?:ALL\s+)?(SELECT)\b/gi;
const QUOTED_OR_PLAIN_TABLE_RE = /^(?:"[^"]*"|'[^']*'|[^\s(,;)]+)/;
const COMMA_TABLE_RE = /^\s*,\s*((?:"[^"]*"|'[^']*'|[^\s(,;)]+))/;
const SUBQUERY_SELECT_RE = /^\(\s*(SELECT)\b/i;

/**
 * Derives a low-cardinality summary from a SQL query for use as `db.query.summary`.
 *
 * Follows the OTEL semantic convention for generating query summaries:
 * - Preserves the original case of operations and identifiers (no normalization)
 * - Uses the format `{operation} {target1} {target2} ...`
 * - Strips filler words (INTO, FROM) from the operation
 * - Captures multiple table targets (JOINs, comma-separated FROM lists, nested SELECTs)
 * - Handles INSERT...SELECT with both targets
 * - Truncates to 255 characters, cutting at the last space inside the limit when there is one
 *
 * Statements that are not recognised fall back to their first whitespace-delimited word.
 *
 * @param query The raw SQL text.
 * @returns The summary, or `undefined` when `query` is undefined, empty or whitespace-only.
 *
 * @see https://opentelemetry.io/docs/specs/semconv/database/database-spans/#generating-a-summary-of-the-query
 */
export function getSqlQuerySummary(query: string | undefined): string | undefined {
  // A whitespace-only query has no operation to report; treat it like empty input
  // instead of returning an empty-string summary.
  if (!query || query.trim() === '') {
    return undefined;
  }

  const pragma = PRAGMA_RE.exec(query)?.groups;
  if (pragma?.['operation'] && pragma['command']) {
    const command = pragma['command'];
    // `PRAGMA table_info(users)` -> `PRAGMA table_info`: the argument is high-cardinality.
    const parenIdx = command.indexOf('(');
    return truncate(`${pragma['operation']} ${parenIdx >= 0 ? command.substring(0, parenIdx) : command}`);
  }

  // The statement regexes are anchored and start with distinct keywords, so at most one matches.
  for (const statementRe of SINGLE_TARGET_RES) {
    const summary = summarizeSingleTarget(statementRe, query);
    if (summary) {
      return summary;
    }
  }

  const insertMatch = INSERT_RE.exec(query);
  const insertGroups = insertMatch?.groups;
  if (insertMatch && insertGroups?.['operation'] && insertGroups['table']) {
    const parts = [insertGroups['operation'], insertGroups['table']];
    const rest = query.slice(insertMatch[0].length);
    // INSERT ... SELECT: append the SELECT keyword (original case) and its source tables.
    const subSelect = /\b(SELECT)\b/i.exec(rest);
    if (subSelect?.[1]) {
      parts.push(subSelect[1], ...extractTableNames(rest.slice(subSelect.index)));
    }
    return truncate(parts.join(' '));
  }

  const selectMatch = SELECT_RE.exec(query);
  const selectOperation = selectMatch?.groups?.['operation'];
  if (selectMatch && selectOperation) {
    const tables = extractTableNames(query.slice(selectMatch[0].length));
    return tables.length > 0 ? truncate(`${selectOperation} ${tables.join(' ')}`) : selectOperation;
  }

  // Unrecognised statement (EXPLAIN, VACUUM, ...): report only its first word.
  return truncate(query.trim().split(/\s+/)[0] ?? query);
}

/**
 * Applies a statement regex exposing `operation` and `table` groups and formats
 * the result as `{operation} {table}`; returns `undefined` when it does not match.
 */
function summarizeSingleTarget(statementRe: RegExp, query: string): string | undefined {
  const groups = statementRe.exec(query)?.groups;
  const operation = groups?.['operation'];
  const table = groups?.['table'];
  return operation && table ? truncate(`${operation} ${table}`) : undefined;
}

/**
 * Scans the text following a SELECT keyword and collects, in order of appearance,
 * the table names after FROM/JOIN (including comma-separated lists) and the
 * keyword of every nested or set-operation SELECT.
 */
function extractTableNames(sql: string): string[] {
  const tables: string[] = [];
  // TOKEN_RE is shared and global; start every scan from the beginning.
  TOKEN_RE.lastIndex = 0;
  let match: RegExpExecArray | null;

  while ((match = TOKEN_RE.exec(sql)) !== null) {
    // Alternatives 2 and 3 capture the SELECT keyword itself.
    const selectKeyword = match[1] || match[2];
    if (selectKeyword) {
      tables.push(selectKeyword);
      continue;
    }

    // Alternative 1: FROM / JOIN. Inspect what follows the keyword.
    const afterKeywordIdx = match.index + match[0].length;
    const rest = sql.slice(afterKeywordIdx);

    const subqueryMatch = SUBQUERY_SELECT_RE.exec(rest);
    if (subqueryMatch?.[1]) {
      tables.push(subqueryMatch[1]);
      // Skip past `(SELECT` so the scanner does not report the same SELECT again.
      TOKEN_RE.lastIndex = afterKeywordIdx + subqueryMatch[0].length;
      continue;
    }

    const tableMatch = QUOTED_OR_PLAIN_TABLE_RE.exec(rest);
    if (!tableMatch) continue;
    tables.push(tableMatch[0]);

    // Implicit joins: `FROM a, b, c`.
    let afterTable = rest.slice(tableMatch[0].length);
    let commaMatch: RegExpExecArray | null;
    while ((commaMatch = COMMA_TABLE_RE.exec(afterTable)) !== null) {
      if (!commaMatch[1]) break;
      tables.push(commaMatch[1]);
      afterTable = afterTable.slice(commaMatch[0].length);
    }
  }

  return tables;
}

/**
 * Limits `summary` to MAX_SUMMARY_LENGTH characters. When a cut is needed it is
 * made at the last space inside the limit so no target is split; if the first
 * MAX_SUMMARY_LENGTH characters contain no such space, a hard cut is used.
 */
function truncate(summary: string): string {
  if (summary.length <= MAX_SUMMARY_LENGTH) {
    return summary;
  }
  const truncated = summary.substring(0, MAX_SUMMARY_LENGTH);
  const lastSpace = truncated.lastIndexOf(' ');
  return lastSpace > 0 ? truncated.substring(0, lastSpace) : truncated;
}
+ * + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + * + * Source files: + * - SqlQuerySummaryEdgeCasesTest.java + * - SqlQueryAnalyzerTest.java + * + * @see https://github.com/open-telemetry/opentelemetry-java-instrumentation/tree/887d863e4cb628fb6e565bad1e31b68c5262d84e/instrumentation-api-incubator/src/test/java/io/opentelemetry/instrumentation/api/incubator/semconv/db + */ + +import { describe, expect, it } from 'vitest'; +import { getSqlQuerySummary } from '../../../src/utils/sql'; + +describe('getSqlQuerySummary (OTel Java instrumentation)', () => { + describe('joins', () => { + it.each([ + ['SELECT * FROM t1 CROSS JOIN t2 CROSS JOIN t3', 'SELECT t1 t2 t3'], + ['SELECT * FROM t1 NATURAL LEFT JOIN t2 NATURAL RIGHT JOIN t3', 'SELECT t1 t2 t3'], + [ + 'SELECT * FROM orders o INNER JOIN customers c ON o.customer_id = c.id LEFT JOIN shipping s ON o.id = s.order_id RIGHT JOIN invoices i ON o.id = i.order_id FULL OUTER JOIN payments p ON o.id = p.order_id', + 'SELECT orders customers shipping invoices payments', + ], + ['SELECT * FROM users WHERE deleted_at IS NULL AND email IS NOT NULL', 'SELECT users'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('table functions', () => { + it.each([ + ['SELECT * FROM get_user_orders(?) 
WHERE status = ?', 'SELECT get_user_orders'], + ['SELECT * FROM dbo.fn_split(?, ?)', 'SELECT dbo.fn_split'], + ['SELECT * FROM UNNEST(ARRAY[1,2,3]) AS t', 'SELECT UNNEST'], + ['SELECT * FROM GENERATE_SERIES(1, 10) AS gs', 'SELECT GENERATE_SERIES'], + ['SELECT * FROM fn1(fn2(fn3(?)))', 'SELECT fn1'], + ['SELECT * FROM t1 CROSS JOIN UNNEST(t1.arr) AS e', 'SELECT t1 UNNEST'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('scalar subqueries in SELECT list', () => { + it.each([ + [ + 'SELECT e.name, (SELECT AVG(salary) FROM employees WHERE department_id = e.department_id) as avg_dept_salary FROM employees e', + 'SELECT SELECT employees employees', + ], + [ + 'SELECT p.name, (SELECT MAX(price) FROM products), (SELECT MIN(price) FROM products) FROM products p', + 'SELECT SELECT products SELECT products products', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('EXISTS and NOT EXISTS', () => { + it.each([ + [ + 'SELECT * FROM users u WHERE EXISTS (SELECT 1 FROM sessions WHERE user_id = u.id) AND NOT EXISTS (SELECT 1 FROM banned_users WHERE user_id = u.id)', + 'SELECT users SELECT sessions SELECT banned_users', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('ALL / ANY / SOME operators', () => { + it.each([ + [ + 'SELECT * FROM products WHERE price > ALL (SELECT price FROM discounted_products)', + 'SELECT products SELECT discounted_products', + ], + ['SELECT * FROM employees WHERE salary >= ANY (SELECT salary FROM managers)', 'SELECT employees SELECT managers'], + [ + 'SELECT * FROM orders WHERE amount < SOME (SELECT amount FROM large_orders)', + 'SELECT orders SELECT large_orders', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('IN / NOT IN subqueries', () => { + it.each([ + [ + 
'SELECT * FROM orders WHERE (customer_id, product_id) IN (SELECT customer_id, product_id FROM wishlists)', + 'SELECT orders SELECT wishlists', + ], + ['SELECT * FROM users WHERE id NOT IN (SELECT user_id FROM banned_users)', 'SELECT users SELECT banned_users'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('derived tables', () => { + it.each([['SELECT * FROM (SELECT id, name FROM users) AS u', 'SELECT SELECT users']])( + '%j => %j', + (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }, + ); + }); + + describe('deeply nested subqueries', () => { + it.each([ + [ + 'SELECT * FROM orders WHERE customer_id IN (SELECT id FROM customers WHERE region_id IN (SELECT id FROM regions WHERE country_id IN (SELECT id FROM countries WHERE code = ?)))', + 'SELECT orders SELECT customers SELECT regions SELECT countries', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('subqueries in JOIN conditions', () => { + it.each([ + [ + 'SELECT * FROM orders o JOIN customers c ON o.customer_id = c.id AND o.amount > (SELECT AVG(amount) FROM orders WHERE customer_id = c.id)', + 'SELECT orders customers SELECT orders', + ], + [ + 'SELECT * FROM employees e LEFT JOIN departments d ON e.dept_id = d.id AND EXISTS (SELECT 1 FROM projects WHERE dept_id = d.id)', + 'SELECT employees departments SELECT projects', + ], + [ + 'SELECT * FROM orders o JOIN order_items i ON o.id = i.order_id AND i.price > (SELECT AVG(price) FROM order_items WHERE order_id = o.id)', + 'SELECT orders order_items SELECT order_items', + ], + [ + 'SELECT * FROM t1 JOIN t2 ON t1.a IN (SELECT x FROM t3) AND t2.b IN (SELECT y FROM t4)', + 'SELECT t1 t2 SELECT t3 SELECT t4', + ], + [ + 'SELECT * FROM customers c JOIN orders o ON c.id = o.cust_id AND EXISTS (SELECT 1 FROM vip WHERE vip.cust_id = c.id AND vip.order_id = o.id)', + 'SELECT customers orders SELECT 
vip', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('subqueries in clause positions', () => { + it.each([ + [ + 'SELECT department_id, COUNT(*) FROM employees GROUP BY department_id HAVING COUNT(*) > (SELECT AVG(cnt) FROM (SELECT COUNT(*) as cnt FROM employees GROUP BY department_id) AS dept_counts)', + 'SELECT employees SELECT SELECT employees', + ], + [ + 'SELECT category, SUM(price) FROM products GROUP BY category HAVING SUM(price) > (SELECT AVG(total) FROM category_totals)', + 'SELECT products SELECT category_totals', + ], + [ + 'SELECT * FROM products ORDER BY (SELECT AVG(rating) FROM reviews WHERE product_id = products.id) DESC', + 'SELECT products SELECT reviews', + ], + [ + 'SELECT * FROM employees ORDER BY (SELECT COUNT(*) FROM projects WHERE manager_id = employees.id)', + 'SELECT employees SELECT projects', + ], + [ + 'SELECT * FROM products WHERE price BETWEEN (SELECT MIN(price) FROM products) AND (SELECT MAX(price) FROM products)', + 'SELECT products SELECT products SELECT products', + ], + ['SELECT * FROM users LIMIT (SELECT setting FROM config WHERE key = ?)', 'SELECT users SELECT config'], + ['SELECT * FROM users OFFSET (SELECT page_size FROM config) ROWS', 'SELECT users SELECT config'], + ['SELECT * FROM GENERATE_SERIES(1, (SELECT MAX(id) FROM users))', 'SELECT GENERATE_SERIES SELECT users'], + ['SELECT COALESCE((SELECT name FROM t1 WHERE id = ?), ?) 
FROM dual', 'SELECT SELECT t1 dual'], + [ + 'SELECT COALESCE((SELECT a FROM t1), (SELECT b FROM t2), (SELECT c FROM t3)) FROM dual', + 'SELECT SELECT t1 SELECT t2 SELECT t3 dual', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('set operations', () => { + it.each([ + [ + 'SELECT user_id FROM premium_users INTERSECT SELECT user_id FROM active_users', + 'SELECT premium_users SELECT active_users', + ], + ['SELECT email FROM all_users EXCEPT SELECT email FROM unsubscribed', 'SELECT all_users SELECT unsubscribed'], + ['(SELECT id FROM users) UNION (SELECT id FROM customers)', 'SELECT users SELECT customers'], + [ + '(SELECT * FROM t1 UNION SELECT * FROM t2) INTERSECT (SELECT * FROM t3 UNION SELECT * FROM t4)', + 'SELECT t1 SELECT t2 SELECT t3 SELECT t4', + ], + ['SELECT id FROM t1 MINUS SELECT id FROM t2', 'SELECT t1 SELECT t2'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('window functions', () => { + it.each([ + [ + 'SELECT name, department, salary, AVG(salary) OVER (PARTITION BY department) as avg_dept_salary FROM employees', + 'SELECT employees', + ], + ['SELECT id, SUM(val) OVER w FROM data WINDOW w AS (PARTITION BY cat)', 'SELECT data'], + ['SELECT SUM(val) OVER (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM data', 'SELECT data'], + ['SELECT ROW_NUMBER() OVER (ORDER BY a), RANK() OVER (PARTITION BY b ORDER BY c) FROM tbl', 'SELECT tbl'], + [ + 'SELECT SUM(amount) OVER (PARTITION BY (SELECT type FROM types WHERE types.id = sales.type_id)) FROM sales', + 'SELECT SELECT types sales', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('expressions (CAST, CASE)', () => { + it.each([ + ['SELECT CAST(price AS DECIMAL(10,2)) FROM products', 'SELECT products'], + ['SELECT CAST((SELECT MAX(amount) FROM orders) AS INTEGER) FROM dual', 
'SELECT SELECT orders dual'], + ['SELECT name, CAST(created_at AS DATE) FROM users WHERE CAST(status AS INTEGER) = ?', 'SELECT users'], + ['SELECT CASE WHEN status = ? THEN ? ELSE ? END FROM users', 'SELECT users'], + [ + 'SELECT CASE WHEN price > (SELECT AVG(price) FROM products) THEN ? ELSE ? END FROM products', + 'SELECT SELECT products products', + ], + [ + "SELECT CASE type WHEN 'A' THEN (SELECT COUNT(*) FROM type_a) WHEN 'B' THEN (SELECT COUNT(*) FROM type_b) ELSE 0 END FROM items", + 'SELECT SELECT type_a SELECT type_b items', + ], + [ + "SELECT CASE (SELECT type FROM config) WHEN 'A' THEN (SELECT v FROM a) WHEN 'B' THEN (SELECT v FROM b) END FROM dual", + 'SELECT SELECT config SELECT a SELECT b dual', + ], + ['SELECT * FROM orders WHERE (customer_id, product_id) = (?, ?)', 'SELECT orders'], + [ + 'SELECT * FROM users WHERE (first_name, last_name) IN (SELECT first_name, last_name FROM vip_users)', + 'SELECT users SELECT vip_users', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('clauses (pagination, GROUP BY, RETURNING)', () => { + it.each([ + ['SELECT * FROM users ORDER BY created_at OFFSET 10 ROWS FETCH FIRST 20 ROWS ONLY', 'SELECT users'], + ['SELECT * FROM products ORDER BY price FETCH NEXT 10 ROWS ONLY', 'SELECT products'], + ['SELECT * FROM users LIMIT 10 OFFSET 5', 'SELECT users'], + ['SELECT dept, SUM(salary) FROM employees GROUP BY ROLLUP(dept)', 'SELECT employees'], + ['SELECT dept, year, SUM(salary) FROM employees GROUP BY CUBE(dept, year)', 'SELECT employees'], + ['SELECT dept, year, SUM(salary) FROM employees GROUP BY GROUPING SETS ((dept), (year))', 'SELECT employees'], + ['SELECT * INTO temp_table FROM users WHERE active = ?', 'SELECT users'], + [ + 'SELECT u.name, COUNT(o.id) INTO summary_table FROM users u LEFT JOIN orders o ON u.id = o.user_id GROUP BY u.name', + 'SELECT users orders', + ], + ['INSERT INTO users (name) VALUES (?) 
RETURNING id', 'INSERT users'], + ['DELETE FROM users WHERE id = ? RETURNING *', 'DELETE users'], + ['UPDATE users SET name = ? WHERE id = ? RETURNING *', 'UPDATE users'], + ['SELECT * FROM users FOR UPDATE OF users.name NOWAIT', 'SELECT users'], + ['SELECT * FROM users FOR SHARE', 'SELECT users'], + ['SELECT * FROM users FOR UPDATE SKIP LOCKED', 'SELECT users'], + ['SELECT * FROM users FETCH FIRST 10 PERCENT ROWS ONLY', 'SELECT users'], + ['SELECT DISTINCT ON (department) * FROM employees', 'SELECT employees'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('VALUES', () => { + it.each([['INSERT INTO t1 (col) VALUES ((SELECT MAX(col) FROM t2))', 'INSERT t1 SELECT t2']])( + '%j => %j', + (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }, + ); + }); + + describe('identifiers and table references', () => { + it.each([ + ['SELECT * FROM "SELECT" WHERE "FROM" = ?', 'SELECT "SELECT"'], + ['SELECT * FROM `SELECT` WHERE `FROM` = ?', 'SELECT `SELECT`'], + ['SELECT * FROM [SELECT] WHERE [FROM] = ?', 'SELECT [SELECT]'], + ['SELECT * FROM "table.name"', 'SELECT "table.name"'], + ['SELECT * FROM db.schema.table', 'SELECT db.schema.table'], + ['SELECT * FROM schema1.t1 JOIN t2 ON schema1.t1.id = t2.id', 'SELECT schema1.t1 t2'], + ['SELECT * FROM users@remote_db', 'SELECT users@remote_db'], + ['SELECT * FROM schema.users@remote_db', 'SELECT schema.users@remote_db'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('alias edge cases', () => { + it.each([ + ['SELECT * FROM users join', 'SELECT users'], + ['SELECT * FROM users AS this_is_a_very_long_alias_name_that_exceeds_normal_length', 'SELECT users'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('vendor-specific syntax', () => { + it.each([ + ["SELECT * FROM sales PIVOT (SUM(amount) FOR quarter 
IN ('Q1', 'Q2', 'Q3', 'Q4'))", 'SELECT sales'], + ['SELECT * FROM wide_table UNPIVOT (value FOR col_name IN (col1, col2, col3))', 'SELECT wide_table'], + ['SELECT * FROM large_table TABLESAMPLE SYSTEM (10)', 'SELECT large_table'], + ['SELECT * FROM large_table TABLESAMPLE BERNOULLI (1) REPEATABLE (42)', 'SELECT large_table'], + ['SELECT * FROM users FOR XML PATH', 'SELECT users'], + ['SELECT * FROM users FOR JSON AUTO', 'SELECT users'], + [ + 'SELECT * FROM ticker MATCH_RECOGNIZE (ORDER BY tstamp MEASURES A.tstamp AS start_t PATTERN (A B* C) DEFINE A AS A.price > 10)', + 'SELECT ticker', + ], + [ + "SELECT * FROM sales MODEL DIMENSION BY (product) MEASURES (amount) RULES (amount['Total'] = amount['A'] + amount['B'])", + 'SELECT sales', + ], + [ + 'SELECT * FROM employees START WITH manager_id IS NULL CONNECT BY PRIOR employee_id = manager_id', + 'SELECT employees', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('INSERT variations', () => { + it.each([ + ['INSERT INTO t1 a SELECT * FROM t2 WHERE t2.id = ?', 'INSERT t1 SELECT t2'], + [ + 'INSERT INTO archive (id, data) SELECT o.id, c.data FROM orders o JOIN customers c ON o.cid = c.id', + 'INSERT archive SELECT orders customers', + ], + [ + 'INSERT INTO orders (product_id) SELECT id FROM products WHERE active = ? 
RETURNING *', + 'INSERT orders SELECT products', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('case preservation', () => { + it.each([ + ['select * from users', 'select users'], + ['SeLeCT * FrOm users', 'SeLeCT users'], + ['insert into users values (1)', 'insert users'], + ['delete from users where id = 1', 'delete users'], + ['update users set id = 1', 'update users'], + ['create table users (id int)', 'create table users'], + ['drop table users', 'drop table users'], + ['alter table users add column id int', 'alter table users'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('DDL', () => { + it.each([ + ['CREATE TABLE IF NOT EXISTS table', 'CREATE TABLE table'], + ['ALTER TABLE table ADD CONSTRAINT c FOREIGN KEY (foreign_id) REFERENCES ref (id)', 'ALTER TABLE table'], + ['CREATE INDEX types_name ON types (name)', 'CREATE INDEX types_name'], + ['DROP INDEX types_name ON types (name)', 'DROP INDEX types_name'], + [ + 'ALTER TABLE users ADD COLUMN email VARCHAR(255), DROP COLUMN legacy_id, MODIFY COLUMN status INT', + 'ALTER TABLE users', + ], + ['CREATE TABLE users (password VARCHAR(255))', 'CREATE TABLE users'], + ['create table users (password VARCHAR(255))', 'create table users'], + ['ALTER TABLE users ADD COLUMN password VARCHAR(255)', 'ALTER TABLE users'], + ['alter table users ADD COLUMN password VARCHAR(255)', 'alter table users'], + ['ALTER TABLE user ADD COLUMN name VARCHAR(255)', 'ALTER TABLE user'], + ['CREATE TABLE password (id INT)', 'CREATE TABLE password'], + ['DROP TABLE password', 'DROP TABLE password'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('embedded SELECT in DML', () => { + it.each([ + ['INSERT INTO t1 SELECT * FROM t2', 'INSERT t1 SELECT t2'], + ['DELETE FROM t1 WHERE x IN (SELECT y FROM t2)', 'DELETE t1'], + ['UPDATE t1 
SET x = (SELECT y FROM t2)', 'UPDATE t1'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('subqueries in FROM comma lists', () => { + it.each([['SELECT * FROM (SELECT * FROM t1) s1, (SELECT * FROM t2) s2', 'SELECT SELECT t1 SELECT t2']])( + '%j => %j', + (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }, + ); + }); + + describe('CROSS APPLY / OUTER APPLY', () => { + it.each([['SELECT * FROM t1 OUTER APPLY (SELECT * FROM t2 WHERE t2.id = t1.id)', 'SELECT t1 SELECT t2']])( + '%j => %j', + (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }, + ); + }); + + // Tests that require features not yet implemented in our regex-based approach. + // The Java implementation uses a full JFlex lexer that supports these patterns. + describe.skip('not yet supported', () => { + it.each([ + // CTEs: Java filters CTE names from the summary + ['WITH cte AS (SELECT a FROM b) SELECT * FROM cte', 'SELECT b SELECT'], + [ + 'With a AS (SELECT * FROM t1), b AS (SELECT * FROM t2) SELECT * FROM a JOIN b ON a.id = b.id', + 'SELECT t1 SELECT t2 SELECT', + ], + // LATERAL joins: we capture LATERAL as a table name + ['SELECT * FROM t1 CROSS JOIN LATERAL (SELECT * FROM t2 WHERE t2.id = t1.id) AS sub', 'SELECT t1 SELECT t2'], + // UPDATE/DELETE with subqueries in WHERE/SET (we return early after matching the operation) + [ + 'UPDATE products SET price = price * 1.1 WHERE category_id IN (SELECT id FROM categories WHERE margin_low = ?)', + 'UPDATE products SELECT categories', + ], + [ + 'DELETE FROM orders WHERE customer_id NOT IN (SELECT id FROM customers) AND product_id NOT IN (SELECT id FROM products)', + 'DELETE orders SELECT customers SELECT products', + ], + // EXPLAIN preserving inner SELECT + ['EXPLAIN SELECT * FROM users', 'EXPLAIN SELECT users'], + // SQL comments + ['select col /* from table2 */ from table', 'select table'], + // Multi-statement queries + ['SELECT * 
FROM t1; SELECT * FROM t2', 'SELECT t1; SELECT t2'], + // CROSS APPLY (SQL Server specific join-like keyword) + ['SELECT * FROM t1 CROSS APPLY t2', 'SELECT t1 t2'], + // Comma-separated tables after subquery aliases in FROM + ['SELECT * FROM a, (SELECT * FROM b), c', 'SELECT a SELECT b c'], + [ + 'SELECT * FROM (SELECT * FROM inner1), (SELECT * FROM inner2), outer_table', + 'SELECT SELECT inner1 SELECT inner2 outer_table', + ], + ['SELECT * FROM (SELECT * FROM t1) sub, t3', 'SELECT SELECT t1 t3'], + ['SELECT * FROM (SELECT * FROM t1) sub, t2 JOIN t3 ON t2.id = t3.id', 'SELECT SELECT t1 t2 t3'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); +}); diff --git a/packages/core/test/lib/utils/sql.test.ts b/packages/core/test/lib/utils/sql.test.ts new file mode 100644 index 000000000000..4cebd5c423b8 --- /dev/null +++ b/packages/core/test/lib/utils/sql.test.ts @@ -0,0 +1,206 @@ +import { describe, expect, it } from 'vitest'; +import { getSqlQuerySummary } from '../../../src/utils/sql'; + +describe('getSqlQuerySummary', () => { + it.each([undefined, ''])('returns undefined for %j', input => { + expect(getSqlQuerySummary(input)).toBeUndefined(); + }); + + describe('SELECT', () => { + it.each([ + ['SELECT * FROM users WHERE id = ?', 'SELECT users'], + ['select count(*) from orders', 'select orders'], + ['SELECT DISTINCT email FROM subscribers WHERE active = ?', 'SELECT subscribers'], + [' SELECT * FROM users', 'SELECT users'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + + it('returns just the operation for queries without FROM', () => { + expect(getSqlQuerySummary('SELECT 1')).toBe('SELECT'); + expect(getSqlQuerySummary('SELECT CURRENT_TIMESTAMP')).toBe('SELECT'); + }); + + it('captures multiple tables from JOINs', () => { + expect(getSqlQuerySummary('SELECT u.name FROM users u JOIN posts p ON u.id = p.user_id')).toBe( + 'SELECT users posts', + ); + expect( + 
getSqlQuerySummary( + 'SELECT * FROM orders o JOIN customers c ON o.customer_id = c.id LEFT JOIN products p ON o.product_id = p.id', + ), + ).toBe('SELECT orders customers products'); + }); + + it('preserves original case of identifiers', () => { + expect(getSqlQuerySummary('SELECT * FROM UserTable')).toBe('SELECT UserTable'); + expect(getSqlQuerySummary('select * from MyOrders')).toBe('select MyOrders'); + }); + + it.each([ + ['SELECT * FROM t1 JOIN t2 USING (id) LEFT JOIN t3 USING (id)', 'SELECT t1 t2 t3'], + ['SELECT * FROM colors CROSS JOIN sizes', 'SELECT colors sizes'], + ['SELECT * FROM employees NATURAL JOIN departments', 'SELECT employees departments'], + ])('handles various JOIN types: %j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + + it.each([['SELECT * FROM t1, t2, t3, t4, t5 WHERE t1.id = t2.t1_id', 'SELECT t1 t2 t3 t4 t5']])( + 'handles implicit joins: %j => %j', + (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }, + ); + + it.each([ + [ + 'SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM users WHERE active = ?) AS l1) AS l2) AS l3', + 'SELECT SELECT SELECT SELECT users', + ], + ])('handles nested subqueries: %j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('INSERT', () => { + it.each([ + ['INSERT INTO users (name, email) VALUES (?, ?)', 'INSERT users'], + ['insert into orders (product_id) values (?)', 'insert orders'], + ])('strips INTO: %j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + + it('captures INSERT...SELECT with both targets', () => { + expect(getSqlQuerySummary('INSERT INTO shipping_details SELECT * FROM orders')).toBe( + 'INSERT shipping_details SELECT orders', + ); + }); + }); + + describe('UPDATE', () => { + it.each([ + ['UPDATE users SET name = ? WHERE id = ?', 'UPDATE users'], + ['update orders SET status = ? 
WHERE created_at < ?', 'update orders'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('DELETE', () => { + it.each([ + ['DELETE FROM users WHERE id = ?', 'DELETE users'], + ['delete from sessions WHERE expired_at < ?', 'delete sessions'], + ])('strips FROM: %j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('DDL', () => { + it.each([ + ['CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT)', 'CREATE TABLE users'], + ['CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY)', 'CREATE TABLE users'], + ['ALTER TABLE users ADD COLUMN email TEXT', 'ALTER TABLE users'], + ['DROP TABLE users', 'DROP TABLE users'], + ['DROP TABLE IF EXISTS users', 'DROP TABLE users'], + ['CREATE INDEX idx_name ON users (name)', 'CREATE INDEX idx_name'], + ['CREATE INDEX IF NOT EXISTS idx_name ON users (name)', 'CREATE INDEX idx_name'], + ['DROP INDEX idx_name', 'DROP INDEX idx_name'], + ['DROP INDEX IF EXISTS idx_name', 'DROP INDEX idx_name'], + ])('preserves DDL keywords: %j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + + it('preserves original case of DDL operations', () => { + expect(getSqlQuerySummary('create table events (id INTEGER)')).toBe('create table events'); + expect(getSqlQuerySummary('Drop Table IF EXISTS temp')).toBe('Drop Table temp'); + }); + }); + + describe('PRAGMA', () => { + it.each([ + ['PRAGMA table_info(users)', 'PRAGMA table_info'], + ['PRAGMA journal_mode', 'PRAGMA journal_mode'], + ['PRAGMA table_list', 'PRAGMA table_list'], + ['PRAGMA index_info(idx_name)', 'PRAGMA index_info'], + ['pragma foreign_keys', 'pragma foreign_keys'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('fallback', () => { + it('extracts the first keyword for unrecognized statements', () => { + expect(getSqlQuerySummary('EXPLAIN SELECT * FROM 
users')).toBe('EXPLAIN'); + expect(getSqlQuerySummary('VACUUM')).toBe('VACUUM'); + expect(getSqlQuerySummary('ANALYZE users')).toBe('ANALYZE'); + }); + + it('handles leading whitespace in fallback', () => { + expect(getSqlQuerySummary(' VACUUM')).toBe('VACUUM'); + }); + }); + + describe('set operations', () => { + it.each([ + ['select col from table1 union select col from table2', 'select table1 select table2'], + [ + 'SELECT * FROM users UNION ALL SELECT * FROM contractors UNION SELECT * FROM vendors', + 'SELECT users SELECT contractors SELECT vendors', + ], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('subqueries in WHERE', () => { + it.each([ + [ + 'SELECT * FROM customers WHERE EXISTS (SELECT 1 FROM orders WHERE customer_id = customers.id)', + 'SELECT customers SELECT orders', + ], + [ + 'SELECT * FROM products WHERE NOT EXISTS (SELECT 1 FROM order_items WHERE product_id = products.id)', + 'SELECT products SELECT order_items', + ], + ['SELECT * FROM orders WHERE customer_id NOT IN (SELECT id FROM customers)', 'SELECT orders SELECT customers'], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('OTEL spec examples', () => { + it.each([ + ['SELECT * FROM wuser_table WHERE username = ?', 'SELECT wuser_table'], + [ + 'INSERT INTO shipping_details (order_id, address) SELECT order_id, address FROM orders WHERE order_id = ?', + 'INSERT shipping_details SELECT orders', + ], + ['SELECT * FROM songs, artists WHERE songs.artist_id == artists.id', 'SELECT songs artists'], + [ + 'SELECT order_date FROM (SELECT * FROM orders o JOIN customers c ON o.customer_id = c.customer_id)', + 'SELECT SELECT orders customers', + ], + ['SELECT * FROM "song list", \'artists\'', 'SELECT "song list" \'artists\''], + ])('%j => %j', (input, expected) => { + expect(getSqlQuerySummary(input)).toBe(expected); + }); + }); + + describe('truncation', () => { + 
it('truncates at 255 characters on a word boundary', () => { + const longTable = 'a'.repeat(300); + const query = `SELECT * FROM ${longTable}`; + const result = getSqlQuerySummary(query); + + expect(result.length).toBeLessThanOrEqual(255); + expect(result).toBe('SELECT'); + }); + + it('does not truncate queries within the limit', () => { + const table = 'a'.repeat(200); + const query = `SELECT * FROM ${table}`; + + expect(getSqlQuerySummary(query)).toBe(`SELECT ${table}`); + }); + }); +});