From d86121e08d6a575a7aa8c88d78f0d33e713171d2 Mon Sep 17 00:00:00 2001 From: James Broadhead Date: Fri, 15 May 2026 16:26:51 +0000 Subject: [PATCH] feat(appkit): default analytics format to ARROW_STREAM (BREAKING) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit useAnalyticsQuery now returns a TypedArrowTable by default instead of a row array. Callers that need the JSON-row shape must pass { format: 'JSON_ARRAY' } explicitly. The default switch applies to the useAnalyticsQuery hook, the useChartData hook, the SQL warehouse connector defaults, and the analytics plugin request handler. Why: - ARROW_STREAM preserves column types (number stays number, bigint stays bigint) end-to-end. JSON_ARRAY stringifies everything on the wire. - Arrow IPC is 3-5x more compact than JSON for numeric data and parses faster on the client. - This PR stacks on the disposition-fallback PR, which makes either format work as the default across all warehouse variants — but ARROW_STREAM is the format the warehouses 'natively' want for INLINE, and aligning with that avoids the server-side decode the JSON_ARRAY fallback has to do against inline-arrow-only warehouses. Migration: - For tabular code that walks data.length / data[i], either: (a) opt back into JSON_ARRAY: useAnalyticsQuery('q', params, { format: 'JSON_ARRAY' }); (b) switch to Arrow API: data.numRows / data.getChild('col')?.get(i) / data.toArray(). - The table wrapper (table-wrapper.tsx), the dev-playground analytics route and smart-dashboard data hook, and the SQL-helpers route are all pinned to JSON_ARRAY in this PR to preserve their existing rendering. - Response caching is JSON_ARRAY-only: requests that fall through to the ARROW_STREAM default bypass the cache, so callers that rely on cached results must request JSON_ARRAY explicitly. - The template AnalyticsPage is updated to the Arrow API to demonstrate the new default. BREAKING CHANGE: Default format for useAnalyticsQuery and the analytics plugin request handler is now ARROW_STREAM instead of JSON_ARRAY. Depends on #329 (the disposition-fallback PR); merge that first. 
Signed-off-by: James Broadhead --- .../hooks/use-dashboard-data.ts | 26 ++++++++++++++----- .../client/src/routes/analytics.route.tsx | 14 +++++++--- .../client/src/routes/sql-helpers.route.tsx | 1 + .../hooks/__tests__/use-chart-data.test.ts | 8 +++--- packages/appkit-ui/src/react/hooks/types.ts | 4 +-- .../src/react/hooks/use-analytics-query.ts | 18 ++++++------- .../src/react/hooks/use-chart-data.ts | 4 +-- .../src/react/table/table-wrapper.tsx | 13 +++++++--- .../src/connectors/sql-warehouse/defaults.ts | 2 +- .../appkit/src/plugins/analytics/analytics.ts | 2 +- .../tests/analytics.integration.test.ts | 12 +++++++-- .../plugins/analytics/tests/analytics.test.ts | 9 ++++--- .../src/pages/analytics/AnalyticsPage.tsx | 6 ++--- 13 files changed, 78 insertions(+), 41 deletions(-) diff --git a/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-dashboard-data.ts b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-dashboard-data.ts index c4e9f5d35..1c42d9fc7 100644 --- a/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-dashboard-data.ts +++ b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-dashboard-data.ts @@ -80,7 +80,7 @@ export function useDashboardData(filters: DashboardFilters) { data: kpisRaw, loading: kpisLoading, error: kpisError, - } = useAnalyticsQuery("dashboard_kpis", params) as { + } = useAnalyticsQuery("dashboard_kpis", params, { format: "JSON_ARRAY" }) as { data: KPIRawRow[] | null; loading: boolean; error: string | null; @@ -90,7 +90,9 @@ export function useDashboardData(filters: DashboardFilters) { data: topZoneRaw, loading: topZoneLoading, error: topZoneError, - } = useAnalyticsQuery("dashboard_top_zone", params) as { + } = useAnalyticsQuery("dashboard_top_zone", params, { + format: "JSON_ARRAY", + }) as { data: TopZoneData[] | null; loading: boolean; error: string | null; @@ -109,7 +111,9 @@ export function useDashboardData(filters: DashboardFilters) { data: tripsOverTime, loading: 
tripsLoading, error: tripsError, - } = useAnalyticsQuery("dashboard_trips_over_time", tripsParams) as { + } = useAnalyticsQuery("dashboard_trips_over_time", tripsParams, { + format: "JSON_ARRAY", + }) as { data: TripOverTime[] | null; loading: boolean; error: string | null; @@ -119,7 +123,9 @@ export function useDashboardData(filters: DashboardFilters) { data: fareDistribution, loading: fareLoading, error: fareError, - } = useAnalyticsQuery("dashboard_fare_distribution", tripsParams) as { + } = useAnalyticsQuery("dashboard_fare_distribution", tripsParams, { + format: "JSON_ARRAY", + }) as { data: FareBucket[] | null; loading: boolean; error: string | null; @@ -129,7 +135,9 @@ export function useDashboardData(filters: DashboardFilters) { data: heatmap, loading: heatmapLoading, error: heatmapError, - } = useAnalyticsQuery("dashboard_hourly_heatmap", params) as { + } = useAnalyticsQuery("dashboard_hourly_heatmap", params, { + format: "JSON_ARRAY", + }) as { data: HeatmapCell[] | null; loading: boolean; error: string | null; @@ -139,7 +147,9 @@ export function useDashboardData(filters: DashboardFilters) { data: topZones, loading: topZonesLoading, error: topZonesError, - } = useAnalyticsQuery("dashboard_top_zones", params) as { + } = useAnalyticsQuery("dashboard_top_zones", params, { + format: "JSON_ARRAY", + }) as { data: TopZoneRow[] | null; loading: boolean; error: string | null; @@ -149,7 +159,9 @@ export function useDashboardData(filters: DashboardFilters) { data: sparklines, loading: sparklinesLoading, error: sparklinesError, - } = useAnalyticsQuery("dashboard_kpi_sparklines", params) as { + } = useAnalyticsQuery("dashboard_kpi_sparklines", params, { + format: "JSON_ARRAY", + }) as { data: SparklineRow[] | null; loading: boolean; error: string | null; diff --git a/apps/dev-playground/client/src/routes/analytics.route.tsx b/apps/dev-playground/client/src/routes/analytics.route.tsx index d1833107e..fa2e98996 100644 --- 
a/apps/dev-playground/client/src/routes/analytics.route.tsx +++ b/apps/dev-playground/client/src/routes/analytics.route.tsx @@ -53,9 +53,15 @@ function AnalyticsRoute() { data: summaryDataRaw, loading: summaryLoading, error: summaryError, - } = useAnalyticsQuery("spend_summary", summaryParams); + } = useAnalyticsQuery("spend_summary", summaryParams, { + format: "JSON_ARRAY", + }); - const { data: appsListData } = useAnalyticsQuery("apps_list", {}); + const { data: appsListData } = useAnalyticsQuery( + "apps_list", + {}, + { format: "JSON_ARRAY" }, + ); const untaggedAppsParams = useMemo(() => { return { @@ -69,7 +75,9 @@ function AnalyticsRoute() { data: untaggedAppsData, loading: untaggedAppsLoading, error: untaggedAppsError, - } = useAnalyticsQuery("untagged_apps", untaggedAppsParams); + } = useAnalyticsQuery("untagged_apps", untaggedAppsParams, { + format: "JSON_ARRAY", + }); const metrics = useMemo(() => { if (!summaryDataRaw || summaryDataRaw.length === 0) { diff --git a/apps/dev-playground/client/src/routes/sql-helpers.route.tsx b/apps/dev-playground/client/src/routes/sql-helpers.route.tsx index 517d890a4..dc652eba7 100644 --- a/apps/dev-playground/client/src/routes/sql-helpers.route.tsx +++ b/apps/dev-playground/client/src/routes/sql-helpers.route.tsx @@ -230,6 +230,7 @@ function SqlHelpersRoute() { const { data, loading, error } = useAnalyticsQuery( "sql_helpers_test", queryParams ?? 
{}, + { format: "JSON_ARRAY" }, ); // Helper to show the marker result diff --git a/packages/appkit-ui/src/react/hooks/__tests__/use-chart-data.test.ts b/packages/appkit-ui/src/react/hooks/__tests__/use-chart-data.test.ts index 686aff317..28175fff6 100644 --- a/packages/appkit-ui/src/react/hooks/__tests__/use-chart-data.test.ts +++ b/packages/appkit-ui/src/react/hooks/__tests__/use-chart-data.test.ts @@ -205,7 +205,7 @@ describe("useChartData", () => { ); }); - test("auto-selects JSON_ARRAY by default when no heuristics match", () => { + test("auto-selects ARROW_STREAM by default when no heuristics match", () => { mockUseAnalyticsQuery.mockReturnValue({ data: [], loading: false, @@ -223,11 +223,11 @@ describe("useChartData", () => { expect(mockUseAnalyticsQuery).toHaveBeenCalledWith( "test", { limit: 100 }, - expect.objectContaining({ format: "JSON_ARRAY" }), + expect.objectContaining({ format: "ARROW_STREAM" }), ); }); - test("defaults to auto format (JSON_ARRAY) when format is not specified", () => { + test("defaults to auto format (ARROW_STREAM) when format is not specified", () => { mockUseAnalyticsQuery.mockReturnValue({ data: [], loading: false, @@ -243,7 +243,7 @@ describe("useChartData", () => { expect(mockUseAnalyticsQuery).toHaveBeenCalledWith( "test", undefined, - expect.objectContaining({ format: "JSON_ARRAY" }), + expect.objectContaining({ format: "ARROW_STREAM" }), ); }); }); diff --git a/packages/appkit-ui/src/react/hooks/types.ts b/packages/appkit-ui/src/react/hooks/types.ts index 7c249bf0f..fdd3dbbcb 100644 --- a/packages/appkit-ui/src/react/hooks/types.ts +++ b/packages/appkit-ui/src/react/hooks/types.ts @@ -45,9 +45,9 @@ export interface TypedArrowTable< /** Options for configuring an analytics SSE query */ export interface UseAnalyticsQueryOptions< - F extends AnalyticsFormat = "JSON_ARRAY", + F extends AnalyticsFormat = "ARROW_STREAM", > { - /** Response format - "JSON_ARRAY" (default) returns typed arrays, "ARROW_STREAM" uses Arrow (inline or 
external links) */ + /** Response format - "ARROW_STREAM" (default) returns a TypedArrowTable (compact binary wire, type-preserving). "JSON_ARRAY" returns typed row arrays. */ format?: F; /** Maximum size of serialized parameters in bytes */ diff --git a/packages/appkit-ui/src/react/hooks/use-analytics-query.ts b/packages/appkit-ui/src/react/hooks/use-analytics-query.ts index a93da6f5e..e0eb8dbbc 100644 --- a/packages/appkit-ui/src/react/hooks/use-analytics-query.ts +++ b/packages/appkit-ui/src/react/hooks/use-analytics-query.ts @@ -77,8 +77,8 @@ function getArrowStreamUrl(id: string) { * Integration hook between client and analytics plugin. * * The return type is automatically inferred based on the format: - * - `format: "JSON_ARRAY"` (default): Returns typed array from QueryRegistry - * - `format: "ARROW_STREAM"`: Returns TypedArrowTable with row type preserved + * - `format: "ARROW_STREAM"` (default): Returns TypedArrowTable with row type preserved — works across all warehouse variants and avoids JSON serialization cost + * - `format: "JSON_ARRAY"`: Returns typed array from QueryRegistry * * Note: User context execution is determined by query file naming: * - `queryKey.obo.sql`: Executes as user (OBO = on-behalf-of / user delegation) @@ -89,28 +89,28 @@ function getArrowStreamUrl(id: string) { * @param options - Analytics query settings including format * @returns Query result state with format-appropriate data type * - * @example JSON_ARRAY format (default) + * @example ARROW_STREAM format (default) * ```typescript * const { data } = useAnalyticsQuery("spend_data", params); - * // data: Array<{ group_key: string; cost_usd: number; ... }> | null + * // data: TypedArrowTable<{ group_key: string; cost_usd: number; ... 
}> | null * ``` * - * @example ARROW_STREAM format + * @example JSON_ARRAY format * ```typescript - * const { data } = useAnalyticsQuery("spend_data", params, { format: "ARROW_STREAM" }); - * // data: TypedArrowTable<{ group_key: string; cost_usd: number; ... }> | null + * const { data } = useAnalyticsQuery("spend_data", params, { format: "JSON_ARRAY" }); + * // data: Array<{ group_key: string; cost_usd: number; ... }> | null * ``` */ export function useAnalyticsQuery< T = unknown, K extends QueryKey = QueryKey, - F extends AnalyticsFormat = "JSON_ARRAY", + F extends AnalyticsFormat = "ARROW_STREAM", >( queryKey: K, parameters?: InferParams | null, options: UseAnalyticsQueryOptions = {} as UseAnalyticsQueryOptions, ): UseAnalyticsQueryResult> { - const format = options?.format ?? "JSON_ARRAY"; + const format = options?.format ?? "ARROW_STREAM"; const maxParametersSize = options?.maxParametersSize ?? 100 * 1024; const autoStart = options?.autoStart ?? true; diff --git a/packages/appkit-ui/src/react/hooks/use-chart-data.ts b/packages/appkit-ui/src/react/hooks/use-chart-data.ts index 64b6e167f..67dc10aa3 100644 --- a/packages/appkit-ui/src/react/hooks/use-chart-data.ts +++ b/packages/appkit-ui/src/react/hooks/use-chart-data.ts @@ -73,10 +73,10 @@ function resolveFormat( return "ARROW_STREAM"; } - return "JSON_ARRAY"; + return "ARROW_STREAM"; } - return "JSON_ARRAY"; + return "ARROW_STREAM"; } // ============================================================================ diff --git a/packages/appkit-ui/src/react/table/table-wrapper.tsx b/packages/appkit-ui/src/react/table/table-wrapper.tsx index c4e28ff8c..8cb511fce 100644 --- a/packages/appkit-ui/src/react/table/table-wrapper.tsx +++ b/packages/appkit-ui/src/react/table/table-wrapper.tsx @@ -73,10 +73,15 @@ export function TableWrapper( const [columnVisibility, setColumnVisibility] = useState({}); const [rowSelection, setRowSelection] = useState({}); - const { data, loading, error } = useAnalyticsQuery( - queryKey, - 
parameters, - ); + // Pinned to JSON_ARRAY: the table walks `data.length` / `data[i]` for + // tabular rendering. A migration to Arrow's columnar API is a separate + // optimization — keeping it on the JSON shape preserves behavior across + // the default-switch to ARROW_STREAM. + const { data, loading, error } = useAnalyticsQuery< + TRaw[], + typeof queryKey, + "JSON_ARRAY" + >(queryKey, parameters, { format: "JSON_ARRAY" }); useEffect(() => { if (onRowSelectionChange && enableRowSelection) { diff --git a/packages/appkit/src/connectors/sql-warehouse/defaults.ts b/packages/appkit/src/connectors/sql-warehouse/defaults.ts index 994f11da5..506fa52dc 100644 --- a/packages/appkit/src/connectors/sql-warehouse/defaults.ts +++ b/packages/appkit/src/connectors/sql-warehouse/defaults.ts @@ -12,7 +12,7 @@ interface ExecuteStatementDefaults { export const executeStatementDefaults: ExecuteStatementDefaults = { wait_timeout: "30s", disposition: "INLINE", - format: "JSON_ARRAY", + format: "ARROW_STREAM", on_wait_timeout: "CONTINUE", timeout: 60000, }; diff --git a/packages/appkit/src/plugins/analytics/analytics.ts b/packages/appkit/src/plugins/analytics/analytics.ts index b3a268eba..fc7608aae 100644 --- a/packages/appkit/src/plugins/analytics/analytics.ts +++ b/packages/appkit/src/plugins/analytics/analytics.ts @@ -205,7 +205,7 @@ export class AnalyticsPlugin extends Plugin implements ToolProvider { res: express.Response, ): Promise { const { query_key } = req.params; - const { parameters, format: rawFormat = "JSON_ARRAY" } = + const { parameters, format: rawFormat = "ARROW_STREAM" } = req.body as IAnalyticsQueryRequest; if ( diff --git a/packages/appkit/src/plugins/analytics/tests/analytics.integration.test.ts b/packages/appkit/src/plugins/analytics/tests/analytics.integration.test.ts index 5c08b8d43..4cb797167 100644 --- a/packages/appkit/src/plugins/analytics/tests/analytics.integration.test.ts +++ b/packages/appkit/src/plugins/analytics/tests/analytics.integration.test.ts @@ 
-244,12 +244,20 @@ describe("Analytics Plugin Integration", () => { createSuccessfulSQLResponse([["cached_value"]], [{ name: "value" }]), ); + // Caching is JSON_ARRAY-only — the ARROW_STREAM default bypasses + // cache because inline-stash ids drain on first /arrow-result fetch, + // so a cache hit would replay a dead id. Explicitly request the + // cacheable shape. + const cacheableBody = JSON.stringify({ + parameters: {}, + format: "JSON_ARRAY", + }); const response1 = await fetch( `${baseUrl}/api/analytics/query/cache_test`, { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ parameters: {} }), + body: cacheableBody, }, ); const data1 = await parseSSEResponse(response1); @@ -259,7 +267,7 @@ describe("Analytics Plugin Integration", () => { { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ parameters: {} }), + body: cacheableBody, }, ); const data2 = await parseSSEResponse(response2); diff --git a/packages/appkit/src/plugins/analytics/tests/analytics.test.ts b/packages/appkit/src/plugins/analytics/tests/analytics.test.ts index 0683c44c2..9132762d5 100644 --- a/packages/appkit/src/plugins/analytics/tests/analytics.test.ts +++ b/packages/appkit/src/plugins/analytics/tests/analytics.test.ts @@ -699,7 +699,7 @@ describe("Analytics Plugin", () => { ); }); - test("/query/:query_key should use INLINE + JSON_ARRAY by default when no format specified", async () => { + test("/query/:query_key should use INLINE + ARROW_STREAM by default when no format specified", async () => { const plugin = new AnalyticsPlugin(config); const { router, getHandler } = createMockRouter(); @@ -708,8 +708,11 @@ describe("Analytics Plugin", () => { isAsUser: false, }); + // ARROW_STREAM + INLINE returns an attachment (one row's worth of + // Arrow IPC bytes is fine for shape-checking the request, the + // contents don't need to be decoded for this test). 
const executeMock = vi.fn().mockResolvedValue({ - result: { data: [{ id: 1 }] }, + result: { attachment: Buffer.from("AQID").toString("base64") }, }); (plugin as any).SQLClient.executeStatement = executeMock; @@ -728,7 +731,7 @@ describe("Analytics Plugin", () => { expect.anything(), expect.objectContaining({ disposition: "INLINE", - format: "JSON_ARRAY", + format: "ARROW_STREAM", }), expect.any(AbortSignal), ); diff --git a/template/client/src/pages/analytics/AnalyticsPage.tsx b/template/client/src/pages/analytics/AnalyticsPage.tsx index e1ca7a9df..2b01359a2 100644 --- a/template/client/src/pages/analytics/AnalyticsPage.tsx +++ b/template/client/src/pages/analytics/AnalyticsPage.tsx @@ -42,13 +42,13 @@ export function AnalyticsPage() { )} {error &&
Error: {error}
} - {data && data.length > 0 && ( + {data && data.numRows > 0 && (
Query: SELECT :message AS value
-
{data[0].value}
+
{data.getChild('value')?.get(0)}
)} - {data && data.length === 0 &&
No results
} + {data && data.numRows === 0 &&
No results
}