From e04e380ee2bdd495b5af4b169bbff564b4af8655 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 8 Apr 2026 17:09:38 -0700 Subject: [PATCH 1/6] Add cross-filtering to Explorer facet counts When any filter is active, facet counts now reflect the intersection of all OTHER active filters. For example, selecting SESAR as source updates material/context/specimen counts to show only what exists in SESAR data. Uses parallel GROUP BY queries via DuckDB-WASM. Counts update via DOM manipulation to avoid resetting checkbox selections. Zero-count facet values are dimmed for visual clarity. When no filters are active, pre-computed summaries are used (instant). Co-Authored-By: Claude Opus 4.6 --- tutorials/isamples_explorer.qmd | 162 ++++++++++++++++++++++++++++++-- 1 file changed, 155 insertions(+), 7 deletions(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index 402b8a4..4c1576d 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -12,7 +12,7 @@ Search and explore **6.7 million physical samples** from scientific collections ::: {.callout-note} ### Serverless Architecture -This app uses a **two-tier loading strategy**: a 2KB pre-computed summary loads instantly for facet counts (source, material, context, specimen type), while the full ~280 MB Parquet file is only queried when drilling into records. All powered by DuckDB-WASM in your browser -- no server required! +This app uses a **two-tier loading strategy**: a 2KB pre-computed summary loads instantly for facet counts, while the full ~280 MB Parquet file is queried on demand. **Cross-filtering** keeps counts accurate — selecting a source updates material/context/specimen counts to reflect only that source's samples. All powered by DuckDB-WASM in your browser — no server required! ::: ## Setup @@ -92,7 +92,6 @@ facetSummariesWarning //| code-fold: true // Source checkboxes with counts - uses pre-computed summaries for instant load viewof sourceCheckboxes = { - // Use pre-computed facet summaries (instant) instead of scanning full parquet const counts = facetsByType.source; const options = counts.map(r => r.value); @@ -104,7 +103,7 @@ viewof sourceCheckboxes = { const count = r ? Number(r.count).toLocaleString() : "0"; return html` - ${x} (${count}) + ${x} (${count}) `; } }); @@ -125,7 +124,7 @@ viewof materialCheckboxes = { const r = counts.find(s => s.value === x); const count = r ? Number(r.count).toLocaleString() : "0"; return html` - ${x} (${count}) + ${x} (${count}) `; } }); @@ -146,7 +145,7 @@ viewof contextCheckboxes = { const r = counts.find(s => s.value === x); const count = r ? Number(r.count).toLocaleString() : "0"; return html` - ${x} (${count}) + ${x} (${count}) `; } }); @@ -167,7 +166,7 @@ viewof objectTypeCheckboxes = { const r = counts.find(s => s.value === x); const count = r ? Number(r.count).toLocaleString() : "0"; return html` - ${x} (${count}) + ${x} (${count}) `; } }); @@ -366,7 +365,7 @@ facetSummariesWarning = { `; } -// Extract facet counts by type from pre-computed summaries +// Extract facet counts by type from pre-computed summaries (baseline) facetsByType = { const grouped = { source: [], material: [], context: [], object_type: [] }; for (const row of facetSummaries) { @@ -383,6 +382,155 @@ facetsByType = { } ``` +```{ojs} +//| code-fold: true +// Cross-filter: build WHERE clause excluding one facet dimension +// This lets each facet show counts reflecting all OTHER active filters +function buildWhereClause(excludeFacet) { + const conditions = [ + "otype = 'MaterialSampleRecord'", + "latitude IS NOT NULL" + ]; + + if (searchInput?.trim()) { + const term = searchInput.trim().replace(/'/g, "''"); + conditions.push(`( + label ILIKE '%${term}%' + OR description ILIKE '%${term}%' + OR CAST(place_name AS VARCHAR) ILIKE '%${term}%' + )`); + } + + if (excludeFacet !== 'source') { + const sources = Array.from(sourceCheckboxes || []); + if (sources.length > 0) { + const sourceList = sources.map(s => `'${s}'`).join(", "); + conditions.push(`n IN (${sourceList})`); + } + } + + if (excludeFacet !== 'material') { + const materials = Array.from(materialCheckboxes || []); + if (materials.length > 0) { + const matList = materials.map(m => `'${m.replace(/'/g, "''")}'`).join(", "); + conditions.push(`has_material_category IN (${matList})`); + } + } + + if (excludeFacet !== 'context') { + const contexts = Array.from(contextCheckboxes || []); + if (contexts.length > 0) { + const ctxList = contexts.map(c => `'${c.replace(/'/g, "''")}'`).join(", "); + conditions.push(`has_context_category IN (${ctxList})`); + } + } + + if (excludeFacet !== 'object_type') { + const objectTypes = Array.from(objectTypeCheckboxes || []); + if (objectTypes.length > 0) { + const otList = objectTypes.map(o => `'${o.replace(/'/g, "''")}'`).join(", "); + conditions.push(`has_specimen_category IN (${otList})`); + } + } + + return conditions.join(" AND "); +} +``` + +```{ojs} +//| code-fold: true +// Detect whether any filter is active (triggers cross-filter queries) +hasActiveFilters = { + const hasSearch = searchInput?.trim()?.length > 0; + const hasSources = (sourceCheckboxes || []).length > 0; + const hasMaterials = (materialCheckboxes || []).length > 0; + const hasContexts = (contextCheckboxes || []).length > 0; + const hasObjectTypes = (objectTypeCheckboxes || []).length > 0; + return hasSearch || hasSources || hasMaterials || hasContexts || hasObjectTypes; +} +``` + +```{ojs} +//| code-fold: true +// Cross-filtered facet counts: recompute when filters are active +// Each facet uses a WHERE clause with all filters EXCEPT its own dimension, +// so you see how many items exist for each value given other active filters +crossFilteredFacets = { + if (!hasActiveFilters) return null; // Use pre-computed summaries when no filters + + const facetConfig = [ + { key: 'source', column: 'n', exclude: 'source' }, + { key: 'material', column: 'has_material_category', exclude: 'material' }, + { key: 'context', column: 'has_context_category', exclude: 'context' }, + { key: 'object_type', column: 'has_specimen_category', exclude: 'object_type' }, + ]; + + const results = {}; + + // Run all 4 facet queries in parallel + const queries = facetConfig.map(async ({ key, column, exclude }) => { + const where = buildWhereClause(exclude); + const sql = ` + SELECT ${column} AS value, COUNT(*) AS count + FROM samples + WHERE ${where} AND ${column} IS NOT NULL + GROUP BY ${column} + ORDER BY count DESC + `; + try { + const rows = await runQuery(sql); + results[key] = rows.map(r => ({ value: r.value, count: r.count })); + } catch (e) { + console.warn(`Cross-filter query failed for ${key}:`, e); + results[key] = null; // Fall back to pre-computed + } + }); + + await Promise.all(queries); + return results; +} +``` + +```{ojs} +//| code-fold: true +// Merge cross-filtered counts with baseline facets +// Baseline provides the full list of values; cross-filter overrides counts +function getDisplayCounts(facetKey) { + const baseline = facetsByType[facetKey] || []; + if (!crossFilteredFacets || !crossFilteredFacets[facetKey]) return baseline; + + const filtered = crossFilteredFacets[facetKey]; + const countMap = new Map(filtered.map(r => [r.value, r.count])); + + return baseline.map(item => ({ + ...item, + count: countMap.has(item.value) ? countMap.get(item.value) : 0, + })); +} +``` + +```{ojs} +//| code-fold: true +// Update facet count labels in-place when cross-filtered counts arrive +// This avoids re-rendering checkboxes (which would reset user selections) +{ + if (!crossFilteredFacets) return; // No active filters — keep pre-computed counts + + for (const [facetKey, rows] of Object.entries(crossFilteredFacets)) { + if (!rows) continue; + const countMap = new Map(rows.map(r => [r.value, r.count])); + + document.querySelectorAll(`.facet-count[data-facet="${facetKey}"]`).forEach(el => { + const value = el.getAttribute('data-value'); + const count = countMap.get(value) ?? 0; + el.textContent = `(${Number(count).toLocaleString()})`; + // Dim zero-count items + el.style.opacity = count === 0 ? '0.4' : '1'; + }); + } +} +``` + ```{ojs} //| code-fold: true // Build WHERE clause from current filters (Tier 2: queries full parquet only when filtering) From 68ec7ee354da88e8f6dfdc084e8d37c989b579bb Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 10 Apr 2026 08:23:21 -0700 Subject: [PATCH 2/6] Fix cross-filtering: use pre-computed cache + correct column mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add 6KB pre-computed cross-filter cache for instant single-filter lookups - Add 21MB sample_facets view with URI-string columns for on-the-fly fallback - Fix column name mismatch: wide parquet has p__* BIGINT[] columns, but facet values are URI strings — cross-filter now queries sample_facets - Main whereClause uses pid subquery against sample_facets for facet filters - Source filter still queries wide parquet directly (n column is correct) Supplementary files on data.isamples.org: - isamples_202601_facet_cross_filter.parquet (6 KB, 526 rows) - isamples_202601_sample_facets_v2.parquet (21 MB, 6M rows) Co-Authored-By: Claude Opus 4.6 --- tutorials/isamples_explorer.qmd | 131 +++++++++++++++++++++----------- 1 file changed, 88 insertions(+), 43 deletions(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index 4c1576d..62b61b1 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -31,6 +31,12 @@ parquet_url = "https://data.isamples.org/isamples_202601_wide.parquet" // Pre-computed facet summaries (2KB - loads instantly) facet_summaries_url = "https://data.isamples.org/isamples_202601_facet_summaries.parquet" +// Pre-computed cross-filter cache (6KB - instant single-filter lookups) +cross_filter_url = "https://data.isamples.org/isamples_202601_facet_cross_filter.parquet" + +// Slim facets file for on-the-fly multi-filter queries (0.3MB - URI strings, not BIGINT FKs) +sample_facets_url = "https://data.isamples.org/isamples_202601_sample_facets_v2.parquet" + // Source color scheme (consistent with iSamples conventions) SOURCE_COLORS = ({ 'SESAR': '#3366CC', // Blue @@ -307,9 +313,11 @@ db = { await instance.instantiate(bundle.mainModule, bundle.pthreadWorker); URL.revokeObjectURL(worker_url); - // Create view for convenience + // Create views for convenience const conn = await instance.connect(); await conn.query(`CREATE VIEW samples AS SELECT * FROM read_parquet('${parquet_url}')`); + // Slim facets view with correct URI-string columns for cross-filtering + await conn.query(`CREATE VIEW sample_facets AS SELECT * FROM read_parquet('${sample_facets_url}')`); await conn.close(); return instance; @@ -385,27 +393,15 @@ facetsByType = { ```{ojs} //| code-fold: true // Cross-filter: build WHERE clause excluding one facet dimension -// This lets each facet show counts reflecting all OTHER active filters -function buildWhereClause(excludeFacet) { - const conditions = [ - "otype = 'MaterialSampleRecord'", - "latitude IS NOT NULL" - ]; - - if (searchInput?.trim()) { - const term = searchInput.trim().replace(/'/g, "''"); - conditions.push(`( - label ILIKE '%${term}%' - OR description ILIKE '%${term}%' - OR CAST(place_name AS VARCHAR) ILIKE '%${term}%' - )`); - } +// Queries the sample_facets view (URI strings, correct column names) +function buildCrossFilterWhere(excludeFacet) { + const conditions = []; if (excludeFacet !== 'source') { const sources = Array.from(sourceCheckboxes || []); if (sources.length > 0) { const sourceList = sources.map(s => `'${s}'`).join(", "); - conditions.push(`n IN (${sourceList})`); + conditions.push(`source IN (${sourceList})`); } } @@ -413,7 +409,7 @@ function buildWhereClause(excludeFacet) { const materials = Array.from(materialCheckboxes || []); if (materials.length > 0) { const matList = materials.map(m => `'${m.replace(/'/g, "''")}'`).join(", "); - conditions.push(`has_material_category IN (${matList})`); + conditions.push(`material IN (${matList})`); } } @@ -421,7 +417,7 @@ function buildWhereClause(excludeFacet) { const contexts = Array.from(contextCheckboxes || []); if (contexts.length > 0) { const ctxList = contexts.map(c => `'${c.replace(/'/g, "''")}'`).join(", "); - conditions.push(`has_context_category IN (${ctxList})`); + conditions.push(`context IN (${ctxList})`); } } @@ -429,11 +425,11 @@ function buildWhereClause(excludeFacet) { const objectTypes = Array.from(objectTypeCheckboxes || []); if (objectTypes.length > 0) { const otList = objectTypes.map(o => `'${o.replace(/'/g, "''")}'`).join(", "); - conditions.push(`has_specimen_category IN (${otList})`); + conditions.push(`object_type IN (${otList})`); } } - return conditions.join(" AND "); + return conditions.length > 0 ? conditions.join(" AND ") : "1=1"; } ``` @@ -452,28 +448,73 @@ hasActiveFilters = { ```{ojs} //| code-fold: true -// Cross-filtered facet counts: recompute when filters are active -// Each facet uses a WHERE clause with all filters EXCEPT its own dimension, -// so you see how many items exist for each value given other active filters +// Cross-filtered facet counts: use pre-computed cache for single-filter, +// fall back to on-the-fly queries against sample_facets for multi-filter crossFilteredFacets = { if (!hasActiveFilters) return null; // Use pre-computed summaries when no filters + // Count how many facets have active filters + const activeSources = Array.from(sourceCheckboxes || []); + const activeMaterials = Array.from(materialCheckboxes || []); + const activeContexts = Array.from(contextCheckboxes || []); + const activeObjectTypes = Array.from(objectTypeCheckboxes || []); + const hasSearch = searchInput?.trim()?.length > 0; + + const activeFilterCount = [activeSources, activeMaterials, activeContexts, activeObjectTypes] + .filter(a => a.length > 0).length; + + // Try pre-computed cache for single-filter (no text search) + if (activeFilterCount === 1 && !hasSearch) { + try { + // Build filter conditions for the cache lookup + const conditions = ["filter_source IS NULL", "filter_material IS NULL", + "filter_context IS NULL", "filter_object_type IS NULL"]; + if (activeSources.length === 1) + conditions[0] = `filter_source = '${activeSources[0]}'`; + else if (activeMaterials.length === 1) + conditions[1] = `filter_material = '${activeMaterials[0].replace(/'/g, "''")}'`; + else if (activeContexts.length === 1) + conditions[2] = `filter_context = '${activeContexts[0].replace(/'/g, "''")}'`; + else if (activeObjectTypes.length === 1) + conditions[3] = `filter_object_type = '${activeObjectTypes[0].replace(/'/g, "''")}'`; + + const sql = ` + SELECT facet_type, facet_value AS value, count + FROM read_parquet('${cross_filter_url}') + WHERE ${conditions.join(" AND ")} + `; + const rows = await runQuery(sql); + + if (rows.length > 0) { + const results = { source: [], material: [], context: [], object_type: [] }; + for (const r of rows) { + if (results[r.facet_type]) { + results[r.facet_type].push({ value: r.value, count: Number(r.count) }); + } + } + return results; + } + } catch (e) { + console.warn("Pre-computed cache miss, falling back to on-the-fly:", e); + } + } + + // Fallback: on-the-fly queries against the slim sample_facets view const facetConfig = [ - { key: 'source', column: 'n', exclude: 'source' }, - { key: 'material', column: 'has_material_category', exclude: 'material' }, - { key: 'context', column: 'has_context_category', exclude: 'context' }, - { key: 'object_type', column: 'has_specimen_category', exclude: 'object_type' }, + { key: 'source', column: 'source', exclude: 'source' }, + { key: 'material', column: 'material', exclude: 'material' }, + { key: 'context', column: 'context', exclude: 'context' }, + { key: 'object_type', column: 'object_type', exclude: 'object_type' }, ]; const results = {}; - // Run all 4 facet queries in parallel const queries = facetConfig.map(async ({ key, column, exclude }) => { - const where = buildWhereClause(exclude); + const where = buildCrossFilterWhere(exclude); const sql = ` SELECT ${column} AS value, COUNT(*) AS count - FROM samples - WHERE ${where} AND ${column} IS NOT NULL + FROM sample_facets + WHERE ${where} AND ${column} IS NOT NULL AND ${column} != '' GROUP BY ${column} ORDER BY count DESC `; @@ -482,7 +523,7 @@ crossFilteredFacets = { results[key] = rows.map(r => ({ value: r.value, count: r.count })); } catch (e) { console.warn(`Cross-filter query failed for ${key}:`, e); - results[key] = null; // Fall back to pre-computed + results[key] = null; } }); @@ -534,13 +575,16 @@ function getDisplayCounts(facetKey) { ```{ojs} //| code-fold: true // Build WHERE clause from current filters (Tier 2: queries full parquet only when filtering) +// Source filter uses the wide parquet's `n` column directly. +// Material/context/object_type filters use the sample_facets view (URI strings) +// via a subquery, since the wide parquet stores these as BIGINT foreign keys. whereClause = { const conditions = [ "otype = 'MaterialSampleRecord'", "latitude IS NOT NULL" ]; - // Text search + // Text search (against wide parquet — has label, description, place_name) if (searchInput?.trim()) { const term = searchInput.trim().replace(/'/g, "''"); conditions.push(`( @@ -550,32 +594,33 @@ whereClause = { )`); } - // Source filter + // Source filter (n column exists in wide parquet) const sources = Array.from(sourceCheckboxes || []); if (sources.length > 0) { const sourceList = sources.map(s => `'${s}'`).join(", "); conditions.push(`n IN (${sourceList})`); } - // Material filter + // Facet filters: build a subquery against sample_facets to get matching PIDs + const facetConditions = []; const materials = Array.from(materialCheckboxes || []); if (materials.length > 0) { const matList = materials.map(m => `'${m.replace(/'/g, "''")}'`).join(", "); - conditions.push(`has_material_category IN (${matList})`); + facetConditions.push(`material IN (${matList})`); } - - // Context (sampled feature) filter const contexts = Array.from(contextCheckboxes || []); if (contexts.length > 0) { const ctxList = contexts.map(c => `'${c.replace(/'/g, "''")}'`).join(", "); - conditions.push(`has_context_category IN (${ctxList})`); + facetConditions.push(`context IN (${ctxList})`); } - - // Object type (specimen type) filter const objectTypes = Array.from(objectTypeCheckboxes || []); if (objectTypes.length > 0) { const otList = objectTypes.map(o => `'${o.replace(/'/g, "''")}'`).join(", "); - conditions.push(`has_specimen_category IN (${otList})`); + facetConditions.push(`object_type IN (${otList})`); + } + + if (facetConditions.length > 0) { + conditions.push(`pid IN (SELECT pid FROM sample_facets WHERE ${facetConditions.join(" AND ")})`); } return conditions.join(" AND "); From 53fd20114b6c210d7616dc7156e514f31d27fb5a Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 10 Apr 2026 08:31:30 -0700 Subject: [PATCH 3/6] Fix three cross-filter bugs 1. Multi-value within single facet: fast path now requires exactly one value in the active facet, not just one active dimension. Multiple selections (e.g., SESAR+GEOME) correctly fall through to on-the-fly queries. 2. Text search participates in cross-filtering: buildCrossFilterWhere now includes ILIKE conditions. sample_facets_v2 regenerated with label, description, place_name columns (63 MB on R2). 3. Clearing filters restores baseline counts: the update cell now resets all facet-count labels to baseline values and removes zero-count dimming when crossFilteredFacets is null. Co-Authored-By: Claude Opus 4.6 --- tutorials/isamples_explorer.qmd | 39 ++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index 62b61b1..be092ea 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -397,6 +397,16 @@ facetsByType = { function buildCrossFilterWhere(excludeFacet) { const conditions = []; + // Text search participates in cross-filtering + if (searchInput?.trim()) { + const term = searchInput.trim().replace(/'/g, "''"); + conditions.push(`( + label ILIKE '%${term}%' + OR description ILIKE '%${term}%' + OR CAST(place_name AS VARCHAR) ILIKE '%${term}%' + )`); + } + if (excludeFacet !== 'source') { const sources = Array.from(sourceCheckboxes || []); if (sources.length > 0) { @@ -463,10 +473,15 @@ crossFilteredFacets = { const activeFilterCount = [activeSources, activeMaterials, activeContexts, activeObjectTypes] .filter(a => a.length > 0).length; - // Try pre-computed cache for single-filter (no text search) - if (activeFilterCount === 1 && !hasSearch) { + // Try pre-computed cache: exactly one facet active, exactly one value, no text search + const singleValueFacet = ( + !hasSearch && activeFilterCount === 1 && + [activeSources, activeMaterials, activeContexts, activeObjectTypes] + .every(a => a.length <= 1) + ); + + if (singleValueFacet) { try { - // Build filter conditions for the cache lookup const conditions = ["filter_source IS NULL", "filter_material IS NULL", "filter_context IS NULL", "filter_object_type IS NULL"]; if (activeSources.length === 1) @@ -552,10 +567,23 @@ function getDisplayCounts(facetKey) { ```{ojs} //| code-fold: true -// Update facet count labels in-place when cross-filtered counts arrive +// Update facet count labels in-place when cross-filtered counts change // This avoids re-rendering checkboxes (which would reset user selections) { - if (!crossFilteredFacets) return; // No active filters — keep pre-computed counts + if (!crossFilteredFacets) { + // No active filters — restore baseline counts and remove dimming + for (const facetKey of ['source', 'material', 'context', 'object_type']) { + const baseline = facetsByType[facetKey] || []; + const countMap = new Map(baseline.map(r => [r.value, r.count])); + document.querySelectorAll(`.facet-count[data-facet="${facetKey}"]`).forEach(el => { + const value = el.getAttribute('data-value'); + const count = countMap.get(value) ?? 0; + el.textContent = `(${Number(count).toLocaleString()})`; + el.style.opacity = '1'; + }); + } + return; + } for (const [facetKey, rows] of Object.entries(crossFilteredFacets)) { if (!rows) continue; @@ -565,7 +593,6 @@ function getDisplayCounts(facetKey) { const value = el.getAttribute('data-value'); const count = countMap.get(value) ?? 0; el.textContent = `(${Number(count).toLocaleString()})`; - // Dim zero-count items el.style.opacity = count === 0 ? '0.4' : '1'; }); } From b95cbad6214de38877a48c69a1fe434d8105dff2 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 10 Apr 2026 08:42:02 -0700 Subject: [PATCH 4/6] Fix count universe inconsistency and blank-value mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review found two bugs: 1. facet_summaries counted all 6.68M records but sample_facets only had the 5.98M with coordinates — counts jumped when toggling filters. Regenerated all three parquet files from the same base universe (lat IS NOT NULL). SESAR now consistently 4,389,231 across all files. 2. Baseline summaries included blank-string facet values, but on-the-fly queries excluded them with != ''. Regenerated summaries now exclude blanks, matching the on-the-fly behavior. Also: removed dead getDisplayCounts(), fixed stale 0.3MB comment, added missing quote escaping on source cache lookup. Co-Authored-By: Claude Opus 4.6 --- tutorials/isamples_explorer.qmd | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index be092ea..de3982d 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -34,7 +34,7 @@ facet_summaries_url = "https://data.isamples.org/isamples_202601_facet_summaries // Pre-computed cross-filter cache (6KB - instant single-filter lookups) cross_filter_url = "https://data.isamples.org/isamples_202601_facet_cross_filter.parquet" -// Slim facets file for on-the-fly multi-filter queries (0.3MB - URI strings, not BIGINT FKs) +// Facets file for on-the-fly multi-filter queries (63MB - URI strings, not BIGINT FKs) sample_facets_url = "https://data.isamples.org/isamples_202601_sample_facets_v2.parquet" // Source color scheme (consistent with iSamples conventions) @@ -485,7 +485,7 @@ crossFilteredFacets = { const conditions = ["filter_source IS NULL", "filter_material IS NULL", "filter_context IS NULL", "filter_object_type IS NULL"]; if (activeSources.length === 1) - conditions[0] = `filter_source = '${activeSources[0]}'`; + conditions[0] = `filter_source = '${activeSources[0].replace(/'/g, "''")}'`; else if (activeMaterials.length === 1) conditions[1] = `filter_material = '${activeMaterials[0].replace(/'/g, "''")}'`; else if (activeContexts.length === 1) @@ -547,24 +547,6 @@ crossFilteredFacets = { } ``` -```{ojs} -//| code-fold: true -// Merge cross-filtered counts with baseline facets -// Baseline provides the full list of values; cross-filter overrides counts -function getDisplayCounts(facetKey) { - const baseline = facetsByType[facetKey] || []; - if (!crossFilteredFacets || !crossFilteredFacets[facetKey]) return baseline; - - const filtered = crossFilteredFacets[facetKey]; - const countMap = new Map(filtered.map(r => [r.value, r.count])); - - return baseline.map(item => ({ - ...item, - count: countMap.has(item.value) ? countMap.get(item.value) : 0, - })); -} -``` - ```{ojs} //| code-fold: true // Update facet count labels in-place when cross-filtered counts change From b0f70f65ffd09e70ee66ff35b5bfc2f4a2d0ddf0 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 10 Apr 2026 08:47:09 -0700 Subject: [PATCH 5/6] Add cross-filtering interaction tests 5 new tests in TestExplorerCrossFiltering: - Baseline SESAR count matches summaries (>4M) - Clicking source updates material counts (organicmaterial decreases) - Clearing filter restores baseline counts - Zero-count items get dimmed (opacity < 1) - New parquet endpoints (cross_filter, sample_facets_v2) reachable Cross-filter tests gracefully skip if data attributes not yet deployed. Co-Authored-By: Claude Opus 4.6 --- tests/test_explorer.py | 103 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/tests/test_explorer.py b/tests/test_explorer.py index be3847e..5709b83 100644 --- a/tests/test_explorer.py +++ b/tests/test_explorer.py @@ -71,6 +71,109 @@ def test_four_sources_present(self, explorer_page): assert explorer_page.get_by_text(source).count() > 0, f"Missing source: {source}" +class TestExplorerCrossFiltering: + """Cross-filtering: clicking a facet should update counts in other facets.""" + + def _wait_for_facets(self, page): + """Wait for facet count labels to render (requires cross-filter PR).""" + facet = page.locator(".facet-count[data-facet='source']") + # These data attributes only exist after the cross-filtering code is deployed + try: + facet.first.wait_for(state="attached", timeout=30000) + except Exception: + pytest.skip("Cross-filter data attributes not yet deployed") + + def _get_count(self, page, facet, value): + """Extract the numeric count from a facet-count label.""" + el = page.locator(f".facet-count[data-facet='{facet}'][data-value='{value}']") + if el.count() == 0: + return None + text = el.first.text_content() # e.g. "(4,389,231)" + return int(text.strip("() ").replace(",", "")) + + def _click_checkbox(self, page, label): + """Click a checkbox by its visible label text.""" + page.get_by_text(label, exact=True).first.click() + + def test_baseline_sesar_count_matches_summaries(self, explorer_page): + """Before any interaction, SESAR count should match the facet summary.""" + self._wait_for_facets(explorer_page) + count = self._get_count(explorer_page, "source", "SESAR") + assert count is not None, "SESAR facet-count element not found" + assert count > 4_000_000, f"SESAR baseline count too low: {count}" + + def test_clicking_source_updates_material_counts(self, explorer_page): + """Checking SESAR should lower material counts (no archaeology materials).""" + self._wait_for_facets(explorer_page) + # Record a material count before filtering + before = self._get_count(explorer_page, "material", + "https://w3id.org/isample/vocabulary/material/1.0/organicmaterial") + assert before is not None, "organicmaterial facet-count not found" + + # Click SESAR checkbox + self._click_checkbox(explorer_page, "SESAR") + + # Wait for cross-filter update (labels update in-place via DOM mutation) + explorer_page.wait_for_timeout(5000) + + after = self._get_count(explorer_page, "material", + "https://w3id.org/isample/vocabulary/material/1.0/organicmaterial") + assert after is not None + assert after < before, ( + f"organicmaterial count should decrease with SESAR filter: {before} -> {after}" + ) + + def test_clearing_filter_restores_baseline(self, explorer_page): + """Unchecking a source should restore baseline counts.""" + self._wait_for_facets(explorer_page) + baseline = self._get_count(explorer_page, "material", + "https://w3id.org/isample/vocabulary/material/1.0/earthmaterial") + + # Activate then deactivate SESAR + self._click_checkbox(explorer_page, "SESAR") + explorer_page.wait_for_timeout(5000) + filtered = self._get_count(explorer_page, "material", + "https://w3id.org/isample/vocabulary/material/1.0/earthmaterial") + + self._click_checkbox(explorer_page, "SESAR") + explorer_page.wait_for_timeout(5000) + restored = self._get_count(explorer_page, "material", + "https://w3id.org/isample/vocabulary/material/1.0/earthmaterial") + + assert filtered != baseline, "Filter should have changed the count" + assert restored == baseline, ( + f"Count should restore to baseline after clearing: {baseline} -> {restored}" + ) + + def test_zero_count_items_are_dimmed(self, explorer_page): + """Facet values with 0 matches should have reduced opacity.""" + self._wait_for_facets(explorer_page) + + # SMITHSONIAN is smallest source — filtering to it should zero some facets + self._click_checkbox(explorer_page, "SMITHSONIAN") + explorer_page.wait_for_timeout(5000) + + # Find any facet-count with "(0)" and check opacity + zero_counts = explorer_page.locator(".facet-count").filter(has_text="(0)") + if zero_counts.count() > 0: + opacity = zero_counts.first.evaluate("el => getComputedStyle(el).opacity") + assert float(opacity) < 1.0, "Zero-count items should be dimmed" + + def test_new_parquet_endpoints_reachable(self, explorer_page): + """The cross-filter and sample_facets parquet files should be accessible.""" + import subprocess + for url in [ + "https://data.isamples.org/isamples_202601_facet_cross_filter.parquet", + "https://data.isamples.org/isamples_202601_sample_facets_v2.parquet", + ]: + result = subprocess.run( + ["curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", "--head", url], + capture_output=True, text=True + ) + code = result.stdout.strip() + assert code in ("200", "206"), f"{url} returned {code}" + + class TestExplorerSampleCard: """Sample Card section should exist.""" From 051bca9d9b4b057e2b11e6092a92f626af186056 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 10 Apr 2026 09:30:09 -0700 Subject: [PATCH 6/6] Clean up blank-string facet values in sample_facets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert blank strings to NULL with NULLIF in sample_facets_v2 generation (586 blank context rows → NULL). Remove redundant != '' guards from on-the-fly queries since IS NOT NULL now handles both. Addresses Codex finding #2: blank values in sample_facets caused state mismatch with baseline summaries (which correctly excluded blanks). Finding #1 (count universe mismatch) was a false positive — Codex cached stale files; live CDN has consistent counts across all three artifacts (SESAR=4,389,231, total=5,980,282). Co-Authored-By: Claude Opus 4.6 --- tutorials/isamples_explorer.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index de3982d..2c9141a 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -529,7 +529,7 @@ crossFilteredFacets = { const sql = ` SELECT ${column} AS value, COUNT(*) AS count FROM sample_facets - WHERE ${where} AND ${column} IS NOT NULL AND ${column} != '' + WHERE ${where} AND ${column} IS NOT NULL GROUP BY ${column} ORDER BY count DESC `;