diff --git a/tests/test_explorer.py b/tests/test_explorer.py
index be3847e..5709b83 100644
--- a/tests/test_explorer.py
+++ b/tests/test_explorer.py
@@ -71,6 +71,109 @@ def test_four_sources_present(self, explorer_page):
assert explorer_page.get_by_text(source).count() > 0, f"Missing source: {source}"
+class TestExplorerCrossFiltering:
+ """Cross-filtering: clicking a facet should update counts in other facets."""
+
+ def _wait_for_facets(self, page):
+ """Wait for facet count labels to render (requires cross-filter PR)."""
+ facet = page.locator(".facet-count[data-facet='source']")
+ # These data attributes only exist after the cross-filtering code is deployed
+ try:
+ facet.first.wait_for(state="attached", timeout=30000)
+ except Exception:
+ pytest.skip("Cross-filter data attributes not yet deployed")
+
+ def _get_count(self, page, facet, value):
+ """Extract the numeric count from a facet-count label."""
+ el = page.locator(f".facet-count[data-facet='{facet}'][data-value='{value}']")
+ if el.count() == 0:
+ return None
+ text = el.first.text_content() # e.g. "(4,389,231)"
+ return int(text.strip("() ").replace(",", ""))
+
+ def _click_checkbox(self, page, label):
+ """Click a checkbox by its visible label text."""
+ page.get_by_text(label, exact=True).first.click()
+
+ def test_baseline_sesar_count_matches_summaries(self, explorer_page):
+ """Before any interaction, SESAR count should match the facet summary."""
+ self._wait_for_facets(explorer_page)
+ count = self._get_count(explorer_page, "source", "SESAR")
+ assert count is not None, "SESAR facet-count element not found"
+ assert count > 4_000_000, f"SESAR baseline count too low: {count}"
+
+ def test_clicking_source_updates_material_counts(self, explorer_page):
+ """Checking SESAR should lower material counts (no archaeology materials)."""
+ self._wait_for_facets(explorer_page)
+ # Record a material count before filtering
+ before = self._get_count(explorer_page, "material",
+ "https://w3id.org/isample/vocabulary/material/1.0/organicmaterial")
+ assert before is not None, "organicmaterial facet-count not found"
+
+ # Click SESAR checkbox
+ self._click_checkbox(explorer_page, "SESAR")
+
+ # Wait for cross-filter update (labels update in-place via DOM mutation)
+ explorer_page.wait_for_timeout(5000)
+
+ after = self._get_count(explorer_page, "material",
+ "https://w3id.org/isample/vocabulary/material/1.0/organicmaterial")
+ assert after is not None
+ assert after < before, (
+ f"organicmaterial count should decrease with SESAR filter: {before} -> {after}"
+ )
+
+ def test_clearing_filter_restores_baseline(self, explorer_page):
+ """Unchecking a source should restore baseline counts."""
+ self._wait_for_facets(explorer_page)
+ baseline = self._get_count(explorer_page, "material",
+ "https://w3id.org/isample/vocabulary/material/1.0/earthmaterial")
+
+ # Activate then deactivate SESAR
+ self._click_checkbox(explorer_page, "SESAR")
+ explorer_page.wait_for_timeout(5000)
+ filtered = self._get_count(explorer_page, "material",
+ "https://w3id.org/isample/vocabulary/material/1.0/earthmaterial")
+
+ self._click_checkbox(explorer_page, "SESAR")
+ explorer_page.wait_for_timeout(5000)
+ restored = self._get_count(explorer_page, "material",
+ "https://w3id.org/isample/vocabulary/material/1.0/earthmaterial")
+
+ assert filtered != baseline, "Filter should have changed the count"
+ assert restored == baseline, (
+ f"Count should restore to baseline after clearing: {baseline} -> {restored}"
+ )
+
+ def test_zero_count_items_are_dimmed(self, explorer_page):
+ """Facet values with 0 matches should have reduced opacity."""
+ self._wait_for_facets(explorer_page)
+
+ # SMITHSONIAN is smallest source — filtering to it should zero some facets
+ self._click_checkbox(explorer_page, "SMITHSONIAN")
+ explorer_page.wait_for_timeout(5000)
+
+ # Find any facet-count with "(0)" and check opacity
+ zero_counts = explorer_page.locator(".facet-count").filter(has_text="(0)")
+ if zero_counts.count() > 0:
+ opacity = zero_counts.first.evaluate("el => getComputedStyle(el).opacity")
+ assert float(opacity) < 1.0, "Zero-count items should be dimmed"
+
+ def test_new_parquet_endpoints_reachable(self, explorer_page):
+ """The cross-filter and sample_facets parquet files should be accessible."""
+ import subprocess
+ for url in [
+ "https://data.isamples.org/isamples_202601_facet_cross_filter.parquet",
+ "https://data.isamples.org/isamples_202601_sample_facets_v2.parquet",
+ ]:
+ result = subprocess.run(
+ ["curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", "--head", url],
+ capture_output=True, text=True
+ )
+ code = result.stdout.strip()
+ assert code in ("200", "206"), f"{url} returned {code}"
+
+
class TestExplorerSampleCard:
"""Sample Card section should exist."""
diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd
index 402b8a4..2c9141a 100644
--- a/tutorials/isamples_explorer.qmd
+++ b/tutorials/isamples_explorer.qmd
@@ -12,7 +12,7 @@ Search and explore **6.7 million physical samples** from scientific collections
::: {.callout-note}
### Serverless Architecture
-This app uses a **two-tier loading strategy**: a 2KB pre-computed summary loads instantly for facet counts (source, material, context, specimen type), while the full ~280 MB Parquet file is only queried when drilling into records. All powered by DuckDB-WASM in your browser -- no server required!
+This app uses a **two-tier loading strategy**: a 2KB pre-computed summary loads instantly for facet counts, while the full ~280 MB Parquet file is queried on demand. **Cross-filtering** keeps counts accurate — selecting a source updates material/context/specimen counts to reflect only that source's samples. All powered by DuckDB-WASM in your browser — no server required!
:::
## Setup
@@ -31,6 +31,12 @@ parquet_url = "https://data.isamples.org/isamples_202601_wide.parquet"
// Pre-computed facet summaries (2KB - loads instantly)
facet_summaries_url = "https://data.isamples.org/isamples_202601_facet_summaries.parquet"
+// Pre-computed cross-filter cache (6KB - instant single-filter lookups)
+cross_filter_url = "https://data.isamples.org/isamples_202601_facet_cross_filter.parquet"
+
+// Facets file for on-the-fly multi-filter queries (63MB - URI strings, not BIGINT FKs)
+sample_facets_url = "https://data.isamples.org/isamples_202601_sample_facets_v2.parquet"
+
// Source color scheme (consistent with iSamples conventions)
SOURCE_COLORS = ({
'SESAR': '#3366CC', // Blue
@@ -92,7 +98,6 @@ facetSummariesWarning
//| code-fold: true
// Source checkboxes with counts - uses pre-computed summaries for instant load
viewof sourceCheckboxes = {
- // Use pre-computed facet summaries (instant) instead of scanning full parquet
const counts = facetsByType.source;
const options = counts.map(r => r.value);
@@ -104,7 +109,7 @@ viewof sourceCheckboxes = {
const count = r ? Number(r.count).toLocaleString() : "0";
return html`
- ${x} (${count})
+ ${x} (${count})
`;
}
});
@@ -125,7 +130,7 @@ viewof materialCheckboxes = {
const r = counts.find(s => s.value === x);
const count = r ? Number(r.count).toLocaleString() : "0";
return html`
- ${x} (${count})
+ ${x} (${count})
`;
}
});
@@ -146,7 +151,7 @@ viewof contextCheckboxes = {
const r = counts.find(s => s.value === x);
const count = r ? Number(r.count).toLocaleString() : "0";
return html`
- ${x} (${count})
+ ${x} (${count})
`;
}
});
@@ -167,7 +172,7 @@ viewof objectTypeCheckboxes = {
const r = counts.find(s => s.value === x);
const count = r ? Number(r.count).toLocaleString() : "0";
return html`
- ${x} (${count})
+ ${x} (${count})
`;
}
});
@@ -308,9 +313,11 @@ db = {
await instance.instantiate(bundle.mainModule, bundle.pthreadWorker);
URL.revokeObjectURL(worker_url);
- // Create view for convenience
+ // Create views for convenience
const conn = await instance.connect();
await conn.query(`CREATE VIEW samples AS SELECT * FROM read_parquet('${parquet_url}')`);
+ // Slim facets view with correct URI-string columns for cross-filtering
+ await conn.query(`CREATE VIEW sample_facets AS SELECT * FROM read_parquet('${sample_facets_url}')`);
await conn.close();
return instance;
@@ -366,7 +373,7 @@ facetSummariesWarning = {
`;
}
-// Extract facet counts by type from pre-computed summaries
+// Extract facet counts by type from pre-computed summaries (baseline)
facetsByType = {
const grouped = { source: [], material: [], context: [], object_type: [] };
for (const row of facetSummaries) {
@@ -383,16 +390,210 @@ facetsByType = {
}
```
+```{ojs}
+//| code-fold: true
+// Cross-filter: build WHERE clause excluding one facet dimension
+// Queries the sample_facets view (URI strings, correct column names)
+function buildCrossFilterWhere(excludeFacet) {
+ const conditions = [];
+
+ // Text search participates in cross-filtering
+ if (searchInput?.trim()) {
+ const term = searchInput.trim().replace(/'/g, "''");
+ conditions.push(`(
+ label ILIKE '%${term}%'
+ OR description ILIKE '%${term}%'
+ OR CAST(place_name AS VARCHAR) ILIKE '%${term}%'
+ )`);
+ }
+
+ if (excludeFacet !== 'source') {
+ const sources = Array.from(sourceCheckboxes || []);
+ if (sources.length > 0) {
+ const sourceList = sources.map(s => `'${s}'`).join(", ");
+ conditions.push(`source IN (${sourceList})`);
+ }
+ }
+
+ if (excludeFacet !== 'material') {
+ const materials = Array.from(materialCheckboxes || []);
+ if (materials.length > 0) {
+ const matList = materials.map(m => `'${m.replace(/'/g, "''")}'`).join(", ");
+ conditions.push(`material IN (${matList})`);
+ }
+ }
+
+ if (excludeFacet !== 'context') {
+ const contexts = Array.from(contextCheckboxes || []);
+ if (contexts.length > 0) {
+ const ctxList = contexts.map(c => `'${c.replace(/'/g, "''")}'`).join(", ");
+ conditions.push(`context IN (${ctxList})`);
+ }
+ }
+
+ if (excludeFacet !== 'object_type') {
+ const objectTypes = Array.from(objectTypeCheckboxes || []);
+ if (objectTypes.length > 0) {
+ const otList = objectTypes.map(o => `'${o.replace(/'/g, "''")}'`).join(", ");
+ conditions.push(`object_type IN (${otList})`);
+ }
+ }
+
+ return conditions.length > 0 ? conditions.join(" AND ") : "1=1";
+}
+```
+
+```{ojs}
+//| code-fold: true
+// Detect whether any filter is active (triggers cross-filter queries)
+hasActiveFilters = {
+ const hasSearch = searchInput?.trim()?.length > 0;
+ const hasSources = (sourceCheckboxes || []).length > 0;
+ const hasMaterials = (materialCheckboxes || []).length > 0;
+ const hasContexts = (contextCheckboxes || []).length > 0;
+ const hasObjectTypes = (objectTypeCheckboxes || []).length > 0;
+ return hasSearch || hasSources || hasMaterials || hasContexts || hasObjectTypes;
+}
+```
+
+```{ojs}
+//| code-fold: true
+// Cross-filtered facet counts: use pre-computed cache for single-filter,
+// fall back to on-the-fly queries against sample_facets for multi-filter
+crossFilteredFacets = {
+ if (!hasActiveFilters) return null; // Use pre-computed summaries when no filters
+
+ // Count how many facets have active filters
+ const activeSources = Array.from(sourceCheckboxes || []);
+ const activeMaterials = Array.from(materialCheckboxes || []);
+ const activeContexts = Array.from(contextCheckboxes || []);
+ const activeObjectTypes = Array.from(objectTypeCheckboxes || []);
+ const hasSearch = searchInput?.trim()?.length > 0;
+
+ const activeFilterCount = [activeSources, activeMaterials, activeContexts, activeObjectTypes]
+ .filter(a => a.length > 0).length;
+
+ // Try pre-computed cache: exactly one facet active, exactly one value, no text search
+ const singleValueFacet = (
+ !hasSearch && activeFilterCount === 1 &&
+ [activeSources, activeMaterials, activeContexts, activeObjectTypes]
+ .every(a => a.length <= 1)
+ );
+
+ if (singleValueFacet) {
+ try {
+ const conditions = ["filter_source IS NULL", "filter_material IS NULL",
+ "filter_context IS NULL", "filter_object_type IS NULL"];
+ if (activeSources.length === 1)
+ conditions[0] = `filter_source = '${activeSources[0].replace(/'/g, "''")}'`;
+ else if (activeMaterials.length === 1)
+ conditions[1] = `filter_material = '${activeMaterials[0].replace(/'/g, "''")}'`;
+ else if (activeContexts.length === 1)
+ conditions[2] = `filter_context = '${activeContexts[0].replace(/'/g, "''")}'`;
+ else if (activeObjectTypes.length === 1)
+ conditions[3] = `filter_object_type = '${activeObjectTypes[0].replace(/'/g, "''")}'`;
+
+ const sql = `
+ SELECT facet_type, facet_value AS value, count
+ FROM read_parquet('${cross_filter_url}')
+ WHERE ${conditions.join(" AND ")}
+ `;
+ const rows = await runQuery(sql);
+
+ if (rows.length > 0) {
+ const results = { source: [], material: [], context: [], object_type: [] };
+ for (const r of rows) {
+ if (results[r.facet_type]) {
+ results[r.facet_type].push({ value: r.value, count: Number(r.count) });
+ }
+ }
+ return results;
+ }
+ } catch (e) {
+ console.warn("Pre-computed cache miss, falling back to on-the-fly:", e);
+ }
+ }
+
+ // Fallback: on-the-fly queries against the slim sample_facets view
+ const facetConfig = [
+ { key: 'source', column: 'source', exclude: 'source' },
+ { key: 'material', column: 'material', exclude: 'material' },
+ { key: 'context', column: 'context', exclude: 'context' },
+ { key: 'object_type', column: 'object_type', exclude: 'object_type' },
+ ];
+
+ const results = {};
+
+ const queries = facetConfig.map(async ({ key, column, exclude }) => {
+ const where = buildCrossFilterWhere(exclude);
+ const sql = `
+ SELECT ${column} AS value, COUNT(*) AS count
+ FROM sample_facets
+ WHERE ${where} AND ${column} IS NOT NULL
+ GROUP BY ${column}
+ ORDER BY count DESC
+ `;
+ try {
+ const rows = await runQuery(sql);
+ results[key] = rows.map(r => ({ value: r.value, count: r.count }));
+ } catch (e) {
+ console.warn(`Cross-filter query failed for ${key}:`, e);
+ results[key] = null;
+ }
+ });
+
+ await Promise.all(queries);
+ return results;
+}
+```
+
+```{ojs}
+//| code-fold: true
+// Update facet count labels in-place when cross-filtered counts change
+// This avoids re-rendering checkboxes (which would reset user selections)
+{
+ if (!crossFilteredFacets) {
+ // No active filters — restore baseline counts and remove dimming
+ for (const facetKey of ['source', 'material', 'context', 'object_type']) {
+ const baseline = facetsByType[facetKey] || [];
+ const countMap = new Map(baseline.map(r => [r.value, r.count]));
+ document.querySelectorAll(`.facet-count[data-facet="${facetKey}"]`).forEach(el => {
+ const value = el.getAttribute('data-value');
+ const count = countMap.get(value) ?? 0;
+ el.textContent = `(${Number(count).toLocaleString()})`;
+ el.style.opacity = '1';
+ });
+ }
+ return;
+ }
+
+ for (const [facetKey, rows] of Object.entries(crossFilteredFacets)) {
+ if (!rows) continue;
+ const countMap = new Map(rows.map(r => [r.value, r.count]));
+
+ document.querySelectorAll(`.facet-count[data-facet="${facetKey}"]`).forEach(el => {
+ const value = el.getAttribute('data-value');
+ const count = countMap.get(value) ?? 0;
+ el.textContent = `(${Number(count).toLocaleString()})`;
+ el.style.opacity = count === 0 ? '0.4' : '1';
+ });
+ }
+}
+```
+
```{ojs}
//| code-fold: true
// Build WHERE clause from current filters (Tier 2: queries full parquet only when filtering)
+// Source filter uses the wide parquet's `n` column directly.
+// Material/context/object_type filters use the sample_facets view (URI strings)
+// via a subquery, since the wide parquet stores these as BIGINT foreign keys.
whereClause = {
const conditions = [
"otype = 'MaterialSampleRecord'",
"latitude IS NOT NULL"
];
- // Text search
+ // Text search (against wide parquet — has label, description, place_name)
if (searchInput?.trim()) {
const term = searchInput.trim().replace(/'/g, "''");
conditions.push(`(
@@ -402,32 +603,33 @@ whereClause = {
)`);
}
- // Source filter
+ // Source filter (n column exists in wide parquet)
const sources = Array.from(sourceCheckboxes || []);
if (sources.length > 0) {
const sourceList = sources.map(s => `'${s}'`).join(", ");
conditions.push(`n IN (${sourceList})`);
}
- // Material filter
+ // Facet filters: build a subquery against sample_facets to get matching PIDs
+ const facetConditions = [];
const materials = Array.from(materialCheckboxes || []);
if (materials.length > 0) {
const matList = materials.map(m => `'${m.replace(/'/g, "''")}'`).join(", ");
- conditions.push(`has_material_category IN (${matList})`);
+ facetConditions.push(`material IN (${matList})`);
}
-
- // Context (sampled feature) filter
const contexts = Array.from(contextCheckboxes || []);
if (contexts.length > 0) {
const ctxList = contexts.map(c => `'${c.replace(/'/g, "''")}'`).join(", ");
- conditions.push(`has_context_category IN (${ctxList})`);
+ facetConditions.push(`context IN (${ctxList})`);
}
-
- // Object type (specimen type) filter
const objectTypes = Array.from(objectTypeCheckboxes || []);
if (objectTypes.length > 0) {
const otList = objectTypes.map(o => `'${o.replace(/'/g, "''")}'`).join(", ");
- conditions.push(`has_specimen_category IN (${otList})`);
+ facetConditions.push(`object_type IN (${otList})`);
+ }
+
+ if (facetConditions.length > 0) {
+ conditions.push(`pid IN (SELECT pid FROM sample_facets WHERE ${facetConditions.join(" AND ")})`);
}
return conditions.join(" AND ");