From 5c2d9f6519d0928c5afbf7615f36ccb344662090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 21 May 2026 20:14:59 +0200 Subject: [PATCH 1/4] fix: improve Android snapshot fidelity --- .../__tests__/screenshot-overlay.test.ts | 151 +++++++ src/daemon/screenshot-overlay-android.ts | 93 +++++ src/daemon/screenshot-overlay-rects.ts | 38 ++ src/daemon/screenshot-overlay.ts | 195 +++++---- src/utils/__tests__/output.test.ts | 320 +++++++++++++- .../android-helper-presentation/geometry.ts | 23 ++ .../android-helper-presentation/labels.ts | 35 ++ .../android-helper-presentation/predicates.ts | 16 + .../structural-noise.ts | 265 ++++++++++++ src/utils/android-helper-presentation/tree.ts | 47 +++ .../android-helper-snapshot-presentation.ts | 389 +++--------------- src/utils/output.ts | 49 ++- src/utils/snapshot-processing.ts | 10 +- 13 files changed, 1184 insertions(+), 447 deletions(-) create mode 100644 src/daemon/screenshot-overlay-android.ts create mode 100644 src/daemon/screenshot-overlay-rects.ts create mode 100644 src/utils/android-helper-presentation/geometry.ts create mode 100644 src/utils/android-helper-presentation/labels.ts create mode 100644 src/utils/android-helper-presentation/predicates.ts create mode 100644 src/utils/android-helper-presentation/structural-noise.ts create mode 100644 src/utils/android-helper-presentation/tree.ts diff --git a/src/daemon/__tests__/screenshot-overlay.test.ts b/src/daemon/__tests__/screenshot-overlay.test.ts index b78455b1e..08ff9974b 100644 --- a/src/daemon/__tests__/screenshot-overlay.test.ts +++ b/src/daemon/__tests__/screenshot-overlay.test.ts @@ -258,6 +258,157 @@ test('buildScreenshotOverlayRefs prefers descendant text over generic android re ]); }); +test('buildScreenshotOverlayRefs keeps Android pixel rects aligned with screenshots', () => { + const snapshot = makeSnapshotState( + [ + { + index: 0, + type: 'android.widget.ScrollView', + rect: { x: 0, y: 0, width: 1344, height: 2920 }, + }, + { + index: 1, + parentIndex: 0, + type: 'android.widget.LinearLayout', + hittable: true, + rect: { x: 0, y: 2697, width: 1344, height: 223 }, + }, + { + index: 2, + parentIndex: 1, + type: 'android.widget.TextView', + label: 'Storage', + rect: { x: 240, y: 2745, width: 205, height: 81 }, + }, + ], + { backend: 'android' }, + ); + + const overlayRefs = buildScreenshotOverlayRefs(snapshot, 1344, 2992); + + assert.deepEqual(overlayRefs, [ + { + ref: 'e2', + label: 'Storage', + rect: { x: 0, y: 2697, width: 1344, height: 223 }, + overlayRect: { x: 0, y: 2697, width: 1344, height: 223 }, + center: { x: 672, y: 2809 }, + }, + ]); +}); + +test('buildScreenshotOverlayRefs includes unlabeled Android bottom tab controls', () => { + const snapshot = makeSnapshotState( + [ + { + index: 0, + type: 'android.widget.FrameLayout', + rect: { x: 0, y: 0, width: 1344, height: 2992 }, + }, + { + index: 1, + parentIndex: 0, + type: 'android.widget.ScrollView', + hittable: true, + rect: { x: 0, y: 159, width: 1344, height: 2593 }, + }, + { + index: 2, + parentIndex: 0, + type: 'android.widget.TextView', + label: 'Agent Device Tester', + rect: { x: 54, y: 181, width: 770, height: 86 }, + }, + { + index: 3, + parentIndex: 0, + type: 'android.view.ViewGroup', + hittable: true, + rect: { x: 72, y: 2724, width: 192, height: 132 }, + }, + { + index: 4, + parentIndex: 0, + type: 'android.view.ViewGroup', + hittable: true, + rect: { x: 436, y: 2724, width: 192, height: 132 }, + }, + { + index: 5, + parentIndex: 0, + type: 'android.view.ViewGroup', + hittable: true, + rect: { x: 800, y: 2724, width: 192, height: 132 }, + }, + { + index: 6, + parentIndex: 0, + type: 'android.view.ViewGroup', + hittable: true, + rect: { x: 1164, y: 2724, width: 132, height: 132 }, + }, + ], + { backend: 'android' }, + ); + + const overlayRefs = buildScreenshotOverlayRefs(snapshot, 1344, 2992); + + assert.deepEqual( + overlayRefs.map((overlayRef) => overlayRef.ref), + ['e4', 'e5', 'e6', 'e7'], + ); + assert.ok( + overlayRefs.every((overlayRef) => !overlayRef.label), + 'unlabeled Android tab controls should still get visual refs', + ); +}); + +test('buildScreenshotOverlayRefs trims Android row spacing from unlabeled action containers', () => { + const snapshot = makeSnapshotState( + [ + { + index: 0, + type: 'android.widget.ScrollView', + rect: { x: 0, y: 0, width: 1344, height: 2920 }, + }, + { + index: 1, + parentIndex: 0, + type: 'android.widget.LinearLayout', + hittable: true, + rect: { x: 0, y: 447, width: 1344, height: 282 }, + }, + { + index: 2, + parentIndex: 1, + type: 'android.widget.TextView', + label: 'Google', + rect: { x: 240, y: 495, width: 190, height: 81 }, + }, + { + index: 3, + parentIndex: 1, + type: 'android.widget.TextView', + label: 'Services & preferences', + rect: { x: 240, y: 576, width: 425, height: 57 }, + }, + ], + { backend: 'android' }, + ); + + const overlayRefs = buildScreenshotOverlayRefs(snapshot, 1344, 2992); + + assert.deepEqual(overlayRefs, [ + { + ref: 'e2', + label: 'Google', + rect: { x: 0, y: 447, width: 1344, height: 234 }, + overlayRect: { x: 0, y: 447, width: 1344, height: 234 }, + center: { x: 672, y: 564 }, + }, + ]); +}); + test('annotateScreenshotWithRefs draws the overlay onto the saved PNG', async () => { const root = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-screenshot-overlay-')); const screenshotPath = path.join(root, 'screen.png'); diff --git a/src/daemon/screenshot-overlay-android.ts b/src/daemon/screenshot-overlay-android.ts new file mode 100644 index 000000000..ada24110a --- /dev/null +++ b/src/daemon/screenshot-overlay-android.ts @@ -0,0 +1,93 @@ +import type { Rect, SnapshotNode } from '../utils/snapshot.ts'; +import { normalizeType } from './snapshot-processing.ts'; +import { hasPositiveRect, rectContains, unionRects } from './screenshot-overlay-rects.ts'; + +export function resolveAndroidOverlaySourceRect( + target: SnapshotNode, + nodes: SnapshotNode[], + hasActionableRole: (node: SnapshotNode) => boolean, + hasOverlayLabel: (node: SnapshotNode) => boolean, +): Rect | null { + if ( + !target.rect || + target.hittable !== true || + hasActionableRole(target) || + hasOverlayLabel(target) + ) { + return null; + } + return balanceAndroidActionRowRect(target, nodes, hasOverlayLabel); +} + +function balanceAndroidActionRowRect( + target: SnapshotNode, + nodes: SnapshotNode[], + hasOverlayLabel: (node: SnapshotNode) => boolean, +): Rect | null { + const targetRect = target.rect!; + const contentRect = measureAndroidActionRowContentRect(target, nodes, hasOverlayLabel); + if (!contentRect) return null; + + const topPadding = contentRect.y - targetRect.y; + const bottomPadding = targetRect.y + targetRect.height - (contentRect.y + contentRect.height); + if (topPadding < 0 || bottomPadding < 0) return null; + if (Math.abs(bottomPadding - topPadding) < 16) return null; + + const balancedPadding = Math.min(topPadding, bottomPadding); + const y = Math.round(contentRect.y - balancedPadding); + const height = Math.round(contentRect.height + balancedPadding * 2); + if (height <= 0 || height >= targetRect.height) return null; + + return { + x: targetRect.x, + y, + width: targetRect.width, + height, + }; +} + +function measureAndroidActionRowContentRect( + target: SnapshotNode, + nodes: SnapshotNode[], + hasOverlayLabel: (node: SnapshotNode) => boolean, +): Rect | null { + const targetRect = target.rect!; + const nodeIndex = new Map(nodes.map((node) => [node.index, node])); + const contentRects = nodes + .filter( + (node) => + node.ref !== target.ref && + isDescendantOf(node, target, nodeIndex) && + isAndroidActionRowVisualContent(node, hasOverlayLabel) && + hasPositiveRect(node.rect) && + rectContains(targetRect, node.rect), + ) + .map((node) => node.rect!); + if (contentRects.length < 2) return null; + return unionRects(contentRects); +} + +function isAndroidActionRowVisualContent( + node: SnapshotNode, + hasOverlayLabel: (node: SnapshotNode) => boolean, +): boolean { + const normalizedType = normalizeType(node.type ?? ''); + return ( + normalizedType.includes('text') || (normalizedType.includes('image') && hasOverlayLabel(node)) + ); +} + +function isDescendantOf( + node: SnapshotNode, + ancestor: SnapshotNode, + nodeIndex: ReadonlyMap, +): boolean { + let current = node; + while (current.parentIndex !== undefined) { + const parent = nodeIndex.get(current.parentIndex); + if (!parent) return false; + if (parent.ref === ancestor.ref) return true; + current = parent; + } + return false; +} diff --git a/src/daemon/screenshot-overlay-rects.ts b/src/daemon/screenshot-overlay-rects.ts new file mode 100644 index 000000000..00b595210 --- /dev/null +++ b/src/daemon/screenshot-overlay-rects.ts @@ -0,0 +1,38 @@ +import type { Rect } from '../utils/snapshot.ts'; + +export function hasPositiveRect(rect: Rect | undefined): rect is Rect { + return Boolean(rect && rect.width > 0 && rect.height > 0); +} + +export function rectArea(rect: Rect): number { + return rect.width * rect.height; +} + +export function rectContains(container: Rect, nested: Rect): boolean { + return ( + nested.x >= container.x && + nested.y >= container.y && + nested.x + nested.width <= container.x + container.width && + nested.y + nested.height <= container.y + container.height + ); +} + +export function unionRects(rects: Rect[]): Rect | null { + if (rects.length === 0) return null; + let minX = Number.POSITIVE_INFINITY; + let minY = Number.POSITIVE_INFINITY; + let maxRight = Number.NEGATIVE_INFINITY; + let maxBottom = Number.NEGATIVE_INFINITY; + for (const rect of rects) { + minX = Math.min(minX, rect.x); + minY = Math.min(minY, rect.y); + maxRight = Math.max(maxRight, rect.x + rect.width); + maxBottom = Math.max(maxBottom, rect.y + rect.height); + } + return { + x: minX, + y: minY, + width: maxRight - minX, + height: maxBottom - minY, + }; +} diff --git a/src/daemon/screenshot-overlay.ts b/src/daemon/screenshot-overlay.ts index 0baee29de..662978ef9 100644 --- a/src/daemon/screenshot-overlay.ts +++ b/src/daemon/screenshot-overlay.ts @@ -8,11 +8,9 @@ import { type SnapshotState, } from '../utils/snapshot.ts'; import { decodePng } from '../utils/png.ts'; -import { - findNearestHittableAncestor, - normalizeType, - resolveRefLabel, -} from './snapshot-processing.ts'; +import { findNearestAncestor, normalizeType } from './snapshot-processing.ts'; +import { resolveAndroidOverlaySourceRect } from './screenshot-overlay-android.ts'; +import { hasPositiveRect, rectArea, rectContains } from './screenshot-overlay-rects.ts'; const MAX_OVERLAY_REFS = 24; const BORDER_COLOR = [255, 59, 48, 255] as const; @@ -25,6 +23,26 @@ const BADGE_PADDING_X = 3; const BADGE_PADDING_Y = 2; const BADGE_MARGIN = 2; const BORDER_THICKNESS = 2; +const ANDROID_UNLABELED_CLICKABLE_EXCLUDED_TYPES = [ + 'scroll', + 'list', + 'recyclerview', + 'edittext', + 'textfield', +] as const; +const ACTIONABLE_ROLE_TYPES = [ + 'button', + 'link', + 'menu', + 'tab', + 'textfield', + 'searchfield', + 'securetextfield', + 'checkbox', + 'radio', + 'switch', + 'cell', +] as const; const FONT: Record = { e: ['01110', '10000', '11110', '10000', '10000', '10001', '01110'], @@ -73,14 +91,16 @@ export function buildScreenshotOverlayRefs( const snapshotBounds = resolveSnapshotBounds(snapshot.nodes); const candidatesByRef = new Map(); for (const node of snapshot.nodes) { - if (!isOverlaySourceNode(node)) continue; + if (!isOverlaySourceNode(snapshot, snapshotBounds, node)) continue; const target = resolveOverlayTarget(snapshot.nodes, node); if (!target?.rect || !hasPositiveRect(target.rect)) continue; const label = resolveOverlayLabel(node, target, snapshot.nodes); const score = scoreOverlayCandidate(node, target, label); + const overlaySourceRect = resolveOverlaySourceRect(snapshot, target, snapshot.nodes); const overlayRect = projectRectToScreenshot( + snapshot, snapshotBounds, - target.rect, + overlaySourceRect, screenshotWidth, screenshotHeight, ); @@ -90,7 +110,7 @@ export function buildScreenshotOverlayRefs( candidatesByRef.set(target.ref, { ref: target.ref, label, - rect: target.rect, + rect: overlaySourceRect, overlayRect, score, }); @@ -98,22 +118,9 @@ export function buildScreenshotOverlayRefs( } const ranked = suppressContainedCandidates([...candidatesByRef.values()]) - .sort((left, right) => { - if (right.score !== left.score) return right.score - left.score; - const topDelta = left.overlayRect.y - right.overlayRect.y; - if (topDelta !== 0) return topDelta; - const leftDelta = left.overlayRect.x - right.overlayRect.x; - if (leftDelta !== 0) return leftDelta; - return compareNumericRefs(left.ref, right.ref); - }) + .sort(compareOverlayCandidatesByScore) .slice(0, options.maxRefs ?? MAX_OVERLAY_REFS) - .sort((left, right) => { - const topDelta = left.overlayRect.y - right.overlayRect.y; - if (topDelta !== 0) return topDelta; - const leftDelta = left.overlayRect.x - right.overlayRect.x; - if (leftDelta !== 0) return leftDelta; - return compareNumericRefs(left.ref, right.ref); - }); + .sort(compareOverlayCandidatesByPosition); return ranked.map((candidate) => ({ ref: candidate.ref, @@ -124,28 +131,59 @@ export function buildScreenshotOverlayRefs( })); } -function isOverlaySourceNode(node: SnapshotNode): boolean { +function resolveOverlaySourceRect( + snapshot: SnapshotState, + target: SnapshotNode, + nodes: SnapshotState['nodes'], +): Rect { + if (snapshot.backend !== 'android') return target.rect!; + return ( + resolveAndroidOverlaySourceRect(target, nodes, hasActionableRole, (node) => + Boolean(resolveNodeOverlayLabel(node)), + ) ?? target.rect! + ); +} + +function isOverlaySourceNode( + snapshot: SnapshotState, + snapshotBounds: Rect | null, + node: SnapshotNode, +): boolean { const hasTextSignal = [node.label, node.value].some(isOverlaySignal) || isMeaningfulOverlayIdentifier(node.identifier); + if (isAndroidUnlabeledClickableSource(snapshot, snapshotBounds, node)) return true; if (hasActionableRole(node)) return hasTextSignal; return hasTextSignal && isProxyOverlayNode(node); } +function isAndroidUnlabeledClickableSource( + snapshot: SnapshotState, + snapshotBounds: Rect | null, + node: SnapshotNode, +): boolean { + if (snapshot.backend !== 'android') return false; + if (!node.hittable || !hasPositiveRect(node.rect) || isViewportLikeNode(node)) return false; + const normalizedType = normalizeType(node.type ?? ''); + if (ANDROID_UNLABELED_CLICKABLE_EXCLUDED_TYPES.some((type) => normalizedType.includes(type))) { + return false; + } + if (snapshotBounds && rectArea(node.rect) > rectArea(snapshotBounds) * 0.25) return false; + return true; +} + function resolveOverlayTarget( nodes: SnapshotState['nodes'], node: SnapshotNode, ): SnapshotNode | null { - if (isOverlayActionableNode(node) && hasPositiveRect(node.rect)) return node; - const actionableAncestor = findNearestActionableAncestor(nodes, node); - if (actionableAncestor?.rect && hasPositiveRect(actionableAncestor.rect)) - return actionableAncestor; - if (node.hittable && hasPositiveRect(node.rect) && !isViewportLikeNode(node)) return node; - const ancestor = findNearestHittableAncestor(nodes, node); - if (ancestor?.rect && hasPositiveRect(ancestor.rect) && !isViewportLikeNode(ancestor)) { - return ancestor; - } - return null; + return ( + [ + isOverlayActionableNode(node) ? node : null, + findNearestAncestor(nodes, node, (parent) => isOverlayActionableNode(parent)), + node.hittable ? node : null, + findNearestAncestor(nodes, node, (parent) => parent.hittable === true), + ].find(isUsableOverlayTarget) ?? null + ); } function resolveOverlayLabel( @@ -157,7 +195,7 @@ function resolveOverlayLabel( if (source.ref !== target.ref && sourceLabel) return sourceLabel; const descendantLabel = findDescendantOverlayLabel(target, nodes); if (descendantLabel) return descendantLabel; - return resolveNodeOverlayLabel(target) ?? resolveRefLabel(target, nodes); + return resolveNodeOverlayLabel(target); } function scoreOverlayCandidate( @@ -201,22 +239,17 @@ function suppressContainedCandidates(candidates: OverlayCandidate[]): OverlayCan } function projectRectToScreenshot( + snapshot: SnapshotState, bounds: Rect | null, rect: Rect, screenshotWidth: number, screenshotHeight: number, ): Rect { + if (snapshot.backend === 'android') { + return clampRect(roundRect(rect), screenshotWidth, screenshotHeight); + } if (!bounds) { - return clampRect( - { - x: Math.round(rect.x), - y: Math.round(rect.y), - width: Math.round(rect.width), - height: Math.round(rect.height), - }, - screenshotWidth, - screenshotHeight, - ); + return clampRect(roundRect(rect), screenshotWidth, screenshotHeight); } const scaleX = screenshotWidth / bounds.width; const scaleY = screenshotHeight / bounds.height; @@ -279,19 +312,7 @@ function hasActionableRole(node: SnapshotNode): boolean { const roleText = [node.type, node.role, node.subrole] .map((value) => normalizeType(value ?? '')) .join(' '); - return ( - roleText.includes('button') || - roleText.includes('link') || - roleText.includes('menu') || - roleText.includes('tab') || - roleText.includes('textfield') || - roleText.includes('searchfield') || - roleText.includes('securetextfield') || - roleText.includes('checkbox') || - roleText.includes('radio') || - roleText.includes('switch') || - roleText.includes('cell') - ); + return ACTIONABLE_ROLE_TYPES.some((type) => roleText.includes(type)); } function isOverlayActionableNode(node: SnapshotNode): boolean { @@ -315,21 +336,8 @@ function isViewportLikeNode(node: Pick(); - while (current.parentIndex !== undefined) { - if (visited.has(current.ref)) break; - visited.add(current.ref); - const parent = nodes[current.parentIndex]; - if (!parent) break; - if (isOverlayActionableNode(parent) && hasPositiveRect(parent.rect)) return parent; - current = parent; - } - return null; +function isUsableOverlayTarget(node: SnapshotNode | null): node is SnapshotNode { + return Boolean(node?.rect && hasPositiveRect(node.rect) && !isViewportLikeNode(node)); } function isMeaningfulSignal(value: string | undefined): boolean { @@ -412,29 +420,21 @@ function isGenericOverlayIdentifier(value: string): boolean { return /^[a-z0-9_.]+:id\/[a-z0-9_.-]+$/i.test(value.trim()); } -function hasPositiveRect(rect: Rect | undefined): rect is Rect { - return Boolean(rect && rect.width > 0 && rect.height > 0); -} - -function rectArea(rect: Rect): number { - return rect.width * rect.height; -} - -function rectContains(container: Rect, nested: Rect): boolean { - return ( - nested.x >= container.x && - nested.y >= container.y && - nested.x + nested.width <= container.x + container.width && - nested.y + nested.height <= container.y + container.height - ); -} - function compareNumericRefs(left: string, right: string): number { const leftValue = Number.parseInt(left.replace(/^\D+/, ''), 10); const rightValue = Number.parseInt(right.replace(/^\D+/, ''), 10); return leftValue - rightValue; } +function roundRect(rect: Rect): Rect { + return { + x: Math.round(rect.x), + y: Math.round(rect.y), + width: Math.round(rect.width), + height: Math.round(rect.height), + }; +} + function clampRect(rect: Rect, width: number, height: number): Rect { const x = clamp(rect.x, 0, Math.max(0, width - 1)); const y = clamp(rect.y, 0, Math.max(0, height - 1)); @@ -572,3 +572,18 @@ function setPixel( png.data[index + 2] = color[2]; png.data[index + 3] = color[3]; } +function compareOverlayCandidatesByPosition( + left: OverlayCandidate, + right: OverlayCandidate, +): number { + const topDelta = left.overlayRect.y - right.overlayRect.y; + if (topDelta !== 0) return topDelta; + const leftDelta = left.overlayRect.x - right.overlayRect.x; + if (leftDelta !== 0) return leftDelta; + return compareNumericRefs(left.ref, right.ref); +} + +function compareOverlayCandidatesByScore(left: OverlayCandidate, right: OverlayCandidate): number { + if (right.score !== left.score) return right.score - left.score; + return compareOverlayCandidatesByPosition(left, right); +} diff --git a/src/utils/__tests__/output.test.ts b/src/utils/__tests__/output.test.ts index 184806a7c..9133e7104 100644 --- a/src/utils/__tests__/output.test.ts +++ b/src/utils/__tests__/output.test.ts @@ -460,10 +460,12 @@ test('formatSnapshotText collapses Android helper nodes in agent-facing output', assert.match(text, /@e3 \[button\] "alice@example\.com"/); assert.doesNotMatch(text, /@e4 \[button\] "alice@example\.com"/); assert.doesNotMatch(text, /Invisible stale action/); - assert.match(text, /@e8 \[group\] "Dashboard" \[likely navigation\]/); - assert.match(text, /@e10 \[group\] "Messages\. Your review is required" \[likely navigation\]/); - assert.match(text, /@e12 \[group\] "Billing" \[likely navigation\]/); - assert.match(text, /@e14 \[group\] "Profile, My settings\." \[likely navigation\]/); + assert.match(text, /@e8 \[group\] "Dashboard"/); + assert.match(text, /@e10 \[group\] "Messages\. Your review is required"/); + assert.match(text, /@e12 \[group\] "Billing"/); + assert.match(text, /@e14 \[group\] "Profile, My settings\."/); + assert.doesNotMatch(text, /@e11 \[text\] "Messages"/); + assert.doesNotMatch(text, /@e15 \[text\] "Profile"/); assert.doesNotMatch(text, /possible repeated nav subtree/); const raw = withNoColor(() => @@ -480,6 +482,80 @@ test('formatSnapshotText collapses Android helper nodes in agent-facing output', assert.match(raw, /"Messages\. Your review is required"/); }); +test('formatSnapshotText promotes Android helper unlabeled action rows', () => { + const nodes = [ + { + ref: 'e1', + index: 0, + depth: 0, + type: 'android.widget.FrameLayout', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + ref: 'e2', + index: 1, + depth: 1, + parentIndex: 0, + type: 'android.widget.LinearLayout', + rect: { x: 0, y: 160, width: 390, height: 72 }, + hittable: true, + }, + { + ref: 'e3', + index: 2, + depth: 2, + parentIndex: 1, + type: 'android.widget.ImageView', + rect: { x: 24, y: 176, width: 32, height: 32 }, + }, + { + ref: 'e4', + index: 3, + depth: 2, + parentIndex: 1, + type: 'android.widget.TextView', + label: 'Network & internet', + rect: { x: 72, y: 168, width: 260, height: 28 }, + }, + { + ref: 'e5', + index: 4, + depth: 2, + parentIndex: 1, + type: 'android.widget.TextView', + label: 'Mobile, Wi-Fi, hotspot', + rect: { x: 72, y: 198, width: 260, height: 24 }, + }, + ]; + const text = withNoColor(() => + formatSnapshotText({ + nodes, + truncated: false, + androidSnapshot: { backend: 'android-helper' }, + }), + ); + + assert.match(text, /Snapshot: 2 visible nodes \(5 total\)/); + assert.match(text, /Collapsed 3 Android helper nodes from the agent-facing text snapshot/); + assert.match(text, /@e2 \[group\] "Network & internet, Mobile, Wi-Fi, hotspot"/); + assert.doesNotMatch(text, /@e4 \[text\] "Network & internet"/); + assert.doesNotMatch(text, /@e5 \[text\] "Mobile, Wi-Fi, hotspot"/); + + const raw = withNoColor(() => + formatSnapshotText( + { + nodes, + truncated: false, + androidSnapshot: { backend: 'android-helper' }, + }, + { raw: true }, + ), + ); + assert.match(raw, /"ref":"e4"/); + assert.match(raw, /"Network & internet"/); + assert.match(raw, /"Mobile, Wi-Fi, hotspot"/); +}); + test('formatSnapshotText collapses adjacent React Native row noise in Android helper output', () => { const nodes = [ { @@ -586,13 +662,14 @@ test('formatSnapshotText collapses adjacent React Native row noise in Android he }), ); - assert.match(text, /Snapshot: 8 visible nodes \(10 total\)/); - assert.match(text, /Collapsed 2 Android helper nodes from the agent-facing text snapshot/); - assert.match(text, /@e5 \[button\] "Adam" \[has image\]/); - assert.match(text, /@e7 \[button\] "Hello from Adam"/); + assert.match(text, /Snapshot: 6 visible nodes \(10 total\)/); + assert.match(text, /Collapsed 4 Android helper nodes from the agent-facing text snapshot/); + assert.match(text, /@e3 \[button\] "Adam, 9:41 AM, Hello from Adam"/); assert.doesNotMatch(text, /\[also text\]/); assert.doesNotMatch(text, /@e4 \[image\] "Adam"/); + assert.doesNotMatch(text, /@e5 \[button\] "Adam"/); assert.doesNotMatch(text, /@e6 \[text\] "Hello from Adam"/); + assert.doesNotMatch(text, /@e7 \[button\] "Hello from Adam"/); assert.match(text, /@e8 \[text-field\] "Write a message\.\.\." \[editable\]/); assert.match(text, /@e9 \[button\] "Send"/); assert.match(text, /@e10 \[button\] "Create expense"/); @@ -611,6 +688,176 @@ test('formatSnapshotText collapses adjacent React Native row noise in Android he assert.match(raw, /"ref":"e7"/); }); +test('formatSnapshotText keeps single repeated child control in Android helper output', () => { + const text = withNoColor(() => + formatSnapshotText({ + nodes: [ + { + ref: 'e1', + index: 0, + depth: 0, + type: 'android.widget.FrameLayout', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + ref: 'e2', + index: 1, + depth: 1, + parentIndex: 0, + type: 'android.widget.Button', + label: 'Send message', + rect: { x: 16, y: 700, width: 358, height: 56 }, + hittable: true, + }, + { + ref: 'e3', + index: 2, + depth: 2, + parentIndex: 1, + type: 'android.widget.Button', + label: 'Send', + rect: { x: 290, y: 708, width: 64, height: 40 }, + hittable: true, + }, + ], + truncated: false, + androidSnapshot: { backend: 'android-helper' }, + }), + ); + + assert.match(text, /Snapshot: 3 nodes/); + assert.doesNotMatch(text, /Collapsed \d+ Android helper node/); + assert.match(text, /@e2 \[button\] "Send message"/); + assert.match(text, /@e3 \[button\] "Send"/); +}); + +test('formatSnapshotText labels Android helper action rows with trailing child controls', () => { + const nodes = [ + { + ref: 'e1', + index: 0, + depth: 0, + type: 'android.widget.FrameLayout', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + ref: 'e2', + index: 1, + depth: 1, + parentIndex: 0, + type: 'android.view.ViewGroup', + identifier: 'com.google.android.youtube:id/linearLayout', + rect: { x: 0, y: 120, width: 390, height: 48 }, + hittable: true, + }, + { + ref: 'e3', + index: 2, + depth: 2, + parentIndex: 1, + type: 'android.widget.ImageView', + rect: { x: 4, y: 132, width: 40, height: 24 }, + }, + { + ref: 'e4', + index: 3, + depth: 2, + parentIndex: 1, + type: 'android.widget.TextView', + label: 'lofi hip hop', + rect: { x: 52, y: 132, width: 260, height: 24 }, + }, + { + ref: 'e5', + index: 4, + depth: 2, + parentIndex: 1, + type: 'android.widget.ImageView', + label: 'Edit suggestion lofi hip hop', + rect: { x: 330, y: 120, width: 48, height: 48 }, + hittable: true, + }, + ]; + + const text = withNoColor(() => + formatSnapshotText({ + nodes, + truncated: false, + androidSnapshot: { backend: 'android-helper' }, + }), + ); + + assert.match(text, /Snapshot: 3 visible nodes \(5 total\)/); + assert.match(text, /@e2 \[group\] "lofi hip hop"/); + assert.doesNotMatch(text, /@e4 \[text\] "lofi hip hop"/); + assert.match(text, /@e5 \[image\] "Edit suggestion lofi hip hop"/); +}); + +test('formatSnapshotText hides Android helper rectless offscreen rows and derives above hints', () => { + const nodes = [ + { + ref: 'e1', + index: 0, + depth: 0, + type: 'android.widget.FrameLayout', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + ref: 'e2', + index: 1, + depth: 1, + parentIndex: 0, + type: 'android.widget.ScrollView', + rect: { x: 0, y: 120, width: 390, height: 640 }, + hiddenContentBelow: true, + }, + { + ref: 'e3', + index: 2, + depth: 2, + parentIndex: 1, + type: 'android.widget.Button', + label: 'Save Citrus Starter Kit', + hittable: true, + }, + { + ref: 'e4', + index: 3, + depth: 2, + parentIndex: 1, + type: 'android.widget.Button', + label: 'View details', + identifier: 'details-pretzel-bites', + rect: { x: 24, y: 180, width: 342, height: 48 }, + hittable: true, + }, + { + ref: 'e5', + index: 4, + depth: 3, + parentIndex: 3, + type: 'android.widget.TextView', + label: 'View details', + rect: { x: 140, y: 192, width: 110, height: 24 }, + }, + ]; + + const text = withNoColor(() => + formatSnapshotText({ + nodes, + truncated: false, + androidSnapshot: { backend: 'android-helper' }, + }), + ); + + assert.match(text, /Snapshot: 3 visible nodes \(5 total\)/); + assert.match(text, /\[content above scroll-area hidden\]/); + assert.match(text, /\[content below scroll-area hidden\]/); + assert.doesNotMatch(text, /Save Citrus Starter Kit/); + assert.match(text, /@e4 \[button\] "View details"/); + assert.doesNotMatch(text, /@e5 \[text\] "View details"/); +}); + test('formatSnapshotText keeps ordinary repeated labels on separate rows', () => { const nodes = [ { @@ -687,6 +934,63 @@ test('formatSnapshotText renders explicit hidden scroll-area content hints', () assert.match(text, /^ @e2 \[scroll-area\] "Messages" \[scrollable\]$/m); assert.match(text, /^ \[content above scroll-area hidden\]$/m); assert.match(text, /^ \[content below scroll-area hidden\]$/m); + assert.ok( + text.indexOf('[content above scroll-area hidden]') < text.indexOf('@e3 [button]'), + 'above hint should appear before visible scroll-area content', + ); + assert.ok( + text.indexOf('@e3 [button]') < text.indexOf('[content below scroll-area hidden]'), + 'below hint should appear after visible scroll-area content', + ); +}); + +test('formatSnapshotText keeps below scroll hints at the bottom when depth is flattened', () => { + const text = withNoColor(() => + formatSnapshotText({ + nodes: [ + { + ref: 'e1', + index: 0, + depth: 0, + type: 'Window', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + ref: 'e2', + index: 1, + depth: 1, + parentIndex: 0, + type: 'android.widget.ScrollView', + label: 'Catalog', + rect: { x: 0, y: 120, width: 390, height: 500 }, + hiddenContentAbove: true, + hiddenContentBelow: true, + }, + { + ref: 'e3', + index: 2, + depth: 1, + parentIndex: 1, + type: 'android.widget.Button', + label: 'Visible product', + rect: { x: 20, y: 240, width: 350, height: 48 }, + hittable: true, + }, + ], + truncated: false, + }), + ); + + assert.match(text, /^ @e2 \[scroll-area\] "Catalog" \[scrollable\]$/m); + assert.match(text, /^ @e3 \[button\] "Visible product"$/m); + assert.ok( + text.indexOf('[content above scroll-area hidden]') < text.indexOf('@e3 [button]'), + 'above hint should stay at the top of the scroll-area', + ); + assert.ok( + text.indexOf('@e3 [button]') < text.indexOf('[content below scroll-area hidden]'), + 'below hint should stay at the bottom of the scroll-area', + ); }); test('formatSnapshotText prefers payload visibility metadata for partial snapshot headers', () => { diff --git a/src/utils/android-helper-presentation/geometry.ts b/src/utils/android-helper-presentation/geometry.ts new file mode 100644 index 000000000..a40c42a31 --- /dev/null +++ b/src/utils/android-helper-presentation/geometry.ts @@ -0,0 +1,23 @@ +import type { Rect } from '../snapshot.ts'; + +export function hasRenderableArea(rect: Rect): boolean { + return rect.width > 0 && rect.height > 0; +} + +export function isRectContainedBy(rect: Rect | undefined, container: Rect | undefined): boolean { + if (!rect || !container) return false; + const tolerance = 2; + return ( + rect.x >= container.x - tolerance && + rect.y >= container.y - tolerance && + rect.x + rect.width <= container.x + container.width + tolerance && + rect.y + rect.height <= container.y + container.height + tolerance + ); +} + +export function areSameVisualRow(left: Rect | undefined, right: Rect | undefined): boolean { + if (!left || !right) return true; + const leftCenterY = left.y + left.height / 2; + const rightCenterY = right.y + right.height / 2; + return Math.abs(leftCenterY - rightCenterY) <= Math.max(left.height, right.height, 1); +} diff --git a/src/utils/android-helper-presentation/labels.ts b/src/utils/android-helper-presentation/labels.ts new file mode 100644 index 000000000..6115c1eca --- /dev/null +++ b/src/utils/android-helper-presentation/labels.ts @@ -0,0 +1,35 @@ +import type { SnapshotNode } from '../snapshot.ts'; +import { displayNodeLabel } from '../snapshot-tree.ts'; + +export function visibleNodeLabel(node: SnapshotNode): string { + const label = displayNodeLabel(node); + if (!label || label !== node.identifier?.trim()) { + return label; + } + if (!isGenericResourceId(label)) { + return label; + } + const type = (node.type ?? '').toLowerCase(); + if ( + type.includes('view') || + type.includes('layout') || + type.includes('image') || + type.includes('list') || + type.includes('recyclerview') || + type.includes('collection') + ) { + return ''; + } + return label; +} + +export function normalizeStructuralNodeLabel(label: string): string | null { + const normalized = label.trim().replace(/\s+/g, ' ').toLowerCase(); + if (!normalized) return null; + if (/^(true|false|\d+)$/.test(normalized)) return null; + return normalized; +} + +function isGenericResourceId(value: string): boolean { + return /^[\w.]+:id\/[\w.-]+$/i.test(value); +} diff --git a/src/utils/android-helper-presentation/predicates.ts b/src/utils/android-helper-presentation/predicates.ts new file mode 100644 index 000000000..767bae08d --- /dev/null +++ b/src/utils/android-helper-presentation/predicates.ts @@ -0,0 +1,16 @@ +import type { SnapshotNode } from '../snapshot.ts'; + +export function isRootNode(node: SnapshotNode): boolean { + return typeof node.parentIndex !== 'number'; +} + +export function isEditableNode(node: SnapshotNode): boolean { + const type = (node.type ?? '').toLowerCase(); + const identifier = (node.identifier ?? '').trim().toLowerCase(); + return type.includes('edittext') || type.includes('textfield') || identifier === 'composer'; +} + +export function isScrollableNode(node: SnapshotNode): boolean { + const type = (node.type ?? '').toLowerCase(); + return type.includes('scroll') || type.includes('list') || type.includes('recyclerview'); +} diff --git a/src/utils/android-helper-presentation/structural-noise.ts b/src/utils/android-helper-presentation/structural-noise.ts new file mode 100644 index 000000000..abb43dc76 --- /dev/null +++ b/src/utils/android-helper-presentation/structural-noise.ts @@ -0,0 +1,265 @@ +import type { SnapshotNode } from '../snapshot.ts'; +import { areSameVisualRow, hasRenderableArea, isRectContainedBy } from './geometry.ts'; +import { normalizeStructuralNodeLabel, visibleNodeLabel } from './labels.ts'; +import { isEditableNode, isRootNode } from './predicates.ts'; +import { collectDescendants, markNodeAndDescendantsForRemoval } from './tree.ts'; + +const ACTIONABLE_STRUCTURAL_TYPE_TOKENS = ['button', 'switch', 'checkbox', 'radio']; +const STRUCTURAL_NOISE_TYPE_TOKENS = ['button', 'image', 'textview', 'view']; + +export function markUnlabeledActionRowsForPromotion( + nodes: SnapshotNode[], + removed: Set, + replacements: Map, +): void { + for (const node of nodes) { + if (removed.has(node.index) || !isUnlabeledActionRow(node)) continue; + + const descendants = collectDescendants(nodes, node.index); + const promotedLabel = collectPromotableRowLabels(descendants, node, removed).join(', '); + if (!promotedLabel) continue; + + replacements.set(node.index, { + ...node, + ...replacements.get(node.index), + label: promotedLabel, + }); + for (const descendant of descendants) { + if (isPassiveRowContent(descendant)) { + markNodeAndDescendantsForRemoval(nodes, descendant.index, removed); + } + } + } +} + +export function markAdjacentDuplicateStructuralNodesForRemoval( + nodes: SnapshotNode[], + removed: Set, + replacements: Map, +): void { + const lastByLabel = new Map(); + for (const node of nodes) { + if (removed.has(node.index) || !isStructuralNoiseCandidate(node)) { + continue; + } + const label = normalizeStructuralNodeLabel(visibleNodeLabel(node)); + if (!label) continue; + + // RN can expose the same visible row content through parallel typed siblings + // such as ImageView + Button or TextView + Button, so label is the signature. + const previous = lastByLabel.get(label); + if (previous && shouldCollapseAdjacentStructuralDuplicate(previous, node, removed)) { + const survivor = collapseAdjacentStructuralDuplicate( + nodes, + previous, + node, + removed, + replacements, + ); + lastByLabel.set(label, survivor); + continue; + } + lastByLabel.set(label, node); + } +} + +export function markRepeatedActionRowDescendantsForRemoval( + nodes: SnapshotNode[], + removed: Set, +): void { + for (const node of nodes) { + if (removed.has(node.index) || !isActionRowParent(node)) continue; + + const parentLabel = normalizeStructuralNodeLabel(visibleNodeLabel(node)); + if (!parentLabel) continue; + + const repeatedDescendants = collectDescendants(nodes, node.index).filter( + (descendant) => + !removed.has(descendant.index) && + isRepeatedActionRowDescendant(node, descendant, parentLabel), + ); + for (const descendant of repeatedDescendants.filter(isPassiveRowContent)) { + markNodeAndDescendantsForRemoval(nodes, descendant.index, removed); + } + + const repeatedControls = repeatedDescendants.filter( + (descendant) => !isPassiveRowContent(descendant), + ); + const repeatedControlLabels = new Set( + repeatedControls + .map((descendant) => normalizeStructuralNodeLabel(visibleNodeLabel(descendant))) + .filter((label): label is string => Boolean(label)), + ); + if (repeatedControls.length < 2 || repeatedControlLabels.size < 2) continue; + + for (const descendant of repeatedControls) { + markNodeAndDescendantsForRemoval(nodes, descendant.index, removed); + } + } +} + +function isUnlabeledActionRow(node: SnapshotNode): boolean { + return ( + node.hittable === true && + !isEditableNode(node) && + Boolean(node.rect && hasRenderableArea(node.rect)) && + visibleNodeLabel(node).trim().length === 0 + ); +} + +function collectPromotableRowLabels( + descendants: SnapshotNode[], + parent: SnapshotNode, + removed: Set, +): string[] { + const labels: string[] = []; + const seen = new Set(); + for (const descendant of descendants) { + if ( + removed.has(descendant.index) || + !isPassiveRowContent(descendant) || + !isRectContainedBy(descendant.rect, parent.rect) + ) { + continue; + } + const label = visibleNodeLabel(descendant).trim().replace(/\s+/g, ' '); + const normalized = normalizeStructuralNodeLabel(label); + if (!label || !normalized || seen.has(normalized)) continue; + seen.add(normalized); + labels.push(label); + } + return labels; +} + +function isPassiveRowContent(node: SnapshotNode): boolean { + if (node.hittable === true || isEditableNode(node)) return false; + const type = (node.type ?? '').toLowerCase(); + return type.includes('text') || type.includes('image') || type.includes('icon'); +} + +function isActionRowParent(node: SnapshotNode): boolean { + return ( + node.hittable === true && + !isEditableNode(node) && + Boolean(node.rect && hasRenderableArea(node.rect)) && + Boolean(normalizeStructuralNodeLabel(visibleNodeLabel(node))) + ); +} + +function isRepeatedActionRowDescendant( + parent: SnapshotNode, + node: SnapshotNode, + parentLabel: string, +): boolean { + if (!isStructuralNoiseCandidate(node) || !isRectContainedBy(node.rect, parent.rect)) { + return false; + } + const label = normalizeStructuralNodeLabel(visibleNodeLabel(node)); + return Boolean(label && parentLabel !== label && parentLabel.includes(label)); +} + +function shouldCollapseAdjacentStructuralDuplicate( + previous: SnapshotNode, + node: SnapshotNode, + removed: Set, +): boolean { + return ( + !removed.has(previous.index) && + areSameVisualRow(previous.rect, node.rect) && + (areStructurallyAdjacent(previous, node) || isPassiveChildOfActionableDuplicate(previous, node)) + ); +} + +function collapseAdjacentStructuralDuplicate( + nodes: SnapshotNode[], + previous: SnapshotNode, + node: SnapshotNode, + removed: Set, + replacements: Map, +): SnapshotNode { + const survivor = chooseStructuralRepresentative(previous, node); + const collapsed = survivor.index === previous.index ? node : previous; + const collapsedHint = (collapsed.type ?? '').toLowerCase().includes('image') ? 'has image' : null; + const existing = replacements.get(survivor.index) ?? survivor; + const collapsedHints = + replacements.get(collapsed.index)?.presentationHints ?? collapsed.presentationHints; + replacements.set(survivor.index, { + ...existing, + presentationHints: mergePresentationHints( + existing.presentationHints, + collapsedHints, + collapsedHint, + ), + }); + markNodeAndDescendantsForRemoval(nodes, collapsed.index, removed); + return replacements.get(survivor.index) ?? survivor; +} + +function mergePresentationHints( + current: SnapshotNode['presentationHints'], + collapsed: SnapshotNode['presentationHints'], + extra: string | null, +): string[] { + return [ + ...new Set([ + ...(Array.isArray(current) ? current : []), + ...(Array.isArray(collapsed) ? collapsed : []), + ...(extra ? [extra] : []), + ]), + ]; +} + +function isStructuralNoiseCandidate(node: SnapshotNode): boolean { + if (!node.rect || !hasRenderableArea(node.rect) || isRootNode(node) || isEditableNode(node)) { + return false; + } + const type = (node.type ?? '').toLowerCase(); + return type === 'text' || hasAnyTypeToken(type, STRUCTURAL_NOISE_TYPE_TOKENS); +} + +function chooseStructuralRepresentative(left: SnapshotNode, right: SnapshotNode): SnapshotNode { + const leftScore = structuralRepresentativeScore(left); + const rightScore = structuralRepresentativeScore(right); + return rightScore > leftScore ? right : left; +} + +function structuralRepresentativeScore(node: SnapshotNode): number { + const type = (node.type ?? '').toLowerCase(); + let score = 0; + if (hasAnyTypeToken(type, ACTIONABLE_STRUCTURAL_TYPE_TOKENS)) { + score += 100; + } else if (type.includes('image')) { + score += 30; + } else if (type.includes('textview') || type === 'text') { + score += 20; + } else if (type.includes('view')) { + score += 10; + } + if (node.hittable === true) score += 20; + if (node.enabled !== false) score += 5; + return score; +} + +function hasAnyTypeToken(type: string, tokens: string[]): boolean { + return tokens.some((token) => type.includes(token)); +} + +function isPassiveChildOfActionableDuplicate(left: SnapshotNode, right: SnapshotNode): boolean { + const parent = + left.parentIndex === right.index ? right : right.parentIndex === left.index ? left : null; + const child = parent?.index === left.index ? right : parent?.index === right.index ? left : null; + if (!parent || !child) return false; + return chooseStructuralRepresentative(parent, child).index === parent.index; +} + +function areStructurallyAdjacent(left: SnapshotNode, right: SnapshotNode): boolean { + if (left.parentIndex === right.parentIndex) { + return Math.abs(left.index - right.index) <= 3; + } + if (left.parentIndex === right.index || right.parentIndex === left.index) { + return false; + } + return ( + Math.abs((left.depth ?? 0) - (right.depth ?? 0)) <= 1 && Math.abs(left.index - right.index) <= 2 + ); +} diff --git a/src/utils/android-helper-presentation/tree.ts b/src/utils/android-helper-presentation/tree.ts new file mode 100644 index 000000000..33cbc14d2 --- /dev/null +++ b/src/utils/android-helper-presentation/tree.ts @@ -0,0 +1,47 @@ +import type { SnapshotNode } from '../snapshot.ts'; + +export function findAncestor( + node: SnapshotNode, + nodeIndex: ReadonlyMap, + predicate: (node: SnapshotNode) => boolean, +): SnapshotNode | null { + let current = node; + while (typeof current.parentIndex === 'number') { + const parent = nodeIndex.get(current.parentIndex); + if (!parent) return null; + if (predicate(parent)) return parent; + current = parent; + } + return null; +} + +export function collectDescendants(nodes: SnapshotNode[], rootIndex: number): SnapshotNode[] { + const descendants: SnapshotNode[] = []; + const pending = [rootIndex]; + while (pending.length > 0) { + const current = pending.pop(); + for (const node of nodes) { + if (node.parentIndex !== current) continue; + descendants.push(node); + pending.push(node.index); + } + } + return descendants; +} + +export function markNodeAndDescendantsForRemoval( + nodes: SnapshotNode[], + rootIndex: number, + removed: Set, +): void { + const pending = [rootIndex]; + while (pending.length > 0) { + const current = pending.pop()!; + if (removed.has(current)) continue; + removed.add(current); + for (const node of nodes) { + if (node.parentIndex !== current || removed.has(node.index)) continue; + pending.push(node.index); + } + } +} diff --git a/src/utils/android-helper-snapshot-presentation.ts b/src/utils/android-helper-snapshot-presentation.ts index b193a4789..e33a4795c 100644 --- a/src/utils/android-helper-snapshot-presentation.ts +++ b/src/utils/android-helper-snapshot-presentation.ts @@ -1,9 +1,15 @@ -import type { Rect, SnapshotNode } from './snapshot.ts'; -import { isEmailLikeLabel, normalizeRepeatedNodeLabel } from './snapshot-label-signals.ts'; -import { displayNodeLabel } from './snapshot-tree.ts'; - -const ACTIONABLE_STRUCTURAL_TYPE_TOKENS = ['button', 'switch', 'checkbox', 'radio']; -const STRUCTURAL_NOISE_TYPE_TOKENS = ['button', 'image', 'textview', 'view']; +import type { SnapshotNode } from './snapshot.ts'; +import { hasRenderableArea } from './android-helper-presentation/geometry.ts'; +import { isRootNode, isScrollableNode } from './android-helper-presentation/predicates.ts'; +import { + markAdjacentDuplicateStructuralNodesForRemoval, + markRepeatedActionRowDescendantsForRemoval, + markUnlabeledActionRowsForPromotion, +} from './android-helper-presentation/structural-noise.ts'; +import { + findAncestor, + markNodeAndDescendantsForRemoval, +} from './android-helper-presentation/tree.ts'; export type AndroidHelperPresentationInput = { nodes: SnapshotNode[]; @@ -36,9 +42,11 @@ function filterAndroidHelperTextOutputNodes(nodes: SnapshotNode[]): SnapshotNode const removed = new Set(); const replacements = new Map(); + const nodeIndex = new Map(nodes.map((node) => [node.index, node])); markZeroAreaNodesForRemoval(nodes, removed); - markBottomNavNodesNearComposerForRemoval(nodes, removed, replacements); - markDuplicateEmailButtonsForRemoval(nodes, removed); + markRectlessScrollableDescendantsForRemoval(nodes, nodeIndex, removed, replacements); + markUnlabeledActionRowsForPromotion(nodes, removed, replacements); + markRepeatedActionRowDescendantsForRemoval(nodes, removed); markAdjacentDuplicateStructuralNodesForRemoval(nodes, removed, replacements); return nodes @@ -55,357 +63,54 @@ function markZeroAreaNodesForRemoval(nodes: SnapshotNode[], removed: Set } } -function markBottomNavNodesNearComposerForRemoval( - nodes: SnapshotNode[], - removed: Set, - replacements: Map, -): void { - const composer = findBottomEditableNode(nodes); - if (!composer?.rect) return; - - const navNodes = findBottomNavigationLikeNodes(nodes, composer.rect); - for (const node of navNodes) { - addPresentationHints(replacements, node, ['likely navigation']); - markDescendantsForRemoval(nodes, node.index, removed); - } -} - -function markDuplicateEmailButtonsForRemoval(nodes: SnapshotNode[], removed: Set): void { - const seenByParent = new Map(); - for (const node of nodes) { - if (removed.has(node.index) || !isEmailLikeLabel(displayNodeLabel(node))) { - continue; - } - const parentKey = typeof node.parentIndex === 'number' ? String(node.parentIndex) : 'root'; - const signature = `${parentKey}|${displayNodeLabel(node).trim().toLowerCase()}`; - const previous = seenByParent.get(signature); - if (!previous) { - seenByParent.set(signature, node); - continue; - } - if (areSameVisualRow(previous.rect, node.rect)) { - markNodeAndDescendantsForRemoval(nodes, node.index, removed); - } - } -} - -function markAdjacentDuplicateStructuralNodesForRemoval( +function markRectlessScrollableDescendantsForRemoval( nodes: SnapshotNode[], + nodeIndex: ReadonlyMap, removed: Set, replacements: Map, ): void { - const lastByLabel = new Map(); for (const node of nodes) { - if (removed.has(node.index) || !isStructuralNoiseCandidate(node)) { - continue; - } - const label = normalizeStructuralNodeLabel(displayNodeLabel(node)); - if (!label) continue; + if (removed.has(node.index) || node.rect || isRootNode(node)) continue; - // RN can expose the same visible row content through parallel typed siblings - // such as ImageView + Button or TextView + Button, so label is the signature. - const previous = lastByLabel.get(label); - if (previous && shouldCollapseAdjacentStructuralDuplicate(previous, node, removed)) { - const survivor = collapseAdjacentStructuralDuplicate( - nodes, - previous, - node, - removed, - replacements, - ); - lastByLabel.set(label, survivor); - continue; + const scrollAncestor = findAncestor(node, nodeIndex, isScrollableNode); + if (scrollAncestor) { + addHiddenContentHint(replacements, scrollAncestor, inferRectlessNodeDirection(node, nodes)); } - lastByLabel.set(label, node); + markNodeAndDescendantsForRemoval(nodes, node.index, removed); } } -function shouldCollapseAdjacentStructuralDuplicate( - previous: SnapshotNode, +function inferRectlessNodeDirection( node: SnapshotNode, - removed: Set, -): boolean { - return ( - !removed.has(previous.index) && - areSameVisualRow(previous.rect, node.rect) && - areStructurallyAdjacentForCollapse(previous, node) - ); -} - -function collapseAdjacentStructuralDuplicate( nodes: SnapshotNode[], - previous: SnapshotNode, - node: SnapshotNode, - removed: Set, - replacements: Map, -): SnapshotNode { - const survivor = chooseStructuralRepresentative(previous, node); - const collapsed = survivor.index === previous.index ? node : previous; - const collapsedHint = imagePresentationHint(collapsed); - addPresentationHints(replacements, survivor, [ - ...readPresentationHints(replacements.get(collapsed.index) ?? collapsed), - ...(collapsedHint ? [collapsedHint] : []), - ]); - markNodeAndDescendantsForRemoval(nodes, collapsed.index, removed); - return replacements.get(survivor.index) ?? survivor; -} - -function markNodeAndDescendantsForRemoval( - nodes: SnapshotNode[], - rootIndex: number, - removed: Set, -): void { - removed.add(rootIndex); - markDescendantsForRemoval(nodes, rootIndex, removed); -} - -function markDescendantsForRemoval( - nodes: SnapshotNode[], - rootIndex: number, - removed: Set, -): void { - const pending = [rootIndex]; - while (pending.length > 0) { - const current = pending.pop(); - for (const node of nodes) { - if (node.parentIndex !== current || removed.has(node.index)) continue; - removed.add(node.index); - pending.push(node.index); - } - } -} - -function addPresentationHints( +): 'above' | 'below' | null { + const renderedSiblingIndexes = nodes + .filter( + (candidate) => + candidate.parentIndex === node.parentIndex && + candidate.rect && + hasRenderableArea(candidate.rect), + ) + .map((candidate) => candidate.index); + if (renderedSiblingIndexes.length === 0) return null; + + if (node.index < Math.min(...renderedSiblingIndexes)) return 'above'; + if (node.index > Math.max(...renderedSiblingIndexes)) return 'below'; + return null; +} + +function addHiddenContentHint( replacements: Map, node: SnapshotNode, - hints: string[], + direction: 'above' | 'below' | null, ): void { + if (!direction) return; const existing = replacements.get(node.index) ?? node; - const merged = [...new Set([...readPresentationHints(existing), ...hints.filter(Boolean)])]; replacements.set(node.index, { ...existing, - presentationHints: merged, + hiddenContentAbove: + existing.hiddenContentAbove === true || direction === 'above' ? true : undefined, + hiddenContentBelow: + existing.hiddenContentBelow === true || direction === 'below' ? true : undefined, }); } - -function readPresentationHints(node: SnapshotNode): string[] { - return Array.isArray(node.presentationHints) ? node.presentationHints : []; -} - -function hasRenderableArea(rect: Rect): boolean { - return rect.width > 0 && rect.height > 0; -} - -function isRootNode(node: SnapshotNode): boolean { - return typeof node.parentIndex !== 'number'; -} - -function resolveLikelyViewport(nodes: SnapshotNode[]): Rect | null { - let best: Rect | null = null; - let bestArea = 0; - for (const node of nodes) { - if (!node.rect || !hasRenderableArea(node.rect)) continue; - const area = node.rect.width * node.rect.height; - if (area > bestArea) { - best = node.rect; - bestArea = area; - } - } - return best; -} - -function findBottomEditableNode(nodes: SnapshotNode[]): SnapshotNode | null { - const viewport = resolveLikelyViewport(nodes); - const lowerBound = viewport ? viewport.y + viewport.height * 0.65 : Number.NEGATIVE_INFINITY; - return ( - nodes.find((node) => { - if (!node.rect || node.rect.y < lowerBound) return false; - return isEditableNode(node); - }) ?? null - ); -} - -function findBottomNavigationLikeNodes(nodes: SnapshotNode[], composerRect: Rect): SnapshotNode[] { - const rows = new Map(); - for (const node of nodes) { - if (!isBottomNavigationCandidate(node, nodes, composerRect)) continue; - const rect = node.rect!; - const parentKey = typeof node.parentIndex === 'number' ? String(node.parentIndex) : 'root'; - const rowKey = [ - parentKey, - bucket(rect.y + rect.height / 2, 24), - bucket(rect.width, 24), - bucket(rect.height, 24), - ].join('|'); - const row = rows.get(rowKey); - if (row) { - row.push(node); - } else { - rows.set(rowKey, [node]); - } - } - - const navigationNodes: SnapshotNode[] = []; - for (const row of rows.values()) { - if (!isBottomNavigationRow(row, nodes, composerRect)) continue; - navigationNodes.push(...row); - } - return navigationNodes; -} - -function isNearComposerVerticalBand(rect: Rect, composerRect: Rect): boolean { - const tolerance = Math.max(composerRect.height * 2, 96); - return ( - rect.y <= composerRect.y + composerRect.height + tolerance && - rect.y + rect.height >= composerRect.y - tolerance - ); -} - -function isBottomNavigationCandidate( - node: SnapshotNode, - nodes: SnapshotNode[], - composerRect: Rect, -): boolean { - if ( - !node.rect || - !hasRenderableArea(node.rect) || - isRootNode(node) || - isEditableNode(node) || - isTextOnlyNode(node) || - !isNearComposerVerticalBand(node.rect, composerRect) - ) { - return false; - } - return normalizeRepeatedNodeLabel(getNodeOrDescendantLabel(node, nodes)) !== null; -} - -function isBottomNavigationRow( - row: SnapshotNode[], - nodes: SnapshotNode[], - composerRect: Rect, -): boolean { - if (row.length < 3) return false; - const labels = new Set(); - for (const node of row) { - const label = normalizeRepeatedNodeLabel(getNodeOrDescendantLabel(node, nodes)); - if (label) labels.add(label); - } - if (labels.size < 3) return false; - - const sorted = [...row].sort((left, right) => left.rect!.x - right.rect!.x); - const first = sorted[0]!.rect!; - const last = sorted[sorted.length - 1]!.rect!; - const horizontalSpan = last.x + last.width - first.x; - return horizontalSpan >= composerRect.width; -} - -function isEditableNode(node: SnapshotNode): boolean { - const type = (node.type ?? '').toLowerCase(); - const identifier = (node.identifier ?? '').trim().toLowerCase(); - return type.includes('edittext') || type.includes('textfield') || identifier === 'composer'; -} - -function isTextOnlyNode(node: SnapshotNode): boolean { - const type = (node.type ?? '').toLowerCase(); - return type.includes('textview') || type === 'text'; -} - -function isStructuralNoiseCandidate(node: SnapshotNode): boolean { - if (!node.rect || !hasRenderableArea(node.rect) || isRootNode(node) || isEditableNode(node)) { - return false; - } - const type = (node.type ?? '').toLowerCase(); - return type === 'text' || hasAnyTypeToken(type, STRUCTURAL_NOISE_TYPE_TOKENS); -} - -function chooseStructuralRepresentative(left: SnapshotNode, right: SnapshotNode): SnapshotNode { - const leftScore = structuralRepresentativeScore(left); - const rightScore = structuralRepresentativeScore(right); - return rightScore > leftScore ? right : left; -} - -function structuralRepresentativeScore(node: SnapshotNode): number { - const type = (node.type ?? '').toLowerCase(); - let score = 0; - if (hasAnyTypeToken(type, ACTIONABLE_STRUCTURAL_TYPE_TOKENS)) { - score += 100; - } else if (type.includes('image')) { - score += 30; - } else if (type.includes('textview') || type === 'text') { - score += 20; - } else if (type.includes('view')) { - score += 10; - } - if (node.hittable === true) score += 20; - if (node.enabled !== false) score += 5; - return score; -} - -function hasAnyTypeToken(type: string, tokens: string[]): boolean { - return tokens.some((token) => type.includes(token)); -} - -function imagePresentationHint(node: SnapshotNode): string | null { - return (node.type ?? '').toLowerCase().includes('image') ? 'has image' : null; -} - -function areStructurallyAdjacentForCollapse(left: SnapshotNode, right: SnapshotNode): boolean { - if (areStructurallyAdjacent(left, right)) { - return true; - } - return isPassiveChildOfActionableDuplicate(left, right); -} - -function isPassiveChildOfActionableDuplicate(left: SnapshotNode, right: SnapshotNode): boolean { - const parent = - left.parentIndex === right.index ? right : right.parentIndex === left.index ? left : null; - const child = parent?.index === left.index ? right : parent?.index === right.index ? left : null; - if (!parent || !child) return false; - return chooseStructuralRepresentative(parent, child).index === parent.index; -} - -function normalizeStructuralNodeLabel(label: string): string | null { - const normalized = label.trim().replace(/\s+/g, ' ').toLowerCase(); - if (!normalized) return null; - if (/^(true|false|\d+)$/.test(normalized)) return null; - return normalized; -} - -function getNodeOrDescendantLabel(node: SnapshotNode, nodes: SnapshotNode[]): string { - const label = displayNodeLabel(node); - if (label.trim()) return label; - const pending = [node.index]; - while (pending.length > 0) { - const current = pending.pop(); - for (const child of nodes) { - if (child.parentIndex !== current) continue; - const childLabel = displayNodeLabel(child); - if (childLabel.trim()) return childLabel; - pending.push(child.index); - } - } - return ''; -} - -function bucket(value: number, size: number): number { - return Math.round(value / size); -} - -function areSameVisualRow(left: Rect | undefined, right: Rect | undefined): boolean { - if (!left || !right) return true; - const leftCenterY = left.y + left.height / 2; - const rightCenterY = right.y + right.height / 2; - return Math.abs(leftCenterY - rightCenterY) <= Math.max(left.height, right.height, 1); -} - -function areStructurallyAdjacent(left: SnapshotNode, right: SnapshotNode): boolean { - if (left.parentIndex === right.parentIndex) { - return Math.abs(left.index - right.index) <= 3; - } - if (left.parentIndex === right.index || right.parentIndex === left.index) { - return false; - } - return ( - Math.abs((left.depth ?? 0) - (right.depth ?? 0)) <= 1 && Math.abs(left.index - right.index) <= 2 - ); -} diff --git a/src/utils/output.ts b/src/utils/output.ts index 71c660d81..636fcdbbb 100644 --- a/src/utils/output.ts +++ b/src/utils/output.ts @@ -632,27 +632,64 @@ function readSnapshotWarnings(data: Record): string[] { ); } +type SnapshotDisplayLine = ReturnType[number]; + function renderSnapshotDisplayLines(lines: ReturnType): string[] { const output: string[] = []; - const pendingBelow: ReturnType = []; - const flushClosedBelowHints = (nextDepth: number) => { - while (pendingBelow.length > 0 && nextDepth <= pendingBelow[pendingBelow.length - 1]!.depth) { + const pendingBelow: SnapshotDisplayLine[] = []; + const lineNodesByIndex = new Map(lines.map((line) => [line.node.index, line.node])); + const flushClosedBelowHints = (nextLine?: SnapshotDisplayLine) => { + while ( + pendingBelow.length > 0 && + (!nextLine || + isOutsideHiddenContentContainer( + nextLine, + pendingBelow[pendingBelow.length - 1]!, + lineNodesByIndex, + )) + ) { output.push(...readHiddenContentHintLines(pendingBelow.pop()!, 'below')); } }; for (const line of lines) { - flushClosedBelowHints(line.depth); + flushClosedBelowHints(line); output.push(line.text); output.push(...readHiddenContentHintLines(line, 'above')); if (line.node.hiddenContentBelow) { pendingBelow.push(line); } } - flushClosedBelowHints(0); + flushClosedBelowHints(); return output; } +function isOutsideHiddenContentContainer( + line: SnapshotDisplayLine, + containerLine: SnapshotDisplayLine, + lineNodesByIndex: Map, +): boolean { + if (isDescendantOfRenderedLine(line.node, containerLine.node, lineNodesByIndex)) { + return false; + } + return line.depth <= containerLine.depth; +} + +function isDescendantOfRenderedLine( + node: SnapshotNode, + ancestor: SnapshotNode, + lineNodesByIndex: Map, +): boolean { + let current = node; + while (typeof current.parentIndex === 'number') { + if (current.parentIndex === ancestor.index) return true; + const parent = lineNodesByIndex.get(current.parentIndex); + if (!parent) return false; + current = parent; + } + return false; +} + function buildFlattenedSnapshotDisplayLines(nodes: SnapshotNode[]): string[] { // Flattened output has no subtree boundary to defer below-hints past. return buildSnapshotDisplayLines(nodes, { summarizeTextSurfaces: true }).flatMap((line) => [ @@ -662,7 +699,7 @@ function buildFlattenedSnapshotDisplayLines(nodes: SnapshotNode[]): string[] { } function readHiddenContentHintLines( - line: ReturnType[number], + line: SnapshotDisplayLine, direction?: 'above' | 'below', ): string[] { const target = hintTargetLabel(line.type); diff --git a/src/utils/snapshot-processing.ts b/src/utils/snapshot-processing.ts index a2c038c13..2449dec19 100644 --- a/src/utils/snapshot-processing.ts +++ b/src/utils/snapshot-processing.ts @@ -102,6 +102,14 @@ export function findNearestHittableAncestor( node: SnapshotState['nodes'][number], ): SnapshotState['nodes'][number] | null { if (node.hittable) return node; + return findNearestAncestor(nodes, node, (parent) => parent.hittable === true); +} + +export function findNearestAncestor( + nodes: SnapshotState['nodes'], + node: SnapshotState['nodes'][number], + predicate: (node: SnapshotState['nodes'][number]) => boolean, +): SnapshotState['nodes'][number] | null { let current = node; const visited = new Set(); while (current.parentIndex !== undefined) { @@ -109,7 +117,7 @@ export function findNearestHittableAncestor( visited.add(current.ref); const parent = nodes[current.parentIndex]; if (!parent) break; - if (parent.hittable) return parent; + if (predicate(parent)) return parent; current = parent; } return null; From 96ac61fcf2c29f73eda28e372b009f447d8c8f0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Fri, 22 May 2026 13:47:31 +0200 Subject: [PATCH 2/4] fix: address Android snapshot review edge cases --- .../__tests__/screenshot-overlay.test.ts | 37 ++++++++++++- src/daemon/screenshot-overlay.ts | 3 +- src/utils/__tests__/output.test.ts | 52 +++++++++++++++++++ .../structural-noise.ts | 17 +++--- .../android-helper-snapshot-presentation.ts | 3 ++ 5 files changed, 102 insertions(+), 10 deletions(-) diff --git a/src/daemon/__tests__/screenshot-overlay.test.ts b/src/daemon/__tests__/screenshot-overlay.test.ts index 08ff9974b..67d4e4c0b 100644 --- a/src/daemon/__tests__/screenshot-overlay.test.ts +++ b/src/daemon/__tests__/screenshot-overlay.test.ts @@ -363,6 +363,41 @@ test('buildScreenshotOverlayRefs includes unlabeled Android bottom tab controls' ); }); +test('buildScreenshotOverlayRefs keeps nested unlabeled Android controls separate', () => { + const snapshot = makeSnapshotState( + [ + { + index: 0, + type: 'android.widget.FrameLayout', + rect: { x: 0, y: 0, width: 1344, height: 2992 }, + }, + { + index: 1, + parentIndex: 0, + type: 'android.view.ViewGroup', + hittable: true, + rect: { x: 80, y: 240, width: 400, height: 240 }, + }, + { + index: 2, + parentIndex: 1, + type: 'android.view.ViewGroup', + hittable: true, + rect: { x: 120, y: 280, width: 160, height: 120 }, + }, + ], + { backend: 'android' }, + ); + + const overlayRefs = buildScreenshotOverlayRefs(snapshot, 1344, 2992); + + assert.deepEqual( + overlayRefs.map((overlayRef) => overlayRef.ref), + ['e2', 'e3'], + ); + assert.ok(overlayRefs.every((overlayRef) => !overlayRef.label)); +}); + test('buildScreenshotOverlayRefs trims Android row spacing from unlabeled action containers', () => { const snapshot = makeSnapshotState( [ @@ -402,7 +437,7 @@ test('buildScreenshotOverlayRefs trims Android row spacing from unlabeled action { ref: 'e2', label: 'Google', - rect: { x: 0, y: 447, width: 1344, height: 234 }, + rect: { x: 0, y: 447, width: 1344, height: 282 }, overlayRect: { x: 0, y: 447, width: 1344, height: 234 }, center: { x: 672, y: 564 }, }, diff --git a/src/daemon/screenshot-overlay.ts b/src/daemon/screenshot-overlay.ts index 662978ef9..57824e59f 100644 --- a/src/daemon/screenshot-overlay.ts +++ b/src/daemon/screenshot-overlay.ts @@ -110,7 +110,7 @@ export function buildScreenshotOverlayRefs( candidatesByRef.set(target.ref, { ref: target.ref, label, - rect: overlaySourceRect, + rect: target.rect, overlayRect, score, }); @@ -223,6 +223,7 @@ function suppressContainedCandidates(candidates: OverlayCandidate[]): OverlayCan )) { const duplicateIndex = kept.findIndex( (current) => + current.label !== undefined && current.label === candidate.label && (rectContains(current.overlayRect, candidate.overlayRect) || rectContains(candidate.overlayRect, current.overlayRect)), diff --git a/src/utils/__tests__/output.test.ts b/src/utils/__tests__/output.test.ts index 9133e7104..cd8c705b9 100644 --- a/src/utils/__tests__/output.test.ts +++ b/src/utils/__tests__/output.test.ts @@ -556,6 +556,58 @@ test('formatSnapshotText promotes Android helper unlabeled action rows', () => { assert.match(raw, /"Mobile, Wi-Fi, hotspot"/); }); +test('formatSnapshotText keeps passive row descendants that were not promoted', () => { + const nodes = [ + { + ref: 'e1', + index: 0, + depth: 0, + type: 'android.widget.FrameLayout', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + ref: 'e2', + index: 1, + depth: 1, + parentIndex: 0, + type: 'android.widget.LinearLayout', + rect: { x: 0, y: 160, width: 390, height: 72 }, + hittable: true, + }, + { + ref: 'e3', + index: 2, + depth: 2, + parentIndex: 1, + type: 'android.widget.TextView', + label: 'Inside row', + rect: { x: 72, y: 176, width: 260, height: 28 }, + }, + { + ref: 'e4', + index: 3, + depth: 2, + parentIndex: 1, + type: 'android.widget.TextView', + label: 'Outside parent bounds', + rect: { x: 72, y: 260, width: 260, height: 28 }, + }, + ]; + const text = withNoColor(() => + formatSnapshotText({ + nodes, + truncated: false, + androidSnapshot: { backend: 'android-helper' }, + }), + ); + + assert.match(text, /Snapshot: 3 visible nodes \(4 total\)/); + assert.match(text, /Collapsed 1 Android helper node from the agent-facing text snapshot/); + assert.match(text, /@e2 \[group\] "Inside row"/); + assert.doesNotMatch(text, /@e3 \[text\] "Inside row"/); + assert.match(text, /@e4 \[text\] "Outside parent bounds"/); +}); + test('formatSnapshotText collapses adjacent React Native row noise in Android helper output', () => { const nodes = [ { diff --git a/src/utils/android-helper-presentation/structural-noise.ts b/src/utils/android-helper-presentation/structural-noise.ts index abb43dc76..9e4622003 100644 --- a/src/utils/android-helper-presentation/structural-noise.ts +++ b/src/utils/android-helper-presentation/structural-noise.ts @@ -16,7 +16,8 @@ export function markUnlabeledActionRowsForPromotion( if (removed.has(node.index) || !isUnlabeledActionRow(node)) continue; const descendants = collectDescendants(nodes, node.index); - const promotedLabel = collectPromotableRowLabels(descendants, node, removed).join(', '); + const promotedContent = collectPromotableRowContent(descendants, node, removed); + const promotedLabel = promotedContent.labels.join(', '); if (!promotedLabel) continue; replacements.set(node.index, { @@ -24,10 +25,8 @@ export function markUnlabeledActionRowsForPromotion( ...replacements.get(node.index), label: promotedLabel, }); - for (const descendant of descendants) { - if (isPassiveRowContent(descendant)) { - markNodeAndDescendantsForRemoval(nodes, descendant.index, removed); - } + for (const descendantIndex of promotedContent.removableIndexes) { + markNodeAndDescendantsForRemoval(nodes, descendantIndex, removed); } } } @@ -107,12 +106,13 @@ function isUnlabeledActionRow(node: SnapshotNode): boolean { ); } -function collectPromotableRowLabels( +function collectPromotableRowContent( descendants: SnapshotNode[], parent: SnapshotNode, removed: Set, -): string[] { +): { labels: string[]; removableIndexes: number[] } { const labels: string[] = []; + const removableIndexes: number[] = []; const seen = new Set(); for (const descendant of descendants) { if ( @@ -124,11 +124,12 @@ function collectPromotableRowLabels( } const label = visibleNodeLabel(descendant).trim().replace(/\s+/g, ' '); const normalized = normalizeStructuralNodeLabel(label); + removableIndexes.push(descendant.index); if (!label || !normalized || seen.has(normalized)) continue; seen.add(normalized); labels.push(label); } - return labels; + return { labels, removableIndexes }; } function isPassiveRowContent(node: SnapshotNode): boolean { diff --git a/src/utils/android-helper-snapshot-presentation.ts b/src/utils/android-helper-snapshot-presentation.ts index e33a4795c..069ad5e45 100644 --- a/src/utils/android-helper-snapshot-presentation.ts +++ b/src/utils/android-helper-snapshot-presentation.ts @@ -94,6 +94,9 @@ function inferRectlessNodeDirection( .map((candidate) => candidate.index); if (renderedSiblingIndexes.length === 0) return null; + // Android helper rectless children are offscreen list content. UIAutomator + // traversal order is the only signal left once bounds disappear, so this is + // intentionally a conservative above/below hint rather than exact geometry. if (node.index < Math.min(...renderedSiblingIndexes)) return 'above'; if (node.index > Math.max(...renderedSiblingIndexes)) return 'below'; return null; From fe2f5d363eabd1ca82a2bfdbf6ac29555a7a074a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Fri, 22 May 2026 13:50:38 +0200 Subject: [PATCH 3/4] refactor: simplify Android snapshot cleanup helpers --- src/daemon/screenshot-overlay-rects.ts | 14 +++++++------- .../structural-noise.ts | 9 ++++----- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/daemon/screenshot-overlay-rects.ts b/src/daemon/screenshot-overlay-rects.ts index 00b595210..8469de7db 100644 --- a/src/daemon/screenshot-overlay-rects.ts +++ b/src/daemon/screenshot-overlay-rects.ts @@ -17,13 +17,13 @@ export function rectContains(container: Rect, nested: Rect): boolean { ); } -export function unionRects(rects: Rect[]): Rect | null { - if (rects.length === 0) return null; - let minX = Number.POSITIVE_INFINITY; - let minY = Number.POSITIVE_INFINITY; - let maxRight = Number.NEGATIVE_INFINITY; - let maxBottom = Number.NEGATIVE_INFINITY; - for (const rect of rects) { +export function unionRects(rects: Rect[]): Rect { + const firstRect = rects[0]!; + let minX = firstRect.x; + let minY = firstRect.y; + let maxRight = firstRect.x + firstRect.width; + let maxBottom = firstRect.y + firstRect.height; + for (const rect of rects.slice(1)) { minX = Math.min(minX, rect.x); minY = Math.min(minY, rect.y); maxRight = Math.max(maxRight, rect.x + rect.width); diff --git a/src/utils/android-helper-presentation/structural-noise.ts b/src/utils/android-helper-presentation/structural-noise.ts index 9e4622003..3bbf13374 100644 --- a/src/utils/android-helper-presentation/structural-noise.ts +++ b/src/utils/android-helper-presentation/structural-noise.ts @@ -17,13 +17,12 @@ export function markUnlabeledActionRowsForPromotion( const descendants = collectDescendants(nodes, node.index); const promotedContent = collectPromotableRowContent(descendants, node, removed); - const promotedLabel = promotedContent.labels.join(', '); - if (!promotedLabel) continue; + if (!promotedContent.label) continue; replacements.set(node.index, { ...node, ...replacements.get(node.index), - label: promotedLabel, + label: promotedContent.label, }); for (const descendantIndex of promotedContent.removableIndexes) { markNodeAndDescendantsForRemoval(nodes, descendantIndex, removed); @@ -110,7 +109,7 @@ function collectPromotableRowContent( descendants: SnapshotNode[], parent: SnapshotNode, removed: Set, -): { labels: string[]; removableIndexes: number[] } { +): { label: string; removableIndexes: number[] } { const labels: string[] = []; const removableIndexes: number[] = []; const seen = new Set(); @@ -129,7 +128,7 @@ function collectPromotableRowContent( seen.add(normalized); labels.push(label); } - return { labels, removableIndexes }; + return { label: labels.join(', '), removableIndexes }; } function isPassiveRowContent(node: SnapshotNode): boolean { From 7da127cd1165145c7e4c1f37e3cc91ce29ecd8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Fri, 22 May 2026 14:16:16 +0200 Subject: [PATCH 4/4] test: guard SkillGym external runners in sandbox --- package.json | 3 +- test/skillgym/README.md | 10 +++++ test/skillgym/runner-environment.test.ts | 47 ++++++++++++++++++++++++ test/skillgym/runner-environment.ts | 45 +++++++++++++++++++++++ test/skillgym/skillgym.config.ts | 3 ++ 5 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 test/skillgym/runner-environment.test.ts create mode 100644 test/skillgym/runner-environment.ts diff --git a/package.json b/package.json index 74f071f8d..0ed80d248 100644 --- a/package.json +++ b/package.json @@ -122,7 +122,8 @@ "test:integration:provider": "vitest run --project provider-integration", "test:integration:progress": "node scripts/integration-progress.mjs", "test:integration:progress:check": "node scripts/integration-progress.mjs --check", - "test:skillgym": "pnpm build && skillgym run ./test/skillgym/suites/agent-device-smoke-suite.ts --config ./test/skillgym/skillgym.config.ts", + "test:skillgym": "node test/skillgym/runner-environment.ts && pnpm build && skillgym run ./test/skillgym/suites/agent-device-smoke-suite.ts --config ./test/skillgym/skillgym.config.ts", + "test:skillgym:case": "node test/skillgym/runner-environment.ts && pnpm build && skillgym run ./test/skillgym/suites/agent-device-smoke-suite.ts --config ./test/skillgym/skillgym.config.ts --case", "test:smoke": "node --test test/integration/smoke-*.test.ts", "test:integration:node": "node --test test/integration/*.test.ts", "test:integration": "pnpm test:integration:node && pnpm test:integration:provider", diff --git a/test/skillgym/README.md b/test/skillgym/README.md index a1c4a557a..46c85d3b7 100644 --- a/test/skillgym/README.md +++ b/test/skillgym/README.md @@ -101,6 +101,12 @@ pnpm exec skillgym run \ --repeat-failure 1 ``` +To run one case across the configured Codex and Claude runners: + +```bash +pnpm test:skillgym:case open-and-snapshot +``` + Use `--reporter github-actions` in CI when you want annotations in GitHub Actions logs. The config uses `schedule: parallel` so the planning suite can run case/runner pairs concurrently up to SkillGym v0.8's default available-machine parallelism cap. This is safe for the included suite because cases validate command plans and local CLI help, not live shared device state or workspace edits. Override with `--max-parallel ` for local experiments that need a different cap. @@ -114,6 +120,10 @@ Prerequisites: - repo dependencies installed with `pnpm install` - if you want the fixture app running locally, use `pnpm test-app:install` and then `pnpm test-app:ios` or `pnpm test-app:android` +Sandbox note: + +The configured runners call external Codex and Claude model backends. In Codex sandboxes with `CODEX_SANDBOX_NETWORK_DISABLED=1`, `pnpm test:skillgym` and direct `skillgym run --config ./test/skillgym/skillgym.config.ts` fail fast before building or launching runners. Run the suite from a normal authenticated local shell instead. If you are in a sandbox that has explicitly approved network access and you still want to launch external runners, set `SKILLGYM_ALLOW_EXTERNAL_RUNNERS_IN_SANDBOX=1`. + ## Where to extend next - Add suite cases that ask for selector-based plans against `Agent Device Tester`. diff --git a/test/skillgym/runner-environment.test.ts b/test/skillgym/runner-environment.test.ts new file mode 100644 index 000000000..f3a26bd55 --- /dev/null +++ b/test/skillgym/runner-environment.test.ts @@ -0,0 +1,47 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; +import { + allowsExternalRunnersInSandbox, + isNetworkRestrictedCodexSandbox, + skillGymRunnerEnvironmentError, +} from './runner-environment.ts'; + +test('detects network-restricted Codex sandboxes', () => { + assert.equal(isNetworkRestrictedCodexSandbox({ CODEX_SANDBOX_NETWORK_DISABLED: '1' }), true); + assert.equal(isNetworkRestrictedCodexSandbox({ CODEX_SANDBOX_NETWORK_DISABLED: 'true' }), true); + assert.equal(isNetworkRestrictedCodexSandbox({ CODEX_SANDBOX_NETWORK_DISABLED: '0' }), false); + assert.equal(isNetworkRestrictedCodexSandbox({}), false); +}); + +test('allows explicit SkillGym sandbox override', () => { + assert.equal( + allowsExternalRunnersInSandbox({ SKILLGYM_ALLOW_EXTERNAL_RUNNERS_IN_SANDBOX: '1' }), + true, + ); + assert.equal( + allowsExternalRunnersInSandbox({ SKILLGYM_ALLOW_EXTERNAL_RUNNERS_IN_SANDBOX: 'true' }), + true, + ); + assert.equal(allowsExternalRunnersInSandbox({}), false); +}); + +test('explains why external SkillGym runners are blocked in restricted sandboxes', () => { + const message = skillGymRunnerEnvironmentError({ + CODEX_SANDBOX_NETWORK_DISABLED: '1', + }); + + assert.match(message ?? '', /external Codex and Claude runners/); + assert.match(message ?? '', /pnpm test:skillgym:case open-and-snapshot/); + assert.match(message ?? '', /SKILLGYM_ALLOW_EXTERNAL_RUNNERS_IN_SANDBOX=1/); +}); + +test('does not block normal local SkillGym runs', () => { + assert.equal(skillGymRunnerEnvironmentError({}), null); + assert.equal( + skillGymRunnerEnvironmentError({ + CODEX_SANDBOX_NETWORK_DISABLED: '1', + SKILLGYM_ALLOW_EXTERNAL_RUNNERS_IN_SANDBOX: '1', + }), + null, + ); +}); diff --git a/test/skillgym/runner-environment.ts b/test/skillgym/runner-environment.ts new file mode 100644 index 000000000..435f96860 --- /dev/null +++ b/test/skillgym/runner-environment.ts @@ -0,0 +1,45 @@ +import { pathToFileURL } from 'node:url'; + +const SANDBOX_OVERRIDE_ENV = 'SKILLGYM_ALLOW_EXTERNAL_RUNNERS_IN_SANDBOX'; + +export function isNetworkRestrictedCodexSandbox(env: NodeJS.ProcessEnv = process.env): boolean { + return ( + env.CODEX_SANDBOX_NETWORK_DISABLED === '1' || env.CODEX_SANDBOX_NETWORK_DISABLED === 'true' + ); +} + +export function allowsExternalRunnersInSandbox(env: NodeJS.ProcessEnv = process.env): boolean { + return env[SANDBOX_OVERRIDE_ENV] === '1' || env[SANDBOX_OVERRIDE_ENV] === 'true'; +} + +export function skillGymRunnerEnvironmentError( + env: NodeJS.ProcessEnv = process.env, +): string | null { + if (!isNetworkRestrictedCodexSandbox(env) || allowsExternalRunnersInSandbox(env)) { + return null; + } + + return [ + 'SkillGym uses external Codex and Claude runners, but this Codex sandbox has network disabled.', + '', + 'Run this benchmark from a normal authenticated local shell:', + ' pnpm test:skillgym', + '', + 'For one case:', + ' pnpm test:skillgym:case open-and-snapshot', + '', + `If your sandbox has approved network access and you intentionally want to run external runners, set ${SANDBOX_OVERRIDE_ENV}=1.`, + ].join('\n'); +} + +export function enforceSkillGymRunnerEnvironment(): void { + const message = skillGymRunnerEnvironmentError(); + if (!message) return; + + console.error(message); + process.exit(2); +} + +if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) { + enforceSkillGymRunnerEnvironment(); +} diff --git a/test/skillgym/skillgym.config.ts b/test/skillgym/skillgym.config.ts index f9fe03818..d6e204f75 100644 --- a/test/skillgym/skillgym.config.ts +++ b/test/skillgym/skillgym.config.ts @@ -1,6 +1,9 @@ import type { SkillGymConfig } from 'skillgym'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; +import { enforceSkillGymRunnerEnvironment } from './runner-environment.ts'; + +enforceSkillGymRunnerEnvironment(); const localBinDir = fileURLToPath(new URL('./bin', import.meta.url)); const runnerEnv = {