Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@
"test:integration:provider": "vitest run --project provider-integration",
"test:integration:progress": "node scripts/integration-progress.mjs",
"test:integration:progress:check": "node scripts/integration-progress.mjs --check",
"test:skillgym": "pnpm build && skillgym run ./test/skillgym/suites/agent-device-smoke-suite.ts --config ./test/skillgym/skillgym.config.ts",
"test:skillgym": "node test/skillgym/runner-environment.ts && pnpm build && skillgym run ./test/skillgym/suites/agent-device-smoke-suite.ts --config ./test/skillgym/skillgym.config.ts",
"test:skillgym:case": "node test/skillgym/runner-environment.ts && pnpm build && skillgym run ./test/skillgym/suites/agent-device-smoke-suite.ts --config ./test/skillgym/skillgym.config.ts --case",
"test:smoke": "node --test test/integration/smoke-*.test.ts",
"test:integration:node": "node --test test/integration/*.test.ts",
"test:integration": "pnpm test:integration:node && pnpm test:integration:provider",
Expand Down
186 changes: 186 additions & 0 deletions src/daemon/__tests__/screenshot-overlay.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,192 @@ test('buildScreenshotOverlayRefs prefers descendant text over generic android re
]);
});

// Checks that an Android row's snapshot rect is reported unchanged in the overlay refs.
// The root ScrollView in the fixture is 2920 tall while the value 2992 is passed to
// buildScreenshotOverlayRefs below, yet the expected rect and overlayRect are exactly the
// row's snapshot rect, i.e. no rescaling between the two heights is expected.
test('buildScreenshotOverlayRefs keeps Android pixel rects aligned with screenshots', () => {
const snapshot = makeSnapshotState(
[
{
index: 0,
type: 'android.widget.ScrollView',
rect: { x: 0, y: 0, width: 1344, height: 2920 },
},
// Hittable row without a label of its own; it ends flush with the bottom of the
// ScrollView (2697 + 223 = 2920).
{
index: 1,
parentIndex: 0,
type: 'android.widget.LinearLayout',
hittable: true,
rect: { x: 0, y: 2697, width: 1344, height: 223 },
},
// The only labelled node; the expected ref below carries this label.
{
index: 2,
parentIndex: 1,
type: 'android.widget.TextView',
label: 'Storage',
rect: { x: 240, y: 2745, width: 205, height: 81 },
},
],
{ backend: 'android' },
);

// NOTE(review): 1344 and 2992 are presumably the screenshot width and height — confirm
// against the buildScreenshotOverlayRefs signature.
const overlayRefs = buildScreenshotOverlayRefs(snapshot, 1344, 2992);

// Exactly one ref is expected: the hittable row, labelled with its child's text.
// NOTE(review): 'e2' is presumably the ref makeSnapshotState assigns to the node at index 1.
// The expected center matches the midpoint of the row rect: x = 1344 / 2 = 672 and
// y = 2697 + 223 / 2 = 2808.5, which the expectation lists as 2809.
assert.deepEqual(overlayRefs, [
{
ref: 'e2',
label: 'Storage',
rect: { x: 0, y: 2697, width: 1344, height: 223 },
overlayRect: { x: 0, y: 2697, width: 1344, height: 223 },
center: { x: 672, y: 2809 },
},
]);
});

// Checks that hittable Android controls with no label still receive overlay refs.
// The fixture is a full-height FrameLayout containing a hittable ScrollView, a labelled
// (but not hittable) TextView, and four unlabeled hittable ViewGroups on one row.
test('buildScreenshotOverlayRefs includes unlabeled Android bottom tab controls', () => {
const snapshot = makeSnapshotState(
[
{
index: 0,
type: 'android.widget.FrameLayout',
rect: { x: 0, y: 0, width: 1344, height: 2992 },
},
// Hittable, but not among the refs expected below.
{
index: 1,
parentIndex: 0,
type: 'android.widget.ScrollView',
hittable: true,
rect: { x: 0, y: 159, width: 1344, height: 2593 },
},
// Labelled but not marked hittable; also not among the expected refs.
{
index: 2,
parentIndex: 0,
type: 'android.widget.TextView',
label: 'Agent Device Tester',
rect: { x: 54, y: 181, width: 770, height: 86 },
},
// Indices 3-6: four unlabeled, hittable ViewGroups that all start at y = 2724 and end at
// y = 2856, close to the bottom of the 2992-tall frame. These are the "bottom tab controls"
// named in the test title.
{
index: 3,
parentIndex: 0,
type: 'android.view.ViewGroup',
hittable: true,
rect: { x: 72, y: 2724, width: 192, height: 132 },
},
{
index: 4,
parentIndex: 0,
type: 'android.view.ViewGroup',
hittable: true,
rect: { x: 436, y: 2724, width: 192, height: 132 },
},
{
index: 5,
parentIndex: 0,
type: 'android.view.ViewGroup',
hittable: true,
rect: { x: 800, y: 2724, width: 192, height: 132 },
},
{
index: 6,
parentIndex: 0,
type: 'android.view.ViewGroup',
hittable: true,
rect: { x: 1164, y: 2724, width: 132, height: 132 },
},
],
{ backend: 'android' },
);

const overlayRefs = buildScreenshotOverlayRefs(snapshot, 1344, 2992);

// Exactly the four tab controls are expected, in snapshot order.
// NOTE(review): refs 'e4'..'e7' presumably correspond to node indices 3..6 — confirm against
// makeSnapshotState.
assert.deepEqual(
overlayRefs.map((overlayRef) => overlayRef.ref),
['e4', 'e5', 'e6', 'e7'],
);
// None of the returned refs may carry a (truthy) label.
assert.ok(
overlayRefs.every((overlayRef) => !overlayRef.label),
'unlabeled Android tab controls should still get visual refs',
);
});

// Checks that an unlabeled hittable control nested inside another unlabeled hittable control
// yields two overlay refs rather than one.
test('buildScreenshotOverlayRefs keeps nested unlabeled Android controls separate', () => {
const snapshot = makeSnapshotState(
[
{
index: 0,
type: 'android.widget.FrameLayout',
rect: { x: 0, y: 0, width: 1344, height: 2992 },
},
// Outer hittable container: spans x 80..480, y 240..480.
{
index: 1,
parentIndex: 0,
type: 'android.view.ViewGroup',
hittable: true,
rect: { x: 80, y: 240, width: 400, height: 240 },
},
// Inner hittable control, child of index 1 and fully inside it: x 120..280, y 280..400.
{
index: 2,
parentIndex: 1,
type: 'android.view.ViewGroup',
hittable: true,
rect: { x: 120, y: 280, width: 160, height: 120 },
},
],
{ backend: 'android' },
);

const overlayRefs = buildScreenshotOverlayRefs(snapshot, 1344, 2992);

// Both the outer and the inner control are expected, in that order.
// NOTE(review): 'e2'/'e3' presumably map to node indices 1/2 — confirm against makeSnapshotState.
assert.deepEqual(
overlayRefs.map((overlayRef) => overlayRef.ref),
['e2', 'e3'],
);
// Neither ref may carry a (truthy) label.
assert.ok(overlayRefs.every((overlayRef) => !overlayRef.label));
});

// Checks that a hittable Android row whose text content sits off-centre vertically gets an
// overlayRect that is shorter than its snapshot rect, while `rect` itself stays untouched.
test('buildScreenshotOverlayRefs trims Android row spacing from unlabeled action containers', () => {
const snapshot = makeSnapshotState(
[
{
index: 0,
type: 'android.widget.ScrollView',
rect: { x: 0, y: 0, width: 1344, height: 2920 },
},
// Hittable row without its own label; spans y 447..729.
{
index: 1,
parentIndex: 0,
type: 'android.widget.LinearLayout',
hittable: true,
rect: { x: 0, y: 447, width: 1344, height: 282 },
},
// Title text, y 495..576.
{
index: 2,
parentIndex: 1,
type: 'android.widget.TextView',
label: 'Google',
rect: { x: 240, y: 495, width: 190, height: 81 },
},
// Subtitle text, y 576..633. Together the two texts span y 495..633 (height 138),
// leaving 48 above and 96 below inside the row.
{
index: 3,
parentIndex: 1,
type: 'android.widget.TextView',
label: 'Services & preferences',
rect: { x: 240, y: 576, width: 425, height: 57 },
},
],
{ backend: 'android' },
);

const overlayRefs = buildScreenshotOverlayRefs(snapshot, 1344, 2992);

// Expected overlayRect keeps the row's top (447) and width but has height 234, which equals
// the 138 of text content plus 48 on each side: the larger bottom gap (96) is cut down to
// match the top gap (48). The expected center y (564 = 447 + 234 / 2) is the midpoint of
// overlayRect, not of rect. The expected label is the first text's label.
assert.deepEqual(overlayRefs, [
{
ref: 'e2',
label: 'Google',
rect: { x: 0, y: 447, width: 1344, height: 282 },
overlayRect: { x: 0, y: 447, width: 1344, height: 234 },
center: { x: 672, y: 564 },
},
]);
});

test('annotateScreenshotWithRefs draws the overlay onto the saved PNG', async () => {
const root = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-screenshot-overlay-'));
const screenshotPath = path.join(root, 'screen.png');
Expand Down
93 changes: 93 additions & 0 deletions src/daemon/screenshot-overlay-android.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import type { Rect, SnapshotNode } from '../utils/snapshot.ts';
import { normalizeType } from './snapshot-processing.ts';
import { hasPositiveRect, rectContains, unionRects } from './screenshot-overlay-rects.ts';

/**
 * Returns a vertically rebalanced overlay rect for an Android snapshot node, when one applies.
 *
 * A node is only considered when it has a rect, is explicitly hittable, has no actionable
 * role and has no overlay label of its own. Any other node yields `null`, as does a node for
 * which no rebalanced rect can be derived.
 *
 * @param target - Node whose overlay rect is being resolved.
 * @param nodes - All snapshot nodes; used to find the target's descendants.
 * @param hasActionableRole - Predicate telling whether a node has an actionable role.
 * @param hasOverlayLabel - Predicate telling whether a node carries an overlay label.
 * @returns The adjusted rect, or `null` when the node's own rect should not be adjusted here.
 */
export function resolveAndroidOverlaySourceRect(
  target: SnapshotNode,
  nodes: SnapshotNode[],
  hasActionableRole: (node: SnapshotNode) => boolean,
  hasOverlayLabel: (node: SnapshotNode) => boolean,
): Rect | null {
  // Guards run in this order so the predicates are never invoked for nodes that are
  // rect-less or not hittable.
  if (!target.rect) return null;
  if (target.hittable !== true) return null;
  if (hasActionableRole(target)) return null;
  if (hasOverlayLabel(target)) return null;

  return balanceAndroidActionRowRect(target, nodes, hasOverlayLabel);
}

/**
 * Shrinks a row's rect vertically so the space above and below its content is equal.
 *
 * The content bounds come from `measureAndroidActionRowContentRect`. The smaller of the two
 * vertical gaps is applied on both sides of the content; x and width are taken from the row
 * unchanged.
 *
 * @param target - Row node; its `rect` must be set (the caller checks this).
 * @param nodes - All snapshot nodes.
 * @param hasOverlayLabel - Predicate telling whether a node carries an overlay label.
 * @returns The balanced rect, or `null` when there is no measurable content, the content
 *   pokes outside the row, the gaps already differ by less than 16, or the result would not
 *   be strictly shorter than the row.
 */
function balanceAndroidActionRowRect(
  target: SnapshotNode,
  nodes: SnapshotNode[],
  hasOverlayLabel: (node: SnapshotNode) => boolean,
): Rect | null {
  const { x, y: rowTop, width, height: rowHeight } = target.rect!;
  const content = measureAndroidActionRowContentRect(target, nodes, hasOverlayLabel);
  if (!content) return null;

  const gapAbove = content.y - rowTop;
  const gapBelow = rowTop + rowHeight - (content.y + content.height);

  const contentEscapesRow = gapAbove < 0 || gapBelow < 0;
  if (contentEscapesRow) return null;

  // Rows whose gaps differ by less than 16 are treated as already balanced.
  const alreadyBalanced = Math.abs(gapBelow - gapAbove) < 16;
  if (alreadyBalanced) return null;

  const sharedGap = Math.min(gapAbove, gapBelow);
  const balancedTop = Math.round(content.y - sharedGap);
  const balancedHeight = Math.round(content.height + sharedGap * 2);

  // Only a strictly smaller, non-empty rect counts as a trim.
  if (balancedHeight <= 0 || balancedHeight >= rowHeight) return null;

  return { x, y: balancedTop, width, height: balancedHeight };
}

/**
 * Computes the bounding box of the visual content (text / labelled images) inside a row.
 *
 * A node contributes when it is not the target itself, descends from the target, qualifies
 * as visual content, has a rect with positive width and height, and that rect lies fully
 * inside the target's rect.
 *
 * @param target - Row node; its `rect` must be set.
 * @param nodes - All snapshot nodes.
 * @param hasOverlayLabel - Predicate telling whether a node carries an overlay label.
 * @returns The union of the contributing rects, or `null` when fewer than two nodes
 *   contribute.
 */
function measureAndroidActionRowContentRect(
  target: SnapshotNode,
  nodes: SnapshotNode[],
  hasOverlayLabel: (node: SnapshotNode) => boolean,
): Rect | null {
  const rowBounds = target.rect!;

  // Lookup table used to walk parent links; a later node wins if indices repeat.
  const byIndex = new Map<number, SnapshotNode>();
  for (const node of nodes) {
    byIndex.set(node.index, node);
  }

  const pieces: Rect[] = [];
  for (const candidate of nodes) {
    if (candidate.ref === target.ref) continue;
    if (!isDescendantOf(candidate, target, byIndex)) continue;
    if (!isAndroidActionRowVisualContent(candidate, hasOverlayLabel)) continue;

    const candidateRect = candidate.rect;
    if (!hasPositiveRect(candidateRect)) continue;
    if (!rectContains(rowBounds, candidateRect)) continue;

    pieces.push(candidateRect);
  }

  return pieces.length >= 2 ? unionRects(pieces) : null;
}

/**
 * Tells whether a node counts as visible row content when measuring a row.
 *
 * The decision is made on the value `normalizeType` returns for the node's type (an empty
 * string is used when the node has no type): any type containing `text` qualifies, and a
 * type containing `image` qualifies only when the node also has an overlay label.
 *
 * @param node - Node to classify.
 * @param hasOverlayLabel - Predicate telling whether a node carries an overlay label.
 */
function isAndroidActionRowVisualContent(
  node: SnapshotNode,
  hasOverlayLabel: (node: SnapshotNode) => boolean,
): boolean {
  const kind = normalizeType(node.type ?? '');
  if (kind.includes('text')) return true;
  // Images only count when labelled; the label predicate is not consulted for other types.
  return kind.includes('image') && hasOverlayLabel(node);
}

/**
 * Tells whether `node` sits somewhere below `ancestor` in the snapshot tree.
 *
 * The walk follows `parentIndex` links through `nodeIndex` and compares each parent's `ref`
 * with the ancestor's `ref`. A node is never considered its own descendant unless a parent
 * link leads back to it.
 *
 * @param node - Node whose ancestry is inspected.
 * @param ancestor - Candidate ancestor, matched by `ref`.
 * @param nodeIndex - Lookup from node index to node.
 * @returns `true` when a parent with the ancestor's ref is reached; `false` when the chain
 *   ends, points at an index missing from `nodeIndex`, or loops back on itself.
 */
function isDescendantOf(
  node: SnapshotNode,
  ancestor: SnapshotNode,
  nodeIndex: ReadonlyMap<number, SnapshotNode>,
): boolean {
  // Parent indices already followed. A malformed snapshot whose parent links form a cycle
  // (e.g. a node listed as its own parent) would otherwise keep this loop running forever.
  const followed = new Set<number>();
  let current = node;
  while (current.parentIndex !== undefined) {
    const parentIndex = current.parentIndex;
    // Every parent on the cycle has already been compared, so the ancestor is not on it.
    if (followed.has(parentIndex)) return false;
    followed.add(parentIndex);

    const parent = nodeIndex.get(parentIndex);
    if (!parent) return false;
    if (parent.ref === ancestor.ref) return true;
    current = parent;
  }
  return false;
}
38 changes: 38 additions & 0 deletions src/daemon/screenshot-overlay-rects.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import type { Rect } from '../utils/snapshot.ts';

/**
 * Type guard: `true` only for a rect that is present and has both width and height
 * strictly greater than zero.
 */
export function hasPositiveRect(rect: Rect | undefined): rect is Rect {
  if (!rect) return false;
  return rect.width > 0 && rect.height > 0;
}

/** Returns the area of a rect, i.e. its width multiplied by its height. */
export function rectArea(rect: Rect): number {
  const { width, height } = rect;
  return width * height;
}

/**
 * Tells whether `nested` lies entirely within `container`.
 *
 * All four edges are compared inclusively, so a rect that touches the container's border
 * (or equals the container) counts as contained.
 */
export function rectContains(container: Rect, nested: Rect): boolean {
  const startsInside = nested.x >= container.x && nested.y >= container.y;
  if (!startsInside) return false;

  const fitsHorizontally = nested.x + nested.width <= container.x + container.width;
  const fitsVertically = nested.y + nested.height <= container.y + container.height;
  return fitsHorizontally && fitsVertically;
}

/**
 * Returns the smallest rect that encloses every rect in `rects`.
 *
 * @param rects - Rects to enclose; must contain at least one entry (an empty array makes
 *   the first-element access throw).
 * @returns A new rect; the inputs are not modified.
 */
export function unionRects(rects: Rect[]): Rect {
  const seed = rects[0]!;
  const edges = rects.slice(1).reduce(
    (acc, rect) => ({
      left: Math.min(acc.left, rect.x),
      top: Math.min(acc.top, rect.y),
      right: Math.max(acc.right, rect.x + rect.width),
      bottom: Math.max(acc.bottom, rect.y + rect.height),
    }),
    {
      left: seed.x,
      top: seed.y,
      right: seed.x + seed.width,
      bottom: seed.y + seed.height,
    },
  );
  return {
    x: edges.left,
    y: edges.top,
    width: edges.right - edges.left,
    height: edges.bottom - edges.top,
  };
}
Loading
Loading