Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion embeddings/embed-daemon.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ async function _importFromCanonicalSharedDeps(sharedDir = join(homedir(), ".hive
return _normalizeTransformersModule(mod);
}
// Imports "@huggingface/transformers" by its bare package name and returns the
// imported module after passing it through _normalizeTransformersModule.
// NOTE(review): this span is a rendered diff hunk — the literal-specifier
// `import("...")` line and the `spec` / `import(spec)` pair appear to be the
// removed and added sides of one change, not statements meant to coexist.
// Confirm against the actual file before relying on either form.
async function _importFromBareSpecifier() {
const mod = await import("@huggingface/transformers");
const spec = "@huggingface/transformers";
const mod = await import(spec);
return _normalizeTransformersModule(mod);
}
function _normalizeTransformersModule(mod) {
Expand Down
25 changes: 23 additions & 2 deletions src/embeddings/nomic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,24 @@ import {

/**
 * Callable that embeds text: takes one string or an array of strings plus an
 * options bag, and resolves to an object whose `data` field holds the numeric
 * output as either a `Float32Array` or a plain `number[]`.
 */
type Embedder = (input: string | string[], opts: Record<string, unknown>) => Promise<{ data: Float32Array | number[] }>;

type TransformersModule = typeof import("@huggingface/transformers");
/**
 * Local, minimal description of the parts of `@huggingface/transformers`
 * that this wrapper touches.
 *
 * Kept as a hand-written interface rather than
 * `typeof import("@huggingface/transformers")` so type-checking never has to
 * resolve the package: it is an optional, on-demand dependency (installed by
 * `hivemind embeddings install`) and is missing on some platforms (e.g.
 * Windows CI), where a `typeof import` query would fail `tsc` with TS2307.
 */
interface TransformersModule {
  /** Pipeline factory; the result is left as `unknown` for the caller to narrow. */
  pipeline: (
    task: string,
    model: string,
    options?: Record<string, unknown>,
  ) => Promise<unknown>;
  /** Library settings object: two typed boolean flags plus an open set of other keys. */
  env: {
    [key: string]: unknown;
    useFSCache: boolean;
    allowLocalModels: boolean;
  };
}
/** Zero-argument async loader that resolves to a `TransformersModule`. */
type TransformersImporter = () => Promise<TransformersModule>;

export interface NomicOptions {
Expand Down Expand Up @@ -51,7 +68,11 @@ export async function _importFromCanonicalSharedDeps(
}

/**
 * Imports `@huggingface/transformers` by its bare package name and passes the
 * loaded module through `_normalizeTransformersModule`.
 *
 * @returns The normalized module, typed as the local `TransformersModule` shape.
 *
 * NOTE(review): this span is a rendered diff hunk — the literal
 * `import("@huggingface/transformers")` line and the `spec`-based pair appear
 * to be the removed and added sides of one change rather than statements meant
 * to coexist. Confirm against the actual file.
 */
export async function _importFromBareSpecifier(): Promise<TransformersModule> {
const mod = await import("@huggingface/transformers");
// Non-literal specifier (typed as `string`) so `tsc` treats this as a
// dynamic import of `any` and does not resolve the optional package at
// compile time — see the TransformersModule comment above.
const spec: string = "@huggingface/transformers";
const mod = await import(spec);
return _normalizeTransformersModule(mod);
}

Expand Down
21 changes: 14 additions & 7 deletions tests/claude-code/embeddings-nomic.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ import {
_importFromCanonicalSharedDeps,
} from "../../src/embeddings/nomic.js";

// Non-literal specifier (typed `string`) so `tsc` does not resolve the optional
// @huggingface/transformers package at compile time — it's absent on some
// platforms (e.g. Windows CI). The `vi.mock(...)` below still intercepts it at
// runtime by resolved module id, so the dynamic `import(TRANSFORMERS_PKG)`
// calls return the mock.
const TRANSFORMERS_PKG: string = "@huggingface/transformers";

// Mock the heavy transformers import so these tests don't pull in
// onnxruntime-node or download any model weights. `load()` resolves
// transformers via an injected importer (default goes through the canonical
Expand Down Expand Up @@ -41,7 +48,7 @@ vi.mock("@huggingface/transformers", () => {
beforeEach(() => {
// Route the embedder's loader through the vi.mock-intercepted bare specifier
// instead of the real canonical-shared-deps resolver.
// NOTE(review): the two calls below look like the removed and added sides of
// a rendered diff (string-literal specifier vs. the TRANSFORMERS_PKG
// constant); both install an importer for the same package name.
_setTransformersImporterForTesting(() => import("@huggingface/transformers") as any);
_setTransformersImporterForTesting(() => import(TRANSFORMERS_PKG) as any);
});

afterEach(() => {
Expand All @@ -55,15 +62,15 @@ describe("NomicEmbedder", () => {
await e.load(); // second call is a no-op (cached)
// If load() didn't memoize, pipeline() would be invoked twice; the
// mock would return a fresh spy whose call counts would differ.
const mod: any = await import("@huggingface/transformers");
const mod: any = await import(TRANSFORMERS_PKG);
expect((mod.pipeline as any).mock.calls.length).toBe(1);
});

it("embeds a document with the search_document: prefix", async () => {
const e = new NomicEmbedder({ dims: 4 });
const v = await e.embed("hello", "document");
expect(v).toHaveLength(4);
const mod: any = await import("@huggingface/transformers");
const mod: any = await import(TRANSFORMERS_PKG);
const pipeline = await (mod.pipeline as any).mock.results[0].value;
const callArg = (pipeline as any).mock.calls.at(-1)[0];
expect(callArg).toBe("search_document: hello");
Expand All @@ -72,7 +79,7 @@ describe("NomicEmbedder", () => {
it("embeds a query with the search_query: prefix", async () => {
const e = new NomicEmbedder({ dims: 4 });
await e.embed("q", "query");
const mod: any = await import("@huggingface/transformers");
const mod: any = await import(TRANSFORMERS_PKG);
const pipeline = await (mod.pipeline as any).mock.results[0].value;
const callArg = (pipeline as any).mock.calls.at(-1)[0];
expect(callArg).toBe("search_query: q");
Expand Down Expand Up @@ -113,7 +120,7 @@ describe("NomicEmbedder", () => {

it("handles a zero-norm truncation without dividing by zero", async () => {
// Reach through the private helper via a custom mock that returns zeros.
const mod: any = await import("@huggingface/transformers");
const mod: any = await import(TRANSFORMERS_PKG);
const origPipeline = mod.pipeline;
const wrapped = vi.fn(() => Promise.resolve(() => Promise.resolve({ data: [0, 0, 0, 0] })));
(mod as any).pipeline = wrapped;
Expand Down Expand Up @@ -145,7 +152,7 @@ describe("NomicEmbedder", () => {
it("coalesces concurrent load() calls onto a single pipeline build", async () => {
// Replace pipeline with a slow one so the two load() calls overlap and
// the second enters the `if (this.loading) return this.loading;` branch.
const mod: any = await import("@huggingface/transformers");
const mod: any = await import(TRANSFORMERS_PKG);
const orig = mod.pipeline;
let calls = 0;
mod.pipeline = vi.fn(async () => {
Expand All @@ -168,7 +175,7 @@ describe("NomicEmbedder", () => {
it("embeds a query in embedBatch with the search_query prefix", async () => {
const e = new NomicEmbedder({ dims: 4 });
await e.embedBatch(["hi"], "query");
const mod: any = await import("@huggingface/transformers");
const mod: any = await import(TRANSFORMERS_PKG);
const pipeline = await (mod.pipeline as any).mock.results[0].value;
const lastCall = (pipeline as any).mock.calls.at(-1)[0];
expect(lastCall).toEqual(["search_query: hi"]);
Expand Down