diff --git a/__tests__/containers/os.ts b/__tests__/containers/os.ts index df4a0029..214095c1 100644 --- a/__tests__/containers/os.ts +++ b/__tests__/containers/os.ts @@ -31,6 +31,7 @@ import { TouchedIndexesFeature } from "../../src/features/TouchedIndexes/index.t import { OsRecordDecompressorFeature } from "../../src/features/OsRecordDecompressor/index.ts"; import { OsScannerFeature } from "../../src/features/OsScanner/index.ts"; import { OsProcessorFeature } from "../../src/features/OsProcessor/index.ts"; +import { IndexConfigurationProviderFeature } from "../../src/features/IndexConfigurationProvider/index.ts"; import { AccessCheckerFeature } from "../../src/features/AccessChecker/index.ts"; import { MockDynamoDbClient } from "../services/DynamoDbClient/MockDynamoDbClient.ts"; import { MockOpenSearchClient } from "../services/OpenSearchClient/MockOpenSearchClient.ts"; @@ -118,6 +119,7 @@ export function createOsContainer(options: OsContainerOptions = {}): Container { container.registerInstance(DroppedRecordLog, new MockDroppedRecordLog()); container.registerInstance(TransferredRecordLog, new MockTransferredRecordLog()); PipelineRunnerFeature.register(container); + IndexConfigurationProviderFeature.register(container); TouchedIndexesFeature.register(container); DdbExecutorFeature.register(container); OsRecordDecompressorFeature.register(container); diff --git a/documentation/AGENTS.md b/documentation/AGENTS.md new file mode 100644 index 00000000..f5dbe0e6 --- /dev/null +++ b/documentation/AGENTS.md @@ -0,0 +1,28 @@ +# Documentation agent guidelines + +This directory documents user-facing configuration points — abstractions users can override via `config.register` to customize transfer behavior. + +## Adding a new configuration + +1. Create `configurations//README.md`. +2. Add a breadcrumb line as the very first line, before the title: + ``` + [Documentation](../../README.md) > [Configurations](../../README.md#configurations) > AbstractionName + ``` + Adjust the relative path depth to match the file's location. Every doc page must link back to its parent. +3. Follow the structure used in `configurations/IndexConfigurationProvider/README.md`: + - **Title** — abstraction name. + - **When it runs** — where in the transfer lifecycle the abstraction is called and by whom. + - **Default behavior** — what the built-in implementation does. + - **Override example** — complete, copy-pasteable code showing a custom implementation class, `createImplementation`, and the `register` hook in config. + - **Per-X configuration** — if the method receives a discriminator (index name, table name, etc.), show a branching example. + - **API** — interface and type signatures. + - **Source** — path to the feature directory. +4. Add a row to the table in `documentation/README.md`. + +## Rules + +- Examples must compile against the current public API (`src/index.ts`). If an export is missing, add it to the public API first. +- Show `createImplementation` with the correct abstraction — users register Implementation classes, not raw classes. +- Keep examples minimal — enough to demonstrate the override, not a production-ready solution. +- Do not duplicate AGENTS.md content from the project root. This file covers documentation conventions only. diff --git a/documentation/README.md b/documentation/README.md new file mode 100644 index 00000000..1012a1c4 --- /dev/null +++ b/documentation/README.md @@ -0,0 +1,25 @@ +## Curated list of available documentation + +### Configurations + +A user can configure the behavior of the application by providing their own implementations for an abstraction via the `register` hook in `createConfig()`. + +```typescript +export default createConfig({ + // ... source, target, pipeline ... + register: async container => { + container.register(MyCustomImplementation); + } +}); +``` + +Each configuration has its own folder under `./configurations/` with a README describing what it does, the default behavior, and how to override it. + +| Abstraction | Description | Docs | +| ---------------------------- | ------------------------------------------------------------------------------------ | --------------------------------------------------------------- | +| `IndexConfigurationProvider` | Controls OpenSearch index mappings and settings applied on index creation and update | [README](./configurations/IndexConfigurationProvider/README.md) | +| `ModelProvider` | Loads CMS model definitions used by transformers for field metadata | [README](./configurations/ModelProvider/README.md) | +| `BeforeTransferHook` | Runs before the transfer starts (orchestrator only, after access checks) | [README](./configurations/BeforeTransferHook/README.md) | +| `AfterTransferHook` | Runs after the transfer completes (orchestrator only, skipped on failure) | [README](./configurations/AfterTransferHook/README.md) | +| `BeforeLoadPresetHook` | Runs before the preset is loaded (each worker, receives config) | [README](./configurations/BeforeLoadPresetHook/README.md) | +| `AfterLoadPresetHook` | Runs after the preset is loaded (each worker, receives config + preset) | [README](./configurations/AfterLoadPresetHook/README.md) | diff --git a/documentation/configurations/AfterLoadPresetHook/README.md b/documentation/configurations/AfterLoadPresetHook/README.md new file mode 100644 index 00000000..3cee8c3c --- /dev/null +++ b/documentation/configurations/AfterLoadPresetHook/README.md @@ -0,0 +1,62 @@ +[Documentation](../../README.md) > [Configurations](../../README.md#configurations) > AfterLoadPresetHook + +# AfterLoadPresetHook + +Runs custom logic **after** the preset is loaded and configured — in each worker process. + +## When it runs + +Each worker (`processSegment/handler.ts`) calls `afterLoadPresetHook.execute(config, preset)` after `preset.configure({...})` completes, before the pipeline runner starts processing records. Receives both the resolved config and the loaded preset. + +## Default behavior + +One built-in hook is registered: **`ModelPreloaderHook`** — preloads tenant/locale pairs from the source table and then calls `modelProvider.preloadModels(tenantLocales)`. This ensures CMS model definitions are available to transformers before any records flow. + +## Composite behavior + +Hooks use `{ multiple: true }` — registering a hook **adds** to the list rather than replacing existing ones. Your hook runs after the built-in `ModelPreloaderHook`. Multiple hooks execute sequentially in registration order. + +## Override example + +Log which preset was loaded and how many pipelines it registered: + +```typescript +// features/presetLogger.ts +import { + AfterLoadPresetHook, + type MigrationConfiguration, + type MigrationPreset +} from "@webiny/data-transfer"; + +class PresetLogger implements AfterLoadPresetHook.Interface { + public async execute(_config: MigrationConfiguration, preset: MigrationPreset): Promise { + console.log(`Loaded preset: ${preset.name} — ${preset.description}`); + } +} + +export const PresetLoggerHook = AfterLoadPresetHook.createImplementation({ + implementation: PresetLogger, + dependencies: [] +}); +``` + +Register it in the config: + +```typescript +export default createConfig({ + // ... + register: async container => { + container.register(PresetLoggerHook); + } +}); +``` + +## API + +```typescript +interface AfterLoadPresetHook.Interface { + execute(config: MigrationConfiguration, preset: MigrationPreset): Promise; +} +``` + +**Source:** `src/features/PresetLifecycle/` diff --git a/documentation/configurations/AfterTransferHook/README.md b/documentation/configurations/AfterTransferHook/README.md new file mode 100644 index 00000000..50324dde --- /dev/null +++ b/documentation/configurations/AfterTransferHook/README.md @@ -0,0 +1,61 @@ +[Documentation](../../README.md) > [Configurations](../../README.md#configurations) > AfterTransferHook + +# AfterTransferHook + +Runs custom logic **after** the transfer completes — after all workers finish, in the orchestrator process only. + +## When it runs + +The orchestrator (`run/handler.ts`) calls `afterTransferHook.execute()` once, after all worker shards have completed. **Skipped on shard failure** — if any worker fails, after-hooks do not run. + +## Default behavior + +No built-in hooks are registered. The composite executes an empty list. + +## Composite behavior + +Hooks use `{ multiple: true }` — registering a hook **adds** to the list rather than replacing existing ones. Multiple hooks execute sequentially in registration order. + +## Override example + +Log transfer completion to an external system: + +```typescript +// features/completionHook.ts +import { AfterTransferHook } from "@webiny/data-transfer"; + +class LogCompletion implements AfterTransferHook.Interface { + public async execute(): Promise { + await fetch("https://hooks.slack.com/...", { + method: "POST", + body: JSON.stringify({ text: "Transfer completed successfully." }) + }); + } +} + +export const LogCompletionHook = AfterTransferHook.createImplementation({ + implementation: LogCompletion, + dependencies: [] +}); +``` + +Register it in the config: + +```typescript +export default createConfig({ + // ... + register: async container => { + container.register(LogCompletionHook); + } +}); +``` + +## API + +```typescript +interface AfterTransferHook.Interface { + execute(): Promise; +} +``` + +**Source:** `src/features/TransferLifecycle/` diff --git a/documentation/configurations/BeforeLoadPresetHook/README.md b/documentation/configurations/BeforeLoadPresetHook/README.md new file mode 100644 index 00000000..b5d3f618 --- /dev/null +++ b/documentation/configurations/BeforeLoadPresetHook/README.md @@ -0,0 +1,60 @@ +[Documentation](../../README.md) > [Configurations](../../README.md#configurations) > BeforeLoadPresetHook + +# BeforeLoadPresetHook + +Runs custom logic **before** the preset is loaded — in each worker process. + +## When it runs + +Each worker (`processSegment/handler.ts`) calls `beforeLoadPresetHook.execute(config)` after bootstrap and `config.register`, but before `presetLoader.load(presetName)`. Receives the resolved `MigrationConfig`. + +## Default behavior + +No built-in hooks are registered. The composite executes an empty list. + +## Composite behavior + +Hooks use `{ multiple: true }` — registering a hook **adds** to the list rather than replacing existing ones. Multiple hooks execute sequentially in registration order. + +## Override example + +Validate config preconditions before the preset wires up pipelines: + +```typescript +// features/configValidator.ts +import { BeforeLoadPresetHook, type MigrationConfiguration } from "@webiny/data-transfer"; + +class ConfigValidator implements BeforeLoadPresetHook.Interface { + public async execute(config: MigrationConfiguration): Promise { + if (!config.source.opensearch) { + throw new Error("This project requires OpenSearch configuration."); + } + } +} + +export const ConfigValidatorHook = BeforeLoadPresetHook.createImplementation({ + implementation: ConfigValidator, + dependencies: [] +}); +``` + +Register it in the config: + +```typescript +export default createConfig({ + // ... + register: async container => { + container.register(ConfigValidatorHook); + } +}); +``` + +## API + +```typescript +interface BeforeLoadPresetHook.Interface { + execute(config: MigrationConfiguration): Promise; +} +``` + +**Source:** `src/features/PresetLifecycle/` diff --git a/documentation/configurations/BeforeTransferHook/README.md b/documentation/configurations/BeforeTransferHook/README.md new file mode 100644 index 00000000..da303df9 --- /dev/null +++ b/documentation/configurations/BeforeTransferHook/README.md @@ -0,0 +1,61 @@ +[Documentation](../../README.md) > [Configurations](../../README.md#configurations) > BeforeTransferHook + +# BeforeTransferHook + +Runs custom logic **before** the transfer starts — after access checks pass, before workers are spawned. + +## When it runs + +The orchestrator (`run/handler.ts`) calls `beforeTransferHook.execute()` once, in the main process only (not in worker processes). Runs after `config.register`, preset configuration, and access checks all succeed. + +## Default behavior + +No built-in hooks are registered. The composite executes an empty list. + +## Composite behavior + +Hooks use `{ multiple: true }` — registering a hook **adds** to the list rather than replacing existing ones. Multiple hooks execute sequentially in registration order. + +## Override example + +Send a Slack notification before the transfer begins: + +```typescript +// features/slackNotifyHook.ts +import { BeforeTransferHook } from "@webiny/data-transfer"; + +class SlackNotifyBeforeTransfer implements BeforeTransferHook.Interface { + public async execute(): Promise { + await fetch("https://hooks.slack.com/...", { + method: "POST", + body: JSON.stringify({ text: "Transfer starting..." }) + }); + } +} + +export const SlackNotifyBeforeTransferHook = BeforeTransferHook.createImplementation({ + implementation: SlackNotifyBeforeTransfer, + dependencies: [] +}); +``` + +Register it in the config: + +```typescript +export default createConfig({ + // ... + register: async container => { + container.register(SlackNotifyBeforeTransferHook); + } +}); +``` + +## API + +```typescript +interface BeforeTransferHook.Interface { + execute(): Promise; +} +``` + +**Source:** `src/features/TransferLifecycle/` diff --git a/documentation/configurations/IndexConfigurationProvider/README.md b/documentation/configurations/IndexConfigurationProvider/README.md new file mode 100644 index 00000000..c545aa00 --- /dev/null +++ b/documentation/configurations/IndexConfigurationProvider/README.md @@ -0,0 +1,100 @@ +[Documentation](../../README.md) > [Configurations](../../README.md#configurations) > IndexConfigurationProvider + +# IndexConfigurationProvider + +Controls the mappings and settings applied to OpenSearch indexes during a transfer. + +## When it runs + +`OsProcessor` calls `getConfiguration(indexName, base)` every time it touches an index. The `base` parameter contains the default Webiny mappings — your implementation receives it and returns a (possibly modified) configuration: + +- **New index** — the returned `mappings` and `settings` are passed to the `createIndex` call. +- **Existing index** — the returned `settings` are applied via `putIndexSettings` before data is written. + +In both cases the transfer engine overrides `index.refresh_interval` to `"-1"` (disabled during transfer, restored after). All other settings from the provider are preserved. + +## Default behavior + +The built-in implementation returns the `base` configuration unchanged — the default Webiny mappings with no custom settings. + +## Override example + +Increase the total fields limit and number of shards for all indexes: + +```typescript +// features/myIndexConfig.ts +import { IndexConfigurationProvider } from "@webiny/data-transfer"; + +class MyIndexConfigurationProvider implements IndexConfigurationProvider.Interface { + public getConfiguration( + _indexName: string, + base: IndexConfigurationProvider.Configuration + ): IndexConfigurationProvider.Configuration { + return { + ...base, + settings: { + index: { + "mapping.total_fields.limit": 2000, + number_of_shards: 2 + } + } + }; + } +} + +export const MyIndexConfigurationProviderImpl = IndexConfigurationProvider.createImplementation({ + implementation: MyIndexConfigurationProvider, + dependencies: [] +}); +``` + +Register it in the config: + +```typescript +import { createConfig, loadEnv } from "@webiny/data-transfer"; +import { MyIndexConfigurationProviderImpl } from "./features/myIndexConfig.ts"; + +loadEnv(import.meta.url); + +export default createConfig({ + // ... source, target, pipeline ... + register: async container => { + container.register(MyIndexConfigurationProviderImpl); + } +}); +``` + +## Per-index configuration + +The `indexName` parameter lets you return different settings per index: + +```typescript +public getConfiguration( + indexName: string, + base: IndexConfigurationProvider.Configuration +): IndexConfigurationProvider.Configuration { + if (indexName.startsWith("root-headless-cms-")) { + return { + ...base, + settings: { index: { "mapping.total_fields.limit": 5000 } } + }; + } + + return base; +} +``` + +## API + +```typescript +interface IndexConfigurationProvider.Interface { + getConfiguration(indexName: string, base: Configuration): Configuration; +} + +// Configuration uses the real OpenSearch SDK types from @webiny/api-opensearch: +// mappings → TypeMapping (from @opensearch-project/opensearch) +// settings → IndexSettings (from @opensearch-project/opensearch) +type IndexConfigurationProvider.Configuration = Pick; +``` + +**Source:** `src/features/IndexConfigurationProvider/` diff --git a/documentation/configurations/ModelProvider/README.md b/documentation/configurations/ModelProvider/README.md new file mode 100644 index 00000000..45bde838 --- /dev/null +++ b/documentation/configurations/ModelProvider/README.md @@ -0,0 +1,86 @@ +[Documentation](../../README.md) > [Configurations](../../README.md#configurations) > ModelProvider + +# ModelProvider + +Loads CMS model definitions used by transformers that need field metadata (e.g. rich-text visitor, field-type guards). + +## When it runs + +`ModelPreloaderHook` (an `AfterLoadPresetHook`) calls `preloadModels(tenantLocales)` once per worker, after the preset is loaded and before any records are processed. Transformers then call `getModel(modelId)` during record transformation. + +## Default behavior + +The built-in implementation: + +1. Queries the source DynamoDB table for model records (`T##L##CMS#CM`). +2. If `pipeline.modelsDir` is set, reads JSON files from that directory. JSON models override DB models (user-provided takes precedence). +3. Accepted JSON shapes (auto-detected, mixed OK in same dir): + - Single model: `{ modelId, fields: [...], ... }` + - Array of models: `[{ modelId, fields, ... }, ...]` + - Webiny export: `{ groups: [...], models: [...] }` + +## Override example + +Replace the built-in model loading with a custom source (e.g. an API or a different DB table): + +```typescript +// features/myModelProvider.ts +import { ModelProvider } from "@webiny/data-transfer"; + +class MyModelProvider implements ModelProvider.Interface { + public async preloadModels(_tenantLocales: Map): Promise { + // Load models from your custom source + } + + public getModel(modelId: string): ModelProvider.ModelType | undefined { + // Return model by ID + return undefined; + } + + public getModelIds(): string[] { + // Return all known model IDs + return []; + } +} + +export const MyModelProviderImpl = ModelProvider.createImplementation({ + implementation: MyModelProvider, + dependencies: [] +}); +``` + +Register it in the config: + +```typescript +export default createConfig({ + // ... + register: async container => { + container.register(MyModelProviderImpl); + } +}); +``` + +## API + +```typescript +interface ModelProvider.Interface { + preloadModels(tenantLocales: Map): Promise; + getModel(modelId: string): ModelProvider.ModelType | undefined; + getModelIds(): string[]; +} + +interface ModelProvider.ModelType { + PK: string; + SK: string; + modelId: string; + name: string; + fields: ModelProvider.Field[]; + layout?: string[][]; + locale?: string; + tenant?: string; + titleFieldId?: string; + [key: string]: unknown; +} +``` + +**Source:** `src/features/ModelProvider/` diff --git a/package.json b/package.json index 8e52a4b3..7c8f9163 100644 --- a/package.json +++ b/package.json @@ -64,14 +64,14 @@ "@types/jsdom": "^28.0.3", "@types/node": "^24.13.2", "@types/yargs": "^17.0.35", - "@vitest/coverage-v8": "4.1.8", + "@vitest/coverage-v8": "^4.1.9", "adio": "^3.0.1", "aws-sdk-client-mock": "^4.1.0", "dynalite": "^4.0.0", "oxfmt": "^0.54.0", "oxlint": "^1.69.0", "typescript": "^6.0.3", - "vitest": "^4.1.8" + "vitest": "^4.1.9" }, "engines": { "node": ">=24.0.0" diff --git a/src/bootstrap.ts b/src/bootstrap.ts index 02733725..2cd382a4 100644 --- a/src/bootstrap.ts +++ b/src/bootstrap.ts @@ -30,6 +30,7 @@ import { AuditLogProcessorFeature } from "~/features/AuditLogProcessor/index.ts" import { OsRecordDecompressorFeature } from "~/features/OsRecordDecompressor/index.ts"; import { OsScannerFeature } from "~/features/OsScanner/index.ts"; import { OsProcessorFeature } from "~/features/OsProcessor/index.ts"; +import { IndexConfigurationProviderFeature } from "~/features/IndexConfigurationProvider/index.ts"; import { TouchedIndexesFeature } from "~/features/TouchedIndexes/index.ts"; import { AccessCheckerFeature } from "~/features/AccessChecker/index.ts"; import { DroppedRecordLogFeature } from "~/features/DroppedRecordLog/index.ts"; @@ -126,6 +127,7 @@ export function bootstrap(options: BootstrapOptions): Container { DdbScannerFeature.register(container); DdbProcessorFeature.register(container); AuditLogProcessorFeature.register(container); + IndexConfigurationProviderFeature.register(container); TouchedIndexesFeature.register(container); OsRecordDecompressorFeature.register(container); OsScannerFeature.register(container); diff --git a/src/commands/processSegment/handler.ts b/src/commands/processSegment/handler.ts index 100c2a5e..cd85aad6 100644 --- a/src/commands/processSegment/handler.ts +++ b/src/commands/processSegment/handler.ts @@ -38,6 +38,10 @@ export async function handler(argv: ProcessSegmentArgs): Promise { await loadUserSetup(argv.config, container, logger); + if (config.register) { + await config.register(container); + } + const beforeLoadPreset = container.resolve(BeforeLoadPresetHook); await beforeLoadPreset.execute(config); diff --git a/src/commands/run/handler.ts b/src/commands/run/handler.ts index 1f5b6b31..af96b342 100644 --- a/src/commands/run/handler.ts +++ b/src/commands/run/handler.ts @@ -92,6 +92,10 @@ export async function handler( try { await loadUserSetup(configPath, container, logger); + if (config.register) { + await config.register(container); + } + const presetLoader = container.resolve(PresetLoader); const preset = await presetLoader.load(presetName); diff --git a/src/features/IndexConfigurationProvider/IndexConfigurationProvider.ts b/src/features/IndexConfigurationProvider/IndexConfigurationProvider.ts new file mode 100644 index 00000000..b2612c75 --- /dev/null +++ b/src/features/IndexConfigurationProvider/IndexConfigurationProvider.ts @@ -0,0 +1,16 @@ +import { IndexConfigurationProvider as IndexConfigurationProviderAbstraction } from "./abstractions/IndexConfigurationProvider.ts"; + +class IndexConfigurationProviderImpl implements IndexConfigurationProviderAbstraction.Interface { + public getConfiguration( + _indexName: string, + base: IndexConfigurationProviderAbstraction.Configuration + ): IndexConfigurationProviderAbstraction.Configuration { + return base; + } +} + +export const IndexConfigurationProvider = + IndexConfigurationProviderAbstraction.createImplementation({ + implementation: IndexConfigurationProviderImpl, + dependencies: [] + }); diff --git a/src/features/IndexConfigurationProvider/IndexConfigurationResolver.ts b/src/features/IndexConfigurationProvider/IndexConfigurationResolver.ts new file mode 100644 index 00000000..841707fb --- /dev/null +++ b/src/features/IndexConfigurationProvider/IndexConfigurationResolver.ts @@ -0,0 +1,26 @@ +import { getBaseConfiguration } from "@webiny/api-opensearch/indexConfiguration/index.js"; +import { IndexConfigurationProvider } from "./abstractions/IndexConfigurationProvider.ts"; +import { IndexConfigurationResolver as IndexConfigurationResolverAbstraction } from "./abstractions/IndexConfigurationResolver.ts"; + +class IndexConfigurationResolverImpl implements IndexConfigurationResolverAbstraction.Interface { + public constructor(private readonly provider: IndexConfigurationProvider.Interface) {} + + public resolve(indexName: string): IndexConfigurationProvider.Configuration { + const base = this.getBaseConfiguration(); + return this.provider.getConfiguration(indexName, base); + } + + private getBaseConfiguration(): IndexConfigurationProvider.Configuration { + const baseConfig = getBaseConfiguration(); + return structuredClone({ + mappings: baseConfig.mappings, + settings: baseConfig.settings + }); + } +} + +export const IndexConfigurationResolver = + IndexConfigurationResolverAbstraction.createImplementation({ + implementation: IndexConfigurationResolverImpl, + dependencies: [IndexConfigurationProvider] + }); diff --git a/src/features/IndexConfigurationProvider/abstractions/IndexConfigurationProvider.ts b/src/features/IndexConfigurationProvider/abstractions/IndexConfigurationProvider.ts new file mode 100644 index 00000000..3c52c869 --- /dev/null +++ b/src/features/IndexConfigurationProvider/abstractions/IndexConfigurationProvider.ts @@ -0,0 +1,20 @@ +import type { OpenSearchIndexRequestBody } from "@webiny/api-opensearch/types.js"; +import { createAbstraction } from "~/base/index.ts"; + +interface IndexConfiguration { + mappings?: OpenSearchIndexRequestBody["mappings"]; + settings?: OpenSearchIndexRequestBody["settings"]; +} + +interface IIndexConfigurationProvider { + getConfiguration(indexName: string, base: IndexConfiguration): IndexConfiguration; +} + +export const IndexConfigurationProvider = createAbstraction( + "Core/IndexConfigurationProvider" +); + +export namespace IndexConfigurationProvider { + export type Interface = IIndexConfigurationProvider; + export type Configuration = IndexConfiguration; +} diff --git a/src/features/IndexConfigurationProvider/abstractions/IndexConfigurationResolver.ts b/src/features/IndexConfigurationProvider/abstractions/IndexConfigurationResolver.ts new file mode 100644 index 00000000..c2c5ccc2 --- /dev/null +++ b/src/features/IndexConfigurationProvider/abstractions/IndexConfigurationResolver.ts @@ -0,0 +1,14 @@ +import { createAbstraction } from "~/base/index.ts"; +import type { IndexConfigurationProvider } from "./IndexConfigurationProvider.ts"; + +interface IIndexConfigurationResolver { + resolve(indexName: string): IndexConfigurationProvider.Configuration; +} + +export const IndexConfigurationResolver = createAbstraction( + "Core/IndexConfigurationResolver" +); + +export namespace IndexConfigurationResolver { + export type Interface = IIndexConfigurationResolver; +} diff --git a/src/features/IndexConfigurationProvider/abstractions/index.ts b/src/features/IndexConfigurationProvider/abstractions/index.ts new file mode 100644 index 00000000..c2167140 --- /dev/null +++ b/src/features/IndexConfigurationProvider/abstractions/index.ts @@ -0,0 +1,2 @@ +export { IndexConfigurationProvider } from "./IndexConfigurationProvider.ts"; +export { IndexConfigurationResolver } from "./IndexConfigurationResolver.ts"; diff --git a/src/features/IndexConfigurationProvider/feature.ts b/src/features/IndexConfigurationProvider/feature.ts new file mode 100644 index 00000000..26c68465 --- /dev/null +++ b/src/features/IndexConfigurationProvider/feature.ts @@ -0,0 +1,11 @@ +import { createFeature } from "~/base/index.ts"; +import { IndexConfigurationProvider } from "./IndexConfigurationProvider.ts"; +import { IndexConfigurationResolver } from "./IndexConfigurationResolver.ts"; + +export const IndexConfigurationProviderFeature = createFeature({ + name: "Core/IndexConfigurationProviderFeature", + register(container) { + container.register(IndexConfigurationProvider).inSingletonScope(); + container.register(IndexConfigurationResolver).inSingletonScope(); + } +}); diff --git a/src/features/IndexConfigurationProvider/index.ts b/src/features/IndexConfigurationProvider/index.ts new file mode 100644 index 00000000..81664e1a --- /dev/null +++ b/src/features/IndexConfigurationProvider/index.ts @@ -0,0 +1,3 @@ +export { IndexConfigurationProvider } from "./abstractions/IndexConfigurationProvider.ts"; +export { IndexConfigurationResolver } from "./abstractions/IndexConfigurationResolver.ts"; +export { IndexConfigurationProviderFeature } from "./feature.ts"; diff --git a/src/features/MigrationConfig/schemas/shared.schema.ts b/src/features/MigrationConfig/schemas/shared.schema.ts index 5d1ccd1b..c1685929 100644 --- a/src/features/MigrationConfig/schemas/shared.schema.ts +++ b/src/features/MigrationConfig/schemas/shared.schema.ts @@ -1,5 +1,8 @@ +import type { Container } from "@webiny/di"; import { z } from "zod"; +export type RegisterFn = (container: Container) => void | Promise; + /** * Non-empty string that trims whitespace before validating. Catches the * common copy-paste mistake of a trailing/leading space — which AWS @@ -47,6 +50,12 @@ export const credentialsOrProviderSchema = z.union([ }) ]); +export const registerSchema = z + .custom(val => typeof val === "function", { + message: "register must be a function that receives a Container" + }) + .optional(); + export const pipelineSettingsSchema = z .object({ segments: z.number().int().positive().optional(), diff --git a/src/features/MigrationConfig/schemas/unified.schema.ts b/src/features/MigrationConfig/schemas/unified.schema.ts index 936ead0a..3b807074 100644 --- a/src/features/MigrationConfig/schemas/unified.schema.ts +++ b/src/features/MigrationConfig/schemas/unified.schema.ts @@ -3,6 +3,7 @@ import { credentialsOrProviderSchema, debugSettingsSchema, pipelineSettingsSchema, + registerSchema, trimmedString, tuningSchema } from "./shared.schema.ts"; @@ -60,6 +61,7 @@ export const unifiedTransferInputSchema = z source: sourceSchema, target: targetSchema, pipeline: pipelineSettingsSchema, + register: registerSchema, tuning: tuningSchema, debug: debugSettingsSchema, fileUrls: fileUrlsSchema diff --git a/src/features/OsProcessor/OsProcessor.ts b/src/features/OsProcessor/OsProcessor.ts index 35308d1d..3d090f6c 100644 --- a/src/features/OsProcessor/OsProcessor.ts +++ b/src/features/OsProcessor/OsProcessor.ts @@ -1,7 +1,7 @@ import { join } from "node:path"; import { Container } from "@webiny/di"; -import { getBaseConfiguration } from "@webiny/api-opensearch/indexConfiguration/index.js"; -import { isRetryableAwsError, ContainerToken } from "~/base/index.ts"; +import { ContainerToken, isRetryableAwsError } from "~/base/index.ts"; +import { IndexConfigurationResolver } from "~/features/IndexConfigurationProvider/abstractions/IndexConfigurationResolver.ts"; import { AccessCheck, Processor } from "~/domain/pipeline/abstractions/Processor.ts"; import { DdbExecutor } from "~/features/DdbExecutor/abstractions/DdbExecutor.ts"; import { @@ -58,7 +58,8 @@ class OsProcessorImpl implements Processor.Interface< private readonly dirTool: DirectoryTool.Interface, private readonly fileTool: FileTool.Interface, private readonly sourceDb: SourceDynamoDbClient.Interface, - private readonly targetDb: TargetDynamoDbClient.Interface + private readonly targetDb: TargetDynamoDbClient.Interface, + private readonly indexConfigurationResolver: IndexConfigurationResolver.Interface ) {} private get osClient(): OpenSearchClient.Interface { @@ -205,16 +206,22 @@ class OsProcessorImpl implements Processor.Interface< ? current.refreshInterval : DEFAULT_REFRESH_INTERVAL; + const resolved = this.indexConfigurationResolver.resolve(indexName); + const resolvedIndexSettings = resolved.settings?.index ?? {}; + try { await this.osClient.putIndexSettings(indexName, { - index: { refresh_interval: DISABLED_REFRESH_INTERVAL } + index: { + ...resolvedIndexSettings, + refresh_interval: DISABLED_REFRESH_INTERVAL + } }); this.logger.info( `Disabled refresh on existing index: ${indexName} (was: ${originalRefresh})` ); } catch (settingsError) { this.logger.warn( - `Failed to disable refresh on index: ${indexName}. Continuing. Error: ${settingsError}` + `Failed to update settings on index: ${indexName}. Continuing. Error: ${settingsError}` ); } @@ -223,11 +230,15 @@ class OsProcessorImpl implements Processor.Interface< private async createNewIndex(indexName: string): Promise { try { - const baseConfig = getBaseConfiguration(); + const resolved = this.indexConfigurationResolver.resolve(indexName); + const resolvedIndexSettings = + (resolved.settings?.index as Record | undefined) ?? {}; + await this.osClient.createIndex(indexName, { - mappings: baseConfig.mappings, + mappings: resolved.mappings, settings: { index: { + ...resolvedIndexSettings, refresh_interval: DISABLED_REFRESH_INTERVAL } } @@ -304,6 +315,7 @@ export const OsProcessor = Processor.createImplementation({ DirectoryTool, FileTool, SourceDynamoDbClient, - TargetDynamoDbClient + TargetDynamoDbClient, + IndexConfigurationResolver ] }); diff --git a/src/index.ts b/src/index.ts index 352be2e0..562c56b1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -100,6 +100,17 @@ export type { // Transformer type shape for custom transformers. export type { Transformer } from "./domain/pipeline/abstractions/Transformer.ts"; +// IndexConfigurationProvider — override to customize OS index mappings/settings. +export { IndexConfigurationProvider } from "./features/IndexConfigurationProvider/index.ts"; + +// ModelProvider — override to customize CMS model loading. +export { ModelProvider } from "./features/ModelProvider/index.ts"; + +// Lifecycle hooks — register additional hooks via config.register. +// Hooks use { multiple: true } so registering adds to the list, not replaces. +export { BeforeTransferHook, AfterTransferHook } from "./features/TransferLifecycle/index.ts"; +export { BeforeLoadPresetHook, AfterLoadPresetHook } from "./features/PresetLifecycle/index.ts"; + // Pipeline-builder-factory helper type used when typing factory input // (NonEmptyArray<...>). export type { NonEmptyArray } from "./features/PipelineBuilderFactory/index.ts"; diff --git a/templates/internal-project/config.ts b/templates/internal-project/config.ts index 74ce67fb..c2ea3149 100644 --- a/templates/internal-project/config.ts +++ b/templates/internal-project/config.ts @@ -50,6 +50,10 @@ export default createConfig({ modelsDir: fromEnv("MODELS_DIR", "./models"), presetsDir: "./presets" }, + // register: async (container) => { + // // Register custom DI services before the preset runs, e.g.: + // // container.register(MyCustomProcessor); + // }, tuning: { flushEvery: numberFromEnv("FLUSH_EVERY", 500) } diff --git a/yarn.lock b/yarn.lock index e5a1bb03..48a27633 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2553,15 +2553,15 @@ __metadata: linkType: hard "@csstools/css-color-parser@npm:^4.1.0": - version: 4.1.3 - resolution: "@csstools/css-color-parser@npm:4.1.3" + version: 4.1.7 + resolution: "@csstools/css-color-parser@npm:4.1.7" dependencies: "@csstools/color-helpers": "npm:^6.0.2" "@csstools/css-calc": "npm:^3.2.1" peerDependencies: "@csstools/css-parser-algorithms": ^4.0.0 "@csstools/css-tokenizer": ^4.0.0 - checksum: 10/8fc39d3a7d91208dbd03b5976ed1305518e6bb4d4c55d9be7d8365449e0c8d31f3b8ce7a5e741413745d98445b7c8753110dee41efd2ea604cb1bd35426d488d + checksum: 10/c7d351048bf1ce079afe3cd8e8a9092a498e9a229ace5eda7cf5b22badcaaf5ea5f34249307cab02316b534ee67abd4223ac9dd4ba0adf1ec50fd03993388d3b languageName: node linkType: hard @@ -7540,8 +7540,8 @@ __metadata: linkType: hard "@rollup/rollup-linux-x64-gnu@npm:^4.24.0": - version: 4.61.1 - resolution: "@rollup/rollup-linux-x64-gnu@npm:4.61.1" + version: 4.62.0 + resolution: "@rollup/rollup-linux-x64-gnu@npm:4.62.0" conditions: os=linux & cpu=x64 & libc=glibc languageName: node linkType: hard @@ -7564,8 +7564,8 @@ __metadata: linkType: hard "@rsbuild/core@npm:~2.0.6": - version: 2.0.12 - resolution: "@rsbuild/core@npm:2.0.12" + version: 2.0.14 + resolution: "@rsbuild/core@npm:2.0.14" dependencies: "@rspack/core": "npm:~2.0.8" "@swc/helpers": "npm:^0.5.23" @@ -7576,7 +7576,7 @@ __metadata: optional: true bin: rsbuild: ./bin/rsbuild.js - checksum: 10/bc7dfef1ed8ee01fe5215da0dd5ffcd6a7b266e54ffca5f41dc9aba42f568ac620631e33ba08fb883cd5edde9be1db619d5a98ff723e1803a03ff7461b649f50 + checksum: 10/884e6060e5edfb9edb6306bcecf945b92202228cd8f5b49b0cf9d905d969dfc2566e34206441ad2d0ac30b90b3b904bf686f510a87cf1559b1172ed57c554750 languageName: node linkType: hard @@ -9113,12 +9113,12 @@ __metadata: languageName: node linkType: hard -"@vitest/coverage-v8@npm:4.1.8": - version: 4.1.8 - resolution: "@vitest/coverage-v8@npm:4.1.8" +"@vitest/coverage-v8@npm:^4.1.9": + version: 4.1.9 + resolution: "@vitest/coverage-v8@npm:4.1.9" dependencies: "@bcoe/v8-coverage": "npm:^1.0.2" - "@vitest/utils": "npm:4.1.8" + "@vitest/utils": "npm:4.1.9" ast-v8-to-istanbul: "npm:^1.0.0" istanbul-lib-coverage: "npm:^3.2.2" istanbul-lib-report: "npm:^3.0.1" @@ -9128,34 +9128,34 @@ __metadata: std-env: "npm:^4.0.0-rc.1" tinyrainbow: "npm:^3.1.0" peerDependencies: - "@vitest/browser": 4.1.8 - vitest: 4.1.8 + "@vitest/browser": 4.1.9 + vitest: 4.1.9 peerDependenciesMeta: "@vitest/browser": optional: true - checksum: 10/08d9ea65ca4cc007a1f1cdc85ea36d51bfa91a9b2f0e9ad27436b777629b4138e33dba2f68c8e68b01343310bf9d5624ad1d6d24553a5b289b66da51561259eb + checksum: 10/1f236e17336973868aa6e7662b863b1c519d07840107daab3465429652741fc1f8a8988d1b7b28b8cfa883c7280f7479927a85e56c7874a0ddc5cc8ceda25cd6 languageName: node linkType: hard -"@vitest/expect@npm:4.1.8": - version: 4.1.8 - resolution: "@vitest/expect@npm:4.1.8" +"@vitest/expect@npm:4.1.9": + version: 4.1.9 + resolution: "@vitest/expect@npm:4.1.9" dependencies: "@standard-schema/spec": "npm:^1.1.0" "@types/chai": "npm:^5.2.2" - "@vitest/spy": "npm:4.1.8" - "@vitest/utils": "npm:4.1.8" + "@vitest/spy": "npm:4.1.9" + "@vitest/utils": "npm:4.1.9" chai: "npm:^6.2.2" tinyrainbow: "npm:^3.1.0" - checksum: 10/cb7d78e250ec77b7e180ac3e5f543501488c69b237d7ed97ffe9196c5e946b0e4a37be05a2ec38af7ce7750c1a98286480acdd247286a29c239b08a13b085d4b + checksum: 10/aba1a06cd28199f9c861d97797b014c0584fa6f6197e78345da0db5f74914d47f18958bb848658e889ca44452aa61e07ae851c16ea7b2175afd50d649dd4ed8c languageName: node linkType: hard -"@vitest/mocker@npm:4.1.8": - version: 4.1.8 - resolution: "@vitest/mocker@npm:4.1.8" +"@vitest/mocker@npm:4.1.9": + version: 4.1.9 + resolution: "@vitest/mocker@npm:4.1.9" dependencies: - "@vitest/spy": "npm:4.1.8" + "@vitest/spy": "npm:4.1.9" estree-walker: "npm:^3.0.3" magic-string: "npm:^0.30.21" peerDependencies: @@ -9166,56 +9166,56 @@ __metadata: optional: true vite: optional: true - checksum: 10/fc977703b07d950aa170bafdef988bc7ba88f0a80159d1563ce95696763729ec1f6d015012aad36cf4e1b522d327b205292c56d76692d2a9f72285d694ed3cba + checksum: 10/3e35ff3e2ecbdfbcae598e9c5c83978dd5f0cf3b16df37cf947c80faabce797ab275ca2075c3bb8ca85f595f3070267f93cb6798bbe415f1af2698f51833974c languageName: node linkType: hard -"@vitest/pretty-format@npm:4.1.8": - version: 4.1.8 - resolution: "@vitest/pretty-format@npm:4.1.8" +"@vitest/pretty-format@npm:4.1.9": + version: 4.1.9 + resolution: "@vitest/pretty-format@npm:4.1.9" dependencies: tinyrainbow: "npm:^3.1.0" - checksum: 10/56a4b685cdf9f2e9708025f17dab8c0fa990ab06e5b38606a1ddde52a09830a099843da6a1b127ee48217ab023bad7bd23c49eb4969d77dff07df363fad0bb0e + checksum: 10/52512b300c000594c54bebbbfe31fab39e416a35d3686e2c46bc8e48ef8476d32306605f7736139608c3962943e0d22790dc15a3e6b1ffa436143d31f743a7c8 languageName: node linkType: hard -"@vitest/runner@npm:4.1.8": - version: 4.1.8 - resolution: "@vitest/runner@npm:4.1.8" +"@vitest/runner@npm:4.1.9": + version: 4.1.9 + resolution: "@vitest/runner@npm:4.1.9" dependencies: - "@vitest/utils": "npm:4.1.8" + "@vitest/utils": "npm:4.1.9" pathe: "npm:^2.0.3" - checksum: 10/278d1482123877343731b3bb822d0280af928252ee263aab73ca189c39de3bb767ce715581870b2e1eb408f7cba01106a6989406cb2ada1332f181912558a3c1 + checksum: 10/52e4e16e627faa62676f17683e570f505d58d2ce0ef421a3ae60e70c0ec5606d4af090fa6c7d5717d6e949f4401d6357b1f69cf06e52a5455a0ad9c9040268c0 languageName: node linkType: hard -"@vitest/snapshot@npm:4.1.8": - version: 4.1.8 - resolution: "@vitest/snapshot@npm:4.1.8" +"@vitest/snapshot@npm:4.1.9": + version: 4.1.9 + resolution: "@vitest/snapshot@npm:4.1.9" dependencies: - "@vitest/pretty-format": "npm:4.1.8" - "@vitest/utils": "npm:4.1.8" + "@vitest/pretty-format": "npm:4.1.9" + "@vitest/utils": "npm:4.1.9" magic-string: "npm:^0.30.21" pathe: "npm:^2.0.3" - checksum: 10/162ca0eccb72db02081b04307d21ac8d14c8fcd4a840872459274f589b1665f108bd4119dff19d5a2150a0e26b90531791ebec7ee74f0c2c5285b491cebbcfcb + checksum: 10/c83349b1ad08d48284c1d3393168a7b7faffd24ace1ef337751a568dad322d83b0f9bc29378a4a60379cf2a13a268092b1d802936d6adb1ca28859f02dad8b87 languageName: node linkType: hard -"@vitest/spy@npm:4.1.8": - version: 4.1.8 - resolution: "@vitest/spy@npm:4.1.8" - checksum: 10/53e948d8f5e229e969e704dc8a54fd42ad715b2b18f401592f4bba97dcf33bd4cf01d11af577d4efe42dc2d90c9e6574ec991531fd8f1bdfee916a1dd0828547 +"@vitest/spy@npm:4.1.9": + version: 4.1.9 + resolution: "@vitest/spy@npm:4.1.9" + checksum: 10/8b8e42cc8e4b20d29bd8b312f34b9dbf2e20d4b4cdc24e3bcf6fd4d3b1f49e8924636d2730cca3946fbb45de893dfb531c77b832eb853c2624fdc2b800444e75 languageName: node linkType: hard -"@vitest/utils@npm:4.1.8": - version: 4.1.8 - resolution: "@vitest/utils@npm:4.1.8" +"@vitest/utils@npm:4.1.9": + version: 4.1.9 + resolution: "@vitest/utils@npm:4.1.9" dependencies: - "@vitest/pretty-format": "npm:4.1.8" + "@vitest/pretty-format": "npm:4.1.9" convert-source-map: "npm:^2.0.0" tinyrainbow: "npm:^3.1.0" - checksum: 10/13250b9e7825d425cc9a3d22aeb2e8d117c93e96a192138e93d76bfe7d5a391ab3888c5aa9e0394b0314bdff41e441ad7a32b0c0caa00cd202223b88087dcc78 + checksum: 10/78f5969fc09b1a95fda9dadd37e84a3a6ead35f66af15ad3b792eef35f80407047803e7afd53df86a8d794f59bf25ffbdc4146099140a3d5f9b51ea061bf2308 languageName: node linkType: hard @@ -9626,7 +9626,7 @@ __metadata: "@types/jsdom": "npm:^28.0.3" "@types/node": "npm:^24.13.2" "@types/yargs": "npm:^17.0.35" - "@vitest/coverage-v8": "npm:4.1.8" + "@vitest/coverage-v8": "npm:^4.1.9" "@webiny/api-headless-cms-ddb-es": "npm:^6.4.2" "@webiny/api-opensearch": "npm:^6.4.2" "@webiny/aws-sdk": "npm:^6.4.2" @@ -9648,7 +9648,7 @@ __metadata: sharp: "npm:^0.35.1" tsx: "npm:^4.22.4" typescript: "npm:^6.0.3" - vitest: "npm:^4.1.8" + vitest: "npm:^4.1.9" yargs: "npm:^18.0.0" zod: "npm:^4.4.3" bin: @@ -16716,13 +16716,13 @@ __metadata: linkType: hard "regjsparser@npm:^0.13.0": - version: 0.13.1 - resolution: "regjsparser@npm:0.13.1" + version: 0.13.2 + resolution: "regjsparser@npm:0.13.2" dependencies: jsesc: "npm:~3.1.0" bin: regjsparser: bin/parser - checksum: 10/3383e9dab8bc8cd09efcd9538191b1e194b1921438ca69fce833d1a447d0625635229464cbc6cb03f33e5d342f2d343e2738fdac9132e2470bca621e480c02ec + checksum: 10/291aecbd47371cee347a96c47ccaae729ba50b7b2cb2a5de7e088e2ab835fe133569422f06ae28f5ff0830ac03f3196a35ba493f23ecda086d82e3e326f14074 languageName: node linkType: hard @@ -18824,17 +18824,17 @@ __metadata: languageName: node linkType: hard -"vitest@npm:^4.1.8": - version: 4.1.8 - resolution: "vitest@npm:4.1.8" +"vitest@npm:^4.1.9": + version: 4.1.9 + resolution: "vitest@npm:4.1.9" dependencies: - "@vitest/expect": "npm:4.1.8" - "@vitest/mocker": "npm:4.1.8" - "@vitest/pretty-format": "npm:4.1.8" - "@vitest/runner": "npm:4.1.8" - "@vitest/snapshot": "npm:4.1.8" - "@vitest/spy": "npm:4.1.8" - "@vitest/utils": "npm:4.1.8" + "@vitest/expect": "npm:4.1.9" + "@vitest/mocker": "npm:4.1.9" + "@vitest/pretty-format": "npm:4.1.9" + "@vitest/runner": "npm:4.1.9" + "@vitest/snapshot": "npm:4.1.9" + "@vitest/spy": "npm:4.1.9" + "@vitest/utils": "npm:4.1.9" es-module-lexer: "npm:^2.0.0" expect-type: "npm:^1.3.0" magic-string: "npm:^0.30.21" @@ -18852,12 +18852,12 @@ __metadata: "@edge-runtime/vm": "*" "@opentelemetry/api": ^1.9.0 "@types/node": ^20.0.0 || ^22.0.0 || >=24.0.0 - "@vitest/browser-playwright": 4.1.8 - "@vitest/browser-preview": 4.1.8 - "@vitest/browser-webdriverio": 4.1.8 - "@vitest/coverage-istanbul": 4.1.8 - "@vitest/coverage-v8": 4.1.8 - "@vitest/ui": 4.1.8 + "@vitest/browser-playwright": 4.1.9 + "@vitest/browser-preview": 4.1.9 + "@vitest/browser-webdriverio": 4.1.9 + "@vitest/coverage-istanbul": 4.1.9 + "@vitest/coverage-v8": 4.1.9 + "@vitest/ui": 4.1.9 happy-dom: "*" jsdom: "*" vite: ^6.0.0 || ^7.0.0 || ^8.0.0 @@ -18887,8 +18887,8 @@ __metadata: vite: optional: false bin: - vitest: vitest.mjs - checksum: 10/b9f1308436717da9558b36e149cac6bab8e3730aa7e90b49f9d7a84ba853e353d8afba7d406dc0abec731fb2a9ea9e92b89aba06b94b1a2802203048b43468af + vitest: ./vitest.mjs + checksum: 10/64f9d1a0aae92c493c39822ecae8ec5b5a336fc27166f776d08c01ae79ef1ec5485a1826ef1451bb05df2edaae109894125c2ecceadaa56c17be2690f66f9758 languageName: node linkType: hard