diff --git a/docs/adr/42235-prioritize-engine-typo-errors-before-schema-validation.md b/docs/adr/42235-prioritize-engine-typo-errors-before-schema-validation.md new file mode 100644 index 00000000000..3d3eac4bdd2 --- /dev/null +++ b/docs/adr/42235-prioritize-engine-typo-errors-before-schema-validation.md @@ -0,0 +1,48 @@ +# ADR-42235: Prioritize Engine Typo Errors Before Schema Validation + +**Date**: 2026-06-29 +**Status**: Draft +**Deciders**: pelikhan, copilot-swe-agent + +--- + +### Context + +The workflow compiler validates frontmatter in two sequential stages: JSON schema validation (which checks field names and value types) followed by engine-specific validation. When a workflow file contains both an engine typo (e.g., `engine: copiilot`) and an unrelated schema violation (e.g., an unknown field), the schema validator runs first and its generic error shadows the more actionable "invalid engine" error with the "Did you mean: copilot?" suggestion. The user receives a confusing, misleading error that points to an unrelated problem instead of the root cause. Additionally, when `engine` holds a non-string value (e.g., `engine: 123`), the schema error reported a duplicate `(line N, col N)` location in the message body, producing redundant noise in the output. + +### Decision + +We will pre-validate the `engine` field as a string value before the main JSON schema validation and import resolution steps. If the engine value is a non-empty string that is not a recognized engine name, the compiler will immediately return an error pointing to the `engine` field's source location with the existing suggestion text (e.g., "Did you mean: copilot?"), short-circuiting all further validation. This is implemented as `validateStringEngineBeforeSchema`, called from both `parseFrontmatterSection` and `ParseWorkflowString`, ensuring consistent behaviour across file-based and in-memory compilation paths. + +### Alternatives Considered + +#### Alternative 1: Post-process and reorder all collected errors + +Collect all validation errors from schema validation, import resolution, and engine validation, then sort or filter them so engine errors are promoted to first position before returning to the caller. + +This was not chosen because it requires a full validation pass before any error can be surfaced, increases complexity by adding an error-sorting/ranking layer, and risks silently dropping or interleaving errors that depend on ordering guarantees elsewhere in the pipeline. + +#### Alternative 2: Encode engine validation inside the JSON schema as an enum constraint + +Add the list of valid engine names as an `enum` in the frontmatter JSON schema so that schema validation itself rejects invalid engine values, producing a single unified validation pass. + +This was not chosen because it would lose the rich, tailored error message ("Did you mean: copilot?" with fuzzy matching) that the existing `getAgenticEngine` error path provides, and would require the schema to be regenerated every time a new engine is added, creating a maintenance coupling between the schema artifact and the engine registry. + +### Consequences + +#### Positive +- Engine typos always surface at the correct source line and column with the "Did you mean: X?" suggestion, regardless of what other errors are present in the file. +- Engine type errors (non-string values) now produce a single, clean error location without duplicate `(line N, col N)` fragments in the message body. +- The fix applies consistently across both compilation entry points (`CompileWorkflow` and `ParseWorkflowString`). + +#### Negative +- Every compilation of a workflow with a non-empty string `engine` field now incurs an extra `getAgenticEngine` lookup before schema validation — a minor performance overhead for valid workflows. +- The `engine` field now has a dual validation pathway: the pre-validation step handles string-typed typos, while the JSON schema still handles non-string types and missing values. These two paths must be kept in sync if the engine field contract changes. + +#### Neutral +- Two new regression tests are added to the compiler test suite covering engine-typo-before-schema-errors and engine-typo-before-import-errors scenarios; existing tests for the import-precedence case remain unchanged. +- The `validateStringEngineBeforeSchema` helper is defined on `*Compiler` and shares the existing `formatCompilerErrorWithContext` and `readSourceContextLines` infrastructure. + +--- + +*ADR created by [adr-writer agent]. Review and finalize before changing status from Draft to Accepted.* diff --git a/pkg/workflow/compiler_orchestrator_frontmatter.go b/pkg/workflow/compiler_orchestrator_frontmatter.go index 6b7063ef18b..61e708f4f1e 100644 --- a/pkg/workflow/compiler_orchestrator_frontmatter.go +++ b/pkg/workflow/compiler_orchestrator_frontmatter.go @@ -29,6 +29,44 @@ type frontmatterParseResult struct { redirectTarget string } +func (c *Compiler) validateEngineBeforeSchema( + cleanPath string, + content []byte, + result *parser.FrontmatterResult, + frontmatterForValidation map[string]any, +) error { + engineValue, ok := frontmatterForValidation["engine"].(string) + // Keep the empty-string default-engine behavior, but let whitespace-only values + // fall through to getAgenticEngine so they surface as invalid engine typos. + if !ok || engineValue == "" { + return nil + } + + if _, err := c.getAgenticEngine(engineValue); err != nil { + line := result.FieldLines["engine"] + if line == 0 { + line = findFrontmatterFieldLine(result.FrontmatterLines, result.FrontmatterStart, "engine") + } + if line == 0 { + line = 1 + } + + return formatCompilerErrorWithContext( + cleanPath, + line, + // Point to the field key for invalid string engine names so the location + // stays stable even when the specific invalid value changes. + 1, + "error", + err.Error(), + err, + readSourceContextLines(content, line), + ) + } + + return nil +} + // parseFrontmatterSection reads the workflow file and parses its frontmatter. // It returns a frontmatterParseResult containing the parsed data and validation information. // If the workflow is detected as a shared workflow (no 'on' field), isSharedWorkflow is set to true. @@ -135,6 +173,11 @@ func (c *Compiler) parseFrontmatterSection(markdownPath string) (*frontmatterPar return nil, errors.New("no markdown content found") } + if err := c.validateEngineBeforeSchema(cleanPath, content, result, frontmatterForValidation); err != nil { + orchestratorFrontmatterLog.Printf("String engine pre-validation failed: %v", err) + return nil, err + } + // Validate main workflow frontmatter contains only expected entries orchestratorFrontmatterLog.Printf("Validating main workflow frontmatter schema") if err := parser.ValidateMainWorkflowFrontmatterWithSchemaAndLocation(frontmatterForValidation, cleanPath); err != nil { diff --git a/pkg/workflow/compiler_string_api.go b/pkg/workflow/compiler_string_api.go index d88edded5be..2a322a7cca5 100644 --- a/pkg/workflow/compiler_string_api.go +++ b/pkg/workflow/compiler_string_api.go @@ -58,6 +58,7 @@ func (c *Compiler) ParseWorkflowString(content string, virtualPath string) (*Wor workflowLog.Printf("ParseWorkflowString: parsing %d bytes with virtual path %s", len(content), virtualPath) cleanPath := filepath.Clean(virtualPath) + contentBytes := []byte(content) // Store content so downstream code can use it instead of reading from disk. // Cleared in CompileToYAML after compilation completes. @@ -108,6 +109,11 @@ func (c *Compiler) ParseWorkflowString(content string, virtualPath string) (*Wor return nil, &SharedWorkflowError{Path: cleanPath} } + if err := c.validateEngineBeforeSchema(cleanPath, contentBytes, result, frontmatterForValidation); err != nil { + compilerStringAPILog.Printf("ParseWorkflowString: string engine pre-validation failed for %s", cleanPath) + return nil, err + } + // Validate frontmatter against schema if err := parser.ValidateMainWorkflowFrontmatterWithSchemaAndLocation(frontmatterForValidation, cleanPath); err != nil { compilerStringAPILog.Printf("ParseWorkflowString: schema validation failed for %s", cleanPath) @@ -119,7 +125,7 @@ func (c *Compiler) ParseWorkflowString(content string, virtualPath string) (*Wor // Build parse result to reuse the rest of the orchestrator pipeline parseResult := &frontmatterParseResult{ cleanPath: cleanPath, - content: []byte(content), + content: contentBytes, frontmatterResult: result, frontmatterForValidation: frontmatterForValidation, markdownDir: filepath.Dir(cleanPath), diff --git a/pkg/workflow/compiler_string_api_test.go b/pkg/workflow/compiler_string_api_test.go index b11ae2dbfb7..a73001a4fea 100644 --- a/pkg/workflow/compiler_string_api_test.go +++ b/pkg/workflow/compiler_string_api_test.go @@ -138,6 +138,55 @@ engine: copilot assert.NotNil(t, wd) } +func TestParseWorkflowString_InvalidEngineReportedBeforeSchemaErrors(t *testing.T) { + markdown := `--- +on: push +engine: copiilot +bogus-field: true +--- + +# Test +` + + compiler := NewCompiler( + WithNoEmit(true), + WithSkipValidation(true), + ) + + _, err := compiler.ParseWorkflowString(markdown, "virtual/workflow.md") + require.Error(t, err) + + errorStr := err.Error() + assert.Contains(t, errorStr, "invalid engine: copiilot") + assert.Contains(t, errorStr, "Did you mean: copilot?") + assert.NotContains(t, errorStr, "Unknown property: bogus-field") + assert.Contains(t, errorStr, "virtual/workflow.md:3:1: error:") +} + +func TestParseWorkflowString_WhitespaceOnlyEngineReportedBeforeSchemaErrors(t *testing.T) { + markdown := `--- +on: push +engine: " " +bogus-field: true +--- + +# Test +` + + compiler := NewCompiler( + WithNoEmit(true), + WithSkipValidation(true), + ) + + _, err := compiler.ParseWorkflowString(markdown, "virtual/workflow.md") + require.Error(t, err) + + errorStr := err.Error() + assert.Contains(t, errorStr, "invalid engine:") + assert.NotContains(t, errorStr, "Unknown property: bogus-field") + assert.Contains(t, errorStr, "virtual/workflow.md:3:1: error:") +} + func TestCompileToYAML_BasicCompilation(t *testing.T) { markdown := `--- name: compile-test diff --git a/pkg/workflow/compiler_yaml_test.go b/pkg/workflow/compiler_yaml_test.go index 72604963312..f07502f6df2 100644 --- a/pkg/workflow/compiler_yaml_test.go +++ b/pkg/workflow/compiler_yaml_test.go @@ -568,6 +568,43 @@ Content.`, } } +func TestEngineTypeValidationErrorUsesSingleSourceLocationAndSnippet(t *testing.T) { + tmpDir := testutil.TempDir(t, "engine-type-error-test") + + // Integer engine values bypass validateEngineBeforeSchema, so this verifies the + // schema-validation path still reports one authoritative location plus snippet. + content := `--- +on: push +name: test +engine: 123 +--- + +# Test +` + + testFile := filepath.Join(tmpDir, "test.md") + if err := os.WriteFile(testFile, []byte(content), 0644); err != nil { + t.Fatal(err) + } + + compiler := NewCompiler() + err := compiler.CompileWorkflow(testFile) + if err == nil { + t.Fatal("expected compilation to fail") + } + + errorStr := err.Error() + if !strings.Contains(errorStr, testFile+":4:8: error:") { + t.Errorf("error should point to engine value location in header, got: %s", errorStr) + } + if strings.Contains(errorStr, "(line ") || strings.Contains(errorStr, ", col ") { + t.Errorf("single schema failure should not repeat a second line/col location in the message body, got: %s", errorStr) + } + if !strings.Contains(errorStr, "4 | engine: 123") { + t.Errorf("error should include the engine source line snippet, got: %s", errorStr) + } +} + // TestInvalidEngineReportedBeforeImportErrors verifies that an invalid engine: value // is reported immediately, even when imports also fail. Previously the import error // would shadow the engine typo. @@ -621,6 +658,43 @@ Content.` } } +func TestInvalidEngineReportedBeforeSchemaErrors(t *testing.T) { + tmpDir := testutil.TempDir(t, "engine-before-schema-test") + + content := `--- +on: push +engine: copiilot +bogus-field: true +--- + +# Test +` + testFile := filepath.Join(tmpDir, "test.md") + if err := os.WriteFile(testFile, []byte(content), 0644); err != nil { + t.Fatal(err) + } + + compiler := NewCompiler() + err := compiler.CompileWorkflow(testFile) + if err == nil { + t.Fatal("expected compilation to fail") + } + + errorStr := err.Error() + if !strings.Contains(errorStr, "invalid engine: copiilot") { + t.Errorf("error should prioritize the invalid engine typo, got: %s", errorStr) + } + if !strings.Contains(errorStr, "Did you mean: copilot?") { + t.Errorf("error should include the closest engine suggestion, got: %s", errorStr) + } + if strings.Contains(errorStr, "Unknown property: bogus-field") { + t.Errorf("schema error should not shadow the invalid engine typo, got: %s", errorStr) + } + if !strings.Contains(errorStr, testFile+":3:1: error:") { + t.Errorf("error should point to the engine field location, got: %s", errorStr) + } +} + // TestImportNotFoundHint verifies that a tailored hint is shown when an import cannot be resolved. func TestImportNotFoundHint(t *testing.T) { tmpDir := testutil.TempDir(t, "import-hint-test")