Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions src/skillspector/llm_analyzer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from dataclasses import dataclass, field
from typing import Literal

from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

from skillspector.llm_utils import get_chat_model
Expand Down Expand Up @@ -241,9 +242,21 @@ def __init__(self, base_prompt: str, model: str):
self.model = model
self._input_budget = get_max_input_tokens(model)
self._llm = get_chat_model(model=model)
self._structured_llm = (
self._llm.with_structured_output(self.response_schema) if self.response_schema else None
)
# Parse structured output from the text response rather than relying on
# the endpoint's native structured-output mode. ChatOpenAI defaults to
# method="json_schema" (strict response_format), which OpenAI-compatible
# endpoints are free to ignore -- they may wrap the JSON in markdown
# fences or surround it with prose. PydanticOutputParser uses langchain's
# fence-tolerant parser and supplies matching format instructions, so
# both strict and lenient endpoints work.
if self.response_schema:
self._parser = PydanticOutputParser(pydantic_object=self.response_schema)
self._structured_llm = self._llm | self._parser
self._format_instructions = self._parser.get_format_instructions()
else:
self._parser = None
self._structured_llm = None
self._format_instructions = ""

# -- Batching -----------------------------------------------------------

Expand Down Expand Up @@ -303,6 +316,18 @@ def get_batches(

# -- Prompt / parse -----------------------------------------------------

def _with_format(self, prompt: str) -> str:
    """Return *prompt* followed by the parser's format instructions.

    The instructions are read from ``self._format_instructions`` and are
    joined to the prompt with one blank line. Doing this in a single helper
    at invocation time means prompt templates do not have to embed the
    schema guidance themselves.

    Args:
        prompt: The fully built prompt text for one batch.

    Returns:
        ``prompt`` unchanged when ``self._format_instructions`` is empty
        (raw-string mode, no ``response_schema``); otherwise the prompt,
        a blank line, and the format instructions.
    """
    instructions = self._format_instructions
    if instructions:
        return f"{prompt}\n\n{instructions}"
    # Nothing to append: hand the caller's prompt back untouched.
    return prompt

def build_prompt(self, batch: Batch, **kwargs: object) -> str:
"""Build the LLM prompt for a single batch.

Expand Down Expand Up @@ -353,7 +378,7 @@ def run_batches(
len(batch.findings),
)
if self._structured_llm:
response = self._structured_llm.invoke(prompt)
response = self._structured_llm.invoke(self._with_format(prompt))
else:
response = self._llm.invoke(prompt).content
logger.debug("LLM response for %s", batch.file_label)
Expand Down Expand Up @@ -388,7 +413,7 @@ async def _process(batch: Batch) -> tuple[Batch, list]:
len(batch.findings),
)
if self._structured_llm:
response = await self._structured_llm.ainvoke(prompt)
response = await self._structured_llm.ainvoke(self._with_format(prompt))
else:
response = (await self._llm.ainvoke(prompt)).content
logger.debug("LLM response for %s", batch.file_label)
Expand Down
118 changes: 89 additions & 29 deletions tests/nodes/test_llm_analyzer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from langchain_core.messages import AIMessage
from langchain_core.runnables import Runnable

from skillspector.llm_analyzer_base import (
Batch,
Expand Down Expand Up @@ -155,15 +157,77 @@ def test_chunk_label(self) -> None:


def _mock_get_chat_model(*_args, **_kwargs):
    """Return a mock chat model for tests that override ``_structured_llm``.

    Drop-in replacement for ``get_chat_model`` when patched in through
    ``MOCK_PATCH_TARGET``. All positional and keyword arguments are accepted
    and ignored.

    Returns:
        A fresh ``MagicMock`` on every call. No attributes are configured up
        front: ``MagicMock`` creates attributes such as
        ``with_structured_output`` on first access, so explicit setup is
        unnecessary.
    """
    return MagicMock()


MOCK_PATCH_TARGET = "skillspector.llm_analyzer_base.get_chat_model"


class _FakeChatModel(Runnable):
    """Stub ``Runnable`` whose every call yields the same ``AIMessage``.

    Used in place of an OpenAI-compatible endpoint that ignores
    ``response_format``: piping this stub into the real
    ``PydanticOutputParser`` lets the structured-output chain be exercised
    end-to-end against a fixed text response.
    """

    def __init__(self, content: str):
        # Canned text returned as the message content on every call.
        self._content = content

    def _reply(self) -> AIMessage:
        """Build a new ``AIMessage`` carrying the canned content."""
        return AIMessage(content=self._content)

    def invoke(self, _input, _config=None, **_kwargs):
        """Ignore all arguments and return the canned message."""
        return self._reply()

    async def ainvoke(self, _input, _config=None, **_kwargs):
        """Async counterpart of ``invoke``; returns the same canned message."""
        return self._reply()


class TestStructuredOutputParsing:
    """Regression tests for parsing responses from non-strict endpoints.

    An OpenAI-compatible endpoint may ignore ``response_format`` and return
    the JSON inside markdown fences or surrounded by prose. These tests feed
    such responses through the analyzer's parser chain and check that they
    are parsed into the response schema instead of failing the scan.
    """

    MODEL = "nvidia/openai/gpt-oss-120b"

    def _findings_for(self, content: str) -> list:
        """Run one batch through a chain whose model always returns *content*.

        Must be called from a test where ``get_chat_model`` is already
        patched, because the analyzer is constructed here.
        """
        analyzer = LLMAnalyzerBase(base_prompt="test", model=self.MODEL)
        # Swap in a fake model but keep the analyzer's real parser.
        analyzer._structured_llm = _FakeChatModel(content) | analyzer._parser
        batch_results = analyzer.run_batches([Batch(file_path="x.py", content="code")])
        _, findings = batch_results[0]
        return findings

    @pytest.mark.parametrize(
        "content",
        [
            '{"findings": [{"rule_id": "R-1", "message": "m", "severity": "LOW", "start_line": 1}]}',
            '```json\n{"findings": [{"rule_id": "R-1", "message": "m", "severity": "LOW", "start_line": 1}]}\n```',
            'Here is the analysis:\n```json\n{"findings": [{"rule_id": "R-1", "message": "m", "severity": "LOW", "start_line": 1}]}\n```\nDone.',
        ],
    )
    @patch(MOCK_PATCH_TARGET, _mock_get_chat_model)
    def test_parses_fenced_and_prose_wrapped_json(self, content: str) -> None:
        """Bare, fenced, and prose-wrapped JSON all yield the single finding."""
        findings = self._findings_for(content)
        assert len(findings) == 1
        assert findings[0].rule_id == "R-1"

    @patch(MOCK_PATCH_TARGET, _mock_get_chat_model)
    def test_empty_findings_object_parses(self) -> None:
        """A fenced object with an empty ``findings`` list parses to ``[]``."""
        findings = self._findings_for('```json\n{"findings": []}\n```')
        assert findings == []

    @patch(MOCK_PATCH_TARGET, _mock_get_chat_model)
    def test_format_instructions_appended_to_prompt(self) -> None:
        """``_with_format`` keeps the prompt first and adds the instructions."""
        analyzer = LLMAnalyzerBase(base_prompt="test", model=self.MODEL)
        instructions = analyzer._format_instructions
        assert instructions
        augmented = analyzer._with_format("PROMPT BODY")
        assert augmented.startswith("PROMPT BODY")
        assert instructions in augmented


# ---------------------------------------------------------------------------
# number_lines
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -1096,30 +1160,30 @@ class TestLLMMetaAnalyzerRunBatches:
@patch(MOCK_PATCH_TARGET)
def test_run_batches_calls_structured_llm_per_batch(self, mock_get_model: MagicMock) -> None:
mock_llm = MagicMock()
mock_structured = MagicMock()
mock_get_model.return_value = mock_llm
mock_llm.with_structured_output.return_value = mock_structured
mock_structured.invoke.return_value = MetaAnalyzerResult(
findings=[
MetaAnalyzerFinding(
pattern_id="E1",
is_vulnerability=True,
confidence=0.9,
intent="malicious",
impact="high",
)
],
)

analyzer = LLMMetaAnalyzer(model=self.MODEL)
analyzer._structured_llm.invoke = MagicMock(
return_value=MetaAnalyzerResult(
findings=[
MetaAnalyzerFinding(
pattern_id="E1",
is_vulnerability=True,
confidence=0.9,
intent="malicious",
impact="high",
)
],
)
)
f1 = Finding(rule_id="E1", message="test", file="a.py", start_line=1)
f2 = Finding(rule_id="E2", message="test", file="b.py", start_line=1)
batches = [
Batch(file_path="a.py", content="code a", findings=[f1]),
Batch(file_path="b.py", content="code b", findings=[f2]),
]
results = analyzer.run_batches(batches, metadata_text="Name: skill")
assert mock_structured.invoke.call_count == 2
assert analyzer._structured_llm.invoke.call_count == 2
assert len(results) == 2

@patch(MOCK_PATCH_TARGET)
Expand All @@ -1140,10 +1204,10 @@ class TestLLMMetaAnalyzerARunBatches:
@patch(MOCK_PATCH_TARGET)
async def test_arun_batches_calls_ainvoke_per_batch(self, mock_get_model: MagicMock) -> None:
mock_llm = MagicMock()
mock_structured = MagicMock()
mock_get_model.return_value = mock_llm
mock_llm.with_structured_output.return_value = mock_structured
mock_structured.ainvoke = AsyncMock(

analyzer = LLMMetaAnalyzer(model=self.MODEL)
analyzer._structured_llm.ainvoke = AsyncMock(
return_value=MetaAnalyzerResult(
findings=[
MetaAnalyzerFinding(
Expand All @@ -1156,16 +1220,14 @@ async def test_arun_batches_calls_ainvoke_per_batch(self, mock_get_model: MagicM
],
)
)

analyzer = LLMMetaAnalyzer(model=self.MODEL)
f1 = Finding(rule_id="E1", message="test", file="a.py", start_line=1)
f2 = Finding(rule_id="E2", message="test", file="b.py", start_line=1)
batches = [
Batch(file_path="a.py", content="code a", findings=[f1]),
Batch(file_path="b.py", content="code b", findings=[f2]),
]
results = await analyzer.arun_batches(batches, metadata_text="Name: skill")
assert mock_structured.ainvoke.call_count == 2
assert analyzer._structured_llm.ainvoke.call_count == 2
assert len(results) == 2

@patch(MOCK_PATCH_TARGET)
Expand All @@ -1174,10 +1236,10 @@ async def test_arun_batches_results_compatible_with_apply_filter(
mock_get_model: MagicMock,
) -> None:
mock_llm = MagicMock()
mock_structured = MagicMock()
mock_get_model.return_value = mock_llm
mock_llm.with_structured_output.return_value = mock_structured
mock_structured.ainvoke = AsyncMock(

analyzer = LLMMetaAnalyzer(model=self.MODEL)
analyzer._structured_llm.ainvoke = AsyncMock(
return_value=MetaAnalyzerResult(
findings=[
MetaAnalyzerFinding(
Expand All @@ -1192,8 +1254,6 @@ async def test_arun_batches_results_compatible_with_apply_filter(
],
)
)

analyzer = LLMMetaAnalyzer(model=self.MODEL)
finding = Finding(rule_id="E1", message="test", file="a.py", start_line=1)
batches = [Batch(file_path="a.py", content="code", findings=[finding])]
batch_results = await analyzer.arun_batches(batches, metadata_text="")
Expand Down