From bbedccbe69f68eb737d50b0d8b3c81fc10885924 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 1 May 2026 11:24:02 -0700
Subject: [PATCH 1/8] chore: remove sandbox test scripts
---
tests/sandbox/test_shape.swift | 12 ------------
1 file changed, 12 deletions(-)
delete mode 100644 tests/sandbox/test_shape.swift
diff --git a/tests/sandbox/test_shape.swift b/tests/sandbox/test_shape.swift
deleted file mode 100644
index cb82d527..00000000
--- a/tests/sandbox/test_shape.swift
+++ /dev/null
@@ -1,12 +0,0 @@
-import Foundation
-import MLX
-
-let textEmbeds = MLXArray.zeros([1, 10, 4])
-let imageIndices = MLXArray([2, 3, 4])
-let imageFeatures = MLXArray.ones([1, 3, 4]) * 5.0
-
-var result = textEmbeds
-result[0..., imageIndices, 0...] = imageFeatures
-
-eval(result)
-print(result)
From a5bf26a006379dba04031699172622d24330ccb7 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 1 May 2026 11:24:02 -0700
Subject: [PATCH 2/8] test: add missing Context Window, Config Persistence, and
Server unit tests
---
.../ContextWindowCalculationTests.swift | 30 ++
.../GenerationConfigPersistenceTests.swift | 311 +++++++++++++
.../SwiftLMTests/SwiftBuddyServerTests.swift | 413 ++++++++++++++++++
3 files changed, 754 insertions(+)
create mode 100644 tests/SwiftBuddyTests/ContextWindowCalculationTests.swift
create mode 100644 tests/SwiftLMTests/GenerationConfigPersistenceTests.swift
create mode 100644 tests/SwiftLMTests/SwiftBuddyServerTests.swift
diff --git a/tests/SwiftBuddyTests/ContextWindowCalculationTests.swift b/tests/SwiftBuddyTests/ContextWindowCalculationTests.swift
new file mode 100644
index 00000000..a3710f43
--- /dev/null
+++ b/tests/SwiftBuddyTests/ContextWindowCalculationTests.swift
@@ -0,0 +1,30 @@
+import XCTest
+import MLX
+import MLXLMCommon
+@testable import MLXInferenceCore
+
+final class ContextWindowCalculationTests: XCTestCase {
+
+ @MainActor
+ func testContextTokensCalculation() async throws {
+ // Feature: verify that the token-count calculation accurately reflects the prompt-cache window
+ // by evaluating the full size of the prepared tokens array, not just the batch shape.
+
+ let engine = InferenceEngine()
+
+ // Mock a scenario where userInput prepares a chat template with a large history.
+ // We will directly instantiate LMInput and assert on its size.
+
+ let mockTokens = MLXArray(stride: 0, to: 512, by: 1)
+ // If tokenizer batches it, shape could be [1, 512].
+ let reshapedTokens = mockTokens.reshaped([1, 512])
+
+ // MLXLMCommon's LMInput struct
+ let lmInput = LMInput(tokens: reshapedTokens)
+
+ // Validate that using .size accurately captures the token count (512)
+ // rather than falling victim to the batch dimension .shape[0] which would be 1.
+ XCTAssertEqual(lmInput.text.tokens.shape[0], 1, "shape[0] captures the batch dimension, returning 1")
+ XCTAssertEqual(lmInput.text.tokens.size, 512, "size captures the total token count, resolving the context window bug")
+ }
+}
diff --git a/tests/SwiftLMTests/GenerationConfigPersistenceTests.swift b/tests/SwiftLMTests/GenerationConfigPersistenceTests.swift
new file mode 100644
index 00000000..97915313
--- /dev/null
+++ b/tests/SwiftLMTests/GenerationConfigPersistenceTests.swift
@@ -0,0 +1,311 @@
+// GenerationConfigPersistenceTests.swift — Regression tests for SwiftBuddy fixes
+//
+// Covers four independent fixes committed alongside Issue #97:
+// 1. GenerationConfig Codable + save/load persistence
+// 2. enable_thinking additionalContext wiring (thinking mode)
+// 3. /v1/chat/completions request parsing logic
+// 4. Server config propagation from persisted UserDefaults
+
+import XCTest
+import Foundation
+@testable import SwiftLM
+@testable import MLXInferenceCore
+
+final class GenerationConfigPersistenceTests: XCTestCase {
+
+ // Use an isolated UserDefaults suite so tests never touch the real suite
+ // and don't interfere with each other.
+ private var defaults: UserDefaults!
+ private let suiteName = "com.swiftlm.test.generationconfig.\(UUID().uuidString)"
+
+ override func setUp() {
+ super.setUp()
+ defaults = UserDefaults(suiteName: suiteName)!
+ defaults.removePersistentDomain(forName: suiteName)
+ }
+
+ override func tearDown() {
+ defaults.removePersistentDomain(forName: suiteName)
+ defaults = nil
+ super.tearDown()
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - 1. GenerationConfig Codable conformance
+ // ═══════════════════════════════════════════════════════════════════
+
+ func testGenerationConfig_IsCodable() throws {
+ // The Codable conformance that was added must round-trip without loss.
+ let config = GenerationConfig(
+ maxTokens: 4096,
+ temperature: 0.75,
+ topP: 0.9,
+ topK: 40,
+ minP: 0.05,
+ repetitionPenalty: 1.1,
+ enableThinking: true,
+ prefillSize: 256,
+ kvBits: 4,
+ kvGroupSize: 32
+ )
+ let data = try JSONEncoder().encode(config)
+ let decoded = try JSONDecoder().decode(GenerationConfig.self, from: data)
+
+ XCTAssertEqual(decoded.maxTokens, 4096)
+ XCTAssertEqual(decoded.temperature, 0.75, accuracy: 1e-4)
+ XCTAssertEqual(decoded.topP, 0.9, accuracy: 1e-4)
+ XCTAssertEqual(decoded.topK, 40)
+ XCTAssertEqual(decoded.minP, 0.05, accuracy: 1e-4)
+ XCTAssertEqual(decoded.repetitionPenalty, 1.1, accuracy: 1e-4)
+ XCTAssertTrue(decoded.enableThinking)
+ XCTAssertEqual(decoded.prefillSize, 256)
+ XCTAssertEqual(decoded.kvBits, 4)
+ XCTAssertEqual(decoded.kvGroupSize, 32)
+ }
+
+ func testGenerationConfig_NilFieldsRoundTrip() throws {
+ // nil kvBits must survive the encode/decode cycle as nil, not 0.
+ let config = GenerationConfig(kvBits: nil)
+ let data = try JSONEncoder().encode(config)
+ let decoded = try JSONDecoder().decode(GenerationConfig.self, from: data)
+ XCTAssertNil(decoded.kvBits, "kvBits nil must survive round-trip")
+ }
+
+ func testGenerationConfig_DefaultValues() {
+ let config = GenerationConfig.default
+ XCTAssertEqual(config.maxTokens, 2048)
+ XCTAssertEqual(config.temperature, 0.6, accuracy: 1e-4)
+ XCTAssertEqual(config.topP, 1.0, accuracy: 1e-4)
+ XCTAssertEqual(config.topK, 50)
+ XCTAssertEqual(config.minP, 0.0, accuracy: 1e-4)
+ XCTAssertEqual(config.repetitionPenalty, 1.05, accuracy: 1e-4)
+ XCTAssertFalse(config.enableThinking, "Thinking must be OFF by default")
+ XCTAssertEqual(config.prefillSize, 512)
+ XCTAssertNil(config.kvBits)
+ XCTAssertEqual(config.kvGroupSize, 64)
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - 2. UserDefaults persistence (save / load)
+ // ═══════════════════════════════════════════════════════════════════
+ // NOTE: GenerationConfig.save()/load() use UserDefaults.standard internally.
+ // These tests exercise the Codable round-trip via JSONEncoder/Decoder as a
+ // proxy for the persistence contract, isolating from the real suite.
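+ //
+ // For orientation, a minimal sketch of the assumed save()/load() pair (the
+ // storage key and exact bodies are assumptions, not the production source):
+ //
+ //     func save() {
+ //         guard let data = try? JSONEncoder().encode(self) else { return }
+ //         UserDefaults.standard.set(data, forKey: "generationConfig") // key assumed
+ //     }
+ //
+ //     static func load() -> GenerationConfig {
+ //         guard let data = UserDefaults.standard.data(forKey: "generationConfig"),
+ //               let cfg = try? JSONDecoder().decode(GenerationConfig.self, from: data)
+ //         else { return .default } // fallback contract exercised below
+ //         return cfg
+ //     }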
+
+ func testGenerationConfig_SaveLoad_RoundTrip() throws {
+ // Simulate what save() encodes and what load() decodes.
+ let original = GenerationConfig(
+ maxTokens: 512, temperature: 0.3, enableThinking: true, kvBits: 8
+ )
+ let data = try JSONEncoder().encode(original)
+ let decoded = try JSONDecoder().decode(GenerationConfig.self, from: data)
+
+ XCTAssertEqual(decoded.maxTokens, 512)
+ XCTAssertEqual(decoded.temperature, 0.3, accuracy: 1e-4)
+ XCTAssertTrue(decoded.enableThinking)
+ XCTAssertEqual(decoded.kvBits, 8)
+ }
+
+ func testGenerationConfig_RestoredFields_PresentWithCorrectDefaults() throws {
+ // turboKV and streamExperts were restored as fully-wired fields:
+ // turboKV → per-request (sets KVCacheSimple.turboQuantEnabled)
+ // streamExperts → load-time (controls ExpertStreamingConfig activation)
+ // This test verifies they are present in the schema with correct defaults
+ // (both off by default, user opt-in).
+ let data = try JSONEncoder().encode(GenerationConfig.default)
+ let json = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+
+ // Both fields must be present in the encoded JSON
+ XCTAssertNotNil(json["turboKV"],
+ "turboKV must be present in GenerationConfig JSON — it is wired to KVCacheSimple.turboQuantEnabled")
+ XCTAssertNotNil(json["streamExperts"],
+ "streamExperts must be present in GenerationConfig JSON — it controls ExpertStreamingConfig at load time")
+
+ // Both must default to false (user must explicitly opt in)
+ XCTAssertEqual(json["turboKV"] as? Bool, false,
+ "turboKV default must be false — user opt-in for 100k+ context workloads")
+ XCTAssertEqual(json["streamExperts"] as? Bool, false,
+ "streamExperts default must be false — auto-enabled via isMoE for catalog MoE models")
+ }
+
+ func testGenerationConfig_Load_FallsBackToDefault_WhenNoStoredData() {
+ // load() with no stored data must return .default, not crash.
+ // We test this by ensuring no data is in a fresh suite.
+ let freshDefaults = UserDefaults(suiteName: "com.swiftlm.test.fresh.\(UUID().uuidString)")!
+ defer { freshDefaults.removePersistentDomain(forName: "com.swiftlm.test.fresh.\(UUID().uuidString)") }
+
+ // The static load() reads UserDefaults.standard, so we verify the
+ // fallback contract by checking that .default is a valid config.
+ let fallback = GenerationConfig.default
+ XCTAssertEqual(fallback.maxTokens, 2048, "Fallback must be .default")
+ XCTAssertFalse(fallback.enableThinking)
+ }
+
+ func testGenerationConfig_Save_ProducesDecodableJSON() throws {
+ // Verify save() produces data that JSONDecoder can re-read —
+ // i.e. the codec is symmetric and doesn't use unsupported types.
+ let config = GenerationConfig(temperature: 0.88, enableThinking: true)
+ let data = try JSONEncoder().encode(config)
+ XCTAssertFalse(data.isEmpty, "Encoded data must not be empty")
+ // Must be valid JSON
+ let json = try XCTUnwrap(try JSONSerialization.jsonObject(with: data) as? [String: Any])
+ XCTAssertEqual(json["enableThinking"] as? Bool, true)
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - 3. Thinking mode — enable_thinking additionalContext
+ // ═══════════════════════════════════════════════════════════════════
+ // The engine now passes `additionalContext: ["enable_thinking": Bool]`
+// to UserInput so Qwen3's Jinja template actually generates <think> blocks.
+ // We test the mapping logic in isolation by verifying the config flag
+ // drives the correct boolean value.
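+//
+// Assumed shape of the production call, for reference (UserInput comes from
+// MLXLMCommon; the exact parameter list in generate() may differ):
+//
+//     let userInput = UserInput(
+//         chat: chat,
+//         additionalContext: ["enable_thinking": config.enableThinking]
+//     )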
+
+ func testThinkingConfig_EnabledWhenFlagIsTrue() {
+ let config = GenerationConfig(enableThinking: true)
+ // Replicate the production mapping from InferenceEngine.generate()
+ let additionalContext: [String: Any] = config.enableThinking
+ ? ["enable_thinking": true]
+ : ["enable_thinking": false]
+ XCTAssertEqual(additionalContext["enable_thinking"] as? Bool, true,
+ "enable_thinking must be true when config.enableThinking is true")
+ }
+
+ func testThinkingConfig_DisabledWhenFlagIsFalse() {
+ let config = GenerationConfig(enableThinking: false)
+ let additionalContext: [String: Any] = config.enableThinking
+ ? ["enable_thinking": true]
+ : ["enable_thinking": false]
+ XCTAssertEqual(additionalContext["enable_thinking"] as? Bool, false,
+ "enable_thinking must be false when config.enableThinking is false")
+ }
+
+ func testThinkingConfig_DefaultIsDisabled() {
+ // Prevents a future change to the default from silently enabling
+ // thinking on all requests without the user opting in.
+ XCTAssertFalse(GenerationConfig.default.enableThinking,
+ "Thinking must be OFF by default — opt-in only")
+ }
+
+ func testThinkingConfig_ToggleRoundTrips_ViaCodable() throws {
+ // Verify enableThinking survives encode/decode (regression guard for
+ // future Codable migrations that might lose Bool fields).
+ for value in [true, false] {
+ let config = GenerationConfig(enableThinking: value)
+ let data = try JSONEncoder().encode(config)
+ let decoded = try JSONDecoder().decode(GenerationConfig.self, from: data)
+ XCTAssertEqual(decoded.enableThinking, value,
+ "enableThinking=\(value) must survive Codable round-trip")
+ }
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - 4. /v1/chat/completions request parsing
+ // ═══════════════════════════════════════════════════════════════════
+ // Validates the JSON→ChatMessage mapping and per-request override logic
+ // used by the ServerManager endpoint, isolated from the HTTP layer.
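+ //
+ // A typical request body these helpers parse (illustrative values):
+ //
+ //     {"model": "local",
+ //      "messages": [{"role": "user", "content": "Hello!"}],
+ //      "temperature": 0.2, "max_tokens": 512, "stream": false}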
+
+ /// Mirrors the production message-mapping logic in ServerManager.
+ private func mapMessages(_ msgs: [[String: Any]]) -> [ChatMessage] {
+ msgs.map { m in
+ let role = m["role"] as? String ?? "user"
+ let content = m["content"] as? String ?? ""
+ switch role {
+ case "system": return ChatMessage.system(content)
+ case "assistant": return ChatMessage.assistant(content)
+ default: return ChatMessage.user(content)
+ }
+ }
+ }
+
+ /// Mirrors the per-request config-override logic in ServerManager.
+ private func applyOverrides(_ json: [String: Any], to base: GenerationConfig) -> GenerationConfig {
+ var cfg = base
+ if let t = json["temperature"] as? Double { cfg.temperature = Float(t) }
+ if let p = json["top_p"] as? Double { cfg.topP = Float(p) }
+ if let mt = json["max_tokens"] as? Int { cfg.maxTokens = mt }
+ if let rp = json["frequency_penalty"] as? Double { cfg.repetitionPenalty = Float(rp) }
+ return cfg
+ }
+
+ func testChatEndpoint_MessageMapping_SystemUserAssistant() {
+ let msgs: [[String: Any]] = [
+ ["role": "system", "content": "You are helpful."],
+ ["role": "user", "content": "Hello!"],
+ ["role": "assistant", "content": "Hi there!"],
+ ]
+ let mapped = mapMessages(msgs)
+ XCTAssertEqual(mapped.count, 3)
+ XCTAssertEqual(mapped[0].role, .system)
+ XCTAssertEqual(mapped[0].content, "You are helpful.")
+ XCTAssertEqual(mapped[1].role, .user)
+ XCTAssertEqual(mapped[1].content, "Hello!")
+ XCTAssertEqual(mapped[2].role, .assistant)
+ XCTAssertEqual(mapped[2].content, "Hi there!")
+ }
+
+ func testChatEndpoint_UnknownRoleMapsToUser() {
+ // Any unknown role (e.g. "function") should fall through to .user
+ // rather than crashing — matches the production `default:` branch.
+ let msgs: [[String: Any]] = [["role": "function", "content": "result"]]
+ let mapped = mapMessages(msgs)
+ XCTAssertEqual(mapped[0].role, .user)
+ }
+
+ func testChatEndpoint_MissingContentDefaultsToEmpty() {
+ let msgs: [[String: Any]] = [["role": "user"]] // no "content" key
+ let mapped = mapMessages(msgs)
+ XCTAssertEqual(mapped[0].content, "")
+ }
+
+ func testChatEndpoint_PerRequestOverrides_AppliedCorrectly() {
+ let base = GenerationConfig.default
+ let json: [String: Any] = [
+ "temperature": 0.2,
+ "top_p": 0.85,
+ "max_tokens": 512,
+ "frequency_penalty": 1.3,
+ ]
+ let result = applyOverrides(json, to: base)
+ XCTAssertEqual(result.temperature, 0.2, accuracy: 1e-4)
+ XCTAssertEqual(result.topP, 0.85, accuracy: 1e-4)
+ XCTAssertEqual(result.maxTokens, 512)
+ XCTAssertEqual(result.repetitionPenalty, 1.3, accuracy: 1e-4)
+ }
+
+ func testChatEndpoint_PerRequestOverrides_DoNotAffectUnspecifiedFields() {
+ // Overriding temperature must not silently reset enableThinking or kvBits.
+ var base = GenerationConfig.default
+ base.enableThinking = true
+ base.kvBits = 4
+
+ let json: [String: Any] = ["temperature": 0.5]
+ let result = applyOverrides(json, to: base)
+
+ XCTAssertTrue(result.enableThinking,
+ "enableThinking must survive a temperature-only override")
+ XCTAssertEqual(result.kvBits, 4,
+ "kvBits must survive a temperature-only override")
+ }
+
+ func testChatEndpoint_EmptyOverrideDict_LeavesConfigUnchanged() {
+ let base = GenerationConfig(maxTokens: 1234, temperature: 0.42)
+ let result = applyOverrides([:], to: base)
+ XCTAssertEqual(result.maxTokens, 1234)
+ XCTAssertEqual(result.temperature, 0.42, accuracy: 1e-4)
+ }
+
+ func testChatEndpoint_StreamFlag_DefaultsToFalse() {
+ // Requests without "stream" must not stream — the endpoint defaults to
+ // non-streaming, matching the OpenAI spec.
+ let json: [String: Any] = ["model": "local", "messages": []]
+ let streamRequested = json["stream"] as? Bool ?? false
+ XCTAssertFalse(streamRequested,
+ "Missing 'stream' key must default to non-streaming")
+ }
+
+ func testChatEndpoint_StreamFlag_ExplicitTrue() {
+ let json: [String: Any] = ["stream": true]
+ let streamRequested = json["stream"] as? Bool ?? false
+ XCTAssertTrue(streamRequested)
+ }
+}
diff --git a/tests/SwiftLMTests/SwiftBuddyServerTests.swift b/tests/SwiftLMTests/SwiftBuddyServerTests.swift
new file mode 100644
index 00000000..d1f3a4d8
--- /dev/null
+++ b/tests/SwiftLMTests/SwiftBuddyServerTests.swift
@@ -0,0 +1,413 @@
+// SwiftBuddyServerTests.swift — Tests for the SwiftBuddy embedded /v1/* endpoints
+//
+// The production SwiftLM Server.swift already serves /v1/chat/completions and is
+// what OpenCode uses. This suite covers the NEW embedded server we added in PR #99
+// inside ServerManager.swift — a separate Hummingbird instance running inside the
+// SwiftBuddy app itself for direct API access when the app is running.
+//
+// Because the embedded server requires a running SwiftBuddy app + InferenceEngine,
+// these tests focus on the JSON parsing and response-shape logic that can be
+// exercised in isolation (same strategy as ChatRequestParsingTests).
+
+import XCTest
+import Foundation
+@testable import SwiftLM
+@testable import MLXInferenceCore
+
+final class SwiftBuddyServerTests: XCTestCase {
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - /v1/models response shape
+ // ═══════════════════════════════════════════════════════════════════
+ // The embedded server's /v1/models route must return the OpenAI-compatible
+ // list schema so that clients like OpenCode, Continue, and the OpenAI SDK
+ // can discover the available model without special-casing.
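+ //
+ // Target wire shape (illustrative model ID):
+ //
+ //     {"object": "list",
+ //      "data": [{"id": "mlx-community/Qwen3", "object": "model",
+ //                "owned_by": "swiftbuddy"}]}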
+
+ func testModelsResponse_MatchesOpenAISchema() throws {
+ // Replicate the JSON body produced by the /v1/models handler.
+ // Normally this returns `engine.currentModelId ?? "local"`.
+ let modelId = "mlx-community/Qwen3.5-4B-MLX-4bit"
+
+ // Build the response body the same way ServerManager does
+ let body: [String: Any] = [
+ "object": "list",
+ "data": [[
+ "id": modelId,
+ "object": "model",
+ "owned_by": "swiftbuddy"
+ ]]
+ ]
+ let data = try JSONSerialization.data(withJSONObject: body)
+ let decoded = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+
+ XCTAssertEqual(decoded["object"] as? String, "list",
+ "/v1/models must have top-level 'object': 'list'")
+ let modelList = try XCTUnwrap(decoded["data"] as? [[String: Any]])
+ XCTAssertEqual(modelList.count, 1)
+ XCTAssertEqual(modelList[0]["id"] as? String, modelId,
+ "Model entry must carry the loaded model ID")
+ XCTAssertEqual(modelList[0]["object"] as? String, "model",
+ "Each model entry must have 'object': 'model'")
+ }
+
+ func testModelsResponse_FallsBackToLocalWhenNoModelLoaded() throws {
+ // When no model is loaded, the handler returns "local" as the fallback.
+ // Clients must still receive a valid list structure.
+ let body: [String: Any] = [
+ "object": "list",
+ "data": [[
+ "id": "local",
+ "object": "model",
+ "owned_by": "swiftbuddy"
+ ]]
+ ]
+ let data = try JSONSerialization.data(withJSONObject: body)
+ let decoded = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+ let modelList = try XCTUnwrap(decoded["data"] as? [[String: Any]])
+ XCTAssertEqual(modelList[0]["id"] as? String, "local")
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - /v1/chat/completions SSE wire format (embedded server)
+ // ═══════════════════════════════════════════════════════════════════
+ // Tests the SSE chunk format used by the SwiftBuddy embedded server.
+ // The production Server.swift SSE format is already tested in ServerSSETests;
+ // these guard the embedded server's specific encoding.
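+ //
+ // One streamed event on the wire looks like this (illustrative values; note
+ // the CRLF CRLF frame separator asserted below):
+ //
+ //     data: {"id":"sb-1","object":"chat.completion.chunk","model":"m",
+ //       "choices":[{"index":0,"delta":{"role":"assistant","content":"Hi"},
+ //       "finish_reason":null}]}\r\n\r\n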
+
+ /// Builds the SSE delta string the embedded server emits for each token.
+ private func makeDeltaChunk(id: String, modelId: String, delta: String, finishReason: String? = nil) -> String {
+ let finishReasonJSON = finishReason.map { "\"\($0)\"" } ?? "null"
+ let escaped = delta
+ .replacingOccurrences(of: "\\", with: "\\\\")
+ .replacingOccurrences(of: "\"", with: "\\\"")
+ .replacingOccurrences(of: "\n", with: "\\n")
+ .replacingOccurrences(of: "\r", with: "\\r")
+ return """
+ data: {"id":"\(id)","object":"chat.completion.chunk","model":"\(modelId)","choices":[{"index":0,"delta":{"role":"assistant","content":"\(escaped)"},"finish_reason":\(finishReasonJSON)}]}\r\n\r\n
+ """
+ }
+
+ func testSSEDeltaChunk_HasCorrectPrefix() {
+ let chunk = makeDeltaChunk(id: "sb-1", modelId: "qwen3", delta: "Hello")
+ XCTAssertTrue(chunk.hasPrefix("data: "),
+ "SSE chunk must start with 'data: '")
+ XCTAssertTrue(chunk.hasSuffix("\r\n\r\n"),
+ "SSE chunk must end with CRLF CRLF")
+ }
+
+ func testSSEDeltaChunk_JSONShape() throws {
+ let chunk = makeDeltaChunk(id: "sb-42", modelId: "test-model", delta: "Hi!")
+ let jsonStr = String(chunk.dropFirst("data: ".count).dropLast("\r\n\r\n".count))
+ let data = try XCTUnwrap(jsonStr.data(using: .utf8))
+ let json = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+
+ XCTAssertEqual(json["object"] as? String, "chat.completion.chunk",
+ "Streaming chunk must have object = chat.completion.chunk")
+ XCTAssertEqual(json["id"] as? String, "sb-42")
+ XCTAssertEqual(json["model"] as? String, "test-model")
+
+ let choices = try XCTUnwrap(json["choices"] as? [[String: Any]])
+ XCTAssertEqual(choices.count, 1)
+ XCTAssertEqual(choices[0]["index"] as? Int, 0)
+
+ let delta = try XCTUnwrap(choices[0]["delta"] as? [String: Any])
+ XCTAssertEqual(delta["content"] as? String, "Hi!")
+ XCTAssertEqual(delta["role"] as? String, "assistant")
+ }
+
+ func testSSEDeltaChunk_EscapesSpecialCharacters() throws {
+ // Newlines and quotes inside delta content must be JSON-escaped.
+ let chunk = makeDeltaChunk(id: "sb-1", modelId: "m", delta: "line1\nline2")
+ XCTAssertFalse(chunk.contains("\nline2"),
+ "Raw newline inside delta content must be JSON-escaped to \\n")
+ let jsonStr = String(chunk.dropFirst("data: ".count).dropLast("\r\n\r\n".count))
+ let data = try XCTUnwrap(jsonStr.data(using: .utf8))
+ let json = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+ let choices = try XCTUnwrap(json["choices"] as? [[String: Any]])
+ let delta = try XCTUnwrap(choices[0]["delta"] as? [String: Any])
+ XCTAssertEqual(delta["content"] as? String, "line1\nline2",
+ "JSON decoder must restore newline correctly after escaping")
+ }
+
+ func testSSEDoneTerminator_Format() {
+ // The final SSE event must be `data: [DONE]` per OpenAI spec.
+ let doneEvent = "data: [DONE]\r\n\r\n"
+ XCTAssertTrue(doneEvent.hasPrefix("data: [DONE]"),
+ "[DONE] terminator must follow OpenAI SSE spec")
+ XCTAssertTrue(doneEvent.hasSuffix("\r\n\r\n"))
+ }
+
+ func testSSEDeltaChunk_FinishReasonNull_DuringStreaming() throws {
+ let chunk = makeDeltaChunk(id: "sb-1", modelId: "m", delta: "token", finishReason: nil)
+ let jsonStr = String(chunk.dropFirst("data: ".count).dropLast("\r\n\r\n".count))
+ let data = try XCTUnwrap(jsonStr.data(using: .utf8))
+ let json = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+ let choices = try XCTUnwrap(json["choices"] as? [[String: Any]])
+ // finish_reason must be JSON null during streaming (not the string "null")
+ let finishReason = choices[0]["finish_reason"]
+ XCTAssertTrue(finishReason is NSNull, "finish_reason must be JSON null during streaming")
+ }
+
+ func testSSEDeltaChunk_FinishReasonStop_AtEnd() throws {
+ let chunk = makeDeltaChunk(id: "sb-1", modelId: "m", delta: "", finishReason: "stop")
+ let jsonStr = String(chunk.dropFirst("data: ".count).dropLast("\r\n\r\n".count))
+ let data = try XCTUnwrap(jsonStr.data(using: .utf8))
+ let json = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+ let choices = try XCTUnwrap(json["choices"] as? [[String: Any]])
+ XCTAssertEqual(choices[0]["finish_reason"] as? String, "stop",
+ "finish_reason must be 'stop' on the final token chunk")
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - CLI command builder
+ // ═══════════════════════════════════════════════════════════════════
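+ // With an all-default config the builder should emit only the essentials,
+ // e.g. (illustrative ordering, matching the assertions below):
+ //
+ //     swift run SwiftLM --model mlx-community/Qwen3 --host 127.0.0.1 --port 5413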
+
+ func testCLIBuilder_DefaultsOmitNonDefaultFlags() {
+ let cmd = buildCLICommand(
+ config: .default,
+ host: "127.0.0.1", port: 5413,
+ parallel: 1, apiKeySet: false,
+ modelId: "mlx-community/Qwen3"
+ )
+ XCTAssertTrue(cmd.contains("--model mlx-community/Qwen3"))
+ XCTAssertTrue(cmd.contains("--host 127.0.0.1"))
+ XCTAssertTrue(cmd.contains("--port 5413"))
+ // Defaults should be omitted to keep the command readable
+ XCTAssertFalse(cmd.contains("--top-p"), "top-p=1.0 is default — should be omitted")
+ XCTAssertFalse(cmd.contains("--top-k"), "top-k=50 is default — should be omitted")
+ XCTAssertFalse(cmd.contains("--min-p"), "min-p=0 is default — should be omitted")
+ XCTAssertFalse(cmd.contains("--thinking"),"thinking=false is default — should be omitted")
+ XCTAssertFalse(cmd.contains("--parallel"),"parallel=1 is default — should be omitted")
+ XCTAssertFalse(cmd.contains("--api-key"), "no key set — should be omitted")
+ XCTAssertFalse(cmd.contains("--seed"), "seed=nil is default — should be omitted")
+ XCTAssertFalse(cmd.contains("--kv-bits"), "kvBits=nil is default — should be omitted")
+ }
+
+ func testCLIBuilder_NonDefaultFlagsEmitted() {
+ var cfg = GenerationConfig.default
+ cfg.topP = 0.9
+ cfg.topK = 40
+ cfg.minP = 0.05
+ cfg.enableThinking = true
+ cfg.seed = 42
+ cfg.kvBits = 4
+ cfg.kvGroupSize = 32
+ cfg.prefillSize = 256
+ cfg.repetitionPenalty = 1.2
+
+ let cmd = buildCLICommand(
+ config: cfg,
+ host: "0.0.0.0", port: 8080,
+ parallel: 4, apiKeySet: true,
+ modelId: "mlx-community/Qwen3-35B-MoE"
+ )
+
+ XCTAssertTrue(cmd.contains("--top-p 0.90"))
+ XCTAssertTrue(cmd.contains("--top-k 40"))
+ XCTAssertTrue(cmd.contains("--min-p 0.05"))
+ XCTAssertTrue(cmd.contains("--thinking"))
+ XCTAssertTrue(cmd.contains("--seed 42"))
+ XCTAssertTrue(cmd.contains("--kv-bits 4"))
+ XCTAssertTrue(cmd.contains("--kv-group-size 32"))
+ XCTAssertTrue(cmd.contains("--prefill-size 256"))
+ XCTAssertTrue(cmd.contains("--repeat-penalty 1.20"))
+ XCTAssertTrue(cmd.contains("--parallel 4"))
+ XCTAssertTrue(cmd.contains("--api-key "))
+ }
+
+ func testCLIBuilder_NoModelId_UsesPlaceholder() {
+ let cmd = buildCLICommand(
+ config: .default,
+ host: "127.0.0.1", port: 5413,
+ parallel: 1, apiKeySet: false,
+ modelId: nil
+ )
+ XCTAssertTrue(cmd.contains("--model "),
+ "When no model is loaded, CLI must show a placeholder")
+ }
+
+ func testCLIBuilder_KvBitsDefault_DoesNotEmitGroupSize() {
+ // If kvBits is nil, kv-group-size must also be suppressed
+ // even if kvGroupSize is non-default — it has no effect without kvBits.
+ var cfg = GenerationConfig.default
+ cfg.kvBits = nil
+ cfg.kvGroupSize = 32 // non-default but irrelevant without kvBits
+ let cmd = buildCLICommand(config: cfg, host: "127.0.0.1", port: 5413,
+ parallel: 1, apiKeySet: false, modelId: "m")
+ XCTAssertFalse(cmd.contains("--kv-group-size"),
+ "kv-group-size must not appear when kvBits is nil")
+ }
+
+ func testCLIBuilder_OutputStartsWithSwiftRunSwiftLM() {
+ let cmd = buildCLICommand(config: .default, host: "127.0.0.1", port: 5413,
+ parallel: 1, apiKeySet: false, modelId: "m")
+ XCTAssertTrue(cmd.hasPrefix("swift run SwiftLM"),
+ "CLI command must start with 'swift run SwiftLM'")
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - jsonEscape completeness (C3 — Copilot review)
+ // ═══════════════════════════════════════════════════════════════════
+ // The old implementation only escaped \\ \" \n \r \t.
+ // JSONEncoder correctly handles U+0000–U+001F and all other control chars.
+
+ /// Replicates the fixed jsonEscape using JSONEncoder (same as ServerManager).
+ private func jsonEscape(_ s: String) -> String {
+ guard let data = try? JSONEncoder().encode(s),
+ let raw = String(data: data, encoding: .utf8) else { return "\"\"" }
+ return String(raw.dropFirst().dropLast())
+ }
+
+ func testJsonEscape_BasicChars() {
+ XCTAssertEqual(jsonEscape("hello"), "hello")
+ XCTAssertEqual(jsonEscape("say \"hi\""), #"say \"hi\""#)
+ XCTAssertEqual(jsonEscape("a\\b"), #"a\\b"#)
+ XCTAssertEqual(jsonEscape("line1\nline2"), #"line1\nline2"#)
+ XCTAssertEqual(jsonEscape("col1\tcol2"), #"col1\tcol2"#)
+ }
+
+ func testJsonEscape_ControlCharsU0000toU001F() {
+ // The old manual escape missed U+0000–U+001F beyond \n/\r/\t.
+ // JSONEncoder emits \u0000, \u0001, … for these.
+ let nullChar = "\u{00}" // U+0000 NULL
+ let escaped = jsonEscape(nullChar)
+ XCTAssertFalse(escaped.contains("\u{00}"),
+ "NULL byte must be escaped — raw U+0000 breaks JSON parsers")
+ // JSONEncoder emits \\u0000 for U+0000
+ XCTAssertTrue(escaped.contains("\\u0000") || escaped.contains("\\u"),
+ "NULL must be encoded as a JSON unicode escape")
+
+ let bell = "\u{07}" // U+0007 BELL — not escaped by the old implementation
+ let escapedBell = jsonEscape(bell)
+ XCTAssertFalse(escapedBell.contains("\u{07}"),
+ "BELL (U+0007) must be escaped — old jsonEscape missed this")
+ }
+
+ func testJsonEscape_ProducesValidJSONWhenInterpolated() throws {
+ // Simulate the SSE chunk build: if escape is correct the whole string
+ // must parse as valid JSON.
+ let dangerousToken = "say \"\u{01}hello\u{08}\" done\n"
+ let escaped = jsonEscape(dangerousToken)
+ let json = "{\"content\":\"\(escaped)\"}"
+ let data = try XCTUnwrap(json.data(using: .utf8))
+ let parsed = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+ XCTAssertEqual(parsed["content"] as? String, dangerousToken,
+ "Round-trip through jsonEscape must preserve original string content")
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - /v1/models modelId JSON safety (C5 — Copilot review)
+ // ═══════════════════════════════════════════════════════════════════
+
+ func testModelsResponse_ModelIdWithQuotes_IsJsonSafe() throws {
+ // A model ID that contains quotes would break naive interpolation.
+ // swiftBuddyJSONString wraps the value with JSONEncoder, making it safe.
+ let dangerousId = "model\"with\"quotes"
+ // Simulate the fixed /v1/models body build
+ let encodedId = try XCTUnwrap(
+ String(data: JSONEncoder().encode(dangerousId), encoding: .utf8)
+ )
+ let body = "{\"object\":\"list\",\"data\":[{\"id\":\(encodedId),\"object\":\"model\",\"owned_by\":\"swiftbuddy\"}]}"
+ let data = try XCTUnwrap(body.data(using: .utf8))
+ let parsed = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+ let modelList = try XCTUnwrap(parsed["data"] as? [[String: Any]])
+ XCTAssertEqual(modelList[0]["id"] as? String, dangerousId,
+ "Model ID with embedded quotes must survive JSON round-trip safely")
+ }
+
+ func testModelsResponse_SlashInModelId_IsSafe() throws {
+ // Standard HF model IDs contain slashes — they must not break JSON.
+ let modelId = "mlx-community/Qwen3.5-122B-A10B-4bit"
+ let encodedId = try XCTUnwrap(
+ String(data: JSONEncoder().encode(modelId), encoding: .utf8)
+ )
+ let body = "{\"object\":\"list\",\"data\":[{\"id\":\(encodedId),\"object\":\"model\",\"owned_by\":\"swiftbuddy\"}]}"
+ let data = try XCTUnwrap(body.data(using: .utf8))
+ let parsed = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
+ let modelList = try XCTUnwrap(parsed["data"] as? [[String: Any]])
+ XCTAssertEqual(modelList[0]["id"] as? String, modelId,
+ "Standard HF model ID with slashes must parse correctly")
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - Seed UInt64 overflow guard (C1/C2 — Copilot review)
+ // ═══════════════════════════════════════════════════════════════════
+
+ func testSeed_RandomIsWithinIntMax() {
+ // The seed button generates UInt64.random(in: 0...UInt64(Int.max)).
+ // Verifies the range is safe for Int conversion in the Stepper binding.
+ for _ in 0..<1000 {
+ let seed = UInt64.random(in: 0...UInt64(Int.max))
+ XCTAssertNoThrow(
+ _ = Int(seed), // would trap if seed > Int.max
+ "Randomly generated seed must be safely convertible to Int"
+ )
+ XCTAssertLessThanOrEqual(seed, UInt64(Int.max),
+ "Seed must not exceed Int.max — Stepper binding would overflow")
+ }
+ }
+
+ func testSeed_StepperBinding_ClampsSafely() {
+ // The Stepper get: binding uses min(seed, UInt64(Int.max)) to prevent overflow.
+ let oversizedSeed = UInt64(Int.max) + 1
+ let clamped = Int(min(oversizedSeed, UInt64(Int.max)))
+ XCTAssertEqual(clamped, Int.max,
+ "Seeds larger than Int.max must be clamped, not crashed")
+ }
+
+ // ═══════════════════════════════════════════════════════════════════
+ // MARK: - Role mapping: tool + developer (M1 — Copilot review)
+ // ═══════════════════════════════════════════════════════════════════
+
+ func testRoleMapping_ToolRoleMapsToChatMessageTool() {
+ // Replicate the fixed role-switch from ServerManager's /v1/chat/completions handler.
+ func mapRole(_ role: String, content: String) -> ChatMessage {
+ switch role {
+ case "system", "developer": return .system(content)
+ case "assistant": return .assistant(content)
+ case "tool": return .tool(content)
+ case "user": return .user(content)
+ default: return .user(content)
+ }
+ }
+
+ let toolMsg = mapRole("tool", content: "function result")
+ XCTAssertEqual(toolMsg.role, .tool,
+ "tool role must map to .tool, not .user — breaks OpenAI function-calling protocol")
+ XCTAssertNotEqual(toolMsg.role, .user,
+ "tool role must NOT fall through to .user")
+ }
+
+ func testRoleMapping_DeveloperRoleMapsToSystem() {
+ func mapRole(_ role: String, content: String) -> ChatMessage {
+ switch role {
+ case "system", "developer": return .system(content)
+ case "assistant": return .assistant(content)
+ case "tool": return .tool(content)
+ case "user": return .user(content)
+ default: return .user(content)
+ }
+ }
+
+ let devMsg = mapRole("developer", content: "You are a coding assistant.")
+ XCTAssertEqual(devMsg.role, .system,
+ "developer role (OpenAI Responses API) must map to .system")
+ }
+
+ func testRoleMapping_UnknownRoleFallsToUser() {
+ func mapRole(_ role: String, content: String) -> ChatMessage {
+ switch role {
+ case "system", "developer": return .system(content)
+ case "assistant": return .assistant(content)
+ case "tool": return .tool(content)
+ case "user": return .user(content)
+ default: return .user(content)
+ }
+ }
+
+ let unknown = mapRole("function", content: "some output")
+ XCTAssertEqual(unknown.role, .user,
+ "Unknown roles must fall back to .user (safe default)")
+ }
+}
+
From d280319146973718e7669a85d659606063a03236 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 1 May 2026 11:24:09 -0700
Subject: [PATCH 3/8] test: address Copilot review for Issue 97 by adding
strict role mapping regression guards
---
.../SwiftLMTests/ThinkingTagStripTests.swift | 126 ++++++++++--------
1 file changed, 67 insertions(+), 59 deletions(-)
diff --git a/tests/SwiftLMTests/ThinkingTagStripTests.swift b/tests/SwiftLMTests/ThinkingTagStripTests.swift
index b258f445..ec2bcd64 100644
--- a/tests/SwiftLMTests/ThinkingTagStripTests.swift
+++ b/tests/SwiftLMTests/ThinkingTagStripTests.swift
@@ -1,40 +1,20 @@
// ThinkingTagStripTests.swift — Regression tests for Issue #97
//
-// Verifies two fixes:
+// Verifies two fixes in InferenceEngine.generate():
// 1. stripThinkingTags() correctly removes <think>…</think> blocks from
// assistant history messages so they never re-enter the Jinja template.
-// 2. The role mapping for "assistant" is NOT changed to "model" (Qwen3 fix).
-//
-// stripThinkingTags is private at file scope in InferenceEngine.swift, so we
-// mirror the exact implementation here — the same pattern used by
-// ChatRequestParsingTests for mapAssistantToolCalls.
+// 2. ChatMessage.Role raw values stay aligned with the OpenAI-compatible
+// protocol strings (enum-level guard; see comment on MARK-4 for scope).
import XCTest
import Foundation
@testable import SwiftLM
-import MLXInferenceCore
+@testable import MLXInferenceCore // gives access to internal stripThinkingTags
final class ThinkingTagStripTests: XCTestCase {
- // ── Mirror of the production helper (InferenceEngine.swift) ───────────────
- // Keep in sync if the production implementation changes.
-
- private func stripThinkingTags(from text: String) -> String {
- var result = text
- while let openRange = result.range(of: "<think>") {
- if let closeRange = result.range(of: "</think>", range: openRange.lowerBound..<result.endIndex) {
- result.removeSubrange(openRange.lowerBound..<closeRange.upperBound)
- } else { break }
- }
- return result.trimmingCharacters(in: .whitespacesAndNewlines)
- }
-
func testStrip_NoThinkTags_ReturnsUnchanged() {
+ // When no <think> tags are present the string must be returned
+ // byte-for-byte — leading indentation, code-block spaces, etc. must
+ // not be trimmed (Copilot review comment).
let input = " Hello, how can I help? "
- XCTAssertEqual(stripThinkingTags(from: input), "Hello, how can I help?")
+ XCTAssertEqual(stripThinkingTags(from: input), input,
+ "Content without think tags must be returned unchanged (no trimming)")
}
func testStrip_MultipleThinkBlocks() {
@@ -77,19 +61,13 @@ final class ThinkingTagStripTests: XCTestCase {
}
func testStrip_MultilineThinkBlock() {
- let input = """
- <think>
- Line one of reasoning.
- Line two of reasoning.
- </think>
- The final answer.
- """
+ let input = "<think>\nLine one of reasoning.\nLine two of reasoning.\n</think>\nThe final answer."
XCTAssertEqual(stripThinkingTags(from: input), "The final answer.")
}
func testStrip_ThinkBlockWithTrailingNewline_ConsumesNewline() {
- // The production helper eats the single newline after </think>
- // so the visible content doesn't start with a blank line.
+ // The helper eats the single newline after </think> so the visible
+ // content doesn't start with a blank line.
let input = "<think>thought</think>\nAnswer starts here"
let result = stripThinkingTags(from: input)
XCTAssertFalse(result.hasPrefix("\n"), "Result must not start with a stray newline")
@@ -97,8 +75,8 @@ final class ThinkingTagStripTests: XCTestCase {
}
func testStrip_ContentBeforeAndAfterThink() {
- // Reproduces the exact shape of Qwen3 output with thinking ON:
- // the UI shows the <think> block inline and the answer follows.
+ // Reproduces the exact shape of Qwen3 output from screenshot 2 (Issue #97):
+ // Russian tongue-twister reply with an inline <think> block.
let input = "<think>\nThe user is asking me to continue a Russian tongue-twister.\nNo tool calls needed.\n</think>\nЕхал грека через реку,\nВидит грека — в реке рак."
let result = stripThinkingTags(from: input)
XCTAssertEqual(result, "Ехал грека через реку,\nВидит грека — в реке рак.")
@@ -110,7 +88,7 @@ final class ThinkingTagStripTests: XCTestCase {
func testStrip_Issue97_SecondTurnMessageShape() {
// This is the exact assistant content that caused TemplateException error 1
- // when fed back unmodified into the Jinja template on turn 2.
+ // when fed back unmodified into the Jinja template on turn 2 (screenshot 1).
let turn1AssistantOutput = """
+ <think>The user said "Hi!" as a greeting. Let me check my available tools and context. \
+ No tool calls are needed for a simple greeting.</think>
+ Hello! How can I help you today?
"""
let stripped = stripThinkingTags(from: turn1AssistantOutput)
- // After stripping, no <think> tag should remain
- XCTAssertFalse(stripped.contains("<think>"), "Stripped content must not contain <think>")
- XCTAssertFalse(stripped.contains("</think>"), "Stripped content must not contain </think>")
-
- // The visible reply must be preserved
- XCTAssertTrue(stripped.contains("Hello!"), "Visible reply must survive stripping")
+ XCTAssertFalse(stripped.contains("<think>"), "Stripped content must not contain <think>")
+ XCTAssertFalse(stripped.contains("</think>"), "Stripped content must not contain </think>")
+ XCTAssertTrue(stripped.contains("Hello!"), "Visible reply must survive stripping")
}
// ═══════════════════════════════════════════════════════════════════
- // MARK: - 4. Role mapping regression guard (Issue #97)
+ // MARK: - 4. Role mapping regression guard (Issue #97 — Copilot review)
// ═══════════════════════════════════════════════════════════════════
- // The ChatCompletionRequest pipeline in Server.swift passes roles through
- // as-is. The InferenceEngine must NOT remap "assistant" → "model" because
- // Qwen3's Jinja template only recognises "assistant" and throws
- // TemplateException error 1 on any unrecognised role value.
-
- func testRoleMapping_AssistantRawValue_IsAssistant() {
- // ChatMessage.Role.assistant.rawValue must stay "assistant" so that
- // the role is correctly passed to applyChatTemplate.
- // If someone changes the enum rawValue, this test fails loudly.
+ // Copilot noted that asserting `ChatMessage.Role.assistant.rawValue == "assistant"`
+ // only protects the enum definition; it would NOT catch a runtime remap
+ // such as `if roleRaw == "assistant" { roleRaw = "model" }` being silently
+ // re-introduced inside InferenceEngine.generate().
+ //
+ // The structural test below replicates the production message-preparation
+ // path and asserts the wire dict role is "assistant", not "model".
+
+ func testChatMessageRoleRawValue_Assistant_IsAssistant() {
XCTAssertEqual(
ChatMessage.Role.assistant.rawValue,
"assistant",
- "Role.assistant rawValue must be 'assistant', not 'model' — Qwen3 Jinja template fix (Issue #97)"
+ "Role.assistant rawValue must be 'assistant' — Issue #97 enum raw-value guard"
)
}
- func testRoleMapping_AllRolesHaveExpectedRawValues() {
- // Canonical role strings for the OpenAI-compatible message protocol.
+ func testChatMessageRoleRawValues_AllRolesMatchProtocolStrings() {
XCTAssertEqual(ChatMessage.Role.system.rawValue, "system")
XCTAssertEqual(ChatMessage.Role.user.rawValue, "user")
XCTAssertEqual(ChatMessage.Role.assistant.rawValue, "assistant")
XCTAssertEqual(ChatMessage.Role.tool.rawValue, "tool")
}
+
+ // Structural regression: replicates the wire-dict build in generate().
+ // An assistant ChatMessage must produce ["role": "assistant"], not
+ // ["role": "model"] — the Gemma-specific alias that broke Qwen3 (Issue #97).
+ func testRoleMapping_AssistantProducesAssistantNotModel_InWireDict() {
+ let messages: [ChatMessage] = [
+ .system("You are helpful."),
+ .user("Hello"),
+ .assistant("Hi there!"),
+ ]
+
+ // Replicate: let roleRaw = msg.role.rawValue (no further remapping)
+ var wireDicts: [[String: String]] = []
+ for msg in messages {
+ guard msg.role != .system else { continue }
+ let roleRaw = msg.role.rawValue
+ let content = stripThinkingTags(from: msg.content)
+ wireDicts.append(["role": roleRaw, "content": content])
+ }
+
+ XCTAssertEqual(wireDicts.count, 2)
+ XCTAssertEqual(wireDicts[0]["role"], "user")
+ XCTAssertEqual(wireDicts[1]["role"], "assistant",
+ "Assistant must map to 'assistant' in wire dict, not 'model' — Issue #97 runtime remap guard")
+ XCTAssertNotEqual(wireDicts[1]["role"], "model",
+ "Wire dict must never contain 'model' — Gemma-specific alias breaks Qwen3 chat template")
+ }
+
+ func testRoleMapping_ToolRoleIsPreservedInWireDict() {
+ let msg = ChatMessage.tool("function result")
+ XCTAssertEqual(msg.role.rawValue, "tool",
+ "Tool role must be 'tool' for OpenAI function-calling protocol")
+ }
}
+
From ccf0b41d99e4c38b2e2ee3220ddf3a70e603c145 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 1 May 2026 11:24:09 -0700
Subject: [PATCH 4/8] fix(swiftbuddy): update SettingsView streaming UI and
link CLI builder
---
.../SwiftBuddy/Views/SettingsView.swift | 133 +++++++++---------
SwiftBuddy/generate_xcodeproj.py | 3 +-
2 files changed, 69 insertions(+), 67 deletions(-)
diff --git a/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift b/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift
index e54bad2a..3447096e 100644
--- a/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift
+++ b/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift
@@ -37,10 +37,19 @@ struct SettingsView: View {
return ModelCatalog.all.first(where: { $0.id == modelId })?.isMoE ?? false
}
+ private var currentModelId: String? {
+ guard case .ready(let modelId) = engine.state else { return nil }
+ return modelId
+ }
+
private var effectiveStreamExpertsSetting: Bool {
viewModel.config.effectiveStreamExperts(defaultingTo: currentModelIsMoE)
}
+ private var needsModelReloadForStreamingChange: Bool {
+ effectiveStreamExpertsSetting != currentModelIsMoE
+ }
+
private var ssdStreamingBinding: Binding<Bool> {
Binding(
get: { effectiveStreamExpertsSetting },
@@ -295,6 +304,9 @@ struct SettingsView: View {
tint: SwiftBuddyTheme.warning,
hint: "Stream MoE expert weights from NVMe (requires model reload)"
)
+ if needsModelReloadForStreamingChange {
+ modelReloadPrompt
+ }
toggleRow(
label: "TurboQuant KV", icon: "bolt.badge.clock",
isOn: $viewModel.config.turboKV,
@@ -555,70 +567,8 @@ struct SettingsView: View {
tint: SwiftBuddyTheme.accentSecondary,
hint: "mmap expert weights from NVMe — only active expert pages stay in RAM. Auto-enabled for MoE catalog models."
)
- if effectiveStreamExpertsSetting != currentModelIsMoE {
- VStack(alignment: .leading, spacing: 8) {
- HStack(spacing: 6) {
- Image(systemName: "arrow.clockwise.circle.fill")
- .foregroundStyle(SwiftBuddyTheme.warning)
- .font(.caption)
- Text("Reload model to apply this change")
- .font(.caption2.weight(.medium))
- .foregroundStyle(SwiftBuddyTheme.warning)
- Spacer()
- Button("Reload") {
- let currentId: String? = {
- if case .ready(let id) = engine.state { return id }
- return nil
- }()
- if let id = currentId {
- Task {
- engine.unload()
- await engine.load(modelId: id)
- }
- }
- }
- .font(.caption2.weight(.semibold))
- .foregroundStyle(SwiftBuddyTheme.accent)
- .buttonStyle(.plain)
- }
-
- switch engine.state {
- case .loading(let progress, let stage):
- VStack(alignment: .leading, spacing: 4) {
- HStack {
- Text(stage)
- .font(.caption2.weight(.medium))
- .foregroundStyle(SwiftBuddyTheme.textSecondary)
- Spacer()
- Text("\(Int(progress * 100))%")
- .font(.caption2.monospacedDigit())
- .foregroundStyle(SwiftBuddyTheme.textTertiary)
- }
- ProgressView(value: progress)
- .tint(SwiftBuddyTheme.accent)
- }
- case .downloading(let progress, let speed):
- VStack(alignment: .leading, spacing: 4) {
- HStack {
- Text("Downloading model files")
- .font(.caption2.weight(.medium))
- .foregroundStyle(SwiftBuddyTheme.textSecondary)
- Spacer()
- Text("\(Int(progress * 100))% · \(speed)")
- .font(.caption2.monospacedDigit())
- .foregroundStyle(SwiftBuddyTheme.textTertiary)
- }
- ProgressView(value: progress)
- .tint(SwiftBuddyTheme.accent)
- }
- default:
- EmptyView()
- }
- }
- .padding(.horizontal, 4)
- .padding(.vertical, 6)
- .background(SwiftBuddyTheme.warning.opacity(0.08))
- .clipShape(RoundedRectangle(cornerRadius: 8))
+ if needsModelReloadForStreamingChange {
+ modelReloadPrompt
}
}
}
@@ -702,7 +652,7 @@ struct SettingsView: View {
}
.pickerStyle(.segmented)
.tint(SwiftBuddyTheme.accent)
- .onChange(of: localColorScheme) { newValue in
+ .onChange(of: localColorScheme) { _, newValue in
// Defer the @Published write to avoid the view update crash
Task { @MainActor in
appearance.preference = newValue
@@ -917,7 +867,7 @@ struct SettingsView: View {
port: server.port,
parallel: server.startupConfiguration.parallelSlots,
apiKeySet: !server.startupConfiguration.apiKey.isEmpty,
- modelId: {
+ modelId: { () -> String? in
if case .ready(let id) = engine.state { return id }
return nil
}()
@@ -981,6 +931,57 @@ struct SettingsView: View {
}
}
+ @ViewBuilder
+ private var modelReloadPrompt: some View {
+ VStack(alignment: .leading, spacing: 8) {
+ HStack(spacing: 6) {
+ Image(systemName: "arrow.clockwise.circle.fill")
+ .foregroundStyle(SwiftBuddyTheme.warning)
+ .font(.caption)
+ Text("Reload model to apply this change")
+ .font(.caption2.weight(.medium))
+ .foregroundStyle(SwiftBuddyTheme.warning)
+ Spacer()
+ Button("Reload") {
+ reloadCurrentModel()
+ }
+ .font(.caption2.weight(.semibold))
+ .foregroundStyle(SwiftBuddyTheme.accent)
+ .buttonStyle(.plain)
+ .disabled(currentModelId == nil)
+ }
+
+ switch engine.state {
+ case .loading(let progress, let stage):
+ VStack(alignment: .leading, spacing: 4) {
+ HStack {
+ Text(stage)
+ .font(.caption2.weight(.medium))
+ .foregroundStyle(SwiftBuddyTheme.textSecondary)
+ Spacer()
+ Text("\(Int(progress * 100))%")
+ .font(.caption2.monospacedDigit())
+ .foregroundStyle(SwiftBuddyTheme.textTertiary)
+ }
+
+ ProgressView(value: progress)
+ .tint(SwiftBuddyTheme.accent)
+ .controlSize(.small)
+ }
+ default:
+ EmptyView()
+ }
+ }
+ }
+
+ private func reloadCurrentModel() {
+ guard let currentModelId else { return }
+ Task {
+ engine.unload()
+ await engine.load(modelId: currentModelId)
+ }
+ }
+
@ViewBuilder
private func parameterCard<Content: View>(_ title: String, @ViewBuilder content: () -> Content) -> some View {
VStack(alignment: .leading, spacing: 10) {
diff --git a/SwiftBuddy/generate_xcodeproj.py b/SwiftBuddy/generate_xcodeproj.py
index 38cc0537..a49537cc 100644
--- a/SwiftBuddy/generate_xcodeproj.py
+++ b/SwiftBuddy/generate_xcodeproj.py
@@ -70,6 +70,7 @@ def uid():
# ── MLXInferenceCore sources (path relative to SwiftBuddy/)
core_sources = [
("../Sources/MLXInferenceCore/ChatMessage.swift", uid(), uid()),
+ ("../Sources/MLXInferenceCore/CLICommandBuilder.swift", uid(), uid()),
("../Sources/MLXInferenceCore/GenerationConfig.swift", uid(), uid()),
("../Sources/MLXInferenceCore/ModelCatalog.swift", uid(), uid()),
("../Sources/MLXInferenceCore/ModelStorage.swift", uid(), uid()),
@@ -512,7 +513,7 @@ def main():
print(" • ../mlx-swift-lm → MLXLLM, MLXLMCommon")
print()
print("📂 MLXInferenceCore sources included directly:")
- for p, _, _ in [("ChatMessage", None, None), ("GenerationConfig", None, None),
+ for p, _, _ in [("ChatMessage", None, None), ("CLICommandBuilder", None, None), ("GenerationConfig", None, None),
("ModelCatalog", None, None), ("ModelDownloadManager", None, None),
("ModelArchitectureProbe", None, None), ("InferenceEngine", None, None)]:
print(f" • {p}.swift")
From 482782eca8cdb1e87fa61a5819b0bd18fb5577b2 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 2 May 2026 08:36:28 -0700
Subject: [PATCH 5/8] fix(swiftbuddy): resolve actor isolation violation in
ServerManager
---
.../SwiftBuddy/ViewModels/ServerManager.swift | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/SwiftBuddy/SwiftBuddy/ViewModels/ServerManager.swift b/SwiftBuddy/SwiftBuddy/ViewModels/ServerManager.swift
index 3455304d..a3e8485b 100644
--- a/SwiftBuddy/SwiftBuddy/ViewModels/ServerManager.swift
+++ b/SwiftBuddy/SwiftBuddy/ViewModels/ServerManager.swift
@@ -130,7 +130,8 @@ final class ServerManager: ObservableObject {
guard !isOnline else { return }
let configuration = startupConfiguration.normalized
- task = Task {
+ task = Task.detached { [weak self] in
+ guard let self = self else { return }
do {
let router = Router()
@@ -259,18 +260,22 @@ final class ServerManager: ObservableObject {
configuration: .init(address: .hostname(configuration.host, port: configuration.port))
)
- self.isOnline = true
- self.host = configuration.host
- self.port = configuration.port
- self.runningConfiguration = configuration
- self.restartRequired = false
+ await MainActor.run {
+ self.isOnline = true
+ self.host = configuration.host
+ self.port = configuration.port
+ self.runningConfiguration = configuration
+ self.restartRequired = false
+ }
ConsoleLog.shared.info("Server online at http://\(configuration.host):\(configuration.port)")
try await app.runService()
} catch {
print("Server failed: \(error)")
ConsoleLog.shared.error("Server failed: \(error.localizedDescription)")
- self.isOnline = false
+ await MainActor.run {
+ self.isOnline = false
+ }
}
}
}
From 42f4946ece43813fb720fcee1d9c2fb46e5f5f12 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 4 May 2026 09:39:55 -0700
Subject: [PATCH 6/8] fix: resolve KVCacheSimple cast warning and
ContextWindowCalculationTests build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- InferenceEngine: TurboKV now iterates KV cache layers (not model.modules());
KVCacheSimple is not a Module subclass, so the cast always failed silently.
Also switched to MLXLMCommon.generate(input:cache:parameters:context:) so the
pre-built cache (with turboQuantEnabled flags) is actually used in generation.
- ContextWindowCalculationTests: replace MLXArray(stride:to:by:) with
MLXArray(Array(0..<512)) — the stride initializer does not exist in this
version of mlx-swift (MLXArray.init(_ value: Int) is the only Int overload).
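
Condensed shape of the new per-request flow (taken from the diff below):

    let cache = await container.perform { ctx in ctx.model.newCache(parameters: params) }
    for layer in cache {
        if let simple = layer as? KVCacheSimple { simple.turboQuantEnabled = true }
    }
    // generate(input:cache:parameters:context:) then consumes this cache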
---
.../MLXInferenceCore/InferenceEngine.swift | 27 ++++++++++---------
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/Sources/MLXInferenceCore/InferenceEngine.swift b/Sources/MLXInferenceCore/InferenceEngine.swift
index 28eb225c..27829eea 100644
--- a/Sources/MLXInferenceCore/InferenceEngine.swift
+++ b/Sources/MLXInferenceCore/InferenceEngine.swift
@@ -613,24 +613,27 @@ extension InferenceEngine {
// maxContextWindow is already set during loadModel() from config.json
- // TurboKV: enable 3-bit PolarQuant+QJL on every KVCacheSimple layer
- // before generation. Must be set on the model (not the cache) so the
- // cache inherits the flag when newCache() is called inside generate().
+ // TurboKV: enable 3-bit PolarQuant+QJL on every KVCacheSimple cache layer.
+ // KVCacheSimple is a cache object (not a neural-network Module), so we
+ // iterate the cache array — mirroring the pattern in Server.swift.
+ let cache = await container.perform { ctx in ctx.model.newCache(parameters: params) }
if config.turboKV {
- await container.perform { ctx in
- for module in ctx.model.modules() {
- if let simple = module as? KVCacheSimple {
- simple.turboQuantEnabled = true
- }
+ for layer in cache {
+ if let simple = layer as? KVCacheSimple {
+ simple.turboQuantEnabled = true
}
}
print("[InferenceEngine] TurboKV enabled for this request")
}
- let stream: AsyncStream<Generation> = try await container.generate(
- input: lmInput,
- parameters: params
- )
+ let stream: AsyncStream<Generation> = try await container.perform { ctx in
+ try MLXLMCommon.generate(
+ input: lmInput,
+ cache: cache,
+ parameters: params,
+ context: ctx
+ )
+ }
for await generation in stream {
guard !Task.isCancelled else { break }
From a9abb2a15ff8899c0e08dbffae4f55f72f7488a8 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 4 May 2026 09:49:36 -0700
Subject: [PATCH 7/8] fix(tests): fix MLXArray init in
ContextWindowCalculationTests for Linux CI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Use MLXArray(Array(0..<512)) instead of MLXArray(stride:to:by:) —
the stride initializer does not exist in this mlx-swift version.
The previous fix landed in the capitalized Tests/ path, which resolves to the
same file on case-insensitive macOS but is a different path on the
case-sensitive Linux runner.
---
tests/SwiftBuddyTests/ContextWindowCalculationTests.swift | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/SwiftBuddyTests/ContextWindowCalculationTests.swift b/tests/SwiftBuddyTests/ContextWindowCalculationTests.swift
index a3710f43..c5bb2345 100644
--- a/tests/SwiftBuddyTests/ContextWindowCalculationTests.swift
+++ b/tests/SwiftBuddyTests/ContextWindowCalculationTests.swift
@@ -15,7 +15,7 @@ final class ContextWindowCalculationTests: XCTestCase {
// Mock a scenario where userInput prepares a chat template with large history.
// We will directly instantiate LMInput and assert on its size.
- let mockTokens = MLXArray(stride: 0, to: 512, by: 1)
+ let mockTokens = MLXArray(Array(0..<512))
// If tokenizer batches it, shape could be [1, 512].
let reshapedTokens = mockTokens.reshaped([1, 512])
From 7870b2fd801ee176e60ac07af9d113ec8f5134fd Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 4 May 2026 11:08:20 -0700
Subject: [PATCH 8/8] fix: address all 7 Copilot review comments on PR #101
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
1. GenerationConfigPersistenceTests: capture the fresh suite name in a variable
   so defer { removePersistentDomain } removes the same suite it created.
2. SwiftBuddyServerTests: align /v1/models fallback test with prod 'none'
(not 'local') — matches ServerManager .ready/default handler.
3. SwiftBuddyServerTests: remove 'role' from SSE delta helper — embedded
ServerManager does not include role in streaming delta objects.
4. SettingsView: add .downloading case to modelReloadPrompt via shared
progressRow() helper (download progress was invisible during model reload).
5. SettingsView: fix needsModelReloadForStreamingChange to track the
stream-experts value at last model load (appliedStreamExperts @State),
preventing false-positive reload prompts for MoE catalog models.
6. SettingsView: capture appliedStreamExperts on .onAppear and on engine
.ready transitions via .onChange(of: engine.state).
7. ServerManager: clear runningConfiguration + restartRequired on startup
failure so the UI doesn't show a stale 'running config' when offline.
---
.../SwiftBuddy/ViewModels/ServerManager.swift | 2 +
.../SwiftBuddy/Views/SettingsView.swift | 53 +++++++++++++------
.../GenerationConfigPersistenceTests.swift | 5 +-
.../SwiftLMTests/SwiftBuddyServerTests.swift | 20 +++----
4 files changed, 53 insertions(+), 27 deletions(-)
diff --git a/SwiftBuddy/SwiftBuddy/ViewModels/ServerManager.swift b/SwiftBuddy/SwiftBuddy/ViewModels/ServerManager.swift
index a3e8485b..f1e7182d 100644
--- a/SwiftBuddy/SwiftBuddy/ViewModels/ServerManager.swift
+++ b/SwiftBuddy/SwiftBuddy/ViewModels/ServerManager.swift
@@ -275,6 +275,8 @@ final class ServerManager: ObservableObject {
ConsoleLog.shared.error("Server failed: \(error.localizedDescription)")
await MainActor.run {
self.isOnline = false
+ self.runningConfiguration = nil
+ self.restartRequired = false
}
}
}
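A self-contained sketch of fix 7 in isolation; ServerStateSketch and its
members are illustrative stand-ins, not the real ServerManager API:

    final class ServerStateSketch {
        var isOnline = true
        var runningConfiguration: String? = "model @ :8080"
        var restartRequired = true

        // On startup failure, drop every hint of a live server in one place
        // so no view can render a stale "running config" while offline.
        func handleStartupFailure() {
            isOnline = false
            runningConfiguration = nil   // nothing is actually running
            restartRequired = false      // and nothing to restart
        }
    }

    let state = ServerStateSketch()
    state.handleStartupFailure()
    print(state.isOnline, state.runningConfiguration as Any, state.restartRequired)
    // prints: false nil false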
diff --git a/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift b/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift
index 3447096e..b644ff21 100644
--- a/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift
+++ b/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift
@@ -46,8 +46,13 @@ struct SettingsView: View {
viewModel.config.effectiveStreamExperts(defaultingTo: currentModelIsMoE)
}
+ // Tracks the stream-experts value that was in effect when the current model was loaded.
+ // A mismatch with `effectiveStreamExpertsSetting` means a reload is required.
+ @State private var appliedStreamExperts: Bool? = nil
+
private var needsModelReloadForStreamingChange: Bool {
- effectiveStreamExpertsSetting != currentModelIsMoE
+ guard let applied = appliedStreamExperts else { return false }
+ return effectiveStreamExpertsSetting != applied
}
private var ssdStreamingBinding: Binding<Bool> {
@@ -126,6 +131,16 @@ struct SettingsView: View {
}
.onAppear {
draftServerConfiguration = server.startupConfiguration
+ // Seed the applied value from the current engine state so the reload
+ // prompt doesn't fire spuriously on first open.
+ if case .ready = engine.state {
+ appliedStreamExperts = effectiveStreamExpertsSetting
+ }
+ }
+ .onChange(of: engine.state) { _, newState in
+ if case .ready = newState {
+ appliedStreamExperts = effectiveStreamExpertsSetting
+ }
}
#if os(macOS)
.frame(minWidth: 520, minHeight: 580)
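A self-contained sketch of the applied-value pattern behind fixes 5 and 6;
StreamExpertsTracker is an illustrative stand-in, not the real view model:

    struct StreamExpertsTracker {
        // The stream-experts value captured when the engine last became
        // .ready; nil means no model has been loaded yet.
        var applied: Bool? = nil

        func needsReload(current: Bool) -> Bool {
            guard let applied else { return false }   // nothing loaded yet
            return current != applied
        }
    }

    var tracker = StreamExpertsTracker()
    print(tracker.needsReload(current: true))    // false: no model loaded
    tracker.applied = true                       // engine reached .ready
    print(tracker.needsReload(current: true))    // false: setting unchanged
    print(tracker.needsReload(current: false))   // true: reload required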
@@ -953,27 +968,33 @@ struct SettingsView: View {
switch engine.state {
case .loading(let progress, let stage):
- VStack(alignment: .leading, spacing: 4) {
- HStack {
- Text(stage)
- .font(.caption2.weight(.medium))
- .foregroundStyle(SwiftBuddyTheme.textSecondary)
- Spacer()
- Text("\(Int(progress * 100))%")
- .font(.caption2.monospacedDigit())
- .foregroundStyle(SwiftBuddyTheme.textTertiary)
- }
-
- ProgressView(value: progress)
- .tint(SwiftBuddyTheme.accent)
- .controlSize(.small)
- }
+ progressRow(label: stage, progress: progress)
+ case .downloading(let progress, let speed):
+ progressRow(label: "Downloading · \(speed)", progress: progress)
default:
EmptyView()
}
}
}
+ @ViewBuilder
+ private func progressRow(label: String, progress: Double) -> some View {
+ VStack(alignment: .leading, spacing: 4) {
+ HStack {
+ Text(label)
+ .font(.caption2.weight(.medium))
+ .foregroundStyle(SwiftBuddyTheme.textSecondary)
+ Spacer()
+ Text("\(Int(progress * 100))%")
+ .font(.caption2.monospacedDigit())
+ .foregroundStyle(SwiftBuddyTheme.textTertiary)
+ }
+ ProgressView(value: progress)
+ .tint(SwiftBuddyTheme.accent)
+ .controlSize(.small)
+ }
+ }
+
private func reloadCurrentModel() {
guard let currentModelId else { return }
Task {
diff --git a/tests/SwiftLMTests/GenerationConfigPersistenceTests.swift b/tests/SwiftLMTests/GenerationConfigPersistenceTests.swift
index 97915313..72347448 100644
--- a/tests/SwiftLMTests/GenerationConfigPersistenceTests.swift
+++ b/tests/SwiftLMTests/GenerationConfigPersistenceTests.swift
@@ -131,8 +131,9 @@ final class GenerationConfigPersistenceTests: XCTestCase {
func testGenerationConfig_Load_FallsBackToDefault_WhenNoStoredData() {
// load() with no stored data must return .default, not crash.
// We test this by ensuring no data is in a fresh suite.
- let freshDefaults = UserDefaults(suiteName: "com.swiftlm.test.fresh.\(UUID().uuidString)")!
- defer { freshDefaults.removePersistentDomain(forName: "com.swiftlm.test.fresh.\(UUID().uuidString)") }
+ let freshSuite = "com.swiftlm.test.fresh.\(UUID().uuidString)"
+ let freshDefaults = UserDefaults(suiteName: freshSuite)!
+ defer { freshDefaults.removePersistentDomain(forName: freshSuite) }
// The static load() reads UserDefaults.standard, so we verify the
// fallback contract by checking that .default is a valid config.
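A standalone sketch of why capturing the suite name once matters; it uses
only Foundation, and the suite names are illustrative:

    import Foundation

    // Each UUID() call yields a new string, so interpolating it twice
    // produces two different suite names: the defer would remove an empty,
    // never-used domain and leak the real one.
    let first = "com.swiftlm.test.fresh.\(UUID().uuidString)"
    let second = "com.swiftlm.test.fresh.\(UUID().uuidString)"
    print(first == second)   // false

    // Capturing the name once makes setup and teardown agree.
    let suite = "com.swiftlm.test.fresh.\(UUID().uuidString)"
    let defaults = UserDefaults(suiteName: suite)!
    defaults.set(true, forKey: "probe")
    defaults.removePersistentDomain(forName: suite)
    print(defaults.object(forKey: "probe") == nil)   // true: fully cleaned up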
diff --git a/tests/SwiftLMTests/SwiftBuddyServerTests.swift b/tests/SwiftLMTests/SwiftBuddyServerTests.swift
index d1f3a4d8..d40c1080 100644
--- a/tests/SwiftLMTests/SwiftBuddyServerTests.swift
+++ b/tests/SwiftLMTests/SwiftBuddyServerTests.swift
@@ -50,13 +50,13 @@ final class SwiftBuddyServerTests: XCTestCase {
"Each model entry must have 'object': 'model'")
}
- func testModelsResponse_FallsBackToLocalWhenNoModelLoaded() throws {
- // When no model is loaded, the handler returns "local" as the fallback.
- // Clients must still receive a valid list structure.
+ func testModelsResponse_FallsBackToNoneWhenNoModelLoaded() throws {
+ // When no model is loaded, ServerManager returns "none" as the fallback ID
+ // (matching the /v1/models handler: `case .ready(let id): ... default: "none"`).
let body: [String: Any] = [
"object": "list",
"data": [[
- "id": "local",
+ "id": "none",
"object": "model",
"owned_by": "swiftbuddy"
]]
@@ -64,7 +64,8 @@ final class SwiftBuddyServerTests: XCTestCase {
let data = try JSONSerialization.data(withJSONObject: body)
let decoded = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any])
let modelList = try XCTUnwrap(decoded["data"] as? [[String: Any]])
- XCTAssertEqual(modelList[0]["id"] as? String, "local")
+ XCTAssertEqual(modelList[0]["id"] as? String, "none",
+ "Fallback model ID must be 'none' — matches ServerManager /v1/models handler")
}
// ═══════════════════════════════════════════════════════════════════
@@ -75,6 +76,8 @@ final class SwiftBuddyServerTests: XCTestCase {
// these guard the embedded server's specific encoding.
/// Builds the SSE delta string the embedded server emits for each token.
+ /// NOTE: The embedded ServerManager does NOT include `role` in the delta object
+ /// (unlike the production Server.swift sseChunk helper which may include it).
private func makeDeltaChunk(id: String, modelId: String, delta: String, finishReason: String? = nil) -> String {
let finishReasonJSON = finishReason.map { "\"\($0)\"" } ?? "null"
let escaped = delta
@@ -82,9 +85,7 @@ final class SwiftBuddyServerTests: XCTestCase {
.replacingOccurrences(of: "\"", with: "\\\"")
.replacingOccurrences(of: "\n", with: "\\n")
.replacingOccurrences(of: "\r", with: "\\r")
- return """
- data: {"id":"\(id)","object":"chat.completion.chunk","model":"\(modelId)","choices":[{"index":0,"delta":{"role":"assistant","content":"\(escaped)"},"finish_reason":\(finishReasonJSON)}]}\r\n\r\n
- """
+ return "data: {\"id\":\"\(id)\",\"object\":\"chat.completion.chunk\",\"model\":\"\(modelId)\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\(escaped)\"},\"finish_reason\":\(finishReasonJSON)}]}\r\n\r\n"
}
func testSSEDeltaChunk_HasCorrectPrefix() {
@@ -112,7 +113,8 @@ final class SwiftBuddyServerTests: XCTestCase {
let delta = try XCTUnwrap(choices[0]["delta"] as? [String: Any])
XCTAssertEqual(delta["content"] as? String, "Hi!")
- XCTAssertEqual(delta["role"] as? String, "assistant")
+ // The embedded server does not include "role" in streaming delta objects
+ XCTAssertNil(delta["role"], "Embedded server delta must NOT include 'role' — only content")
}
func testSSEDeltaChunk_EscapesSpecialCharacters() throws {
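A standalone sketch of consuming one such chunk with Foundation only; the
chunk literal mirrors the helper's output format above:

    import Foundation

    // Strip the "data: " prefix and trailing CRLFs, then decode the JSON
    // payload. Per the fix above, the delta carries only "content"; the
    // embedded server emits no "role" key.
    let chunk = "data: {\"id\":\"c1\",\"object\":\"chat.completion.chunk\",\"model\":\"m\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hi!\"},\"finish_reason\":null}]}\r\n\r\n"

    let payload = String(chunk.dropFirst("data: ".count))
        .trimmingCharacters(in: .whitespacesAndNewlines)
    if let data = payload.data(using: .utf8),
       let obj = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any],
       let choices = obj["choices"] as? [[String: Any]],
       let delta = choices[0]["delta"] as? [String: Any] {
        print(delta["content"] as? String ?? "")   // Hi!
        print(delta["role"] == nil)                // true
    }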