From f8fc2e15bce87e5470ce451d298f37ef0e065b6c Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 21 May 2026 16:05:17 +0000
Subject: [PATCH] docs: add Inception LLM service documentation

- Added inception.mdx with full API reference for InceptionLLMService
- Updated docs.json to include Inception in LLM navigation
- Updated supported-services.mdx to list Inception in LLM table
- Documents Mercury-2 model with reasoning_effort and realtime settings
---
 .../server/services/llm/inception.mdx         | 149 ++++++++++++++++++
 .../server/services/supported-services.mdx    |   1 +
 docs.json                                     |   1 +
 3 files changed, 151 insertions(+)
 create mode 100644 api-reference/server/services/llm/inception.mdx
diff --git a/api-reference/server/services/llm/inception.mdx b/api-reference/server/services/llm/inception.mdx
new file mode 100644
index 00000000..a7effe33
--- /dev/null
+++ b/api-reference/server/services/llm/inception.mdx
@@ -0,0 +1,149 @@
+---
+title: "Inception"
+description: "LLM service implementation using Inception's API with OpenAI-compatible interface"
+---
+
+## Overview
+
+`InceptionLLMService` provides access to Inception's Mercury-2 diffusion-based reasoning model through an OpenAI-compatible interface. It inherits from `OpenAILLMService` and supports streaming responses, function calling, and context management with advanced reasoning capabilities.
+
+<CardGroup cols={2}>
+  <Card
+    title="Inception LLM API Reference"
+    icon="code"
+    href="https://reference-server.pipecat.ai/en/latest/api/pipecat.services.inception.llm.html"
+  >
+    Pipecat's API methods for Inception integration
+  </Card>
+  <Card
+    title="Example Implementation"
+    icon="play"
+    href="https://github.com/pipecat-ai/pipecat/blob/main/examples/function-calling/function-calling-inception.py"
+  >
+    Complete example with function calling
+  </Card>
+  <Card
+    title="Inception Labs"
+    icon="key"
+    href="https://inceptionlabs.ai/"
+  >
+    Access models and manage API keys
+  </Card>
+</CardGroup>
+
+## Installation
+
+To use Inception services, install the required dependency:
+
+```bash
+uv add "pipecat-ai[inception]"
+```
+
+## Prerequisites
+
+### Inception Account Setup
+
+Before using Inception LLM services, you need:
+
+1. **Inception Account**: Sign up at [Inception Labs](https://inceptionlabs.ai/)
+2. **API Key**: Generate an API key from your account dashboard
+3. **Model Selection**: Access to Mercury-2, Inception's diffusion-based reasoning model
+
+### Required Environment Variables
+
+- `INCEPTION_API_KEY`: Your Inception API key for authentication
+
+## Configuration
+
+<ParamField path="api_key" type="str" required>
+  Inception API key for authentication.
+</ParamField>
+
+<ParamField path="base_url" type="str" default="https://api.inceptionlabs.ai/v1">
+  Base URL for Inception API endpoint.
+</ParamField>
+
+<ParamField path="settings" type="InceptionLLMService.Settings" default="None">
+  Runtime-configurable settings. See [Settings](#settings) below.
+</ParamField>
+
+### Settings
+
+Runtime-configurable settings passed via the `settings` constructor argument using `InceptionLLMService.Settings(...)`. These can be updated mid-conversation with `LLMUpdateSettingsFrame`. See [Service Settings](/pipecat/fundamentals/service-settings) for details.
+
+This service extends `OpenAILLMService.Settings` with Inception-specific parameters:
+
+<ParamField path="model" type="str" default="mercury-2">
+  Model identifier to use. Defaults to "mercury-2", Inception's diffusion-based reasoning model.
+</ParamField>
+
+<ParamField path="reasoning_effort" type="Literal['instant', 'low', 'medium', 'high'] | None" default="None">
+  Controls how much reasoning the model applies. Options are "instant", "low", "medium", or "high". When unset, the parameter is omitted and Inception's server-side default applies.
+</ParamField>
+
+<ParamField path="realtime" type="bool | None" default="None">
+  When `True`, reduces the time to the first diffusion block, i.e. the time to first token (TTFT), for faster initial responses.
+</ParamField>
+
+For additional settings inherited from OpenAI, see [OpenAI LLM Settings](/api-reference/server/services/llm/openai#settings).
+
+## Usage
+
+### Basic Setup
+
+```python
+import os
+from pipecat.services.inception.llm import InceptionLLMService
+
+llm = InceptionLLMService(
+    api_key=os.getenv("INCEPTION_API_KEY"),
+)
+```
+
+### With Custom Settings
+
+```python
+from pipecat.services.inception.llm import InceptionLLMService
+
+llm = InceptionLLMService(
+    api_key=os.getenv("INCEPTION_API_KEY"),
+    settings=InceptionLLMService.Settings(
+        model="mercury-2",
+        reasoning_effort="instant",
+        realtime=True,
+        temperature=0.7,
+        max_tokens=2048,
+    ),
+)
+```
+
+### With Function Calling
+
+```python
+from pipecat.services.inception.llm import InceptionLLMService
+from pipecat.services.llm_service import FunctionCallParams
+
+async def get_weather(params: FunctionCallParams):
+    await params.result_callback({"temperature": "75", "conditions": "sunny"})
+
+llm = InceptionLLMService(
+    api_key=os.getenv("INCEPTION_API_KEY"),
+    settings=InceptionLLMService.Settings(
+        reasoning_effort="low",
+    ),
+)
+
+llm.register_function("get_weather", get_weather)
+```
+
+## Notes
+
+- Inception does not support the `"developer"` message role. Use `"system"` instead.
+- Mercury-2 is a diffusion-based reasoning model; the amount of reasoning it applies is controlled via the `reasoning_effort` setting.
+- Setting `realtime=True` optimizes for lower time-to-first-token at the potential cost of reasoning depth.
+
+<Tip>
+  The `InputParams` / `params=` pattern is deprecated as of v0.0.105. Use
+  `Settings` / `settings=` instead. See the [Service Settings
+  guide](/pipecat/fundamentals/service-settings) for migration details.
+</Tip>
diff --git a/api-reference/server/services/supported-services.mdx b/api-reference/server/services/supported-services.mdx
index e4b6073e..cb1ca373 100644
--- a/api-reference/server/services/supported-services.mdx
+++ b/api-reference/server/services/supported-services.mdx
@@ -75,6 +75,7 @@ LLMs receive text or audio based input and output a streaming text response.
 | [Google Vertex AI](/api-reference/server/services/llm/google-vertex)    | `uv add "pipecat-ai[google]"`     |
 | [Grok](/api-reference/server/services/llm/grok)                         | `uv add "pipecat-ai[grok]"`       |
 | [Groq](/api-reference/server/services/llm/groq)                         | `uv add "pipecat-ai[groq]"`       |
+| [Inception](/api-reference/server/services/llm/inception)               | `uv add "pipecat-ai[inception]"`  |
 | [Mistral](/api-reference/server/services/llm/mistral)                   | `uv add "pipecat-ai[mistral]"`    |
 | [Nebius](/api-reference/server/services/llm/nebius)                     | `uv add "pipecat-ai[nebius]"`     |
 | [Novita AI](/api-reference/server/services/llm/novita)                  | `uv add "pipecat-ai[novita]"`     |
diff --git a/docs.json b/docs.json
index d64aef7d..49e90dfd 100644
--- a/docs.json
+++ b/docs.json
@@ -384,6 +384,7 @@
                       "api-reference/server/services/llm/google-vertex",
                       "api-reference/server/services/llm/grok",
                       "api-reference/server/services/llm/groq",
+                      "api-reference/server/services/llm/inception",
                       "api-reference/server/services/llm/mistral",
                       "api-reference/server/services/llm/nebius",
                       "api-reference/server/services/llm/novita",