Skip to content

Commit d76bfe6

Browse files
fix/system prompt sometimes missing (#326)
* fix: always capture system prompt * chore: bump version * fix: gemini system prompt capture * chore: imports at top * fix: test. The mock we were passing from this test reported that it had a `system_instruction` field, breaking assumptions * chore: lint * fix: better code organization * chore: lint
1 parent b3e21c1 commit d76bfe6

8 files changed

Lines changed: 445 additions & 23 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# 6.7.4 - 2025-09-05
2+
3+
- fix: Missing system prompts for some providers
4+
15
# 6.7.3 - 2025-09-04
26

37
- fix: missing usage tokens in Gemini

posthog/ai/gemini/gemini.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
import uuid
44
from typing import Any, Dict, Optional
55

6-
from posthog.ai.types import TokenUsage
6+
from posthog.ai.types import TokenUsage, StreamingEventData
7+
from posthog.ai.utils import merge_system_prompt
78

89
try:
910
from google import genai
@@ -19,7 +20,6 @@
1920
merge_usage_stats,
2021
)
2122
from posthog.ai.gemini.gemini_converter import (
22-
format_gemini_input,
2323
extract_gemini_usage_from_chunk,
2424
extract_gemini_content_from_chunk,
2525
format_gemini_streaming_output,
@@ -356,10 +356,8 @@ def _capture_streaming_event(
356356
latency: float,
357357
output: Any,
358358
):
359-
from posthog.ai.types import StreamingEventData
360-
361359
# Prepare standardized event data
362-
formatted_input = self._format_input(contents)
360+
formatted_input = self._format_input(contents, **kwargs)
363361
sanitized_input = sanitize_gemini(formatted_input)
364362

365363
event_data = StreamingEventData(
@@ -381,10 +379,12 @@ def _capture_streaming_event(
381379
# Use the common capture function
382380
capture_streaming_event(self._ph_client, event_data)
383381

384-
def _format_input(self, contents):
382+
def _format_input(self, contents, **kwargs):
385383
"""Format input contents for PostHog tracking"""
386384

387-
return format_gemini_input(contents)
385+
# Create kwargs dict with contents for merge_system_prompt
386+
input_kwargs = {"contents": contents, **kwargs}
387+
return merge_system_prompt(input_kwargs, "gemini")
388388

389389
def generate_content_stream(
390390
self,

posthog/ai/gemini/gemini_converter.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,30 @@ def format_gemini_response(response: Any) -> List[FormattedMessage]:
220220
return output
221221

222222

223+
def extract_gemini_system_instruction(config: Any) -> Optional[str]:
    """
    Look up the system instruction carried by a Gemini config.

    Lookup order:
      1. a ``system_instruction`` attribute on the config object,
      2. a ``"system_instruction"`` key when the config is a dict,
      3. a ``"systemInstruction"`` key when the config is a dict.

    Args:
        config: Config object or dict that may contain a system instruction

    Returns:
        The stored system instruction value, returned as-is (no conversion
        is applied here), or None when the config is None or holds no
        system instruction under any of the names above.
    """
    if config is None:
        return None

    # Attribute access wins over dict keys. Any object exposing the
    # attribute matches, so the attribute's value is returned even when
    # that value is None.
    if hasattr(config, "system_instruction"):
        return config.system_instruction

    if isinstance(config, dict):
        # snake_case is checked before camelCase; the first key present
        # decides the result.
        for key in ("system_instruction", "systemInstruction"):
            if key in config:
                return config[key]

    return None
245+
246+
223247
def extract_gemini_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
224248
"""
225249
Extract tool definitions from Gemini API kwargs.
@@ -237,6 +261,38 @@ def extract_gemini_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
237261
return None
238262

239263

264+
def format_gemini_input_with_system(
    contents: Any, config: Any = None
) -> List[FormattedMessage]:
    """
    Format Gemini input contents into standardized message format, including system instruction handling.

    Args:
        contents: Input contents in various possible formats
        config: Config object or dict that may contain system instruction

    Returns:
        List of formatted messages with role and content fields. When the
        config carries a system instruction and none of the formatted
        messages already has the "system" role, a system message holding
        that instruction is prepended; otherwise the formatted messages
        are returned unchanged.
    """
    formatted_messages = format_gemini_input(contents)

    # Check if system instruction is provided in config parameter
    system_instruction = extract_gemini_system_instruction(config)
    if system_instruction is None:
        return formatted_messages

    # Never emit two system messages: an existing one in the contents wins.
    if any(msg.get("role") == "system" for msg in formatted_messages):
        return formatted_messages

    # FormattedMessage is already a module-level name (the signature above
    # uses it), so no function-scope import is needed for this annotation.
    system_message: FormattedMessage = {
        "role": "system",
        "content": system_instruction,
    }
    # Build a new list so the one returned by format_gemini_input is not mutated.
    return [system_message] + list(formatted_messages)
294+
295+
240296
def format_gemini_input(contents: Any) -> List[FormattedMessage]:
241297
"""
242298
Format Gemini input contents into standardized message format for PostHog tracking.

posthog/ai/openai/openai_converter.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,6 @@ def format_openai_streaming_input(
606606
Returns:
607607
Formatted input ready for PostHog tracking
608608
"""
609-
if api_type == "chat":
610-
return kwargs.get("messages")
611-
else: # responses API
612-
return kwargs.get("input")
609+
from posthog.ai.utils import merge_system_prompt
610+
611+
return merge_system_prompt(kwargs, "openai")

posthog/ai/utils.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import time
22
import uuid
3-
from typing import Any, Callable, Dict, Optional
3+
from typing import Any, Callable, Dict, List, Optional, cast
44

55
from posthog.client import Client as PostHogClient
6-
from posthog.ai.types import StreamingEventData, TokenUsage
6+
from posthog.ai.types import FormattedMessage, StreamingEventData, TokenUsage
77
from posthog.ai.sanitization import (
88
sanitize_openai,
99
sanitize_anthropic,
@@ -158,7 +158,9 @@ def extract_available_tool_calls(provider: str, kwargs: Dict[str, Any]):
158158
return None
159159

160160

161-
def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
161+
def merge_system_prompt(
162+
kwargs: Dict[str, Any], provider: str
163+
) -> List[FormattedMessage]:
162164
"""
163165
Merge system prompts and format messages for the given provider.
164166
"""
@@ -169,10 +171,11 @@ def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
169171
system = kwargs.get("system")
170172
return format_anthropic_input(messages, system)
171173
elif provider == "gemini":
172-
from posthog.ai.gemini.gemini_converter import format_gemini_input
174+
from posthog.ai.gemini.gemini_converter import format_gemini_input_with_system
173175

174176
contents = kwargs.get("contents", [])
175-
return format_gemini_input(contents)
177+
config = kwargs.get("config")
178+
return format_gemini_input_with_system(contents, config)
176179
elif provider == "openai":
177180
from posthog.ai.openai.openai_converter import format_openai_input
178181

@@ -187,9 +190,11 @@ def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
187190
if kwargs.get("system") is not None:
188191
has_system = any(msg.get("role") == "system" for msg in messages)
189192
if not has_system:
190-
messages = [
191-
{"role": "system", "content": kwargs.get("system")}
192-
] + messages
193+
system_msg = cast(
194+
FormattedMessage,
195+
{"role": "system", "content": kwargs.get("system")},
196+
)
197+
messages = [system_msg] + messages
193198

194199
# For Responses API, add instructions to the system prompt if provided
195200
if kwargs.get("instructions") is not None:
@@ -207,9 +212,11 @@ def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
207212
)
208213
else:
209214
# Create a new system message with instructions
210-
messages = [
211-
{"role": "system", "content": kwargs.get("instructions")}
212-
] + messages
215+
instruction_msg = cast(
216+
FormattedMessage,
217+
{"role": "system", "content": kwargs.get("instructions")},
218+
)
219+
messages = [instruction_msg] + messages
213220

214221
return messages
215222

posthog/test/ai/gemini/test_gemini.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,8 @@ def test_tool_use_response(mock_client, mock_google_genai_client, mock_gemini_re
616616

617617
mock_config = MagicMock()
618618
mock_config.tools = [mock_tool]
619+
# Explicitly specify this config doesn't have system_instruction
620+
del mock_config.system_instruction
619621

620622
response = client.models.generate_content(
621623
model="gemini-2.5-flash",

0 commit comments

Comments
 (0)