Skip to content

Commit a58d7af

Browse files
google-genai-bot authored and copybara-github committed
fix: Suppress empty-text-only chunks from streaming responses while preserving carried metadata
PiperOrigin-RevId: 915947081
1 parent 3e496e4 commit a58d7af

2 files changed

Lines changed: 177 additions & 16 deletions

File tree

core/src/main/java/com/google/adk/models/Gemini.java

Lines changed: 82 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -226,21 +226,7 @@ public Flowable<LlmResponse> generateContent(LlmRequest llmRequest, boolean stre
226226
() ->
227227
processRawResponses(
228228
Flowable.fromFuture(streamFuture).flatMapIterable(iterable -> iterable)))
229-
.filter(
230-
llmResponse ->
231-
llmResponse
232-
.content()
233-
.flatMap(Content::parts)
234-
.map(
235-
parts ->
236-
!parts.isEmpty()
237-
&& parts.stream()
238-
.anyMatch(
239-
p ->
240-
p.functionCall().isPresent()
241-
|| p.functionResponse().isPresent()
242-
|| p.text().isPresent()))
243-
.orElse(false));
229+
.filter(Gemini::shouldEmit);
244230
} else {
245231
logger.debug("Sending generateContent request to model {}", effectiveModelName);
246232
return Flowable.fromFuture(
@@ -298,7 +284,28 @@ static Flowable<LlmResponse> processRawResponses(Flowable<GenerateContentRespons
298284
responsesToEmit.add(aggregatedTextResponse);
299285
accumulatedText.setLength(0);
300286
}
301-
responsesToEmit.add(currentProcessedLlmResponse);
287+
if (isEmptyTextOnlyResponse(currentProcessedLlmResponse)) {
288+
// Strip the empty-text content while preserving any carried metadata
289+
// (`usageMetadata`, `finishReason`, `modelVersion`, etc.) by emitting a
290+
// content-less response marked as `partial`. This handles the trailing
291+
// `{parts:[{text:""}], finishReason:STOP}` chunk emitted by some Gemini
292+
// preview models (e.g. 3.1-flash-lite) after a function call: keeping
293+
// the chunk as-is would propagate it as a non-partial event whose
294+
// Event#finalResponse() returns true and prematurely terminate
295+
// BaseLlmFlow#run before the function response is sent back to the
296+
// model; dropping it entirely would lose the carried metadata. If the
297+
// chunk carries no useful metadata at all, suppress it outright.
298+
LlmResponse metadataOnly =
299+
currentProcessedLlmResponse.toBuilder()
300+
.content((Content) null)
301+
.partial(true)
302+
.build();
303+
if (hasUsefulMetadata(metadataOnly)) {
304+
responsesToEmit.add(metadataOnly);
305+
}
306+
} else {
307+
responsesToEmit.add(currentProcessedLlmResponse);
308+
}
302309
}
303310
logger.debug("Responses to emit: {}", responsesToEmit);
304311
return Flowable.fromIterable(responsesToEmit);
@@ -358,6 +365,65 @@ private static LlmResponse thinkingResponseFromText(String accumulatedThoughtTex
358365
.build();
359366
}
360367

368+
/**
369+
* Returns true if {@code response} should be emitted downstream by the streaming pipeline.
370+
*
371+
* <p>Drops chunks that carry neither semantic content (i.e. they are an empty-text-only response
372+
* per {@link #isEmptyTextOnlyResponse}) nor any useful metadata (per {@link #hasUsefulMetadata}).
373+
*/
374+
private static boolean shouldEmit(LlmResponse response) {
375+
return !isEmptyTextOnlyResponse(response) || hasUsefulMetadata(response);
376+
}
377+
378+
/**
379+
* Returns true if {@code response} carries any non-content metadata that should be propagated
380+
* downstream (e.g. {@code usageMetadata}, {@code finishReason}, transcriptions, grounding or
381+
* error info). Inspects only top-level {@link LlmResponse} fields; the response's content/parts
382+
* are intentionally not considered here.
383+
*/
384+
private static boolean hasUsefulMetadata(LlmResponse response) {
385+
return response.usageMetadata().isPresent()
386+
|| response.finishReason().isPresent()
387+
|| response.errorCode().isPresent()
388+
|| response.groundingMetadata().isPresent()
389+
|| response.inputTranscription().isPresent()
390+
|| response.outputTranscription().isPresent();
391+
}
392+
393+
/**
394+
* Returns true if {@code response} consists of exactly one {@link Part} whose only meaningful
395+
* payload is an empty text string (i.e. {@code parts:[{text:""}]}). Such a chunk can be safely
396+
* dropped from the streaming aggregator because it carries no semantic content for the agent
397+
* pipeline. A part is considered to carry semantic content if any of its non-text payloads
398+
* ({@code functionCall}, {@code functionResponse}, {@code inlineData}, {@code executableCode},
399+
* {@code codeExecutionResult}, {@code fileData}, {@code thoughtSignature}, {@code videoMetadata},
400+
* {@code toolCall}, {@code toolResponse}) is present.
401+
*/
402+
private static boolean isEmptyTextOnlyResponse(LlmResponse response) {
403+
return response
404+
.content()
405+
.flatMap(Content::parts)
406+
.map(
407+
parts -> {
408+
if (parts.size() != 1) {
409+
return false;
410+
}
411+
Part part = parts.get(0);
412+
return part.text().map(String::isEmpty).orElse(false)
413+
&& part.functionCall().isEmpty()
414+
&& part.functionResponse().isEmpty()
415+
&& part.inlineData().isEmpty()
416+
&& part.executableCode().isEmpty()
417+
&& part.codeExecutionResult().isEmpty()
418+
&& part.fileData().isEmpty()
419+
&& part.thoughtSignature().isEmpty()
420+
&& part.videoMetadata().isEmpty()
421+
&& part.toolCall().isEmpty()
422+
&& part.toolResponse().isEmpty();
423+
})
424+
.orElse(false);
425+
}
426+
361427
@Override
362428
public BaseLlmConnection connect(LlmRequest llmRequest) {
363429
if (!apiClient.vertexAI()) {

core/src/test/java/com/google/adk/models/GeminiTest.java

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,81 @@ public void processRawResponses_withTextChunks_emitsPartialResponses() {
6363
isFunctionCallResponse());
6464
}
6565

66+
// Regression test for b/513501918. gemini-3.1-flash-lite emits an extra trailing chunk after a
67+
// function call: `{parts:[{text:""}], finishReason:STOP}`. That chunk must not be propagated as
68+
// a non-partial event because BaseLlmFlow#run would treat it as the final response and
69+
// terminate the loop before the function response is sent back to the model. The chunk's
70+
// metadata (e.g. `finishReason`, `usageMetadata`) is preserved by emitting it on a content-less
71+
// partial response instead of dropping the chunk entirely.
72+
@Test
73+
public void
74+
processRawResponses_functionCallThenEmptyTextWithStop_emitsFunctionCallAndMetadataOnlyPartial() {
75+
Flowable<GenerateContentResponse> rawResponses =
76+
Flowable.just(
77+
toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())),
78+
toResponseWithText("", FinishReason.Known.STOP));
79+
80+
Flowable<LlmResponse> llmResponses = Gemini.processRawResponses(rawResponses);
81+
82+
assertLlmResponses(
83+
llmResponses,
84+
isFunctionCallResponse(),
85+
isContentlessPartialWithFinishReason(FinishReason.Known.STOP));
86+
}
87+
88+
// Same as above but with `usageMetadata` on the trailing empty chunk: the metadata must survive
89+
// on the emitted content-less partial.
90+
@Test
91+
public void
92+
processRawResponses_functionCallThenEmptyTextWithUsageMetadata_preservesUsageMetadata() {
93+
GenerateContentResponseUsageMetadata metadata = createUsageMetadata(5, 10, 15);
94+
Flowable<GenerateContentResponse> rawResponses =
95+
Flowable.just(
96+
toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())),
97+
toResponseWithText("", FinishReason.Known.STOP, metadata));
98+
99+
Flowable<LlmResponse> llmResponses = Gemini.processRawResponses(rawResponses);
100+
101+
assertLlmResponses(
102+
llmResponses, isFunctionCallResponse(), isContentlessPartialWithUsageMetadata(metadata));
103+
}
104+
105+
// Same as above but without a finishReason or usageMetadata: the trailing empty chunk carries no
106+
// useful payload and must be suppressed entirely.
107+
@Test
108+
public void processRawResponses_functionCallThenEmptyText_doesNotEmitExtraEmptyResponse() {
109+
Flowable<GenerateContentResponse> rawResponses =
110+
Flowable.just(
111+
toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())),
112+
toResponseWithText(""));
113+
114+
Flowable<LlmResponse> llmResponses = Gemini.processRawResponses(rawResponses);
115+
116+
assertLlmResponses(llmResponses, isFunctionCallResponse());
117+
}
118+
119+
// Combined scenario: leading partial text, then a function call, then the trailing empty-text
120+
// chunk with STOP. Accumulated text must still be flushed, the function call must still be
121+
// emitted, and the trailing chunk must surface only its metadata on a content-less partial.
122+
@Test
123+
public void
124+
processRawResponses_textThenFunctionCallThenEmptyTextWithStop_emitsTextFunctionCallAndMetadata() {
125+
Flowable<GenerateContentResponse> rawResponses =
126+
Flowable.just(
127+
toResponseWithText("Thinking..."),
128+
toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())),
129+
toResponseWithText("", FinishReason.Known.STOP));
130+
131+
Flowable<LlmResponse> llmResponses = Gemini.processRawResponses(rawResponses);
132+
133+
assertLlmResponses(
134+
llmResponses,
135+
isPartialTextResponse("Thinking..."),
136+
isFinalTextResponse("Thinking..."),
137+
isFunctionCallResponse(),
138+
isContentlessPartialWithFinishReason(FinishReason.Known.STOP));
139+
}
140+
66141
@Test
67142
public void processRawResponses_textAndStopReason_emitsPartialThenFinalText() {
68143
Flowable<GenerateContentResponse> rawResponses =
@@ -232,6 +307,26 @@ private static Predicate<LlmResponse> isFunctionCallResponse() {
232307
};
233308
}
234309

310+
private static Predicate<LlmResponse> isContentlessPartialWithFinishReason(
311+
FinishReason.Known expectedFinishReason) {
312+
return response -> {
313+
assertThat(response.partial()).hasValue(true);
314+
assertThat(response.content()).isEmpty();
315+
assertThat(response.finishReason().map(fr -> fr.knownEnum())).hasValue(expectedFinishReason);
316+
return true;
317+
};
318+
}
319+
320+
private static Predicate<LlmResponse> isContentlessPartialWithUsageMetadata(
321+
GenerateContentResponseUsageMetadata expectedMetadata) {
322+
return response -> {
323+
assertThat(response.partial()).hasValue(true);
324+
assertThat(response.content()).isEmpty();
325+
assertThat(response.usageMetadata()).hasValue(expectedMetadata);
326+
return true;
327+
};
328+
}
329+
235330
private static Predicate<LlmResponse> isEmptyResponse() {
236331
return response -> {
237332
assertThat(response.partial()).isEmpty();

0 commit comments

Comments
 (0)