fix: Suppress empty-text-only chunks from streaming responses while preserving carried metadata

google-genai-bot · copybara-github · commit a58d7af46bcf · 2026-05-18T06:05:24.000-07:00
PiperOrigin-RevId: 915947081
diff --git a/core/src/main/java/com/google/adk/models/Gemini.java b/core/src/main/java/com/google/adk/models/Gemini.java
@@ -226,21 +226,7 @@ public Flowable<LlmResponse> generateContent(LlmRequest llmRequest, boolean stre
               () ->
                   processRawResponses(
                       Flowable.fromFuture(streamFuture).flatMapIterable(iterable -> iterable)))
-          .filter(
-              llmResponse ->
-                  llmResponse
-                      .content()
-                      .flatMap(Content::parts)
-                      .map(
-                          parts ->
-                              !parts.isEmpty()
-                                  && parts.stream()
-                                      .anyMatch(
-                                          p ->
-                                              p.functionCall().isPresent()
-                                                  || p.functionResponse().isPresent()
-                                                  || p.text().isPresent()))
-                      .orElse(false));
+          .filter(Gemini::shouldEmit);
     } else {
       logger.debug("Sending generateContent request to model {}", effectiveModelName);
       return Flowable.fromFuture(
@@ -298,7 +284,28 @@ static Flowable<LlmResponse> processRawResponses(Flowable<GenerateContentRespons
                   responsesToEmit.add(aggregatedTextResponse);
                   accumulatedText.setLength(0);
                 }
-                responsesToEmit.add(currentProcessedLlmResponse);
+                if (isEmptyTextOnlyResponse(currentProcessedLlmResponse)) {
+                  // Strip the empty-text content and re-emit the chunk as a content-less
+                  // response marked as `partial`, so the chunk's other fields
+                  // (`usageMetadata`, `finishReason`, `modelVersion`, etc.) are kept. This
+                  // handles the trailing `{parts:[{text:""}], finishReason:STOP}` chunk emitted
+                  // by some Gemini preview models (e.g. 3.1-flash-lite) after a function call:
+                  // keeping the chunk as-is would propagate it as a non-partial event whose
+                  // Event#finalResponse() returns true and prematurely terminate
+                  // BaseLlmFlow#run before the function response is sent back to the model.
+                  // The stripped response is emitted only if hasUsefulMetadata() accepts it;
+                  // a chunk carrying none of the fields checked there is suppressed outright.
+                  LlmResponse metadataOnly =
+                      currentProcessedLlmResponse.toBuilder()
+                          .content((Content) null)
+                          .partial(true)
+                          .build();
+                  if (hasUsefulMetadata(metadataOnly)) {
+                    responsesToEmit.add(metadataOnly);
+                  }
+                } else {
+                  responsesToEmit.add(currentProcessedLlmResponse);
+                }
               }
               logger.debug("Responses to emit: {}", responsesToEmit);
               return Flowable.fromIterable(responsesToEmit);
@@ -358,6 +365,65 @@ private static LlmResponse thinkingResponseFromText(String accumulatedThoughtTex
         .build();
   }
 
+  /**
+   * Returns true if {@code response} should be emitted downstream by the streaming pipeline.
+   *
+   * <p>Only an empty-text-only response (per {@link #isEmptyTextOnlyResponse}) that carries no
+   * useful metadata (per {@link #hasUsefulMetadata}) is dropped; every other response is emitted.
+   */
+  private static boolean shouldEmit(LlmResponse response) {
+    return !isEmptyTextOnlyResponse(response) || hasUsefulMetadata(response);
+  }
+
+  /**
+   * Returns true if {@code response} carries any non-content metadata that should be propagated
+   * downstream: {@code usageMetadata}, {@code finishReason}, {@code errorCode}, {@code
+   * groundingMetadata}, {@code inputTranscription} or {@code outputTranscription}. Only these
+   * top-level {@link LlmResponse} fields are inspected; content/parts are intentionally ignored.
+   */
+  private static boolean hasUsefulMetadata(LlmResponse response) {
+    return response.usageMetadata().isPresent()
+        || response.finishReason().isPresent()
+        || response.errorCode().isPresent()
+        || response.groundingMetadata().isPresent()
+        || response.inputTranscription().isPresent()
+        || response.outputTranscription().isPresent();
+  }
+
+  /**
+   * Returns true if the content of {@code response} consists of exactly one {@link Part} whose
+   * {@code text} is present and empty (i.e. {@code parts:[{text:""}]}) and that has none of the
+   * non-text payloads {@code functionCall}, {@code functionResponse}, {@code inlineData}, {@code
+   * executableCode}, {@code codeExecutionResult}, {@code fileData}, {@code thoughtSignature},
+   * {@code videoMetadata}, {@code toolCall} or {@code toolResponse}. Returns false when the
+   * response has no content or parts, when the part count is not one, or when the part
+   * has no {@code text}. Such a chunk carries no semantic content for the agent pipeline.
+   */
+  private static boolean isEmptyTextOnlyResponse(LlmResponse response) {
+    return response
+        .content()
+        .flatMap(Content::parts)
+        .map(
+            parts -> {
+              if (parts.size() != 1) {
+                return false;
+              }
+              Part part = parts.get(0);
+              return part.text().map(String::isEmpty).orElse(false)
+                  && part.functionCall().isEmpty()
+                  && part.functionResponse().isEmpty()
+                  && part.inlineData().isEmpty()
+                  && part.executableCode().isEmpty()
+                  && part.codeExecutionResult().isEmpty()
+                  && part.fileData().isEmpty()
+                  && part.thoughtSignature().isEmpty()
+                  && part.videoMetadata().isEmpty()
+                  && part.toolCall().isEmpty()
+                  && part.toolResponse().isEmpty();
+            })
+        .orElse(false);
+  }
+
   @Override
   public BaseLlmConnection connect(LlmRequest llmRequest) {
     if (!apiClient.vertexAI()) {
diff --git a/core/src/test/java/com/google/adk/models/GeminiTest.java b/core/src/test/java/com/google/adk/models/GeminiTest.java
@@ -63,6 +63,81 @@ public void processRawResponses_withTextChunks_emitsPartialResponses() {
         isFunctionCallResponse());
   }
 
+  // Regression test for b/513501918. gemini-3.1-flash-lite emits an extra trailing chunk after a
+  // function call: `{parts:[{text:""}], finishReason:STOP}`. That chunk must not be propagated as
+  // a non-partial event because BaseLlmFlow#run would treat it as the final response and
+  // terminate the loop before the function response is sent back to the model. The chunk's
+  // metadata (e.g. `finishReason`, `usageMetadata`) is preserved by emitting it on a content-less
+  // partial response instead of dropping the chunk entirely.
+  @Test
+  public void
+      processRawResponses_functionCallThenEmptyTextWithStop_emitsFunctionCallAndMetadataOnlyPartial() {
+    Flowable<GenerateContentResponse> rawResponses =
+        Flowable.just(
+            toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())),
+            toResponseWithText("", FinishReason.Known.STOP));
+
+    Flowable<LlmResponse> llmResponses = Gemini.processRawResponses(rawResponses);
+
+    assertLlmResponses(
+        llmResponses,
+        isFunctionCallResponse(),
+        isContentlessPartialWithFinishReason(FinishReason.Known.STOP));
+  }
+
+  // Function call followed by a trailing empty-text chunk that carries STOP and `usageMetadata`:
+  // the metadata must survive on the emitted content-less partial.
+  @Test
+  public void
+      processRawResponses_functionCallThenEmptyTextWithUsageMetadata_preservesUsageMetadata() {
+    GenerateContentResponseUsageMetadata metadata = createUsageMetadata(5, 10, 15);
+    Flowable<GenerateContentResponse> rawResponses =
+        Flowable.just(
+            toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())),
+            toResponseWithText("", FinishReason.Known.STOP, metadata));
+
+    Flowable<LlmResponse> llmResponses = Gemini.processRawResponses(rawResponses);
+
+    assertLlmResponses(
+        llmResponses, isFunctionCallResponse(), isContentlessPartialWithUsageMetadata(metadata));
+  }
+
+  // Function call followed by a trailing empty-text chunk with no finishReason or usageMetadata:
+  // the chunk carries no useful payload and must be suppressed entirely.
+  @Test
+  public void processRawResponses_functionCallThenEmptyText_doesNotEmitExtraEmptyResponse() {
+    Flowable<GenerateContentResponse> rawResponses =
+        Flowable.just(
+            toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())),
+            toResponseWithText(""));
+
+    Flowable<LlmResponse> llmResponses = Gemini.processRawResponses(rawResponses);
+
+    assertLlmResponses(llmResponses, isFunctionCallResponse());
+  }
+
+  // Combined scenario: leading partial text, then a function call, then the trailing empty-text
+  // chunk with STOP. Accumulated text must still be flushed, the function call must still be
+  // emitted, and the trailing chunk must surface only its metadata on a content-less partial.
+  @Test
+  public void
+      processRawResponses_textThenFunctionCallThenEmptyTextWithStop_emitsTextFunctionCallAndMetadata() {
+    Flowable<GenerateContentResponse> rawResponses =
+        Flowable.just(
+            toResponseWithText("Thinking..."),
+            toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())),
+            toResponseWithText("", FinishReason.Known.STOP));
+
+    Flowable<LlmResponse> llmResponses = Gemini.processRawResponses(rawResponses);
+
+    assertLlmResponses(
+        llmResponses,
+        isPartialTextResponse("Thinking..."),
+        isFinalTextResponse("Thinking..."),
+        isFunctionCallResponse(),
+        isContentlessPartialWithFinishReason(FinishReason.Known.STOP));
+  }
+
   @Test
   public void processRawResponses_textAndStopReason_emitsPartialThenFinalText() {
     Flowable<GenerateContentResponse> rawResponses =
@@ -232,6 +307,26 @@ private static Predicate<LlmResponse> isFunctionCallResponse() {
     };
   }
 
+  private static Predicate<LlmResponse> isContentlessPartialWithFinishReason(
+      FinishReason.Known expectedFinishReason) {
+    return response -> {
+      assertThat(response.partial()).hasValue(true);
+      assertThat(response.content()).isEmpty();
+      assertThat(response.finishReason().map(fr -> fr.knownEnum())).hasValue(expectedFinishReason);
+      return true;
+    };
+  }
+
+  private static Predicate<LlmResponse> isContentlessPartialWithUsageMetadata(
+      GenerateContentResponseUsageMetadata expectedMetadata) {
+    return response -> {
+      assertThat(response.partial()).hasValue(true);
+      assertThat(response.content()).isEmpty();
+      assertThat(response.usageMetadata()).hasValue(expectedMetadata);
+      return true;
+    };
+  }
+
   private static Predicate<LlmResponse> isEmptyResponse() {
     return response -> {
       assertThat(response.partial()).isEmpty();