@@ -226,21 +226,7 @@ public Flowable<LlmResponse> generateContent(LlmRequest llmRequest, boolean stre
226226 () ->
227227 processRawResponses (
228228 Flowable .fromFuture (streamFuture ).flatMapIterable (iterable -> iterable )))
229- .filter (
230- llmResponse ->
231- llmResponse
232- .content ()
233- .flatMap (Content ::parts )
234- .map (
235- parts ->
236- !parts .isEmpty ()
237- && parts .stream ()
238- .anyMatch (
239- p ->
240- p .functionCall ().isPresent ()
241- || p .functionResponse ().isPresent ()
242- || p .text ().isPresent ()))
243- .orElse (false ));
229+ .filter (Gemini ::shouldEmit );
244230 } else {
245231 logger .debug ("Sending generateContent request to model {}" , effectiveModelName );
246232 return Flowable .fromFuture (
@@ -298,7 +284,28 @@ static Flowable<LlmResponse> processRawResponses(Flowable<GenerateContentRespons
298284 responsesToEmit .add (aggregatedTextResponse );
299285 accumulatedText .setLength (0 );
300286 }
301- responsesToEmit .add (currentProcessedLlmResponse );
287+ if (isEmptyTextOnlyResponse (currentProcessedLlmResponse )) {
288+ // Strip the empty-text content while preserving any carried metadata
289+ // (`usageMetadata`, `finishReason`, `modelVersion`, etc.) by emitting a
290+ // content-less response marked as `partial`. This handles the trailing
291+ // `{parts:[{text:""}], finishReason:STOP}` chunk emitted by some Gemini
292+ // preview models (e.g. 3.1-flash-lite) after a function call: keeping
293+ // the chunk as-is would propagate it as a non-partial event whose
294+ // Event#finalResponse() returns true and prematurely terminate
295+ // BaseLlmFlow#run before the function response is sent back to the
296+ // model; dropping it entirely would lose the carried metadata. If the
297+ // chunk carries no useful metadata at all, suppress it outright.
298+ LlmResponse metadataOnly =
299+ currentProcessedLlmResponse .toBuilder ()
300+ .content ((Content ) null )
301+ .partial (true )
302+ .build ();
303+ if (hasUsefulMetadata (metadataOnly )) {
304+ responsesToEmit .add (metadataOnly );
305+ }
306+ } else {
307+ responsesToEmit .add (currentProcessedLlmResponse );
308+ }
302309 }
303310 logger .debug ("Responses to emit: {}" , responsesToEmit );
304311 return Flowable .fromIterable (responsesToEmit );
@@ -358,6 +365,65 @@ private static LlmResponse thinkingResponseFromText(String accumulatedThoughtTex
358365 .build ();
359366 }
360367
368+ /**
369+ * Returns true if {@code response} should be emitted downstream by the streaming pipeline.
370+ *
371+ * <p>Drops chunks that carry neither semantic content (i.e. they are an empty-text-only response
372+ * per {@link #isEmptyTextOnlyResponse}) nor any useful metadata (per {@link #hasUsefulMetadata}).
373+ */
374+ private static boolean shouldEmit (LlmResponse response ) {
375+ return !isEmptyTextOnlyResponse (response ) || hasUsefulMetadata (response );
376+ }
377+
378+ /**
379+ * Returns true if {@code response} carries any non-content metadata that should be propagated
380+ * downstream (e.g. {@code usageMetadata}, {@code finishReason}, transcriptions, grounding or
381+ * error info). Inspects only top-level {@link LlmResponse} fields; the response's content/parts
382+ * are intentionally not considered here.
383+ */
384+ private static boolean hasUsefulMetadata (LlmResponse response ) {
385+ return response .usageMetadata ().isPresent ()
386+ || response .finishReason ().isPresent ()
387+ || response .errorCode ().isPresent ()
388+ || response .groundingMetadata ().isPresent ()
389+ || response .inputTranscription ().isPresent ()
390+ || response .outputTranscription ().isPresent ();
391+ }
392+
393+ /**
394+ * Returns true if {@code response} consists of exactly one {@link Part} whose only meaningful
395+ * payload is an empty text string (i.e. {@code parts:[{text:""}]}). Such a chunk can be safely
396+ * dropped from the streaming aggregator because it carries no semantic content for the agent
397+ * pipeline. A part is considered to carry semantic content if any of its non-text payloads
398+ * ({@code functionCall}, {@code functionResponse}, {@code inlineData}, {@code executableCode},
399+ * {@code codeExecutionResult}, {@code fileData}, {@code thoughtSignature}, {@code videoMetadata},
400+ * {@code toolCall}, {@code toolResponse}) is present.
401+ */
402+ private static boolean isEmptyTextOnlyResponse (LlmResponse response ) {
403+ return response
404+ .content ()
405+ .flatMap (Content ::parts )
406+ .map (
407+ parts -> {
408+ if (parts .size () != 1 ) {
409+ return false ;
410+ }
411+ Part part = parts .get (0 );
412+ return part .text ().map (String ::isEmpty ).orElse (false )
413+ && part .functionCall ().isEmpty ()
414+ && part .functionResponse ().isEmpty ()
415+ && part .inlineData ().isEmpty ()
416+ && part .executableCode ().isEmpty ()
417+ && part .codeExecutionResult ().isEmpty ()
418+ && part .fileData ().isEmpty ()
419+ && part .thoughtSignature ().isEmpty ()
420+ && part .videoMetadata ().isEmpty ()
421+ && part .toolCall ().isEmpty ()
422+ && part .toolResponse ().isEmpty ();
423+ })
424+ .orElse (false );
425+ }
426+
361427 @ Override
362428 public BaseLlmConnection connect (LlmRequest llmRequest ) {
363429 if (!apiClient .vertexAI ()) {
0 commit comments