From 2de57b8cce9e934870d01a74f1ea7234a7ba54c1 Mon Sep 17 00:00:00 2001 From: Ravi Theja Date: Tue, 14 Apr 2026 02:58:53 +0530 Subject: [PATCH 1/6] fix(sdk): force_flush returns meaningful bool on MetricReader Fixes #5020 Signed-off-by: Ravi Theja --- CHANGELOG.md | 1 + .../sdk/metrics/_internal/export/__init__.py | 40 ++++++++++++------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3dbd81ac5..8228c29a8f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#5093](https://github.com/open-telemetry/opentelemetry-python/pull/5093)) - `opentelemetry-sdk`: fix YAML structure injection via environment variable substitution in declarative file configuration; values containing newlines are now emitted as quoted YAML scalars per spec requirement ([#5091](https://github.com/open-telemetry/opentelemetry-python/pull/5091)) +- `opentelemetry-sdk`: Fix `force_flush` on `MetricReader` and `PeriodicExportingMetricReader` to return a meaningful `bool` reflecting actual export success/failure instead of always returning `True`. Also fixes `detach(token)` not being called when export raises an exception. ([#5020](https://github.com/open-telemetry/opentelemetry-python/issues/5020)) - `opentelemetry-sdk`: Add `create_logger_provider`/`configure_logger_provider` to declarative file configuration, enabling LoggerProvider instantiation from config files without reading env vars ([#4990](https://github.com/open-telemetry/opentelemetry-python/pull/4990)) - `opentelemetry-sdk`: Add `service` resource detector support to declarative file configuration via `detection_development.detectors[].service` diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py index 676a356c2bc..9af959ca84a 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py @@ -334,7 +334,7 @@ def __init__( ) @final - def collect(self, timeout_millis: float = 10_000) -> None: + def collect(self, timeout_millis: float = 10_000) -> Optional[bool]: """Collects the metrics from the internal SDK state and invokes the `_receive_metrics` with the collection. @@ -359,10 +359,11 @@ def collect(self, timeout_millis: float = 10_000) -> None: self._metrics.record_collection(perf_counter() - start_time) if metrics is not None: - self._receive_metrics( + return self._receive_metrics( metrics, timeout_millis=timeout_millis, ) + return None @final def _set_collect_callback( @@ -384,8 +385,16 @@ def _receive_metrics( metrics_data: MetricsData, timeout_millis: float = 10_000, **kwargs, - ) -> None: - """Called by `MetricReader.collect` when it receives a batch of metrics""" + ) -> bool: + """Called by `MetricReader.collect` when it receives a batch of metrics. + + Subclasses must return ``True`` on success and ``False`` on failure. + + .. note:: + Existing subclasses that return ``None`` (the old implicit default) + will be treated as vacuous success by ``force_flush``, preserving + backward-compatible behaviour. + """ def _set_meter_provider(self, meter_provider: MeterProvider) -> None: self._metrics = create_metric_reader_metrics( @@ -397,8 +406,8 @@ def _set_meter_provider(self, meter_provider: MeterProvider) -> None: ) def force_flush(self, timeout_millis: float = 10_000) -> bool: - self.collect(timeout_millis=timeout_millis) - return True + result = self.collect(timeout_millis=timeout_millis) + return result is not False @abstractmethod def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None: @@ -451,9 +460,10 @@ def _receive_metrics( metrics_data: MetricsData, timeout_millis: float = 10_000, **kwargs, - ) -> None: + ) -> bool: with self._lock: self._metrics_data = metrics_data + return True def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None: pass @@ -569,17 +579,19 @@ def _receive_metrics( metrics_data: MetricsData, timeout_millis: float = 10_000, **kwargs, - ) -> None: + ) -> bool: token = attach(set_value(_SUPPRESS_INSTRUMENTATION_KEY, True)) - # pylint: disable=broad-exception-caught,invalid-name try: with self._export_lock: - self._exporter.export( + result = self._exporter.export( metrics_data, timeout_millis=timeout_millis ) + return result is MetricExportResult.SUCCESS except Exception: _logger.exception("Exception while exporting metrics") - detach(token) + return False + finally: + detach(token) def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None: deadline_ns = time_ns() + timeout_millis * 10**6 @@ -598,6 +610,6 @@ def _shutdown(): self._exporter.shutdown(timeout=(deadline_ns - time_ns()) / 10**6) def force_flush(self, timeout_millis: float = 10_000) -> bool: - super().force_flush(timeout_millis=timeout_millis) - self._exporter.force_flush(timeout_millis=timeout_millis) - return True + collect_ok = super().force_flush(timeout_millis=timeout_millis) + exporter_ok = self._exporter.force_flush(timeout_millis=timeout_millis) + return collect_ok and exporter_ok From 2d15eb3c07aa2027b483914e1cbadb9b3a129382 Mon Sep 17 00:00:00 2001 From: Ravi Theja Date: Sat, 18 Apr 2026 03:14:40 +0530 Subject: [PATCH 2/6] test(sdk): add tests for force_flush meaningful bool return (#5020) --- .../sdk/metrics/_internal/export/__init__.py | 15 +++--- .../test_periodic_exporting_metric_reader.py | 52 ++++++++++++++++++- 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py index 9af959ca84a..b6d83c6b249 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py @@ -14,7 +14,6 @@ from threading import Event, Lock, RLock, Thread from time import perf_counter, time_ns from typing import IO - from typing_extensions import final # This kind of import is needed to avoid Sphinx errors. @@ -334,7 +333,7 @@ def __init__( ) @final - def collect(self, timeout_millis: float = 10_000) -> Optional[bool]: + def collect(self, timeout_millis: float = 10_000) -> bool | None: """Collects the metrics from the internal SDK state and invokes the `_receive_metrics` with the collection. @@ -385,10 +384,10 @@ def _receive_metrics( metrics_data: MetricsData, timeout_millis: float = 10_000, **kwargs, - ) -> bool: + ) -> bool | None: """Called by `MetricReader.collect` when it receives a batch of metrics. - Subclasses must return ``True`` on success and ``False`` on failure. + Subclasses should return ``True`` on success and ``False`` on failure. .. note:: Existing subclasses that return ``None`` (the old implicit default) @@ -587,7 +586,7 @@ def _receive_metrics( metrics_data, timeout_millis=timeout_millis ) return result is MetricExportResult.SUCCESS - except Exception: + except Exception: # pylint: disable=broad-exception-caught _logger.exception("Exception while exporting metrics") return False finally: @@ -610,6 +609,6 @@ def _shutdown(): self._exporter.shutdown(timeout=(deadline_ns - time_ns()) / 10**6) def force_flush(self, timeout_millis: float = 10_000) -> bool: - collect_ok = super().force_flush(timeout_millis=timeout_millis) - exporter_ok = self._exporter.force_flush(timeout_millis=timeout_millis) - return collect_ok and exporter_ok + if not super().force_flush(timeout_millis=timeout_millis): + return False + return self._exporter.force_flush(timeout_millis=timeout_millis) diff --git a/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py b/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py index ab8877c21c3..9fab8310ad5 100644 --- a/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py +++ b/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py @@ -354,4 +354,54 @@ def test_metric_reader_metrics(self): assert isinstance(name, str) self.assertTrue(name.startswith("periodic_metric_reader/")) - mp.shutdown() + mp.shutdown() + + def test_force_flush_returns_true_on_success(self): + exporter = FakeMetricsExporter() + pmr = self._create_periodic_reader(metrics, exporter) + result = pmr.force_flush(timeout_millis=5_000) + self.assertTrue(result) + pmr.shutdown() + + def test_force_flush_returns_false_on_export_failure(self): + exporter = FakeMetricsExporter() + exporter.export = Mock(return_value=MetricExportResult.FAILURE) + pmr = self._create_periodic_reader(metrics, exporter) + result = pmr.force_flush(timeout_millis=5_000) + self.assertFalse(result) + pmr.shutdown() + + def test_force_flush_skips_exporter_flush_when_collect_fails(self): + exporter = FakeMetricsExporter() + exporter.force_flush = Mock(return_value=True) + pmr = PeriodicExportingMetricReader( + exporter, export_interval_millis=math.inf + ) + # No collect callback registered → collect returns None → force_flush + # on base treats None as not-False (success), so wire up a failing one + exporter.export = Mock(return_value=MetricExportResult.FAILURE) + + def _collect_failure(reader, timeout_millis): + return metrics + + pmr._set_collect_callback(_collect_failure) + exporter.export = Mock(return_value=MetricExportResult.FAILURE) + result = pmr.force_flush(timeout_millis=5_000) + self.assertFalse(result) + exporter.force_flush.assert_not_called() + pmr.shutdown() + + def test_detach_called_on_export_failure(self): + """detach(token) must run in finally even when export returns FAILURE.""" + from unittest.mock import patch + + exporter = FakeMetricsExporter() + exporter.export = Mock(return_value=MetricExportResult.FAILURE) + pmr = self._create_periodic_reader(metrics, exporter) + + with patch( + "opentelemetry.sdk.metrics._internal.export.detach" + ) as mock_detach: + pmr.force_flush(timeout_millis=5_000) + self.assertTrue(mock_detach.called) + pmr.shutdown() From 855cbe2c406fc6749b2479acaa95564f426fc69b Mon Sep 17 00:00:00 2001 From: Ravi Theja Date: Tue, 21 Apr 2026 23:55:01 +0530 Subject: [PATCH 3/6] fix(sdk): address final review feedback - soften docstring, fix changelog PR link --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8228c29a8f6..815b8cd14af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#5093](https://github.com/open-telemetry/opentelemetry-python/pull/5093)) - `opentelemetry-sdk`: fix YAML structure injection via environment variable substitution in declarative file configuration; values containing newlines are now emitted as quoted YAML scalars per spec requirement ([#5091](https://github.com/open-telemetry/opentelemetry-python/pull/5091)) -- `opentelemetry-sdk`: Fix `force_flush` on `MetricReader` and `PeriodicExportingMetricReader` to return a meaningful `bool` reflecting actual export success/failure instead of always returning `True`. Also fixes `detach(token)` not being called when export raises an exception. ([#5020](https://github.com/open-telemetry/opentelemetry-python/issues/5020)) +- `opentelemetry-sdk`: Fix `force_flush` on `MetricReader` and `PeriodicExportingMetricReader` to return a meaningful `bool` reflecting actual export success/failure instead of always returning `True`. Also fixes `detach(token)` not being called when export raises an exception. ([#5085](https://github.com/open-telemetry/opentelemetry-python/pull/5085)) - `opentelemetry-sdk`: Add `create_logger_provider`/`configure_logger_provider` to declarative file configuration, enabling LoggerProvider instantiation from config files without reading env vars ([#4990](https://github.com/open-telemetry/opentelemetry-python/pull/4990)) - `opentelemetry-sdk`: Add `service` resource detector support to declarative file configuration via `detection_development.detectors[].service` From cbcd0b55bec3ac6e8d24f6c80183d61254877c53 Mon Sep 17 00:00:00 2001 From: Ravi Theja Date: Mon, 27 Apr 2026 03:57:51 +0530 Subject: [PATCH 4/6] fix(sdk): use explicit return None for consistency in collect() --- .../src/opentelemetry/sdk/metrics/_internal/export/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py index b6d83c6b249..f08f04efb9a 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py @@ -349,7 +349,7 @@ def collect(self, timeout_millis: float = 10_000) -> bool | None: _logger.warning( "Cannot call collect on a MetricReader until it is registered on a MeterProvider" ) - return + return None start_time = perf_counter() try: From 23c4e8eb78d115915cfe24e83eb6205a3555f38e Mon Sep 17 00:00:00 2001 From: Ravi Theja Date: Mon, 27 Apr 2026 04:13:03 +0530 Subject: [PATCH 5/6] fix(sdk): move patch import to top level, fix trailing whitespace --- .../metrics/test_periodic_exporting_metric_reader.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py b/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py index 9fab8310ad5..4ca6126920a 100644 --- a/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py +++ b/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py @@ -354,7 +354,7 @@ def test_metric_reader_metrics(self): assert isinstance(name, str) self.assertTrue(name.startswith("periodic_metric_reader/")) - mp.shutdown() + mp.shutdown() def test_force_flush_returns_true_on_success(self): exporter = FakeMetricsExporter() @@ -393,15 +393,16 @@ def _collect_failure(reader, timeout_millis): def test_detach_called_on_export_failure(self): """detach(token) must run in finally even when export returns FAILURE.""" - from unittest.mock import patch + import opentelemetry.sdk.metrics._internal.export as export_module exporter = FakeMetricsExporter() exporter.export = Mock(return_value=MetricExportResult.FAILURE) pmr = self._create_periodic_reader(metrics, exporter) with patch( - "opentelemetry.sdk.metrics._internal.export.detach" + "opentelemetry.sdk.metrics._internal.export.detach", + wraps=export_module.detach, ) as mock_detach: pmr.force_flush(timeout_millis=5_000) + pmr.shutdown() self.assertTrue(mock_detach.called) - pmr.shutdown() From 357278338200664b13288df51af82a5529bf1e93 Mon Sep 17 00:00:00 2001 From: Ravi Theja Date: Tue, 12 May 2026 13:47:26 +0530 Subject: [PATCH 6/6] fix(sdk): use wraps for detach patch, fix import ordering --- .../opentelemetry/sdk/metrics/_internal/export/__init__.py | 1 + .../tests/metrics/test_periodic_exporting_metric_reader.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py index f08f04efb9a..51519e8772c 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py @@ -14,6 +14,7 @@ from threading import Event, Lock, RLock, Thread from time import perf_counter, time_ns from typing import IO + from typing_extensions import final # This kind of import is needed to avoid Sphinx errors. diff --git a/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py b/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py index 4ca6126920a..1d865483515 100644 --- a/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py +++ b/opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py @@ -13,6 +13,7 @@ import pytest +import opentelemetry.sdk.metrics._internal.export as _export_module from opentelemetry.sdk.environment_variables import ( OTEL_PYTHON_SDK_INTERNAL_METRICS_ENABLED, ) @@ -393,15 +394,13 @@ def _collect_failure(reader, timeout_millis): def test_detach_called_on_export_failure(self): """detach(token) must run in finally even when export returns FAILURE.""" - import opentelemetry.sdk.metrics._internal.export as export_module - exporter = FakeMetricsExporter() exporter.export = Mock(return_value=MetricExportResult.FAILURE) pmr = self._create_periodic_reader(metrics, exporter) with patch( "opentelemetry.sdk.metrics._internal.export.detach", - wraps=export_module.detach, + wraps=_export_module.detach, ) as mock_detach: pmr.force_flush(timeout_millis=5_000) pmr.shutdown()