tslearn-team · jbbqqf · May 9, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,6 +14,7 @@ Changelogs for this project are recorded in this file since v0.2.0.
 ### Added
 
 * Allow parallel computation of DTW barycenters and plug it in `TimeSeriesKMeans`.
+* `PiecewiseAggregateApproximation.segment_indices` exposes the start/end indices of each PAA segment in the original time series ([#441](https://github.com/tslearn-team/tslearn/issues/441)).
 
 ### Changed
 

diff --git a/tests/test_piecewise.py b/tests/test_piecewise.py
@@ -24,6 +24,37 @@ def test_paa():
                                paa_est.distance_paa(paa_repr[0], paa_repr[1]))
 
 
+def test_paa_segment_indices():
+    # Regression test for #441: segment_indices() must let a caller map each
+    # PAA value back to the slice of the original series it averages.
+    series = [[-1., 2., 0.1, -1., 1., -1.]]
+    estimator = PiecewiseAggregateApproximation(n_segments=3)
+    # Boundaries are derived from the fitted series length, so calling the
+    # method before fit must raise NotFittedError.
+    np.testing.assert_raises(NotFittedError, estimator.segment_indices)
+
+    transformed = estimator.fit_transform(series)
+    bounds = estimator.segment_indices()
+
+    # One (start, end) integer pair per segment.
+    assert bounds.shape == (3, 2)
+    assert np.issubdtype(bounds.dtype, np.integer)
+
+    # Averaging the original samples over each reported range must give back
+    # the corresponding PAA value (single series, single dimension here).
+    raw = np.asarray(series, dtype=float)
+    expected = [raw[0, lo:hi].mean() for lo, hi in bounds]
+    np.testing.assert_allclose(transformed[0, :, 0], expected)
+
+    # Length 7 with 3 segments: width is 7 // 3 == 2, so the last sample
+    # (index 6) falls outside every segment.
+    uneven = PiecewiseAggregateApproximation(n_segments=3)
+    uneven.fit([[1., 2., 3., 4., 5., 6., 7.]])
+    np.testing.assert_array_equal(
+        uneven.segment_indices(), [[0, 2], [2, 4], [4, 6]]
+    )
+
+
 def test_sax():
     unfitted_sax = SymbolicAggregateApproximation(n_segments=3,
                                                   alphabet_size_avg=2)

diff --git a/tslearn/piecewise/piecewise.py b/tslearn/piecewise/piecewise.py
@@ -258,6 +258,48 @@ def inverse_transform(self, X):
         X = check_dims(X)
         return inv_transform_paa(X, original_size=self._X_fit_dims_[1])
 
+    def segment_indices(self):
+        """Return the index range of each PAA segment in the original series.
+
+        Row ``i`` of the result holds ``(start_i, end_i)`` such that, for a
+        time series of the fitted length, PAA value ``i`` is the mean of
+        ``ts[start_i:end_i]``.
+
+        Returns
+        -------
+        numpy.ndarray of shape (n_segments, 2), dtype=int
+            ``[[start_0, end_0], [start_1, end_1], ...]``, expressed as
+            positions in the original time series. Each ``end_i`` is
+            exclusive, i.e. the pair can be used directly as a Python slice,
+            the same way :meth:`transform` slices ``X[i_ts, start:end, :]``.
+
+        Examples
+        --------
+        >>> paa = PiecewiseAggregateApproximation(n_segments=3)
+        >>> _ = paa.fit([[-1., 2., 0.1, -1., 1., -1.]])
+        >>> paa.segment_indices()
+        array([[0, 2],
+               [2, 4],
+               [4, 6]])
+
+        Notes
+        -----
+        Every segment has the same width,
+        ``sz_segment = sz_fit // n_segments``, which is the width
+        :meth:`transform` uses. When ``sz_fit`` is not a multiple of
+        ``n_segments``, the samples from index
+        ``n_segments * sz_segment`` onwards are not covered by any
+        segment. The result depends only on the series length seen
+        during fitting and on ``n_segments``.
+        """
+        self._is_fitted()
+        # Same width rule as _transform (integer division), so row i of the
+        # result lines up with PAA value i; leftover trailing samples are
+        # simply not covered.
+        width = int(self._X_fit_dims_[1]) // self.n_segments
+        first = width * numpy.arange(self.n_segments, dtype=int)
+        return numpy.column_stack((first, first + width))
+
     def _more_tags(self):
         tags = super()._more_tags()
         tags.update({