Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Changelogs for this project are recorded in this file since v0.2.0.
### Added

* Allow parallel computation of DTW barycenters and plug it in `TimeSeriesKMeans`.
* `NonMyopicEarlyClassifier` can now be used with incomplete series or streamed inputs
to retrieve the optimal classification timing.

### Changed

Expand Down
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def matplotlib_svg_scraper(*args, **kwargs):
"examples/autodiff", "examples/misc"].index,
'within_subsection_order': "FileNameSortKey",
'image_scrapers': (matplotlib_svg_scraper,),
'matplotlib_animations': True,
}

# Add any paths that contain templates here, relative to this directory.
Expand Down
102 changes: 99 additions & 3 deletions docs/examples/classification/plot_early_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
:mod:`tslearn.early_classification` module and in this example
we use the method from [1]_.

References
----------

.. [1] A. Dachraoui, A. Bondu & A. Cornuejols. Early classification of time
series as a non myopic sequential decision making problem. ECML/PKDD 2015
Expand All @@ -18,7 +20,12 @@
# License: BSD 3 clause
# sphinx_gallery_thumbnail_number = 2

from contextlib import suppress

import numpy

import matplotlib.animation as animation
import matplotlib.gridspec as gridpsec
import matplotlib.pyplot as plt

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
Expand Down Expand Up @@ -60,9 +67,10 @@ def plot_partial(time_series, t, y_true=0, y_pred=0, color="k"):
size = X_train.shape[1]
n_classes = len(set(y_train))

plt.figure()
plt.figure(layout="constrained")
for i, cl in enumerate(set(y_train)):
plt.subplot(n_classes, 1, i + 1)
ax = plt.subplot(n_classes, 1, i + 1)
ax.set_title(f"Class {cl}")
for ts in X_train[y_train == cl]:
plt.plot(ts.ravel(), color="orange" if cl > 0 else "blue", alpha=.3)
plt.xlim(0, size - 1)
Expand Down Expand Up @@ -98,6 +106,94 @@ def plot_partial(time_series, t, y_true=0, y_pred=0, color="k"):
plt.tight_layout()
plt.show()

##############################################################################
# Streaming inputs
# ----------------
# Let's focus on analyzing early classification of a time series acquired sequentially over time.
#
# For each incoming timestamp :math:`t`, the following figure displays data-dependent computations of:
#
# * the clustering probabilities :math:`P(C_k | \mathbf{x}_{\rightarrow t})`
# * the expected cost for all future times :math:`t + \tau`
# with :math:`\tau \geq 0`:
#
# .. math::
#
# f_\tau(\mathbf{x}_{\rightarrow t}, y) =
# \sum_k \left[ P(C_k | \mathbf{x}_{\rightarrow t})
# \sum_i \left( P(y=i | C_k)
# \left( \sum_{j \neq i} P_{t+\tau}(\hat{y} = j | y=i, C_k)
# \right) \right)
# \right]
# + \alpha t
#
# as described in :ref:`our User Guide section dedicated to early classification <early>`.
#
# The estimated optimal :math:`\tau` is derived at each timestamp by minimizing the expected costs.
#
# In this example, the `NonMyopicEarlyClassifier` recommends a classification decision as early as :math:`t=12`.

# Index of the test series that is streamed in the animation.
ts_index = 1
# Size of X_test along its second axis; used below as the x-axis upper bound.
sz = X_test.shape[1]

# Wide figure that hosts the four panels created below.
fig = plt.figure(layout="constrained", figsize=(13, 4))
fig.suptitle(r"Optimal prediction time $\tau$ evolution")

# 2 x 3 grid: narrow left and right columns, wide middle column.
gs = gridpsec.GridSpec(2, 3, figure=fig, width_ratios=[0.15, 0.70, 0.15])
# Left column, both rows.
ax1 = fig.add_subplot(gs[:, 0], title='Cluster probas')
# Middle column, top row; y-limits are derived from the selected test series.
ax2 = fig.add_subplot(
gs[0, 1],
xlim=[0, sz],
ylim=[numpy.min(X_test[ts_index]),
numpy.max(X_test[ts_index]) * 1.1],
title='Streamed TS'
)
# Middle column, bottom row; shares the same x-range as the panel above.
ax3 = fig.add_subplot(gs[1, 1], xlim=[0, sz], ylim=[0, 1], title='Expected cost')
# Right column, both rows.
ax4 = fig.add_subplot(gs[:, 2], title='Predicted probas')

# Placeholder horizontal bars; their widths are overwritten by `update`.
# NOTE(review): the initial 1.1 presumably leaves headroom above a
# probability of 1 when the axis autoscales -- confirm.
bar1 = ax1.barh(
["cluster 1", "cluster 2", "cluster 3"],
[1.1, 0, 0],
)
# Placeholder lines filled with NaN; `update` sets their data on every frame.
line1 = ax2.plot([numpy.nan], marker='.')[0]
line2 = ax3.plot(numpy.full((sz,), numpy.nan), linestyle="--", marker='.')[0]
# Placeholder vertical bars; their heights are overwritten by `update`.
bar2 = ax4.bar(
["class -1", "class 1"],
[1.1, 0],
)

def update(frame):
    """Refresh every panel for the series prefix ending at index ``frame``.

    Animation callback: the first ``frame + 1`` timestamps of
    ``X_test[ts_index]`` are fed to ``early_clf`` and the resulting cluster
    probabilities, expected costs and early-prediction probabilities are
    written into the artists created beforehand.

    Parameters
    ----------
    frame : int
        Index of the last timestamp observed so far.

    Returns
    -------
    tuple
        ``(bar1, line1, line2, bar2)``, the artists that were updated.
    """
    observed = X_test[ts_index, :frame+1]
    cluster_probas = early_clf.get_cluster_probas(observed)
    expected_costs = early_clf._expected_costs(observed).reshape(-1)

    # The prediction API is called with an extra leading axis; only the first
    # entry of each returned array is used.
    batch = numpy.expand_dims(observed, axis=0)
    probas, delays = early_clf.early_predict_proba(batch)
    proba = probas[0]
    delay = delays[0]

    for idx, patch in enumerate(bar1):
        patch.set_width(cluster_probas[idx])

    line1.set_xdata(numpy.arange(observed.shape[0]))
    line1.set_ydata(observed)

    for idx, patch in enumerate(bar2):
        patch.set_height(proba[idx])

    # Remove the marker lines and label added by the previous call; when
    # they do not exist yet the IndexError is deliberately ignored.
    with suppress(IndexError):
        ax2.lines[1].remove()
        ax3.lines[1].remove()
        ax2.texts[0].remove()

    # Vertical marker (and its label) at the current time plus the delay.
    tau_x = frame + delay
    for axis in (ax2, ax3):
        axis.axvline(x=tau_x, color="k", linewidth=1.5)
    ax2.text(x=tau_x, y=numpy.max(X_test[ts_index])/2, s=r"$\tau$")

    # Expected costs are plotted starting at the current frame.
    cost_positions = numpy.arange(expected_costs.shape[0]) + frame
    line2.set_xdata(cost_positions)
    line2.set_ydata(expected_costs)
    return bar1, line1, line2, bar2

ani = animation.FuncAnimation(fig=fig, func=update, frames=sz, interval=100)
plt.show()

##############################################################################
# Earliness-Accuracy trade-off
# ----------------------------
Expand All @@ -120,4 +216,4 @@ def plot_partial(time_series, t, y_true=0, y_pred=0, color="k"):
plt.xlim(0, size - 1)
plt.xlabel("Prediction times")
plt.title("Impact of cost_time_parameter ($\\alpha$)")
plt.show()
plt.show()
102 changes: 102 additions & 0 deletions tests/test_early_classification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import warnings
from warnings import catch_warnings

import numpy as np

import pytest

from tslearn.early_classification import NonMyopicEarlyClassifier
from tslearn.neighbors import KNeighborsTimeSeriesClassifier
from tslearn.utils import to_time_series_dataset


def test_NonMyopicEarlyClassifier():
    """Check NonMyopicEarlyClassifier on a tiny synthetic dataset.

    Covers fitting, the early classification cost, batch early prediction on
    truncated series, rejection of series longer than the training data, and
    the two streaming generators.
    """
    dataset = to_time_series_dataset(
        [
            [1, 2, 3, 4, 5, 6],
            [1, 2, 3, 4, 5, 6],
            [1, 2, 3, 4, 5, 6],
            [1, 2, 3, 3, 2, 1],
            [1, 2, 3, 3, 2, 1],
            [1, 2, 3, 3, 2, 1],
            [3, 2, 1, 1, 2, 3],
            [3, 2, 1, 1, 2, 3],
        ]
    )

    y = [0, 0, 0, 1, 1, 1, 0, 0]
    model = NonMyopicEarlyClassifier(
        n_clusters=3,
        base_classifier=KNeighborsTimeSeriesClassifier(
            n_neighbors=1, metric="euclidean"
        ),
        min_t=2,
        lamb=1000.0,
        cost_time_parameter=0.1,
        random_state=0,
    )
    assert model.classes_ is None
    model.fit(dataset, y)
    np.testing.assert_almost_equal(model.early_classification_cost(dataset, y), 0.35)

    # Fewer timestamps than min_t
    pred, delays = model.early_predict(dataset[:, :1])
    np.testing.assert_array_equal(pred, np.array([np.nan] * 8))
    np.testing.assert_array_equal(delays, np.array([np.nan] * 8))

    pred, delays = model.early_predict(dataset[:, :3])
    np.testing.assert_array_equal(pred, np.array([0, 0, 0, 0, 0, 0, 0, 0]))
    np.testing.assert_array_equal(delays, np.array([1, 1, 1, 1, 1, 1, 0, 0]))

    pred, delays = model.early_predict_proba(dataset[:, :3])
    np.testing.assert_array_equal(
        pred,
        np.array([[1.0, 0.0],
                  [1.0, 0.0],
                  [1.0, 0.0],
                  [1.0, 0.0],
                  [1.0, 0.0],
                  [1.0, 0.0],
                  [1.0, 0.0],
                  [1.0, 0.0]])
    )
    np.testing.assert_array_equal(delays, np.array([1, 1, 1, 1, 1, 1, 0, 0]))

    # More timestamps than trained dataset
    data = to_time_series_dataset([[1, 2, 3, 3, 2, 1, 1, 2, 3]])
    with pytest.raises(ValueError):
        model.early_predict(data)

    # Stream a six-timestamp series one timestamp at a time; there is one
    # expected prediction and one expected delay per timestamp.
    data = to_time_series_dataset([[1, 2, 3, 3, 2, 1]])
    gen = model.get_early_predict_generator()
    expected_preds = np.array([[np.nan], [0], [0], [1], [1], [1]])
    expected_delays = np.array([[np.nan], [2], [1], [0], [0], [0]])
    for i in range(data.shape[1]):
        pred, delay = gen.send(data[:, i:i+1, :])
        np.testing.assert_array_equal(pred, expected_preds[i])
        np.testing.assert_array_equal(delay, expected_delays[i])

    # Same streaming scenario with the probability generator.
    data = to_time_series_dataset([[1, 2, 3, 3, 2, 1]])
    gen = model.get_early_predict_proba_generator()
    expected_preds = np.array([
        [[np.nan, np.nan]],
        [[1.0, 0.0]],
        [[1.0, 0.0]],
        [[0.0, 1.0]],
        [[0.0, 1.0]],
        [[0.0, 1.0]]
    ])
    expected_delays = np.array([[np.nan], [2], [1], [0], [0], [0]])
    for i in range(data.shape[1]):
        pred, delay = gen.send(data[:, i:i+1, :])
        np.testing.assert_array_equal(pred, expected_preds[i])
        np.testing.assert_array_equal(delay, expected_delays[i])

    # Check improperly formatted generator input
    with pytest.warns(RuntimeWarning):
        gen.send(1)

    # Check iteration raises after n_samples + 1
    with pytest.raises(ValueError):
        gen.send([[[1]]])
Loading