RapidataAI · RapidPoseidon · Jun 16, 2026
diff --git a/docs/job_definition_parameters.md b/docs/job_definition_parameters.md
@@ -160,6 +160,46 @@ datapoints=["image1.jpg", "image2.jpg"],
 contexts=["A cat sitting on a red couch", "A blue car in the rain"]
 ```
 
+**Length limit:** A context may be at most 400 characters; the backend rejects longer ones. If a context exceeds the limit, a warning is logged at creation time. See `auto_shorten` below to have over-long contexts shortened automatically.
+
+---
+
+### `auto_shorten`
+
+| Property | Value |
+|----------|-------|
+| **Type** | `bool` |
+| **Required** | No |
+| **Default** | `False` |
+
+When `True`, any context longer than the 400-character limit is automatically shortened before upload. The shortening is tuned to the `instruction`, so only the part relevant to the question is kept. If shortening returns an empty result, the original context is kept and a warning is logged. When `False` (the default), an over-long context is left unchanged and a warning is logged explaining that the backend would reject it.
+
+```python
+order = rapi.order.create_classification_order(
+    name="Outfit check",
+    instruction="Does the main character wear the right clothing?",
+    answer_options=["Yes", "No"],
+    datapoints=["scene.jpg"],
+    contexts=["<a very long, detailed beach-scene description ...>"],
+    auto_shorten=True,
+)
+```
+
+You can also shorten contexts directly via the client, without creating an order:
+
+```python
+short = rapi.context.shorten_context(
+    context="<a very long description ...>",
+    question="Does the main character wear the right clothing?",
+)
+
+# Or a batch of (context, question) pairs in one call:
+shortened = rapi.context.shorten_contexts([
+    (context_a, question_a),
+    (context_b, question_b),
+])
+```
+
 ---
 
 ### `media_contexts`

diff --git a/src/rapidata/__init__.py b/src/rapidata/__init__.py
@@ -61,6 +61,7 @@
     DeviceFilter,
     DeviceType,
     Datapoint,
+    ContextManager,
     FailedUploadException,
     FailedUpload,
     rapidata_config,

diff --git a/src/rapidata/rapidata_client/__init__.py b/src/rapidata/rapidata_client/__init__.py
@@ -20,11 +20,13 @@
     EffortSelection,
 )
 from .datapoints import Datapoint
+from .context import ContextManager
 from .datapoints.metadata import (
     PrivateTextMetadata,
     PublicTextMetadata,
     SelectWordsMetadata,
 )
+
 # --- GENERATED SETTINGS IMPORTS START ---
 from .settings import (
     RapidataSettings,
@@ -48,6 +50,7 @@
     CompareEquirectangularSetting,
     ClassifyEquirectangularSetting,
 )
+
 # --- GENERATED SETTINGS IMPORTS END ---
 from .filter import (
     CountryFilter,

diff --git a/src/rapidata/rapidata_client/context/__init__.py b/src/rapidata/rapidata_client/context/__init__.py
@@ -0,0 +1,4 @@
+from .context_manager import ContextManager
+from ._context_length import MAX_CONTEXT_LENGTH
+
+__all__ = ["ContextManager", "MAX_CONTEXT_LENGTH"]
diff --git a/src/rapidata/rapidata_client/context/_context_length.py b/src/rapidata/rapidata_client/context/_context_length.py
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from rapidata.rapidata_client.config import logger
+
+if TYPE_CHECKING:
+    from rapidata.rapidata_client.datapoints._datapoint import Datapoint
+    from rapidata.rapidata_client.context.context_manager import ContextManager
+
+# Mirrors the backend's datapoint/group context validation
+# (datasets-service CreateDatapointCommandValidator: `RuleFor(x => x.Context).MaximumLength(400)`).
+# Keep in sync if the backend limit changes.
+MAX_CONTEXT_LENGTH = 400
+
+
+def enforce_context_length(
+    datapoints: list[Datapoint],
+    question: str | None,
+    auto_shorten: bool,
+    context_manager: ContextManager,
+) -> None:
+    """Check datapoint contexts against the backend's maximum length, in place.
+
+    For every datapoint whose context exceeds :data:`MAX_CONTEXT_LENGTH`:
+
+    - if ``auto_shorten`` is True and a ``question`` is available, the context
+      is shortened for that question (one batched request) and substituted;
+    - otherwise a warning is logged explaining the backend would reject it.
+    """
+    over_limit = [
+        (index, datapoint)
+        for index, datapoint in enumerate(datapoints)
+        if datapoint.context is not None and len(datapoint.context) > MAX_CONTEXT_LENGTH
+    ]
+    if not over_limit:
+        return
+
+    if auto_shorten and not question:
+        # auto_shorten needs the question to tune the context; without it we
+        # can't shorten, so fall back to warning rather than silently proceed.
+        logger.warning(
+            "auto_shorten=True but no question/instruction was available to shorten "
+            "the context against; leaving %d over-long context(s) unchanged.",
+            len(over_limit),
+        )
+
+    if auto_shorten and question:
+        pairs = [
+            (datapoint.context, question)
+            for _, datapoint in over_limit
+            if datapoint.context is not None
+        ]
+        shortened = context_manager.shorten_contexts(pairs)
+        for (index, datapoint), new_context in zip(over_limit, shortened):
+            if not new_context:
+                logger.warning(
+                    "Datapoint %d: shorten-context returned an empty result; "
+                    "keeping the original context.",
+                    index,
+                )
+                continue
+            assert datapoint.context is not None
+            logger.info(
+                "Datapoint %d: shortened context from %d to %d characters.",
+                index,
+                len(datapoint.context),
+                len(new_context),
+            )
+            datapoint.context = new_context
+        return
+
+    for index, datapoint in over_limit:
+        assert datapoint.context is not None
+        logger.warning(
+            "Datapoint %d has a context of %d characters, which exceeds the maximum "
+            "of %d and would be rejected by the backend. Shorten it, or pass "
+            "auto_shorten=True to shorten it automatically.",
+            index,
+            len(datapoint.context),
+            MAX_CONTEXT_LENGTH,
+        )
diff --git a/src/rapidata/rapidata_client/context/context_manager.py b/src/rapidata/rapidata_client/context/context_manager.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from typing import Sequence, TYPE_CHECKING
+
+from rapidata.rapidata_client.config import logger, tracer
+
+if TYPE_CHECKING:
+    from rapidata.service.openapi_service import OpenAPIService
+
+
+class ContextManager:
+    """Shortens a datapoint's context for the specific question an annotator answers.
+
+    A long, general context (e.g. a full scene description) is often far more
+    detail than a single question needs. This manager tunes a context down to
+    what is relevant for the question, which keeps it within the length the
+    backend accepts and focuses the annotator. Results are cached server-side.
+    """
+
+    def __init__(self, openapi_service: OpenAPIService):
+        self._openapi_service = openapi_service
+        logger.debug("ContextManager initialized")
+
+    def shorten_context(self, context: str, question: str) -> str:
+        """Shorten a single context for the given question.
+
+        Args:
+            context: The (potentially long) context to shorten.
+            question: The question the context will be shown alongside. The
+                context is tuned to what this question needs.
+
+        Returns:
+            The shortened context.
+        """
+        return self.shorten_contexts([(context, question)])[0]
+
+    def shorten_contexts(self, pairs: Sequence[tuple[str, str]]) -> list[str]:
+        """Shorten a batch of ``(context, question)`` pairs in one request.
+
+        Args:
+            pairs: The ``(context, question)`` pairs to shorten.
+
+        Returns:
+            The shortened contexts, in the same order as ``pairs``.
+        """
+        with tracer.start_as_current_span("ContextManager.shorten_contexts"):
+            return self._openapi_service.context.shorten_contexts(pairs)