diff --git a/hyperforge/src/hyperforge/api/models.py b/hyperforge/src/hyperforge/api/models.py
index 3f53134..0767f1d 100644
--- a/hyperforge/src/hyperforge/api/models.py
+++ b/hyperforge/src/hyperforge/api/models.py
@@ -5,7 +5,7 @@
 from pydantic import BaseModel, Field
 
 from hyperforge.driver import DriverConfig
-from hyperforge.models import Rules
+from hyperforge.models import HistoryQuestionAnswer, Rules
 
 
 class StashRoles(str, Enum):
@@ -125,5 +125,9 @@ class InteractionRequest(BaseModel):
     question: str
     headers: Dict[str, str] = {}
     arguments: Dict[str, str] = {}
+    chat_history: Optional[List[HistoryQuestionAnswer]] = Field(
+        default=None,
+        description="Client-managed chat history. When set (even to an empty list), overrides any server-side session history for agents that use previous Q&A context (rephrase, summarize, smart, etc.). Omit the field entirely to use server-side session history.",
+    )
     operation: InteractionOperation = InteractionOperation.QUESTION
     streaming: bool = False
diff --git a/hyperforge/src/hyperforge/api/v1/interaction.py b/hyperforge/src/hyperforge/api/v1/interaction.py
index fefddc1..8658da7 100644
--- a/hyperforge/src/hyperforge/api/v1/interaction.py
+++ b/hyperforge/src/hyperforge/api/v1/interaction.py
@@ -208,6 +208,7 @@ async def stream_response(
         question=interaction.question,
         headers=interaction.headers,
         arguments=interaction.arguments,
+        chat_history=interaction.chat_history,
         workflow_id=workflow_id,
         streaming=interaction.streaming,
     )
diff --git a/hyperforge/src/hyperforge/engine.py b/hyperforge/src/hyperforge/engine.py
index 0d676c5..6101778 100644
--- a/hyperforge/src/hyperforge/engine.py
+++ b/hyperforge/src/hyperforge/engine.py
@@ -1,6 +1,6 @@
 import logging
 from dataclasses import dataclass
-from typing import Any, Awaitable, Callable, Dict, Optional, Tuple, cast
+from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple, cast
 
 from nuclia.lib.nua import AsyncNuaClient
 
@@ -9,6 +9,7 @@
 from hyperforge.llm import NoopNuaClient, NuaBaseModel, NUAConnection
 from hyperforge.manager import Manager
 from hyperforge.memory.memory import BaseSessionMemory, QuestionMemory, SessionMemory
+from hyperforge.models import HistoryQuestionAnswer
 from hyperforge.retrieval.agent import RetrievalAgent
 from hyperforge.retrieval.config import RetrievalAgentConfig
 
@@ -84,6 +85,7 @@ async def main(
     headers: Optional[Dict[str, str]] = None,
     memory_klass: type[BaseSessionMemory] = SessionMemory,
     streaming: bool = False,
+    chat_history: Optional[List[HistoryQuestionAnswer]] = None,
 ) -> QuestionMemory:
     try:
         state, session_memory = await init(
@@ -99,7 +101,9 @@ async def main(
             session_id=session_id,
             memory_klass=memory_klass,
         )
-        question_memory = session_memory.start_question(question, streaming=streaming)
+        question_memory = session_memory.start_question(
+            question, streaming=streaming, chat_history=chat_history
+        )
         if callback is not None:
             question_memory.set_callback_fn(callback)
 
diff --git a/hyperforge/src/hyperforge/memory/memory.py b/hyperforge/src/hyperforge/memory/memory.py
index 4d22855..870e082 100644
--- a/hyperforge/src/hyperforge/memory/memory.py
+++ b/hyperforge/src/hyperforge/memory/memory.py
@@ -54,6 +54,26 @@
 
 # NucliaDB storage
 QUESTION_ANSWERS_FIELD: str = "qas"
+
+
+def _qa_list_to_context_string(history: List[HistoryQuestionAnswer]) -> Tuple[str, int]:
+    """Format Q&A pairs into the prompt context string used by agents. Returns (text, pair_count): text holds one "- Question: ..." line and one "- Answer: ..." line per pair, and is empty with a count of 0 for an empty list."""
+    result = "".join(
+        f"- Question: {qa.question}\n- Answer: {qa.answer}\n" for qa in history
+    )
+    return result, len(history)
+
+
+def _qa_list_to_chat_messages(history: List[HistoryQuestionAnswer]) -> List[Message]:
+    """Convert Q&A pairs into a flat Message list: for each pair, in input order, the question as an Author.USER message followed by the answer as an Author.NUCLIA message."""
+    return [
+        msg
+        for qa in history
+        for msg in (
+            Message(author=Author.USER, text=qa.question),
+            Message(author=Author.NUCLIA, text=qa.answer),
+        )
+    ]
 CONTEXT_FIELD: str = "context"
 STEPS_FIELD: str = "steps"
 USER_INFO_FIELD: str = "user_info"
@@ -112,34 +132,13 @@ async def search_in_questions(self, question: str, all: bool):
         return KnowledgeboxFindResults(total=0, resources={})
 
     async def get_chat_history(self) -> List[Message]:
-        qas = await self.qa_history()
-        result = []
-        for qa in qas:
-            result.append(
-                Message(
-                    author=Author.USER,
-                    text=qa.question,
-                )
-            )
-            result.append(
-                Message(
-                    author=Author.NUCLIA,
-                    text=qa.answer,
-                )
-            )
-        return result
+        return _qa_list_to_chat_messages(await self.qa_history())  # one USER then one NUCLIA message per stored Q&A
 
     async def qa_history(self) -> list[HistoryQuestionAnswer]:
         return []
 
     async def context_history(self) -> Tuple[str, int]:
-        result = ""
-        interactions = 0
-        for qa in await self.qa_history():
-            result += f"- Question: {qa.question}\n"
-            result += f"- Answer: {qa.answer}\n"
-            interactions += 1
-        return result, interactions
+        return _qa_list_to_context_string(await self.qa_history())  # (formatted Q&A text, number of Q&A pairs)
 
     def start_question(
         self,
@@ -149,6 +148,7 @@ def start_question(
         headers: Dict[str, str] = {},
         arguments: Dict[str, str] = {},
         streaming: bool = False,
+        chat_history: Optional[List[HistoryQuestionAnswer]] = None,
     ) -> "QuestionMemory":
         return QuestionMemory(
             self,
@@ -158,6 +158,7 @@ def start_question(
             headers=headers,
             arguments=arguments,
             streaming=streaming,
+            chat_history=chat_history,
         )
 
     async def save(self, question: "QuestionMemory") -> None:
@@ -183,6 +184,7 @@ def start_question(
         headers: Dict[str, str] = {},
         arguments: Dict[str, str] = {},
         streaming: bool = False,
+        chat_history: Optional[List[HistoryQuestionAnswer]] = None,
     ) -> "QuestionMemory":
         return QuestionMemory(
             self,
@@ -192,6 +194,7 @@ def start_question(
             headers=headers,
             arguments=arguments,
             streaming=streaming,
+            chat_history=chat_history,
         )
 
     async def save(self, question: "QuestionMemory") -> None:
@@ -250,6 +253,7 @@ def start_question(
         headers: Dict[str, str] = {},
         arguments: Dict[str, str] = {},
         streaming: bool = False,
+        chat_history: Optional[List[HistoryQuestionAnswer]] = None,
     ) -> "QuestionMemory":
         return QuestionMemory(
             self,
@@ -259,6 +263,7 @@ def start_question(
             headers=headers,
             arguments=arguments,
             streaming=streaming,
+            chat_history=chat_history,
         )
 
     async def save(self, question: "QuestionMemory") -> None:
@@ -580,10 +585,21 @@ def __init__(
         headers: Dict[str, str] | None = None,
         arguments: Dict[str, str] | None = None,
         streaming: bool = False,
+        chat_history: Optional[List[HistoryQuestionAnswer]] = None,
     ):
         self.session = session
         self.started_at = datetime.now(timezone.utc)
 
+        # Client-managed chat history. When set (even to an empty list), overrides
+        # server-side session history for agents that use previous Q&A context
+        # (rephrase, summarize, smart, etc.). None means "not set — use server-side
+        # history". [] means "override with no history". Intended for ephemeral
+        # sessions where the client is responsible for maintaining conversation state.
+        # Note: search_in_questions() performs semantic search over NucliaDB-stored
+        # conversation history and is NOT affected by this field. The HistoricalAgent
+        # uses that method and therefore does not benefit from client-managed history.
+        self._client_chat_history: Optional[List[HistoryQuestionAnswer]] = chat_history
+
         # Start of a new question by the user
         self.original_question = question
         if actions is not None:
@@ -649,11 +665,19 @@ async def get_session_source(self, source_id: str) -> Optional[Source]:
         return await self.session.get_source(source_id)
 
     async def context_history(self) -> Tuple[str, int]:
-        """Returns a string with the context history of the conversation. This can include information such as previous questions and answers, relevant information that has been previously discussed in the conversation, or any other relevant information that can help the agent to generate a more accurate and personalized response."""
+        """Return the previous questions and answers of the conversation as a (text, count) tuple: a prompt-ready context string and the number of Q&A interactions it contains.
+
+        When the client set chat_history in the request (even to an empty list), that history is formatted and returned and the server-side session history is not consulted. None means "not set — use server-side history", in which case the result of session.context_history() is returned."""
+        if self._client_chat_history is not None:
+            return _qa_list_to_context_string(self._client_chat_history)
         return await self.session.context_history()
 
     async def get_chat_history(self) -> list[Message]:
-        """Returns a list of tuples with the chat history of the conversation. Each tuple contains a question and an answer. This can be used to keep track of the conversation history in a more structured way, and to provide more context to the agent when generating a response."""
+        """Return the chat history of the conversation as a list of Message objects (not tuples), giving agents the previous exchange in a structured form when generating a response.
+
+        When the client set chat_history in the request (even to an empty list), each client Q&A pair becomes a USER message followed by a NUCLIA message and the server-side session history is not consulted. None means "not set — use server-side history", in which case the result of session.get_chat_history() is returned."""
+        if self._client_chat_history is not None:
+            return _qa_list_to_chat_messages(self._client_chat_history)
         return await self.session.get_chat_history()
 
     def stats(self):
diff --git a/hyperforge/src/hyperforge/pubsub.py b/hyperforge/src/hyperforge/pubsub.py
index b163182..0788175 100644
--- a/hyperforge/src/hyperforge/pubsub.py
+++ b/hyperforge/src/hyperforge/pubsub.py
@@ -1,4 +1,4 @@
-from typing import Annotated, Dict, Literal
+from typing import Annotated, Dict, List, Literal, Optional
 
 from pydantic import AliasChoices, BaseModel, Field
 from pydantic.types import Discriminator, Tag
@@ -8,6 +8,7 @@
     Feedback,
     OAuthAuthenticateURL,
 )
+from hyperforge.models import HistoryQuestionAnswer
 
 # Messages used in the pubsub protocol between API and agent servers
 
@@ -22,6 +23,10 @@ class StartInteraction(BaseModel):
     question: str
     headers: Dict[str, str] = {}
     arguments: Dict[str, str] = {}
+    chat_history: Optional[List[HistoryQuestionAnswer]] = Field(
+        default=None,
+        description="Client-managed chat history. When set (even to an empty list), overrides any server-side session history for agents that use previous Q&A context. Omit the field entirely to use server-side session history.",
+    )
     workflow_id: str = "default"
     streaming: bool = False
     op: Literal["start"] = "start"
diff --git a/hyperforge/src/hyperforge/server/session.py b/hyperforge/src/hyperforge/server/session.py
index 761ed73..00ce3a4 100644
--- a/hyperforge/src/hyperforge/server/session.py
+++ b/hyperforge/src/hyperforge/server/session.py
@@ -194,6 +194,7 @@ async def activate(self, message: StartInteraction):
                 headers=message.headers,
                 arguments=message.arguments,
                 streaming=message.streaming,
+                chat_history=message.chat_history,
             )
 
             task = asyncio.create_task(
diff --git a/hyperforge/tests/api/cassettes/test_chat_history_workflow/test_chat_history_is_used_in_rephrase_and_summarize.yaml b/hyperforge/tests/api/cassettes/test_chat_history_workflow/test_chat_history_is_used_in_rephrase_and_summarize.yaml
new file mode 100644
index 0000000..a30df98
--- /dev/null
+++ b/hyperforge/tests/api/cassettes/test_chat_history_workflow/test_chat_history_is_used_in_rephrase_and_summarize.yaml
@@ -0,0 +1,250 @@
+interactions:
+- request:
+    body: ''
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Host:
+      - europe-1.nuclia.cloud
+      User-Agent:
+      - nuclia.py/4.9.25
+      x-nuclia-nuakey:
+      - DUMMY
+    method: GET
+    uri: https://europe-1.nuclia.cloud/api/authorizer/info
+  response:
+    body:
+      string: '{"auth":"nua_key","user":{"identity_type":"nua_key","user_id":"03d47998-f672-4a9b-a57b-bde3a9c3ee8c","account_id":"4f9285c7-7151-4431-94e6-3f1fb0d66aca","account_type":"v3enterprise","allow_kb_management":false},"ip_info":null}'
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Length:
+      - '227'
+      content-type:
+      - application/json
+      date:
+      - Tue, 16 Jun 2026 09:56:37 GMT
+      via:
+      - 1.1 google
+      x-envoy-upstream-service-time:
+      - '4'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"question": "", "retrieval": true, "user_id": "rephrase", "system": null,
+      "chat_history": [], "context": [], "query_context": {}, "query_context_order":
+      {}, "truncate": true, "user_prompt": {"prompt": "\nYou are an expert at rephrasing
+      complex questions for an agentic RAG system.\n\nYour task is to review the main
+      question and any provided context, then rephrase the question to maximize clarity
+      and focus. Follow these steps:\n\n1. Carefully analyze the main question and
+      all context provided, including sources, previous questions and answers, and
+      any other relevant information.\n2. I present, assess whether previous questions
+      and answers (history) are necessary for rephrasing. Only use history if it is
+      relevant and improves the clarity or specificity of the main question; otherwise,
+      ignore it.\n3. If the question can be made clearer or more specific, rephrase
+      it accordingly. If it is already clear and focused, return it unchanged.\n4.
+      Only use information present in the provided context. Do not introduce external
+      knowledge or assumptions.\n5. Return a JSON object with the following fields:\n    -
+      \"rephrased_question\": The rephrased version of the main question, keep the
+      same question if no rephrasing is needed or possible.\n    - \"rules\": Any
+      rules or guidelines that should be followed when generating the answer.\n    -
+      \"reason\": Explain why the rephrasing was necessary or beneficial.\nReturn
+      only the JSON object as your response.\n\nAdditional context to assist with
+      rephrasing:\n# CONTEXT\n\n- ## Previous questions and answers in this session:\n-
+      Question: What is the max_tokens parameter in Nuclia?\n- Answer: max_tokens
+      controls the maximum number of tokens the LLM may generate in a single response.\n-
+      Question: Oh nice, can you always start you responses with a smiley emoji?\n-
+      Answer: Sure! \ud83d\ude0a I can start my responses with a smiley emoji.\n\n\n\n\n\n\n#
+      MAIN QUESTION:\nHow do I set it and what is the default value?"}, "citations":
+      false, "citation_threshold": null, "generative_model": "gemini-2.5-flash-lite",
+      "max_tokens": 2000, "query_context_images": {}, "prefer_markdown": null, "json_schema":
+      {"title": "rephrase", "description": "", "type": "object", "properties": {"rephrased_question":
+      {"type": "string", "description": "Rephrased version of the main question."},
+      "rules": {"type": "array", "items": {"type": "string"}, "description": "Rules
+      or guidelines to follow when generating the answer."}, "reason": {"type": "string",
+      "description": "Reason for rephrasing explaining why it is necessary."}}, "required":
+      ["rephrased_question"]}, "format_prompt": false, "rerank_context": false, "tools":
+      [], "tool_choice": {"type": "required"}, "reasoning": false, "seed": null}'
+    headers:
+      Accept:
+      - application/x-ndjson
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '2651'
+      Content-Type:
+      - application/json
+      Host:
+      - europe-1.nuclia.cloud
+      User-Agent:
+      - nuclia.py/4.9.25
+      x-client-ident:
+      - default
+      x-message:
+      - ec3a99d5123f4cb6af870927cd18da83
+      x-origin:
+      - RAO
+      x-session:
+      - default_default_session
+      x-show-consumption:
+      - 'true'
+      x-stf-nuakey:
+      - DUMMY
+    method: POST
+    uri: https://europe-1.nuclia.cloud/api/v1/predict/chat
+  response:
+    body:
+      string: '{"chunk":{"type":"object","object":{"rephrased_question":"What is the
+        default value of the max_tokens parameter in Nuclia, and how can it be set?","rules":["Start
+        the response with a smiley emoji."],"reason":"The original question ''How
+        do I set it and what is the default value?'' is ambiguous because ''it'' is
+        not clearly defined. By referencing the previous question about ''max_tokens'',
+        the rephrased question clarifies that ''it'' refers to the max_tokens parameter.
+        This makes the question more specific and easier to answer accurately."}}}
+
+        {"chunk":{"type":"status","code":"0"}}
+
+        {"chunk":{"type":"meta","input_tokens":9,"output_tokens":10,"timings":{"generative":0.8693042640807107},"input_nuclia_tokens":0.009,"output_nuclia_tokens":0.01}}
+
+        {"chunk":{"normalized_tokens":{"input":0.00894,"output":0.01048,"image":0.0},"customer_key_tokens":{"input":0.0,"output":0.0,"image":0.0},"type":"consumption"}}
+
+        '
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Transfer-Encoding:
+      - chunked
+      access-control-expose-headers:
+      - X-NUCLIA-TRACE-ID
+      content-type:
+      - application/x-ndjson
+      date:
+      - Tue, 16 Jun 2026 09:56:37 GMT
+      nuclia-learning-id:
+      - 050a227785984904af5e852ff2901e73
+      nuclia-learning-model:
+      - gemini-2.5-flash-lite
+      via:
+      - 1.1 google
+      x-envoy-upstream-service-time:
+      - '875'
+      x-nuclia-trace-id:
+      - 54b9805f3b078d7f4c9e0f4ad4e276d8
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"question": "", "retrieval": true, "user_id": "summarize", "system": "You
+      are a helpful AI assistant. Your role is to provide accurate, clear, and well-structured
+      answers based strictly on the information provided to you.\nKey principles:\n-
+      Answer only using the information in the provided context\n- Do not use external
+      knowledge, assumptions, or prior experience\n- Maintain a professional and informative
+      tone\n- Be concise yet thorough\n- If information is insufficient, acknowledge
+      this clearly\n\nAlways follow any additional instructions provided about format,
+      style, or domain-specific behavior.", "chat_history": [{"author": "USER", "text":
+      "What is the max_tokens parameter in Nuclia?"}, {"author": "NUCLIA", "text":
+      "max_tokens controls the maximum number of tokens the LLM may generate in a
+      single response."}, {"author": "USER", "text": "Oh nice, can you always start
+      you responses with a smiley emoji?"}, {"author": "NUCLIA", "text": "Sure! \ud83d\ude0a
+      I can start my responses with a smiley emoji."}], "context": [], "query_context":
+      {}, "query_context_order": {}, "truncate": true, "user_prompt": {"prompt": "\n##
+      Question\nWhat is the default value of the max_tokens parameter in Nuclia, and
+      how can it be set?\n\n## Provided Context\n[START OF CONTEXT]\n## max_tokens
+      reference\n\n\n\n\n## Chunk: 4cf92b85e2e04e5c834ea86206902b08\n``` The max_tokens
+      parameter controls how many tokens the LLM may generate in a single response.
+      It can be set per-request via the API or configured globally in the agent workflow.
+      The default value depends on the model, but is typically 1024 tokens. ```\n\n\n[END
+      OF CONTEXT]\n\n## Answering Guidelines\n- Carefully read all context; it may
+      be lengthy or detailed\n- Do not omit or overlook any relevant information\n-
+      If the context is incomplete or insufficient, try to provide a partial answer
+      and encourage the user to clarify their question\n- Read carefully any extra
+      instructions below if provided and use them to answer\n\nNow provide your answer
+      to the question: What is the default value of the max_tokens parameter in Nuclia,
+      and how can it be set?"}, "citations": null, "citation_threshold": null, "generative_model":
+      "chatgpt-azure-4o-mini", "max_tokens": 5000, "query_context_images": {}, "prefer_markdown":
+      null, "json_schema": null, "format_prompt": false, "rerank_context": false,
+      "tools": [], "tool_choice": {"type": "auto"}, "reasoning": false, "seed": null}'
+    headers:
+      Accept:
+      - application/x-ndjson
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '2361'
+      Content-Type:
+      - application/json
+      Host:
+      - europe-1.nuclia.cloud
+      User-Agent:
+      - nuclia.py/4.9.25
+      x-client-ident:
+      - default
+      x-message:
+      - ec3a99d5123f4cb6af870927cd18da83
+      x-origin:
+      - RAO
+      x-session:
+      - default_default_session
+      x-show-consumption:
+      - 'true'
+      x-stf-nuakey:
+      - DUMMY
+    method: POST
+    uri: https://europe-1.nuclia.cloud/api/v1/predict/chat
+  response:
+    body:
+      string: "{\"chunk\":{\"type\":\"text\",\"text\":\"\U0001F60A\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        The\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" default\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        value\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" of\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        the\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" max\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"_tokens\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        parameter\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" in\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        Nu\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"cl\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"ia\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        is\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" typically\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        \"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"102\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"4\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        tokens\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\",\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        although\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" it\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        can\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" vary\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        depending\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" on\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        the\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" model\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\".\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        It\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" can\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        be\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" set\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        per\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"-request\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        via\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" the\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        API\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" or\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        configured\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" globally\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        in\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" the\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\"
+        agent\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\" workflow\"}}\n{\"chunk\":{\"type\":\"text\",\"text\":\".\"}}\n{\"chunk\":{\"type\":\"status\",\"code\":\"0\"}}\n{\"chunk\":{\"type\":\"meta\",\"input_tokens\":12,\"output_tokens\":6,\"timings\":{\"generative_first_chunk\":0.585227217990905,\"generative\":1.990719944005832},\"input_nuclia_tokens\":0.012,\"output_nuclia_tokens\":0.006}}\n{\"chunk\":{\"normalized_tokens\":{\"input\":0.01212,\"output\":0.00564,\"image\":0.0},\"customer_key_tokens\":{\"input\":0.0,\"output\":0.0,\"image\":0.0},\"type\":\"consumption\"}}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000
+      Transfer-Encoding:
+      - chunked
+      access-control-expose-headers:
+      - X-NUCLIA-TRACE-ID
+      content-type:
+      - application/x-ndjson
+      date:
+      - Tue, 16 Jun 2026 09:56:38 GMT
+      nuclia-learning-id:
+      - 8f66602fb183485e850f89a087c86876
+      nuclia-learning-model:
+      - chatgpt-azure-4o-mini
+      via:
+      - 1.1 google
+      x-envoy-upstream-service-time:
+      - '595'
+      x-nuclia-trace-id:
+      - e4b29beb1263f6a8ded53ab594bb3bfe
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/hyperforge/tests/api/test_chat_history_workflow.py b/hyperforge/tests/api/test_chat_history_workflow.py
new file mode 100644
index 0000000..af5de7c
--- /dev/null
+++ b/hyperforge/tests/api/test_chat_history_workflow.py
@@ -0,0 +1,113 @@
+import os
+from copy import deepcopy
+
+import pytest
+from hyperforge.engine import main as arag_main
+from hyperforge.minimal_fixtures import cassette_nua_key
+from hyperforge.models import HistoryQuestionAnswer
+
+NUA_KEY = os.environ.get("NUA_KEY") or cassette_nua_key(
+    "https://europe-1.nuclia.cloud/"
+)
+
+pytestmark = [
+    pytest.mark.vcr(ignore_localhost=True),
+    pytest.mark.asyncio,
+]
+
+# Static context text injected as the retrieval result.
+STATIC_CONTEXT = (
+    "The max_tokens parameter controls how many tokens the LLM may generate "
+    "in a single response. It can be set per-request via the API or configured "
+    "globally in the agent workflow. The default value depends on the model, but "
+    "is typically 1024 tokens."
+)
+
+CONFIG = {
+    "drivers": [],
+    "rules": {
+        "rules": [
+            {"prompt": "Be polite"},
+        ]
+    },
+    "memory": {},
+    "workflow": {
+        "id": "default",
+        "name": "Default workflow",
+        "description": "Default workflow for testing",
+        "parameters": {},
+    },
+    "preprocess": [
+        {
+            "module": "rephrase",
+            "rids": [],
+            "labels": [],
+            "synonyms": False,
+            "extend": False,
+            "history": True,  # uses context_history() — picks up client chat_history
+            "session_info": False,
+            "split_question": False,
+        }
+    ],
+    "context": [
+        {
+            "module": "static",
+            "title": "max_tokens reference",
+            "context": STATIC_CONTEXT,
+            "prune_context": False,
+        }
+    ],
+    "generation": [
+        {
+            "module": "summarize",
+            "conversational": True,  # uses get_chat_history() — picks up client chat_history
+        }
+    ],
+    "postprocess": [],
+}
+
+
+async def test_chat_history_is_used_in_rephrase_and_summarize():
+    """Client-provided chat_history overrides empty server-side session history.
+
+    The previous exchange is about ``max_tokens``; the follow-up question uses
+    a pronoun ("it") that only makes sense in that context.  The rephrase agent
+    receives the history via ``context_history()`` and should expand the
+    question; the summarize agent receives it via ``get_chat_history()``.
+    """
+    prior_history = [
+        HistoryQuestionAnswer(
+            question="What is the max_tokens parameter in Nuclia?",
+            answer="max_tokens controls the maximum number of tokens the LLM may generate in a single response.",
+        ),
+        HistoryQuestionAnswer(
+            question="Oh nice, can you always start you responses with a smiley emoji?",
+            answer="Sure! 😊 I can start my responses with a smiley emoji.",
+        ),
+    ]
+
+    config = deepcopy(CONFIG)
+    question_memory = await arag_main(
+        agent_id="default",
+        internal_nua=False,
+        external_nua_api_key=NUA_KEY,
+        question="How do I set it and what is the default value?",
+        config=config,
+        chat_history=prior_history,
+        loaded_modules=[
+            "hyperforge_rephrase",
+            "hyperforge_static",
+            "hyperforge_summarize",
+        ],
+    )
+    # The rephrased question is read from the second "rephrase" step; it must name max_tokens, which the question itself never does (it only says "it").
+    rephrase_steps = [
+        step for step in question_memory.steps if step.module == "rephrase"
+    ]
+    assert len(rephrase_steps) >= 2
+    rephrase_step = rephrase_steps[1]
+    assert "max_tokens" in rephrase_step.value
+    assert question_memory.final_answer
+    assert (
+        "1024" in question_memory.final_answer and "😊" in question_memory.final_answer
+    )
diff --git a/hyperforge/tests/unit/arag/test_memory_chat_history.py b/hyperforge/tests/unit/arag/test_memory_chat_history.py
new file mode 100644
index 0000000..9f3f329
--- /dev/null
+++ b/hyperforge/tests/unit/arag/test_memory_chat_history.py
@@ -0,0 +1,84 @@
+"""
+Unit tests for client-managed chat history in QuestionMemory.
+"""
+
+import pytest
+from hyperforge.memory.memory import EphemeralSessionMemory
+from hyperforge.models import HistoryQuestionAnswer, MemoryConfig, Rules
+from hyperforge.server.cache import NoCache
+from nuclia.lib.nua_responses import Author
+
+
+def _make_ephemeral_session() -> EphemeralSessionMemory:
+    session = EphemeralSessionMemory(
+        config=MemoryConfig(),
+        agent_id="agent-1",
+        workflow_id="default",
+        cache=NoCache(),
+    )
+    session.rules = Rules().rules
+    session.init("session-1")
+    return session
+
+
+@pytest.mark.asyncio
+async def test_client_history_overrides_session_history():
+    """Client-provided history takes precedence over any server-side accumulated history.
+    Without client history, the session's stored history is used instead."""
+    session = _make_ephemeral_session()
+
+    # Accumulate one server-side turn by saving an answered question into the session
+    first = session.start_question("Server Q")
+    first.final_answer = "Server A"
+    await session.save(first)
+
+    # Second request arrives with client-managed history (different content)
+    client_history = [HistoryQuestionAnswer(question="Client Q", answer="Client A")]
+    memory = session.start_question("New question", chat_history=client_history)
+
+    context_str, count = await memory.context_history()
+    messages = await memory.get_chat_history()
+
+    # context_history: client content present, server content absent
+    assert count == 1
+    assert "Client Q" in context_str and "Client A" in context_str
+    assert "Server Q" not in context_str and "Server A" not in context_str
+
+    # get_chat_history: one USER message then one NUCLIA message for the single client pair
+    assert len(messages) == 2
+    assert messages[0].author == Author.USER and messages[0].text == "Client Q"
+    assert messages[1].author == Author.NUCLIA and messages[1].text == "Client A"
+
+    # Third request with no client history falls back to server-stored history
+    memory_no_history = session.start_question("Another question")
+
+    context_str, count = await memory_no_history.context_history()
+
+    assert count == 1
+    assert "Server Q" in context_str and "Server A" in context_str
+
+
+@pytest.mark.asyncio
+async def test_empty_list_chat_history_overrides_server_history():
+    """An explicit chat_history=[] must override server-side history (clear it),
+    not fall back to it. This distinguishes 'omitted' (None) from 'intentionally empty' ([])."""
+    session = _make_ephemeral_session()
+
+    # Accumulate one server-side turn by saving an answered question into the session
+    first = session.start_question("Server Q")
+    first.final_answer = "Server A"
+    await session.save(first)
+
+    # Omitting chat_history → falls back to server-side history
+    memory_omitted = session.start_question("Q")
+    context_str, count = await memory_omitted.context_history()
+    assert count == 1
+    assert "Server Q" in context_str
+
+    # Passing [] → overrides with no history (does not fall back to server)
+    memory_empty = session.start_question("Q", chat_history=[])
+    context_str, count = await memory_empty.context_history()
+    messages = await memory_empty.get_chat_history()
+    assert count == 0
+    assert context_str == ""
+    assert messages == []