From 41a96ee4d6597e452394a38fe798528f7f823137 Mon Sep 17 00:00:00 2001
From: Kunal Somani <kkunal_be23@thapar.edu>
Date: Mon, 15 Jun 2026 15:06:43 +0530
Subject: [PATCH 1/4] feat: add document_comparison_field to
 DocumentNDCGEvaluator

---
 .../components/evaluators/document_ndcg.py    | 102 ++++++++++----
 ...ld-to-ndcg-evaluator-f9abbbd556f49c04.yaml |   8 ++
 .../evaluators/test_document_ndcg.py          | 133 +++++++++++++++++-
 3 files changed, 217 insertions(+), 26 deletions(-)
 create mode 100644 releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml
diff --git a/haystack/components/evaluators/document_ndcg.py b/haystack/components/evaluators/document_ndcg.py
index ca88292f6c..d3e7e4b43d 100644
--- a/haystack/components/evaluators/document_ndcg.py
+++ b/haystack/components/evaluators/document_ndcg.py
@@ -5,35 +5,80 @@
 from math import log2
 from typing import Any
 
-from haystack import Document, component
+from haystack import Document, component, default_to_dict
 
 
 @component
 class DocumentNDCGEvaluator:
     """
-    Evaluator that calculates the normalized discounted cumulative gain (NDCG) of retrieved documents.
+        Evaluator that calculates the normalized discounted cumulative gain (NDCG) of retrieved documents.
 
-    Each question can have multiple ground truth documents and multiple retrieved documents.
-    If the ground truth documents have relevance scores, the NDCG calculation uses these scores.
-    Otherwise, it assumes binary relevance of all ground truth documents.
+        Each question can have multiple ground truth documents and multiple retrieved documents.
+        If the ground truth documents have relevance scores, the NDCG calculation uses these scores.
+        Otherwise, it assumes binary relevance of all ground truth documents.
 
-    Usage example:
+        Usage example:
     ```python
-    from haystack import Document
-    from haystack.components.evaluators import DocumentNDCGEvaluator
-
-    evaluator = DocumentNDCGEvaluator()
-    result = evaluator.run(
-        ground_truth_documents=[[Document(content="France", score=1.0), Document(content="Paris", score=0.5)]],
-        retrieved_documents=[[Document(content="France"), Document(content="Germany"), Document(content="Paris")]],
-    )
-    print(result["individual_scores"])
-    # [0.8869]
-    print(result["score"])
-    # 0.8869
+        from haystack import Document
+        from haystack.components.evaluators import DocumentNDCGEvaluator
+
+        evaluator = DocumentNDCGEvaluator()
+        result = evaluator.run(
+            ground_truth_documents=[[Document(content="France", score=1.0), Document(content="Paris", score=0.5)]],
+            retrieved_documents=[[Document(content="France"), Document(content="Germany"), Document(content="Paris")]],
+        )
+        print(result["individual_scores"])
+        # [0.8869]
+        print(result["score"])
+        # 0.8869
     ```
     """
 
+    def __init__(self, document_comparison_field: str = "content") -> None:
+        """
+        Create a DocumentNDCGEvaluator component.
+
+        :param document_comparison_field:
+            The Document field to use for comparison. Possible options:
+            - `"content"`: uses `doc.content`
+            - `"id"`: uses `doc.id`
+            - A `meta.` prefix followed by a key name: uses `doc.meta["<key>"]`
+              (e.g. `"meta.file_id"`, `"meta.page_number"`)
+              Nested keys are supported (e.g. `"meta.source.url"`); documents missing the key never match.
+        """
+        self.document_comparison_field = document_comparison_field
+
+    def _get_comparison_value(self, doc: Document) -> Any:
+        """
+        Extract the comparison value from a document based on the configured field.
+        """
+        if self.document_comparison_field == "content":
+            return doc.content
+        if self.document_comparison_field == "id":
+            return doc.id
+        if self.document_comparison_field.startswith("meta."):
+            parts = self.document_comparison_field[5:].split(".")
+            value = doc.meta
+            for part in parts:
+                if not isinstance(value, dict) or part not in value:
+                    return None
+                value = value[part]
+            return value
+        msg = (
+            f"Unsupported document_comparison_field: '{self.document_comparison_field}'. "
+            "Use 'content', 'id', or 'meta.<key>'."
+        )
+        raise ValueError(msg)
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
+        """
+        return default_to_dict(self, document_comparison_field=self.document_comparison_field)
+
     @component.output_types(score=float, individual_scores=list[float])
     def run(
         self, ground_truth_documents: list[list[Document]], retrieved_documents: list[list[Document]]
@@ -78,7 +123,7 @@ def validate_inputs(gt_docs: list[list[Document]], ret_docs: list[list[Document]
             The retrieved_documents to validate.
 
         :raises ValueError:
-            If the ground_truth_documents or the retrieved_documents are an empty a list.
+            If the ground_truth_documents or the retrieved_documents are an empty list.
             If the length of ground_truth_documents and retrieved_documents differs.
             If any list of documents in ground_truth_documents contains a mix of documents with and without a score.
         """
@@ -95,8 +140,7 @@ def validate_inputs(gt_docs: list[list[Document]], ret_docs: list[list[Document]
                 msg = "Either none or all documents in each list of ground_truth_documents must have a score."
                 raise ValueError(msg)
 
-    @staticmethod
-    def calculate_dcg(gt_docs: list[Document], ret_docs: list[Document]) -> float:
+    def calculate_dcg(self, gt_docs: list[Document], ret_docs: list[Document]) -> float:
         """
         Calculate the discounted cumulative gain (DCG) of the retrieved documents.
 
@@ -104,15 +148,24 @@ def calculate_dcg(gt_docs: list[Document], ret_docs: list[Document]) -> float:
             The ground truth documents.
         :param ret_docs:
             The retrieved documents.
+        :param document_comparison_field:
+            The Document field used to match retrieved documents against ground truth documents.
         :returns:
             The discounted cumulative gain (DCG) of the retrieved
             documents based on the ground truth documents.
         """
         dcg = 0.0
-        relevant_id_to_score = {doc.id: doc.score if doc.score is not None else 1 for doc in gt_docs}
+        # Build lookup from comparison value -> relevance score
+        relevant_value_to_score: dict[Any, float] = {}
+        for doc in gt_docs:
+            value = self._get_comparison_value(doc)
+            if value is not None:
+                relevant_value_to_score[value] = doc.score if doc.score is not None else 1
+
         for i, doc in enumerate(ret_docs):
-            if doc.id in relevant_id_to_score:  # TODO Related to https://github.com/deepset-ai/haystack/issues/8412
-                dcg += relevant_id_to_score[doc.id] / log2(i + 2)  # i + 2 because i is 0-indexed
+            value = self._get_comparison_value(doc)
+            if value is not None and value in relevant_value_to_score:
+                dcg += relevant_value_to_score[value] / log2(i + 2)  # i + 2 because i is 0-indexed
         return dcg
 
     @staticmethod
@@ -127,7 +180,6 @@ def calculate_idcg(gt_docs: list[Document]) -> float:
         """
         idcg = 0.0
         for i, doc in enumerate(sorted(gt_docs, key=lambda x: x.score if x.score is not None else 1, reverse=True)):
-            # If the document has a score, use it; otherwise, use 1 for binary relevance.
             relevance = doc.score if doc.score is not None else 1
             idcg += relevance / log2(i + 2)  # i + 2 because i is 0-indexed
         return idcg
diff --git a/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml b/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml
new file mode 100644
index 0000000000..5d7ab2e18e
--- /dev/null
+++ b/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml
@@ -0,0 +1,8 @@
+enhancements:
+  - |
+    Added ``document_comparison_field`` parameter to ``DocumentNDCGEvaluator``,
+    consistent with ``DocumentMAPEvaluator``, ``DocumentMRREvaluator``, and
+    ``DocumentRecallEvaluator``. Users can now match documents by ``"content"``,
+    ``"id"``, or any ``"meta.<key>"`` field when calculating NDCG scores,
+    resolving a known limitation referenced in
+    `#8412 <https://github.com/deepset-ai/haystack/issues/8412>`_.
diff --git a/test/components/evaluators/test_document_ndcg.py b/test/components/evaluators/test_document_ndcg.py
index 1b300eb4d8..408c736e7d 100644
--- a/test/components/evaluators/test_document_ndcg.py
+++ b/test/components/evaluators/test_document_ndcg.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-from haystack import Document
+from haystack import Document, default_from_dict
 from haystack.components.evaluators.document_ndcg import DocumentNDCGEvaluator
 
 
@@ -201,3 +201,134 @@ def test_calculate_idcg_empty():
     gt_docs = []
     idcg = evaluator.calculate_idcg(gt_docs)
     assert idcg == 0
+
+
+def test_to_dict_default():
+    evaluator = DocumentNDCGEvaluator()
+    data = evaluator.to_dict()
+    assert data == {
+        "type": "haystack.components.evaluators.document_ndcg.DocumentNDCGEvaluator",
+        "init_parameters": {"document_comparison_field": "content"},
+    }
+
+
+def test_to_dict_custom_field():
+    evaluator = DocumentNDCGEvaluator(document_comparison_field="id")
+    data = evaluator.to_dict()
+    assert data == {
+        "type": "haystack.components.evaluators.document_ndcg.DocumentNDCGEvaluator",
+        "init_parameters": {"document_comparison_field": "id"},
+    }
+
+
+def test_from_dict():
+    data = {
+        "type": "haystack.components.evaluators.document_ndcg.DocumentNDCGEvaluator",
+        "init_parameters": {"document_comparison_field": "id"},
+    }
+    evaluator = default_from_dict(DocumentNDCGEvaluator, data)
+    assert evaluator.document_comparison_field == "id"
+
+
+def test_run_with_id_comparison():
+    # Documents with the same IDs but different content — id comparison
+    # must match on id, not content
+    evaluator = DocumentNDCGEvaluator(document_comparison_field="id")
+    result = evaluator.run(
+        ground_truth_documents=[[Document(id="doc1", content="France"), Document(id="doc2", content="Paris")]],
+        retrieved_documents=[
+            [
+                Document(id="doc1", content="different text"),
+                Document(id="doc3", content="Germany"),
+                Document(id="doc2", content="also different"),
+            ]
+        ],
+    )
+    assert result["individual_scores"][0] == pytest.approx(0.9197, abs=1e-4)
+    assert result["score"] == pytest.approx(0.9197, abs=1e-4)
+
+
+def test_run_with_id_comparison_no_match():
+    evaluator = DocumentNDCGEvaluator(document_comparison_field="id")
+    result = evaluator.run(
+        ground_truth_documents=[[Document(id="doc1", content="France")]],
+        retrieved_documents=[[Document(id="doc99", content="France")]],
+    )
+    # Same content, different ID — should NOT match when comparing by id
+    assert result["individual_scores"] == [0.0]
+    assert result["score"] == 0.0
+
+
+def test_run_with_meta_comparison():
+    evaluator = DocumentNDCGEvaluator(document_comparison_field="meta.file_id")
+    result = evaluator.run(
+        ground_truth_documents=[
+            [Document(content="France", meta={"file_id": "f1"}), Document(content="Paris", meta={"file_id": "f2"})]
+        ],
+        retrieved_documents=[
+            [
+                Document(content="different", meta={"file_id": "f1"}),
+                Document(content="irrelevant", meta={"file_id": "f99"}),
+                Document(content="also different", meta={"file_id": "f2"}),
+            ]
+        ],
+    )
+    assert result["individual_scores"][0] == pytest.approx(0.9197, abs=1e-4)
+    assert result["score"] == pytest.approx(0.9197, abs=1e-4)
+
+
+def test_run_with_nested_meta_comparison():
+    evaluator = DocumentNDCGEvaluator(document_comparison_field="meta.source.url")
+    result = evaluator.run(
+        ground_truth_documents=[[Document(content="x", meta={"source": {"url": "https://a.com"}})]],
+        retrieved_documents=[[Document(content="z", meta={"source": {"url": "https://a.com"}})]],
+    )
+    assert result["individual_scores"] == [1.0]
+    assert result["score"] == 1.0
+
+
+def test_run_with_meta_missing_key_treated_as_no_match():
+    # Documents missing the meta key should not match anything
+    evaluator = DocumentNDCGEvaluator(document_comparison_field="meta.file_id")
+    result = evaluator.run(
+        ground_truth_documents=[[Document(content="France", meta={"file_id": "f1"})]],
+        retrieved_documents=[[Document(content="France", meta={})]],
+    )
+    assert result["individual_scores"] == [0.0]
+    assert result["score"] == 0.0
+
+
+def test_run_with_id_comparison_with_scores():
+    # Verify that relevance scores are honoured when comparing by id
+    evaluator = DocumentNDCGEvaluator(document_comparison_field="id")
+    result = evaluator.run(
+        ground_truth_documents=[
+            [
+                Document(id="doc1", content="foo", score=3),
+                Document(id="doc2", content="bar", score=2),
+                Document(id="doc3", content="baz", score=3),
+                Document(id="doc6", content="qux", score=2),
+                Document(id="doc7", content="quux", score=3),
+                Document(id="doc8", content="corge", score=2),
+            ]
+        ],
+        retrieved_documents=[
+            [
+                Document(id="doc1", content="x"),
+                Document(id="doc2", content="y"),
+                Document(id="doc3", content="z"),
+                Document(id="doc4", content="w"),
+                Document(id="doc5", content="v"),
+            ]
+        ],
+    )
+    assert result["individual_scores"][0] == pytest.approx(0.6592, abs=1e-4)
+    assert result["score"] == pytest.approx(0.6592, abs=1e-4)
+
+
+def test_unsupported_comparison_field_raises():
+    evaluator = DocumentNDCGEvaluator(document_comparison_field="embedding")
+    with pytest.raises(ValueError, match="Unsupported document_comparison_field"):
+        evaluator.run(
+            ground_truth_documents=[[Document(content="France")]], retrieved_documents=[[Document(content="France")]]
+        )

From b40158aa68efe24931555bbf4b9fd3a98c05161f Mon Sep 17 00:00:00 2001
From: Kunal Somani <kkunal_be23@thapar.edu>
Date: Mon, 15 Jun 2026 15:20:52 +0530
Subject: [PATCH 2/4] chore: simplify release note RST syntax

---
 ...t-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml b/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml
index 5d7ab2e18e..5f66487ce5 100644
--- a/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml
+++ b/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml
@@ -4,5 +4,5 @@ enhancements:
     consistent with ``DocumentMAPEvaluator``, ``DocumentMRREvaluator``, and
     ``DocumentRecallEvaluator``. Users can now match documents by ``"content"``,
     ``"id"``, or any ``"meta.<key>"`` field when calculating NDCG scores,
-    resolving a known limitation referenced in
-    `#8412 <https://github.com/deepset-ai/haystack/issues/8412>`_.
+    resolving a known limitation where documents were previously matched by
+    hardcoded ``id`` comparison instead of configurable field comparison.

From 5881b799f87f2a06951d6e3c40df9da1ea2c03b3 Mon Sep 17 00:00:00 2001
From: Kunal Somani <kkunal_be23@thapar.edu>
Date: Fri, 19 Jun 2026 15:48:48 +0530
Subject: [PATCH 3/4] fix: address review comments - fix IDCG/DCG mismatch,
 docstring formatting, add upgrade note

---
 .../components/evaluators/document_ndcg.py    | 21 +++++++++++++------
 ...ld-to-ndcg-evaluator-f9abbbd556f49c04.yaml | 14 ++++++++++---
 .../evaluators/test_document_ndcg.py          | 21 +++++++++++++++++++
 3 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/haystack/components/evaluators/document_ndcg.py b/haystack/components/evaluators/document_ndcg.py
index d3e7e4b43d..d395d3fb83 100644
--- a/haystack/components/evaluators/document_ndcg.py
+++ b/haystack/components/evaluators/document_ndcg.py
@@ -1,3 +1,4 @@
+# haystack/components/evaluators/document_ndcg.py
 # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
@@ -148,14 +149,13 @@ def calculate_dcg(self, gt_docs: list[Document], ret_docs: list[Document]) -> fl
             The ground truth documents.
         :param ret_docs:
             The retrieved documents.
-        :param document_comparison_field:
-            The Document field used to match retrieved documents against ground truth documents.
         :returns:
             The discounted cumulative gain (DCG) of the retrieved
             documents based on the ground truth documents.
         """
         dcg = 0.0
-        # Build lookup from comparison value -> relevance score
+        # Build lookup from comparison value -> relevance score, skipping documents
+        # whose comparison value cannot be determined (e.g. missing meta key)
         relevant_value_to_score: dict[Any, float] = {}
         for doc in gt_docs:
             value = self._get_comparison_value(doc)
@@ -168,18 +168,27 @@ def calculate_dcg(self, gt_docs: list[Document], ret_docs: list[Document]) -> fl
                 dcg += relevant_value_to_score[value] / log2(i + 2)  # i + 2 because i is 0-indexed
         return dcg
 
-    @staticmethod
-    def calculate_idcg(gt_docs: list[Document]) -> float:
+    def calculate_idcg(self, gt_docs: list[Document]) -> float:
         """
         Calculate the ideal discounted cumulative gain (IDCG) of the ground truth documents.
 
+        Ground truth documents whose comparison value cannot be determined (e.g. missing meta key)
+        are excluded, since they can never be matched in `calculate_dcg` either. Including them here
+        would inflate the IDCG and make it impossible for NDCG to reach 1.0 for a perfect retrieval.
+
         :param gt_docs:
             The ground truth documents.
         :returns:
             The ideal discounted cumulative gain (IDCG) of the ground truth documents.
         """
+        # Filter out documents that cannot be matched, consistent with calculate_dcg
+        matchable_docs = [doc for doc in gt_docs if self._get_comparison_value(doc) is not None]
+
         idcg = 0.0
-        for i, doc in enumerate(sorted(gt_docs, key=lambda x: x.score if x.score is not None else 1, reverse=True)):
+        for i, doc in enumerate(
+            sorted(matchable_docs, key=lambda x: x.score if x.score is not None else 1, reverse=True)
+        ):
+            # If the document has a score, use it; otherwise, use 1 for binary relevance.
             relevance = doc.score if doc.score is not None else 1
             idcg += relevance / log2(i + 2)  # i + 2 because i is 0-indexed
         return idcg
diff --git a/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml b/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml
index 5f66487ce5..57b45d3150 100644
--- a/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml
+++ b/releasenotes/notes/add-document-comparison-field-to-ndcg-evaluator-f9abbbd556f49c04.yaml
@@ -1,8 +1,16 @@
+upgrade:
+  - |
+    ``DocumentNDCGEvaluator`` now matches documents by their ``content`` field
+    by default instead of their auto-generated ``id``. Previously, ground
+    truth and retrieved documents were matched only if they had identical
+    ``id`` values, so documents with the same content but a different ``id``
+    (for example due to differing metadata) never matched. As a result, NDCG scores
+    computed with this evaluator may change for existing pipelines. To keep
+    the previous ``id``-based matching behavior, pass
+    ``document_comparison_field="id"`` when constructing the evaluator.
 enhancements:
   - |
     Added ``document_comparison_field`` parameter to ``DocumentNDCGEvaluator``,
     consistent with ``DocumentMAPEvaluator``, ``DocumentMRREvaluator``, and
     ``DocumentRecallEvaluator``. Users can now match documents by ``"content"``,
-    ``"id"``, or any ``"meta.<key>"`` field when calculating NDCG scores,
-    resolving a known limitation where documents were previously matched by
-    hardcoded ``id`` comparison instead of configurable field comparison.
+    ``"id"``, or any ``"meta.<key>"`` field when calculating NDCG scores.
diff --git a/test/components/evaluators/test_document_ndcg.py b/test/components/evaluators/test_document_ndcg.py
index 408c736e7d..3ae49595ae 100644
--- a/test/components/evaluators/test_document_ndcg.py
+++ b/test/components/evaluators/test_document_ndcg.py
@@ -332,3 +332,24 @@ def test_unsupported_comparison_field_raises():
         evaluator.run(
             ground_truth_documents=[[Document(content="France")]], retrieved_documents=[[Document(content="France")]]
         )
+
+
+def test_run_with_meta_missing_key_can_still_reach_perfect_ndcg():
+    """
+    Regression test for the IDCG/DCG inflation bug: ground truth documents that
+    cannot be matched (missing the configured meta key) must be excluded from
+    IDCG too, otherwise NDCG can never reach 1.0 even for a perfect retrieval.
+    """
+    evaluator = DocumentNDCGEvaluator(document_comparison_field="meta.file_id")
+    result = evaluator.run(
+        ground_truth_documents=[
+            [
+                Document(content="France", meta={"file_id": "f1"}),
+                Document(content="unmatchable", meta={}),  # no file_id -> cannot be matched
+            ]
+        ],
+        retrieved_documents=[[Document(content="France", meta={"file_id": "f1"})]],
+    )
+    # Perfect retrieval of the one matchable document should yield NDCG of exactly 1.0
+    assert result["individual_scores"] == [1.0]
+    assert result["score"] == 1.0

From c0948f087773b4d4cadd814ff7c76e16358bfb69 Mon Sep 17 00:00:00 2001
From: bogdankostic <bogdankostic@web.de>
Date: Fri, 19 Jun 2026 14:12:48 +0200
Subject: [PATCH 4/4] Apply suggestions from code review

Co-authored-by: bogdankostic <bogdankostic@web.de>
---
 .../components/evaluators/document_ndcg.py    | 35 +++++++++----------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/haystack/components/evaluators/document_ndcg.py b/haystack/components/evaluators/document_ndcg.py
index d395d3fb83..14a8463205 100644
--- a/haystack/components/evaluators/document_ndcg.py
+++ b/haystack/components/evaluators/document_ndcg.py
@@ -1,4 +1,3 @@
-# haystack/components/evaluators/document_ndcg.py
 # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
@@ -12,26 +11,26 @@
 @component
 class DocumentNDCGEvaluator:
     """
-        Evaluator that calculates the normalized discounted cumulative gain (NDCG) of retrieved documents.
+    Evaluator that calculates the normalized discounted cumulative gain (NDCG) of retrieved documents.
 
-        Each question can have multiple ground truth documents and multiple retrieved documents.
-        If the ground truth documents have relevance scores, the NDCG calculation uses these scores.
-        Otherwise, it assumes binary relevance of all ground truth documents.
+    Each question can have multiple ground truth documents and multiple retrieved documents.
+    If the ground truth documents have relevance scores, the NDCG calculation uses these scores.
+    Otherwise, it assumes binary relevance of all ground truth documents.
 
-        Usage example:
+    Usage example:
     ```python
-        from haystack import Document
-        from haystack.components.evaluators import DocumentNDCGEvaluator
-
-        evaluator = DocumentNDCGEvaluator()
-        result = evaluator.run(
-            ground_truth_documents=[[Document(content="France", score=1.0), Document(content="Paris", score=0.5)]],
-            retrieved_documents=[[Document(content="France"), Document(content="Germany"), Document(content="Paris")]],
-        )
-        print(result["individual_scores"])
-        # [0.8869]
-        print(result["score"])
-        # 0.8869
+    from haystack import Document
+    from haystack.components.evaluators import DocumentNDCGEvaluator
+
+    evaluator = DocumentNDCGEvaluator()
+    result = evaluator.run(
+        ground_truth_documents=[[Document(content="France", score=1.0), Document(content="Paris", score=0.5)]],
+        retrieved_documents=[[Document(content="France"), Document(content="Germany"), Document(content="Paris")]],
+    )
+    print(result["individual_scores"])
+    # [0.8869]
+    print(result["score"])
+    # 0.8869
     ```
     """