diff --git a/.github/workflows/check-alerts.yaml b/.github/workflows/check-alerts.yaml
index abd44d40b..33f8407ff 100644
--- a/.github/workflows/check-alerts.yaml
+++ b/.github/workflows/check-alerts.yaml
@@ -2,32 +2,59 @@ name: Check Alerts using Promtool
 on:
   pull_request:
     paths:
-      - '**/*.rules.yaml'
-      - '**/*.alerts.yaml'
+      - 'helm/bundles/*/templates/alerts.yaml'
+      - 'helm/bundles/*/values.yaml'
+      - 'helm/bundles/*/Chart.yaml'
+      - 'helm/library/**'
+      - '.github/workflows/check-alerts.yaml'
 
 jobs:
   lint:
-    runs-on: ubuntu-latest
+    # Pinned to ubuntu-24.04 so the pre-installed helm and yq versions are
+    # stable. helm and yq come from the base runner image (no install step
+    # needed); promtool is installed by the peimanja action below.
+    runs-on: ubuntu-24.04
     steps:
-      - name: Checkout PR
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v6
 
-      - name: Get changed rule and alert files
-        id: changed
-        uses: tj-actions/changed-files@v47
-        with:
-          files: |
-            **/*.rules.yaml
-            **/*.alerts.yaml
+      - name: Render bundles to rule files
+        run: |
+          set -euo pipefail
+          mkdir -p rendered
+
+          # render <bundle> <output-name> [extra helm args...]
+          # Render one bundle, keep only the PrometheusRule spec, and fail when
+          # nothing was extracted: an empty file gives promtool no rules to
+          # check, so a chart that stops emitting its rules would go unnoticed.
+          render() {
+            local bundle="$1" out="rendered/$2.yaml"
+            shift 2
+            helm template "$bundle" "helm/bundles/$bundle" "$@" \
+              | yq 'select(.kind == "PrometheusRule") | .spec' > "$out"
+            if [ ! -s "$out" ]; then
+              echo "::error::no PrometheusRule rendered for $bundle ($out is empty)"
+              exit 1
+            fi
+          }
+
+          for bundle in cortex-cinder cortex-manila cortex-nova cortex-placement-shim; do
+            helm dep update "helm/bundles/$bundle"
+          done
+
+          render cortex-cinder         cortex-cinder
+          render cortex-manila         cortex-manila
+          render cortex-placement-shim cortex-placement-shim
+
+          # nova has KVM-gated rules; render both flavours.
+          render cortex-nova           cortex-nova-default
+          render cortex-nova           cortex-nova-kvm --set kvm.enabled=true
 
-      - name: Install Helm
-        uses: azure/setup-helm@v5
+          ls -la rendered/
 
-      - name: Check changed rule and alert files via promtool
-        if: steps.changed.outputs.any_changed == 'true'
-        uses: peimanja/promtool-github-actions@v0.0.2
+      - name: Check rules with promtool
+        uses: peimanja/promtool-github-actions@741be6fd6b8ee6a1d777ea020076b70c6233b3a1 # v0.0.2
         with:
           promtool_actions_subcommand: 'rules'
-          promtool_actions_files: ${{ steps.changed.outputs.all_changed_files }}
+          promtool_actions_files: 'rendered/*.yaml'
           promtool_actions_version: 'latest'
-          promtool_actions_comment: 'false'
\ No newline at end of file
+          promtool_actions_comment: 'false'
diff --git a/docs/reservations/committed-resource-reservations.md b/docs/reservations/committed-resource-reservations.md
index 4d96d43a6..7d80064b0 100644
--- a/docs/reservations/committed-resource-reservations.md
+++ b/docs/reservations/committed-resource-reservations.md
@@ -35,7 +35,7 @@ The CR reservation implementation is located in `internal/scheduling/reservation
 - Scheduling pipeline selection per flavor group
 - Per-flavor-group resource flags (`handlesCommitments`, `hasCapacity`, `hasQuota`) controlling which resource types are active for each group
 
-**Metrics and Alerts**: Defined in `helm/bundles/cortex-nova/alerts/nova.alerts.yaml` with prefixes:
+**Metrics and Alerts**: Defined in `helm/bundles/cortex-nova/templates/alerts.yaml` with prefixes:
 - `cortex_committed_resource_change_api_*`
 - `cortex_committed_resource_usage_api_*`
 - `cortex_committed_resource_capacity_api_*`
diff --git a/helm/bundles/cortex-cinder/alerts/cinder.alerts.yaml b/helm/bundles/cortex-cinder/alerts/cinder.alerts.yaml
deleted file mode 100644
index 6684e3392..000000000
--- a/helm/bundles/cortex-cinder/alerts/cinder.alerts.yaml
+++ /dev/null
@@ -1,260 +0,0 @@
-groups:
-- name: cortex-cinder-alerts
-  rules:
-  - alert: CortexCinderSchedulingDown
-    expr: |
-      up{pod=~"cortex-cinder-scheduling-.*"} != 1 or
-      absent(up{pod=~"cortex-cinder-scheduling-.*"})
-    for: 5m
-    labels:
-      context: liveness
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/down
-    annotations:
-      summary: "Cortex Scheduling for Cinder is down"
-      description: >
-        The Cortex scheduling service is down. Scheduling requests from Cinder will
-        not be served. This is no immediate problem, since Cinder will continue
-        placing new VMs. However, the placement will be less desirable.
-
-  - alert: CortexCinderKnowledgeDown
-    expr: |
-      up{pod=~"cortex-cinder-knowledge-.*"} != 1 or
-      absent(up{pod=~"cortex-cinder-knowledge-.*"})
-    for: 5m
-    labels:
-      context: liveness
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/down
-    annotations:
-      summary: "Cortex Knowledge for Cinder is down"
-      description: >
-        The Cortex Knowledge service is down. This is no immediate problem,
-        since cortex is still able to process requests,
-        but the quality of the responses may be affected.
-
-  - alert: CortexCinderHttpRequest400sTooHigh
-    expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-cinder-metrics", status=~"4.+"}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Cinder Scheduler HTTP request 400 errors too high"
-      description: >
-        Cinder Scheduler is responding to placement requests with HTTP 4xx
-        errors. This is expected when the scheduling request cannot be served
-        by Cortex. However, it could also indicate that the request format has
-        changed and Cortex is unable to parse it.
-
-  - alert: CortexCinderSchedulingHttpRequest500sTooHigh
-    expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-cinder-metrics", status=~"5.+" }[5m]) > 0.1
-    for: 5m
-    labels:
-      context: api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Cinder Scheduler HTTP request 500 errors too high"
-      description: >
-        Cinder Scheduler is responding to placement requests with HTTP 5xx errors.
-        This is not expected and indicates that Cortex is having some internal problem.
-        Cinder will continue to place new VMs, but the placement will be less desirable.
-        Thus, no immediate action is needed.
-
-  - alert: CortexCinderHighMemoryUsage
-    expr: process_resident_memory_bytes{service="cortex-cinder-metrics"} > 6000 * 1024 * 1024
-    for: 5m
-    labels:
-      context: memory
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "`{{$labels.component}}` uses too much memory"
-      description: >
-        `{{$labels.component}}` should not be using more than 6000 MiB of memory. Usually it
-        should use much less, so there may be a memory leak or other changes
-        that are causing the memory usage to increase significantly.
-
-  - alert: CortexCinderHighCPUUsage
-    expr: rate(process_cpu_seconds_total{service="cortex-cinder-metrics"}[1m]) > 0.5
-    for: 5m
-    labels:
-      context: cpu
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "`{{$labels.component}}` uses too much CPU"
-      description: >
-        `{{$labels.component}}` should not be using more than 50% of a single CPU core. Usually
-        it should use much less, so there may be a CPU leak or other changes
-        that are causing the CPU usage to increase significantly.
-
-  - alert: CortexCinderTooManyDBConnectionAttempts
-    expr: rate(cortex_db_connection_attempts_total{service="cortex-cinder-metrics"}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: db
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "`{{$labels.component}}` is trying to connect to the database too often"
-      description: >
-        `{{$labels.component}}` is trying to connect to the database too often. This may happen
-        when the database is down or the connection parameters are misconfigured.
-
-  - alert: CortexCinderSyncNotSuccessful
-    expr: cortex_sync_request_processed_total{service="cortex-cinder-metrics"} - cortex_sync_request_duration_seconds_count{service="cortex-cinder-metrics"} > 0
-    for: 5m
-    labels:
-      context: syncstatus
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "`{{$labels.component}}` Sync not successful"
-      description: >
-        `{{$labels.component}}` experienced an issue syncing data from the datasource `{{$labels.datasource}}`. This may
-        happen when the datasource (OpenStack, Prometheus, etc.) is down or
-        the sync module is misconfigured. No immediate action is needed, since
-        the sync module will retry the sync operation and the currently synced
-        data will be kept. However, when this problem persists for a longer
-        time the service will have a less recent view of the datacenter.
-
-  - alert: CortexCinderSyncObjectsDroppedToZero
-    expr: cortex_sync_objects{service="cortex-cinder-metrics"} == 0
-    for: 60m
-    labels:
-      context: syncobjects
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "`{{$labels.component}}` is not syncing any new data from `{{$labels.datasource}}`"
-      description: >
-        `{{$labels.component}}` is not syncing any objects from the datasource `{{$labels.datasource}}`. This may happen
-        when the datasource (OpenStack, Prometheus, etc.) is down or the sync
-        module is misconfigured. No immediate action is needed, since the sync
-        module will retry the sync operation and the currently synced data will
-        be kept. However, when this problem persists for a longer time the
-        service will have a less recent view of the datacenter.
-
-  - alert: CortexCinderDatasourceUnready
-    expr: cortex_datasource_state{domain="cinder",state!="ready"} != 0
-    for: 60m
-    labels:
-      context: datasources
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Datasource `{{$labels.datasource}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the datasource
-        connectivity or configuration. It is recommended to investigate the
-        datasource status and logs for more details.
-
-  - alert: CortexCinderKnowledgeUnready
-    expr: cortex_knowledge_state{domain="cinder",state!="ready"} != 0
-    for: 60m
-    labels:
-      context: knowledge
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Knowledge `{{$labels.knowledge}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the knowledge
-        configuration. It is recommended to investigate the
-        knowledge status and logs for more details.
-
-  - alert: CortexCinderDecisionsWithErrors
-    expr: cortex_decision_state{domain="cinder",state="error"} > 0
-    for: 5m
-    labels:
-      context: decisions
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Some decisions are in error state for operator `{{$labels.operator}}`"
-      description: >
-        The cortex scheduling pipeline generated decisions that are in error state.
-        This may indicate issues with the decision logic or the underlying infrastructure.
-        It is recommended to investigate the decision logs and the state of the
-        VMs being processed.
-
-  - alert: CortexCinderTooManyDecisionsWaiting
-    expr: cortex_decision_state{domain="cinder",state="waiting"} > 10
-    for: 5m
-    labels:
-      context: decisions
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Too many decisions are in waiting state for operator `{{$labels.operator}}`"
-      description: >
-        The cortex scheduling pipeline has a high number of decisions for which
-        no target host has been assigned yet.
-
-        This may indicate a backlog in processing or issues with the decision logic.
-        It is recommended to investigate the decision logs and the state of the
-        VMs being processed.
-
-  - alert: CortexCinderKPIUnready
-    expr: |
-      cortex_kpi_state{domain="cinder",state!="ready"} != 0
-    for: 60m
-    labels:
-      context: kpis
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "KPI `{{$labels.kpi}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the KPI
-        configuration. It is recommended to investigate the
-        KPI status and logs for more details.
-
-  - alert: CortexCinderPipelineUnready
-    expr: cortex_pipeline_state{domain="cinder",state!="ready"} != 0
-    for: 5m
-    labels:
-      context: pipelines
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Pipeline `{{$labels.pipeline}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the pipeline
-        configuration. It is recommended to investigate the
-        pipeline status and logs for more details.
diff --git a/helm/bundles/cortex-cinder/templates/alerts.yaml b/helm/bundles/cortex-cinder/templates/alerts.yaml
index 59496c33d..4beea8b53 100644
--- a/helm/bundles/cortex-cinder/templates/alerts.yaml
+++ b/helm/bundles/cortex-cinder/templates/alerts.yaml
@@ -1,6 +1,10 @@
 # Copyright SAP SE
 # SPDX-License-Identifier: Apache-2.0
 
+# NOTE: Helm renders this file and evaluates template actions even inside YAML
+# comments. Prometheus templating directives must therefore be escaped by
+# wrapping each outer delimiter in a quoted action: {{ "{{" }} $labels.foo {{ "}}" }}.
+
 {{- if .Values.alerts.enabled }}
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
@@ -10,8 +14,264 @@ metadata:
     type: alerting-rules
     prometheus: {{ required ".Values.alerts.prometheus missing" .Values.alerts.prometheus | quote }}
 spec:
-  {{- $files := .Files.Glob "alerts/*.alerts.yaml" }}
-  {{- range $path, $file := $files }}
-  {{ $file | toString | nindent 2 }}
-  {{- end }}
+  groups:
+  - name: cortex-cinder-alerts
+    rules:
+    - alert: CortexCinderSchedulingDown
+      expr: |
+        up{pod=~"cortex-cinder-scheduling-.*"} != 1 or
+        absent(up{pod=~"cortex-cinder-scheduling-.*"})
+      for: 5m
+      labels:
+        context: liveness
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/down
+      annotations:
+        summary: "Cortex Scheduling for Cinder is down"
+        description: >
+          The Cortex scheduling service is down. Scheduling requests from Cinder will
+          not be served. This is no immediate problem, since Cinder will continue
+          placing new volumes. However, the placement will be less desirable.
+
+    - alert: CortexCinderKnowledgeDown
+      expr: |
+        up{pod=~"cortex-cinder-knowledge-.*"} != 1 or
+        absent(up{pod=~"cortex-cinder-knowledge-.*"})
+      for: 5m
+      labels:
+        context: liveness
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/down
+      annotations:
+        summary: "Cortex Knowledge for Cinder is down"
+        description: >
+          The Cortex Knowledge service is down. This is no immediate problem,
+          since cortex is still able to process requests,
+          but the quality of the responses may be affected.
+
+    - alert: CortexCinderHttpRequest400sTooHigh
+      expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-cinder-metrics", status=~"4.+"}[5m]) > 0.1
+      for: 5m
+      labels:
+        context: api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Cinder Scheduler HTTP request 400 errors too high"
+        description: >
+          Cinder Scheduler is responding to placement requests with HTTP 4xx
+          errors. This is expected when the scheduling request cannot be served
+          by Cortex. However, it could also indicate that the request format has
+          changed and Cortex is unable to parse it.
+
+    - alert: CortexCinderSchedulingHttpRequest500sTooHigh
+      expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-cinder-metrics", status=~"5.+" }[5m]) > 0.1
+      for: 5m
+      labels:
+        context: api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Cinder Scheduler HTTP request 500 errors too high"
+        description: >
+          Cinder Scheduler is responding to placement requests with HTTP 5xx errors.
+          This is not expected and indicates that Cortex is having some internal problem.
+          Cinder will continue to place new volumes, but the placement will be less desirable.
+          Thus, no immediate action is needed.
+
+    - alert: CortexCinderHighMemoryUsage
+      expr: process_resident_memory_bytes{service="cortex-cinder-metrics"} > 6000 * 1024 * 1024
+      for: 5m
+      labels:
+        context: memory
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` uses too much memory"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` should not be using more than 6000 MiB of memory. Usually it
+          should use much less, so there may be a memory leak or other changes
+          that are causing the memory usage to increase significantly.
+
+    - alert: CortexCinderHighCPUUsage
+      expr: rate(process_cpu_seconds_total{service="cortex-cinder-metrics"}[1m]) > 0.5
+      for: 5m
+      labels:
+        context: cpu
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` uses too much CPU"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` should not be using more than 50% of a single CPU core. Usually
+          it should use much less, so there may be a CPU leak or other changes
+          that are causing the CPU usage to increase significantly.
+
+    - alert: CortexCinderTooManyDBConnectionAttempts
+      expr: rate(cortex_db_connection_attempts_total{service="cortex-cinder-metrics"}[5m]) > 0.1
+      for: 5m
+      labels:
+        context: db
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` is trying to connect to the database too often"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` is trying to connect to the database too often. This may happen
+          when the database is down or the connection parameters are misconfigured.
+
+    - alert: CortexCinderSyncNotSuccessful
+      expr: cortex_sync_request_processed_total{service="cortex-cinder-metrics"} - cortex_sync_request_duration_seconds_count{service="cortex-cinder-metrics"} > 0
+      for: 5m
+      labels:
+        context: syncstatus
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` Sync not successful"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` experienced an issue syncing data from the datasource `{{ "{{" }} $labels.datasource {{ "}}" }}`. This may
+          happen when the datasource (OpenStack, Prometheus, etc.) is down or
+          the sync module is misconfigured. No immediate action is needed, since
+          the sync module will retry the sync operation and the currently synced
+          data will be kept. However, when this problem persists for a longer
+          time the service will have a less recent view of the datacenter.
+
+    - alert: CortexCinderSyncObjectsDroppedToZero
+      expr: cortex_sync_objects{service="cortex-cinder-metrics"} == 0
+      for: 60m
+      labels:
+        context: syncobjects
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` is not syncing any new data from `{{ "{{" }} $labels.datasource {{ "}}" }}`"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` is not syncing any objects from the datasource `{{ "{{" }} $labels.datasource {{ "}}" }}`. This may happen
+          when the datasource (OpenStack, Prometheus, etc.) is down or the sync
+          module is misconfigured. No immediate action is needed, since the sync
+          module will retry the sync operation and the currently synced data will
+          be kept. However, when this problem persists for a longer time the
+          service will have a less recent view of the datacenter.
+
+    - alert: CortexCinderDatasourceUnready
+      expr: cortex_datasource_state{domain="cinder",state!="ready"} != 0
+      for: 60m
+      labels:
+        context: datasources
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Datasource `{{ "{{" }} $labels.datasource {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the datasource
+          connectivity or configuration. It is recommended to investigate the
+          datasource status and logs for more details.
+
+    - alert: CortexCinderKnowledgeUnready
+      expr: cortex_knowledge_state{domain="cinder",state!="ready"} != 0
+      for: 60m
+      labels:
+        context: knowledge
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Knowledge `{{ "{{" }} $labels.knowledge {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the knowledge
+          configuration. It is recommended to investigate the
+          knowledge status and logs for more details.
+
+    - alert: CortexCinderDecisionsWithErrors
+      expr: cortex_decision_state{domain="cinder",state="error"} > 0
+      for: 5m
+      labels:
+        context: decisions
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Some decisions are in error state for operator `{{ "{{" }} $labels.operator {{ "}}" }}`"
+        description: >
+          The cortex scheduling pipeline generated decisions that are in error state.
+          This may indicate issues with the decision logic or the underlying infrastructure.
+          It is recommended to investigate the decision logs and the state of the
+          volumes being processed.
+
+    - alert: CortexCinderTooManyDecisionsWaiting
+      expr: cortex_decision_state{domain="cinder",state="waiting"} > 10
+      for: 5m
+      labels:
+        context: decisions
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Too many decisions are in waiting state for operator `{{ "{{" }} $labels.operator {{ "}}" }}`"
+        description: >
+          The cortex scheduling pipeline has a high number of decisions for which
+          no target host has been assigned yet.
+
+          This may indicate a backlog in processing or issues with the decision logic.
+          It is recommended to investigate the decision logs and the state of the
+          volumes being processed.
+
+    - alert: CortexCinderKPIUnready
+      expr: |
+        cortex_kpi_state{domain="cinder",state!="ready"} != 0
+      for: 60m
+      labels:
+        context: kpis
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "KPI `{{ "{{" }} $labels.kpi {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the KPI
+          configuration. It is recommended to investigate the
+          KPI status and logs for more details.
+
+    - alert: CortexCinderPipelineUnready
+      expr: cortex_pipeline_state{domain="cinder",state!="ready"} != 0
+      for: 5m
+      labels:
+        context: pipelines
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Pipeline `{{ "{{" }} $labels.pipeline {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the pipeline
+          configuration. It is recommended to investigate the
+          pipeline status and logs for more details.
 {{- end }}
diff --git a/helm/bundles/cortex-ironcore/alerts/ironcore.alerts.yaml b/helm/bundles/cortex-ironcore/alerts/ironcore.alerts.yaml
deleted file mode 100644
index 0c72d9a92..000000000
--- a/helm/bundles/cortex-ironcore/alerts/ironcore.alerts.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-groups:
-- name: cortex-ironcore-alerts
-  rules: []
diff --git a/helm/bundles/cortex-ironcore/templates/alerts.yaml b/helm/bundles/cortex-ironcore/templates/alerts.yaml
deleted file mode 100644
index ca27396a5..000000000
--- a/helm/bundles/cortex-ironcore/templates/alerts.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-# Copyright SAP SE
-# SPDX-License-Identifier: Apache-2.0
-
-{{- if .Values.alerts.enabled }}
-apiVersion: monitoring.coreos.com/v1
-kind: PrometheusRule
-metadata:
-  name: cortex-ironcore-alerts
-  labels:
-    type: alerting-rules
-    prometheus: {{ required ".Values.alerts.prometheus missing" .Values.alerts.prometheus | quote }}
-spec:
-  {{- $files := .Files.Glob "alerts/*.alerts.yaml" }}
-  {{- range $path, $file := $files }}
-  {{ $file | toString | nindent 2 }}
-  {{- end }}
-{{- end }}
diff --git a/helm/bundles/cortex-manila/alerts/manila.alerts.yaml b/helm/bundles/cortex-manila/alerts/manila.alerts.yaml
deleted file mode 100644
index 2211d44fe..000000000
--- a/helm/bundles/cortex-manila/alerts/manila.alerts.yaml
+++ /dev/null
@@ -1,235 +0,0 @@
-groups:
-- name: cortex-manila-alerts
-  rules:
-  - alert: CortexManilaSchedulingDown
-    expr: |
-      up{pod=~"cortex-manila-scheduling-.*"} != 1 or
-      absent(up{pod=~"cortex-manila-scheduling-.*"})
-    for: 5m
-    labels:
-      context: liveness
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/down
-    annotations:
-      summary: "Cortex Scheduling for Manila is down"
-      description: >
-        The Cortex scheduling service is down. Scheduling requests from Manila will
-        not be served. This is no immediate problem, since Manila will continue
-        placing new VMs. However, the placement will be less desirable.
-
-  - alert: CortexManilaKnowledgeDown
-    expr: |
-      up{pod=~"cortex-manila-knowledge-.*"} != 1 or
-      absent(up{pod=~"cortex-manila-knowledge-.*"})
-    for: 5m
-    labels:
-      context: liveness
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/down
-    annotations:
-      summary: "Cortex Knowledge for Manila is down"
-      description: >
-        The Cortex Knowledge service is down. This is no immediate problem,
-        since cortex is still able to process requests,
-        but the quality of the responses may be affected.
-
-  - alert: CortexManilaHttpRequest400sTooHigh
-    expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-manila-metrics", status=~"4.+"}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/apierrors
-    annotations:
-      summary: "Manila Scheduler HTTP request 400 errors too high"
-      description: >
-        Manila Scheduler is responding to placement requests with HTTP 4xx
-        errors. This is expected when the scheduling request cannot be served
-        by Cortex. However, it could also indicate that the request format has
-        changed and Cortex is unable to parse it.
-
-  - alert: CortexManilaSchedulingHttpRequest500sTooHigh
-    expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-manila-metrics", status=~"5.+" }[5m]) > 0.1
-    for: 5m
-    labels:
-      context: api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/apierrors
-    annotations:
-      summary: "Manila Scheduler HTTP request 500 errors too high"
-      description: >
-        Manila Scheduler is responding to placement requests with HTTP 5xx errors.
-        This is not expected and indicates that Cortex is having some internal problem.
-        Manila will continue to place new VMs, but the placement will be less desirable.
-        Thus, no immediate action is needed.
-
-  - alert: CortexManilaHighMemoryUsage
-    expr: process_resident_memory_bytes{service="cortex-manila-metrics"} > 6000 * 1024 * 1024
-    for: 5m
-    labels:
-      context: memory
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/deployment
-    annotations:
-      summary: "`{{$labels.component}}` uses too much memory"
-      description: >
-        `{{$labels.component}}` should not be using more than 6000 MiB of memory. Usually it
-        should use much less, so there may be a memory leak or other changes
-        that are causing the memory usage to increase significantly.
-
-  - alert: CortexManilaHighCPUUsage
-    expr: rate(process_cpu_seconds_total{service="cortex-manila-metrics"}[1m]) > 0.5
-    for: 5m
-    labels:
-      context: cpu
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/deployment
-    annotations:
-      summary: "`{{$labels.component}}` uses too much CPU"
-      description: >
-        `{{$labels.component}}` should not be using more than 50% of a single CPU core. Usually
-        it should use much less, so there may be a CPU leak or other changes
-        that are causing the CPU usage to increase significantly.
-
-  - alert: CortexManilaTooManyDBConnectionAttempts
-    expr: rate(cortex_db_connection_attempts_total{service="cortex-manila-metrics"}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: db
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/database
-    annotations:
-      summary: "`{{$labels.component}}` is trying to connect to the database too often"
-      description: >
-        `{{$labels.component}}` is trying to connect to the database too often. This may happen
-        when the database is down or the connection parameters are misconfigured.
-
-  - alert: CortexManilaSyncNotSuccessful
-    expr: cortex_sync_request_processed_total{service="cortex-manila-metrics"} - cortex_sync_request_duration_seconds_count{service="cortex-manila-metrics"} > 0
-    for: 5m
-    labels:
-      context: syncstatus
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/datasources
-    annotations:
-      summary: "`{{$labels.component}}` Sync not successful"
-      description: >
-        `{{$labels.component}}` experienced an issue syncing data from the datasource `{{$labels.datasource}}`. This may
-        happen when the datasource (OpenStack, Prometheus, etc.) is down or
-        the sync module is misconfigured. No immediate action is needed, since
-        the sync module will retry the sync operation and the currently synced
-        data will be kept. However, when this problem persists for a longer
-        time the service will have a less recent view of the datacenter.
-
-  - alert: CortexManilaSyncObjectsDroppedToZero
-    expr: cortex_sync_objects{service="cortex-manila-metrics"} == 0
-    for: 60m
-    labels:
-      context: syncobjects
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/datasources
-    annotations:
-      summary: "`{{$labels.component}}` is not syncing any new data from `{{$labels.datasource}}`"
-      description: >
-        `{{$labels.component}}` is not syncing any objects from the datasource `{{$labels.datasource}}`. This may happen
-        when the datasource (OpenStack, Prometheus, etc.) is down or the sync
-        module is misconfigured. No immediate action is needed, since the sync
-        module will retry the sync operation and the currently synced data will
-        be kept. However, when this problem persists for a longer time the
-        service will have a less recent view of the datacenter.
-
-  - alert: CortexManilaDatasourceUnready
-    expr: cortex_datasource_state{domain="manila",state!="ready"} != 0
-    for: 60m
-    labels:
-      context: datasources
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/unready
-    annotations:
-      summary: "Datasource `{{$labels.datasource}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the datasource
-        connectivity or configuration. It is recommended to investigate the
-        datasource status and logs for more details.
-
-  - alert: CortexManilaKnowledgeUnready
-    expr: cortex_knowledge_state{domain="manila",state!="ready"} != 0
-    for: 60m
-    labels:
-      context: knowledge
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/unready
-    annotations:
-      summary: "Knowledge `{{$labels.knowledge}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the knowledge
-        configuration. It is recommended to investigate the
-        knowledge status and logs for more details.
-
-  - alert: CortexManilaKPIUnready
-    expr: |
-      cortex_kpi_state{domain="manila",state!="ready"} != 0
-    for: 60m
-    labels:
-      context: kpis
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/unready
-    annotations:
-      summary: "KPI `{{$labels.kpi}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the KPI
-        configuration. It is recommended to investigate the
-        KPI status and logs for more details.
-
-  - alert: CortexManilaPipelineUnready
-    expr: cortex_pipeline_state{domain="manila",state!="ready"} != 0
-    for: 5m
-    labels:
-      context: kpis
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/unready
-    annotations:
-      summary: "Pipeline `{{$labels.pipeline}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the pipeline
-        configuration. It is recommended to investigate the
-        pipeline status and logs for more details.
diff --git a/helm/bundles/cortex-manila/templates/alerts.yaml b/helm/bundles/cortex-manila/templates/alerts.yaml
index 1f25b0354..ef36fe983 100644
--- a/helm/bundles/cortex-manila/templates/alerts.yaml
+++ b/helm/bundles/cortex-manila/templates/alerts.yaml
@@ -1,6 +1,10 @@
 # Copyright SAP SE
 # SPDX-License-Identifier: Apache-2.0
 
+# NOTE: This file is rendered by Helm. Prometheus templating directives
+# (e.g. {{ "{{" }} $labels.foo {{ "}}" }}) must be escaped using Style B:
+# write the opening delimiter as {{ "{{" }} and the closing one as {{ "}}" }}.
+
 {{- if .Values.alerts.enabled }}
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
@@ -10,8 +14,239 @@ metadata:
     type: alerting-rules
     prometheus: {{ required ".Values.alerts.prometheus missing" .Values.alerts.prometheus | quote }}
 spec:
-  {{- $files := .Files.Glob "alerts/*.alerts.yaml" }}
-  {{- range $path, $file := $files }}
-  {{ $file | toString | nindent 2 }}
-  {{- end }}
+  groups:
+  - name: cortex-manila-alerts
+    rules:
+    - alert: CortexManilaSchedulingDown
+      expr: |
+        up{pod=~"cortex-manila-scheduling-.*"} != 1 or
+        absent(up{pod=~"cortex-manila-scheduling-.*"})
+      for: 5m
+      labels:
+        context: liveness
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/down
+      annotations:
+        summary: "Cortex Scheduling for Manila is down"
+        description: >
+          The Cortex scheduling service is down. Scheduling requests from Manila will
+          not be served. This is no immediate problem, since Manila will continue
+          placing new shares. However, the placement will be less desirable.
+
+    - alert: CortexManilaKnowledgeDown
+      expr: |
+        up{pod=~"cortex-manila-knowledge-.*"} != 1 or
+        absent(up{pod=~"cortex-manila-knowledge-.*"})
+      for: 5m
+      labels:
+        context: liveness
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/down
+      annotations:
+        summary: "Cortex Knowledge for Manila is down"
+        description: >
+          The Cortex Knowledge service is down. This is no immediate problem,
+          since cortex is still able to process requests,
+          but the quality of the responses may be affected.
+
+    - alert: CortexManilaHttpRequest400sTooHigh
+      expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-manila-metrics", status=~"4.+"}[5m]) > 0.1
+      for: 5m
+      labels:
+        context: api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/apierrors
+      annotations:
+        summary: "Manila Scheduler HTTP request 400 errors too high"
+        description: >
+          Manila Scheduler is responding to placement requests with HTTP 4xx
+          errors. This is expected when the scheduling request cannot be served
+          by Cortex. However, it could also indicate that the request format has
+          changed and Cortex is unable to parse it.
+
+    - alert: CortexManilaSchedulingHttpRequest500sTooHigh
+      expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-manila-metrics", status=~"5.+" }[5m]) > 0.1
+      for: 5m
+      labels:
+        context: api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/apierrors
+      annotations:
+        summary: "Manila Scheduler HTTP request 500 errors too high"
+        description: >
+          Manila Scheduler is responding to placement requests with HTTP 5xx errors.
+          This is not expected and indicates that Cortex is having some internal problem.
+          Manila will continue to place new shares, but the placement will be less desirable.
+          Thus, no immediate action is needed.
+
+    - alert: CortexManilaHighMemoryUsage
+      expr: process_resident_memory_bytes{service="cortex-manila-metrics"} > 6000 * 1024 * 1024
+      for: 5m
+      labels:
+        context: memory
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/deployment
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` uses too much memory"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` should not be using more than 6000 MiB of memory. Usually it
+          should use much less, so there may be a memory leak or other changes
+          that are causing the memory usage to increase significantly.
+
+    - alert: CortexManilaHighCPUUsage
+      expr: rate(process_cpu_seconds_total{service="cortex-manila-metrics"}[1m]) > 0.5
+      for: 5m
+      labels:
+        context: cpu
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/deployment
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` uses too much CPU"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` should not be using more than 50% of a single CPU core. Usually
+          it should use much less, so there may be a CPU leak or other changes
+          that are causing the CPU usage to increase significantly.
+
+    - alert: CortexManilaTooManyDBConnectionAttempts
+      expr: rate(cortex_db_connection_attempts_total{service="cortex-manila-metrics"}[5m]) > 0.1
+      for: 5m
+      labels:
+        context: db
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/database
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` is trying to connect to the database too often"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` is trying to connect to the database too often. This may happen
+          when the database is down or the connection parameters are misconfigured.
+
+    - alert: CortexManilaSyncNotSuccessful
+      expr: cortex_sync_request_processed_total{service="cortex-manila-metrics"} - cortex_sync_request_duration_seconds_count{service="cortex-manila-metrics"} > 0
+      for: 5m
+      labels:
+        context: syncstatus
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/datasources
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` Sync not successful"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` experienced an issue syncing data from the datasource `{{ "{{" }} $labels.datasource {{ "}}" }}`. This may
+          happen when the datasource (OpenStack, Prometheus, etc.) is down or
+          the sync module is misconfigured. No immediate action is needed, since
+          the sync module will retry the sync operation and the currently synced
+          data will be kept. However, when this problem persists for a longer
+          time the service will have a less recent view of the datacenter.
+
+    - alert: CortexManilaSyncObjectsDroppedToZero
+      expr: cortex_sync_objects{service="cortex-manila-metrics"} == 0
+      for: 60m
+      labels:
+        context: syncobjects
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/datasources
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` is not syncing any new data from `{{ "{{" }} $labels.datasource {{ "}}" }}`"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` is not syncing any objects from the datasource `{{ "{{" }} $labels.datasource {{ "}}" }}`. This may happen
+          when the datasource (OpenStack, Prometheus, etc.) is down or the sync
+          module is misconfigured. No immediate action is needed, since the sync
+          module will retry the sync operation and the currently synced data will
+          be kept. However, when this problem persists for a longer time the
+          service will have a less recent view of the datacenter.
+
+    - alert: CortexManilaDatasourceUnready
+      expr: cortex_datasource_state{domain="manila",state!="ready"} != 0
+      for: 60m
+      labels:
+        context: datasources
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/unready
+      annotations:
+        summary: "Datasource `{{ "{{" }} $labels.datasource {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the datasource
+          connectivity or configuration. It is recommended to investigate the
+          datasource status and logs for more details.
+
+    - alert: CortexManilaKnowledgeUnready
+      expr: cortex_knowledge_state{domain="manila",state!="ready"} != 0
+      for: 60m
+      labels:
+        context: knowledge
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/unready
+      annotations:
+        summary: "Knowledge `{{ "{{" }} $labels.knowledge {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the knowledge
+          configuration. It is recommended to investigate the
+          knowledge status and logs for more details.
+
+    - alert: CortexManilaKPIUnready
+      expr: |
+        cortex_kpi_state{domain="manila",state!="ready"} != 0
+      for: 60m
+      labels:
+        context: kpis
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/unready
+      annotations:
+        summary: "KPI `{{ "{{" }} $labels.kpi {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the KPI
+          configuration. It is recommended to investigate the
+          KPI status and logs for more details.
+
+    - alert: CortexManilaPipelineUnready
+      expr: cortex_pipeline_state{domain="manila",state!="ready"} != 0
+      for: 5m
+      labels:
+        context: pipelines
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/unready
+      annotations:
+        summary: "Pipeline `{{ "{{" }} $labels.pipeline {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the pipeline
+          configuration. It is recommended to investigate the
+          pipeline status and logs for more details.
 {{- end }}
diff --git a/helm/bundles/cortex-nova/alerts/nova.alerts.yaml b/helm/bundles/cortex-nova/alerts/nova.alerts.yaml
deleted file mode 100644
index 46e93ef05..000000000
--- a/helm/bundles/cortex-nova/alerts/nova.alerts.yaml
+++ /dev/null
@@ -1,609 +0,0 @@
-groups:
-- name: cortex-nova-alerts
-  rules:
-  - alert: CortexNovaSchedulingDown
-    expr: |
-      up{pod=~"cortex-nova-scheduling-.*"} != 1 or
-      absent(up{pod=~"cortex-nova-scheduling-.*"})
-    for: 5m
-    labels:
-      context: liveness
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: critical
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/down
-    annotations:
-      summary: "Cortex Scheduling for Nova is down"
-      description: >
-        The Cortex scheduling service is down. Scheduling requests from Nova will
-        not be served. This is non-critical for vmware virtual machines, but
-        blocks kvm virtual machines from being scheduled. Thus, it is
-        recommended to immediately investigate and resolve the issue.
-
-  - alert: CortexNovaKnowledgeDown
-    expr: |
-      up{pod=~"cortex-nova-knowledge-.*"} != 1 or
-      absent(up{pod=~"cortex-nova-knowledge-.*"})
-    for: 5m
-    labels:
-      context: liveness
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/down
-    annotations:
-      summary: "Cortex Knowledge for Nova is down"
-      description: >
-        The Cortex Knowledge service is down. This is no immediate problem,
-        since cortex is still able to process requests,
-        but the quality of the responses may be affected.
-
-  - alert: CortexNovaDeschedulerPipelineErroring
-    expr: delta(cortex_detector_pipeline_run_duration_seconds_count{component="nova-scheduling", error="true"}[2m]) > 0
-    for: 5m
-    labels:
-      context: descheduler
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Descheduler pipeline is erroring."
-      description: >
-        The Cortex descheduler pipeline is encountering errors during its execution.
-        This may indicate issues with the descheduling logic or the underlying infrastructure.
-        It is recommended to investigate the descheduler logs and the state of the VMs being processed.
-
-  - alert: CortexNovaHttpRequest400sTooHigh
-    expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-nova-metrics", status=~"4.+"}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/api-errors
-    annotations:
-      summary: "Nova Scheduler HTTP request 400 errors too high"
-      description: >
-        Nova Scheduler is responding to placement requests with HTTP 4xx
-        errors. This is expected when the scheduling request cannot be served
-        by Cortex. However, it could also indicate that the request format has
-        changed and Cortex is unable to parse it.
-
-  - alert: CortexNovaSchedulingHttpRequest500sTooHigh
-    expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-nova-metrics", status=~"5.+" }[5m]) > 0.1
-    for: 5m
-    labels:
-      context: api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/api-errors
-    annotations:
-      summary: "Nova Scheduler HTTP request 500 errors too high"
-      description: >
-        Nova Scheduler is responding to placement requests with HTTP 5xx errors.
-        This is not expected and indicates that Cortex is having some internal problem.
-        Nova will continue to place new VMs, but the placement will be less desirable.
-        Thus, no immediate action is needed.
-
-  - alert: CortexNovaHighMemoryUsage
-    expr: process_resident_memory_bytes{service="cortex-nova-metrics"} > 6000 * 1024 * 1024
-    for: 5m
-    labels:
-      context: memory
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/deployment
-    annotations:
-      summary: "`{{$labels.component}}` uses too much memory"
-      description: >
-        `{{$labels.component}}` should not be using more than 6000 MiB of memory. Usually it
-        should use much less, so there may be a memory leak or other changes
-        that are causing the memory usage to increase significantly.
-
-  - alert: CortexNovaHighCPUUsage
-    expr: rate(process_cpu_seconds_total{service="cortex-nova-metrics"}[1m]) > 0.5
-    for: 5m
-    labels:
-      context: cpu
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/deployment
-    annotations:
-      summary: "`{{$labels.component}}` uses too much CPU"
-      description: >
-        `{{$labels.component}}` should not be using more than 50% of a single CPU core. Usually
-        it should use much less, so there may be a CPU leak or other changes
-        that are causing the CPU usage to increase significantly.
-
-  - alert: CortexNovaTooManyDBConnectionAttempts
-    expr: rate(cortex_db_connection_attempts_total{service="cortex-nova-metrics"}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: db
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/database
-    annotations:
-      summary: "`{{$labels.component}}` is trying to connect to the database too often"
-      description: >
-        `{{$labels.component}}` is trying to connect to the database too often. This may happen
-        when the database is down or the connection parameters are misconfigured.
-
-  - alert: CortexNovaSyncNotSuccessful
-    expr: cortex_sync_request_processed_total{service="cortex-nova-metrics"} - cortex_sync_request_duration_seconds_count{service="cortex-nova-metrics"} > 0
-    for: 5m
-    labels:
-      context: syncstatus
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/datasources
-    annotations:
-      summary: "`{{$labels.component}}` Sync not successful"
-      description: >
-        `{{$labels.component}}` experienced an issue syncing data from the datasource `{{$labels.datasource}}`. This may
-        happen when the datasource (OpenStack, Prometheus, etc.) is down or
-        the sync module is misconfigured. No immediate action is needed, since
-        the sync module will retry the sync operation and the currently synced
-        data will be kept. However, when this problem persists for a longer
-        time the service will have a less recent view of the datacenter.
-
-  - alert: CortexNovaSyncObjectsDroppedToZero
-    expr: cortex_sync_objects{service="cortex-nova-metrics", datasource!="openstack_migrations"} == 0
-    for: 60m
-    labels:
-      context: syncobjects
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/datasources
-    annotations:
-      summary: "`{{$labels.component}}` is not syncing any new data from `{{$labels.datasource}}`"
-      description: >
-        `{{$labels.component}}` is not syncing any objects from the datasource `{{$labels.datasource}}`. This may happen
-        when the datasource (OpenStack, Prometheus, etc.) is down or the sync
-        module is misconfigured. No immediate action is needed, since the sync
-        module will retry the sync operation and the currently synced data will
-        be kept. However, when this problem persists for a longer time the
-        service will have a less recent view of the datacenter.
-
-  - alert: CortexNovaDatasourceUnready
-    expr: cortex_datasource_state{domain="nova",state!="ready"} != 0
-    for: 60m
-    labels:
-      context: datasources
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/unready
-    annotations:
-      summary: "Datasource `{{$labels.datasource}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the datasource
-        connectivity or configuration. It is recommended to investigate the
-        datasource status and logs for more details.
-
-  - alert: CortexNovaKnowledgeUnready
-    expr: cortex_knowledge_state{domain="nova",state!="ready"} != 0
-    for: 60m
-    labels:
-      context: knowledge
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/unready
-    annotations:
-      summary: "Knowledge `{{$labels.knowledge}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the knowledge
-        configuration. It is recommended to investigate the
-        knowledge status and logs for more details.
-
-  - alert: CortexNovaDecisionsWithErrors
-    expr: cortex_decision_state{domain="nova",state="error"} > 0
-    for: 5m
-    labels:
-      context: decisions
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Some decisions are in error state for operator `{{$labels.operator}}`"
-      description: >
-        The cortex scheduling pipeline generated decisions that are in error state.
-        This may indicate issues with the decision logic or the underlying infrastructure.
-        It is recommended to investigate the decision logs and the state of the
-        VMs being processed.
-
-  - alert: CortexNovaTooManyDecisionsWaiting
-    expr: cortex_decision_state{domain="nova",state="waiting"} > 10
-    for: 5m
-    labels:
-      context: decisions
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Too many decisions are in waiting state for operator `{{$labels.operator}}`"
-      description: >
-        The cortex scheduling pipeline has a high number of decisions for which
-        no target host has been assigned yet.
-
-        This may indicate a backlog in processing or issues with the decision logic.
-        It is recommended to investigate the decision logs and the state of the
-        VMs being processed.
-
-  - alert: CortexNovaKPIUnready
-    expr: |
-      cortex_kpi_state{domain="nova",state!="ready"} != 0
-    for: 60m
-    labels:
-      context: kpis
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/unready
-    annotations:
-      summary: "KPI `{{$labels.kpi}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the KPI
-        configuration. It is recommended to investigate the
-        KPI status and logs for more details.
-
-  - alert: CortexNovaPipelineUnready
-    expr: cortex_pipeline_state{domain="nova",state!="ready"} != 0
-    for: 5m
-    labels:
-      context: pipelines
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/unready
-    annotations:
-      summary: "Pipeline `{{$labels.pipeline}}` is in `{{$labels.state}}` state"
-      description: >
-        This may indicate issues with the pipeline
-        configuration. It is recommended to investigate the
-        pipeline status and logs for more details.
-
-  - alert: CortexNovaDoesntFindValidKVMHosts
-    expr: sum by (az, hvtype) (increase(cortex_vm_faults{hvtype=~"CH|QEMU",faultmsg=~".*No valid host was found.*",faultmsg!~".*No such host.*"}[5m])) > 0
-    for: 5m
-    labels:
-      context: scheduling
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/scheduling
-    annotations:
-      summary: "Nova scheduling cannot find valid KVM hosts"
-      description: >
-        Cortex is seeing new faulty vms in `{{$labels.az}}` where Nova scheduling
-        failed to find a valid `{{$labels.hvtype}}` host. This may indicate
-        capacity issues, misconfigured filters, or resource constraints in the
-        datacenter. Investigate the affected VMs and hypervisor availability.
-
-  - alert: CortexNovaNewDatasourcesNotReconciling
-    expr: count by(datasource) (cortex_datasource_seconds_until_reconcile{queued="false",domain="nova"}) > 0
-    for: 60m
-    labels:
-      context: datasources
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/datasources
-    annotations:
-      summary: "New datasource `{{$labels.datasource}}` has not reconciled"
-      description: >
-        A new datasource `{{$labels.datasource}}` has been added but has not
-        completed its first reconciliation yet. This may indicate issues with
-        the datasource controller's workqueue overprioritizing other datasources.
-
-  - alert: CortexNovaExistingDatasourcesLackingBehind
-    expr: |
-      sum by(datasource) (cortex_datasource_seconds_until_reconcile{queued="true",domain="nova"}) < -600
-      and on(datasource) cortex_datasource_state{state="ready",domain="nova"} == 1
-    for: 10m
-    labels:
-      context: datasources
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/datasources
-    annotations:
-      summary: "Existing datasource `{{$labels.datasource}}` is lacking behind"
-      description: >
-        An existing datasource `{{$labels.datasource}}` has been queued for
-        reconciliation for more than 10 minutes. This may indicate issues with
-        the datasource controller's workqueue or that this or another datasource
-        is taking an unusually long time to reconcile.
-
-  - alert: CortexNovaReconcileErrorsHigh
-    expr: |
-      (sum by (controller) (rate(controller_runtime_reconcile_errors_total{service="cortex-nova-metrics"}[5m])))
-      / (sum by (controller) (rate(controller_runtime_reconcile_total{service="cortex-nova-metrics"}[5m]))) > 0.1
-    for: 15m
-    labels:
-      context: controller-errors
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/reconciles
-    annotations:
-      summary: "Controller reconcile error rate >10%"
-      description: >
-        More than 10% of controller reconciles are resulting in errors. This may
-        indicate issues with the controller logic, connectivity problems, or
-        external factors causing failures. Check the controller logs for error
-        details and investigate the affected resources.
-
-  - alert: CortexNovaReconcileDurationHigher10Min
-    expr: |
-      (sum by (controller) (rate(controller_runtime_reconcile_time_seconds_sum{service="cortex-nova-metrics"}[5m])))
-      / (sum by (controller) (rate(controller_runtime_reconcile_time_seconds_count{service="cortex-nova-metrics"}[5m]))) > 600
-    for: 15m
-    labels:
-      context: controller-duration
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/reconciles
-    annotations:
-      summary: "Controller reconciliation takes longer than ({{ $value | humanizeDuration }})"
-      description: "Reconcile duration higher than 10m while reconciling {{ $labels.controller }}"
-
-  - alert: CortexNovaWorkqueueNotDrained
-    expr: |
-      sum by (name) (workqueue_depth{service="cortex-nova-metrics"}) > 0
-    for: 60m
-    labels:
-      context: controller-workqueue
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/datasources
-    annotations:
-      summary: "Controller {{ $labels.name }}'s backlog is not being drained."
-      description: >
-        The workqueue for controller {{ $labels.name }} has a backlog that is
-        not being drained. This may indicate that the controller is overwhelmed
-        with work or is stuck on certain resources. Check the controller logs
-        and the state of the resources it manages for more details.
-
-  - alert: CortexNovaWebhookLatencyHigh
-    expr: |
-      histogram_quantile(0.9, avg(rate(controller_runtime_webhook_latency_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (webhook, le)) > 0.2
-    for: 15m
-    labels:
-      context: controller-webhook
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Controller webhook {{ $labels.webhook }} latency is high"
-      description: >
-        The latency for webhook {{ $labels.webhook }} is higher than expected (p90 > 200ms).
-        This may indicate performance issues with the webhook server or the logic it executes.
-        Check the webhook server logs and monitor its resource usage for more insights.
-
-  - alert: CortexNovaWebhookErrorsHigh
-    expr: |
-      (sum by (webhook) (rate(controller_runtime_webhook_requests_total{code!="200", service="cortex-nova-metrics"}[5m])))
-      / (sum by (webhook) (rate(controller_runtime_webhook_requests_total{service="cortex-nova-metrics"}[5m]))) > 0.1
-    for: 15m
-    labels:
-      context: controller-webhook
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-    annotations:
-      summary: "Controller webhook {{ $labels.webhook }} is experiencing errors"
-      description: >
-        The webhook {{ $labels.webhook }} has experienced errors in the last 5 minutes.
-        This may indicate issues with the webhook logic, connectivity problems, or
-        external factors causing failures. Check the webhook server logs for error
-        details and investigate the affected resources.
-
-  # Committed Resource Info API
-  - alert: CortexNovaCommittedResourceInfoUnavailable
-    expr: |
-      rate(cortex_committed_resource_info_api_requests_total{service="cortex-nova-metrics", status_code="503"}[5m]) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/committed-resource-api-errors
-    annotations:
-      summary: "Committed Resource info API is unavailable"
-      description: >
-        The committed resource info API (Limes LIQUID integration) has been returning
-        503 Service Unavailable for more than 5 minutes. This typically means the
-        flavor group knowledge CRD is not ready or missing. Limes cannot discover
-        available committed resources until the issue is resolved.
-
-  # Committed Resource Change API
-  - alert: CortexNovaCommittedResourceChangeErrors
-    expr: |
-      rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/committed-resource-api-errors
-    annotations:
-      summary: "Committed Resource change API HTTP 5xx errors"
-      description: >
-        The committed resource change API (Limes LIQUID integration) is returning
-        HTTP 5xx errors. This is not expected and indicates an internal problem
-        processing commitment changes. Limes will retry, but new commitments may
-        not be fulfilled until the issue is resolved.
-
-  - alert: CortexNovaCommittedResourceRejectionRateTooHigh
-    expr: |
-      (
-        sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics", result="rejected", dry_run="false"}[15m]))
-        / sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics", dry_run="false"}[15m]))
-      ) > 0.3
-      and on() sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics", dry_run="false"}[15m])) > 0
-    for: 15m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/committed-resource-performance
-    annotations:
-      summary: "Committed Resource rejection rate too high ({{ $value | humanizePercentage }})"
-      description: >
-        More than 30% of commitment changes have been rejected over the last 15 minutes.
-        This may indicate insufficient capacity to fulfill new commitments. Rejected
-        commitments are rolled back.
-
-  - alert: CortexNovaCommittedResourceTimeoutsTooHigh
-    expr: increase(cortex_committed_resource_change_api_timeouts_total{service="cortex-nova-metrics", dry_run="false"}[10m]) > 0
-    for: 1m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/committed-resource-performance
-    annotations:
-      summary: "Committed Resource change API timeout detected"
-      description: >
-        A commitment change request timed out after the configured deadline.
-        Timeouts indicate the scheduling pipeline could not place reservations in time.
-        Affected changes are rolled back. Investigate scheduler performance or reservation backlog.
-
-  - alert: CortexNovaCommittedResourceChangeLatencyTooHigh
-    expr: |
-      histogram_quantile(0.95, sum(rate(cortex_committed_resource_change_api_request_duration_seconds_bucket{service="cortex-nova-metrics", dry_run="false"}[5m])) by (le)) >= 10
-      and on() sum(rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", dry_run="false"}[5m])) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/committed-resource-performance
-    annotations:
-      summary: "Committed Resource change API p95 latency >= 10s"
-      description: >
-        The committed resource change API p95 latency has reached or exceeded 10 seconds,
-        approaching the configured watch timeout. Requests close to the timeout are at risk
-        of being rolled back. Investigate scheduler performance or reservation backlog.
-
-  # Committed Resource Capacity API
-  - alert: CortexNovaCommittedResourceCapacityErrors
-    expr: |
-      rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/committed-resource-capacity
-    annotations:
-      summary: "Committed Resource capacity API HTTP 5xx errors"
-      description: >
-        The committed resource capacity API (Limes LIQUID integration) is returning
-        HTTP 5xx errors. This indicates internal problems calculating cluster capacity.
-        Limes may receive stale or incomplete capacity data.
-
-  - alert: CortexNovaCommittedResourceCapacityDroppedToZero
-    expr: |
-      (cortex_committed_resource_reported_capacity_gib{service="cortex-nova-metrics"} == 0)
-      and on(resource, az) (cortex_committed_resource_reported_capacity_gib{service="cortex-nova-metrics"} offset 30m > 0)
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/committed-resource-capacity
-    annotations:
-      summary: "Committed Resource capacity for {{ $labels.resource }} in {{ $labels.az }} dropped to zero"
-      description: >
-        The reported capacity for committed resource {{ $labels.resource }} in
-        availability zone {{ $labels.az }} has dropped from a positive value to zero.
-        This may mean hypervisors in that AZ are fully utilized for the corresponding
-        flavor group and no further committed resources can be placed there.
-
-  # Committed Resource Usage API
-  - alert: CortexNovaCommittedResourceUsageErrors
-    expr: |
-      rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/committed-resource-api-errors
-    annotations:
-      summary: "Committed Resource usage API HTTP 5xx errors"
-      description: >
-        The committed resource usage API (Limes LIQUID integration) is returning
-        HTTP 5xx errors. This indicates internal problems fetching reservation or
-        Nova server data. Limes may receive stale or incomplete usage data.
-
-  # Committed Resource Quota API
-  - alert: CortexNovaCommittedResourceQuotaErrors
-    expr: |
-      rate(cortex_committed_resource_quota_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0
-    for: 5m
-    labels:
-      context: committed-resource-api
-      dashboard: cortex-status-dashboard/cortex-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/committed-resource-api-errors
-    annotations:
-      summary: "Committed Resource quota API HTTP 5xx errors"
-      description: >
-        The committed resource quota API (Limes LIQUID integration) is returning
-        HTTP 5xx errors. This indicates internal problems computing or applying
-        quota. Limes may not be able to enforce committed resource quotas.
diff --git a/helm/bundles/cortex-nova/templates/alerts.yaml b/helm/bundles/cortex-nova/templates/alerts.yaml
index d2964e864..6f3fabef2 100644
--- a/helm/bundles/cortex-nova/templates/alerts.yaml
+++ b/helm/bundles/cortex-nova/templates/alerts.yaml
@@ -1,6 +1,10 @@
 # Copyright SAP SE
 # SPDX-License-Identifier: Apache-2.0
 
+# NOTE: Helm renders this whole file, YAML comments included. Prometheus
+# templating directives must therefore be escaped by swapping the outer
+# delimiters for quoted ones, e.g. {{ "{{" }} $labels.foo {{ "}}" }}.
+
 {{- if .Values.alerts.enabled }}
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
@@ -10,8 +14,615 @@ metadata:
     type: alerting-rules
     prometheus: {{ required ".Values.alerts.prometheus missing" .Values.alerts.prometheus | quote }}
 spec:
-  {{- $files := .Files.Glob "alerts/*.alerts.yaml" }}
-  {{- range $path, $file := $files }}
-  {{ $file | toString | nindent 2 }}
-  {{- end }}
+  groups:
+  - name: cortex-nova-alerts
+    rules:
+    - alert: CortexNovaSchedulingDown  # Fires when `up` of a cortex-nova-scheduling pod is not 1, or no such series exists, for 5m.
+      expr: |
+        up{pod=~"cortex-nova-scheduling-.*"} != 1 or
+        absent(up{pod=~"cortex-nova-scheduling-.*"})
+      for: 5m
+      labels:
+        context: liveness
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: {{ if .Values.kvm.enabled }}critical{{ else }}warning{{ end }}  # critical only when .Values.kvm.enabled is truthy
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/down
+      annotations:
+        summary: "Cortex Scheduling for Nova is down"
+        description: >
+          The Cortex scheduling service is down. Scheduling requests from Nova will
+          not be served. This is non-critical for vmware virtual machines, but
+          blocks kvm virtual machines from being scheduled. Thus, it is
+          recommended to immediately investigate and resolve the issue.
+
+    - alert: CortexNovaKnowledgeDown  # Fires when `up` of a cortex-nova-knowledge pod is not 1, or no such series exists, for 5m.
+      expr: |
+        up{pod=~"cortex-nova-knowledge-.*"} != 1 or
+        absent(up{pod=~"cortex-nova-knowledge-.*"})
+      for: 5m
+      labels:
+        context: liveness
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/down
+      annotations:
+        summary: "Cortex Knowledge for Nova is down"
+        description: >
+          The Cortex Knowledge service is down. This is no immediate problem,
+          since cortex is still able to process requests,
+          but the quality of the responses may be affected.
+
+    - alert: CortexNovaDeschedulerPipelineErroring  # Fires while error="true" detector pipeline runs keep being counted for 5m; increase() is used because *_count is a counter.
+      expr: increase(cortex_detector_pipeline_run_duration_seconds_count{component="nova-scheduling", error="true"}[2m]) > 0
+      for: 5m
+      labels:
+        context: descheduler
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Descheduler pipeline is erroring."
+        description: >
+          The Cortex descheduler pipeline is encountering errors during its execution.
+          This may indicate issues with the descheduling logic or the underlying infrastructure.
+          It is recommended to investigate the descheduler logs and the state of the VMs being processed.
+
+    - alert: CortexNovaHttpRequest400sTooHigh  # Fires when the 5m rate of scheduler API requests with a status matching "4.+" stays above 0.1/s for 5m.
+      expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-nova-metrics", status=~"4.+"}[5m]) > 0.1
+      for: 5m
+      labels:
+        context: api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/api-errors
+      annotations:
+        summary: "Nova Scheduler HTTP request 400 errors too high"
+        description: >
+          Nova Scheduler is responding to placement requests with HTTP 4xx
+          errors. This is expected when the scheduling request cannot be served
+          by Cortex. However, it could also indicate that the request format has
+          changed and Cortex is unable to parse it.
+
+    - alert: CortexNovaSchedulingHttpRequest500sTooHigh  # Fires when the 5m rate of scheduler API requests with a status matching "5.+" stays above 0.1/s for 5m.
+      expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-nova-metrics", status=~"5.+" }[5m]) > 0.1
+      for: 5m
+      labels:
+        context: api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/api-errors
+      annotations:
+        summary: "Nova Scheduler HTTP request 500 errors too high"
+        description: >
+          Nova Scheduler is responding to placement requests with HTTP 5xx errors.
+          This is not expected and indicates that Cortex is having some internal problem.
+          Nova will continue to place new VMs, but the placement will be less desirable.
+          Thus, no immediate action is needed.
+
+    - alert: CortexNovaHighMemoryUsage  # Fires when resident memory stays above .Values.alerts.thresholds.highMemoryMiB (converted from MiB to bytes in expr) for 5m.
+      expr: process_resident_memory_bytes{service="cortex-nova-metrics"} > {{ .Values.alerts.thresholds.highMemoryMiB }} * 1024 * 1024
+      for: 5m
+      labels:
+        context: memory
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/deployment
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` uses too much memory"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` should not be using more than {{ .Values.alerts.thresholds.highMemoryMiB }} MiB of memory. Usually it
+          should use much less, so there may be a memory leak or other changes
+          that are causing the memory usage to increase significantly.
+
+    - alert: CortexNovaHighCPUUsage  # Fires when the 1m rate of process_cpu_seconds_total stays above 0.5 (half a core) for 5m.
+      expr: rate(process_cpu_seconds_total{service="cortex-nova-metrics"}[1m]) > 0.5
+      for: 5m
+      labels:
+        context: cpu
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/deployment
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` uses too much CPU"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` should not be using more than 50% of a single CPU core. Usually
+          it should use much less, so there may be a CPU leak or other changes
+          that are causing the CPU usage to increase significantly.
+
+    - alert: CortexNovaTooManyDBConnectionAttempts  # Fires when the 5m rate of cortex_db_connection_attempts_total stays above 0.1/s for 5m.
+      expr: rate(cortex_db_connection_attempts_total{service="cortex-nova-metrics"}[5m]) > 0.1
+      for: 5m
+      labels:
+        context: db
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/database
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` is trying to connect to the database too often"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` is trying to connect to the database too often. This may happen
+          when the database is down or the connection parameters are misconfigured.
+
+    - alert: CortexNovaSyncNotSuccessful  # Fires when cortex_sync_request_processed_total exceeds cortex_sync_request_duration_seconds_count for 5m.
+      expr: cortex_sync_request_processed_total{service="cortex-nova-metrics"} - cortex_sync_request_duration_seconds_count{service="cortex-nova-metrics"} > 0
+      for: 5m
+      labels:
+        context: syncstatus
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/datasources
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` Sync not successful"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` experienced an issue syncing data from the datasource `{{ "{{" }} $labels.datasource {{ "}}" }}`. This may
+          happen when the datasource (OpenStack, Prometheus, etc.) is down or
+          the sync module is misconfigured. No immediate action is needed, since
+          the sync module will retry the sync operation and the currently synced
+          data will be kept. However, when this problem persists for a longer
+          time the service will have a less recent view of the datacenter.
+
+    - alert: CortexNovaSyncObjectsDroppedToZero  # Fires when cortex_sync_objects is 0 for 60m; the openstack_migrations datasource is excluded.
+      expr: cortex_sync_objects{service="cortex-nova-metrics", datasource!="openstack_migrations"} == 0
+      for: 60m
+      labels:
+        context: syncobjects
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/datasources
+      annotations:
+        summary: "`{{ "{{" }} $labels.component {{ "}}" }}` is not syncing any new data from `{{ "{{" }} $labels.datasource {{ "}}" }}`"
+        description: >
+          `{{ "{{" }} $labels.component {{ "}}" }}` is not syncing any objects from the datasource `{{ "{{" }} $labels.datasource {{ "}}" }}`. This may happen
+          when the datasource (OpenStack, Prometheus, etc.) is down or the sync
+          module is misconfigured. No immediate action is needed, since the sync
+          module will retry the sync operation and the currently synced data will
+          be kept. However, when this problem persists for a longer time the
+          service will have a less recent view of the datacenter.
+
+    - alert: CortexNovaDatasourceUnready  # Fires when cortex_datasource_state for domain "nova" is non-zero in any state other than "ready" for 60m.
+      expr: cortex_datasource_state{domain="nova",state!="ready"} != 0
+      for: 60m
+      labels:
+        context: datasources
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/unready
+      annotations:
+        summary: "Datasource `{{ "{{" }} $labels.datasource {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the datasource
+          connectivity or configuration. It is recommended to investigate the
+          datasource status and logs for more details.
+
+    - alert: CortexNovaKnowledgeUnready  # Fires when cortex_knowledge_state for domain "nova" is non-zero in any state other than "ready" for 60m.
+      expr: cortex_knowledge_state{domain="nova",state!="ready"} != 0
+      for: 60m
+      labels:
+        context: knowledge
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/unready
+      annotations:
+        summary: "Knowledge `{{ "{{" }} $labels.knowledge {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the knowledge
+          configuration. It is recommended to investigate the
+          knowledge status and logs for more details.
+
+    - alert: CortexNovaDecisionsWithErrors  # Fires when cortex_decision_state for domain "nova" in state "error" stays above 0 for 5m.
+      expr: cortex_decision_state{domain="nova",state="error"} > 0
+      for: 5m
+      labels:
+        context: decisions
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Some decisions are in error state for operator `{{ "{{" }} $labels.operator {{ "}}" }}`"
+        description: >
+          The cortex scheduling pipeline generated decisions that are in error state.
+          This may indicate issues with the decision logic or the underlying infrastructure.
+          It is recommended to investigate the decision logs and the state of the
+          VMs being processed.
+
+    - alert: CortexNovaTooManyDecisionsWaiting  # Fires when cortex_decision_state for domain "nova" in state "waiting" stays above 10 for 5m.
+      expr: cortex_decision_state{domain="nova",state="waiting"} > 10
+      for: 5m
+      labels:
+        context: decisions
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Too many decisions are in waiting state for operator `{{ "{{" }} $labels.operator {{ "}}" }}`"
+        description: >
+          The cortex scheduling pipeline has a high number of decisions for which
+          no target host has been assigned yet.
+
+          This may indicate a backlog in processing or issues with the decision logic.
+          It is recommended to investigate the decision logs and the state of the
+          VMs being processed.
+
+    - alert: CortexNovaKPIUnready  # Fires when cortex_kpi_state for domain "nova" is non-zero in any state other than "ready" for 60m.
+      expr: |
+        cortex_kpi_state{domain="nova",state!="ready"} != 0
+      for: 60m
+      labels:
+        context: kpis
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/unready
+      annotations:
+        summary: "KPI `{{ "{{" }} $labels.kpi {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the KPI
+          configuration. It is recommended to investigate the
+          KPI status and logs for more details.
+
+    - alert: CortexNovaPipelineUnready
+      expr: cortex_pipeline_state{domain="nova",state!="ready"} != 0
+      for: 5m
+      labels:
+        context: pipelines
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/unready
+      annotations:
+        summary: "Pipeline `{{ "{{" }} $labels.pipeline {{ "}}" }}` is in `{{ "{{" }} $labels.state {{ "}}" }}` state"
+        description: >
+          This may indicate issues with the pipeline
+          configuration. It is recommended to investigate the
+          pipeline status and logs for more details.
+
+    {{- if .Values.kvm.enabled }}
+    - alert: CortexNovaDoesntFindValidKVMHosts  # only rendered when .Values.kvm.enabled is set (see the surrounding if/end)
+      expr: sum by (az, hvtype) (increase(cortex_vm_faults{hvtype=~"CH|QEMU",faultmsg=~".*No valid host was found.*",faultmsg!~".*No such host.*"}[5m])) > 0
+      for: 5m  # counts new faults per az/hvtype for CH and QEMU whose message matches "No valid host was found" but not "No such host"
+      labels:
+        context: scheduling
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/scheduling
+      annotations:
+        summary: "Nova scheduling cannot find valid KVM hosts"
+        description: >
+          Cortex is seeing new faulty vms in `{{ "{{" }} $labels.az {{ "}}" }}` where Nova scheduling
+          failed to find a valid `{{ "{{" }} $labels.hvtype {{ "}}" }}` host. This may indicate
+          capacity issues, misconfigured filters, or resource constraints in the
+          datacenter. Investigate the affected VMs and hypervisor availability.
+    {{- end }}
+
+    - alert: CortexNovaNewDatasourcesNotReconciling
+      expr: count by(datasource) (cortex_datasource_seconds_until_reconcile{queued="false",domain="nova"}) > 0
+      for: 60m
+      labels:
+        context: datasources
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/datasources
+      annotations:
+        summary: "New datasource `{{ "{{" }} $labels.datasource {{ "}}" }}` has not reconciled"
+        description: >
+          A new datasource `{{ "{{" }} $labels.datasource {{ "}}" }}` has been added but has not
+          completed its first reconciliation yet. This may indicate issues with
+          the datasource controller's workqueue overprioritizing other datasources.
+
+    - alert: CortexNovaExistingDatasourcesLackingBehind  # name kept as-is: renaming would break anything that matches on alertname
+      expr: |
+        sum by(datasource) (cortex_datasource_seconds_until_reconcile{queued="true",domain="nova"}) < -600
+        and on(datasource) cortex_datasource_state{state="ready",domain="nova"} == 1
+      for: 10m  # queued datasource more than 600s past its reconcile time, restricted to datasources whose ready state is 1
+      labels:
+        context: datasources
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/datasources
+      annotations:
+        summary: "Existing datasource `{{ "{{" }} $labels.datasource {{ "}}" }}` is lagging behind"
+        description: >
+          An existing datasource `{{ "{{" }} $labels.datasource {{ "}}" }}` has been queued for
+          reconciliation for more than 10 minutes. This may indicate issues with
+          the datasource controller's workqueue or that this or another datasource
+          is taking an unusually long time to reconcile.
+
+    - alert: CortexNovaReconcileErrorsHigh
+      expr: |
+        (sum by (controller) (rate(controller_runtime_reconcile_errors_total{service="cortex-nova-metrics"}[5m])))
+        / (sum by (controller) (rate(controller_runtime_reconcile_total{service="cortex-nova-metrics"}[5m]))) > 0.1
+      for: 15m
+      labels:
+        context: controller-errors
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/reconciles
+      annotations:
+        summary: "Controller reconcile error rate >10%"
+        description: >
+          More than 10% of controller reconciles are resulting in errors. This may
+          indicate issues with the controller logic, connectivity problems, or
+          external factors causing failures. Check the controller logs for error
+          details and investigate the affected resources.
+
+    - alert: CortexNovaReconcileDurationHigher10Min  # name says 10Min, but the threshold is alerts.thresholds.reconcileDurationSeconds (600 in values.yaml)
+      expr: |
+        (sum by (controller) (rate(controller_runtime_reconcile_time_seconds_sum{service="cortex-nova-metrics"}[5m])))
+        / (sum by (controller) (rate(controller_runtime_reconcile_time_seconds_count{service="cortex-nova-metrics"}[5m]))) > {{ .Values.alerts.thresholds.reconcileDurationSeconds }}
+      for: 15m  # the expression is the mean reconcile time per controller: rate of the _sum series divided by rate of the _count series
+      labels:
+        context: controller-duration
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/reconciles
+      annotations:
+        summary: "Controller reconciliation takes longer than ({{ "{{" }} $value | humanizeDuration {{ "}}" }})"
+        description: "Reconcile duration higher than {{ .Values.alerts.thresholds.reconcileDurationSeconds }}s while reconciling {{ "{{" }} $labels.controller {{ "}}" }}"
+
+    - alert: CortexNovaWorkqueueNotDrained
+      expr: |
+        sum by (name) (workqueue_depth{service="cortex-nova-metrics"}) > 0
+      for: 60m
+      labels:
+        context: controller-workqueue
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/datasources
+      annotations:
+        summary: "Controller {{ "{{" }} $labels.name {{ "}}" }}'s backlog is not being drained."
+        description: >
+          The workqueue for controller {{ "{{" }} $labels.name {{ "}}" }} has a backlog that is
+          not being drained. This may indicate that the controller is overwhelmed
+          with work or is stuck on certain resources. Check the controller logs
+          and the state of the resources it manages for more details.
+
+    - alert: CortexNovaWebhookLatencyHigh  # p90 per webhook, computed from 5m bucket rates averaged over all series sharing webhook and le
+      expr: |
+        histogram_quantile(0.9, avg(rate(controller_runtime_webhook_latency_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (webhook, le)) > 0.2
+      for: 15m  # 0.2 above is in seconds, i.e. the 200ms quoted in the description
+      labels:
+        context: controller-webhook
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Controller webhook {{ "{{" }} $labels.webhook {{ "}}" }} latency is high"
+        description: >
+          The latency for webhook {{ "{{" }} $labels.webhook {{ "}}" }} is higher than expected (p90 > 200ms).
+          This may indicate performance issues with the webhook server or the logic it executes.
+          Check the webhook server logs and monitor its resource usage for more insights.
+
+    - alert: CortexNovaWebhookErrorsHigh
+      expr: |
+        (sum by (webhook) (rate(controller_runtime_webhook_requests_total{code!="200", service="cortex-nova-metrics"}[5m])))
+        / (sum by (webhook) (rate(controller_runtime_webhook_requests_total{service="cortex-nova-metrics"}[5m]))) > 0.1
+      for: 15m
+      labels:
+        context: controller-webhook
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+      annotations:
+        summary: "Controller webhook {{ "{{" }} $labels.webhook {{ "}}" }} is experiencing errors"
+        description: >
+          The webhook {{ "{{" }} $labels.webhook {{ "}}" }} has experienced errors in the last 5 minutes.
+          This may indicate issues with the webhook logic, connectivity problems, or
+          external factors causing failures. Check the webhook server logs for error
+          details and investigate the affected resources.
+
+    # Committed Resource Info API
+    - alert: CortexNovaCommittedResourceInfoUnavailable
+      expr: |
+        rate(cortex_committed_resource_info_api_requests_total{service="cortex-nova-metrics", status_code="503"}[5m]) > 0
+      for: 5m
+      labels:
+        context: committed-resource-api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/committed-resource-api-errors
+      annotations:
+        summary: "Committed Resource info API is unavailable"
+        description: >
+          The committed resource info API (Limes LIQUID integration) has been returning
+          503 Service Unavailable for more than 5 minutes. This typically means the
+          flavor group knowledge CRD is not ready or missing. Limes cannot discover
+          available committed resources until the issue is resolved.
+
+    # Committed Resource Change API
+    - alert: CortexNovaCommittedResourceChangeErrors
+      expr: |
+        rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0
+      for: 5m
+      labels:
+        context: committed-resource-api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/committed-resource-api-errors
+      annotations:
+        summary: "Committed Resource change API HTTP 5xx errors"
+        description: >
+          The committed resource change API (Limes LIQUID integration) is returning
+          HTTP 5xx errors. This is not expected and indicates an internal problem
+          processing commitment changes. Limes will retry, but new commitments may
+          not be fulfilled until the issue is resolved.
+
+    - alert: CortexNovaCommittedResourceRejectionRateTooHigh  # rejected / all non-dry-run commitment changes (15m rates) above 0.3
+      expr: |
+        (
+          sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics", result="rejected", dry_run="false"}[15m]))
+          / sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics", dry_run="false"}[15m]))
+        ) > 0.3
+        and on() sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics", dry_run="false"}[15m])) > 0
+      for: 15m  # the "and on()" clause additionally requires a non-zero overall rate of non-dry-run changes
+      labels:
+        context: committed-resource-api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/committed-resource-performance
+      annotations:
+        summary: "Committed Resource rejection rate too high ({{ "{{" }} $value | humanizePercentage {{ "}}" }})"
+        description: >
+          More than 30% of commitment changes have been rejected over the last 15 minutes.
+          This may indicate insufficient capacity to fulfill new commitments. Rejected
+          commitments are rolled back.
+
+    - alert: CortexNovaCommittedResourceTimeoutsTooHigh
+      expr: increase(cortex_committed_resource_change_api_timeouts_total{service="cortex-nova-metrics", dry_run="false"}[10m]) > 0
+      for: 1m
+      labels:
+        context: committed-resource-api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/committed-resource-performance
+      annotations:
+        summary: "Committed Resource change API timeout detected"
+        description: >
+          A commitment change request timed out after the configured deadline.
+          Timeouts indicate the scheduling pipeline could not place reservations in time.
+          Affected changes are rolled back. Investigate scheduler performance or reservation backlog.
+
+    - alert: CortexNovaCommittedResourceChangeLatencyTooHigh  # p95 of non-dry-run request duration over all series (5m rates) at or above 10s
+      expr: |
+        histogram_quantile(0.95, sum(rate(cortex_committed_resource_change_api_request_duration_seconds_bucket{service="cortex-nova-metrics", dry_run="false"}[5m])) by (le)) >= 10
+        and on() sum(rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", dry_run="false"}[5m])) > 0
+      for: 5m  # the "and on()" clause requires a non-zero rate of non-dry-run requests in the same window
+      labels:
+        context: committed-resource-api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/committed-resource-performance
+      annotations:
+        summary: "Committed Resource change API p95 latency >= 10s"
+        description: >
+          The committed resource change API p95 latency has reached or exceeded 10 seconds,
+          approaching the configured watch timeout. Requests close to the timeout are at risk
+          of being rolled back. Investigate scheduler performance or reservation backlog.
+
+    # Committed Resource Capacity API
+    - alert: CortexNovaCommittedResourceCapacityErrors
+      expr: |
+        rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0
+      for: 5m
+      labels:
+        context: committed-resource-api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/committed-resource-capacity
+      annotations:
+        summary: "Committed Resource capacity API HTTP 5xx errors"
+        description: >
+          The committed resource capacity API (Limes LIQUID integration) is returning
+          HTTP 5xx errors. This indicates internal problems calculating cluster capacity.
+          Limes may receive stale or incomplete capacity data.
+
+    - alert: CortexNovaCommittedResourceCapacityDroppedToZero  # capacity is 0 now but was above 0 thirty minutes earlier
+      expr: |
+        (cortex_committed_resource_reported_capacity_gib{service="cortex-nova-metrics"} == 0)
+        and on(resource, az) (cortex_committed_resource_reported_capacity_gib{service="cortex-nova-metrics"} offset 30m > 0)
+      for: 5m  # both sides are matched on the resource and az labels only
+      labels:
+        context: committed-resource-api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/committed-resource-capacity
+      annotations:
+        summary: "Committed Resource capacity for {{ "{{" }} $labels.resource {{ "}}" }} in {{ "{{" }} $labels.az {{ "}}" }} dropped to zero"
+        description: >
+          The reported capacity for committed resource {{ "{{" }} $labels.resource {{ "}}" }} in
+          availability zone {{ "{{" }} $labels.az {{ "}}" }} has dropped from a positive value to zero.
+          This may mean hypervisors in that AZ are fully utilized for the corresponding
+          flavor group and no further committed resources can be placed there.
+
+    # Committed Resource Usage API
+    - alert: CortexNovaCommittedResourceUsageErrors
+      expr: |
+        rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0
+      for: 5m
+      labels:
+        context: committed-resource-api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/committed-resource-api-errors
+      annotations:
+        summary: "Committed Resource usage API HTTP 5xx errors"
+        description: >
+          The committed resource usage API (Limes LIQUID integration) is returning
+          HTTP 5xx errors. This indicates internal problems fetching reservation or
+          Nova server data. Limes may receive stale or incomplete usage data.
+
+    # Committed Resource Quota API
+    - alert: CortexNovaCommittedResourceQuotaErrors
+      expr: |
+        rate(cortex_committed_resource_quota_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0
+      for: 5m
+      labels:
+        context: committed-resource-api
+        dashboard: cortex-status-dashboard/cortex-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/committed-resource-api-errors
+      annotations:
+        summary: "Committed Resource quota API HTTP 5xx errors"
+        description: >
+          The committed resource quota API (Limes LIQUID integration) is returning
+          HTTP 5xx errors. This indicates internal problems computing or applying
+          quota. Limes may not be able to enforce committed resource quotas.
 {{- end }}
diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml
index 4a194ae50..0ac9f49d5 100644
--- a/helm/bundles/cortex-nova/values.yaml
+++ b/helm/bundles/cortex-nova/values.yaml
@@ -17,6 +17,11 @@ owner-info:
 alerts:
   enabled: true
   prometheus: openstack
+  thresholds:
+    # Memory threshold for CortexNovaHighMemoryUsage in MiB.
+    highMemoryMiB: 6000
+    # Reconcile-duration threshold for CortexNovaReconcileDurationHigher10Min in seconds.
+    reconcileDurationSeconds: 600
 
 serviceMonitor:
   extraLabels: {}
diff --git a/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml b/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml
deleted file mode 100644
index e65b944d6..000000000
--- a/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml
+++ /dev/null
@@ -1,179 +0,0 @@
-groups:
-- name: cortex-placement-shim-alerts
-  rules:
-  # Liveness
-  - alert: CortexPlacementShimDown
-    expr: |
-      up{pod=~"cortex-placement-shim-.*"} != 1 or
-      absent(up{pod=~"cortex-placement-shim-.*"})
-    for: 5m
-    labels:
-      context: liveness
-      dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/shim-down
-    annotations:
-      summary: "Cortex Placement Shim is down"
-      description: >
-        The Cortex Placement Shim is down. Placement API requests that are
-        routed through the shim will not be served. OpenStack services relying
-        on the shim for resource provider lookups and allocation candidates
-        will degrade.
-
-  # Downstream HTTP errors (client -> shim)
-  - alert: CortexPlacementShimDownstreamHttp400sTooHigh
-    expr: rate(cortex_placement_shim_downstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service", responsecode=~"4.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: api
-      dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/shim-api-errors
-    annotations:
-      summary: "Placement Shim downstream HTTP 4xx errors too high"
-      description: >
-        The Placement Shim is responding to client requests with HTTP 4xx
-        errors at a sustained rate. This may indicate that the request format
-        from OpenStack services has changed, authentication tokens are invalid,
-        or the shim is rejecting malformed requests. Investigate the shim logs
-        for details on which endpoints and request patterns are affected.
-
-  - alert: CortexPlacementShimDownstreamHttp500sTooHigh
-    expr: rate(cortex_placement_shim_downstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service", responsecode=~"5.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: api
-      dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/shim-api-errors
-    annotations:
-      summary: "Placement Shim downstream HTTP 5xx errors too high"
-      description: >
-        The Placement Shim is responding to client requests with HTTP 5xx
-        errors. This indicates internal problems within the shim such as
-        handler panics or misconfiguration. OpenStack services may experience
-        degraded placement functionality until the issue is resolved.
-
-  # Upstream HTTP errors (shim -> Placement API)
-  - alert: CortexPlacementShimUpstreamHttp5xxTooHigh
-    expr: rate(cortex_placement_shim_upstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service", responsecode=~"5.."}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: upstream
-      dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/shim-api-errors
-    annotations:
-      summary: "Placement Shim upstream HTTP 5xx errors too high"
-      description: >
-        The upstream Placement API is returning 5xx errors to the shim.
-        This indicates the OpenStack Placement service itself is having
-        problems. The shim forwards these errors to its clients. Investigate
-        the Placement API service health and logs.
-
-  - alert: CortexPlacementShimUpstreamUnreachable
-    expr: rate(cortex_placement_shim_upstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service", responsecode="502"}[5m]) > 0.1
-    for: 5m
-    labels:
-      context: upstream
-      dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/shim-api-errors
-    annotations:
-      summary: "Placement Shim cannot reach the upstream Placement API"
-      description: >
-        The Placement Shim is unable to reach the upstream OpenStack Placement
-        API and is returning 502 Bad Gateway errors. This means all forwarded
-        requests are failing. Check network connectivity, the Placement API
-        service endpoint configuration, and whether the upstream service is
-        running.
-
-  # Latency alerts
-  - alert: CortexPlacementShimDownstreamLatencyTooHigh
-    expr: |
-      histogram_quantile(0.95, sum(rate(cortex_placement_shim_downstream_request_duration_seconds_bucket{service="cortex-placement-shim-metrics-service"}[5m])) by (le)) > 10
-      and on() sum(rate(cortex_placement_shim_downstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service"}[5m])) > 0
-    for: 5m
-    labels:
-      context: api
-      dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/shim-api-errors
-    annotations:
-      summary: "Placement Shim downstream latency too high"
-      description: >
-        The Placement Shim downstream request latency (p95) exceeds 10
-        seconds. This affects all OpenStack services making placement
-        requests through the shim. The cause may be slow upstream responses,
-        shim processing overhead, or resource contention. Investigate both
-        shim and upstream Placement API performance.
-
-  - alert: CortexPlacementShimUpstreamLatencyTooHigh
-    expr: |
-      histogram_quantile(0.95, sum(rate(cortex_placement_shim_upstream_request_duration_seconds_bucket{service="cortex-placement-shim-metrics-service"}[5m])) by (le)) > 10
-      and on() sum(rate(cortex_placement_shim_upstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service"}[5m])) > 0
-    for: 5m
-    labels:
-      context: upstream
-      dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/shim-api-errors
-    annotations:
-      summary: "Placement Shim upstream latency too high"
-      description: >
-        The upstream Placement API response latency (p95) as seen by the
-        shim exceeds 10 seconds. This directly impacts the end-to-end
-        latency of placement requests. Investigate the upstream Placement
-        API performance and network conditions.
-
-  # Resource usage
-  - alert: CortexPlacementShimHighMemoryUsage
-    expr: process_resident_memory_bytes{service="cortex-placement-shim-metrics-service"} > 1500 * 1024 * 1024
-    for: 5m
-    labels:
-      context: memory
-      dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/shim-resource-usage
-    annotations:
-      summary: "Placement Shim uses too much memory"
-      description: >
-        The Placement Shim is using more than 1500 MiB of resident memory
-        against a limit of 2048 MiB. This may indicate a memory leak, a
-        large number of cached hypervisors, or unexpected request patterns.
-        If the usage continues to grow, the pod will be OOM-killed.
-
-  - alert: CortexPlacementShimHighCPUUsage
-    expr: rate(process_cpu_seconds_total{service="cortex-placement-shim-metrics-service"}[1m]) > 0.4
-    for: 5m
-    labels:
-      context: cpu
-      dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
-      service: cortex
-      severity: warning
-      support_group: workload-management
-      playbook: docs/support/playbook/cortex/alerts/shim-resource-usage
-    annotations:
-      summary: "Placement Shim uses too much CPU"
-      description: >
-        The Placement Shim is consuming more than 40% of a single CPU core
-        against a limit of 500m. Under normal operation the shim should use
-        much less since it primarily proxies requests. This may indicate a
-        hot loop, excessive logging, or an unusual traffic spike.
-
diff --git a/helm/bundles/cortex-placement-shim/templates/alerts.yaml b/helm/bundles/cortex-placement-shim/templates/alerts.yaml
index 7db3b96e6..c570ccd91 100644
--- a/helm/bundles/cortex-placement-shim/templates/alerts.yaml
+++ b/helm/bundles/cortex-placement-shim/templates/alerts.yaml
@@ -10,8 +10,182 @@ metadata:
     type: alerting-rules
     prometheus: {{ required ".Values.alerts.prometheus missing" .Values.alerts.prometheus | quote }}
 spec:
-  {{- $files := .Files.Glob "alerts/*.alerts.yaml" }}
-  {{- range $path, $file := $files }}
-  {{ $file | toString | nindent 2 }}
-  {{- end }}
+  groups:
+  - name: cortex-placement-shim-alerts
+    rules:
+    # Liveness: the shim's scrape target is failing (up != 1) or no matching target exists at all (absent).
+    - alert: CortexPlacementShimDown  # selects targets by pod name; the other rules select by service label
+      expr: |
+        up{pod=~"cortex-placement-shim-.*"} != 1 or
+        absent(up{pod=~"cortex-placement-shim-.*"})
+      for: 5m  # the condition must hold for 5 minutes before the alert fires
+      labels:
+        context: liveness
+        dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/shim-down
+      annotations:
+        summary: "Cortex Placement Shim is down"
+        description: >
+          The Cortex Placement Shim is down. Placement API requests that are
+          routed through the shim will not be served. OpenStack services relying
+          on the shim for resource provider lookups and allocation candidates
+          will degrade.
+
+    # Downstream HTTP errors (client -> shim), read from the _count series of the downstream request-duration histogram.
+    - alert: CortexPlacementShimDownstreamHttp400sTooHigh  # any single series above 0.1 4xx responses/s (5m rate)
+      expr: rate(cortex_placement_shim_downstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service", responsecode=~"4.."}[5m]) > 0.1
+      for: 5m  # the rate must stay above the threshold for 5 minutes before the alert fires
+      labels:
+        context: api
+        dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/shim-api-errors
+      annotations:
+        summary: "Placement Shim downstream HTTP 4xx errors too high"
+        description: >
+          The Placement Shim is responding to client requests with HTTP 4xx
+          errors at a sustained rate. This may indicate that the request format
+          from OpenStack services has changed, authentication tokens are invalid,
+          or the shim is rejecting malformed requests. Investigate the shim logs
+          for details on which endpoints and request patterns are affected.
+
+    - alert: CortexPlacementShimDownstreamHttp500sTooHigh  # any single series above 0.1 5xx responses/s (5m rate) sent to clients
+      expr: rate(cortex_placement_shim_downstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service", responsecode=~"5.."}[5m]) > 0.1
+      for: 5m  # the rate must stay above the threshold for 5 minutes before the alert fires
+      labels:
+        context: api
+        dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/shim-api-errors
+      annotations:
+        summary: "Placement Shim downstream HTTP 5xx errors too high"
+        description: >
+          The Placement Shim is responding to client requests with HTTP 5xx
+          errors. This indicates internal problems within the shim such as
+          handler panics or misconfiguration. OpenStack services may experience
+          degraded placement functionality until the issue is resolved.
+
+    # Upstream HTTP errors (shim -> Placement API), read from the _count series of the upstream request-duration histogram.
+    - alert: CortexPlacementShimUpstreamHttp5xxTooHigh  # any single series above 0.1 5xx responses/s (5m rate) from upstream
+      expr: rate(cortex_placement_shim_upstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service", responsecode=~"5.."}[5m]) > 0.1
+      for: 5m  # the rate must stay above the threshold for 5 minutes before the alert fires
+      labels:
+        context: upstream
+        dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/shim-api-errors
+      annotations:
+        summary: "Placement Shim upstream HTTP 5xx errors too high"
+        description: >
+          The upstream Placement API is returning 5xx errors to the shim.
+          This indicates the OpenStack Placement service itself is having
+          problems. The shim forwards these errors to its clients. Investigate
+          the Placement API service health and logs.
+
+    - alert: CortexPlacementShimUpstreamUnreachable  # NOTE(review): "502" also matches the 5.. selector of CortexPlacementShimUpstreamHttp5xxTooHigh, so both alerts can fire together
+      expr: rate(cortex_placement_shim_upstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service", responsecode="502"}[5m]) > 0.1
+      for: 5m  # the 502 rate must stay above 0.1/s for 5 minutes before the alert fires
+      labels:
+        context: upstream
+        dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/shim-api-errors
+      annotations:
+        summary: "Placement Shim cannot reach the upstream Placement API"
+        description: >
+          The Placement Shim is unable to reach the upstream OpenStack Placement
+          API and is returning 502 Bad Gateway errors. This means all forwarded
+          requests are failing. Check network connectivity, the Placement API
+          service endpoint configuration, and whether the upstream service is
+          running.
+
+    # Latency alerts: p95 across all series (bucket rates summed by le) above 10s, evaluated only while the request rate is non-zero.
+    - alert: CortexPlacementShimDownstreamLatencyTooHigh
+      expr: |
+        histogram_quantile(0.95, sum(rate(cortex_placement_shim_downstream_request_duration_seconds_bucket{service="cortex-placement-shim-metrics-service"}[5m])) by (le)) > 10
+        and on() sum(rate(cortex_placement_shim_downstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service"}[5m])) > 0
+      for: 5m  # the p95 must stay above 10s for 5 minutes before the alert fires
+      labels:
+        context: api
+        dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/shim-api-errors
+      annotations:
+        summary: "Placement Shim downstream latency too high"
+        description: >
+          The Placement Shim downstream request latency (p95) exceeds 10
+          seconds. This affects all OpenStack services making placement
+          requests through the shim. The cause may be slow upstream responses,
+          shim processing overhead, or resource contention. Investigate both
+          shim and upstream Placement API performance.
+
+    - alert: CortexPlacementShimUpstreamLatencyTooHigh  # p95 of the upstream histogram above 10s, guarded by a non-zero upstream request rate
+      expr: |
+        histogram_quantile(0.95, sum(rate(cortex_placement_shim_upstream_request_duration_seconds_bucket{service="cortex-placement-shim-metrics-service"}[5m])) by (le)) > 10
+        and on() sum(rate(cortex_placement_shim_upstream_request_duration_seconds_count{service="cortex-placement-shim-metrics-service"}[5m])) > 0
+      for: 5m  # the p95 must stay above 10s for 5 minutes before the alert fires
+      labels:
+        context: upstream
+        dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/shim-api-errors
+      annotations:
+        summary: "Placement Shim upstream latency too high"
+        description: >
+          The upstream Placement API response latency (p95) as seen by the
+          shim exceeds 10 seconds. This directly impacts the end-to-end
+          latency of placement requests. Investigate the upstream Placement
+          API performance and network conditions.
+
+    # Resource usage: thresholds are hard-coded here; the limits quoted in the descriptions are not read from chart values.
+    - alert: CortexPlacementShimHighMemoryUsage  # threshold is 1500 MiB written out in bytes (1500 * 1024 * 1024)
+      expr: process_resident_memory_bytes{service="cortex-placement-shim-metrics-service"} > 1500 * 1024 * 1024
+      for: 5m  # resident memory must stay above the threshold for 5 minutes before the alert fires
+      labels:
+        context: memory
+        dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/shim-resource-usage
+      annotations:
+        summary: "Placement Shim uses too much memory"
+        description: >
+          The Placement Shim is using more than 1500 MiB of resident memory
+          against a limit of 2048 MiB. This may indicate a memory leak, a
+          large number of cached hypervisors, or unexpected request patterns.
+          If the usage continues to grow, the pod will be OOM-killed.
+
+    - alert: CortexPlacementShimHighCPUUsage  # threshold is 0.4 CPU-seconds per second, i.e. 40% of one core
+      expr: rate(process_cpu_seconds_total{service="cortex-placement-shim-metrics-service"}[1m]) > 0.4  # NOTE(review): only rule with a 1m window; rate() needs two samples in range, so confirm the scrape interval is well below 1m
+      for: 5m  # CPU usage must stay above the threshold for 5 minutes before the alert fires
+      labels:
+        context: cpu
+        dashboard: cortex-placement-shim-status-dashboard/cortex-placement-shim-status-dashboard
+        service: cortex
+        severity: warning
+        support_group: workload-management
+        playbook: docs/support/playbook/cortex/alerts/shim-resource-usage
+      annotations:
+        summary: "Placement Shim uses too much CPU"
+        description: >
+          The Placement Shim is consuming more than 40% of a single CPU core
+          against a limit of 500m. Under normal operation the shim should use
+          much less since it primarily proxies requests. This may indicate a
+          hot loop, excessive logging, or an unusual traffic spike.
 {{- end }}