diff --git a/config.example/group_vars/all.yml b/config.example/group_vars/all.yml
index 6b69f98ba..b7de1ed73 100644
--- a/config.example/group_vars/all.yml
+++ b/config.example/group_vars/all.yml
@@ -140,6 +140,10 @@ gpu_clock_lock: "1507,1507"
 
 # Debugging var: force install NVIDIA driver even if GPU not detected
 nvidia_driver_force_install: false
+# Default NVIDIA driver branch for generic host driver installs.
+nvidia_driver_branch: "580"
+# Set to true on Ubuntu systems that require the NVIDIA open kernel modules.
+nvidia_driver_ubuntu_use_open_kernel_modules: false
 
 
 ################################################################################
@@ -249,7 +253,7 @@ maas_adminusers:
 maas_dns_domain: 'deepops.local'
 maas_region_controller: '192.168.1.1'
 maas_region_controller_url: 'http://{{ maas_region_controller }}:5240/MAAS'
-maas_repo: 'ppa:maas/2.8'
+maas_repo: 'ppa:maas/3.5'
 
 # Defines if maas user should generate ssh keys
 # Usable for remote KVM/libvirt power actions
@@ -257,7 +261,14 @@ maas_setup_user: false
 
 maas_single_node_install: true
 
-maas_kvm: false
+maas_kvm_management: false
+
+# Avoid installing python-libmaas via pip: it shadows the packaged MAAS CLI
+# with /usr/local/bin/maas, which breaks DeepOps' MAAS operational workflow.
+maas_python_reqs:
+  - jinja2
+  - oauth
+  - pyyaml
 
 ################################################################################
 # NVIDIA Datacenter GPU Manager                                                #
@@ -305,4 +316,3 @@ standalone_container_registry_port: "5000"
 ngc_ready_cuda_container: "nvcr.io/nvidia/cuda:12.4.1-base-ubuntu22.04"
 ngc_ready_pytorch: "nvcr.io/nvidia/pytorch:24.04-py3"
 ngc_ready_tensorflow: "nvcr.io/nvidia/tensorflow:24.04-tf2-py3"
-
diff --git a/docs/deepops/configuration.md b/docs/deepops/configuration.md
index 70c1475e6..3884b8137 100644
--- a/docs/deepops/configuration.md
+++ b/docs/deepops/configuration.md
@@ -62,7 +62,7 @@ my-cluster-compute-02      ansible_host=10.0.0.3
 (Note that, by default, DeepOps will set the hostname of these machines to match the inventory hostname!
 If you don't want this, you can set `deepops_set_hostname: false` using the instructions in [the next section](#modifying-ansible-variables).)
 
-The example DeepOps inventory also includes groups for the different components of Kubernetes clusters (`kube-master`, `etcd`, and `kube-node`),
+The example DeepOps inventory also includes groups for the different components of Kubernetes clusters (`kube_control_plane`, `etcd`, and `kube_node`),
 and groups for the different components of Slurm clusters (`slurm-master` and `slurm-node`).
 These groups are used by DeepOps to determine which playbooks run on which nodes for each type of cluster,
 and you should add nodes to these groups based on how you want to lay out your cluster.
diff --git a/docs/deepops/update-deepops.md b/docs/deepops/update-deepops.md
index 3e081c18c..a1f7af047 100644
--- a/docs/deepops/update-deepops.md
+++ b/docs/deepops/update-deepops.md
@@ -160,6 +160,8 @@ In particular,
 
 Additionally, please note that Kubespray can only upgrade between one minor version of Kubernetes at a time.
 This means that you may need to upgrade multiple times between your current version and your desired version of Kubernetes.
+Other cluster components managed by Kubespray may have similar staged-upgrade requirements.
+For instance, the network plugin version installed by an older DeepOps release may need to be upgraded through an intermediate DeepOps/Kubespray release before a newer Kubespray release will accept it.
 
 For example, to upgrade from Kubernetes version 1.19.9 and 1.21.1, you might use a workflow like this:
 
@@ -267,7 +269,7 @@ DeepOps offers the option to configure each of the necessary NVIDIA components i
 
 ##### Updating the NVIDIA driver
 
-**Important**: Note that upgrading the NVIDIA driver will reboot the node, unless you set `nvidia_driver_skip_reboot` to false.
+**Important**: Note that upgrading the NVIDIA driver will reboot the node, unless you set `nvidia_driver_skip_reboot` to true.
 If you are using MIG-enabled GPUs ensure that your MIG configuration is persistent by using the [nvidia-mig-manager systemd](https://github.com/NVIDIA/mig-parted/tree/master/deployments/systemd) service
 or the [nvidia-mig-manager Kubernetes GPU Operator-included DaemonSet](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/gpu-operator-mig.html).
 
@@ -277,7 +279,7 @@ To update the driver on a DGX system, we recommend following the instructions in
 
 ###### On Ubuntu
 
-On Ubuntu, the default behavior in DeepOps is to use the LTS release branch distributed through the Ubuntu repositories. In this mode, the driver is generally pinned to a particular release branch such as 450 or 470.
+On Ubuntu, the default behavior in DeepOps is to use the LTS release branch distributed through the Ubuntu repositories. In this mode, the driver is generally pinned to a particular release branch such as 580.
 
 To upgrade to the latest driver within your current release branch, run:
 
@@ -288,7 +290,13 @@ ansible-playbook playbooks/nvidia-software/nvidia-driver.yml -e nvidia_driver_pa
 To upgrade the driver to a new release branch, set the following parameter in your DeepOps configuration:
 
 ```bash
-nvidia_driver_ubuntu_branch: "470"
+nvidia_driver_ubuntu_branch: "580"
+```
+
+Some newer GPUs require NVIDIA open kernel modules. To install the Ubuntu open kernel module packages for the selected branch, set:
+
+```bash
+nvidia_driver_ubuntu_use_open_kernel_modules: true
 ```
 
 Then run:
@@ -433,7 +441,7 @@ Note that this can take a long time, as we download and build Slurm from source
 
 #### Updating the NVIDIA driver
 
-**Important**: Note that upgrading the NVIDIA driver will reboot the node, unless you set `nvidia_driver_skip_reboot` to false.
+**Important**: Note that upgrading the NVIDIA driver will reboot the node, unless you set `nvidia_driver_skip_reboot` to true.
 
 ##### On DGX
 
@@ -441,7 +449,7 @@ To update the driver on a DGX system, we recommend following the instructions in
 
 ##### On Ubuntu
 
-On Ubuntu, the default behavior in DeepOps is to use the LTS release branch distributed through the Ubuntu repositories. In this mode, the driver is generally pinned to a particular release branch such as 450 or 470.
+On Ubuntu, the default behavior in DeepOps is to use the LTS release branch distributed through the Ubuntu repositories. In this mode, the driver is generally pinned to a particular release branch such as 580.
 
 To upgrade to the latest driver within your current release branch, run:
 
@@ -452,7 +460,13 @@ ansible-playbook playbooks/nvidia-software/nvidia-driver.yml -e nvidia_driver_pa
 To upgrade the driver to a new release branch, set the following parameter in your DeepOps configuration:
 
 ```bash
-nvidia_driver_ubuntu_branch: "510"
+nvidia_driver_ubuntu_branch: "580"
+```
+
+Some newer GPUs require NVIDIA open kernel modules. To install the Ubuntu open kernel module packages for the selected branch, set:
+
+```bash
+nvidia_driver_ubuntu_use_open_kernel_modules: true
 ```
 
 Then run:
diff --git a/docs/k8s-cluster/README.md b/docs/k8s-cluster/README.md
index ee6bfe66c..b7f1e32b1 100644
--- a/docs/k8s-cluster/README.md
+++ b/docs/k8s-cluster/README.md
@@ -84,7 +84,7 @@ Instructions for deploying a GPU cluster with Kubernetes
    # NOTE: If SSH requires a password, add: `-k`
    # NOTE: If sudo on remote machine requires a password, add: `-K`
    # NOTE: If SSH user is different than current user, add: `-u ubuntu`
-   ansible-playbook -l k8s-cluster playbooks/k8s-cluster.yml
+   ansible-playbook -l k8s_cluster playbooks/k8s-cluster.yml
    ```
 
    More information on Kubespray can be found in the official [Getting Started Guide](https://github.com/kubernetes-sigs/kubespray/blob/master/docs/getting-started.md)
@@ -123,7 +123,7 @@ Run the following script to create an administrative user and print out the dash
 
 #### NFS Client Provisioner
 
-The default behavior of DeepOps is to setup an NFS server on the first `kube-master` node. This temporary NFS server is used by the `nfs-client-provisioner` which is installed as the default StorageClass of a standard DeepOps deployment.
+The default behavior of DeepOps is to setup an NFS server on the first `kube_control_plane` node. This temporary NFS server is used by the `nfs-client-provisioner` which is installed as the default StorageClass of a standard DeepOps deployment.
 
 To use an existing nfs server server update the `k8s_nfs_server` and `k8s_nfs_export_path` variables in `config/group_vars/k8s-cluster.yml` and set the `k8s_deploy_nfs_server` to false in `config/group_vars/k8s-cluster.yml`. Additionally, the `k8s_nfs_mkdir` variable can be set to `false` if the export directory is already configured on the server.
 
@@ -165,7 +165,7 @@ Deploy NetApp Astra Trident for services that require persistent storage (such a
    # NOTE: If SSH requires a password, add: `-k`
    # NOTE: If sudo on remote machine requires a password, add: `-K`
    # NOTE: If SSH user is different than current user, add: `-u ubuntu`
-   ansible-playbook -l k8s-cluster playbooks/k8s-cluster/netapp-trident.yml
+   ansible-playbook -l k8s_cluster playbooks/k8s-cluster/netapp-trident.yml
    ```
 
 3. Verify that Astra Trident is running.
@@ -207,9 +207,9 @@ delete  Legacy positional argument for delete. Same as -d flag.
 
 The services can be reached from the following addresses:
 
-- Grafana: http://\<kube-master\>:30200
-- Prometheus: http://\<kube-master\>:30500
-- Alertmanager: http://\<kube-master\>:30400
+- Grafana: http://\<kube_control_plane\>:30200
+- Prometheus: http://\<kube_control_plane\>:30500
+- Alertmanager: http://\<kube_control_plane\>:30400
 
 We deploy our monitoring services using the [prometheus-operator](https://github.com/prometheus-operator/prometheus-operator) project.
 For documentation on configuring and managing the monitoring services, please see the [prometheus-operator user guides](https://github.com/prometheus-operator/prometheus-operator/tree/master/Documentation/user-guides).
@@ -234,7 +234,7 @@ Follow the [ELK logging Guide](logging.md) to setup logging in the cluster.
 
 The service can be reached from the following address:
 
-- Kibana: http://\<kube-master\>:30700
+- Kibana: http://\<kube_control_plane\>:30700
 
 ### Container Registry
 
@@ -264,7 +264,7 @@ DeepOps uses [Kubespray](https://github.com/kubernetes-sigs/kubespray) to deploy
 
 ### Adding Nodes
 
-To add K8s nodes, modify the `config/inventory` file to include the new nodes under `[all]`. Then list the nodes as relevant under the `[kube-master]`, `[etcd]`, and `[kube-node]` sections. For example, if adding a new master node, list it under kube-master and etcd. A new worker node would go under kube-node.
+To add K8s nodes, modify the `config/inventory` file to include the new nodes under `[all]`. Then list the nodes as relevant under the `[kube_control_plane]`, `[etcd]`, and `[kube_node]` sections. For example, if adding a new control-plane node, list it under `kube_control_plane` and `etcd`. A new worker node would go under `kube_node`.
 
 Then run the Kubespray `scale.yml` playbook...
 
@@ -272,7 +272,7 @@ Then run the Kubespray `scale.yml` playbook...
 # NOTE: If SSH requires a password, add: `-k`
 # NOTE: If sudo on remote machine requires a password, add: `-K`
 # NOTE: If SSH user is different than current user, add: `-u ubuntu`
-ansible-playbook -l k8s-cluster submodules/kubespray/scale.yml
+ansible-playbook -l k8s_cluster submodules/kubespray/scale.yml
 ```
 
 More information on this topic may be found in the [Kubespray docs](https://github.com/kubernetes-sigs/kubespray/blob/master/docs/getting-started.md#adding-nodes).
diff --git a/docs/k8s-cluster/kubeflow.md b/docs/k8s-cluster/kubeflow.md
index 02394fc86..59f4d6797 100644
--- a/docs/k8s-cluster/kubeflow.md
+++ b/docs/k8s-cluster/kubeflow.md
@@ -43,7 +43,7 @@ A local checkout of the [Kubeflow manifests](https://github.com/kubeflow/manifes
 
 The services can be reached from the following address:
 
-- Kubeflow: http://\<kube-master\>:31380
+- Kubeflow: http://\<kube_control_plane\>:31380
 
 ## Login information
 
diff --git a/docs/k8s-cluster/nvidia-network-operator.md b/docs/k8s-cluster/nvidia-network-operator.md
index 972211aa3..3817c14f5 100644
--- a/docs/k8s-cluster/nvidia-network-operator.md
+++ b/docs/k8s-cluster/nvidia-network-operator.md
@@ -83,9 +83,9 @@ This playbook is developed and tested in following environments:
   gpu01      ansible_host=192.168.2.11
   gpu02      ansible_host=192.168.3.11
   ...
-  [kube-master]
+  [kube_control_plane]
   mgmt01
-  [kube-node]
+  [kube_node]
   gpu01
   gpu02
   ```
@@ -123,7 +123,7 @@ This playbook is developed and tested in following environments:
   # NOTE: If SSH requires a password, add: `-k`
   # NOTE: If sudo on remote machine requires a password, add: `-K`
   # NOTE: If SSH user is different than current user, add: `-u ubuntu`
-  ansible-playbook -l k8s-cluster playbooks/k8s-cluster.yml
+  ansible-playbook -l k8s_cluster playbooks/k8s-cluster.yml
   ```
 
   Please refer to [DeepOps Kubernetes Deployment Guidehere](https://github.com/NVIDIA/deepops/blob/master/docs/kubernetes-cluster.md) for more information.
diff --git a/docs/k8s-cluster/roce-perf-k8s.md b/docs/k8s-cluster/roce-perf-k8s.md
index b80a6f41c..6f5d12c1b 100644
--- a/docs/k8s-cluster/roce-perf-k8s.md
+++ b/docs/k8s-cluster/roce-perf-k8s.md
@@ -142,7 +142,7 @@ add switch PFC, ECN configuration
 
    ```bash
    # Modify the Ansible inventory file
-   # Especially the 'all', 'kube-master', 'etcd', 'kube-node' and 'k8s-cluster' sections
+   # Especially the 'all', 'kube_control_plane', 'etcd', 'kube_node' and 'k8s_cluster' sections
    vi config/inventory
    ```
 
@@ -159,10 +159,10 @@ add switch PFC, ECN configuration
    gpu02      ansible_host=192.168.2.11
    ...
 
-   [kube-master]
+   [kube_control_plane]
    mgmt01
 
-   [kube-node]
+   [kube_node]
    gpu01
    gpu02
 
@@ -203,7 +203,7 @@ add switch PFC, ECN configuration
    # NOTE: If SSH requires a password, add: `-k`
    # NOTE: If sudo on remote machine requires a password, add: `-K`
    # NOTE: If SSH user is different than current user, add: `-u ubuntu`
-   ansible-playbook -l k8s-cluster playbooks/k8s-cluster.yml
+   ansible-playbook -l k8s_cluster playbooks/k8s-cluster.yml
    ```
 
    Please refer to [DeepOps Kubernetes Deployment Guidehere](https://github.com/NVIDIA/deepops/blob/master/docs/kubernetes-cluster.md) for more information.
@@ -252,7 +252,7 @@ add switch PFC, ECN configuration
    Run following script to deploy SRIOV RoCE functions:
 
    ```bash
-   nvidia@mgmt01:~/deepops_0322$ ansible-playbook -l k8s-cluster playbooks/k8s-cluster/roce.yaml
+   nvidia@mgmt01:~/deepops_0322$ ansible-playbook -l k8s_cluster playbooks/k8s-cluster/roce.yaml
    ```
 
    If using a different username and SSH key-based authentication haven't set up, try to use `-u <user> -k -K` when you run the script.
diff --git a/docs/k8s-cluster/roce_backend.md b/docs/k8s-cluster/roce_backend.md
index 254007217..aa5c9dba6 100644
--- a/docs/k8s-cluster/roce_backend.md
+++ b/docs/k8s-cluster/roce_backend.md
@@ -106,7 +106,7 @@ The Role installing following components:
 ## Role deployment
 
 ```bash
-ansible-playbook -l k8s-cluster playbooks/k8s-cluster/roce.yaml
+ansible-playbook -l k8s_cluster playbooks/k8s-cluster/roce.yaml
 ```
 
 ## License
diff --git a/docs/pxe/maas.md b/docs/pxe/maas.md
index ca95671c9..1deb6dad1 100644
--- a/docs/pxe/maas.md
+++ b/docs/pxe/maas.md
@@ -258,8 +258,8 @@ only need to tag leaf groups.
 
 | Tag | Ansible Group | Used By |
 |-----|--------------|---------|
-| `kube-master` | `[kube-master]` | K8s control plane |
-| `kube-node` | `[kube-node]` | K8s worker nodes |
+| `kube_control_plane` | `[kube_control_plane]` | K8s control plane |
+| `kube_node` | `[kube_node]` | K8s worker nodes |
 | `slurm-master` | `[slurm-master]` | Slurm head node |
 | `slurm-node` | `[slurm-node]` | Slurm compute nodes |
 | `slurm-nfs` | `[slurm-nfs]` | Slurm NFS server |
@@ -276,8 +276,8 @@ ansible-playbook -i scripts/maas_inventory.py playbooks/slurm-cluster.yml
 
 # Later, retag for K8s
 maas admin tag update-nodes slurm-master remove=<vm01_system_id>
-maas admin tag update-nodes kube-master add=<vm01_system_id>
-maas admin tag update-nodes kube-node add=<vm02_system_id> add=<vm03_system_id>
+maas admin tag update-nodes kube_control_plane add=<vm01_system_id>
+maas admin tag update-nodes kube_node add=<vm02_system_id> add=<vm03_system_id>
 
 # Run K8s deployment
 ansible-playbook -i scripts/maas_inventory.py playbooks/k8s-cluster.yml
diff --git a/playbooks/k8s-cluster.yml b/playbooks/k8s-cluster.yml
index 742eed77d..983e9c750 100644
--- a/playbooks/k8s-cluster.yml
+++ b/playbooks/k8s-cluster.yml
@@ -282,7 +282,7 @@
     ansible_become: no
   tasks:
     - name: Install Helm on admin node
-      command: "sh {{ playbook_dir }}/../scripts/k8s/install_helm.sh"
+      command: "bash {{ playbook_dir }}/../scripts/k8s/install_helm.sh"
       delegate_to: localhost
     - name: Globally update the deprecated "stable" helm repo
       command: "/usr/local/bin/helm repo add 'stable' 'https://charts.helm.sh/stable' --force-update"
diff --git a/playbooks/k8s-cluster/nfs-client-provisioner.yml b/playbooks/k8s-cluster/nfs-client-provisioner.yml
index 93592e85b..200f27f99 100644
--- a/playbooks/k8s-cluster/nfs-client-provisioner.yml
+++ b/playbooks/k8s-cluster/nfs-client-provisioner.yml
@@ -22,7 +22,7 @@
     include_role:
       name: nfs
     vars:
-    - nfs_is_server: yes
+      nfs_is_server: yes
     when: k8s_deploy_nfs_server
 
 - hosts: "k8s_cluster"
diff --git a/playbooks/nvidia-software/nvidia-cuda.yml b/playbooks/nvidia-software/nvidia-cuda.yml
index f7e704e2e..7945f671c 100644
--- a/playbooks/nvidia-software/nvidia-cuda.yml
+++ b/playbooks/nvidia-software/nvidia-cuda.yml
@@ -13,6 +13,10 @@
       include_role:
         name: facts
 
+    - name: configure Ubuntu NVIDIA driver packages
+      when: ansible_distribution == 'Ubuntu'  # the included task file is Ubuntu-specific
+      include_tasks: tasks/nvidia-driver-ubuntu-packages.yml
+
     - name: install nvidia driver
       include_role:
         name: nvidia.nvidia_driver
diff --git a/playbooks/nvidia-software/nvidia-driver.yml b/playbooks/nvidia-software/nvidia-driver.yml
index c9a05b687..c445e5574 100644
--- a/playbooks/nvidia-software/nvidia-driver.yml
+++ b/playbooks/nvidia-software/nvidia-driver.yml
@@ -14,6 +14,10 @@
       include_role:
         name: facts
 
+    - name: configure Ubuntu NVIDIA driver packages
+      when: ansible_distribution == 'Ubuntu'  # the included task file is Ubuntu-specific
+      include_tasks: tasks/nvidia-driver-ubuntu-packages.yml
+
     - name: install nvidia driver
       include_role:
         name: nvidia.nvidia_driver
diff --git a/playbooks/nvidia-software/tasks/nvidia-driver-ubuntu-packages.yml b/playbooks/nvidia-software/tasks/nvidia-driver-ubuntu-packages.yml
new file mode 100644
index 000000000..1a987eac4
--- /dev/null
+++ b/playbooks/nvidia-software/tasks/nvidia-driver-ubuntu-packages.yml
@@ -0,0 +1,12 @@
+---
+- name: Select Ubuntu NVIDIA open kernel module packages
+  when: nvidia_driver_ubuntu_use_open_kernel_modules | default(false) | bool  # opt-in; skipped unless enabled
+  vars:  # task-local helpers, used only to build the package names below
+    _open_branch: "{{ nvidia_driver_ubuntu_branch | default(nvidia_driver_branch | default('515')) }}"  # NOTE(review): '515' fallback differs from the "580" default in config.example - confirm intended
+    _open_suffix: "{{ nvidia_driver_ubuntu_packages_suffix | default('-server') }}"
+  set_fact:
+    nvidia_driver_ubuntu_packages:  # "-open" variants, except nvidia-utils which has no "-open" suffix here
+      - "nvidia-headless-{{ _open_branch }}{{ _open_suffix }}-open"
+      - "nvidia-utils-{{ _open_branch }}{{ _open_suffix }}"
+      - "nvidia-headless-no-dkms-{{ _open_branch }}{{ _open_suffix }}-open"
+      - "nvidia-kernel-source-{{ _open_branch }}{{ _open_suffix }}-open"
diff --git a/roles/alertmanager/defaults/main.yml b/roles/alertmanager/defaults/main.yml
index bcbed59c7..8bfdbc49d 100644
--- a/roles/alertmanager/defaults/main.yml
+++ b/roles/alertmanager/defaults/main.yml
@@ -1,6 +1,6 @@
 alertmanager_config_dir: /etc/alertmanager
 alertmanager_config_src: templates/alertmanager.yml.j2
-alertmanager_container: "prom/alertmanager:v0.23.0"
+alertmanager_container: "prom/alertmanager:v0.32.1"
 alertmanager_svc_name: "docker.alertmanager.service"
 alertmanager_docker_volume_name: "deepops_alertmanager_metrics"
 alertmanager_state: started
diff --git a/roles/grafana/defaults/main.yml b/roles/grafana/defaults/main.yml
index 5156ce0c6..920a37943 100644
--- a/roles/grafana/defaults/main.yml
+++ b/roles/grafana/defaults/main.yml
@@ -2,7 +2,7 @@ grafana_config_dir: /etc/grafana
 grafana_config_template: templates/grafana.ini.j2
 grafana_data_dir: /var/lib/grafana
 grafana_user_id: 472
-grafana_container: "grafana/grafana:8.5.10"
+grafana_container: "grafana/grafana:13.0.1"
 grafana_svc_name: "docker.grafana.service"
 grafana_state: started
 grafana_enabled: yes
diff --git a/roles/netapp-trident/README.md b/roles/netapp-trident/README.md
index 87b5557c0..7911d097b 100644
--- a/roles/netapp-trident/README.md
+++ b/roles/netapp-trident/README.md
@@ -41,7 +41,7 @@ Example A:
 Example B:
 
     - name: "Deploy NetApp Trident"
-      hosts: kube-master
+      hosts: kube_control_plane
       become: true
       become_method: sudo
       roles:
@@ -50,7 +50,7 @@ Example B:
 Example C:
 
     - name: "Deploy NetApp Trident"
-      hosts: kube-master
+      hosts: kube_control_plane
       become: true
       become_method: sudo
       vars_files:
@@ -88,7 +88,7 @@ all:
       ip: 192.168.1.215
       access_ip: 192.168.1.215
   children:
-    kube-master:
+    kube_control_plane:
       hosts:
         mgmt01:
         mgmt02:
diff --git a/roles/nginx-docker-registry-cache/defaults/main.yml b/roles/nginx-docker-registry-cache/defaults/main.yml
index dba99e9ac..6824bc7d5 100644
--- a/roles/nginx-docker-registry-cache/defaults/main.yml
+++ b/roles/nginx-docker-registry-cache/defaults/main.yml
@@ -1,6 +1,6 @@
 ---
 nginx_docker_cache_name: "deepops-nginx-docker-cache"
-nginx_docker_cache_image: "rpardini/docker-registry-proxy:0.6.4"
+nginx_docker_cache_image: "rpardini/docker-registry-proxy:0.6.5"
 
 nginx_docker_cache_mirror_path: "/opt/deepops/nginx-docker-cache/mirror"
 nginx_docker_cache_ca_path: "/opt/deepops/nginx-docker-cache/ca"
diff --git a/roles/nvidia-gpu-operator/defaults/main.yml b/roles/nvidia-gpu-operator/defaults/main.yml
index 0cd7dad47..ebecc5d60 100644
--- a/roles/nvidia-gpu-operator/defaults/main.yml
+++ b/roles/nvidia-gpu-operator/defaults/main.yml
@@ -12,7 +12,7 @@ gpu_operator_nvaie_helm_repo: "https://helm.ngc.nvidia.com/nvaie"
 gpu_operator_nvaie_chart_name: "nvaie/gpu-operator"
 
 # NVAIE GPU Operator may require different version, check NGC enterprise collection.
-gpu_operator_chart_version: "v23.3.2"
+gpu_operator_chart_version: "v26.3.1"
 
 k8s_gpu_mig_strategy: "mixed"
 
@@ -33,7 +33,7 @@ gpu_operator_grid_config_dir: "{{ deepops_dir }}/gpu_operator"
 # Defaults from https://github.com/NVIDIA/gpu-operator/blob/master/deployments/gpu-operator/values.yaml
 gpu_operator_default_runtime: "containerd"
 gpu_operator_driver_registry: "nvcr.io/nvidia"
-gpu_operator_driver_version: "525.105.17"
+gpu_operator_driver_version: "580.126.20"
 
 # This enables/disables NVAIE
 gpu_operator_nvaie_enable: false
diff --git a/roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml b/roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml
index a1214ec17..021d9a3f2 100644
--- a/roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml
+++ b/roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml
@@ -2,6 +2,6 @@
 k8s_gpu_plugin_helm_repo: "https://nvidia.github.io/k8s-device-plugin"
 k8s_gpu_plugin_chart_name: "nvdp/nvidia-device-plugin"
 k8s_gpu_plugin_release_name: "nvidia-device-plugin"
-k8s_gpu_plugin_chart_version: "0.14.0"
+k8s_gpu_plugin_chart_version: "0.19.1"
 k8s_gpu_plugin_init_error: "false"
 k8s_gpu_mig_strategy: "mixed"
diff --git a/roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml b/roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml
index d1cced31e..77111b3ff 100644
--- a/roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml
+++ b/roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml
@@ -1,6 +1,6 @@
 # Vars needed to install feature discovery
-k8s_gpu_feature_discovery_helm_repo: "https://nvidia.github.io/gpu-feature-discovery"
+k8s_gpu_feature_discovery_helm_repo: "https://nvidia.github.io/k8s-device-plugin"
 k8s_gpu_feature_discovery_chart_name: "nvgfd/gpu-feature-discovery"
 k8s_gpu_feature_discovery_release_name: "gpu-feature-discovery"
-k8s_gpu_feature_discovery_chart_version: "0.8.0"
+k8s_gpu_feature_discovery_chart_version: "0.19.1"
 k8s_gpu_mig_strategy: "mixed"
diff --git a/roles/nvidia-mig-manager/defaults/main.yml b/roles/nvidia-mig-manager/defaults/main.yml
index 1139c4608..80d0c60f4 100644
--- a/roles/nvidia-mig-manager/defaults/main.yml
+++ b/roles/nvidia-mig-manager/defaults/main.yml
@@ -1,3 +1,3 @@
 ---
-mig_manager_url_deb: https://github.com/NVIDIA/mig-parted/releases/download/v0.4.2/nvidia-mig-manager_0.4.2-1_amd64.deb
-mig_manager_url_rpm: https://github.com/NVIDIA/mig-parted/releases/download/v0.4.2/nvidia-mig-manager-0.4.2-1.x86_64.rpm
+mig_manager_url_deb: https://github.com/NVIDIA/mig-parted/releases/download/v0.14.1/nvidia-mig-manager_0.14.1-1_amd64.deb
+mig_manager_url_rpm: https://github.com/NVIDIA/mig-parted/releases/download/v0.14.1/nvidia-mig-manager-0.14.1-1.x86_64.rpm
diff --git a/roles/nvidia-network-operator/tasks/main.yaml b/roles/nvidia-network-operator/tasks/main.yaml
index d2c9e409e..09e7fa8dc 100644
--- a/roles/nvidia-network-operator/tasks/main.yaml
+++ b/roles/nvidia-network-operator/tasks/main.yaml
@@ -4,7 +4,7 @@
 
 - name: label the nodes  # noqa command-instead-of-shell
   shell: kubectl label --overwrite nodes {{ item }} node-role.kubernetes.io/worker=
-  with_items: "{{ groups['kube-node'] }}"
+  with_items: "{{ groups['kube_node'] }}"
   changed_when: false
 
 ## required as the DeepOps openshift role doesn't work
@@ -20,7 +20,7 @@
 - name: Deploy network operator helm chart
   kubernetes.core.helm:
     name: network-operator
-    release_namespace: network-operator
+    release_namespace: "{{ nvidia_network_operator_namespace }}"
     chart_version: "{{ nvidia_network_operator_version }}"
     chart_ref: mellanox/network-operator
     create_namespace: true
@@ -28,7 +28,41 @@
     wait: true
     values: "{{ lookup('template', 'values.yaml') | from_yaml }}"
 
-- name: Create network node poliy
+- name: Create NicClusterPolicy
+  run_once: true  # apply the policy from a single host only
+  kubernetes.core.k8s:
+    definition: "{{ lookup('template', 'nicclusterpolicy.yaml') | from_yaml }}"  # rendered from the role template, then parsed
+    state: present
+
+- name: Wait for NVIDIA IPAM CRD
+  when: nvidia_network_operator_ipam_type == "nv-ipam"  # only relevant when nv-ipam is the selected IPAM
+  run_once: true
+  kubernetes.core.k8s_info:
+    kind: CustomResourceDefinition
+    api_version: apiextensions.k8s.io/v1
+    name: ippools.nv-ipam.nvidia.com
+  register: nvidia_ipam_crd
+  until: nvidia_ipam_crd.resources | length > 0  # done once the lookup returns the CRD
+  delay: 10
+  retries: 30  # with delay 10 this polls for roughly five minutes at most
+
+- name: Create NVIDIA IPAM pools
+  kubernetes.core.k8s:  # one IPPool per intf_resources entry, named ip_pool_name or else res_name
+    state: present
+    definition: |
+      apiVersion: nv-ipam.nvidia.com/v1alpha1
+      kind: IPPool
+      metadata:
+        name: "{{ item.ip_pool_name | default(item.res_name) }}"
+        namespace: "{{ nvidia_network_operator_namespace }}"
+      spec:
+        subnet: "{{ item.ip_addr }}"
+        perNodeBlockSize: {{ nvidia_network_operator_ipam_per_node_block_size | int }}
+  with_items: "{{ intf_resources }}"  # definition is YAML text so perNodeBlockSize is parsed as an integer, not a templated string
+  when: nvidia_network_operator_ipam_type == "nv-ipam"
+  run_once: true
+
+- name: Create network node policy
   include_tasks: sriovnetworknodepolicy.yaml
   with_items: "{{ intf_resources }}"
 
diff --git a/roles/nvidia-network-operator/tasks/sriovibnetwork.yaml b/roles/nvidia-network-operator/tasks/sriovibnetwork.yaml
index fe6626fc7..ae08cde6b 100644
--- a/roles/nvidia-network-operator/tasks/sriovibnetwork.yaml
+++ b/roles/nvidia-network-operator/tasks/sriovibnetwork.yaml
@@ -9,14 +9,8 @@
       networkNamespace: "default"
       ipam: |
         {
-          "type": "whereabouts",
-          "datastore": "kubernetes",
-          "kubernetes": {
-            "kubeconfig": "/etc/cni/net.d/whereabouts.d/whereabouts.kubeconfig"
-           },
-          "range": "{{ item.ip_addr }}",
-          "log_file": "/var/log/whereabouts.log",
-          "log_level": "info"
+          "type": "{{ nvidia_network_operator_ipam_type }}",
+          "poolName": "{{ item.ip_pool_name | default(item.res_name) }}"
         }
   k8s:
     state: present
diff --git a/roles/nvidia-network-operator/tasks/sriovnetworknodepolicy.yaml b/roles/nvidia-network-operator/tasks/sriovnetworknodepolicy.yaml
index c41f16bcd..9cecfdbb1 100644
--- a/roles/nvidia-network-operator/tasks/sriovnetworknodepolicy.yaml
+++ b/roles/nvidia-network-operator/tasks/sriovnetworknodepolicy.yaml
@@ -7,7 +7,7 @@
       deviceType: netdevice
       mtu: {{ mtu |int }}
       nodeSelector:
-        feature.node.kubernetes.io/network-sriov.capable: "true"
+        feature.node.kubernetes.io/pci-{{ vendor_id }}.present: "true"
       nicSelector:
         vendor: "{{ vendor_id }}"
         pfNames: ["{{ item.pf_name }}"]
diff --git a/roles/nvidia-network-operator/templates/nicclusterpolicy.yaml b/roles/nvidia-network-operator/templates/nicclusterpolicy.yaml
new file mode 100644
index 000000000..81834316b
--- /dev/null
+++ b/roles/nvidia-network-operator/templates/nicclusterpolicy.yaml
@@ -0,0 +1,22 @@
+---
+apiVersion: mellanox.com/v1alpha1
+kind: NicClusterPolicy  # rendered with Jinja, then parsed with from_yaml by the role
+metadata:
+  name: nic-cluster-policy
+spec:
+  secondaryNetwork:
+    cniPlugins:
+      image: plugins
+      repository: nvcr.io/nvidia/mellanox
+      version: "{{ nvidia_network_operator_image_tag }}"  # quoted so number-like or empty tag overrides stay strings
+    multus:
+      image: multus-cni
+      repository: nvcr.io/nvidia/mellanox
+      version: "{{ nvidia_network_operator_image_tag }}"
+{% if nvidia_network_operator_ipam_type == "nv-ipam" %}
+  nvIpam:  # only emitted when nv-ipam is the selected IPAM type
+    image: nvidia-k8s-ipam
+    repository: nvcr.io/nvidia/mellanox
+    version: "{{ nvidia_network_operator_image_tag }}"
+    enableWebhook: false
+{% endif %}
diff --git a/roles/nvidia-network-operator/templates/values.yaml b/roles/nvidia-network-operator/templates/values.yaml
index 9de7fbab2..4b63ac7b5 100644
--- a/roles/nvidia-network-operator/templates/values.yaml
+++ b/roles/nvidia-network-operator/templates/values.yaml
@@ -7,21 +7,3 @@ nfd:
   enabled: true
 sriovNetworkOperator:
   enabled: true
-
-# NicClusterPolicy CR values:
-deployCR: true
-ofedDriver:
-  deploy: false
-rdmaSharedDevicePlugin:
-  deploy: false
-sriovDevicePlugin:
-  deploy: false
-
-secondaryNetwork:
-  deploy: true
-  multus:
-    deploy: true
-  cniPlugins:
-    deploy: true
-  ipamPlugin:
-    deploy: true
diff --git a/roles/nvidia-network-operator/vars/main.yaml b/roles/nvidia-network-operator/vars/main.yaml
index 2d3322081..7be8ee253 100644
--- a/roles/nvidia-network-operator/vars/main.yaml
+++ b/roles/nvidia-network-operator/vars/main.yaml
@@ -6,8 +6,12 @@
 # if_name must match k8s network annotation name
 #
 
-nvidia_network_operator_version: "1.2.0"
-nvidia_network_operator_url: "https://mellanox.github.io/network-operator"
+nvidia_network_operator_version: "26.1.1"  # passed to the Helm task as chart_version
+nvidia_network_operator_image_tag: "network-operator-v{{ nvidia_network_operator_version }}"  # image version used in the NicClusterPolicy template
+nvidia_network_operator_namespace: "network-operator"  # Helm release namespace; IPPool objects are created here too
+nvidia_network_operator_ipam_type: "nv-ipam"  # IPAM "type" in the network config; "nv-ipam" also enables the IPPool tasks
+nvidia_network_operator_ipam_per_node_block_size: "{{ num_vf }}"  # IPPool perNodeBlockSize; follows num_vf by default
+nvidia_network_operator_url: "https://helm.ngc.nvidia.com/nvidia"  # presumably the Helm repository URL - usage not visible in this diff
 mpi_operator_version: "v2beta1"
 mpi_raw_url: "https://raw.githubusercontent.com/kubeflow/mpi-operator/master/deploy/v2beta1"
 
diff --git a/roles/prometheus-node-exporter/defaults/main.yml b/roles/prometheus-node-exporter/defaults/main.yml
index 9d6116c8e..925f4baee 100644
--- a/roles/prometheus-node-exporter/defaults/main.yml
+++ b/roles/prometheus-node-exporter/defaults/main.yml
@@ -1,4 +1,4 @@
-node_exporter_container: "quay.io/prometheus/node-exporter:v1.3.1"
+node_exporter_container: "quay.io/prometheus/node-exporter:v1.11.1"
 node_exporter_prom_dir: "/run/prometheus"
 node_exporter_svc_name: "docker.node-exporter.service"
 node_exporter_state: started
diff --git a/roles/prometheus/defaults/main.yml b/roles/prometheus/defaults/main.yml
index 6e7afbd21..7349da1b3 100644
--- a/roles/prometheus/defaults/main.yml
+++ b/roles/prometheus/defaults/main.yml
@@ -1,7 +1,7 @@
 prometheus_config_dir: /etc/prometheus
 prometheus_config_src: templates/prometheus.yml.j2
 prometheus_alert_rules_src: templates/alert_rules.yml.j2
-prometheus_container: "prom/prometheus:v2.37.0"
+prometheus_container: "prom/prometheus:v3.11.3"
 prometheus_svc_name: "docker.prometheus.service"
 prometheus_docker_volume_name: "deepops_prometheus_metrics"
 prometheus_state: started
diff --git a/roles/requirements.yml b/roles/requirements.yml
index b6a0795aa..77f97ef7b 100644
--- a/roles/requirements.yml
+++ b/roles/requirements.yml
@@ -56,9 +56,9 @@ roles:
 - src: robertdebock.kibana
   version: "1.2.6"
 
-- src: https://github.com/DeepOps/ansible-maas.git
+- src: https://github.com/mrlesmithjr/ansible-maas.git
   name: ansible-maas
-  version: '632fe9bd1e048b9abb717621dc2d76b19614327b'
+  version: '178a999c9bfc979ef32c42f4f59c034664df10d0'
 
 - src: https://github.com/DeepOps/ansible-role-chrony
   name: DeepOps.chrony
diff --git a/roles/slurm/defaults/main.yml b/roles/slurm/defaults/main.yml
index 4573301db..735ae16ee 100644
--- a/roles/slurm/defaults/main.yml
+++ b/roles/slurm/defaults/main.yml
@@ -7,7 +7,7 @@ hwloc_build_dir: /opt/deepops/build/hwloc
 pmix_build_dir: /opt/deepops/build/pmix
 
 slurm_workflow_build: yes
-slurm_version: "23.02.4"
+slurm_version: "25.11.6"
 slurm_src_url: "https://download.schedmd.com/slurm/slurm-{{ slurm_version }}.tar.bz2"
 slurm_build_make_clean: no
 slurm_build_dir_cleanup: no
diff --git a/roles/slurm/tasks/controller.yml b/roles/slurm/tasks/controller.yml
index 733e32823..8ebe48fac 100644
--- a/roles/slurm/tasks/controller.yml
+++ b/roles/slurm/tasks/controller.yml
@@ -149,16 +149,30 @@
 - name: create account
   command: sacctmgr -i add account compute-account Description="Compute Accounts" Organization="Prestige"
   register: create_account_result
-  failed_when: "create_account_result.rc != 0 and 'Nothing new added' not in create_account_result.stdout"
-  changed_when: "'Nothing new added' not in create_account_result.stdout"
+  failed_when: >
+    create_account_result.rc != 0 and
+    'Nothing new added' not in create_account_result.stdout and
+    'Nothing added' not in create_account_result.stdout and
+    'Already existing' not in create_account_result.stdout
+  changed_when: >
+    'Nothing new added' not in create_account_result.stdout and
+    'Nothing added' not in create_account_result.stdout and
+    'Already existing' not in create_account_result.stdout
   environment:
     PATH: '{{ slurm_install_prefix }}/bin:{{ ansible_env.PATH }}'
 
 - name: create users
   command: sacctmgr -i create user {{ item }} account=compute-account adminlevel=None
   register: create_user_result
-  failed_when: "create_user_result.rc != 0 and 'Nothing new added' not in create_user_result.stdout"
-  changed_when: "'Nothing new added' not in create_user_result.stdout"
+  failed_when: >
+    create_user_result.rc != 0 and
+    'Nothing new added' not in create_user_result.stdout and
+    'Nothing added' not in create_user_result.stdout and
+    'Already existing' not in create_user_result.stdout
+  changed_when: >
+    'Nothing new added' not in create_user_result.stdout and
+    'Nothing added' not in create_user_result.stdout and
+    'Already existing' not in create_user_result.stdout
   with_items:
     - "{{ user }}"
   environment:
diff --git a/roles/slurm/tasks/login-compute-setup.yml b/roles/slurm/tasks/login-compute-setup.yml
index 69c2769a9..2644c9bc3 100644
--- a/roles/slurm/tasks/login-compute-setup.yml
+++ b/roles/slurm/tasks/login-compute-setup.yml
@@ -9,6 +9,7 @@
       systemctl set-property sshd.service DeviceAllow="/dev/nvidiactl"
     fi
   args:
+    executable: /bin/bash
     creates: "{{ '/etc/systemd/system.control/sshd.service.d/50-DeviceAllow.conf' \
                  if ansible_os_family == 'RedHat' else \
                  '/etc/systemd/system.control/ssh.service.d/50-DeviceAllow.conf' }}"
diff --git a/roles/spack/defaults/main.yml b/roles/spack/defaults/main.yml
index 26540b781..76e356c63 100644
--- a/roles/spack/defaults/main.yml
+++ b/roles/spack/defaults/main.yml
@@ -1,7 +1,7 @@
 ---
 spack_repo: "https://github.com/spack/spack.git"
 spack_install_dir: "/sw/spack"
-spack_version: "v0.18.1"
+spack_version: "v1.1.1"
 spack_user: "root"
 spack_group: "root"
 
diff --git a/roles/standalone-container-registry/defaults/main.yml b/roles/standalone-container-registry/defaults/main.yml
index 383f6e9e4..562524707 100644
--- a/roles/standalone-container-registry/defaults/main.yml
+++ b/roles/standalone-container-registry/defaults/main.yml
@@ -2,7 +2,7 @@
 epel_package: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm"
 epel_key_url: "https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}"
 
-standalone_container_registry_image: "registry:2.8"
+standalone_container_registry_image: "registry:3.1.1"
 standalone_container_registry_port: "5000"
 standalone_container_registry_name: "deepops-registry"
 
diff --git a/scripts/k8s/deploy_ingress.sh b/scripts/k8s/deploy_ingress.sh
index f311f4d84..b1ad2143e 100755
--- a/scripts/k8s/deploy_ingress.sh
+++ b/scripts/k8s/deploy_ingress.sh
@@ -9,7 +9,7 @@ ROOT_DIR="${SCRIPT_DIR}/../.."
 source ${ROOT_DIR}/scripts/common.sh
 
 HELM_CHARTS_REPO_INGRESS="${HELM_CHARTS_REPO_INGRESS:-https://kubernetes.github.io/ingress-nginx}"
-HELM_INGRESS_CHART_VERSION="${HELM_INGRESS_CHART_VERSION:-4.2.1}"
+HELM_INGRESS_CHART_VERSION="${HELM_INGRESS_CHART_VERSION:-4.15.1}"
 # HELM_INGRESS_CONFIG, defaults below based on presence of metallb
 
 ${SCRIPT_DIR}/k8s/install_helm.sh
diff --git a/scripts/k8s/deploy_monitoring.sh b/scripts/k8s/deploy_monitoring.sh
index d73fdc5f5..05a5851a7 100755
--- a/scripts/k8s/deploy_monitoring.sh
+++ b/scripts/k8s/deploy_monitoring.sh
@@ -24,7 +24,7 @@ if [ ! -d "${DEEPOPS_CONFIG_DIR}" ]; then
 fi
 
 HELM_CHARTS_REPO_PROMETHEUS="${HELM_CHARTS_REPO_PROMETHEUS:-https://prometheus-community.github.io/helm-charts}"
-HELM_PROMETHEUS_CHART_VERSION="${HELM_PROMETHEUS_CHART_VERSION:-39.5.0}"
+HELM_PROMETHEUS_CHART_VERSION="${HELM_PROMETHEUS_CHART_VERSION:-85.0.3}"
 ingress_name="ingress-nginx"
 
 PROMETHEUS_YAML_CONFIG="${PROMETHEUS_YAML_CONFIG:-${DEEPOPS_CONFIG_DIR}/helm/monitoring.yml}"
diff --git a/scripts/k8s/install_helm.sh b/scripts/k8s/install_helm.sh
index 1efd217de..b3c2bb4e7 100755
--- a/scripts/k8s/install_helm.sh
+++ b/scripts/k8s/install_helm.sh
@@ -35,7 +35,7 @@ case "$ID" in
         ;;
 esac
 
-helm_version=$(helm version --short)
+helm_version=$(helm version --short 2>/dev/null || true)
 helm_min_installed=$(echo -e "${HELM_MINIMUM_VERSION}\n${helm_version}"| sort -V | head -n 1)
 if [ "${HELM_MINIMUM_VERSION}" != "${helm_min_installed}" ]; then
     if [ "${helm_version}" != "" ]; then
@@ -49,5 +49,7 @@ if [ "${HELM_MINIMUM_VERSION}" != "${helm_min_installed}" ]; then
     HELM_INSTALL_DIR=${HELM_INSTALL_DIR} DESIRED_VERSION=v3.17.1 /var/tmp/get_helm.sh # Should match: config/group_vars/k8s-cluster.yml:helm_version:
 fi
 
+sudo chmod 0755 "${HELM_INSTALL_DIR}/helm"
+
 # Display the helm version for better debug
 helm version
diff --git a/submodules/kubespray b/submodules/kubespray
index f4ccdb5e7..1c9add489 160000
--- a/submodules/kubespray
+++ b/submodules/kubespray
@@ -1 +1 @@
-Subproject commit f4ccdb5e72395eaf9f3444056ebd1a6625ddb89a
+Subproject commit 1c9add48975060f45396b34d8e022c30d7f80dab
diff --git a/virtual/vars_files/virt_k8s.yml b/virtual/vars_files/virt_k8s.yml
index c393e9a3e..089f52add 100644
--- a/virtual/vars_files/virt_k8s.yml
+++ b/virtual/vars_files/virt_k8s.yml
@@ -1,3 +1,3 @@
 ---
 container_registry_persistence_enabled: false
-rsyslog_client_tcp_host: "{{ groups['kube-master'][0] }}"
+rsyslog_client_tcp_host: "{{ groups['kube_control_plane'][0] }}"
diff --git a/virtual/virtual_inventory b/virtual/virtual_inventory
index 71e0b72ba..d5a39bb8d 100644
--- a/virtual/virtual_inventory
+++ b/virtual/virtual_inventory
@@ -15,19 +15,19 @@ virtual-gpu01 ansible_host=10.0.0.6 ip=10.0.0.6
 ######
 # KUBERNETES
 ######
-[kube-master]
+[kube_control_plane]
 virtual-mgmt01
 
 [etcd]
 virtual-mgmt01
 
-[kube-node]
+[kube_node]
 virtual-mgmt01
 virtual-gpu01
 
-[k8s-cluster:children]
-kube-master 	 
-kube-node 	 
+[k8s_cluster:children]
+kube_control_plane
+kube_node
 
 ######
 # SLURM
diff --git a/virtual/virtual_inventory_full b/virtual/virtual_inventory_full
index f639e0e78..1f1c70b27 100644
--- a/virtual/virtual_inventory_full
+++ b/virtual/virtual_inventory_full
@@ -19,7 +19,7 @@ virtual-gpu02 ansible_host=10.0.0.7 ip=10.0.0.7
 ######
 # KUBERNETES
 ######
-[kube-master]
+[kube_control_plane]
 virtual-mgmt01
 virtual-mgmt02
 virtual-mgmt03
@@ -29,16 +29,16 @@ virtual-mgmt01
 virtual-mgmt02
 virtual-mgmt03
 
-[kube-node]
+[kube_node]
 virtual-mgmt01
 virtual-mgmt02
 virtual-mgmt03
 virtual-gpu01
 virtual-gpu02
 
-[k8s-cluster:children]
-kube-master 	 
-kube-node 	 
+[k8s_cluster:children]
+kube_control_plane
+kube_node
 
 ######
 # SLURM