# gpuPrebuiltMarkerValue prints the value stored under key $1 in marker file $2.
# The value is everything after the FIRST '=' on the first line that starts with "<key>=",
# so a value that itself contains '=' is returned whole instead of being cut at the second '='.
# Prints nothing when the key is absent or the file cannot be read.
gpuPrebuiltMarkerValue() {
    awk -v key="$1" 'index($0, key "=") == 1 { print substr($0, length(key) + 2); exit }' "$2" 2>/dev/null
}

# gpuPrebuiltModuleMatches decides whether the GPU kernel module baked into the VHD can be reused.
#
# Returns 0 only when the VHD baked a kernel module that exactly matches what CSE is about
# to install: same running kernel, same driver version + kind, and the same driver image
# tag. Any mismatch (kernel drift, newer driver from CRP, GRID SKU, older VHD without the
# marker) returns non-zero so configGPUDrivers falls back to the full build. The fast path
# is therefore purely an optimization -- correctness never depends on it.
#
# Globals (read):
#   GPU_PREBUILT_MARKER_FILE  optional marker path; defaults to /opt/azure/aks-gpu/dkms-marker
#   GPU_DV                    compared against the marker's driver_version
#   NVIDIA_GPU_DRIVER_TYPE    compared against the marker's driver_kind
#   NVIDIA_DRIVER_IMAGE_TAG   compared against the marker's image_tag
# Returns: 0 on an exact match, 1 otherwise.
gpuPrebuiltModuleMatches() {
    local marker_file="${GPU_PREBUILT_MARKER_FILE:-/opt/azure/aks-gpu/dkms-marker}"
    [ -f "$marker_file" ] || return 1

    # Resolve the running kernel once; it is needed for both the marker comparison and modinfo.
    local running_kernel
    running_kernel="$(uname -r)"

    local m_kernel m_version m_kind m_image
    m_kernel="$(gpuPrebuiltMarkerValue kernel "$marker_file")"
    m_version="$(gpuPrebuiltMarkerValue driver_version "$marker_file")"
    m_kind="$(gpuPrebuiltMarkerValue driver_kind "$marker_file")"
    m_image="$(gpuPrebuiltMarkerValue image_tag "$marker_file")"

    # An empty marker field never matches, even if the corresponding CSE variable is also empty.
    if [ -z "$m_kernel" ] || [ "$m_kernel" != "$running_kernel" ]; then
        return 1
    fi
    if [ -z "$m_version" ] || [ "$m_version" != "$GPU_DV" ]; then
        return 1
    fi
    if [ -z "$m_kind" ] || [ "$m_kind" != "$NVIDIA_GPU_DRIVER_TYPE" ]; then
        return 1
    fi
    if [ -z "$m_image" ] || [ "$m_image" != "$NVIDIA_DRIVER_IMAGE_TAG" ]; then
        return 1
    fi

    # The compiled module must actually resolve for the running kernel.
    modinfo -k "$running_kernel" nvidia >/dev/null 2>&1 || return 1
    return 0
}
diff --git a/spec/parts/linux/cloud-init/artifacts/cse_config_spec.sh b/spec/parts/linux/cloud-init/artifacts/cse_config_spec.sh index 5051528c554..27fecb2914e 100755 --- a/spec/parts/linux/cloud-init/artifacts/cse_config_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/cse_config_spec.sh @@ -1678,4 +1678,68 @@ SETUP_EOF The output should include "rm -f /opt/azure/containers/managed-gpu-experience.enabled" End End + + Describe 'gpuPrebuiltModuleMatches' + setup() { + GPU_PREBUILT_MARKER_FILE="$(mktemp)" + GPU_DV="535.230.02" + NVIDIA_GPU_DRIVER_TYPE="cuda" + NVIDIA_DRIVER_IMAGE_TAG="535.230.02-abc123" + } + cleanup() { + rm -f "$GPU_PREBUILT_MARKER_FILE" + } + BeforeEach 'setup' + AfterEach 'cleanup' + + uname() { echo "5.15.0-1078-azure"; } + modinfo() { return 0; } + + write_marker() { + cat > "$GPU_PREBUILT_MARKER_FILE" <&2 + exit 1 + fi + if [ "$running" != "$newest" ]; then + echo "Error: running kernel ($running) is not the newest installed kernel ($newest); a GPU module prebuilt now would not match the kernel nodes boot. Run prebuildGPUKernelModule after the final kernel is installed and the builder has rebooted into it." >&2 + exit 1 + fi + # The DKMS build needs the matching kernel headers on the builder. + if [ ! -d "/lib/modules/${running}/build" ]; then + echo "Error: kernel headers for ${running} not found (/lib/modules/${running}/build missing); cannot prebuild the GPU kernel module." >&2 + exit 1 + fi + + echo "Prebuilding GPU kernel module into VHD from ${ref} for kernel ${running}" + mkdir -p /opt/{actions,gpu} + rm -f "$marker" + + # image-fetcher already imported the image into the k8s.io containerd namespace. + if ! 
retrycmd_if_failure 3 10 1200 bash -c "ctr -n k8s.io run --privileged --rm --net-host --with-ns pid:/proc/1/ns/pid --mount type=bind,src=/opt/gpu,dst=/mnt/gpu,options=rbind --mount type=bind,src=/opt/actions,dst=/mnt/actions,options=rbind ${ref} buildgpu /entrypoint.sh build-only"; then + echo "Error: GPU kernel module prebuild (build-only) failed for ${ref}" >&2 + exit 1 + fi + + if [ ! -f "$marker" ]; then + echo "Error: expected GPU prebuild marker ${marker} not found after build-only run" >&2 + exit 1 + fi + + # Bind the baked module to this exact driver image so CSE only fast-paths on an exact match. + { + echo "image_tag=${tag}" + echo "image=${ref}" + } >> "$marker" + + echo "GPU kernel module prebuilt into VHD:" >> ${VHD_LOGS_FILEPATH} + sed 's/^/ - /' "$marker" >> ${VHD_LOGS_FILEPATH} +} + # For Ubuntu, pre-pull the CUDA driver image if [ $OS = $UBUNTU_OS_NAME ] && [ "$(isARM64)" -ne 1 ]; then # No ARM64 SKU with GPU now gpu_action="copy" @@ -718,6 +775,16 @@ if [ $OS = $UBUNTU_OS_NAME ] && [ "$(isARM64)" -ne 1 ]; then # No ARM64 SKU wit cat << EOF >> ${VHD_LOGS_FILEPATH} - nvidia-cuda-driver=${NVIDIA_DRIVER_IMAGE_TAG} EOF + + # Optionally pre-compile the NVIDIA kernel module into the VHD so that node provisioning can + # skip the expensive boot-time DKMS build. Scoped to the most common GPU SKU (Ubuntu 22.04 + # amd64, CUDA driver). The module is bound to the shipped kernel; CSE only takes the fast + # path when the kernel, driver version/kind, and image tag all match, otherwise it falls back + # to a full build at boot. Gated behind PREBUILD_GPU_KERNEL_MODULE so it is only attempted on + # VHDs whose aks-gpu image supports the build-only action. + if [ "${PREBUILD_GPU_KERNEL_MODULE:-false}" = "true" ] && [ "${UBUNTU_RELEASE}" = "22.04" ]; then + prebuildGPUKernelModule "$NVIDIA_DRIVER_IMAGE" "$NVIDIA_DRIVER_IMAGE_TAG" + fi fi if grep -q "NVIDIA_GB" <<< "$FEATURE_FLAGS"; then