From c2468984f9200210d97fd2177a0fdffe3f474939 Mon Sep 17 00:00:00 2001
From: "xinhao.huang"
Date: Tue, 24 Mar 2026 16:56:26 +0800
Subject: [PATCH] [sdk]: add virtualization metadata for pci devices and dGPU scheduling

DBImpact

Introduce pci virtualization metadata with virtState, virtMode and
persisted virtCapabilities. Backfill existing data in V5.5.12 and
expose the new fields through SDK inventories.

Normalize capability/mode derivation during PCI sync, keep upgrade
retry idempotent, and avoid stale or inconsistent virtMode capability
states.

Use the new metadata in dGPU capability filtering/allocation flows and
add coverage for PCI sync and dGPU cases.

Resolves: ZSTAC-83477

Change-Id: I71736e666272737978676a74656d767a6a786c72
---
Reviewer notes (placed after the three-dash separator, so they are not
recorded in the commit message):

 * Line breaks and indentation of this patch were reconstructed from a
   whitespace-collapsed copy; tokens and hunk counts are unchanged.
   Context-line whitespace may differ from the target tree, so
   `git apply --ignore-whitespace` may be needed -- TODO confirm.
 * The From: header above carries no e-mail address -- TODO restore it
   before feeding this to `git am`.
 * `idx_pci_device_virt_capability_pci` indexes `pciDeviceUuid`, which is
   already the leading column of `uk_pci_device_virt_capability`; the
   extra KEY looks redundant -- TODO confirm and drop in a follow-up.
 * The `virtState` backfill is guarded by `WHERE virtState IS NULL` and
   the capability backfills use INSERT IGNORE, but the `GpuDeviceVO.mode`
   and `GpuDeviceVO.allocateStatus` updates have no guard and rewrite
   every joined row each time the script runs -- confirm this is wanted
   on an upgrade retry.
 * In the `virtMode` backfill the `ELSE virtMode` branch cannot be
   reached: the WHERE clause lists exactly the statuses the WHEN
   branches handle.
 * A GPU whose `virtMode` is 'HAMI' falls through to `ELSE 'PCI'` in the
   `mode` backfill, while the `allocateStatus` backfill marks the same
   row 'Unallocatable' when no VM is attached -- confirm this pairing is
   intended.

 conf/db/upgrade/V5.5.12__schema.sql           | 89 +++++++++++++++++++
 sdk/src/main/java/SourceClassMap.java         |  4 +
 .../org/zstack/sdk/GpuDeviceInventory.java    |  8 ++
 .../org/zstack/sdk/PciDeviceInventory.java    | 26 ++++++
 .../org/zstack/sdk/PciDeviceVirtMode.java     |  8 ++
 .../org/zstack/sdk/PciDeviceVirtState.java    |  8 ++
 6 files changed, 143 insertions(+)
 create mode 100644 sdk/src/main/java/org/zstack/sdk/PciDeviceVirtMode.java
 create mode 100644 sdk/src/main/java/org/zstack/sdk/PciDeviceVirtState.java

diff --git a/conf/db/upgrade/V5.5.12__schema.sql b/conf/db/upgrade/V5.5.12__schema.sql
index 0a22c919a72..0049785ab06 100644
--- a/conf/db/upgrade/V5.5.12__schema.sql
+++ b/conf/db/upgrade/V5.5.12__schema.sql
@@ -32,6 +32,95 @@ WHERE `opaque` IS NOT NULL
   AND Json_getKeyValue(`opaque`, 'end_time') IS NOT NULL
   AND Json_getKeyValue(`opaque`, 'end_time') != '';
 
+-- PCI virtualization capability metadata
+
+CREATE TABLE IF NOT EXISTS `zstack`.`PciDeviceVirtCapabilityVO` (
+    `id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+    `pciDeviceUuid` VARCHAR(32) NOT NULL,
+    `capability` VARCHAR(32) NOT NULL,
+    `createDate` TIMESTAMP NOT NULL,
+    `lastOpDate` TIMESTAMP NOT NULL,
+    PRIMARY KEY (`id`),
+    UNIQUE KEY `uk_pci_device_virt_capability` (`pciDeviceUuid`, `capability`),
+    KEY `idx_pci_device_virt_capability_pci` (`pciDeviceUuid`),
+    CONSTRAINT `fk_pci_device_virt_capability_pci`
+        FOREIGN KEY (`pciDeviceUuid`) REFERENCES `zstack`.`PciDeviceVO`(`uuid`) ON DELETE CASCADE
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+CALL ADD_COLUMN('PciDeviceVO', 'virtState', 'varchar(32)', 1, NULL);
+
+UPDATE `zstack`.`PciDeviceVO`
+SET `virtState` =
+    CASE
+        WHEN `virtStatus` IN ('SRIOV_VIRTUALIZABLE', 'VFIO_MDEV_VIRTUALIZABLE', 'TENSORFUSION_VIRTUALIZABLE') THEN 'VIRTUALIZABLE'
+        WHEN `virtStatus` IN ('SRIOV_VIRTUALIZED', 'VFIO_MDEV_VIRTUALIZED', 'VIRTUALIZED_BYPASS_ZSTACK',
+                              'HAMI_VIRTUALIZED', 'TENSORFUSION_VIRTUALIZED') THEN 'VIRTUALIZED'
+        WHEN `virtStatus` = 'SRIOV_VIRTUAL' THEN 'VIRTUAL'
+        ELSE 'UNVIRTUALIZABLE'
+    END
+WHERE `virtState` IS NULL;
+
+INSERT IGNORE INTO `zstack`.`PciDeviceVirtCapabilityVO`
+    (`pciDeviceUuid`, `capability`, `createDate`, `lastOpDate`)
+SELECT `uuid`, 'SRIOV', NOW(), NOW()
+FROM `zstack`.`PciDeviceVO`
+WHERE `virtStatus` IN ('SRIOV_VIRTUALIZABLE', 'SRIOV_VIRTUALIZED');
+
+INSERT IGNORE INTO `zstack`.`PciDeviceVirtCapabilityVO`
+    (`pciDeviceUuid`, `capability`, `createDate`, `lastOpDate`)
+SELECT `uuid`, 'VFIO_MDEV', NOW(), NOW()
+FROM `zstack`.`PciDeviceVO`
+WHERE `virtStatus` IN ('VFIO_MDEV_VIRTUALIZABLE', 'VFIO_MDEV_VIRTUALIZED', 'VIRTUALIZED_BYPASS_ZSTACK');
+
+INSERT IGNORE INTO `zstack`.`PciDeviceVirtCapabilityVO`
+    (`pciDeviceUuid`, `capability`, `createDate`, `lastOpDate`)
+SELECT `uuid`, 'TENSORFUSION', NOW(), NOW()
+FROM `zstack`.`PciDeviceVO`
+WHERE `virtStatus` IN ('TENSORFUSION_VIRTUALIZABLE', 'TENSORFUSION_VIRTUALIZED');
+
+INSERT IGNORE INTO `zstack`.`PciDeviceVirtCapabilityVO`
+    (`pciDeviceUuid`, `capability`, `createDate`, `lastOpDate`)
+SELECT `uuid`, 'HAMI', NOW(), NOW()
+FROM `zstack`.`PciDeviceVO`
+WHERE `virtStatus` = 'HAMI_VIRTUALIZED';
+
+CALL ADD_COLUMN('PciDeviceVO', 'virtMode', 'varchar(32)', 1, NULL);
+
+UPDATE `zstack`.`PciDeviceVO`
+SET `virtMode` =
+    CASE
+        WHEN `virtStatus` IN ('SRIOV_VIRTUALIZED') THEN 'SRIOV'
+        WHEN `virtStatus` = 'SRIOV_VIRTUAL' THEN 'SRIOV'
+        WHEN `virtStatus` IN ('VFIO_MDEV_VIRTUALIZED', 'VIRTUALIZED_BYPASS_ZSTACK') THEN 'VFIO_MDEV'
+        WHEN `virtStatus` = 'TENSORFUSION_VIRTUALIZED' THEN 'TENSORFUSION'
+        WHEN `virtStatus` = 'HAMI_VIRTUALIZED' THEN 'HAMI'
+        ELSE `virtMode`
+    END
+WHERE `virtStatus` IN (
+    'SRIOV_VIRTUALIZED', 'SRIOV_VIRTUAL',
+    'VFIO_MDEV_VIRTUALIZED', 'VIRTUALIZED_BYPASS_ZSTACK',
+    'TENSORFUSION_VIRTUALIZED', 'HAMI_VIRTUALIZED'
+);
+
+CALL ADD_COLUMN('GpuDeviceVO', 'mode', 'varchar(32)', 1, NULL);
+CALL CREATE_INDEX('GpuDeviceVO', 'idx_gpu_device_mode', 'mode');
+
+UPDATE `zstack`.`GpuDeviceVO` g
+INNER JOIN `zstack`.`PciDeviceVO` p ON g.`uuid` = p.`uuid`
+SET g.`mode` = CASE
+    WHEN p.`virtState` = 'VIRTUALIZED' AND p.`virtMode` = 'TENSORFUSION' THEN 'DGPU'
+    WHEN p.`virtState` = 'VIRTUALIZED' AND p.`virtMode` IN ('VFIO_MDEV', 'SRIOV') THEN 'VGPU'
+    ELSE 'PCI'
+END;
+
+UPDATE `zstack`.`GpuDeviceVO` g
+INNER JOIN `zstack`.`PciDeviceVO` p ON g.`uuid` = p.`uuid`
+SET g.`allocateStatus` = CASE
+    WHEN p.`vmInstanceUuid` IS NOT NULL THEN 'Allocated'
+    WHEN p.`virtState` = 'VIRTUALIZED' AND p.`virtMode` IS NOT NULL THEN 'Unallocatable'
+    ELSE 'Unallocated'
+END;
+
 -- dGPU (TensorFusion) support tables
 
 CREATE TABLE IF NOT EXISTS `zstack`.`DGpuProfileVO` (
diff --git a/sdk/src/main/java/SourceClassMap.java b/sdk/src/main/java/SourceClassMap.java
index c716b948589..13595622f07 100644
--- a/sdk/src/main/java/SourceClassMap.java
+++ b/sdk/src/main/java/SourceClassMap.java
@@ -649,6 +649,8 @@ public class SourceClassMap {
             put("org.zstack.pciDevice.specification.pci.PciDeviceSpecInventory", "org.zstack.sdk.PciDeviceSpecInventory");
             put("org.zstack.pciDevice.specification.pci.PciDeviceSpecState", "org.zstack.sdk.PciDeviceSpecState");
             put("org.zstack.pciDevice.specification.pci.VmInstancePciDeviceSpecRefInventory", "org.zstack.sdk.VmInstancePciDeviceSpecRefInventory");
+            put("org.zstack.pciDevice.virtual.PciDeviceVirtMode", "org.zstack.sdk.PciDeviceVirtMode");
+            put("org.zstack.pciDevice.virtual.PciDeviceVirtState", "org.zstack.sdk.PciDeviceVirtState");
             put("org.zstack.pciDevice.virtual.PciDeviceVirtStatus", "org.zstack.sdk.PciDeviceVirtStatus");
             put("org.zstack.pciDevice.virtual.vfio_mdev.MdevDeviceChooser", "org.zstack.sdk.MdevDeviceChooser");
             put("org.zstack.pciDevice.virtual.vfio_mdev.MdevDeviceInventory", "org.zstack.sdk.MdevDeviceInventory");
@@ -1337,6 +1339,8 @@ public class SourceClassMap {
             put("org.zstack.sdk.PciDeviceState", "org.zstack.pciDevice.PciDeviceState");
             put("org.zstack.sdk.PciDeviceStatus", "org.zstack.pciDevice.PciDeviceStatus");
             put("org.zstack.sdk.PciDeviceType", "org.zstack.pciDevice.PciDeviceType");
+            put("org.zstack.sdk.PciDeviceVirtMode", "org.zstack.pciDevice.virtual.PciDeviceVirtMode");
+            put("org.zstack.sdk.PciDeviceVirtState", "org.zstack.pciDevice.virtual.PciDeviceVirtState");
             put("org.zstack.sdk.PciDeviceVirtStatus", "org.zstack.pciDevice.virtual.PciDeviceVirtStatus");
             put("org.zstack.sdk.PendingTaskInfo", "org.zstack.header.core.progress.PendingTaskInfo");
             put("org.zstack.sdk.PhysicalDriveSmartSelfTestHistoryInventory", "org.zstack.storage.device.localRaid.PhysicalDriveSmartSelfTestHistoryInventory");
diff --git a/sdk/src/main/java/org/zstack/sdk/GpuDeviceInventory.java b/sdk/src/main/java/org/zstack/sdk/GpuDeviceInventory.java
index 587c9cbeae3..0a47629a7b3 100644
--- a/sdk/src/main/java/org/zstack/sdk/GpuDeviceInventory.java
+++ b/sdk/src/main/java/org/zstack/sdk/GpuDeviceInventory.java
@@ -60,4 +60,12 @@ public GpuAllocateStatus getAllocateStatus() {
         return this.allocateStatus;
     }
 
+    public java.lang.String mode;
+    public void setMode(java.lang.String mode) {
+        this.mode = mode;
+    }
+    public java.lang.String getMode() {
+        return this.mode;
+    }
+
 }
diff --git a/sdk/src/main/java/org/zstack/sdk/PciDeviceInventory.java b/sdk/src/main/java/org/zstack/sdk/PciDeviceInventory.java
index a695b6c1b26..b3d7728f91e 100644
--- a/sdk/src/main/java/org/zstack/sdk/PciDeviceInventory.java
+++ b/sdk/src/main/java/org/zstack/sdk/PciDeviceInventory.java
@@ -4,6 +4,8 @@
 import org.zstack.sdk.PciDeviceState;
 import org.zstack.sdk.PciDeviceStatus;
 import org.zstack.sdk.PciDeviceVirtStatus;
+import org.zstack.sdk.PciDeviceVirtState;
+import org.zstack.sdk.PciDeviceVirtMode;
 import org.zstack.sdk.PciDeviceChooser;
 import org.zstack.sdk.PciDeviceMetaData;
 
@@ -97,6 +99,30 @@ public PciDeviceVirtStatus getVirtStatus() {
         return this.virtStatus;
     }
 
+    public PciDeviceVirtState virtState;
+    public void setVirtState(PciDeviceVirtState virtState) {
+        this.virtState = virtState;
+    }
+    public PciDeviceVirtState getVirtState() {
+        return this.virtState;
+    }
+
+    public java.util.List virtCapabilities;
+    public void setVirtCapabilities(java.util.List virtCapabilities) {
+        this.virtCapabilities = virtCapabilities;
+    }
+    public java.util.List getVirtCapabilities() {
+        return this.virtCapabilities;
+    }
+
+    public PciDeviceVirtMode virtMode;
+    public void setVirtMode(PciDeviceVirtMode virtMode) {
+        this.virtMode = virtMode;
+    }
+    public PciDeviceVirtMode getVirtMode() {
+        return this.virtMode;
+    }
+
     public PciDeviceChooser chooser;
     public void setChooser(PciDeviceChooser chooser) {
         this.chooser = chooser;
diff --git a/sdk/src/main/java/org/zstack/sdk/PciDeviceVirtMode.java b/sdk/src/main/java/org/zstack/sdk/PciDeviceVirtMode.java
new file mode 100644
index 00000000000..cbee53defa5
--- /dev/null
+++ b/sdk/src/main/java/org/zstack/sdk/PciDeviceVirtMode.java
@@ -0,0 +1,8 @@
+package org.zstack.sdk;
+
+public enum PciDeviceVirtMode {
+    SRIOV,
+    VFIO_MDEV,
+    TENSORFUSION,
+    HAMI,
+}
diff --git a/sdk/src/main/java/org/zstack/sdk/PciDeviceVirtState.java b/sdk/src/main/java/org/zstack/sdk/PciDeviceVirtState.java
new file mode 100644
index 00000000000..7a56eabf9c0
--- /dev/null
+++ b/sdk/src/main/java/org/zstack/sdk/PciDeviceVirtState.java
@@ -0,0 +1,8 @@
+package org.zstack.sdk;
+
+public enum PciDeviceVirtState {
+    UNVIRTUALIZABLE,
+    VIRTUALIZABLE,
+    VIRTUALIZED,
+    VIRTUAL,
+}