From 45059356f5087331d0fc82afd9c50996312e8b9a Mon Sep 17 00:00:00 2001
From: "xinhao.huang"
Date: Tue, 19 May 2026 14:15:46 +0800
Subject: [PATCH] [gpu]: add GPU XID error event alarm

Resolves: ZSTAC-85055

Change-Id: Ifc3701d5052af98f6f76054890acd4d27edfb90d
---
/*
 * Review notes (free text after the three-dash separator; not part of the
 * commit message and not part of any hunk).
 *
 * What the hunks below add:
 *   - HostCanonicalEvents: the path constant HOST_PHYSICAL_GPU_XID_ERROR
 *     ("/host/physicalGpu/xid/error") and the payload bean
 *     HostPhysicalGpuXidErrorData with four String properties:
 *     hostUuid, pcideviceAddress, xidCode, message.
 *   - HostHardware: a new enum constant GPU_XID, placed right after GPU.
 *   - VmCanonicalEvents: the path constant VM_GPU_XID_ERROR
 *     ("/vm/gpu/xid/error") and the payload bean VmGpuXidErrorData with
 *     four String properties: vmUuid, pciDeviceAddress, xidCode, message.
 *   - KVMAgentCommands: the bean VmEventAlarmCmd carrying hostUuid, vmUuid,
 *     eventType and a properties map that is initialised to an empty HashMap.
 *   - KVMConstant: the path constant HOST_VM_EVENT_ALARM
 *     ("/host/vm/event/alarm").
 *   - KVMHostFactory: a new switch arm "case GPU_XID" that passes the command
 *     to every KvmHardwareStatusHandlerExtensionPoint returned by pluginRgty
 *     via handleKvmHardwareStatus(HostHardware.GPU_XID, cmd), mirroring the
 *     GPU arm visible in the hunk context.
 *
 * NOTE(review): the host payload spells the property "pcideviceAddress" while
 *   the VM payload spells it "pciDeviceAddress". Presumably one of them is
 *   meant to match an existing payload convention - confirm before merging,
 *   since the accessor names differ between the two classes.
 * NOTE(review): nothing in this patch reads HOST_VM_EVENT_ALARM, consumes
 *   VmEventAlarmCmd, or references the two new canonical event paths and
 *   payload classes. Presumably that wiring lives in another change - confirm.
 * NOTE(review): VmEventAlarmCmd.properties appears as a raw Map in this copy.
 *   The type arguments may have been lost in transit (the From: header above
 *   also lacks its address) - compare against the commit itself.
 * NOTE(review): this copy had lost its line breaks and indentation; the
 *   whitespace below is reconstructed assuming 4-space indents. Apply from the
 *   original commit rather than from this text if context matching matters.
 */
 .../header/host/HostCanonicalEvents.java      | 41 +++++++++++++++++++
 .../org/zstack/header/host/HostHardware.java  |  1 +
 .../zstack/header/vm/VmCanonicalEvents.java   | 41 +++++++++++++++++++
 .../java/org/zstack/kvm/KVMAgentCommands.java | 39 ++++++++++++++++++
 .../main/java/org/zstack/kvm/KVMConstant.java |  1 +
 .../java/org/zstack/kvm/KVMHostFactory.java   |  5 +++
 6 files changed, 128 insertions(+)

diff --git a/header/src/main/java/org/zstack/header/host/HostCanonicalEvents.java b/header/src/main/java/org/zstack/header/host/HostCanonicalEvents.java
index cca0e26567d..2e4998c964e 100755
--- a/header/src/main/java/org/zstack/header/host/HostCanonicalEvents.java
+++ b/header/src/main/java/org/zstack/header/host/HostCanonicalEvents.java
@@ -26,6 +26,7 @@ public class HostCanonicalEvents {
     public static final String HOST_PHYSICAL_POWER_SUPPLY_STATUS_ABNORMAL = "/host/physicalPowerSupply/status/abnormal";
     public static final String HOST_PHYSICAL_GPU_REMOVE_TRIGGERED = "/host/physicalGpu/remove/triggered";
     public static final String HOST_PHYSICAL_GPU_STATUS_ABNORMAL = "/host/physicalGpu/status/abnormal";
+    public static final String HOST_PHYSICAL_GPU_XID_ERROR = "/host/physicalGpu/xid/error";
     public static final String HOST_PHYSICAL_VGPU_STATUS_ABNORMAL = "/host/physicalVGpu/status/abnormal";
     public static final String HOST_PHYSICAL_RAID_STATUS_ABNORMAL = "/host/physicalRaid/status/abnormal";
     public static final String HOST_PHYSICAL_HBA_STATE_ABNORMAL = "/host/physicalHBA/state/abnormal";
@@ -178,6 +179,46 @@ public void setStatus(String status) {
         }
     }
 
+    @NeedJsonSchema
+    public static class HostPhysicalGpuXidErrorData {
+        private String hostUuid;
+        private String pcideviceAddress;
+        private String xidCode;
+        private String message;
+
+        public String getHostUuid() {
+            return hostUuid;
+        }
+
+        public void setHostUuid(String hostUuid) {
+            this.hostUuid = hostUuid;
+        }
+
+        public String getPcideviceAddress() {
+            return pcideviceAddress;
+        }
+
+        public void setPcideviceAddress(String pcideviceAddress) {
+            this.pcideviceAddress = pcideviceAddress;
+        }
+
+        public String getXidCode() {
+            return xidCode;
+        }
+
+        public void setXidCode(String xidCode) {
+            this.xidCode = xidCode;
+        }
+
+        public String getMessage() {
+            return message;
+        }
+
+        public void setMessage(String message) {
+            this.message = message;
+        }
+    }
+
     @NeedJsonSchema
     public static class HostPhysicalCpuStatusAbnormalData {
         private String hostUuid;
diff --git a/header/src/main/java/org/zstack/header/host/HostHardware.java b/header/src/main/java/org/zstack/header/host/HostHardware.java
index 7b5464eb26a..6043d777253 100644
--- a/header/src/main/java/org/zstack/header/host/HostHardware.java
+++ b/header/src/main/java/org/zstack/header/host/HostHardware.java
@@ -9,6 +9,7 @@ public enum HostHardware {
     MEMORY,
     DISK,
     GPU,
+    GPU_XID,
     POWERSUPPLY,
     FAN,
     RAID,
diff --git a/header/src/main/java/org/zstack/header/vm/VmCanonicalEvents.java b/header/src/main/java/org/zstack/header/vm/VmCanonicalEvents.java
index e6fe6a92272..03b7758d4ea 100755
--- a/header/src/main/java/org/zstack/header/vm/VmCanonicalEvents.java
+++ b/header/src/main/java/org/zstack/header/vm/VmCanonicalEvents.java
@@ -20,6 +20,7 @@ public class VmCanonicalEvents {
     public static final String VM_NIC_INFO_DUPLICATE_PATH = "/vm/nicinfo/duplicate";
     public static final String VM_NIC_INFO_IPRANGE_CONFLICT_PATH = "/vm/nicinfo/iprangeConflict";
     public static final String VM_GPU_STATUS_ABNORMAL = "/vm/gpu/status/abnormal";
+    public static final String VM_GPU_XID_ERROR = "/vm/gpu/xid/error";
 
     @NeedJsonSchema
     public static class VmCrashReportData {
@@ -349,4 +350,44 @@ public void setStatus(String status) {
             this.status = status;
         }
     }
+
+    @NeedJsonSchema
+    public static class VmGpuXidErrorData {
+        private String vmUuid;
+        private String pciDeviceAddress;
+        private String xidCode;
+        private String message;
+
+        public String getVmUuid() {
+            return vmUuid;
+        }
+
+        public void setVmUuid(String vmUuid) {
+            this.vmUuid = vmUuid;
+        }
+
+        public String getPciDeviceAddress() {
+            return pciDeviceAddress;
+        }
+
+        public void setPciDeviceAddress(String pciDeviceAddress) {
+            this.pciDeviceAddress = pciDeviceAddress;
+        }
+
+        public String getXidCode() {
+            return xidCode;
+        }
+
+        public void setXidCode(String xidCode) {
+            this.xidCode = xidCode;
+        }
+
+        public String getMessage() {
+            return message;
+        }
+
+        public void setMessage(String message) {
+            this.message = message;
+        }
+    }
 }
diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java b/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java
index 90e576cad7f..ce091ec46d4 100755
--- a/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java
+++ b/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java
@@ -5147,6 +5147,45 @@ public void setAdditionalProperties(Map additionalProperties) {
         }
     }
 
+    public static class VmEventAlarmCmd {
+        private String hostUuid;
+        private String vmUuid;
+        private String eventType;
+        private Map properties = new HashMap<>();
+
+        public String getHostUuid() {
+            return hostUuid;
+        }
+
+        public void setHostUuid(String hostUuid) {
+            this.hostUuid = hostUuid;
+        }
+
+        public String getVmUuid() {
+            return vmUuid;
+        }
+
+        public void setVmUuid(String vmUuid) {
+            this.vmUuid = vmUuid;
+        }
+
+        public String getEventType() {
+            return eventType;
+        }
+
+        public void setEventType(String eventType) {
+            this.eventType = eventType;
+        }
+
+        public Map getProperties() {
+            return properties;
+        }
+
+        public void setProperties(Map properties) {
+            this.properties = properties;
+        }
+    }
+
     public static class HostProcessPhysicalMemoryUsageAlarmCmd {
         private String hostUuid;
         private String pid;
diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/KVMConstant.java b/plugin/kvm/src/main/java/org/zstack/kvm/KVMConstant.java
index 314ff983470..eabfae13ec9 100755
--- a/plugin/kvm/src/main/java/org/zstack/kvm/KVMConstant.java
+++ b/plugin/kvm/src/main/java/org/zstack/kvm/KVMConstant.java
@@ -145,6 +145,7 @@ public interface KVMConstant {
     String HOST_PHYSICAL_DISK_REMOVE_ALARM_EVENT = "/host/physical/disk/remove/alarm";
     String HOST_PHYSICAL_MEMORY_ECC_ERROR_ALARM_EVENT = "/host/physical/memory/ecc/error/alarm";
     String HOST_PHYSICAL_GPU_REMOVE_ALARM_EVENT = "/host/physical/gpu/remove/alarm";
+    String HOST_VM_EVENT_ALARM = "/host/vm/event/alarm";
     String HOST_STORAGEDEVICE_HBA_STATE_EVENT = "/storagedevice/hba/state/alarm";
     String HOST_PROCESS_PHYSICAL_MEMORY_USAGE_ALARM_PATH = "/host/process/physicalMemory/usage/alarm";
     String HOST_KVMAGENT_STATUS_PATH = "/host/kvmagent/status";
diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/KVMHostFactory.java b/plugin/kvm/src/main/java/org/zstack/kvm/KVMHostFactory.java
index 0188c920a83..5e2dbba66d3 100755
--- a/plugin/kvm/src/main/java/org/zstack/kvm/KVMHostFactory.java
+++ b/plugin/kvm/src/main/java/org/zstack/kvm/KVMHostFactory.java
@@ -723,6 +723,11 @@ public String handleSyncHttpCall(TransmitVmOperationToMnCmd cmd) {
                             ext.handleKvmHardwareStatus(HostHardware.GPU, cmd);
                         }
                         break;
+                    case GPU_XID:
+                        for (KvmHardwareStatusHandlerExtensionPoint ext : pluginRgty.getExtensionList(KvmHardwareStatusHandlerExtensionPoint.class)) {
+                            ext.handleKvmHardwareStatus(HostHardware.GPU_XID, cmd);
+                        }
+                        break;
                     case POWERSUPPLY:
                         physicalPowerSupplyStatusAlarmEvent(cmd);
                         break;