Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package org.zstack.compute.vm;

import org.zstack.core.cloudbus.CloudBusCallBack;
import org.zstack.core.gc.GC;
import org.zstack.core.gc.GCCompletion;
import org.zstack.core.gc.TimeBasedGarbageCollector;
import org.zstack.header.host.HostVO;
import org.zstack.header.message.MessageReply;
import org.zstack.header.storage.primary.CleanupVmInstanceMetadataOnPrimaryStorageMsg;
import org.zstack.header.storage.primary.PrimaryStorageConstant;
import org.zstack.header.storage.primary.PrimaryStorageVO;
import org.zstack.utils.Utils;
import org.zstack.utils.logging.CLogger;

/**
 * Time-based GC job that re-sends a {@link CleanupVmInstanceMetadataOnPrimaryStorageMsg}
 * for a VM until the primary storage reports success.
 *
 * <p>The job cancels itself when the primary storage no longer exists, or when a
 * host UUID was recorded and that host no longer exists.</p>
 */
public class CleanupVmInstanceMetadataOnPrimaryStorageGC extends TimeBasedGarbageCollector {
    private static final CLogger logger = Utils.getLogger(CleanupVmInstanceMetadataOnPrimaryStorageGC.class);

    /** UUID of the primary storage the cleanup message is routed to. */
    @GC
    public String primaryStorageUuid;
    /** UUID of the VM whose metadata is to be cleaned up. */
    @GC
    public String vmUuid;
    /** UUID of the VM's root volume, forwarded unchanged in the cleanup message. */
    @GC
    public String rootVolumeUuid;
    /** Metadata path, forwarded unchanged in the cleanup message. */
    @GC
    public String metadataPath;
    /** Host UUID forwarded in the cleanup message; may be null, in which case the host check is skipped. */
    @GC
    public String hostUuid;

    /**
     * Builds the GC job name for a VM.
     *
     * @param vmUuid UUID of the VM
     * @return the job name, {@code gc-cleanup-vm-metadata-<vmUuid>}
     */
    public static String getGCName(String vmUuid) {
        return String.format("gc-cleanup-vm-metadata-%s", vmUuid);
    }

    /**
     * Runs one GC attempt: cancels when the primary storage or the recorded host is gone,
     * otherwise sends the cleanup message and maps the reply onto the completion.
     *
     * @param completion receives cancel / success / fail for this attempt
     */
    @Override
    protected void triggerNow(GCCompletion completion) {
        // Nothing left to clean once the primary storage record is gone.
        boolean primaryStoragePresent = dbf.isExist(primaryStorageUuid, PrimaryStorageVO.class);
        if (!primaryStoragePresent) {
            logger.debug(String.format("[MetadataCleanupGC] primary storage[uuid:%s] no longer exists, " +
                    "cancel gc for vm[uuid:%s]", primaryStorageUuid, vmUuid));
            completion.cancel();
            return;
        }

        // The host check only applies when a host UUID was recorded for this job.
        if (hostUuid != null) {
            boolean hostPresent = dbf.isExist(hostUuid, HostVO.class);
            if (!hostPresent) {
                logger.debug(String.format("[MetadataCleanupGC] host[uuid:%s] no longer exists, " +
                        "cancel gc for vm[uuid:%s]", hostUuid, vmUuid));
                completion.cancel();
                return;
            }
        }

        CleanupVmInstanceMetadataOnPrimaryStorageMsg cleanupMsg = newCleanupMsg();
        bus.makeTargetServiceIdByResourceUuid(cleanupMsg, PrimaryStorageConstant.SERVICE_ID, primaryStorageUuid);
        bus.send(cleanupMsg, new CloudBusCallBack(completion) {
            @Override
            public void run(MessageReply reply) {
                if (!reply.isSuccess()) {
                    logger.warn(String.format("[MetadataCleanupGC] failed to clean up metadata " +
                            "for vm[uuid:%s] on ps[uuid:%s]: %s", vmUuid, primaryStorageUuid, reply.getError()));
                    completion.fail(reply.getError());
                    return;
                }

                logger.info(String.format("[MetadataCleanupGC] successfully cleaned up metadata " +
                        "for vm[uuid:%s] on ps[uuid:%s]", vmUuid, primaryStorageUuid));
                completion.success();
            }
        });
    }

    /** Creates the cleanup message populated from this job's persisted fields. */
    private CleanupVmInstanceMetadataOnPrimaryStorageMsg newCleanupMsg() {
        CleanupVmInstanceMetadataOnPrimaryStorageMsg cleanupMsg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
        cleanupMsg.setPrimaryStorageUuid(primaryStorageUuid);
        cleanupMsg.setVmUuid(vmUuid);
        cleanupMsg.setRootVolumeUuid(rootVolumeUuid);
        cleanupMsg.setMetadataPath(metadataPath);
        cleanupMsg.setHostUuid(hostUuid);
        return cleanupMsg;
    }
}
128 changes: 128 additions & 0 deletions compute/src/main/java/org/zstack/compute/vm/VmExpungeMetadataFlow.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package org.zstack.compute.vm;

import org.springframework.beans.factory.annotation.Autowire;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Configurable;
import org.zstack.core.cloudbus.CloudBus;
import org.zstack.core.cloudbus.CloudBusCallBack;
import org.zstack.core.componentloader.PluginRegistry;
import org.zstack.core.db.Q;
import org.zstack.header.core.workflow.FlowTrigger;
import org.zstack.header.core.workflow.NoRollbackFlow;
import org.zstack.header.message.MessageReply;
import org.zstack.header.storage.primary.CleanupVmInstanceMetadataOnPrimaryStorageMsg;
import org.zstack.header.storage.primary.PrimaryStorageConstant;
import org.zstack.header.storage.primary.PrimaryStorageVO;
import org.zstack.header.storage.primary.PrimaryStorageVO_;
import org.zstack.header.vm.VmInstanceConstant;
import org.zstack.header.vm.VmInstanceSpec;
import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint;
import org.zstack.header.volume.VolumeInventory;
import org.zstack.utils.Utils;
import org.zstack.utils.logging.CLogger;

import java.util.Map;
import java.util.concurrent.TimeUnit;

@Configurable(preConstruction = true, autowire = Autowire.BY_TYPE)
public class VmExpungeMetadataFlow extends NoRollbackFlow {
private static final CLogger logger = Utils.getLogger(VmExpungeMetadataFlow.class);

@Autowired
private CloudBus bus;
@Autowired
private PluginRegistry pluginRgty;

/**
 * Best-effort removal of the VM's metadata from the primary storage that holds its root volume.
 *
 * <p>This flow never fails the chain: every early-exit path below, and the reply callback,
 * ends with {@code trigger.next()}. When the cleanup message is sent but fails, a GC job is
 * submitted so the cleanup is retried later.</p>
 *
 * <p>Cleanup is skipped (with a log line) when the spec or VM inventory is missing, the root
 * volume has no primary storage UUID, the primary storage or its metadata path extension
 * cannot be found, or the metadata path cannot be built or is null/blank.</p>
 *
 * @param trigger flow trigger; {@code next()} is invoked on every path handled here
 * @param data    flow data map; the {@link VmInstanceSpec} is read from the
 *                {@code VmInstanceConstant.Params.VmInstanceSpec} key
 */
@Override
public void run(FlowTrigger trigger, Map data) {
    final VmInstanceSpec spec = (VmInstanceSpec) data.get(VmInstanceConstant.Params.VmInstanceSpec.toString());
    if (spec == null || spec.getVmInventory() == null) {
        logger.warn("[MetadataExpunge] missing VmInstanceSpec or VmInventory, skip metadata cleanup");
        trigger.next();
        return;
    }

    final String vmUuid = spec.getVmInventory().getUuid();

    VolumeInventory rootVolume = spec.getVmInventory().getRootVolume();
    String psUuid = rootVolume != null ? rootVolume.getPrimaryStorageUuid() : null;
    if (psUuid == null) {
        logger.debug(String.format("[MetadataExpunge] vm[uuid:%s] root volume has no primaryStorageUuid, " +
                "skipping metadata cleanup", vmUuid));
        trigger.next();
        return;
    }

    // The primary storage type selects which path-building extension to use.
    String psType = Q.New(PrimaryStorageVO.class).select(PrimaryStorageVO_.type).eq(PrimaryStorageVO_.uuid, psUuid).findValue();
    if (psType == null) {
        logger.warn(String.format("[MetadataExpunge] primary storage[uuid:%s] not found for vm[uuid:%s], " +
                "skip metadata cleanup", psUuid, vmUuid));
        trigger.next();
        return;
    }

    VmMetadataPathBuildExtensionPoint ext = pluginRgty.getExtensionFromMap(psType, VmMetadataPathBuildExtensionPoint.class);
    if (ext == null) {
        logger.warn(String.format("[MetadataExpunge] no VmMetadataPathBuildExtensionPoint found for ps[uuid:%s, type:%s], " +
                "skip metadata cleanup", psUuid, psType));
        trigger.next();
        return;
    }

    final String metadataPath;
    try {
        metadataPath = ext.buildVmMetadataPath(psUuid, vmUuid);
    } catch (Exception e) {
        logger.warn(String.format("[MetadataExpunge] failed to build metadata path for vm[uuid:%s] on ps[uuid:%s], " +
                "skip metadata cleanup: %s", vmUuid, psUuid, e.getMessage()));
        trigger.next();
        return;
    }

    // A null or blank path is as unusable as a failed build: sending it would fail, and the
    // same invalid path would then be persisted into a GC job that could never succeed.
    if (metadataPath == null || metadataPath.trim().isEmpty()) {
        logger.warn(String.format("[MetadataExpunge] empty metadata path for vm[uuid:%s] on ps[uuid:%s], " +
                "skip metadata cleanup", vmUuid, psUuid));
        trigger.next();
        return;
    }

    final String rootVolumeUuid = rootVolume.getUuid();
    CleanupVmInstanceMetadataOnPrimaryStorageMsg cmsg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
    cmsg.setPrimaryStorageUuid(psUuid);
    cmsg.setVmUuid(vmUuid);
    cmsg.setMetadataPath(metadataPath);
    cmsg.setRootVolumeUuid(rootVolumeUuid);

    // Use the current host, falling back to the last host when none is set; may still be null.
    String hostUuid = spec.getVmInventory().getHostUuid();
    if (hostUuid == null) {
        hostUuid = spec.getVmInventory().getLastHostUuid();
    }
    cmsg.setHostUuid(hostUuid);

    // Final copies for capture by the anonymous callback.
    final String finalPsUuid = psUuid;
    final String finalHostUuid = hostUuid;

    bus.makeTargetServiceIdByResourceUuid(cmsg, PrimaryStorageConstant.SERVICE_ID, psUuid);
    bus.send(cmsg, new CloudBusCallBack(trigger) {
        @Override
        public void run(MessageReply reply) {
            if (reply.isSuccess()) {
                logger.info(String.format("[MetadataExpunge] successfully deleted metadata for vm[uuid:%s] on ps[uuid:%s]",
                        vmUuid, finalPsUuid));
            } else {
                logger.warn(String.format("[MetadataExpunge] failed to delete metadata for vm[uuid:%s] on ps[uuid:%s]: %s, " +
                        "submitting GC job for retry", vmUuid, finalPsUuid, reply.getError()));
                submitGC(finalPsUuid, vmUuid, rootVolumeUuid, metadataPath, finalHostUuid);
            }
            // The flow continues whether or not the cleanup succeeded.
            trigger.next();
        }
    });
}

private void submitGC(String psUuid, String vmUuid, String rootVolumeUuid, String metadataPath, String hostUuid) {
CleanupVmInstanceMetadataOnPrimaryStorageGC gc = new CleanupVmInstanceMetadataOnPrimaryStorageGC();
gc.NAME = CleanupVmInstanceMetadataOnPrimaryStorageGC.getGCName(vmUuid);
gc.primaryStorageUuid = psUuid;
gc.vmUuid = vmUuid;
gc.rootVolumeUuid = rootVolumeUuid;
gc.metadataPath = metadataPath;
gc.hostUuid = hostUuid;
long gcIntervalSec = TimeUnit.HOURS.toSeconds(VmGlobalConfig.VM_METADATA_CLEANUP_GC_INTERVAL.value(Long.class));
gc.deduplicateSubmit(gcIntervalSec, TimeUnit.SECONDS);
Comment on lines +72 to +124
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

把空的 metadataPath 也当成构建失败处理。

这里只有异常分支,没有处理 buildVmMetadataPath() 返回空串/空白串的情况。现在这种输入会继续下发 cleanup,并在失败后把同一个无效路径写进 GC,后续重试只会一直空转。

🛠️ 建议修正
         try {
             metadataPath = ext.buildVmMetadataPath(psUuid, vmUuid);
         } catch (Exception e) {
             logger.warn(String.format("[MetadataExpunge] failed to build metadata path for vm[uuid:%s] on ps[uuid:%s], " +
                     "skip metadata cleanup: %s", vmUuid, psUuid, e.getMessage()));
             trigger.next();
             return;
         }
+        if (metadataPath == null || metadataPath.trim().isEmpty()) {
+            logger.warn(String.format("[MetadataExpunge] empty metadata path for vm[uuid:%s] on ps[uuid:%s], skip metadata cleanup",
+                    vmUuid, psUuid));
+            trigger.next();
+            return;
+        }
 
         String rootVolumeUuid = rootVolume.getUuid();
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@compute/src/main/java/org/zstack/compute/vm/VmExpungeMetadataFlow.java`
around lines 72 - 124, The code currently only handles exceptions from
ext.buildVmMetadataPath(...) but lets empty or whitespace metadataPath proceed;
add a validation right after the try/catch to treat null or
metadataPath.trim().isEmpty() as a build failure: log a warning similar to the
exception branch (mentioning vmUuid and psUuid), call trigger.next() and return
so CleanupVmInstanceMetadataOnPrimaryStorageMsg and submitGC are not invoked
with an invalid path; make the check reference metadataPath and the
methods/classes CleanupVmInstanceMetadataOnPrimaryStorageMsg, submitGC, and
buildVmMetadataPath for locating where to apply the change.


logger.info(String.format("[MetadataExpunge] submitted GC job [%s] for vm[uuid:%s] on ps[uuid:%s]", gc.NAME, vmUuid, psUuid));
}
}
36 changes: 36 additions & 0 deletions compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,40 @@ public class VmGlobalConfig {
@GlobalConfigValidation(validValues = {"None", "AuthenticAMD"})
@BindResourceConfig(value = {VmInstanceVO.class})
public static GlobalConfig VM_CPUID_VENDOR = new GlobalConfig(CATEGORY, "vm.cpuid.vendor");

// ---- VM metadata global configs (keys under "vm.metadata.*") ----
// NOTE(review): units and exact semantics of most values are not visible in this
// file; the comments below restate only the declared key and validation bounds.

// "vm.metadata.enabled": accepts only "true" or "false".
@GlobalConfigValidation(validValues = {"true", "false"})
public static GlobalConfig VM_METADATA_ENABLED = new GlobalConfig(CATEGORY, "vm.metadata.enabled");

// "vm.metadata.lastRefreshVersion": no value constraints are declared on the annotation.
@GlobalConfigValidation()
public static GlobalConfig VM_METADATA_LAST_REFRESH_VERSION = new GlobalConfig(CATEGORY, "vm.metadata.lastRefreshVersion");

// "vm.metadata.flush.concurrency": validated with numberGreaterThan = 0, numberLessThan = 100.
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 100)
public static GlobalConfig VM_METADATA_FLUSH_CONCURRENCY = new GlobalConfig(CATEGORY, "vm.metadata.flush.concurrency");

// "vm.metadata.flush.pollInterval": validated with numberGreaterThan = 0, numberLessThan = 300.
// presumably seconds — TODO confirm against the consumer.
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 300)
public static GlobalConfig VM_METADATA_FLUSH_POLL_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.flush.pollInterval");

// "vm.metadata.flush.batchSize": validated with numberGreaterThan = 0, numberLessThan = 1000.
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 1000)
public static GlobalConfig VM_METADATA_FLUSH_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.flush.batchSize");

// "vm.metadata.cleanup.gc.interval": validated with numberGreaterThan = 0, numberLessThan = 168.
// VmExpungeMetadataFlow.submitGC converts this value with TimeUnit.HOURS.toSeconds(...),
// i.e. it is interpreted as hours there.
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 168)
public static GlobalConfig VM_METADATA_CLEANUP_GC_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.cleanup.gc.interval");

// "vm.metadata.flush.maxRetry": validated with numberGreaterThan = 0, numberLessThan = 100.
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 100)
public static GlobalConfig VM_METADATA_FLUSH_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.flush.maxRetry");

// "vm.metadata.flush.zombieClaimThreshold": validated with numberGreaterThan = 0, numberLessThan = 120.
// NOTE(review): time unit not visible here — confirm against the consumer.
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 120)
public static GlobalConfig VM_METADATA_FLUSH_ZOMBIE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.flush.zombieClaimThreshold");

// "vm.metadata.maintenance.contentDriftInterval": validated with numberGreaterThan = 0, numberLessThan = 86400.
// presumably seconds (86400 = one day) — TODO confirm.
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 86400)
public static GlobalConfig VM_METADATA_MAINTENANCE_CONTENT_DRIFT_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.contentDriftInterval");

// "vm.metadata.maintenance.staleRecoveryInterval": validated with numberGreaterThan = 0, numberLessThan = 86400.
// presumably seconds (86400 = one day) — TODO confirm.
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 86400)
public static GlobalConfig VM_METADATA_MAINTENANCE_STALE_RECOVERY_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.staleRecoveryInterval");

// "vm.metadata.maintenance.staleRecoveryMaxCycles": validated with numberGreaterThan = 0, numberLessThan = 1000.
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 1000)
public static GlobalConfig VM_METADATA_MAINTENANCE_STALE_RECOVERY_MAX_CYCLES = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.staleRecoveryMaxCycles");

// "vm.metadata.payload.rejectThreshold": validated with numberGreaterThan = 0; no upper bound declared.
// NOTE(review): unit (bytes? entries?) not visible here — confirm against the consumer.
@GlobalConfigValidation(numberGreaterThan = 0)
public static GlobalConfig VM_METADATA_PAYLOAD_REJECT_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.payload.rejectThreshold");
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,19 @@
import org.zstack.header.network.l3.*;
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO;
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO_;
import org.zstack.header.storage.primary.PrimaryStorageVO;
import org.zstack.header.storage.primary.PrimaryStorageVO_;
import org.zstack.header.storage.snapshot.VolumeSnapshotVO;
import org.zstack.header.storage.snapshot.VolumeSnapshotVO_;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_;
import org.zstack.header.vm.*;
import org.zstack.header.vm.cdrom.*;
import org.zstack.header.vm.metadata.APIRegisterVmInstanceFromMetadataMsg;
import org.zstack.header.vm.devices.VmInstanceResourceMetadataGroupVO;
import org.zstack.header.vm.devices.VmInstanceResourceMetadataGroupVO_;
import org.zstack.header.vm.metadata.VmInstanceMetadataConstants;
import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint;
import org.zstack.header.volume.*;
import org.zstack.network.l2.L2NetworkHostUtils;
import org.zstack.resourceconfig.ResourceConfigFacade;
Expand Down Expand Up @@ -166,6 +171,8 @@ else if (msg instanceof APIAttachVmNicToVmMsg) {
validate((APIConvertTemplatedVmInstanceToVmInstanceMsg) msg);
} else if (msg instanceof APIDeleteTemplatedVmInstanceMsg) {
validate((APIDeleteTemplatedVmInstanceMsg) msg);
} else if (msg instanceof APIRegisterVmInstanceFromMetadataMsg) {
validate((APIRegisterVmInstanceFromMetadataMsg) msg);
}

if (msg instanceof NewVmInstanceMessage2) {
Expand Down Expand Up @@ -411,6 +418,13 @@ private void validate(final APIUpdateVmNicDriverMsg msg) {
if (vo.getState() != VmInstanceState.Stopped) {
throw new ApiMessageInterceptionException(argerr("vm nic driver type can be updated only when the vm is stopped"));
}

boolean isConsistent = Q.New(VmNicVO.class).eq(VmNicVO_.uuid, msg.getVmNicUuid())
.eq(VmNicVO_.vmInstanceUuid, msg.getVmInstanceUuid()).isExists();
if (!isConsistent) {
throw new ApiMessageInterceptionException(argerr("vmNicUuid[%s] does not belong to vmInstanceUuid[%s]",
msg.getVmNicUuid(), msg.getVmInstanceUuid()));
}
}

private void validate(final APIGetCandidatePrimaryStoragesForCreatingVmMsg msg) {
Expand Down Expand Up @@ -1318,4 +1332,34 @@ private void validate(APIFstrimVmMsg msg) {
}
msg.setHostUuid(t.get(1, String.class));
}

/**
 * Validates a request to register a VM instance from metadata.
 *
 * <p>Rejects the message when the metadata path is empty, when the primary storage
 * cannot be found, when no {@link VmMetadataPathBuildExtensionPoint} is registered for
 * the primary storage type, or when that extension reports a path validation error.</p>
 *
 * @param msg the API message to validate
 * @throws ApiMessageInterceptionException if any of the checks above fails
 */
private void validate(APIRegisterVmInstanceFromMetadataMsg msg) {
    final String metadataPath = msg.getMetadataPath();
    if (StringUtils.isEmpty(metadataPath)) {
        throw new ApiMessageInterceptionException(argerr("metadataPath cannot be empty or null"));
    }

    // Look up the storage type; it is the key used to find the path extension.
    final String primaryStorageUuid = msg.getPrimaryStorageUuid();
    final String primaryStorageType = Q.New(PrimaryStorageVO.class)
            .select(PrimaryStorageVO_.type)
            .eq(PrimaryStorageVO_.uuid, primaryStorageUuid)
            .findValue();
    if (primaryStorageType == null) {
        throw new ApiMessageInterceptionException(argerr(
                "primary storage[uuid:%s] not found", primaryStorageUuid));
    }

    final VmMetadataPathBuildExtensionPoint pathExtension =
            pluginRgty.getExtensionFromMap(primaryStorageType, VmMetadataPathBuildExtensionPoint.class);
    if (pathExtension == null) {
        throw new ApiMessageInterceptionException(argerr(
                "primary storage[uuid:%s, type:%s] does not support vm metadata", primaryStorageUuid, primaryStorageType));
    }

    // The storage-specific extension decides whether the path is acceptable;
    // a null result means no error was reported.
    final String validationError = pathExtension.validateMetadataPath(primaryStorageUuid, metadataPath);
    if (validationError == null) {
        return;
    }
    throw new ApiMessageInterceptionException(argerr(validationError));
}
}
4 changes: 4 additions & 0 deletions compute/src/main/java/org/zstack/compute/vm/VmSystemTags.java
Original file line number Diff line number Diff line change
Expand Up @@ -307,4 +307,8 @@ public String desensitizeTag(SystemTag systemTag, String tag) {
}

public static PatternedSystemTag VM_STATE_PAUSED_AFTER_MIGRATE = new PatternedSystemTag(("vmPausedAfterMigrate"), VmInstanceVO.class);

// Token name used as the placeholder in the VM_METADATA_REGISTERING_MN_UUID tag pattern.
public static String VM_METADATA_REGISTERING_MN_UUID_TOKEN = "registeringMnUuid";
// System tag on VmInstanceVO with the pattern "vmMetadata::registeringMnUuid::{registeringMnUuid}".
// presumably records the management node currently registering the VM from metadata — TODO confirm with the code that sets it.
public static PatternedSystemTag VM_METADATA_REGISTERING_MN_UUID = new PatternedSystemTag(
String.format("vmMetadata::registeringMnUuid::{%s}", VM_METADATA_REGISTERING_MN_UUID_TOKEN), VmInstanceVO.class);
}
26 changes: 26 additions & 0 deletions conf/db/upgrade/V5.0.0__schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- VmMetadataDirtyVO: at most one row per VM (primary key is vmInstanceUuid).
-- The row is deleted automatically when the referenced VmInstanceEO row is deleted (ON DELETE CASCADE).
-- managementNodeUuid is reset to NULL when the referenced ManagementNodeVO row is deleted (ON DELETE SET NULL).
-- NOTE(review): presumably tracks VMs whose metadata is pending a flush, with claim and retry
-- bookkeeping (dirtyVersion, lastClaimTime, retryCount, nextRetryTime) - confirm against the entity class.
CREATE TABLE IF NOT EXISTS `zstack`.`VmMetadataDirtyVO` (
`vmInstanceUuid` VARCHAR(32) NOT NULL,
`managementNodeUuid` VARCHAR(32) DEFAULT NULL,
`dirtyVersion` BIGINT NOT NULL DEFAULT 1,
`lastClaimTime` TIMESTAMP NULL DEFAULT NULL,
`storageStructureChange` TINYINT(1) NOT NULL DEFAULT 0,
`retryCount` INT NOT NULL DEFAULT 0,
`nextRetryTime` TIMESTAMP NULL DEFAULT NULL,
`lastOpDate` timestamp on update CURRENT_TIMESTAMP,
`createDate` timestamp NOT NULL DEFAULT '1999-12-31 23:59:59',
PRIMARY KEY (`vmInstanceUuid`),
CONSTRAINT `fkVmMetadataDirtyVOVmInstanceEO` FOREIGN KEY (`vmInstanceUuid`) REFERENCES `VmInstanceEO` (`uuid`) ON DELETE CASCADE,
CONSTRAINT `fkVmMetadataDirtyVOManagementNodeVO` FOREIGN KEY (`managementNodeUuid`) REFERENCES `ManagementNodeVO` (`uuid`) ON DELETE SET NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- VmMetadataFingerprintVO: at most one row per VM (primary key is vmInstanceUuid).
-- The row is deleted automatically when the referenced VmInstanceEO row is deleted (ON DELETE CASCADE).
-- NOTE(review): presumably stores the last metadata content written for the VM (metadataSnapshot)
-- plus flush status (lastFlushTime, lastFlushFailed, staleRecoveryCount) - confirm against the entity class.
CREATE TABLE IF NOT EXISTS `zstack`.`VmMetadataFingerprintVO` (
`vmInstanceUuid` VARCHAR(32) NOT NULL,
`metadataSnapshot` LONGTEXT,
`lastFlushTime` TIMESTAMP NULL DEFAULT NULL,
`lastFlushFailed` TINYINT(1) NOT NULL DEFAULT 0,
`staleRecoveryCount` INT NOT NULL DEFAULT 0,
`lastOpDate` timestamp on update CURRENT_TIMESTAMP,
`createDate` timestamp NOT NULL DEFAULT '1999-12-31 23:59:59',
PRIMARY KEY (`vmInstanceUuid`),
CONSTRAINT `fkVmMetadataFingerprintVOVmInstanceEO` FOREIGN KEY (`vmInstanceUuid`) REFERENCES `VmInstanceEO` (`uuid`) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Loading