diff --git a/compute/src/main/java/org/zstack/compute/allocator/HostAllocatorManagerImpl.java b/compute/src/main/java/org/zstack/compute/allocator/HostAllocatorManagerImpl.java index 2f59c6fb49f..58f6c159f6d 100755 --- a/compute/src/main/java/org/zstack/compute/allocator/HostAllocatorManagerImpl.java +++ b/compute/src/main/java/org/zstack/compute/allocator/HostAllocatorManagerImpl.java @@ -38,6 +38,10 @@ import org.zstack.header.vm.VmAbnormalLifeCycleStruct; import org.zstack.header.vm.VmAbnormalLifeCycleStruct.VmAbnormalLifeCycleOperation; import org.zstack.header.vm.VmInstanceState; +import org.zstack.header.vm.VmInstanceVO; +import org.zstack.header.vm.VmInstanceVO_; +import org.zstack.header.vm.VmSchedHistoryVO; +import org.zstack.header.vm.VmSchedHistoryVO_; import org.zstack.header.volume.VolumeFormat; import org.zstack.header.zone.ZoneVO; import org.zstack.utils.CollectionUtils; @@ -253,6 +257,9 @@ public String call(HostUsedCpuMem arg) { public HostCapacityVO call(HostCapacityVO cap) { long before = cap.getAvailableMemory(); long avail = s.usedMemory == null ? cap.getTotalMemory() : cap.getTotalMemory() - s.usedMemory; + if (isMigrationDestHost(s.hostUuid) && avail > before) { + avail = before; + } cap.setAvailableMemory(avail); long totalCpu = cpuRatioMgr.calculateHostCpuByRatio(s.hostUuid, cap.getCpuNum()); @@ -277,6 +284,20 @@ public HostCapacityVO call(HostCapacityVO cap) { } } + private boolean isMigrationDestHost(String hostUuid) { + List migratingVmUuids = Q.New(VmInstanceVO.class) + .eq(VmInstanceVO_.state, VmInstanceState.Migrating) + .select(VmInstanceVO_.uuid) + .listValues(); + if (migratingVmUuids.isEmpty()) { + return false; + } + return Q.New(VmSchedHistoryVO.class) + .in(VmSchedHistoryVO_.vmInstanceUuid, migratingVmUuids) + .eq(VmSchedHistoryVO_.destHostUuid, hostUuid) + .isExists(); + } + private void handle(ReturnHostCapacityMsg msg) { returnComputeResourceCapacity(msg.getHostUuid(), msg.getCpuCapacity(), msg.getMemoryCapacity()); } diff --git a/test/src/test/groovy/org/zstack/test/integration/kvm/capacity/RecalculateHostCapacityKeepInflightReserveCase.groovy b/test/src/test/groovy/org/zstack/test/integration/kvm/capacity/RecalculateHostCapacityKeepInflightReserveCase.groovy new file mode 100644 index 00000000000..cc10e78967e --- /dev/null +++ b/test/src/test/groovy/org/zstack/test/integration/kvm/capacity/RecalculateHostCapacityKeepInflightReserveCase.groovy @@ -0,0 +1,204 @@ +package org.zstack.test.integration.kvm.capacity + +import org.springframework.http.HttpEntity +import org.zstack.core.cloudbus.CloudBus +import org.zstack.core.db.Q +import org.zstack.header.allocator.HostAllocatorConstant +import org.zstack.header.allocator.HostCapacityVO +import org.zstack.header.allocator.HostCapacityVO_ +import org.zstack.header.host.RecalculateHostCapacityMsg +import org.zstack.kvm.KVMGlobalConfig +import org.zstack.sdk.HostInventory +import org.zstack.sdk.VmInstanceInventory +import org.zstack.test.integration.kvm.KvmTest +import org.zstack.testlib.EnvSpec +import org.zstack.testlib.SubCase +import org.zstack.utils.Utils +import org.zstack.utils.data.SizeUnit +import org.zstack.utils.logging.CLogger + +import static org.zstack.kvm.KVMConstant.KVM_MIGRATE_VM_PATH + +/** + * ZSTAC-85091: while a VM is live-migrating, the destination host already has + * its memory reserved (HostCapacityVO.availableMemory decremented) but the VM + * row still points at the source host. The periodic RecalculateHostCapacity + * recomputed availableMemory = total - sum(landed Running VMs), which erased + * the in-flight reservation and let the scheduler double-book the memory, + * causing OOM on big VMs. This case migrates a VM for real, and at the + * in-flight moment (intercepted on the KVM migrate agent path) triggers a + * recalculation and asserts the destination's available memory is not raised. + */ +class RecalculateHostCapacityKeepInflightReserveCase extends SubCase { + private static final CLogger logger = Utils.getLogger(RecalculateHostCapacityKeepInflightReserveCase.class) + + EnvSpec env + CloudBus bus + + long destTotal = -1 + long destAvailInflight = -1 + long destAvailAfterRecalc = -1 + + @Override + void setup() { + useSpring(KvmTest.springSpec) + } + + @Override + void environment() { + env = env { + instanceOffering { + name = "instanceOffering" + memory = SizeUnit.GIGABYTE.toByte(8) + cpu = 4 + } + + diskOffering { + name = "diskOffering" + diskSize = SizeUnit.GIGABYTE.toByte(20) + } + + sftpBackupStorage { + name = "sftp" + url = "/sftp" + username = "root" + password = "password" + hostname = "localhost" + + image { + name = "image" + url = "http://zstack.org/download/test.qcow2" + } + } + + zone { + name = "zone" + cluster { + name = "cluster" + hypervisorType = "KVM" + + kvm { + name = "src" + managementIp = "127.0.0.1" + username = "root" + password = "password" + totalMem = SizeUnit.GIGABYTE.toByte(64) + } + kvm { + name = "dst" + managementIp = "127.0.0.2" + username = "root" + password = "password" + totalMem = SizeUnit.GIGABYTE.toByte(64) + } + + attachPrimaryStorage("nfs") + attachL2Network("l2") + } + + nfsPrimaryStorage { + name = "nfs" + url = "localhost:/nfs_ps" + } + + l2NoVlanNetwork { + name = "l2" + physicalInterface = "eth0" + + l3Network { + name = "l3" + ip { + startIp = "192.168.100.10" + endIp = "192.168.100.100" + netmask = "255.255.255.0" + gateway = "192.168.100.1" + } + } + } + + attachBackupStorage("sftp") + } + + vm { + name = "vm" + useInstanceOffering("instanceOffering") + useImage("image") + useL3Networks("l3") + useRootDiskOffering("diskOffering") + useHost("src") + } + } + } + + @Override + void test() { + env.create { + bus = bean(CloudBus.class) + recalcMustKeepInflightMigrateReserve() + } + } + + void recalcMustKeepInflightMigrateReserve() { + VmInstanceInventory vm = env.inventoryByName("vm") as VmInstanceInventory + HostInventory dst = env.inventoryByName("dst") as HostInventory + + KVMGlobalConfig.MIGRATE_AUTO_CONVERGE.updateValue(false) + + // At this point in the migrate workflow the VM is already Migrating and + // the destination host capacity has been reserved. Capture the reserved + // availability, then run a recalculation and capture the result. + env.afterSimulator(KVM_MIGRATE_VM_PATH) { rsp, HttpEntity entity -> + destTotal = capValue(dst.uuid, HostCapacityVO_.totalMemory) + destAvailInflight = capValue(dst.uuid, HostCapacityVO_.availableMemory) + + // RecalculateHostCapacityMsg has no reply handler (internal periodic + // task, no SDK Action), so send async and poll until the recompute + // settles instead of blocking on a reply that never comes. + RecalculateHostCapacityMsg msg = new RecalculateHostCapacityMsg() + msg.setHostUuid(dst.uuid) + bus.makeLocalServiceId(msg, HostAllocatorConstant.SERVICE_ID) + bus.send(msg) + + retryInSecs(10, 1) { + destAvailAfterRecalc = capValue(dst.uuid, HostCapacityVO_.availableMemory) + assert destAvailAfterRecalc >= 0 + } + return rsp + } + + migrateVm { + vmInstanceUuid = vm.uuid + hostUuid = dst.uuid + } + + logger.warn(String.format("ZSTAC-85091 in-flight capture: destTotal=%d destAvailInflight=%d destAvailAfterRecalc=%d", + destTotal, destAvailInflight, destAvailAfterRecalc)) + + assert destAvailInflight >= 0 : "migrate simulator hook never fired, captured nothing" + + if (destAvailInflight == destTotal) { + logger.warn("ZSTAC-85091: destination available == total at in-flight capture; " + + "migrate reservation not visible at this hook point. Reporting instead of asserting.") + return + } + + assert destAvailAfterRecalc >= destAvailInflight : \ + "recalc lowered available below reserved: after=${destAvailAfterRecalc} reserved=${destAvailInflight}" + assert destAvailAfterRecalc < destTotal : \ + "RecalculateHostCapacity erased in-flight migrate reservation on dest host: " + + "availableAfterRecalc=${destAvailAfterRecalc} == total=${destTotal}, " + + "reservedInflight=${destAvailInflight} (ZSTAC-85091 OOM root cause)" + } + + private long capValue(String hostUuid, def field) { + return Q.New(HostCapacityVO.class) + .eq(HostCapacityVO_.uuid, hostUuid) + .select(field) + .findValue() + } + + @Override + void clean() { + env.delete() + } +}