From 8e7ecd636049718d3c63ec77b65ccf2aa553b4c3 Mon Sep 17 00:00:00 2001 From: "shan.wu" Date: Thu, 14 May 2026 23:23:52 +0800 Subject: [PATCH] [vm]: verify dest state in migrate flow Check the destination host when the hypervisor migration call fails. If the VM is Running there, continue the normal migration flow. That lets DB sync and post hooks run through the standard path. Otherwise fail the flow and keep the rollback behavior. Resolves: ZSTAC-83894 Change-Id: I8b4774a405fc3b1c05d21b6742facd26bc8d03e6 --- .../org/zstack/compute/vm/VmInstanceBase.java | 34 +-- .../compute/vm/VmMigrateOnHypervisorFlow.java | 32 ++- ...MigrateVmFailureCheckTargetHostCase.groovy | 213 ++++++++++++++++++ 3 files changed, 263 insertions(+), 16 deletions(-) create mode 100644 test/src/test/groovy/org/zstack/test/integration/kvm/vm/migrate/MigrateVmFailureCheckTargetHostCase.groovy diff --git a/compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java b/compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java index ba6c4b9e1d6..35b3b07c87b 100755 --- a/compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java +++ b/compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java @@ -7179,25 +7179,30 @@ public void done() { @Override public void handle(final ErrorCode errCode, Map data) { String destHostUuid = spec.getDestHost().getUuid().equals(lastHostUuid) ? null : spec.getDestHost().getUuid(); - extEmitter.failedToMigrateVm(VmInstanceInventory.valueOf(self), destHostUuid, errCode, new NoErrorCompletion(completion) { + rollbackFailedMigrateVm(originState, destHostUuid, errCode, completion); + } + }).start(); + } + + private void rollbackFailedMigrateVm(final VmInstanceState originState, final String destHostUuid, + final ErrorCode errCode, final Completion completion) { + extEmitter.failedToMigrateVm(VmInstanceInventory.valueOf(self), destHostUuid, errCode, new NoErrorCompletion(completion) { + @Override + public void done() { + if (!HostErrors.FAILED_TO_MIGRATE_VM_ON_HYPERVISOR.isEqual(errCode.getCode())) { + changeVmStateInDb(originState.getDrivenEvent()); + completion.fail(errCode); + return; + } + + checkState(originalCopy.getHostUuid(), new NoErrorCompletion(completion) { @Override public void done() { - if (!HostErrors.FAILED_TO_MIGRATE_VM_ON_HYPERVISOR.isEqual(errCode.getCode())) { - changeVmStateInDb(originState.getDrivenEvent()); - completion.fail(errCode); - return; - } - - checkState(originalCopy.getHostUuid(), new NoErrorCompletion(completion) { - @Override - public void done() { - completion.fail(errCode); - } - }); + completion.fail(errCode); } }); } - }).start(); + }); } protected void handle(CancelMigrateVmMsg msg) { @@ -9272,4 +9277,3 @@ public void run(MessageReply reply) { }); } } - diff --git a/compute/src/main/java/org/zstack/compute/vm/VmMigrateOnHypervisorFlow.java b/compute/src/main/java/org/zstack/compute/vm/VmMigrateOnHypervisorFlow.java index 24529a31df3..92dcf722062 100755 --- a/compute/src/main/java/org/zstack/compute/vm/VmMigrateOnHypervisorFlow.java +++ b/compute/src/main/java/org/zstack/compute/vm/VmMigrateOnHypervisorFlow.java @@ -10,6 +10,8 @@ import org.zstack.header.core.workflow.FlowRollback; import org.zstack.header.core.workflow.FlowTrigger; import org.zstack.header.errorcode.ErrorCode; +import org.zstack.header.host.CheckVmStateOnHypervisorMsg; +import org.zstack.header.host.CheckVmStateOnHypervisorReply; import org.zstack.header.host.HostConstant; import org.zstack.header.message.MessageReply; import org.zstack.header.host.MigrateVmOnHypervisorMsg; @@ -18,6 +20,8 @@ import java.util.Map; +import static org.zstack.utils.CollectionDSL.list; + @Configurable(preConstruction = true, autowire = Autowire.BY_TYPE) public class VmMigrateOnHypervisorFlow implements Flow { @Autowired @@ -60,8 +64,34 @@ public void run(MessageReply reply) { if (reply.isSuccess()) { chain.next(); } else { - chain.fail(reply.getError()); + checkVmStateOnDestinationHost(spec, reply.getError(), chain); + } + } + }); + } + + private void checkVmStateOnDestinationHost(final VmInstanceSpec spec, final ErrorCode migrateError, + final FlowTrigger chain) { + CheckVmStateOnHypervisorMsg msg = new CheckVmStateOnHypervisorMsg(); + msg.setVmInstanceUuids(list(spec.getVmInventory().getUuid())); + msg.setHostUuid(spec.getDestHost().getUuid()); + bus.makeTargetServiceIdByResourceUuid(msg, HostConstant.SERVICE_ID, msg.getHostUuid()); + bus.send(msg, new CloudBusCallBack(chain) { + @Override + public void run(MessageReply reply) { + if (!reply.isSuccess()) { + chain.fail(migrateError); + return; } + + CheckVmStateOnHypervisorReply r = reply.castReply(); + String state = r.getStates().get(spec.getVmInventory().getUuid()); + if (VmInstanceState.Running.toString().equals(state)) { + chain.next(); + return; + } + + chain.fail(migrateError); } }); } diff --git a/test/src/test/groovy/org/zstack/test/integration/kvm/vm/migrate/MigrateVmFailureCheckTargetHostCase.groovy b/test/src/test/groovy/org/zstack/test/integration/kvm/vm/migrate/MigrateVmFailureCheckTargetHostCase.groovy new file mode 100644 index 00000000000..711fd5bb96b --- /dev/null +++ b/test/src/test/groovy/org/zstack/test/integration/kvm/vm/migrate/MigrateVmFailureCheckTargetHostCase.groovy @@ -0,0 +1,213 @@ +package org.zstack.test.integration.kvm.vm.migrate + +import org.zstack.core.cloudbus.CloudBus +import org.zstack.header.host.CheckVmStateOnHypervisorMsg +import org.zstack.header.host.CheckVmStateOnHypervisorReply +import org.zstack.header.network.service.NetworkServiceType +import org.zstack.header.vm.VmInstanceState +import org.zstack.kvm.KVMAgentCommands +import org.zstack.kvm.KVMConstant +import org.zstack.network.securitygroup.SecurityGroupConstant +import org.zstack.network.service.flat.FlatNetworkServiceConstant +import org.zstack.network.service.userdata.UserdataConstant +import org.zstack.sdk.HostInventory +import org.zstack.sdk.MigrateVmAction +import org.zstack.sdk.VmInstanceInventory +import org.zstack.test.integration.kvm.KvmTest +import org.zstack.testlib.EnvSpec +import org.zstack.testlib.SubCase +import org.zstack.utils.data.SizeUnit + +class MigrateVmFailureCheckTargetHostCase extends SubCase { + EnvSpec env + + @Override + void setup() { + useSpring(KvmTest.springSpec) + } + + @Override + void environment() { + env = env { + instanceOffering { + name = "instanceOffering" + memory = SizeUnit.GIGABYTE.toByte(1) + cpu = 1 + } + + cephBackupStorage { + name = "ceph-bk" + fsid = "7ff218d9-f525-435f-8a40-3618d1772a64" + monUrls = ["root:password@localhost:23", "root:password@127.0.0.1:23"] + + image { + name = "image1" + url = "http://zstack.org/download/test.qcow2" + } + } + + zone { + name = "zone" + + cluster { + name = "cluster" + hypervisorType = "KVM" + + kvm { + name = "kvm1" + managementIp = "127.0.0.1" + username = "root" + password = "password" + } + + kvm { + name = "kvm2" + managementIp = "127.0.0.2" + username = "root" + password = "password" + } + + attachPrimaryStorage("ceph-pri") + attachL2Network("l2") + } + + cephPrimaryStorage { + name = "ceph-pri" + fsid = "7ff218d9-f525-435f-8a40-3618d1772a64" + monUrls = ["root:password@localhost/?monPort=7777", "root:password@127.0.0.1/?monPort=7777"] + } + + l2NoVlanNetwork { + name = "l2" + physicalInterface = "eth0" + + l3Network { + name = "l3" + + service { + provider = FlatNetworkServiceConstant.FLAT_NETWORK_SERVICE_TYPE_STRING + types = [NetworkServiceType.DHCP.toString(), UserdataConstant.USERDATA_TYPE_STRING] + } + + service { + provider = SecurityGroupConstant.SECURITY_GROUP_PROVIDER_TYPE + types = [SecurityGroupConstant.SECURITY_GROUP_NETWORK_SERVICE_TYPE] + } + + ip { + startIp = "192.168.100.10" + endIp = "192.168.100.100" + netmask = "255.255.255.0" + gateway = "192.168.100.1" + } + } + } + + attachBackupStorage("ceph-bk") + } + + vm { + name = "vm" + useInstanceOffering("instanceOffering") + useImage("image1") + useL3Networks("l3") + } + } + } + + @Override + void test() { + env.create { + testRollbackWhenTargetHostReportsVmNotRunning() + testMigrationSuccessWhenTargetHostReportsVmRunning() + } + } + + @Override + void clean() { + env.delete() + } + + void testRollbackWhenTargetHostReportsVmNotRunning() { + VmInstanceInventory vm = env.inventoryByName("vm") as VmInstanceInventory + HostInventory destHost = findAnotherHost(vm.hostUuid) + + assertRollbackWhenTargetReports(vm, destHost, VmInstanceState.Stopped.toString()) + assertRollbackWhenTargetReports(vm, destHost, VmInstanceState.Paused.toString()) + } + + void assertRollbackWhenTargetReports(VmInstanceInventory vm, HostInventory destHost, String targetHostState) { + List checkedHosts = [] + + mockMigrateVmFailure() + mockVmState(vm.uuid, destHost.uuid, targetHostState, checkedHosts) + + MigrateVmAction.Result result = migrateVmAction(vm.uuid, destHost.uuid).call() + + assert result.error != null + VmInstanceInventory after = queryVmInstance { + conditions = ["uuid=${vm.uuid}".toString()] + }[0] as VmInstanceInventory + assert after.hostUuid == vm.hostUuid + assert after.state == VmInstanceState.Running.toString() + assert checkedHosts[0] == destHost.uuid + } + + void testMigrationSuccessWhenTargetHostReportsVmRunning() { + VmInstanceInventory vm = queryVmInstance { + conditions = ["name=vm"] + }[0] as VmInstanceInventory + HostInventory destHost = findAnotherHost(vm.hostUuid) + List checkedHosts = [] + + mockMigrateVmFailure() + mockVmState(vm.uuid, destHost.uuid, VmInstanceState.Running.toString(), checkedHosts) + + MigrateVmAction.Result result = migrateVmAction(vm.uuid, destHost.uuid).call() + + assert result.error == null + VmInstanceInventory after = queryVmInstance { + conditions = ["uuid=${vm.uuid}".toString()] + }[0] as VmInstanceInventory + assert after.hostUuid == destHost.uuid + assert after.lastHostUuid == vm.hostUuid + assert checkedHosts + assert checkedHosts.every { it == destHost.uuid } + } + + HostInventory findAnotherHost(String hostUuid) { + return queryHost { + conditions = ["uuid!=${hostUuid}".toString()] + }[0] as HostInventory + } + + void mockMigrateVmFailure() { + env.simulator(KVMConstant.KVM_MIGRATE_VM_PATH) { + KVMAgentCommands.MigrateVmResponse rsp = new KVMAgentCommands.MigrateVmResponse() + rsp.setError("mock migration API failure") + return rsp + } + } + + void mockVmState(String vmUuid, String hostUuid, String targetHostState, List checkedHosts) { + env.revokeMessage(CheckVmStateOnHypervisorMsg.class, null) + env.message(CheckVmStateOnHypervisorMsg.class) { CheckVmStateOnHypervisorMsg msg, CloudBus bus -> + CheckVmStateOnHypervisorReply reply = new CheckVmStateOnHypervisorReply() + Map states = new HashMap<>() + checkedHosts.add(msg.hostUuid) + msg.vmInstanceUuids.each { + states.put(it, it == vmUuid && msg.hostUuid == hostUuid ? targetHostState : VmInstanceState.Running.toString()) + } + reply.setStates(states) + bus.reply(msg, reply) + } + } + + MigrateVmAction migrateVmAction(String vmUuid, String destHostUuid) { + MigrateVmAction action = new MigrateVmAction() + action.sessionId = adminSession() + action.vmInstanceUuid = vmUuid + action.hostUuid = destHostUuid + return action + } +}