Skip to content

Commit 8a602fb

Browse files
author
shan.wu
committed
<fix>[vm]: handle failed migration target state
Check the VM state on the destination host after the migration API fails. If the destination host reports Running, treat the migration as completed. Run the success completion path in that case. Otherwise, keep the original rollback behavior. Resolves: ZSTAC-83894 Change-Id: I8b4774a405fc3b1c05d21b6742facd26bc8d03e6
1 parent 4cdaed7 commit 8a602fb

2 files changed

Lines changed: 318 additions & 13 deletions

File tree

compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java

Lines changed: 105 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,29 @@ public void run(MessageReply reply) {
183183
});
184184
}
185185

186+
private void getVmStateOnHost(final String hostUuid, final ReturnValueCompletion<String> completion) {
187+
CheckVmStateOnHypervisorMsg msg = new CheckVmStateOnHypervisorMsg();
188+
msg.setVmInstanceUuids(list(self.getUuid()));
189+
msg.setHostUuid(hostUuid);
190+
bus.makeTargetServiceIdByResourceUuid(msg, HostConstant.SERVICE_ID, hostUuid);
191+
bus.send(msg, new CloudBusCallBack(completion) {
192+
@Override
193+
public void run(MessageReply reply) {
194+
if (!reply.isSuccess()) {
195+
completion.fail(reply.getError());
196+
return;
197+
}
198+
199+
CheckVmStateOnHypervisorReply r = reply.castReply();
200+
completion.success(r.getStates().get(self.getUuid()));
201+
}
202+
});
203+
}
204+
205+
private boolean isVmRunningOnHost(String state) {
206+
return VmInstanceState.Running.toString().equals(state);
207+
}
208+
186209
protected void destroy(final VmInstanceDeletionPolicy deletionPolicy, Message msg, final Completion completion) {
187210
if (deletionPolicy == VmInstanceDeletionPolicy.DBOnly) {
188211
completion.success();
@@ -7178,26 +7201,96 @@ public void done() {
71787201
}).error(new FlowErrorHandler(completion) {
71797202
@Override
71807203
public void handle(final ErrorCode errCode, Map data) {
7181-
String destHostUuid = spec.getDestHost().getUuid().equals(lastHostUuid) ? null : spec.getDestHost().getUuid();
7182-
extEmitter.failedToMigrateVm(VmInstanceInventory.valueOf(self), destHostUuid, errCode, new NoErrorCompletion(completion) {
7183-
@Override
7184-
public void done() {
7185-
if (!HostErrors.FAILED_TO_MIGRATE_VM_ON_HYPERVISOR.isEqual(errCode.getCode())) {
7186-
changeVmStateInDb(originState.getDrivenEvent());
7187-
completion.fail(errCode);
7188-
return;
7189-
}
7204+
handleFailedMigrateVm(spec, originState, lastHostUuid, errCode, completion);
7205+
}
7206+
}).start();
7207+
}
7208+
7209+
private void handleFailedMigrateVm(final VmInstanceSpec spec, final VmInstanceState originState,
7210+
final String lastHostUuid, final ErrorCode errCode,
7211+
final Completion completion) {
7212+
String destHostUuid = spec.getDestHost().getUuid().equals(lastHostUuid) ? null : spec.getDestHost().getUuid();
7213+
if (destHostUuid == null) {
7214+
rollbackFailedMigrateVm(originState, null, errCode, completion);
7215+
return;
7216+
}
7217+
7218+
getVmStateOnHost(destHostUuid, new ReturnValueCompletion<String>(completion) {
7219+
@Override
7220+
public void success(String state) {
7221+
if (!isVmRunningOnHost(state)) {
7222+
rollbackFailedMigrateVm(originState, destHostUuid, errCode, completion);
7223+
return;
7224+
}
7225+
7226+
logger.warn(String.format("migrating vm[uuid:%s] failed with error[%s], but the vm is running on destination host[uuid:%s]; complete migration cleanup on destination host",
7227+
self.getUuid(), errCode.getDetails(), destHostUuid));
7228+
completeMigrateVmOnDestination(spec, lastHostUuid, completion);
7229+
}
7230+
7231+
@Override
7232+
public void fail(ErrorCode errorCode) {
7233+
logger.warn(String.format("unable to check vm[uuid:%s] state on destination host[uuid:%s] after migration failure, %s",
7234+
self.getUuid(), destHostUuid, errorCode));
7235+
rollbackFailedMigrateVm(originState, destHostUuid, errCode, completion);
7236+
}
7237+
});
7238+
}
7239+
7240+
private void completeMigrateVmOnDestination(final VmInstanceSpec spec, final String lastHostUuid,
7241+
final Completion completion) {
7242+
HostInventory host = spec.getDestHost();
7243+
checkState(host.getUuid(), new NoErrorCompletion(completion) {
7244+
@Override
7245+
public void done() {
7246+
SQL.New(VmInstanceVO.class).eq(VmInstanceVO_.uuid, self.getUuid())
7247+
.set(VmInstanceVO_.zoneUuid, host.getZoneUuid())
7248+
.set(VmInstanceVO_.clusterUuid, host.getClusterUuid())
7249+
.set(VmInstanceVO_.lastHostUuid, lastHostUuid)
7250+
.set(VmInstanceVO_.hostUuid, host.getUuid())
7251+
.update();
7252+
self = dbf.reload(self);
71907253

7191-
checkState(originalCopy.getHostUuid(), new NoErrorCompletion(completion) {
7254+
VmInstanceInventory vm = VmInstanceInventory.valueOf(self);
7255+
extEmitter.postMigrateVm(vm, host.getUuid(), new Completion(completion) {
7256+
@Override
7257+
public void success() {
7258+
extEmitter.afterMigrateVm(vm, vm.getLastHostUuid(), new NoErrorCompletion(completion) {
71927259
@Override
71937260
public void done() {
7194-
completion.fail(errCode);
7261+
completion.success();
71957262
}
71967263
});
71977264
}
7265+
7266+
@Override
7267+
public void fail(ErrorCode errorCode) {
7268+
completion.fail(errorCode);
7269+
}
71987270
});
71997271
}
7200-
}).start();
7272+
});
7273+
}
7274+
7275+
private void rollbackFailedMigrateVm(final VmInstanceState originState, final String destHostUuid,
7276+
final ErrorCode errCode, final Completion completion) {
7277+
extEmitter.failedToMigrateVm(VmInstanceInventory.valueOf(self), destHostUuid, errCode, new NoErrorCompletion(completion) {
7278+
@Override
7279+
public void done() {
7280+
if (!HostErrors.FAILED_TO_MIGRATE_VM_ON_HYPERVISOR.isEqual(errCode.getCode())) {
7281+
changeVmStateInDb(originState.getDrivenEvent());
7282+
completion.fail(errCode);
7283+
return;
7284+
}
7285+
7286+
checkState(originalCopy.getHostUuid(), new NoErrorCompletion(completion) {
7287+
@Override
7288+
public void done() {
7289+
completion.fail(errCode);
7290+
}
7291+
});
7292+
}
7293+
});
72017294
}
72027295

72037296
protected void handle(CancelMigrateVmMsg msg) {
@@ -9272,4 +9365,3 @@ public void run(MessageReply reply) {
92729365
});
92739366
}
92749367
}
9275-
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
package org.zstack.test.integration.kvm.vm.migrate
2+
3+
import org.zstack.core.cloudbus.CloudBus
4+
import org.zstack.header.host.CheckVmStateOnHypervisorMsg
5+
import org.zstack.header.host.CheckVmStateOnHypervisorReply
6+
import org.zstack.header.network.service.NetworkServiceType
7+
import org.zstack.header.vm.VmInstanceState
8+
import org.zstack.kvm.KVMAgentCommands
9+
import org.zstack.kvm.KVMConstant
10+
import org.zstack.network.securitygroup.SecurityGroupConstant
11+
import org.zstack.network.service.flat.FlatNetworkServiceConstant
12+
import org.zstack.network.service.userdata.UserdataConstant
13+
import org.zstack.sdk.HostInventory
14+
import org.zstack.sdk.MigrateVmAction
15+
import org.zstack.sdk.VmInstanceInventory
16+
import org.zstack.test.integration.kvm.KvmTest
17+
import org.zstack.testlib.EnvSpec
18+
import org.zstack.testlib.SubCase
19+
import org.zstack.utils.data.SizeUnit
20+
21+
/**
 * Integration case for a migration whose KVM migrate call returns an error.
 *
 * The KVM migrate path is mocked to fail and CheckVmStateOnHypervisorMsg is
 * mocked to report a chosen state for the VM on the destination host. The case
 * asserts that the VM stays on its original host when the destination reports
 * Stopped or Paused, and that the migration is recorded as successful when the
 * destination reports Running.
 */
class MigrateVmFailureCheckTargetHostCase extends SubCase {
    EnvSpec env

    @Override
    void setup() {
        useSpring(KvmTest.springSpec)
    }

    /**
     * Builds one zone with a KVM cluster of two hosts, ceph primary and backup
     * storage, a flat L3 network and a single VM named "vm".
     */
    @Override
    void environment() {
        env = env {
            instanceOffering {
                name = "instanceOffering"
                memory = SizeUnit.GIGABYTE.toByte(1)
                cpu = 1
            }

            cephBackupStorage {
                name = "ceph-bk"
                fsid = "7ff218d9-f525-435f-8a40-3618d1772a64"
                monUrls = ["root:password@localhost:23", "root:password@127.0.0.1:23"]

                image {
                    name = "image1"
                    url = "http://zstack.org/download/test.qcow2"
                }
            }

            zone {
                name = "zone"

                cluster {
                    name = "cluster"
                    hypervisorType = "KVM"

                    // two hosts so that there is always another host to migrate to
                    kvm {
                        name = "kvm1"
                        managementIp = "127.0.0.1"
                        username = "root"
                        password = "password"
                    }

                    kvm {
                        name = "kvm2"
                        managementIp = "127.0.0.2"
                        username = "root"
                        password = "password"
                    }

                    attachPrimaryStorage("ceph-pri")
                    attachL2Network("l2")
                }

                cephPrimaryStorage {
                    name = "ceph-pri"
                    fsid = "7ff218d9-f525-435f-8a40-3618d1772a64"
                    monUrls = ["root:password@localhost/?monPort=7777", "root:password@127.0.0.1/?monPort=7777"]
                }

                l2NoVlanNetwork {
                    name = "l2"
                    physicalInterface = "eth0"

                    l3Network {
                        name = "l3"

                        service {
                            provider = FlatNetworkServiceConstant.FLAT_NETWORK_SERVICE_TYPE_STRING
                            types = [NetworkServiceType.DHCP.toString(), UserdataConstant.USERDATA_TYPE_STRING]
                        }

                        service {
                            provider = SecurityGroupConstant.SECURITY_GROUP_PROVIDER_TYPE
                            types = [SecurityGroupConstant.SECURITY_GROUP_NETWORK_SERVICE_TYPE]
                        }

                        ip {
                            startIp = "192.168.100.10"
                            endIp = "192.168.100.100"
                            netmask = "255.255.255.0"
                            gateway = "192.168.100.1"
                        }
                    }
                }

                attachBackupStorage("ceph-bk")
            }

            vm {
                name = "vm"
                useInstanceOffering("instanceOffering")
                useImage("image1")
                useL3Networks("l3")
            }
        }
    }

    @Override
    void test() {
        env.create {
            // rollback scenarios run first so the VM is still on its original host
            testRollbackWhenTargetHostReportsVmNotRunning()
            testMigrationSuccessWhenTargetHostReportsVmRunning()
        }
    }

    @Override
    void clean() {
        env.delete()
    }

    /** The destination reporting Stopped or Paused must leave the VM where it was. */
    void testRollbackWhenTargetHostReportsVmNotRunning() {
        VmInstanceInventory vm = env.inventoryByName("vm") as VmInstanceInventory
        HostInventory destHost = findAnotherHost(vm.hostUuid)

        [VmInstanceState.Stopped, VmInstanceState.Paused].each { VmInstanceState notRunning ->
            assertRollbackWhenTargetReports(vm, destHost, notRunning.toString())
        }
    }

    /**
     * Migrates with a failing migrate call while the destination reports
     * targetHostState, then asserts the API failed, the VM is still Running on
     * its original host and the first state check went to the destination host.
     */
    void assertRollbackWhenTargetReports(VmInstanceInventory vm, HostInventory destHost, String targetHostState) {
        List<String> checkedHosts = []

        mockMigrateVmFailure()
        mockVmState(vm.uuid, destHost.uuid, targetHostState, checkedHosts)

        MigrateVmAction.Result result = migrateVmAction(vm.uuid, destHost.uuid).call()

        assert result.error != null
        VmInstanceInventory after = reloadVmByUuid(vm.uuid)
        assert after.hostUuid == vm.hostUuid
        assert after.state == VmInstanceState.Running.toString()
        assert checkedHosts[0] == destHost.uuid
    }

    /** The destination reporting Running must turn the failed call into a completed migration. */
    void testMigrationSuccessWhenTargetHostReportsVmRunning() {
        def named = queryVmInstance {
            conditions = ["name=vm"]
        }
        VmInstanceInventory vm = named[0] as VmInstanceInventory
        HostInventory destHost = findAnotherHost(vm.hostUuid)
        List<String> checkedHosts = []

        mockMigrateVmFailure()
        mockVmState(vm.uuid, destHost.uuid, VmInstanceState.Running.toString(), checkedHosts)

        MigrateVmAction.Result result = migrateVmAction(vm.uuid, destHost.uuid).call()

        assert result.error == null
        VmInstanceInventory after = reloadVmByUuid(vm.uuid)
        assert after.hostUuid == destHost.uuid
        assert after.lastHostUuid == vm.hostUuid
        // at least one state check happened and every check went to the destination host
        assert checkedHosts
        assert checkedHosts.every { checked -> checked == destHost.uuid }
    }

    /** Queries the VM by uuid and returns the first hit. */
    VmInstanceInventory reloadVmByUuid(String vmUuid) {
        def found = queryVmInstance {
            conditions = ["uuid=" + vmUuid]
        }
        return found[0] as VmInstanceInventory
    }

    /** Returns the first host whose uuid differs from hostUuid. */
    HostInventory findAnotherHost(String hostUuid) {
        def others = queryHost {
            conditions = ["uuid!=" + hostUuid]
        }
        return others[0] as HostInventory
    }

    /** Makes the simulated KVM migrate path answer with an error response. */
    void mockMigrateVmFailure() {
        env.simulator(KVMConstant.KVM_MIGRATE_VM_PATH) {
            KVMAgentCommands.MigrateVmResponse failure = new KVMAgentCommands.MigrateVmResponse()
            failure.setError("mock migration API failure")
            failure
        }
    }

    /**
     * Replaces the CheckVmStateOnHypervisorMsg handler: every queried host uuid
     * is appended to checkedHosts, vmUuid on hostUuid is reported as
     * targetHostState and every other VM/host combination as Running.
     */
    void mockVmState(String vmUuid, String hostUuid, String targetHostState, List<String> checkedHosts) {
        // drop the handler installed by a previous scenario before adding a new one
        env.revokeMessage(CheckVmStateOnHypervisorMsg.class, null)
        env.message(CheckVmStateOnHypervisorMsg.class) { CheckVmStateOnHypervisorMsg msg, CloudBus bus ->
            checkedHosts << msg.hostUuid

            Map<String, String> states = new HashMap<>()
            for (String uuid : msg.vmInstanceUuids) {
                boolean isTarget = uuid == vmUuid && msg.hostUuid == hostUuid
                states.put(uuid, isTarget ? targetHostState : VmInstanceState.Running.toString())
            }

            CheckVmStateOnHypervisorReply reply = new CheckVmStateOnHypervisorReply()
            reply.setStates(states)
            bus.reply(msg, reply)
        }
    }

    /** Builds (without calling) an admin-session MigrateVmAction for the VM and destination host. */
    MigrateVmAction migrateVmAction(String vmUuid, String destHostUuid) {
        MigrateVmAction migrate = new MigrateVmAction()
        migrate.sessionId = adminSession()
        migrate.vmInstanceUuid = vmUuid
        migrate.hostUuid = destHostUuid
        return migrate
    }
}

0 commit comments

Comments
 (0)