Skip to content

Commit 24ef2e4

Browse files
committed
<fix>[kvm]: extend reconnect echo timeout after libvirtd restart
Resolves: ZSTAC-84691 Change-Id: I6668736975707575756d7665647867686e6b6b76
1 parent 39c155a commit 24ef2e4

2 files changed

Lines changed: 41 additions & 2 deletions

File tree

plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5316,6 +5316,21 @@ private boolean noStorageAccessible(){
53165316
return inaccessiblePsCount == attachedPsCount && attachedPsCount > 0;
53175317
}
53185318

5319+
@Transactional(readOnly = true)
5320+
private long getKvmAgentEchoTimeoutAfterDeploy(boolean deployed, boolean restartLibvirtdDuringDeploy) {
5321+
if (!deployed || !restartLibvirtdDuringDeploy) {
5322+
return TimeUnit.SECONDS.toMillis(CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT);
5323+
}
5324+
5325+
long vmCount = Q.New(VmInstanceVO.class)
5326+
.eq(VmInstanceVO_.hostUuid, self.getUuid())
5327+
.count();
5328+
long timeout = KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(vmCount);
5329+
logger.info(String.format("extend kvmagent echo timeout after libvirtd restart on host[uuid:%s, vmCount:%s] to %sms",
5330+
self.getUuid(), vmCount, timeout));
5331+
return timeout;
5332+
}
5333+
53195334
private void updateHostOsInformation(String distro, String release, String version) {
53205335
final KVMHostVO kvmHostVO = getSelf();
53215336
kvmHostVO.setOsDistribution(distro);
@@ -5586,6 +5601,7 @@ public void connectHook(final ConnectHostInfo info, final Completion complete) {
55865601
chain.allowWatch();
55875602
chain.then(new ShareFlow() {
55885603
boolean deployed = false;
5604+
boolean restartLibvirtdDuringDeploy = false;
55895605
@Override
55905606
public void setup() {
55915607
flow(new NoRollbackFlow() {
@@ -5935,6 +5951,8 @@ public void run(final FlowTrigger trigger, Map data) {
59355951
if (deployArguments.isForceRun()) {
59365952
runner.setForceRun(true);
59375953
}
5954+
restartLibvirtdDuringDeploy = KVMHostUtils.shouldRestartLibvirtdDuringDeploy(
5955+
deployArguments.getInit(), deployArguments.getRestartLibvirtd());
59385956

59395957
UriComponentsBuilder ub = UriComponentsBuilder.fromHttpUrl(restf.getBaseUrl());
59405958
ub.path(new StringBind(KVMConstant.KVM_ANSIBLE_LOG_PATH_FROMAT).bind("uuid", self.getUuid()).toString());
@@ -6023,6 +6041,7 @@ public boolean skip(Map data) {
60236041

60246042
@Override
60256043
public void run(final FlowTrigger trigger, Map data) {
6044+
final long echoTimeout = getKvmAgentEchoTimeoutAfterDeploy(deployed, restartLibvirtdDuringDeploy);
60266045
restf.echo(echoPath, new Completion(trigger) {
60276046
@Override
60286047
public void success() {
@@ -6054,7 +6073,7 @@ public void success() {
60546073
public void fail(ErrorCode errorCode) {
60556074
trigger.fail(errorCode);
60566075
}
6057-
});
6076+
}, TimeUnit.SECONDS.toMillis(1), echoTimeout);
60586077
}
60596078

60606079
@Override
@@ -6066,7 +6085,7 @@ public void fail(ErrorCode errorCode) {
60666085
trigger.fail(errorCode);
60676086
}
60686087
}
6069-
});
6088+
}, TimeUnit.SECONDS.toMillis(1), echoTimeout);
60706089
}
60716090
});
60726091

plugin/kvm/src/main/java/org/zstack/kvm/KVMHostUtils.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.List;
2323
import java.util.Locale;
2424
import java.util.Set;
25+
import java.util.concurrent.TimeUnit;
2526

2627
/**
2728
* Created by GuoYi on 4/16/20.
@@ -33,6 +34,9 @@ public class KVMHostUtils {
3334
// to keep check-flow and deploy-flow IP lists identical.
3435
public static final Set<String> EXCLUDED_INTERNAL_IPS = Collections.unmodifiableSet(
3536
new LinkedHashSet<>(Collections.singletonList("169.254.64.1")));
37+
public static final long LIBVIRT_RESTART_ECHO_TIMEOUT_VM_THRESHOLD = 100;
38+
public static final long LIBVIRT_RESTART_ECHO_TIMEOUT_PER_VM_SECONDS = 1;
39+
public static final long LIBVIRT_RESTART_ECHO_TIMEOUT_MAX_SECONDS = 180;
3640

3741
// Collect host IPv4 addresses; mirrors host_plugin.fact() filters and
3842
// applies EXCLUDED_INTERNAL_IPS so check and deploy share one source.
@@ -147,6 +151,22 @@ public static boolean shouldForceTlsRedeploy(boolean needDeployTlsCert,
147151
return allowRestartLibvirtd || isNewAdded;
148152
}
149153

154+
public static boolean shouldRestartLibvirtdDuringDeploy(String init, String restartLibvirtd) {
155+
return "true".equalsIgnoreCase(init) || "true".equalsIgnoreCase(restartLibvirtd);
156+
}
157+
158+
public static long calculateLibvirtRestartEchoTimeoutMillis(long vmCount) {
159+
long defaultTimeoutSeconds = CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT;
160+
if (vmCount <= LIBVIRT_RESTART_ECHO_TIMEOUT_VM_THRESHOLD) {
161+
return TimeUnit.SECONDS.toMillis(defaultTimeoutSeconds);
162+
}
163+
164+
long maxExtraSeconds = Math.max(0, LIBVIRT_RESTART_ECHO_TIMEOUT_MAX_SECONDS - defaultTimeoutSeconds);
165+
long extraVmCount = vmCount - LIBVIRT_RESTART_ECHO_TIMEOUT_VM_THRESHOLD;
166+
long extraSeconds = Math.min(extraVmCount * LIBVIRT_RESTART_ECHO_TIMEOUT_PER_VM_SECONDS, maxExtraSeconds);
167+
return TimeUnit.SECONDS.toMillis(defaultTimeoutSeconds + extraSeconds);
168+
}
169+
150170
public static boolean shouldContinueReconnectOnAnsibleFailure(boolean isNewAdded, ErrorCode errorCode) {
151171
return !isNewAdded && isLibvirtSocketMaskSystemdTimeout(errorCode);
152172
}

0 commit comments

Comments
 (0)