Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java
Original file line number Diff line number Diff line change
Expand Up @@ -5316,6 +5316,15 @@ private boolean noStorageAccessible(){
return inaccessiblePsCount == attachedPsCount && attachedPsCount > 0;
}

@Transactional(readOnly = true)
private long getLibvirtRestartedEchoTimeout() {
long vmCount = KVMHostUtils.countVmsForLibvirtRestartEchoTimeout(self.getUuid());
long timeout = KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(vmCount);
logger.info(String.format("extend kvmagent echo timeout after libvirtd restart on host[uuid:%s, vmCount:%s] to %sms",
self.getUuid(), vmCount, timeout));
return timeout;
}

private void updateHostOsInformation(String distro, String release, String version) {
final KVMHostVO kvmHostVO = getSelf();
kvmHostVO.setOsDistribution(distro);
Expand Down Expand Up @@ -5586,6 +5595,7 @@ public void connectHook(final ConnectHostInfo info, final Completion complete) {
chain.allowWatch();
chain.then(new ShareFlow() {
boolean deployed = false;
boolean libvirtRestarted = false;
@Override
public void setup() {
flow(new NoRollbackFlow() {
Expand Down Expand Up @@ -5949,6 +5959,8 @@ public void success(Boolean run) {
deployed = run;
}
if (deployed) {
libvirtRestarted = KVMHostUtils.shouldRestartLibvirtdDuringDeploy(
deployArguments.getInit(), deployArguments.getRestartLibvirtd());
// update host agent version when open grayScaleUpgrade
upgradeChecker.updateAgentVersion(self.getUuid(), AnsibleConstant.KVM_AGENT_NAME, dbf.getDbVersion(), dbf.getDbVersion());
}
Expand Down Expand Up @@ -6023,6 +6035,9 @@ public boolean skip(Map data) {

@Override
public void run(final FlowTrigger trigger, Map data) {
final long echoTimeout = libvirtRestarted
? getLibvirtRestartedEchoTimeout()
: TimeUnit.SECONDS.toMillis(CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT);
restf.echo(echoPath, new Completion(trigger) {
@Override
public void success() {
Expand Down Expand Up @@ -6054,7 +6069,7 @@ public void success() {
public void fail(ErrorCode errorCode) {
trigger.fail(errorCode);
}
});
}, TimeUnit.SECONDS.toMillis(1), echoTimeout);
}

@Override
Expand All @@ -6066,7 +6081,7 @@ public void fail(ErrorCode errorCode) {
trigger.fail(errorCode);
}
}
});
}, TimeUnit.SECONDS.toMillis(1), echoTimeout);
}
});

Expand Down
30 changes: 30 additions & 0 deletions plugin/kvm/src/main/java/org/zstack/kvm/KVMHostUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import org.zstack.header.tag.SystemTagVO;
import org.zstack.header.tag.SystemTagVO_;
import org.zstack.header.tag.TagType;
import org.zstack.header.vm.VmInstanceState;
import org.zstack.header.vm.VmInstanceVO;
import org.zstack.header.vm.VmInstanceVO_;
import org.zstack.utils.CollectionUtils;
import org.zstack.utils.TagUtils;
import org.zstack.utils.logging.CLogger;
Expand All @@ -22,6 +25,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
* Created by GuoYi on 4/16/20.
Expand All @@ -33,6 +37,9 @@ public class KVMHostUtils {
// to keep check-flow and deploy-flow IP lists identical.
public static final Set<String> EXCLUDED_INTERNAL_IPS = Collections.unmodifiableSet(
new LinkedHashSet<>(Collections.singletonList("169.254.64.1")));
public static final long LIBVIRT_RESTART_ECHO_TIMEOUT_VM_THRESHOLD = 100;
public static final long LIBVIRT_RESTART_ECHO_TIMEOUT_PER_VM_SECONDS = 1;
public static final long LIBVIRT_RESTART_ECHO_TIMEOUT_MAX_SECONDS = 180;

// Collect host IPv4 addresses; mirrors host_plugin.fact() filters and
// applies EXCLUDED_INTERNAL_IPS so check and deploy share one source.
Expand Down Expand Up @@ -147,6 +154,29 @@ public static boolean shouldForceTlsRedeploy(boolean needDeployTlsCert,
return allowRestartLibvirtd || isNewAdded;
}

public static boolean shouldRestartLibvirtdDuringDeploy(String init, String restartLibvirtd) {
return "true".equalsIgnoreCase(init) || "true".equalsIgnoreCase(restartLibvirtd);
}
Comment on lines +157 to +159
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

布尔字符串判断建议先 trim,避免误判

Line 155 当前仅做 equalsIgnoreCase,若参数值包含前后空格(如 " true ")会返回 false,可能导致本应延长的 echo 超时未生效。

建议修改
 public static boolean shouldRestartLibvirtdDuringDeploy(String init, String restartLibvirtd) {
-    return "true".equalsIgnoreCase(init) || "true".equalsIgnoreCase(restartLibvirtd);
+    String normalizedInit = init == null ? null : init.trim();
+    String normalizedRestartLibvirtd = restartLibvirtd == null ? null : restartLibvirtd.trim();
+    return "true".equalsIgnoreCase(normalizedInit) || "true".equalsIgnoreCase(normalizedRestartLibvirtd);
 }

As per coding guidelines: 注意检查来自 Message 的参数是否做过 trim,用户可能在浏览器上复制粘贴的数据带有空格、换行符等。

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@plugin/kvm/src/main/java/org/zstack/kvm/KVMHostUtils.java` around lines 154 -
156, The boolean-string check in shouldRestartLibvirtdDuringDeploy can
mis-evaluate values with surrounding whitespace; update the method to trim both
parameters (init and restartLibvirtd) before comparing (e.g., call trim() safely
after null-checking each param) and then use equalsIgnoreCase on the trimmed
strings so values like " true " or "\ntrue\t" are treated as true.


public static long countVmsForLibvirtRestartEchoTimeout(String hostUuid) {
return Q.New(VmInstanceVO.class)
.eq(VmInstanceVO_.hostUuid, hostUuid)
.notEq(VmInstanceVO_.state, VmInstanceState.Stopped)
.count();
}

public static long calculateLibvirtRestartEchoTimeoutMillis(long vmCount) {
long defaultTimeoutSeconds = CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT;
if (vmCount <= LIBVIRT_RESTART_ECHO_TIMEOUT_VM_THRESHOLD) {
return TimeUnit.SECONDS.toMillis(defaultTimeoutSeconds);
}

long maxExtraSeconds = Math.max(0, LIBVIRT_RESTART_ECHO_TIMEOUT_MAX_SECONDS - defaultTimeoutSeconds);
long extraVmCount = vmCount - LIBVIRT_RESTART_ECHO_TIMEOUT_VM_THRESHOLD;
long extraSeconds = Math.min(extraVmCount * LIBVIRT_RESTART_ECHO_TIMEOUT_PER_VM_SECONDS, maxExtraSeconds);
return TimeUnit.SECONDS.toMillis(defaultTimeoutSeconds + extraSeconds);
}

public static boolean shouldContinueReconnectOnAnsibleFailure(boolean isNewAdded, ErrorCode errorCode) {
return !isNewAdded && isLibvirtSocketMaskSystemdTimeout(errorCode);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package org.zstack.test.integration.kvm.host

import org.zstack.core.CoreGlobalProperty
import org.zstack.core.db.Q
import org.zstack.core.db.SQL
import org.zstack.header.vm.VmInstanceState
import org.zstack.header.vm.VmInstanceVO
import org.zstack.header.vm.VmInstanceVO_
import org.zstack.kvm.KVMHostUtils
import org.zstack.sdk.HostInventory
import org.zstack.sdk.VmInstanceInventory
import org.zstack.test.integration.kvm.Env
import org.zstack.test.integration.kvm.KvmTest
import org.zstack.testlib.EnvSpec
import org.zstack.testlib.SubCase

import java.util.concurrent.TimeUnit

class LibvirtRestartEchoTimeoutCase extends SubCase {
EnvSpec env

@Override
void setup() {
useSpring(KvmTest.springSpec)
}

@Override
void environment() {
env = Env.oneVmBasicEnv()
}

@Override
void test() {
env.create {
testCalculateLibvirtRestartEchoTimeout()
testCountVmsForLibvirtRestartEchoTimeoutExcludesStoppedVm()
}
}

@Override
void clean() {
env.delete()
}

void testCalculateLibvirtRestartEchoTimeout() {
int oldTimeout = CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT
try {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = 60

assert KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(100) == TimeUnit.SECONDS.toMillis(60)
assert KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(101) == TimeUnit.SECONDS.toMillis(61)
assert KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(10000) == TimeUnit.SECONDS.toMillis(180)

CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = 300
assert KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(10000) == TimeUnit.SECONDS.toMillis(300)
} finally {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = oldTimeout
}
}

void testCountVmsForLibvirtRestartEchoTimeoutExcludesStoppedVm() {
HostInventory host = env.inventoryByName("kvm") as HostInventory
VmInstanceInventory vm = env.inventoryByName("vm") as VmInstanceInventory

long originalCount = KVMHostUtils.countVmsForLibvirtRestartEchoTimeout(host.uuid)
assert originalCount > 0

VmInstanceState originalState = Q.New(VmInstanceVO.class)
.select(VmInstanceVO_.state)
.eq(VmInstanceVO_.uuid, vm.uuid)
.findValue()
try {
SQL.New(VmInstanceVO.class)
.eq(VmInstanceVO_.uuid, vm.uuid)
.set(VmInstanceVO_.state, VmInstanceState.Stopped)
.update()

assert KVMHostUtils.countVmsForLibvirtRestartEchoTimeout(host.uuid) == originalCount - 1
} finally {
SQL.New(VmInstanceVO.class)
.eq(VmInstanceVO_.uuid, vm.uuid)
.set(VmInstanceVO_.state, originalState)
.update()
}
}
}
64 changes: 64 additions & 0 deletions test/src/test/java/org/zstack/test/kvm/KVMHostUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

import org.junit.Assert;
import org.junit.Test;
import org.zstack.core.CoreGlobalProperty;
import org.zstack.header.errorcode.ErrorCode;
import org.zstack.kvm.KVMHostUtils;

import java.util.Collections;
import java.util.concurrent.TimeUnit;

public class KVMHostUtilsTest {

Expand Down Expand Up @@ -189,6 +191,68 @@ public void shouldForceTlsRedeploy_reconnectWithoutRestartSkips() {
Assert.assertFalse(KVMHostUtils.shouldForceTlsRedeploy(true, false, false));
}

@Test
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment from yaohua.wu:

应该要有个集成测试,这个UT在PR系统里不跑的

public void shouldRestartLibvirtdDuringDeploy_initOrRestartLibvirtdTriggers() {
Assert.assertFalse(KVMHostUtils.shouldRestartLibvirtdDuringDeploy(null, null));
Assert.assertFalse(KVMHostUtils.shouldRestartLibvirtdDuringDeploy("false", "false"));
Assert.assertTrue(KVMHostUtils.shouldRestartLibvirtdDuringDeploy("true", "false"));
Assert.assertTrue(KVMHostUtils.shouldRestartLibvirtdDuringDeploy("false", "true"));
Assert.assertTrue(KVMHostUtils.shouldRestartLibvirtdDuringDeploy("TrUe", null));
Assert.assertTrue(KVMHostUtils.shouldRestartLibvirtdDuringDeploy(null, "TrUe"));
}

@Test
public void calculateLibvirtRestartEchoTimeout_keepsDefaultAtThreshold() {
int oldTimeout = CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT;
try {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = 60;
Assert.assertEquals(TimeUnit.SECONDS.toMillis(60),
KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(100));
} finally {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = oldTimeout;
}
}

@Test
public void calculateLibvirtRestartEchoTimeout_addsOneSecondAfterThreshold() {
int oldTimeout = CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT;
try {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = 60;
Assert.assertEquals(TimeUnit.SECONDS.toMillis(61),
KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(101));
} finally {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = oldTimeout;
}
}

@Test
public void calculateLibvirtRestartEchoTimeout_capsAt180Seconds() {
int oldTimeout = CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT;
try {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = 60;
Assert.assertEquals(TimeUnit.SECONDS.toMillis(180),
KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(10000));
} finally {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = oldTimeout;
}
}

@Test
public void calculateLibvirtRestartEchoTimeout_doesNotReduceConfiguredTimeoutAboveCap() {
int oldTimeout = CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT;
try {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = 180;
Assert.assertEquals(TimeUnit.SECONDS.toMillis(180),
KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(10000));

CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = 300;
Assert.assertEquals(TimeUnit.SECONDS.toMillis(300),
KVMHostUtils.calculateLibvirtRestartEchoTimeoutMillis(10000));
} finally {
CoreGlobalProperty.REST_FACADE_ECHO_TIMEOUT = oldTimeout;
}
}

@Test
public void zstac77120_continueReconnectOnLibvirtSocketMaskSystemdTimeout() {
ErrorCode error = new ErrorCode();
Expand Down