Skip to content

Commit 29918e2

Browse files
committed
Merge release branch 4.11 to 4.12
* 4.11: KVM: Fix agents dont reconnect post maintenance (#3239)
2 parents 00ff536 + e86f671 commit 29918e2

File tree

7 files changed

+530
-89
lines changed

7 files changed

+530
-89
lines changed

agent/src/main/java/com/cloud/agent/Agent.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -606,9 +606,7 @@ protected void processRequest(final Request request, final Link link) {
606606
System.exit(1);
607607
return;
608608
} else if (cmd instanceof MaintainCommand) {
609-
s_logger.debug("Received maintainCommand");
610-
cancelTasks();
611-
_reconnectAllowed = false;
609+
s_logger.debug("Received maintainCommand, do not cancel current tasks");
612610
answer = new MaintainAnswer((MaintainCommand)cmd);
613611
} else if (cmd instanceof AgentControlCommand) {
614612
answer = null;

engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ public interface ResourceManager extends ResourceService, Configurable {
5252
"Number of retries when preparing a host into Maintenance Mode is faulty before failing",
5353
true, ConfigKey.Scope.Cluster);
5454

55+
ConfigKey<Boolean> KvmSshToAgentEnabled = new ConfigKey<>("Advanced", Boolean.class,
56+
"kvm.ssh.to.agent","true",
57+
"Number of retries when preparing a host into Maintenance Mode is faulty before failing",
58+
false);
59+
5560
/**
5661
* Register a listener for different types of resource life cycle events.
5762
* There can only be one type of listener per type of host.

engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import java.util.concurrent.TimeUnit;
3333

3434
import com.cloud.agent.api.ModifySshKeysCommand;
35+
import com.cloud.agent.api.ModifyStoragePoolCommand;
3536
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
3637
import org.apache.log4j.Logger;
3738

@@ -115,7 +116,7 @@ public int compare(final Object o1, final Object o2) {
115116
StopCommand.class.toString(), CheckVirtualMachineCommand.class.toString(), PingTestCommand.class.toString(), CheckHealthCommand.class.toString(),
116117
ReadyCommand.class.toString(), ShutdownCommand.class.toString(), SetupCommand.class.toString(),
117118
CleanupNetworkRulesCmd.class.toString(), CheckNetworkCommand.class.toString(), PvlanSetupCommand.class.toString(), CheckOnHostCommand.class.toString(),
118-
ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString()};
119+
ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString(), ModifyStoragePoolCommand.class.toString()};
119120
protected final static String[] s_commandsNotAllowedInConnectingMode = new String[] { StartCommand.class.toString(), CreateCommand.class.toString() };
120121
static {
121122
Arrays.sort(s_commandsAllowedInMaintenanceMode);

server/src/main/java/com/cloud/configuration/Config.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,14 +1202,6 @@ public enum Config {
12021202
KvmPublicNetwork("Hidden", ManagementServer.class, String.class, "kvm.public.network.device", null, "Specify the public bridge on host for public network", null),
12031203
KvmPrivateNetwork("Hidden", ManagementServer.class, String.class, "kvm.private.network.device", null, "Specify the private bridge on host for private network", null),
12041204
KvmGuestNetwork("Hidden", ManagementServer.class, String.class, "kvm.guest.network.device", null, "Specify the private bridge on host for private network", null),
1205-
KvmSshToAgentEnabled(
1206-
"Advanced",
1207-
ManagementServer.class,
1208-
Boolean.class,
1209-
"kvm.ssh.to.agent",
1210-
"true",
1211-
"Specify whether or not the management server is allowed to SSH into KVM Agents",
1212-
null),
12131205

12141206
// Hyperv
12151207
HypervPublicNetwork(

server/src/main/java/com/cloud/resource/ResourceManagerImpl.java

Lines changed: 62 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import javax.inject.Inject;
3232
import javax.naming.ConfigurationException;
3333

34+
import com.cloud.utils.Pair;
3435
import com.cloud.vm.dao.UserVmDetailsDao;
3536
import org.apache.cloudstack.framework.config.ConfigKey;
3637
import org.apache.commons.lang.ObjectUtils;
@@ -2343,45 +2344,76 @@ private boolean doCancelMaintenance(final long hostId) {
23432344
}
23442345
}
23452346

2347+
handleAgentIfNotConnected(host, vms_migrating);
2348+
23462349
try {
23472350
resourceStateTransitTo(host, ResourceState.Event.AdminCancelMaintenance, _nodeId);
23482351
_agentMgr.pullAgentOutMaintenance(hostId);
23492352
retryHostMaintenance.remove(hostId);
2353+
} catch (final NoTransitionException e) {
2354+
s_logger.debug("Cannot transmit host " + host.getId() + "to Enabled state", e);
2355+
return false;
2356+
}
23502357

2351-
// for kvm, need to log into kvm host, restart cloudstack-agent
2352-
if ((host.getHypervisorType() == HypervisorType.KVM && !vms_migrating) || host.getHypervisorType() == HypervisorType.LXC) {
2358+
return true;
23532359

2354-
final boolean sshToAgent = Boolean.parseBoolean(_configDao.getValue(Config.KvmSshToAgentEnabled.key()));
2355-
if (!sshToAgent) {
2356-
s_logger.info("Configuration tells us not to SSH into Agents. Please restart the Agent (" + hostId + ") manually");
2357-
return true;
2358-
}
2360+
}
23592361

2360-
_hostDao.loadDetails(host);
2361-
final String password = host.getDetail("password");
2362-
final String username = host.getDetail("username");
2363-
if (password == null || username == null) {
2364-
s_logger.debug("Can't find password/username");
2365-
return false;
2366-
}
2367-
final com.trilead.ssh2.Connection connection = SSHCmdHelper.acquireAuthorizedConnection(host.getPrivateIpAddress(), 22, username, password);
2368-
if (connection == null) {
2369-
s_logger.debug("Failed to connect to host: " + host.getPrivateIpAddress());
2370-
return false;
2371-
}
2362+
/**
2363+
* Handle agent (if available) if its not connected before cancelling maintenance.
2364+
* Agent must be connected before cancelling maintenance.
2365+
* If the host status is not Up:
2366+
* - If kvm.ssh.to.agent is true, then SSH into the host and restart the agent.
2367+
* - If kvm.shh.to.agent is false, then fail cancelling maintenance
2368+
*/
2369+
protected void handleAgentIfNotConnected(HostVO host, boolean vmsMigrating) {
2370+
final boolean isAgentOnHost = host.getHypervisorType() == HypervisorType.KVM ||
2371+
host.getHypervisorType() == HypervisorType.LXC;
2372+
if (!isAgentOnHost || vmsMigrating || host.getStatus() == Status.Up) {
2373+
return;
2374+
}
2375+
final boolean sshToAgent = Boolean.parseBoolean(_configDao.getValue(KvmSshToAgentEnabled.key()));
2376+
if (sshToAgent) {
2377+
Pair<String, String> credentials = getHostCredentials(host);
2378+
connectAndRestartAgentOnHost(host, credentials.first(), credentials.second());
2379+
} else {
2380+
throw new CloudRuntimeException("SSH access is disabled, cannot cancel maintenance mode as " +
2381+
"host agent is not connected");
2382+
}
2383+
}
23722384

2373-
try {
2374-
SSHCmdHelper.SSHCmdResult result = SSHCmdHelper.sshExecuteCmdOneShot(connection, "service cloudstack-agent restart");
2375-
s_logger.debug("cloudstack-agent restart result: " + result.toString());
2376-
} catch (final SshException e) {
2377-
return false;
2378-
}
2379-
}
2385+
/**
2386+
* Get host credentials
2387+
* @throws CloudRuntimeException if username or password are not found
2388+
*/
2389+
protected Pair<String, String> getHostCredentials(HostVO host) {
2390+
_hostDao.loadDetails(host);
2391+
final String password = host.getDetail("password");
2392+
final String username = host.getDetail("username");
2393+
if (password == null || username == null) {
2394+
throw new CloudRuntimeException("SSH to agent is enabled, but username/password credentials are not found");
2395+
}
2396+
return new Pair<>(username, password);
2397+
}
23802398

2381-
return true;
2382-
} catch (final NoTransitionException e) {
2383-
s_logger.debug("Cannot transmit host " + host.getId() + "to Enabled state", e);
2384-
return false;
2399+
/**
2400+
* True if agent is restarted via SSH. Assumes kvm.ssh.to.agent = true and host status is not Up
2401+
*/
2402+
protected void connectAndRestartAgentOnHost(HostVO host, String username, String password) {
2403+
final com.trilead.ssh2.Connection connection = SSHCmdHelper.acquireAuthorizedConnection(
2404+
host.getPrivateIpAddress(), 22, username, password);
2405+
if (connection == null) {
2406+
throw new CloudRuntimeException("SSH to agent is enabled, but failed to connect to host: " + host.getPrivateIpAddress());
2407+
}
2408+
try {
2409+
SSHCmdHelper.SSHCmdResult result = SSHCmdHelper.sshExecuteCmdOneShot(
2410+
connection, "service cloudstack-agent restart");
2411+
if (result.getReturnCode() != 0) {
2412+
throw new CloudRuntimeException("Could not restart agent on host " + host.getId() + " due to: " + result.getStdErr());
2413+
}
2414+
s_logger.debug("cloudstack-agent restart result: " + result.toString());
2415+
} catch (final SshException e) {
2416+
throw new CloudRuntimeException("SSH to agent is enabled, but agent restart failed", e);
23852417
}
23862418
}
23872419

server/src/test/java/com/cloud/resource/ResourceManagerImplTest.java

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,20 @@
2525
import com.cloud.ha.HighAvailabilityManager;
2626
import com.cloud.host.Host;
2727
import com.cloud.host.HostVO;
28+
import com.cloud.host.Status;
2829
import com.cloud.host.dao.HostDao;
2930
import com.cloud.hypervisor.Hypervisor;
3031
import com.cloud.storage.StorageManager;
32+
import com.cloud.utils.Pair;
33+
import com.cloud.utils.exception.CloudRuntimeException;
3134
import com.cloud.utils.fsm.NoTransitionException;
35+
import com.cloud.utils.ssh.SSHCmdHelper;
36+
import com.cloud.utils.ssh.SshException;
3237
import com.cloud.vm.VMInstanceVO;
3338
import com.cloud.vm.dao.UserVmDetailsDao;
3439
import com.cloud.vm.dao.VMInstanceDao;
40+
import com.trilead.ssh2.Connection;
41+
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
3542
import org.junit.Assert;
3643
import org.junit.Before;
3744
import org.junit.Test;
@@ -56,12 +63,13 @@
5663
import static org.mockito.Matchers.anyLong;
5764
import static org.mockito.Matchers.anyString;
5865
import static org.mockito.Matchers.eq;
66+
import static org.mockito.Mockito.never;
5967
import static org.mockito.Mockito.times;
6068
import static org.mockito.Mockito.verify;
6169
import static org.mockito.Mockito.when;
6270

6371
@RunWith(PowerMockRunner.class)
64-
@PrepareForTest({ActionEventUtils.class, ResourceManagerImpl.class})
72+
@PrepareForTest({ActionEventUtils.class, ResourceManagerImpl.class, SSHCmdHelper.class})
6573
public class ResourceManagerImplTest {
6674

6775
@Mock
@@ -78,6 +86,8 @@ public class ResourceManagerImplTest {
7886
private HostDao hostDao;
7987
@Mock
8088
private VMInstanceDao vmInstanceDao;
89+
@Mock
90+
private ConfigurationDao configurationDao;
8191

8292
@Spy
8393
@InjectMocks
@@ -99,7 +109,13 @@ public class ResourceManagerImplTest {
99109
@Mock
100110
private GetVncPortCommand getVncPortCommandVm2;
101111

112+
@Mock
113+
private Connection sshConnection;
114+
102115
private static long hostId = 1L;
116+
private static final String hostUsername = "user";
117+
private static final String hostPassword = "password";
118+
private static final String hostPrivateIp = "192.168.1.10";
103119

104120
private static long vm1Id = 1L;
105121
private static String vm1InstanceName = "i-1-VM";
@@ -117,9 +133,13 @@ public void setup() throws Exception {
117133
when(host.getType()).thenReturn(Host.Type.Routing);
118134
when(host.getId()).thenReturn(hostId);
119135
when(host.getResourceState()).thenReturn(ResourceState.Enabled);
120-
when(host.getHypervisorType()).thenReturn(Hypervisor.HypervisorType.VMware);
136+
when(host.getHypervisorType()).thenReturn(Hypervisor.HypervisorType.KVM);
121137
when(host.getClusterId()).thenReturn(1L);
122138
when(hostDao.findById(hostId)).thenReturn(host);
139+
when(host.getDetail("username")).thenReturn(hostUsername);
140+
when(host.getDetail("password")).thenReturn(hostPassword);
141+
when(host.getStatus()).thenReturn(Status.Up);
142+
when(host.getPrivateIpAddress()).thenReturn(hostPrivateIp);
123143
when(vm1.getId()).thenReturn(vm1Id);
124144
when(vm2.getId()).thenReturn(vm2Id);
125145
when(vm1.getInstanceName()).thenReturn(vm1InstanceName);
@@ -138,6 +158,15 @@ public void setup() throws Exception {
138158
PowerMockito.whenNew(GetVncPortCommand.class).withArguments(vm2Id, vm2InstanceName).thenReturn(getVncPortCommandVm2);
139159
when(agentManager.easySend(eq(hostId), eq(getVncPortCommandVm1))).thenReturn(getVncPortAnswerVm1);
140160
when(agentManager.easySend(eq(hostId), eq(getVncPortCommandVm2))).thenReturn(getVncPortAnswerVm2);
161+
162+
PowerMockito.mockStatic(SSHCmdHelper.class);
163+
BDDMockito.given(SSHCmdHelper.acquireAuthorizedConnection(eq(hostPrivateIp), eq(22),
164+
eq(hostUsername), eq(hostPassword))).willReturn(sshConnection);
165+
BDDMockito.given(SSHCmdHelper.sshExecuteCmdOneShot(eq(sshConnection),
166+
eq("service cloudstack-agent restart"))).
167+
willReturn(new SSHCmdHelper.SSHCmdResult(0,"",""));
168+
169+
when(configurationDao.getValue(ResourceManager.KvmSshToAgentEnabled.key())).thenReturn("true");
141170
}
142171

143172
@Test
@@ -206,4 +235,76 @@ public void testCheckAndMaintainErrorInMaintenanceRetries() throws NoTransitionE
206235
verify(resourceManager, times(retries + 1)).isHostInMaintenance(host, failedMigrations, new ArrayList<>(), failedMigrations);
207236
verify(resourceManager).setHostIntoErrorInMaintenance(host, failedMigrations);
208237
}
238+
239+
@Test(expected = CloudRuntimeException.class)
240+
public void testGetHostCredentialsMissingParameter() {
241+
when(host.getDetail("password")).thenReturn(null);
242+
resourceManager.getHostCredentials(host);
243+
}
244+
245+
@Test
246+
public void testGetHostCredentials() {
247+
Pair<String, String> credentials = resourceManager.getHostCredentials(host);
248+
Assert.assertNotNull(credentials);
249+
Assert.assertEquals(hostUsername, credentials.first());
250+
Assert.assertEquals(hostPassword, credentials.second());
251+
}
252+
253+
@Test(expected = CloudRuntimeException.class)
254+
public void testConnectAndRestartAgentOnHostCannotConnect() {
255+
BDDMockito.given(SSHCmdHelper.acquireAuthorizedConnection(eq(hostPrivateIp), eq(22),
256+
eq(hostUsername), eq(hostPassword))).willReturn(null);
257+
resourceManager.connectAndRestartAgentOnHost(host, hostUsername, hostPassword);
258+
}
259+
260+
@Test(expected = CloudRuntimeException.class)
261+
public void testConnectAndRestartAgentOnHostCannotRestart() throws Exception {
262+
BDDMockito.given(SSHCmdHelper.sshExecuteCmdOneShot(eq(sshConnection),
263+
eq("service cloudstack-agent restart"))).willThrow(new SshException("exception"));
264+
resourceManager.connectAndRestartAgentOnHost(host, hostUsername, hostPassword);
265+
}
266+
267+
@Test
268+
public void testConnectAndRestartAgentOnHost() {
269+
resourceManager.connectAndRestartAgentOnHost(host, hostUsername, hostPassword);
270+
}
271+
272+
@Test
273+
public void testHandleAgentSSHEnabledNotConnectedAgent() {
274+
when(host.getStatus()).thenReturn(Status.Disconnected);
275+
resourceManager.handleAgentIfNotConnected(host, false);
276+
verify(resourceManager).getHostCredentials(eq(host));
277+
verify(resourceManager).connectAndRestartAgentOnHost(eq(host), eq(hostUsername), eq(hostPassword));
278+
}
279+
280+
@Test
281+
public void testHandleAgentSSHEnabledConnectedAgent() {
282+
when(host.getStatus()).thenReturn(Status.Up);
283+
resourceManager.handleAgentIfNotConnected(host, false);
284+
verify(resourceManager, never()).getHostCredentials(eq(host));
285+
verify(resourceManager, never()).connectAndRestartAgentOnHost(eq(host), eq(hostUsername), eq(hostPassword));
286+
}
287+
288+
@Test(expected = CloudRuntimeException.class)
289+
public void testHandleAgentSSHDisabledNotConnectedAgent() {
290+
when(host.getStatus()).thenReturn(Status.Disconnected);
291+
when(configurationDao.getValue(ResourceManager.KvmSshToAgentEnabled.key())).thenReturn("false");
292+
resourceManager.handleAgentIfNotConnected(host, false);
293+
}
294+
295+
@Test
296+
public void testHandleAgentSSHDisabledConnectedAgent() {
297+
when(host.getStatus()).thenReturn(Status.Up);
298+
when(configurationDao.getValue(ResourceManager.KvmSshToAgentEnabled.key())).thenReturn("false");
299+
resourceManager.handleAgentIfNotConnected(host, false);
300+
verify(resourceManager, never()).getHostCredentials(eq(host));
301+
verify(resourceManager, never()).connectAndRestartAgentOnHost(eq(host), eq(hostUsername), eq(hostPassword));
302+
}
303+
304+
@Test
305+
public void testHandleAgentVMsMigrating() {
306+
resourceManager.handleAgentIfNotConnected(host, true);
307+
verify(resourceManager, never()).getHostCredentials(eq(host));
308+
verify(resourceManager, never()).connectAndRestartAgentOnHost(eq(host), eq(hostUsername), eq(hostPassword));
309+
}
209310
}

0 commit comments

Comments
 (0)