Skip to content

Commit 5af877b

Browse files
committed
CLOUDSTACK-9782: Host HA and KVM HA provider
Host-HA offers investigation, fencing and recovery mechanisms for hosts that are malfunctioning for any reason. It uses activity and health checks to determine the current host state, based on which it may degrade a host or try to recover it. If recovery fails, it may try to fence the host. The core feature is implemented in a hypervisor-agnostic way, with two separate implementations of the driver/provider for the Simulator and KVM hypervisors. The framework also allows other hypervisor-specific provider implementations to be added in the future. The Host-HA provider implementation for the KVM hypervisor uses the out-of-band management sub-system to issue IPMI calls to reset (recover) or power off (fence) a host. The Host-HA provider implementation for the Simulator provides a means of testing and validating the core framework implementation. Signed-off-by: Abhinandan Prateek <abhinandan.prateek@shapeblue.com> Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
1 parent 85d073b commit 5af877b

File tree

118 files changed

+8551
-478
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

118 files changed

+8551
-478
lines changed

.travis.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,18 @@ env:
4242
smoke/test_dynamicroles
4343
smoke/test_global_settings
4444
smoke/test_guest_vlan_range
45+
smoke/test_ha_for_host
46+
smoke/test_ha_kvm_agent
47+
smoke/test_ha_kvm
48+
smoke/test_hostha_simulator
4549
smoke/test_hosts
4650
smoke/test_internal_lb
4751
smoke/test_iso
4852
smoke/test_list_ids_parameter
4953
smoke/test_loadbalance
5054
smoke/test_login
5155
smoke/test_multipleips_per_nic
56+
smoke/test_nested_virtualization
5257
smoke/test_network
5358
smoke/test_network_acl
5459
smoke/test_nic

api/src/com/cloud/dc/DataCenter.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,14 @@
1818

1919
import com.cloud.org.Grouping;
2020
import org.apache.cloudstack.acl.InfrastructureEntity;
21-
import org.apache.cloudstack.api.Identity;
22-
import org.apache.cloudstack.api.InternalIdentity;
21+
import org.apache.cloudstack.kernel.Partition;
2322

2423
import java.util.Map;
2524

2625
/**
2726
*
2827
*/
29-
public interface DataCenter extends InfrastructureEntity, Grouping, Identity, InternalIdentity {
28+
public interface DataCenter extends InfrastructureEntity, Grouping, Partition {
3029

3130
public enum NetworkType {
3231
Basic, Advanced,

api/src/com/cloud/event/EventTypes.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.apache.cloudstack.acl.Role;
7070
import org.apache.cloudstack.acl.RolePermission;
7171
import org.apache.cloudstack.config.Configuration;
72+
import org.apache.cloudstack.ha.HAConfig;
7273
import org.apache.cloudstack.usage.Usage;
7374

7475
import java.util.HashMap;
@@ -317,6 +318,12 @@ public class EventTypes {
317318
public static final String EVENT_HOST_OUTOFBAND_MANAGEMENT_CHANGE_PASSWORD = "HOST.OOBM.CHANGEPASSWORD";
318319
public static final String EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION = "HOST.OOBM.POWERSTATE.TRANSITION";
319320

321+
// HA
322+
public static final String EVENT_HA_RESOURCE_ENABLE = "HA.RESOURCE.ENABLE";
323+
public static final String EVENT_HA_RESOURCE_DISABLE = "HA.RESOURCE.DISABLE";
324+
public static final String EVENT_HA_RESOURCE_CONFIGURE = "HA.RESOURCE.CONFIGURE";
325+
public static final String EVENT_HA_STATE_TRANSITION = "HA.STATE.TRANSITION";
326+
320327
// Maintenance
321328
public static final String EVENT_MAINTENANCE_CANCEL = "MAINT.CANCEL";
322329
public static final String EVENT_MAINTENANCE_CANCEL_PRIMARY_STORAGE = "MAINT.CANCEL.PS";
@@ -761,6 +768,12 @@ public class EventTypes {
761768
entityEventDetails.put(EVENT_HOST_OUTOFBAND_MANAGEMENT_CHANGE_PASSWORD, Host.class);
762769
entityEventDetails.put(EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION, Host.class);
763770

771+
// HA
772+
entityEventDetails.put(EVENT_HA_RESOURCE_ENABLE, HAConfig.class);
773+
entityEventDetails.put(EVENT_HA_RESOURCE_DISABLE, HAConfig.class);
774+
entityEventDetails.put(EVENT_HA_RESOURCE_CONFIGURE, HAConfig.class);
775+
entityEventDetails.put(EVENT_HA_STATE_TRANSITION, HAConfig.class);
776+
764777
// Maintenance
765778
entityEventDetails.put(EVENT_MAINTENANCE_CANCEL, Host.class);
766779
entityEventDetails.put(EVENT_MAINTENANCE_CANCEL_PRIMARY_STORAGE, Host.class);

api/src/com/cloud/host/Host.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,19 @@
1616
// under the License.
1717
package com.cloud.host;
1818

19-
import java.util.Date;
20-
21-
import org.apache.cloudstack.api.Identity;
22-
import org.apache.cloudstack.api.InternalIdentity;
23-
2419
import com.cloud.hypervisor.Hypervisor.HypervisorType;
2520
import com.cloud.resource.ResourceState;
2621
import com.cloud.utils.fsm.StateObject;
22+
import org.apache.cloudstack.api.Identity;
23+
import org.apache.cloudstack.ha.HAResource;
24+
import org.apache.cloudstack.kernel.Partition;
25+
26+
import java.util.Date;
2727

2828
/**
2929
* Host represents one particular host server.
3030
*/
31-
public interface Host extends StateObject<Status>, Identity, InternalIdentity {
31+
public interface Host extends StateObject<Status>, Identity, Partition, HAResource {
3232
public enum Type {
3333
Storage(false), Routing(false), SecondaryStorage(false), SecondaryStorageCmdExecutor(false), ConsoleProxy(true), ExternalFirewall(false), ExternalLoadBalancer(
3434
false), ExternalVirtualSwitchSupervisor(false), PxeServer(false), BaremetalPxe(false), BaremetalDhcp(false), TrafficMonitor(false),
@@ -202,5 +202,7 @@ public static String[] toStrings(Host.Type... types) {
202202

203203
boolean isInMaintenanceStates();
204204

205+
boolean isDisabled();
206+
205207
ResourceState getResourceState();
206208
}

api/src/com/cloud/host/Status.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,12 +150,14 @@ public static String[] toStrings(Status... states) {
150150
s_fsm.addTransition(Status.Down, Event.ManagementServerDown, Status.Down);
151151
s_fsm.addTransition(Status.Down, Event.AgentDisconnected, Status.Down);
152152
s_fsm.addTransition(Status.Down, Event.PingTimeout, Status.Down);
153+
s_fsm.addTransition(Status.Down, Event.HostDown, Status.Down);
153154
s_fsm.addTransition(Status.Alert, Event.AgentConnected, Status.Connecting);
154155
s_fsm.addTransition(Status.Alert, Event.Ping, Status.Up);
155156
s_fsm.addTransition(Status.Alert, Event.Remove, Status.Removed);
156157
s_fsm.addTransition(Status.Alert, Event.ManagementServerDown, Status.Alert);
157158
s_fsm.addTransition(Status.Alert, Event.AgentDisconnected, Status.Alert);
158159
s_fsm.addTransition(Status.Alert, Event.ShutdownRequested, Status.Disconnected);
160+
s_fsm.addTransition(Status.Alert, Event.HostDown, Status.Down);
159161
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceFailed, Status.Disconnected);
160162
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceCompleted, Status.Connecting);
161163
s_fsm.addTransition(Status.Rebalancing, Event.ManagementServerDown, Status.Disconnected);

api/src/com/cloud/org/Cluster.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,11 @@
1616
// under the License.
1717
package com.cloud.org;
1818

19-
import org.apache.cloudstack.api.Identity;
20-
import org.apache.cloudstack.api.InternalIdentity;
21-
2219
import com.cloud.hypervisor.Hypervisor.HypervisorType;
2320
import com.cloud.org.Managed.ManagedState;
21+
import org.apache.cloudstack.kernel.Partition;
2422

25-
public interface Cluster extends Grouping, InternalIdentity, Identity {
23+
public interface Cluster extends Grouping, Partition {
2624
public static enum ClusterType {
2725
CloudManaged, ExternalManaged;
2826
};

api/src/com/cloud/resource/ResourceState.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ public static String[] toString(ResourceState... states) {
9393
s_fsm.addTransition(ResourceState.Enabled, Event.InternalCreated, ResourceState.Enabled);
9494
s_fsm.addTransition(ResourceState.Enabled, Event.Disable, ResourceState.Disabled);
9595
s_fsm.addTransition(ResourceState.Enabled, Event.AdminAskMaintenace, ResourceState.PrepareForMaintenance);
96+
s_fsm.addTransition(ResourceState.Enabled, Event.InternalEnterMaintenance, ResourceState.Maintenance);
9697
s_fsm.addTransition(ResourceState.Disabled, Event.Enable, ResourceState.Enabled);
9798
s_fsm.addTransition(ResourceState.Disabled, Event.Disable, ResourceState.Disabled);
9899
s_fsm.addTransition(ResourceState.Disabled, Event.InternalCreated, ResourceState.Disabled);
@@ -109,5 +110,7 @@ public static String[] toString(ResourceState... states) {
109110
s_fsm.addTransition(ResourceState.ErrorInMaintenance, Event.InternalEnterMaintenance, ResourceState.Maintenance);
110111
s_fsm.addTransition(ResourceState.ErrorInMaintenance, Event.AdminCancelMaintenance, ResourceState.Enabled);
111112
s_fsm.addTransition(ResourceState.Error, Event.InternalCreated, ResourceState.Error);
113+
s_fsm.addTransition(ResourceState.Disabled, Event.DeleteHost, ResourceState.Disabled);
114+
112115
}
113116
}

api/src/com/cloud/vm/VirtualMachine.java

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,24 @@
1616
// under the License.
1717
package com.cloud.vm;
1818

19-
import java.util.Arrays;
20-
import java.util.Date;
21-
import java.util.Map;
22-
23-
import org.apache.cloudstack.acl.ControlledEntity;
24-
import org.apache.cloudstack.api.Displayable;
25-
import org.apache.cloudstack.api.Identity;
26-
import org.apache.cloudstack.api.InternalIdentity;
27-
2819
import com.cloud.hypervisor.Hypervisor.HypervisorType;
2920
import com.cloud.utils.fsm.StateMachine2;
3021
import com.cloud.utils.fsm.StateMachine2.Transition;
3122
import com.cloud.utils.fsm.StateMachine2.Transition.Impact;
3223
import com.cloud.utils.fsm.StateObject;
24+
import org.apache.cloudstack.acl.ControlledEntity;
25+
import org.apache.cloudstack.api.Displayable;
26+
import org.apache.cloudstack.kernel.Partition;
27+
28+
import java.util.Arrays;
29+
import java.util.Date;
30+
import java.util.Map;
3331

3432
/**
3533
* VirtualMachine describes the properties held by a virtual machine
3634
*
3735
*/
38-
public interface VirtualMachine extends RunningOn, ControlledEntity, Identity, InternalIdentity, Displayable, StateObject<VirtualMachine.State> {
36+
public interface VirtualMachine extends RunningOn, ControlledEntity, Partition, Displayable, StateObject<VirtualMachine.State> {
3937

4038
public enum PowerState {
4139
PowerUnknown,

api/src/org/apache/cloudstack/alert/AlertService.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616
// under the License.
1717
package org.apache.cloudstack.alert;
1818

19-
import java.util.HashSet;
20-
import java.util.Set;
21-
2219
import com.cloud.capacity.Capacity;
2320
import com.cloud.exception.InvalidParameterValueException;
2421

22+
import java.util.HashSet;
23+
import java.util.Set;
24+
2525
public interface AlertService {
2626
public static class AlertType {
2727
private static Set<AlertType> defaultAlertTypes = new HashSet<AlertType>();
@@ -67,6 +67,7 @@ private AlertType(short type, String name, boolean isDefault) {
6767
public static final AlertType ALERT_TYPE_SYNC = new AlertType((short)27, "ALERT.TYPE.SYNC", true);
6868
public static final AlertType ALERT_TYPE_UPLOAD_FAILED = new AlertType((short)28, "ALERT.UPLOAD.FAILED", true);
6969
public static final AlertType ALERT_TYPE_OOBM_AUTH_ERROR = new AlertType((short)29, "ALERT.OOBM.AUTHERROR", true);
70+
public static final AlertType ALERT_TYPE_HA_ACTION = new AlertType((short)30, "ALERT.HA.ACTION", true);
7071

7172
public short getType() {
7273
return type;

api/src/org/apache/cloudstack/api/ApiConstants.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ public class ApiConstants {
2121
public static final String ACCOUNTS = "accounts";
2222
public static final String ACCOUNT_TYPE = "accounttype";
2323
public static final String ACCOUNT_ID = "accountid";
24+
public static final String ACTIVITY = "activity";
2425
public static final String ADDRESS = "address";
2526
public static final String ALGORITHM = "algorithm";
2627
public static final String ALLOCATED_ONLY = "allocatedonly";
@@ -94,13 +95,15 @@ public class ApiConstants {
9495
public static final String DOMAIN_ID = "domainid";
9596
public static final String DOMAIN__ID = "domainId";
9697
public static final String DURATION = "duration";
98+
public static final String ELIGIBLE = "eligible";
9799
public static final String EMAIL = "email";
98100
public static final String END_DATE = "enddate";
99101
public static final String END_IP = "endip";
100102
public static final String END_IPV6 = "endipv6";
101103
public static final String END_PORT = "endport";
102104
public static final String ENTRY_TIME = "entrytime";
103105
public static final String EXPIRES = "expires";
106+
public static final String FENCE = "fence";
104107
public static final String FETCH_LATEST = "fetchlatest";
105108
public static final String FIRSTNAME = "firstname";
106109
public static final String FORCED = "forced";
@@ -119,6 +122,9 @@ public class ApiConstants {
119122
public static final String GUEST_CIDR_ADDRESS = "guestcidraddress";
120123
public static final String GUEST_VLAN_RANGE = "guestvlanrange";
121124
public static final String HA_ENABLE = "haenable";
125+
public static final String HA_PROVIDER = "haprovider";
126+
public static final String HA_STATE = "hastate";
127+
public static final String HEALTH = "health";
122128
public static final String HOST_ID = "hostid";
123129
public static final String HOST_NAME = "hostname";
124130
public static final String HYPERVISOR = "hypervisor";
@@ -215,6 +221,7 @@ public class ApiConstants {
215221
public static final String PUBLIC_END_PORT = "publicendport";
216222
public static final String PUBLIC_ZONE = "publiczone";
217223
public static final String RECEIVED_BYTES = "receivedbytes";
224+
public static final String RECOVER = "recover";
218225
public static final String REQUIRES_HVM = "requireshvm";
219226
public static final String RESOURCE_TYPE = "resourcetype";
220227
public static final String RESPONSE = "response";

0 commit comments

Comments
 (0)