diff --git a/helix-common/src/main/java/org/apache/helix/constants/InstanceConstants.java b/helix-common/src/main/java/org/apache/helix/constants/InstanceConstants.java index 379bbaf022..e2cc2de2d5 100644 --- a/helix-common/src/main/java/org/apache/helix/constants/InstanceConstants.java +++ b/helix-common/src/main/java/org/apache/helix/constants/InstanceConstants.java @@ -12,8 +12,6 @@ public enum InstanceDisabledType { public enum InstanceOperation { EVACUATE, // Node will be removed after a period of time SWAP_IN, // New node joining for swap operation - SWAP_OUT, // Existing Node to be removed for swap operation - ENABLE, // Backward compatible field for HELIX_ENABLED. Set when changing from disabled to enabled. - DISABLE // Backward compatible field for HELIX_ENABLED. Set when changing from enabled to disabled. + SWAP_OUT // Existing Node to be removed for swap operation } } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java index ad36b50195..56be045306 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java @@ -30,8 +30,10 @@ import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; import org.apache.helix.HelixDefinedState; import org.apache.helix.api.config.StateTransitionThrottleConfig; +import org.apache.helix.constants.InstanceConstants; import org.apache.helix.controller.dataproviders.ResourceControllerDataProvider; import org.apache.helix.controller.rebalancer.constraint.MonitoredAbnormalResolver; import org.apache.helix.controller.rebalancer.util.DelayedRebalanceUtil; @@ -39,6 +41,7 @@ import org.apache.helix.controller.stages.CurrentStateOutput; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.IdealState; +import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.Partition; import org.apache.helix.model.Resource; import org.apache.helix.model.ResourceAssignment; @@ -53,6 +56,7 @@ */ public class DelayedAutoRebalancer extends AbstractRebalancer { private static final Logger LOG = LoggerFactory.getLogger(DelayedAutoRebalancer.class); + private static final Set INSTANCE_OPERATION_TO_EXCLUDE = Set.of("EVACUATE", "SWAP_IN"); @Override public IdealState computeNewIdealState(String resourceName, @@ -109,14 +113,12 @@ public IdealState computeNewIdealState(String resourceName, allNodes = clusterData.getAllInstances(); } - Set activeNodes = liveEnabledNodes; + long delay = DelayedRebalanceUtil.getRebalanceDelay(currentIdealState, clusterConfig); + Set activeNodes = DelayedRebalanceUtil + .getActiveNodes(allNodes, currentIdealState, liveEnabledNodes, + clusterData.getInstanceOfflineTimeMap(), clusterData.getLiveInstances().keySet(), + clusterData.getInstanceConfigMap(), delay, clusterConfig); if (delayRebalanceEnabled) { - long delay = DelayedRebalanceUtil.getRebalanceDelay(currentIdealState, clusterConfig); - activeNodes = DelayedRebalanceUtil - .getActiveNodes(allNodes, currentIdealState, liveEnabledNodes, - clusterData.getInstanceOfflineTimeMap(), clusterData.getLiveInstances().keySet(), - clusterData.getInstanceConfigMap(), delay, clusterConfig); - Set offlineOrDisabledInstances = new HashSet<>(activeNodes); offlineOrDisabledInstances.removeAll(liveEnabledNodes); DelayedRebalanceUtil.setRebalanceScheduler(currentIdealState.getResourceName(), true, @@ -157,15 +159,20 @@ public IdealState computeNewIdealState(String resourceName, // sort node lists to ensure consistent preferred assignments List allNodeList = new ArrayList<>(allNodes); - List liveEnabledNodeList = new ArrayList<>(liveEnabledNodes); + // We will not assign partition to instances with evacuation and wap-out tag. + // TODO: Currently we have 2 groups of instances and compute preference list twice and merge. + // Eventually we want to have exclusive groups of instance for different instance tag. + List liveEnabledAssignableNodeList = filterOutOnOperationInstances(clusterData.getInstanceConfigMap(), + liveEnabledNodes); Collections.sort(allNodeList); - Collections.sort(liveEnabledNodeList); + Collections.sort(liveEnabledAssignableNodeList); ZNRecord newIdealMapping = _rebalanceStrategy - .computePartitionAssignment(allNodeList, liveEnabledNodeList, currentMapping, clusterData); + .computePartitionAssignment(allNodeList, liveEnabledAssignableNodeList, currentMapping, clusterData); ZNRecord finalMapping = newIdealMapping; - if (DelayedRebalanceUtil.isDelayRebalanceEnabled(currentIdealState, clusterConfig)) { + if (DelayedRebalanceUtil.isDelayRebalanceEnabled(currentIdealState, clusterConfig) + || liveEnabledAssignableNodeList.size()!= activeNodes.size()) { List activeNodeList = new ArrayList<>(activeNodes); Collections.sort(activeNodeList); int minActiveReplicas = DelayedRebalanceUtil.getMinActiveReplica( @@ -194,6 +201,14 @@ public IdealState computeNewIdealState(String resourceName, return idealState; } + private static List filterOutOnOperationInstances(Map instanceConfigMap, + Set nodes) { + return nodes.stream() + .filter( + instance -> !INSTANCE_OPERATION_TO_EXCLUDE.contains(instanceConfigMap.get(instance).getInstanceOperation())) + .collect(Collectors.toList()); + } + private IdealState generateNewIdealState(String resourceName, IdealState currentIdealState, ZNRecord newMapping) { IdealState newIdealState = new IdealState(resourceName); diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java index e7ff99765d..f42176a0a4 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java @@ -113,7 +113,7 @@ public static Set getActiveNodes(Set allNodes, IdealState idealS private static Set getActiveNodes(Set allNodes, Set liveEnabledNodes, Map instanceOfflineTimeMap, Set liveNodes, Map instanceConfigMap, long delay, ClusterConfig clusterConfig) { - Set activeNodes = filterOutEvacuatingInstances(instanceConfigMap, liveEnabledNodes); + Set activeNodes = new HashSet<>(liveEnabledNodes); Set offlineOrDisabledInstances = new HashSet<>(allNodes); offlineOrDisabledInstances.removeAll(liveEnabledNodes); long currentTime = System.currentTimeMillis(); @@ -126,10 +126,11 @@ private static Set getActiveNodes(Set allNodes, Set live activeNodes.add(ins); } } - return activeNodes; + // TODO: change this after merging operation and helix-enable field. + return filterOutEvacuatingInstances(instanceConfigMap, activeNodes); } - private static Set filterOutEvacuatingInstances(Map instanceConfigMap, + public static Set filterOutEvacuatingInstances(Map instanceConfigMap, Set nodes) { return nodes.stream() .filter(instance -> !instanceConfigMap.get(instance).getInstanceOperation().equals( diff --git a/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java b/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java index 193019d0e7..45e0476ba0 100644 --- a/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java +++ b/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java @@ -265,15 +265,6 @@ public boolean getInstanceEnabled() { */ public void setInstanceEnabled(boolean enabled) { // set instance operation only when we need to change InstanceEnabled value. - // When enabling an instance where current HELIX_ENABLED is false, we update INSTANCE_OPERATION to 'ENABLE' - // When disabling and instance where current HELIX_ENABLED is false, we overwrite what current operation and - // update INSTANCE_OPERATION to 'DISABLE'. - String instanceOperationKey = InstanceConfigProperty.INSTANCE_OPERATION.toString(); - if (enabled != getInstanceEnabled()) { - _record.setSimpleField(instanceOperationKey, - enabled ? InstanceConstants.InstanceOperation.ENABLE.name() - : InstanceConstants.InstanceOperation.DISABLE.name()); - } setInstanceEnabledHelper(enabled); } @@ -344,17 +335,8 @@ public long getInstanceEnabledTime() { } public void setInstanceOperation(InstanceConstants.InstanceOperation operation) { - if (operation != InstanceConstants.InstanceOperation.DISABLE - && operation != InstanceConstants.InstanceOperation.ENABLE) { - if (!getInstanceEnabled()) { - throw new HelixException( - "setting non enable/disable operation (e.g. evacuate, swap) to helix disabled instance is not allowed"); - } - } else { - setInstanceEnabledHelper(operation == InstanceConstants.InstanceOperation.ENABLE); - } - - _record.setSimpleField(InstanceConfigProperty.INSTANCE_OPERATION.toString(), operation.toString()); + _record.setSimpleField(InstanceConfigProperty.INSTANCE_OPERATION.name(), + operation == null ? "" : operation.name()); } public String getInstanceOperation() { diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java index 2276ab1c37..3f459318b9 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java @@ -27,6 +27,7 @@ import org.apache.helix.model.BuiltInStateModelDefinitions; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.ExternalView; +import org.apache.helix.model.IdealState; import org.apache.helix.model.Message; import org.apache.helix.model.ResourceAssignment; import org.apache.helix.participant.StateMachineEngine; @@ -55,7 +56,7 @@ public class TestInstanceOperation extends ZkTestBase { private Set _allDBs = new HashSet<>(); private ZkHelixClusterVerifier _clusterVerifier; private ConfigAccessor _configAccessor; - private long _stateModelDelay = 30L; + private long _stateModelDelay = 3L; protected AssignmentMetadataStore _assignmentMetadataStore; HelixDataAccessor _dataAccessor; @@ -105,6 +106,7 @@ public void testEvacuate() throws Exception { _gSetupTool.getClusterManagementTool() .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, InstanceConstants.InstanceOperation.EVACUATE); + System.out.println("123"); Assert.assertTrue(_clusterVerifier.verifyByPolling()); // New ev should contain all instances but the evacuated one @@ -125,7 +127,7 @@ public void testRevertEvacuation() throws Exception { // revert an evacuate instance String instanceToEvacuate = _participants.get(0).getInstanceName(); _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, InstanceConstants.InstanceOperation.ENABLE); + .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); Assert.assertTrue(_clusterVerifier.verifyByPolling()); @@ -138,6 +140,55 @@ public void testRevertEvacuation() throws Exception { } @Test(dependsOnMethods = "testRevertEvacuation") + public void testAddingNodeWithEvacuationTag() throws Exception { + // first disable and instance, and wait for all replicas to be moved out + String mockNewInstance = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool() + .enableInstance(CLUSTER_NAME, mockNewInstance, false); + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + //ev should contain all instances but the disabled one + Map assignment = getEV(); + List currentActiveInstances = + _participantNames.stream().filter(n -> !n.equals(mockNewInstance)).collect(Collectors.toList()); + for (String resource : _allDBs) { + validateAssignmentInEv(assignment.get(resource), REPLICA-1); + Set newPAssignedParticipants = getParticipantsInEv(assignment.get(resource)); + Assert.assertFalse(newPAssignedParticipants.contains(mockNewInstance)); + Assert.assertTrue(newPAssignedParticipants.containsAll(currentActiveInstances)); + } + + // add evacuate tag and enable instance + _gSetupTool.getClusterManagementTool() + .setInstanceOperation(CLUSTER_NAME, mockNewInstance, InstanceConstants.InstanceOperation.EVACUATE); + _gSetupTool.getClusterManagementTool() + .enableInstance(CLUSTER_NAME, mockNewInstance, true); + //ev should be the same + assignment = getEV(); + currentActiveInstances = + _participantNames.stream().filter(n -> !n.equals(mockNewInstance)).collect(Collectors.toList()); + for (String resource : _allDBs) { + validateAssignmentInEv(assignment.get(resource), REPLICA-1); + Set newPAssignedParticipants = getParticipantsInEv(assignment.get(resource)); + Assert.assertFalse(newPAssignedParticipants.contains(mockNewInstance)); + Assert.assertTrue(newPAssignedParticipants.containsAll(currentActiveInstances)); + } + + // now remove operation tag + String instanceToEvacuate = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool() + .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // EV should contain all participants, check resources one by one + assignment = getEV(); + for (String resource : _allDBs) { + Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); + validateAssignmentInEv(assignment.get(resource)); + } + } + + @Test(dependsOnMethods = "testAddingNodeWithEvacuationTag") public void testEvacuateAndCancelBeforeBootstrapFinish() throws Exception { // add a resource where downward state transition is slow createResourceWithDelayedRebalance(CLUSTER_NAME, "TEST_DB3_DELAYED_CRUSHED", "MasterSlave", PARTITIONS, REPLICA, @@ -151,7 +202,7 @@ public void testEvacuateAndCancelBeforeBootstrapFinish() throws Exception { Assert.assertTrue(_clusterVerifier.verifyByPolling()); // set bootstrap ST delay to a large number - _stateModelDelay = -300000L; + _stateModelDelay = -10000L; // evacuate an instance String instanceToEvacuate = _participants.get(0).getInstanceName(); _gSetupTool.getClusterManagementTool() @@ -174,9 +225,9 @@ public void testEvacuateAndCancelBeforeBootstrapFinish() throws Exception { validateAssignmentInEv(assignment.get(resource)); } - // cancel the evacuation by setting instance operation back to `ENABLE` + // cancel the evacuation _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, InstanceConstants.InstanceOperation.ENABLE); + .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); assignment = getEV(); for (String resource : _allDBs) { @@ -200,7 +251,7 @@ public void testEvacuateAndCancelBeforeBootstrapFinish() throws Exception { public void testEvacuateAndCancelBeforeDropFinish() throws Exception { // set DROP ST delay to a large number - _stateModelDelay = 300000L; + _stateModelDelay = 10000L; // evacuate an instance String instanceToEvacuate = _participants.get(0).getInstanceName(); @@ -211,8 +262,9 @@ public void testEvacuateAndCancelBeforeDropFinish() throws Exception { TestHelper.verify( () -> ((_dataAccessor.getChildNames(_dataAccessor.keyBuilder().messages(instanceToEvacuate))).isEmpty()), 30000); + // cancel evacuation _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, InstanceConstants.InstanceOperation.ENABLE); + .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); // check every replica has >= 3 active replicas, even before cluster converge Map assignment = getEV(); for (String resource : _allDBs) { @@ -274,6 +326,27 @@ public void testMarkEvacuationAfterEMM() throws Exception { } + @Test(dependsOnMethods = "testMarkEvacuationAfterEMM") + public void testEvacuationWithOfflineInstancesInCluster() throws Exception { + _participants.get(2).syncStop(); + _participants.get(3).syncStop(); + // wait for converge, and set evacuate on instance 0 + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + String evacuateInstanceName = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool() + .setInstanceOperation(CLUSTER_NAME, evacuateInstanceName, InstanceConstants.InstanceOperation.EVACUATE); + + Map assignment; + List currentActiveInstances = + _participantNames.stream().filter(n -> (!n.equals(evacuateInstanceName) && !n.equals(_participants.get(3).getInstanceName()))).collect(Collectors.toList()); + TestHelper.verify( ()-> {return verifyIS(evacuateInstanceName);}, TestHelper.WAIT_DURATION); + + _participants.get(3).syncStart(); + _participants.get(2).syncStart(); + } + + private void addParticipant(String participantName) { _gSetupTool.addInstanceToCluster(CLUSTER_NAME, participantName); @@ -290,8 +363,12 @@ private void addParticipant(String participantName) { } private void createTestDBs(long delayTime) throws InterruptedException { + createResourceWithDelayedRebalance(CLUSTER_NAME, "TEST_DB0_CRUSHED", + BuiltInStateModelDefinitions.LeaderStandby.name(), PARTITIONS, REPLICA, REPLICA - 1, -1, + CrushEdRebalanceStrategy.class.getName()); + _allDBs.add("TEST_DB0_CRUSHED"); createResourceWithDelayedRebalance(CLUSTER_NAME, "TEST_DB1_CRUSHED", - BuiltInStateModelDefinitions.LeaderStandby.name(), PARTITIONS, REPLICA, REPLICA - 1, 200, + BuiltInStateModelDefinitions.LeaderStandby.name(), PARTITIONS, REPLICA, REPLICA - 1, 2000000, CrushEdRebalanceStrategy.class.getName()); _allDBs.add("TEST_DB1_CRUSHED"); createResourceWithWagedRebalance(CLUSTER_NAME, "TEST_DB2_WAGED", BuiltInStateModelDefinitions.LeaderStandby.name(), @@ -310,14 +387,39 @@ private Map getEV() { return externalViews; } + private boolean verifyIS(String evacuateInstanceName) { + for (String db : _allDBs) { + IdealState is = _gSetupTool.getClusterManagementTool().getResourceIdealState(CLUSTER_NAME, db); + for (String partition : is.getPartitionSet()) { + List newPAssignedParticipants = is.getPreferenceList(partition); + if (newPAssignedParticipants.contains(evacuateInstanceName)) { + System.out.println("partition " + partition + " assignment " + newPAssignedParticipants + " ev " + evacuateInstanceName); + return false; + } + } + } + return true; + } + private Set getParticipantsInEv(ExternalView ev) { Set assignedParticipants = new HashSet<>(); - ev.getPartitionSet().forEach(partition -> assignedParticipants.addAll(ev.getStateMap(partition).keySet())); + for (String partition : ev.getPartitionSet()) { + ev.getStateMap(partition) + .keySet() + .stream() + .filter(k -> !ev.getStateMap(partition).get(k).equals("OFFLINE")) + .forEach(assignedParticipants::add); + } return assignedParticipants; } // verify that each partition has >=REPLICA (3 in this case) replicas + private void validateAssignmentInEv(ExternalView ev) { + validateAssignmentInEv(ev, REPLICA); + } + + private void validateAssignmentInEv(ExternalView ev, int expectedNumber) { Set partitionSet = ev.getPartitionSet(); for (String partition : partitionSet) { AtomicInteger activeReplicaCount = new AtomicInteger(); @@ -326,8 +428,7 @@ private void validateAssignmentInEv(ExternalView ev) { .stream() .filter(v -> v.equals("MASTER") || v.equals("LEADER") || v.equals("SLAVE") || v.equals("FOLLOWER") || v.equals("STANDBY")) .forEach(v -> activeReplicaCount.getAndIncrement()); - Assert.assertTrue(activeReplicaCount.get() >=REPLICA); - + Assert.assertTrue(activeReplicaCount.get() >=expectedNumber); } } @@ -375,7 +476,7 @@ public StDelayMSStateModel() { private void sleepWhileNotCanceled(long sleepTime) throws InterruptedException{ while(sleepTime >0 && !isCancelled()) { Thread.sleep(5000); - sleepTime =- 5000; + sleepTime = sleepTime - 5000; } if (isCancelled()) { _cancelled = false; diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java index d721c372a7..35f0712b41 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java @@ -495,29 +495,11 @@ public void updateInstance() throws IOException { instanceConfig.getInstanceOperation(), InstanceConstants.InstanceOperation.EVACUATE.toString()); new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=setInstanceOperation&instanceOperation=INVALIDOP") .expectedReturnStatusCode(Response.Status.NOT_FOUND.getStatusCode()).format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity); - new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=setInstanceOperation") - .expectedReturnStatusCode(Response.Status.BAD_REQUEST.getStatusCode()).format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity); - // TODO: enable the following test when we add sanity check. - // set operation to be DISABLE - new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=setInstanceOperation&instanceOperation=DISABLE") + new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=setInstanceOperation&instanceOperation=") .format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity); instanceConfig = _configAccessor.getInstanceConfig(CLUSTER_NAME, INSTANCE_NAME); Assert.assertEquals( - instanceConfig.getInstanceOperation(), InstanceConstants.InstanceOperation.DISABLE.toString()); - Assert.assertTrue(!instanceConfig.getInstanceEnabled()); - - // set operation to EVACUATE, expect error - new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=setInstanceOperation&instanceOperation=EVACUATE") - .expectedReturnStatusCode(Response.Status.BAD_REQUEST.getStatusCode()) - .format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity); - // set back to enable - new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=setInstanceOperation&instanceOperation=ENABLE") - .format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity); - instanceConfig = _configAccessor.getInstanceConfig(CLUSTER_NAME, INSTANCE_NAME); - Assert.assertEquals( - instanceConfig.getInstanceOperation(), InstanceConstants.InstanceOperation.ENABLE.toString()); - Assert.assertTrue(instanceConfig.getInstanceEnabled()); - + instanceConfig.getInstanceOperation(), ""); System.out.println("End test :" + TestHelper.getTestMethodName()); }