Skip to content

Commit

Permalink
Check on-disk bitmap status on StartVmBackup and on TransferDiskImage
Browse files Browse the repository at this point in the history
Sometimes a bitmap can become invalid/corrupt without oVirt noticing
this. For example if a hypervisor crashes, the active bitmap will become
invalid.
This means that the qcow2 volume does not contain the bitmap anymore,
but oVirt thinks the bitmap still exists because it's in the database.

This currently causes oVirt to fail to create an NBDServer, as it
errors out with 'Bitmap does not exist in ...'.

We use ListVolumeBitmaps to get all the on-disk bitmaps and compare
them with the bitmaps in the oVirt database. If there is any
inconsistency, we remove all the bitmaps/checkpoints.

Signed-off-by: Jean-Louis Dupond <[email protected]>
  • Loading branch information
dupondje committed Jun 8, 2023
1 parent dc41832 commit d78fecc
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import static org.ovirt.engine.core.bll.storage.disk.image.DisksFilter.ONLY_NOT_SHAREABLE;
import static org.ovirt.engine.core.bll.storage.disk.image.DisksFilter.ONLY_SNAPABLE;

import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
Expand Down Expand Up @@ -39,8 +40,10 @@
import org.ovirt.engine.core.common.FeatureSupported;
import org.ovirt.engine.core.common.VdcObjectType;
import org.ovirt.engine.core.common.action.ActionParametersBase;
import org.ovirt.engine.core.common.action.ActionParametersBase.EndProcedure;
import org.ovirt.engine.core.common.action.ActionReturnValue;
import org.ovirt.engine.core.common.action.ActionType;
import org.ovirt.engine.core.common.action.DeleteAllVmCheckpointsParameters;
import org.ovirt.engine.core.common.action.LockProperties;
import org.ovirt.engine.core.common.action.VmBackupParameters;
import org.ovirt.engine.core.common.action.VolumeBitmapCommandParameters;
Expand Down Expand Up @@ -80,6 +83,7 @@
import org.ovirt.engine.core.utils.ReplacementUtils;
import org.ovirt.engine.core.utils.lock.EngineLock;
import org.ovirt.engine.core.utils.transaction.TransactionSupport;
import org.ovirt.engine.core.vdsbroker.irsbroker.UUIDListReturn;
import org.ovirt.engine.core.vdsbroker.irsbroker.VmBackupInfo;
import org.ovirt.engine.core.vdsbroker.vdsbroker.PrepareImageReturn;

Expand Down Expand Up @@ -164,6 +168,11 @@ protected boolean validate() {
String.format("$checkpointId %s", vmBackup.getFromCheckpointId()));
}

if (!validateCheckpoint(vmBackup.getFromCheckpointId())) {
return failValidation(EngineMessage.ACTION_TYPE_FAILED_CHECKPOINT_INVALID,
String.format("$checkpointId %s", vmBackup.getFromCheckpointId()));
}

if (!FeatureSupported.isBackupModeAndBitmapsOperationsSupported(getCluster().getCompatibilityVersion())) {
// Due to bz #1829829, Libvirt doesn't handle the case of mixing full and incremental
// backup under the same operation. This situation can happen when adding a new disk
Expand Down Expand Up @@ -229,6 +238,53 @@ protected boolean validate() {
return true;
}

/**
 * Checks that the bitmap of the given checkpoint is still present on every disk image
 * associated with that checkpoint.
 *
 * <p>For each image returned by {@code vmCheckpointDao.getDisksByCheckpointId}, the on-disk
 * bitmaps are listed with {@code ActionType.ListVolumeBitmaps}. On the first image for which
 * the listing fails, returns no bitmap list, or does not contain the checkpoint id, a forced
 * {@code ActionType.DeleteAllVmCheckpoints} is run for that image and the check stops.
 *
 * @param checkpointId the checkpoint whose bitmap is looked up on each image
 * @return {@code true} if every image lists the bitmap (or no image is associated with the
 *         checkpoint), {@code false} as soon as one image does not
 */
protected boolean validateCheckpoint(Guid checkpointId) {
    List<DiskImage> images = vmCheckpointDao.getDisksByCheckpointId(checkpointId);
    /* Check if the checkpoint is still there on each volume/image */
    for (DiskImage image : images) {
        VdsmImageLocationInfo locationInfo = new VdsmImageLocationInfo(
                image.getStorageIds().get(0),
                image.getId(),
                image.getImageId(),
                null);

        VolumeBitmapCommandParameters parameters =
                new VolumeBitmapCommandParameters(
                        getStoragePoolId(),
                        locationInfo,
                        null);
        parameters.setVdsId(getVdsId());
        parameters.setEndProcedure(ActionParametersBase.EndProcedure.COMMAND_MANAGED);
        parameters.setParentCommand(getActionType());
        parameters.setParentParameters(getParameters());

        ActionReturnValue returnValue =
                runInternalActionWithTasksContext(ActionType.ListVolumeBitmaps, parameters);
        boolean valid = false;
        if (returnValue.getSucceeded()) {
            UUIDListReturn bitmaps = returnValue.getActionReturnValue();
            /* A missing return value or bitmap list is treated the same as "bitmap not found" */
            valid = bitmaps != null
                    && bitmaps.getUUIDList() != null
                    && Arrays.stream(bitmaps.getUUIDList()).anyMatch(checkpointId.toString()::equals);
        }
        /* ListVolumeBitmaps failed or bitmap did not exist on disk -> Remove checkpoints */
        if (!valid) {
            /* The message states what is actually done below: all checkpoints are removed, not only this one */
            log.error("Checkpoint '{}' does not exist on disk '{}'. Removing all checkpoints.",
                    checkpointId,
                    image.getId());
            /* Some checkpoint corruption, remove checkpoints */
            DeleteAllVmCheckpointsParameters deleteAllVmCheckpointsParameters =
                    new DeleteAllVmCheckpointsParameters(getVmId(), List.of(image));
            deleteAllVmCheckpointsParameters.setParentCommand(getActionType());
            deleteAllVmCheckpointsParameters.setParentParameters(getParameters());
            deleteAllVmCheckpointsParameters.setEndProcedure(EndProcedure.COMMAND_MANAGED);
            deleteAllVmCheckpointsParameters.setForce(true);

            runInternalAction(ActionType.DeleteAllVmCheckpoints, deleteAllVmCheckpointsParameters);
            return false;
        }
    }
    return true;
}

public Set<Guid> getDisksNotInPreviousCheckpoint() {
return getDiskIds()
.stream()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.ovirt.engine.core.bll.storage.disk.image;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -32,17 +33,21 @@
import org.ovirt.engine.core.common.AuditLogType;
import org.ovirt.engine.core.common.VdcObjectType;
import org.ovirt.engine.core.common.action.ActionParametersBase;
import org.ovirt.engine.core.common.action.ActionParametersBase.EndProcedure;
import org.ovirt.engine.core.common.action.ActionReturnValue;
import org.ovirt.engine.core.common.action.ActionType;
import org.ovirt.engine.core.common.action.AddDiskParameters;
import org.ovirt.engine.core.common.action.DeleteAllVmCheckpointsParameters;
import org.ovirt.engine.core.common.action.LockProperties;
import org.ovirt.engine.core.common.action.RemoveDiskParameters;
import org.ovirt.engine.core.common.action.TransferDiskImageParameters;
import org.ovirt.engine.core.common.action.TransferImageStatusParameters;
import org.ovirt.engine.core.common.action.VolumeBitmapCommandParameters;
import org.ovirt.engine.core.common.businessentities.ActionGroup;
import org.ovirt.engine.core.common.businessentities.StorageDomain;
import org.ovirt.engine.core.common.businessentities.VDS;
import org.ovirt.engine.core.common.businessentities.VM;
import org.ovirt.engine.core.common.businessentities.VdsmImageLocationInfo;
import org.ovirt.engine.core.common.businessentities.VmBackup;
import org.ovirt.engine.core.common.businessentities.VmBackupPhase;
import org.ovirt.engine.core.common.businessentities.storage.DiskBackupMode;
Expand Down Expand Up @@ -92,6 +97,7 @@
import org.ovirt.engine.core.utils.EngineLocalConfig;
import org.ovirt.engine.core.utils.ReplacementUtils;
import org.ovirt.engine.core.vdsbroker.ResourceManager;
import org.ovirt.engine.core.vdsbroker.irsbroker.UUIDListReturn;
import org.ovirt.engine.core.vdsbroker.vdsbroker.PrepareImageReturn;

@NonTransactiveCommandAttribute
Expand Down Expand Up @@ -259,7 +265,54 @@ private PrepareImageVDSCommandParameters getPrepareParameters(Guid vdsId) {
getDiskImage().getImageId(), true);
}

/**
 * Checks that the bitmap of the given checkpoint is present on the given disk image.
 *
 * <p>The on-disk bitmaps are listed with {@code ActionType.ListVolumeBitmaps}. If the listing
 * fails, returns no bitmap list, or does not contain the checkpoint id, a forced
 * {@code ActionType.DeleteAllVmCheckpoints} is run for the image.
 *
 * @param image the disk image whose on-disk bitmaps are listed
 * @param checkpointId the checkpoint whose bitmap is expected on the image; when {@code null}
 *        there is nothing to look up and the image is reported as valid
 * @return {@code true} if the bitmap is listed for the image (or {@code checkpointId} is
 *         {@code null}), {@code false} otherwise
 */
private boolean validateBitmap(DiskImage image, Guid checkpointId) {
    /*
     * Without a checkpoint id there is no bitmap to compare against. Bail out before calling
     * the host so that a missing id neither throws a NullPointerException nor triggers the
     * checkpoint removal below.
     * NOTE(review): assumes callers may pass null for backups without a from-checkpoint - confirm.
     */
    if (checkpointId == null) {
        return true;
    }

    VdsmImageLocationInfo locationInfo = new VdsmImageLocationInfo(
            image.getStorageIds().get(0),
            image.getId(),
            image.getImageId(),
            null);

    VolumeBitmapCommandParameters parameters =
            new VolumeBitmapCommandParameters(
                    getStoragePoolId(),
                    locationInfo,
                    null);
    parameters.setVdsId(getVdsId());
    parameters.setEndProcedure(ActionParametersBase.EndProcedure.COMMAND_MANAGED);
    parameters.setParentCommand(getActionType());
    parameters.setParentParameters(getParameters());

    ActionReturnValue returnValue =
            runInternalActionWithTasksContext(ActionType.ListVolumeBitmaps, parameters);
    boolean valid = false;
    if (returnValue.getSucceeded()) {
        UUIDListReturn bitmaps = returnValue.getActionReturnValue();
        /* A missing return value or bitmap list is treated the same as "bitmap not found" */
        valid = bitmaps != null
                && bitmaps.getUUIDList() != null
                && Arrays.stream(bitmaps.getUUIDList()).anyMatch(checkpointId.toString()::equals);
    }
    /* ListVolumeBitmaps failed or bitmap did not exist on disk -> Remove checkpoints */
    if (!valid) {
        /* Exactly one argument per placeholder; the former third argument was never rendered */
        log.error("Checkpoint '{}' does not exist on disk '{}'. Removing all checkpoints.",
                checkpointId,
                image.getId());
        /* Some checkpoint corruption, remove checkpoints */
        DeleteAllVmCheckpointsParameters deleteAllVmCheckpointsParameters =
                new DeleteAllVmCheckpointsParameters(getVmId(), List.of(image));
        deleteAllVmCheckpointsParameters.setParentCommand(getActionType());
        deleteAllVmCheckpointsParameters.setParentParameters(getParameters());
        deleteAllVmCheckpointsParameters.setEndProcedure(EndProcedure.COMMAND_MANAGED);
        deleteAllVmCheckpointsParameters.setForce(true);

        runInternalAction(ActionType.DeleteAllVmCheckpoints, deleteAllVmCheckpointsParameters);
        return false;
    }
    return true;
}

private Guid getBitmap() {
if (!validateBitmap(getDiskImage(), getBackup().getFromCheckpointId())) {
return null;
}

if (isHybridBackup() && getDiskImage().getBackupMode() == DiskBackupMode.Incremental) {
return getBackup().getFromCheckpointId();
}
Expand Down

0 comments on commit d78fecc

Please sign in to comment.