Skip to content

Commit

Permalink
fix: ensure snapshot_restore cross kernel test runs
Browse files Browse the repository at this point in the history
Since PR #3896, this test has always succeeded because it did not find any
snapshot directories.

Rewrite and fix the test so that:

- we see one test case for each directory, instead of one test per CPU
  template, so it is clear what is being restored.

- the microvm is created through the API rather than from a JSON file,
  so that the Microvm/Snapshot classes can account for all the devices.

Signed-off-by: Pablo Barbáchano <[email protected]>
  • Loading branch information
pb8o committed Jun 5, 2024
1 parent 8dbfbfd commit b71a91f
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 186 deletions.
7 changes: 2 additions & 5 deletions docs/snapshotting/snapshot-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -648,11 +648,8 @@ supported host kernel versions by generating snapshot artifacts through
[this tool](../../tools/create_snapshot_artifact) and checking devices'
functionality using
[this test](../../tests/integration_tests/functional/test_snapshot_restore_cross_kernel.py).
The microVM snapshotted is built from
[this configuration file](../../tools/create_snapshot_artifact/complex_vm_config.json).
The test restores the snapshot and ensures that all the devices set-up in the
configuration file (network devices, disk, vsock, balloon and MMDS) are
operational post-load.
The test restores the snapshot and ensures that all the devices that were set up
(network devices, disk, vsock, balloon and MMDS) are operational post-load.

In those tests the instance is fixed, except some combinations where we also
test across the same CPU family (Intel x86, Gravitons). In general cross-CPU
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
make_guest_dirty_memory,
)

pytestmark = pytest.mark.nonci


def _test_balloon(microvm):
# Get the firecracker pid.
Expand Down Expand Up @@ -65,17 +67,26 @@ def _test_mmds(vm, mmds_net_iface):

cmd = generate_mmds_get_request(mmds_ipv4_address, token=token)
_, stdout, _ = vm.ssh.run(cmd)
assert json.load(stdout) == data_store
assert json.loads(stdout) == data_store


def get_snapshot_dirs():
    """Yield one pytest parameter per snapshot artifact directory.

    Searches ``FC_WORKSPACE_DIR / "snapshot_artifacts"`` for entries named
    ``*_<cpu_template>_guest_snapshot``. The C3, T2 and T2S templates are
    only searched for when the host CPU vendor is Intel; on any other
    vendor only the "None" template is considered.

    Each match is wrapped in ``pytest.param`` with the directory name as the
    test id, so the test report shows which snapshot is being restored.

    Raises:
        AssertionError: if an entry matching the naming pattern is not a
            directory.
    """
    snapshot_root_name = "snapshot_artifacts"
    snapshot_root_dir = Path(FC_WORKSPACE_DIR) / snapshot_root_name
    cpu_templates = ["C3", "T2", "T2S", "None"]
    if get_cpu_vendor() != CpuVendor.INTEL:
        cpu_templates = ["None"]
    for cpu_template in cpu_templates:
        pattern = f"*_{cpu_template}_guest_snapshot"
        # glob() yields entries in filesystem order; sort them so the
        # generated test cases come out in a stable, reproducible order.
        for snapshot_dir in sorted(snapshot_root_dir.glob(pattern)):
            assert snapshot_dir.is_dir()
            yield pytest.param(snapshot_dir, id=snapshot_dir.name)


@pytest.mark.timeout(600)
@pytest.mark.nonci
@pytest.mark.parametrize(
"cpu_template",
["C3", "T2", "T2S", "None"] if get_cpu_vendor() == CpuVendor.INTEL else ["None"],
)
@pytest.mark.parametrize("snapshot_dir", get_snapshot_dirs())
def test_snap_restore_from_artifacts(
microvm_factory, bin_vsock_path, test_fc_session_root_path, cpu_template
microvm_factory, bin_vsock_path, test_fc_session_root_path, snapshot_dir
):
"""
Restore from snapshots obtained with all supported guest kernel versions.
Expand All @@ -87,43 +98,37 @@ def test_snap_restore_from_artifacts(
"""
logger = logging.getLogger("cross_kernel_snapshot_restore")

snapshot_root_name = "snapshot_artifacts"
snapshot_root_dir = Path(FC_WORKSPACE_DIR) / snapshot_root_name

# Iterate through all subdirectories based on CPU template
# in the snapshot root dir.
snap_subdirs = snapshot_root_dir.glob(f".*_{cpu_template}_guest_snapshot")
for snapshot_dir in snap_subdirs:
assert snapshot_dir.is_dir()
logger.info("Working with snapshot artifacts in %s.", snapshot_dir)
logger.info("Working with snapshot artifacts in %s.", snapshot_dir)

vm = microvm_factory.build()
vm.spawn()
logger.info("Loading microVM from snapshot...")
vm.restore_from_path(snapshot_dir)
vm.resume()
vm = microvm_factory.build()
vm.spawn()
logger.info("Loading microVM from snapshot...")
vm.restore_from_path(snapshot_dir)
vm.resume()

# Ensure microVM is running.
assert vm.state == "Running"
# Ensure microVM is running.
assert vm.state == "Running"

# Test that net devices have connectivity after restore.
for idx, iface in enumerate(vm.iface.values()["iface"]):
logger.info("Testing net device %s...", iface.dev_name)
exit_code, _, _ = vm.ssh_iface(idx).run("sync")
assert exit_code == 0
# Test that net devices have connectivity after restore.
for idx, iface in enumerate(vm.iface.values()):
logger.info("Testing net device %s...", iface["iface"].dev_name)
exit_code, _, _ = vm.ssh_iface(idx).run("true")
assert exit_code == 0

logger.info("Testing data store behavior...")
_test_mmds(vm, vm.iface["eth3"]["iface"])
logger.info("Testing data store behavior...")
_test_mmds(vm, vm.iface["eth3"]["iface"])

logger.info("Testing balloon device...")
_test_balloon(vm)
logger.info("Testing balloon device...")
_test_balloon(vm)

logger.info("Testing vsock device...")
check_vsock_device(vm, bin_vsock_path, test_fc_session_root_path, vm.ssh)
logger.info("Testing vsock device...")
check_vsock_device(vm, bin_vsock_path, test_fc_session_root_path, vm.ssh)

# Run fio on the guest.
# TODO: check the result of FIO or use fsck to check that the root device is
# not corrupted. No obvious errors will be returned here.
guest_run_fio_iteration(vm.ssh, 0)
# Run fio on the guest.
# TODO: check the result of FIO or use fsck to check that the root device is
# not corrupted. No obvious errors will be returned here.
guest_run_fio_iteration(vm.ssh, 0)

vm.kill()
vm.kill()
70 changes: 0 additions & 70 deletions tools/create_snapshot_artifact/complex_vm_config.json

This file was deleted.

140 changes: 66 additions & 74 deletions tools/create_snapshot_artifact/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,20 @@
# pylint: disable=wrong-import-position
from framework.artifacts import disks, kernels
from framework.microvm import MicroVMFactory
from framework.utils import generate_mmds_get_request, generate_mmds_session_token
from framework.utils import (
configure_mmds,
generate_mmds_get_request,
generate_mmds_session_token,
)
from framework.utils_cpuid import CpuVendor, get_cpu_vendor
from host_tools.cargo_build import get_firecracker_binaries


# pylint: enable=wrong-import-position

# Default IPv4 address to route MMDS requests.
IPV4_ADDRESS = "169.254.169.254"
NET_IFACE_FOR_MMDS = "eth3"
# Path to the VM configuration file.
VM_CONFIG_FILE = "tools/create_snapshot_artifact/complex_vm_config.json"
# Root directory for the snapshot artifacts.
SNAPSHOT_ARTIFACTS_ROOT_DIR = "snapshot_artifacts"

Expand Down Expand Up @@ -95,77 +98,66 @@ def main():
for kernel in kernels(glob="vmlinux-*"):
for rootfs in disks(glob="ubuntu-*.squashfs"):
print(kernel, rootfs, cpu_template)
vm = vm_factory.build()
create_snapshots(vm, rootfs, kernel, cpu_template)


def create_snapshots(vm, rootfs, kernel, cpu_template):
"""Snapshot microVM built from vm configuration file."""
# Get ssh key from read-only artifact.
vm.ssh_key = rootfs.with_suffix(".id_rsa")
vm.rootfs_file = rootfs
vm.kernel_file = kernel

# adapt the JSON file
vm_config_file = Path(VM_CONFIG_FILE)
obj = json.load(vm_config_file.open(encoding="UTF-8"))
obj["boot-source"]["kernel_image_path"] = kernel.name
obj["drives"][0]["path_on_host"] = rootfs.name
obj["drives"][0]["is_read_only"] = True
obj["machine-config"]["cpu_template"] = cpu_template
vm.create_jailed_resource(vm_config_file)
vm_config = Path(vm.chroot()) / vm_config_file.name
vm_config.write_text(json.dumps(obj))
vm.jailer.extra_args = {"config-file": vm_config_file.name}

# since we are using a JSON file, we need to do this manually
vm.create_jailed_resource(rootfs)
vm.create_jailed_resource(kernel)

for i in range(4):
vm.add_net_iface(api=False)

vm.spawn(log_level="Info")

# Ensure the microVM has started.
assert vm.state == "Running"

# Populate MMDS.
data_store = {
"latest": {
"meta-data": {
"ami-id": "ami-12345678",
"reservation-id": "r-fea54097",
"local-hostname": "ip-10-251-50-12.ec2.internal",
"public-hostname": "ec2-203-0-113-25.compute-1.amazonaws.com",
}
}
}
populate_mmds(vm, data_store)

# Iterate and validate connectivity on all ifaces after boot.
for i in range(4):
exit_code, _, _ = vm.ssh_iface(i).run("sync")
assert exit_code == 0

# Validate MMDS.
validate_mmds(vm.ssh, data_store)

# Snapshot the microVM.
snapshot = vm.snapshot_diff()

# Create snapshot artifacts directory specific for the kernel version used.
guest_kernel_version = re.search("vmlinux-(.*)", kernel.name)

snapshot_artifacts_dir = (
Path(SNAPSHOT_ARTIFACTS_ROOT_DIR)
/ f"{guest_kernel_version.group(1)}_{cpu_template}_guest_snapshot"
)
snapshot_artifacts_dir.mkdir(parents=True)
snapshot.save_to(snapshot_artifacts_dir)
print(f"Copied snapshot to: {snapshot_artifacts_dir}.")

vm.kill()
vm = vm_factory.build(kernel, rootfs)
vm.spawn(log_level="Info")
vm.basic_config(
vcpu_count=2,
mem_size_mib=1024,
cpu_template=cpu_template,
track_dirty_pages=True,
)
# Add 4 network devices
for i in range(4):
vm.add_net_iface()
# Add a vsock device
vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/v.sock")
# Add MMDS
configure_mmds(vm, ["eth3"], version="V2")
# Add a memory balloon.
vm.api.balloon.put(
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1
)

vm.start()
# Ensure the microVM has started.
assert vm.state == "Running"

# Populate MMDS.
data_store = {
"latest": {
"meta-data": {
"ami-id": "ami-12345678",
"reservation-id": "r-fea54097",
"local-hostname": "ip-10-251-50-12.ec2.internal",
"public-hostname": "ec2-203-0-113-25.compute-1.amazonaws.com",
}
}
}
populate_mmds(vm, data_store)

# Iterate and validate connectivity on all ifaces after boot.
for i in range(4):
exit_code, _, _ = vm.ssh_iface(i).run("sync")
assert exit_code == 0

# Validate MMDS.
validate_mmds(vm.ssh, data_store)

# Snapshot the microVM.
snapshot = vm.snapshot_diff()

# Create snapshot artifacts directory specific for the kernel version used.
guest_kernel_version = re.search("vmlinux-(.*)", kernel.name)

snapshot_artifacts_dir = (
Path(SNAPSHOT_ARTIFACTS_ROOT_DIR)
/ f"{guest_kernel_version.group(1)}_{cpu_template}_guest_snapshot"
)
snapshot_artifacts_dir.mkdir(parents=True)
snapshot.save_to(snapshot_artifacts_dir)
print(f"Copied snapshot to: {snapshot_artifacts_dir}.")

vm.kill()


if __name__ == "__main__":
Expand Down

0 comments on commit b71a91f

Please sign in to comment.