diff --git a/docs/requirements.txt b/docs/requirements.txt index 38bd1e244..f069d27c6 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -9,3 +9,6 @@ plotly matplotlib kaleido dask[complete] +torch +torchvision +tqdm diff --git a/docs/src/index.rst b/docs/src/index.rst index 93a0ede07..2d4b46a9a 100644 --- a/docs/src/index.rst +++ b/docs/src/index.rst @@ -38,6 +38,7 @@ tutorials/cluster tutorials/pytorch_a2c_ppo tutorials/speechbrain_tutorial + auto_tutorials/plot_5_warm_starting .. toctree:: :caption: Plugins diff --git a/examples/tutorials/README.rst b/examples/tutorials/README.rst index 63fc63ae9..ecb76636a 100644 --- a/examples/tutorials/README.rst +++ b/examples/tutorials/README.rst @@ -1,4 +1,3 @@ Examples -------- -bla bla bla diff --git a/examples/tutorials/current_db.pkl b/examples/tutorials/current_db.pkl new file mode 100644 index 000000000..d1183d359 Binary files /dev/null and b/examples/tutorials/current_db.pkl differ diff --git a/examples/tutorials/plot_5_warm_starting.py b/examples/tutorials/plot_5_warm_starting.py new file mode 100644 index 000000000..635e76323 --- /dev/null +++ b/examples/tutorials/plot_5_warm_starting.py @@ -0,0 +1,380 @@ +""" + +******************************* +Warm Starting of HPO Algorithms +******************************* + +This tutorial shows how to leverage the results of previous experiments to more efficiently search +the hyper-parameter space of a new experiment. 
+"""
+
+import os
+import random
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import numpy as np
+import torch
+import torch.backends
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+import tqdm
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import StepLR
+from torch.utils.data import DataLoader
+from torchvision import datasets, transforms
+from typing_extensions import Literal
+
+from orion.client import build_experiment
+from orion.executor.single_backend import SingleExecutor
+
+# flake8: noqa: E266
+
+#%%
+#
+# Training code
+#
+
+DatasetName = Literal["MNIST", "FashionMNIST", "CIFAR10", "CIFAR100", "SVHN"]
+
+normalization_means: dict[DatasetName, tuple[float, ...]] = {
+    "MNIST": (0.1307,),
+    "FashionMNIST": (0.2860,),
+    "CIFAR10": (0.4914, 0.4822, 0.4465),
+    "CIFAR100": (0.5071, 0.4865, 0.4409),
+}
+normalization_stds: dict[DatasetName, tuple[float, ...]] = {
+    "MNIST": (0.3081,),
+    "FashionMNIST": (0.3530,),
+    "CIFAR10": (0.2470, 0.2435, 0.2616),
+    "CIFAR100": (0.2673, 0.2564, 0.2762),
+}
+dataset_num_classes: dict[DatasetName, int] = {
+    "MNIST": 10,
+    "FashionMNIST": 10,
+    "CIFAR10": 10,
+    "CIFAR100": 100,
+}
+
+
+@dataclass
+class Args:
+    """Configuration options for a training run."""
+
+    dataset: DatasetName = "MNIST"
+    """ Dataset to use."""
+
+    batch_size: int = 512
+    """input batch size for training"""
+
+    test_batch_size: int = 1000
+    """input batch size for testing"""
+
+    epochs: int = 13
+    """number of epochs to train"""
+
+    lr: float = 1e-2
+    """learning rate"""
+
+    gamma: float = 0.7
+    """Learning rate step gamma"""
+
+    device: torch.device = field(
+        default_factory=lambda: torch.device(
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if hasattr(torch.backends, "mps") and torch.backends.mps.is_available()  # type: ignore
+            else "cpu"
+        )
+    )
+    """Device to use for training."""
+
+    dry_run: bool = False
+    """quickly check a single pass"""
+
+    seed: int = 1
+    """random seed (default: 1)"""
+
+    log_interval: int = 10
+    """how many batches to wait before logging training status"""
+
+    save_model: bool = False
+    """For Saving the current Model"""
+
+    data_dir: Path = Path(os.environ.get("DATA_DIR", "data"))
+    """ Directory where the dataset should be found or downloaded."""
+
+
+class Net(nn.Sequential):
+    """Simple convnet that outputs raw (un-normalized) class logits."""
+
+    def __init__(self, n_classes: int = 10):
+        super().__init__(
+            # NOTE: `in_channels` is determined in the first forward pass
+            nn.LazyConv2d(32, 3, 1),
+            nn.ReLU(),
+            nn.Conv2d(32, 64, 3, 1),
+            nn.ReLU(),
+            nn.MaxPool2d(2),
+            nn.Dropout2d(0.25),
+            nn.Flatten(),
+            # NOTE: `in_features` is determined in the first forward pass
+            nn.LazyLinear(128),
+            nn.ReLU(),
+            nn.Dropout(0.5),
+            nn.Linear(128, n_classes),
+        )
+
+
+def train_epoch(
+    args: Args,
+    model: Net,
+    device: torch.device,
+    train_loader: DataLoader,
+    optimizer: Optimizer,
+    epoch: int,
+):
+    """Trains the model for one epoch (a single batch only when `args.dry_run` is set)."""
+    model.train()
+    loss_function = F.cross_entropy
+
+    pbar = tqdm.tqdm(train_loader, desc=f"Epoch {epoch}")
+    for batch_idx, (data, target) in enumerate(pbar):
+        data, target = data.to(device), target.to(device)
+        optimizer.zero_grad()
+        output = model(data)
+        loss = loss_function(output, target)
+        loss.backward()
+        optimizer.step()
+        if batch_idx % args.log_interval == 0:
+            pbar.set_postfix({"loss": loss.item()})
+        if args.dry_run:
+            break
+
+
+def test_epoch(model: Net, device: torch.device, test_loader: DataLoader) -> float:
+    """Evaluates the model on the test set, returning the average per-sample test loss."""
+    model.eval()
+    test_loss = 0.0
+    correct = 0
+
+    num_samples = len(test_loader.dataset)  # type: ignore
+
+    with torch.no_grad():
+        for data, target in test_loader:
+            data, target = data.to(device), target.to(device)
+            output = model(data)
+            # sum up batch loss; `Net` outputs raw logits, so use the same loss as training
+            test_loss += F.cross_entropy(output, target, reduction="sum").item()
+            # get the index of the max logit (the predicted class)
+            pred = output.argmax(dim=1, keepdim=True)
+            correct += pred.eq(target.view_as(pred)).sum().item()
+
+    test_loss /= num_samples
+
+    print(
+        f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{num_samples} "
+        f"({100.0 * correct / num_samples:.0f}%)\n"
+    )
+    return test_loss
+
+
+def main(**kwargs):
+    """Main loop. Trains, tests after each epoch, and returns the last test loss for Orion."""
+    # Training settings
+
+    # note: could also use simple-parsing to parse the Args from the command-line:
+    # import simple_parsing
+    # from simple_parsing import parse
+    # args = parse(Args)
+    args = Args(**kwargs)
+    print(f"Args: {args}")
+
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+
+    device = args.device
+
+    train_kwargs = {"batch_size": args.batch_size, "shuffle": True}
+    test_kwargs = {"batch_size": args.test_batch_size}
+    if device.type == "cuda":
+        # Note: When using Orion with parallel workers (which is the case by default?),
+        # `num_workers` should be set to 0, because otherwise we get an error about daemonic
+        # processes having children, etc.
+        cuda_kwargs = {"num_workers": 0, "pin_memory": True}
+        train_kwargs.update(cuda_kwargs)
+        test_kwargs.update(cuda_kwargs)
+
+    transform = transforms.Compose(
+        [
+            transforms.ToTensor(),
+            transforms.Normalize(
+                normalization_means[args.dataset], normalization_stds[args.dataset]
+            ),
+        ]
+    )
+
+    data_dir = args.data_dir
+    dataset_class = getattr(datasets, args.dataset)
+    train_dataset = dataset_class(
+        str(data_dir), train=True, download=True, transform=transform
+    )
+    test_dataset = dataset_class(str(data_dir), train=False, transform=transform)
+    train_loader = DataLoader(train_dataset, **train_kwargs)
+    test_loader = DataLoader(test_dataset, **test_kwargs)
+
+    model = Net(n_classes=dataset_num_classes[args.dataset]).to(device)
+    optimizer = optim.Adam(model.parameters(), lr=args.lr)
+
+    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
+    test_loss = None
+    for epoch in range(1, args.epochs + 1):
+        train_epoch(args, model, device, train_loader, optimizer, epoch)
+        test_loss = test_epoch(model, device, test_loader)
+        scheduler.step()
+
+    if args.save_model:
+        run_working_dir = Path(os.environ.get("ORION_WORKING_DIR", "."))
+        # use the trial working dir to save the model.
+        torch.save(model.state_dict(), str(run_working_dir / "model.pt"))
+    return [dict(name="loss", type="objective", value=test_loss)]
+
+
+# %%
+# Controls for this example:
+#
+previous_experiment_n_runs = 10
+previous_experiment_settings = {
+    "dataset": "CIFAR100",
+    "epochs": 3,
+}
+
+current_experiment_n_runs = 10
+current_experiment_settings = {
+    "dataset": "CIFAR10",
+    "epochs": 3,
+}
+
+# We're using multiple seeds for a more robust comparison of with/without warm-starting.
+n_seeds = 3
+
+# The number of initial random suggestions that the optimization algorithm should do.
+n_initial_random_suggestions = 5
+
+# NOTE: This gets run in the tutorials directory
+# NOTE: This needs to be a relative path, otherwise the CI runs will fail.
+# Specify the database where the previous experiments are stored. We use a local PickleDB here.
+previous_experiment_storage = {
+    "type": "legacy",
+    "database": {
+        "type": "pickleddb",
+        "host": "previous_db.pkl",
+    },
+}
+current_experiment_storage = {
+    "type": "legacy",
+    "database": {
+        "type": "pickleddb",
+        "host": "current_db.pkl",
+    },
+}
+
+
+previous_experiment = build_experiment(
+    name="previous_experiment",
+    space={"lr": "loguniform(1e-5, 1.0)"},
+    storage=previous_experiment_storage,
+    algorithms={"random": {"seed": 1}},
+    max_trials=previous_experiment_n_runs,
+    executor=SingleExecutor(),
+)
+
+# %%
+# Populate the initial experiment with some trials:
+
+previous_experiment.workon(main, **previous_experiment_settings)
+
+# %%
+# Run a new experiment, without warm-starting (a.k.a. "cold-start"):
+
+
+cold_experiments = [
+    build_experiment(
+        name=f"cold_experiment_{seed}",
+        space={"lr": "loguniform(1e-5, 1.0)"},
+        storage=current_experiment_storage,
+        executor=SingleExecutor(),
+        algorithms={
+            "tpe": {"seed": seed, "n_initial_points": n_initial_random_suggestions}
+        },
+        # algorithms={"robo_gp": {"seed": seed, "n_initial_points": n_initial_points}},
+        max_trials=current_experiment_n_runs,
+    )
+    for seed in range(n_seeds)
+]
+for exp in cold_experiments:
+    exp.workon(main, **current_experiment_settings)
+
+#%%
+# New experiment with warm-starting:
+
+assert previous_experiment.storage
+assert previous_experiment.max_trials
+warm_experiments = [
+    build_experiment(
+        name=f"warm_experiment_{seed}",
+        space={"lr": "loguniform(1e-5, 1.0)"},
+        storage=current_experiment_storage,
+        executor=SingleExecutor(),
+        max_trials=current_experiment_n_runs,
+        # NOTE: This n_initial_points is changed slightly, since it also counts the trials from
+        # the previous experiment. This is just so the comparison is a bit fairer.
+        # Both algorithms do random search for the first few trials of the current task and then
+        # optimize.
+        algorithms={
+            "tpe": {
+                "seed": seed,
+                "n_initial_points": previous_experiment_n_runs
+                + n_initial_random_suggestions,
+            }
+        },
+        # Pass the knowledge base to `build_experiment`, either with a configuration dictionary:
+        knowledge_base={"KnowledgeBase": {"storage": previous_experiment_storage}},
+        # Or by instantiating a KnowledgeBase and passing it directly:
+        # knowledge_base=KnowledgeBase(storage=previous_experiment.storage),
+    )
+    for seed in range(n_seeds)
+]
+
+for exp in warm_experiments:
+    exp.workon(main, **current_experiment_settings)
+
+# %%
+#
+# Compare the results:
+#
+# Here we use the :func:`orion.plotting.base.regrets` function to plot the results.
+# This shows the performance of each variant.
+# In blue, we have the single line which shows the results we had on the previous experiment.
+# In yellow, we have the results of the new experiment (same model on the new dataset).
+# In red, we have the warm-started experiment, where we give the algorithm access to the old data.
+# The previous data gets annotated with a different task id when viewed by the algorithm, making it
+# possible for the algorithm to learn what is common and what is different about each task.
+ +from orion.plotting.base import regrets + +fig = regrets( + { + "previous experiment": [previous_experiment], + "without warm-start": cold_experiments, + "with warm-start": warm_experiments, + }, +) +fig.show() +fig.write_image("../../docs/src/_static/warm_start_thumbnail.png") +fig + +# sphinx_gallery_thumbnail_path = '_static/warm_start_thumbnail.png' diff --git a/examples/tutorials/previous_db.pkl b/examples/tutorials/previous_db.pkl new file mode 100644 index 000000000..2b69b030a Binary files /dev/null and b/examples/tutorials/previous_db.pkl differ diff --git a/setup.py b/setup.py index b369e1458..ffb5805b1 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,9 @@ "sphinxcontrib.httpdomain", "sphinx-autoapi", "sphinx_gallery", + "torch", + "torchvision", + "tqdm", ], "dask": ["dask[complete]"], "track": ["track @ git+https://github.com/Delaunay/track@master#egg=track"], diff --git a/src/orion/core/io/database/pickleddb.py b/src/orion/core/io/database/pickleddb.py index 6650daf84..ca8efd0a1 100644 --- a/src/orion/core/io/database/pickleddb.py +++ b/src/orion/core/io/database/pickleddb.py @@ -101,6 +101,9 @@ def __init__(self, host="", timeout=60, *args, **kwargs): host = DEFAULT_HOST super().__init__(host) + # NOTE: Save the original value of `host`, so this object can be pickled and unpickled on + # different machines more easily if it's a relative path. + self.original_host = host self.host = os.path.abspath(host) self.timeout = timeout @@ -255,6 +258,17 @@ def get_defaults(cls): """ return {"host": DEFAULT_HOST} + def __getstate__(self): + """Return state to be pickled.""" + return self.__dict__.copy() + + def __setstate__(self, state: dict) -> None: + """Restore state from pickled object.""" + self.__dict__.update(state) + # NOTE: `original_host` might not be present when unpickling old databases. 
+ self.original_host = state.setdefault("original_host", self.host) + self.host = os.path.abspath(self.original_host) + local_file_systems = ["ext2", "ext3", "ext4", "ntfs"] diff --git a/src/orion/core/io/experiment_builder.py b/src/orion/core/io/experiment_builder.py index 11bbfc301..65cb2ff02 100644 --- a/src/orion/core/io/experiment_builder.py +++ b/src/orion/core/io/experiment_builder.py @@ -84,6 +84,8 @@ import typing from typing import Any, TypeVar +from typing_extensions import Literal + import orion.core from orion.algo.base import BaseAlgorithm, algo_factory from orion.algo.space import Space @@ -103,12 +105,19 @@ RaceCondition, ) from orion.core.worker.experiment import Experiment, Mode -from orion.core.worker.experiment_config import ExperimentConfig +from orion.core.worker.experiment_config import ( + ExperimentConfig, + MetaData, + PartialExperimentConfig, + RefersConfig, +) from orion.core.worker.primary_algo import create_algo from orion.core.worker.warm_start import KnowledgeBase from orion.storage.base import setup_storage if typing.TYPE_CHECKING: + from typing_extensions import Unpack + from orion.core.evc.adapters import CompositeAdapter from orion.storage.base import BaseStorageProtocol log = logging.getLogger(__name__) @@ -119,7 +128,7 @@ ## -def clean_config(name: str, config: dict, branching: dict | None): +def clean_config(name: str, config: PartialExperimentConfig, branching: dict | None): """Clean configuration from hidden fields (ex: ``_id``) and update branching if necessary""" log.debug("Cleaning config") @@ -207,7 +216,34 @@ def _instantiate_space(config: Space | dict[str, Any]) -> Space: return SpaceBuilder().build(config) -def _instantiate_knowledge_base(kb_config: dict[str, Any]) -> KnowledgeBase: +@typing.overload +def _instantiate_knowledge_base( + kb_config: dict[str, Any], + ignore_instantiation_errors: Literal[True] = True, +) -> KnowledgeBase | None: + ... 
+ + +@typing.overload +def _instantiate_knowledge_base( + kb_config: dict[str, Any], + ignore_instantiation_errors: Literal[False] = False, +) -> KnowledgeBase: + ... + + +@typing.overload +def _instantiate_knowledge_base( + kb_config: dict[str, Any], + ignore_instantiation_errors: bool, +) -> KnowledgeBase | None: + ... + + +def _instantiate_knowledge_base( + kb_config: dict[str, Any], + ignore_instantiation_errors: bool = True, +) -> KnowledgeBase | None: """Instantiate the Knowledge base from its configuration.""" if len(kb_config) != 1: raise ConfigurationError( @@ -232,10 +268,17 @@ def _instantiate_knowledge_base(kb_config: dict[str, Any]) -> KnowledgeBase: kb_kwargs = kb_config[kb_type_name] # Instantiate the storage that is required for the KB. storage_config = kb_kwargs["storage"] - if isinstance(storage_config, dict): - storage = setup_storage(storage_config) - kb_kwargs["storage"] = storage - return kb_type(**kb_kwargs) + try: + if isinstance(storage_config, dict): + storage = setup_storage(storage_config) + kb_kwargs["storage"] = storage + return kb_type(**kb_kwargs) + except (FileNotFoundError, PermissionError) as err: + if not ignore_instantiation_errors: + log.error("Unable to instantiate the KnowledgeBase.") + raise err + log.warning("KnowledgeBase could not be instantiated.") + return None def _instantiate_algo( @@ -354,7 +397,7 @@ def _fetch_config_version( ### -def get_cmd_config(cmdargs) -> ExperimentConfig: +def get_cmd_config(cmdargs) -> dict: """Fetch configuration defined by commandline and local configuration file. Arguments of commandline have priority over options in configuration file. @@ -423,7 +466,7 @@ def build_from_args(cmdargs): return builder.build(**cmd_config) -def get_from_args(cmdargs, mode="r"): +def get_from_args(cmdargs: dict, mode: Literal["r", "w"] = "r"): """Build an experiment view based on commandline arguments .. 
seealso:: @@ -441,7 +484,7 @@ def get_from_args(cmdargs, mode="r"): name = cmd_config.get("name") version = cmd_config.get("version") - + assert isinstance(name, str) return builder.load(name, version, mode=mode) @@ -450,7 +493,7 @@ def build( version: int | None = None, branching: dict | None = None, storage: BaseStorageProtocol | dict | None = None, - **config, + **config: Unpack[PartialExperimentConfig], ): """Build an experiment. @@ -462,10 +505,17 @@ def build( if storage is None: storage = setup_storage() - return ExperimentBuilder(storage).build(name, version, branching, **config) + config["name"] = name + config["version"] = version + return ExperimentBuilder(storage).build(branching=branching, **config) -def load(name, version=None, mode="r", storage=None): +def load( + name: str, + version=None, + mode: Literal["r", "w"] = "r", + storage: BaseStorageProtocol | dict | None = None, +) -> Experiment: """Load an experiment. .. seealso:: @@ -513,7 +563,7 @@ def build( name: str, version: int | None = None, branching: dict | None = None, - **config, + **config: Unpack[PartialExperimentConfig], ) -> Experiment: """Build an experiment object @@ -635,7 +685,9 @@ def _get_conflicts(self, experiment: Experiment, branching: dict): return conflicts - def load(self, name: str, version: int | None = None, mode: Mode = "r"): + def load( + self, name: str, version: int | None = None, mode: Literal["r", "w"] = "r" + ): """Load experiment from database An experiment view provides all reading operations of standard experiment but prevents the @@ -777,7 +829,9 @@ def _attempt_branching(self, conflicts, experiment, version, branching): return branched_experiment - def consolidate_config(self, name: str, version: int | None, config: dict): + def consolidate_config( + self, name: str, version: int | None, config: PartialExperimentConfig + ): """Merge together given configuration with db configuration matching for experiment (``name``, ``version``) """ @@ -800,6 +854,8 @@ def 
consolidate_config(self, name: str, version: int | None, config: dict): merge_algorithm_config(config, new_config) # TODO: Remove for v0.4 merge_producer_config(config, new_config) + if "knowledge_base" in new_config: + config["knowledge_base"] = new_config["knowledge_base"] config.setdefault("name", name) config.setdefault("version", version) @@ -878,8 +934,8 @@ def create_experiment( max_trials: int | None = None, max_broken: int | None = None, working_dir: str | None = None, - metadata: dict | None = None, - refers: dict | None = None, + metadata: MetaData | None = None, + refers: RefersConfig | None = None, producer: dict | None = None, knowledge_base: KnowledgeBase | dict | None = None, user: str | None = None, @@ -928,8 +984,9 @@ def _default(v: T | None, default: V) -> T | V: space = _instantiate_space(space) max_trials = _default(max_trials, orion.core.config.experiment.max_trials) if isinstance(knowledge_base, dict): - knowledge_base = _instantiate_knowledge_base(knowledge_base) - + knowledge_base = _instantiate_knowledge_base( + knowledge_base, ignore_instantiation_errors=mode != "x" + ) instantiated_algorithm = _instantiate_algo( space=space, max_trials=max_trials, diff --git a/tests/unittests/core/io/test_experiment_builder.py b/tests/unittests/core/io/test_experiment_builder.py index 9849c4aeb..243a94b41 100644 --- a/tests/unittests/core/io/test_experiment_builder.py +++ b/tests/unittests/core/io/test_experiment_builder.py @@ -2,9 +2,11 @@ """Example usage and tests for :mod:`orion.core.io.experiment_builder`.""" from __future__ import annotations +import contextlib import copy import datetime import logging +import typing from pathlib import Path import pytest @@ -28,11 +30,15 @@ UnsupportedOperation, ) from orion.core.worker.algo_wrappers import AlgoWrapper +from orion.core.worker.experiment_config import ExperimentConfig from orion.core.worker.warm_start import KnowledgeBase from orion.storage.base import setup_storage from orion.storage.legacy 
import Legacy from orion.testing import OrionState +if typing.TYPE_CHECKING: + from _pytest.logging import LogCaptureFixture + def count_experiments(): """Count experiments in storage""" @@ -45,10 +51,8 @@ def space(): return {"x": "uniform(-50,50)"} -@pytest.fixture() -def python_api_config(): - """Create a configuration without the cli fluff.""" - new_config = dict( +def _python_api_config() -> ExperimentConfig: + return ExperimentConfig( name="supernaekei", version=1, space={"x": "uniform(0,10)"}, @@ -79,9 +83,14 @@ def python_api_config(): _id="fasdfasfa", something_to_be_ignored="asdfa", refers=dict(root_id="supernaekei", parent_id=None, adapter=[]), + knowledge_base=None, ) - return new_config + +@pytest.fixture() +def python_api_config(): + """Create a configuration without the cli fluff.""" + return _python_api_config() @pytest.fixture() @@ -1372,6 +1381,93 @@ def test_load_unavailable_algo(algo_unavailable_config, capsys): experiment_builder.build("supernaekei") +def _exp_config_with_knowledge_base_at(kb_pickle_path: str | Path) -> ExperimentConfig: + config = _python_api_config() + config["knowledge_base"] = { + "KnowledgeBase": { + "storage": { + "type": "legacy", + "database": {"type": "pickleddb", "host": str(kb_pickle_path)}, + }, + }, + } + return config + + +@contextlib.contextmanager +def _logs_warning_about_kb(caplog: LogCaptureFixture): + # Now, if trying to open in read mode, but the path doesn't exist, then the exception + # should be caught and a warning should be printed. 
+ caplog.clear() + with caplog.at_level(logging.WARNING): + yield + assert len(caplog.records) >= 1 + assert "KnowledgeBase could not be instantiated" in caplog.text + + +@contextlib.contextmanager +def _setup(kb_host: str | Path): + exp_config = _exp_config_with_knowledge_base_at(kb_pickle_path=kb_host) + with OrionState(experiments=[exp_config]): + yield exp_config + + +def test_load_uninstantiable_knowledge_base(caplog: LogCaptureFixture): + """Check that if an experiment is loaded in read mode and the knowledge base cannot be + instantiated, then a warning is printed. + """ + + with _setup(kb_host="/I/do/not/exist.pkl") as exp_config_with_invalid_kb_host: + # experiment_builder.build uses ExperimentBuilder.create_experiment with mode="x", so this + # should try to load the KnowledgeBase and raise an error. + with pytest.raises(PermissionError, match="/I"): + experiment = experiment_builder.build( + name=exp_config_with_invalid_kb_host["name"], + ) + assert experiment.knowledge_base is None + + # Now, if trying to open in read mode, but the path doesn't exist, then the exception + # should be caught and a warning should be printed. + with _logs_warning_about_kb(caplog): + experiment = experiment_builder.load( + exp_config_with_invalid_kb_host["name"], mode="r" + ) + assert experiment.knowledge_base is None + + +@pytest.mark.xfail( + reason="See https://github.com/Epistimio/orion/issues/1053", raises=AssertionError +) +def test_attempt_to_load_knowledge_base_doest_create_files( + caplog: LogCaptureFixture, tmp_path: Path +): + """TODO: https://github.com/Epistimio/orion/issues/1053""" + + # Now, use a path that could be written to, but doesn't exist. + host = tmp_path / "some_folder" / "db.pkl" + with _setup(kb_host=host) as exp_config_with_absent_kb_file: + assert not host.exists() + + # Try to load the experiment, but the KB points to pickledb host files that don't exist! + # NOTE: This shouldn't create the files, or any of the parent directories! 
+ with _logs_warning_about_kb(caplog): + experiment_builder.load(exp_config_with_absent_kb_file["name"], mode="r") + assert not host.exists() + assert not host.parent.exists() + + # Try to build the experiment (execute mode), but the KB points to pickledb host files + # that don't exist. + # NOTE: A bit trickier. Should it create the KB files (and parent directories) so the KB is + # just created and empty? + # In my (@lebrice) view, the KB should always be treated as "read-only", so this should + # raise a FileNotFoundError. + # This should fail to read the pickle file (because it doesn't exist). + with pytest.raises(FileNotFoundError, match=str(host)): + experiment_builder.build(exp_config_with_absent_kb_file["name"]) + assert not host.exists() + assert not host.parent.exists() + + class TestInitExperimentReadWrite: """Create new Experiment instance that only supports read/write.""" diff --git a/tox.ini b/tox.ini index 8b52f25da..de7000e05 100644 --- a/tox.ini +++ b/tox.ini @@ -217,6 +217,8 @@ commands = description = Invoke sphinx to build documentation and API reference basepython = python3 skip_install = false +deps = + -rdocs/requirements.txt extras = docs pb2