Draft input scaling mechanism
AdrianSosic committed Jul 17, 2024
1 parent 5bb9f59 commit c3ade11
Showing 3 changed files with 80 additions and 2 deletions.
36 changes: 34 additions & 2 deletions baybe/surrogates/base.py
@@ -19,6 +19,7 @@

from baybe.exceptions import ModelNotTrainedError
from baybe.objectives.base import Objective
from baybe.parameters.base import Parameter
from baybe.searchspace import SearchSpace
from baybe.serialization.core import (
    converter,
@@ -27,13 +28,14 @@
)
from baybe.serialization.mixin import SerialMixin
from baybe.utils.dataframe import to_tensor
from baybe.utils.scaling import ScalingMethod, make_scaler

if TYPE_CHECKING:
    from botorch.models.model import Model
    from botorch.posteriors import GPyTorchPosterior, Posterior
    from sklearn.compose import ColumnTransformer
    from torch import Tensor


_ONNX_ENCODING = "latin-1"
"""Constant signifying the encoding for onnx byte strings in pretrained models.
@@ -83,6 +85,32 @@ def to_botorch(self) -> Model:

        return AdapterModel(self)

    @staticmethod
    def _get_parameter_scaling(parameter: Parameter) -> ScalingMethod:
        """Return the scaling method to be used for the given parameter."""
        return ScalingMethod.MINMAX

    def _make_input_scaler(
        self, searchspace: SearchSpace, measurements: pd.DataFrame
    ) -> ColumnTransformer:
        """Make a scaler to be used for transforming computational dataframes."""
        from sklearn.compose import make_column_transformer

        # Create the composite scaler from the parameter-wise scaler objects
        # TODO: Filter down to columns that actually remain in the comp rep of the
        #   searchspace, since the transformer can break down otherwise.
        transformers = [
            (make_scaler(self._get_parameter_scaling(p)), p.comp_df.columns)
            for p in searchspace.parameters
        ]
        scaler = make_column_transformer(*transformers)

        # TODO: Decide whether scaler is to be fit to parameter bounds and/or
        #   extreme points in the given measurement data
        scaler.fit(searchspace.comp_rep_bounds)

        return scaler

    def transform_inputs(self, data: pd.DataFrame) -> pd.DataFrame:
        """Transform an experimental parameter dataframe."""
        if self._input_transform is None:
Expand Down Expand Up @@ -148,8 +176,12 @@ def fit(
"Continuous search spaces are currently only supported by GPs."
)

input_scaler = self._make_input_scaler(searchspace, measurements)

# Store context-specific transformations
self._input_transform = lambda x: searchspace.transform(x, allow_missing=True)
self._input_transform = lambda x: input_scaler.transform(
searchspace.transform(x, allow_missing=True)
)
self._target_transform = lambda x: objective.transform(x)

# Transform and fit
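To make the new mechanism concrete, here is a minimal, self-contained sketch (not part of the commit) of the pattern _make_input_scaler uses: one scaler per parameter's computational columns, combined via sklearn's make_column_transformer and fit to the parameter bounds rather than to the measurements. The column names, bounds, and data below are invented for illustration.

import pandas as pd
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler

# Hypothetical computational representation: two parameters, one column each
bounds = pd.DataFrame({"x1": [0.0, 10.0], "x2": [-5.0, 5.0]})  # rows: lower, upper
measurements = pd.DataFrame({"x1": [2.0, 7.5], "x2": [-1.0, 3.0]})

# One (scaler, columns) pair per parameter, mirroring _make_input_scaler
scaler = make_column_transformer(
    (MinMaxScaler(), ["x1"]),
    (MinMaxScaler(), ["x2"]),
)

# Fitting to the bounds means [0, 1] reflects the search space, not the data
scaler.fit(bounds)
print(scaler.transform(measurements))
# [[0.2  0.4 ]
#  [0.75 0.8 ]]

The stored _input_transform then chains this with the search space transform: raw experimental data -> computational representation -> scaled representation.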
9 changes: 9 additions & 0 deletions baybe/surrogates/gaussian_process/core.py
@@ -8,6 +8,7 @@
from attrs.validators import instance_of

from baybe.objective import Objective
from baybe.parameters.base import Parameter
from baybe.searchspace.core import SearchSpace
from baybe.surrogates.base import Surrogate
from baybe.surrogates.gaussian_process.kernel_factory import (
@@ -22,6 +23,7 @@
    DefaultKernelFactory,
    _default_noise_factory,
)
from baybe.utils.scaling import ScalingMethod

if TYPE_CHECKING:
    from botorch.models.model import Model
@@ -108,6 +110,13 @@ def to_botorch(self) -> Model:  # noqa: D102

        return self._model

    @staticmethod
    def _get_parameter_scaling(parameter: Parameter) -> ScalingMethod:
        # See base class.

        # For GPs, we use botorch's built-in machinery for scaling.
        return ScalingMethod.IDENTITY

    @staticmethod
    def _get_model_context(
        searchspace: SearchSpace, objective: Objective
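For contrast, a short sketch (column name invented) of what the IDENTITY override means downstream: make_scaler maps it to the string "passthrough", which ColumnTransformer interprets as "leave these columns untouched", so GP inputs reach botorch unscaled and botorch's built-in input transforms handle scaling instead.

import pandas as pd
from sklearn.compose import make_column_transformer

df = pd.DataFrame({"x1": [0.0, 10.0]})
scaler = make_column_transformer(("passthrough", ["x1"]))

# Values pass through unchanged: [[0.], [10.]]
print(scaler.fit_transform(df))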
37 changes: 37 additions & 0 deletions baybe/utils/scaling.py
@@ -0,0 +1,37 @@
"""Scaling utilities."""

from __future__ import annotations

from enum import Enum
from typing import TYPE_CHECKING, Literal, TypeAlias

if TYPE_CHECKING:
from sklearn.base import BaseEstimator, TransformerMixin

Scaler: TypeAlias = BaseEstimator | TransformerMixin


class ScalingMethod(Enum):
"""Available scaling methods."""

IDENTITY = "IDENTITY"
"""Identity transformation (no scaling applied)."""

MINMAX = "MINMAX"
"""Min-max scaling, mapping the observed value range to [0, 1]."""

MAXABS = "MAXABS"
"""Max-abs scaling, scaling by the largest observed absolute (applies no shift)."""


def make_scaler(method: ScalingMethod, /) -> Scaler | Literal["passthrough"]:
"""Create a scaler object based on the specified method."""
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler

match method:
case ScalingMethod.IDENTITY:
return "passthrough"
case ScalingMethod.MINMAX:
return MinMaxScaler()
case ScalingMethod.MAXABS:
return MaxAbsScaler()
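A usage sketch for the new utility (assuming the module is importable as shown; example values invented):

from baybe.utils.scaling import ScalingMethod, make_scaler

assert make_scaler(ScalingMethod.IDENTITY) == "passthrough"

scaler = make_scaler(ScalingMethod.MINMAX)  # a fresh MinMaxScaler instance
print(scaler.fit_transform([[-5.0], [0.0], [5.0]]))  # [[0. ] [0.5] [1. ]]

Note that the match statement has no default case, so a hypothetical future ScalingMethod member without a matching branch would make make_scaler silently return None.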
