diff --git a/coremlpython/CoreMLPython.h b/coremlpython/CoreMLPython.h index 6dbfd5cf1..2d9bf000d 100644 --- a/coremlpython/CoreMLPython.h +++ b/coremlpython/CoreMLPython.h @@ -32,7 +32,7 @@ namespace CoreML { Model(const Model&) = delete; Model& operator=(const Model&) = delete; ~Model(); - explicit Model(const std::string& urlStr, const std::string& computeUnits); + explicit Model(const std::string& urlStr, const std::string& computeUnits, bool lowPrecision); py::dict predict(const py::dict& input) const; py::list batchPredict(const py::list& batch) const; diff --git a/coremlpython/CoreMLPython.mm b/coremlpython/CoreMLPython.mm index e01374d34..f5fd19304 100644 --- a/coremlpython/CoreMLPython.mm +++ b/coremlpython/CoreMLPython.mm @@ -54,7 +54,7 @@ bool isCompiledModelPath(const std::string& path) { } } -Model::Model(const std::string& urlStr, const std::string& computeUnits) { +Model::Model(const std::string& urlStr, const std::string& computeUnits, bool lowPrecision) { @autoreleasepool { NSError *error = nil; @@ -91,6 +91,7 @@ bool isCompiledModelPath(const std::string& path) { // Set compute unit MLModelConfiguration *configuration = [MLModelConfiguration new]; + configuration.allowLowPrecisionAccumulationOnGPU = lowPrecision; if (computeUnits == "CPU_ONLY") { configuration.computeUnits = MLComputeUnitsCPUOnly; } else if (computeUnits == "CPU_AND_GPU") { @@ -204,7 +205,7 @@ bool isCompiledModelPath(const std::string& path) { py::module m("libcoremlpython", "CoreML.Framework Python bindings"); py::class_<Model>(m, "_MLModelProxy") - .def(py::init<const std::string&, const std::string&>()) + .def(py::init<const std::string&, const std::string&, bool>()) .def("predict", &Model::predict) .def("batchPredict", &Model::batchPredict) .def("get_compiled_model_path", &Model::getCompiledModelPath) diff --git a/coremltools/models/_compiled_model.py b/coremltools/models/_compiled_model.py index 42e0c9ac4..435050cda 100644 --- a/coremltools/models/_compiled_model.py +++ b/coremltools/models/_compiled_model.py @@ -16,7 +16,10 @@ class CompiledMLModel: - def 
__init__(self, path: str, compute_units: _ComputeUnit =_ComputeUnit.ALL): + def __init__(self, + path: str, + compute_units: _ComputeUnit =_ComputeUnit.ALL, + low_precision_accumulation: bool = False): """ Loads a compiled Core ML model. @@ -61,7 +64,7 @@ def __init__(self, path: str, compute_units: _ComputeUnit =_ComputeUnit.ALL): path = _expanduser(path) - self._proxy = _MLModelProxy(path, compute_units.name) + self._proxy = _MLModelProxy(path, compute_units.name, low_precision_accumulation) def predict(self, data): diff --git a/coremltools/models/model.py b/coremltools/models/model.py index c477a9eb3..d66a574ec 100644 --- a/coremltools/models/model.py +++ b/coremltools/models/model.py @@ -130,7 +130,7 @@ def __iter__(self): yield f.name -def _get_proxy_and_spec(filename, compute_units, skip_model_load=False): +def _get_proxy_and_spec(filename, compute_units, low_precision, skip_model_load=False): try: from ..libcoremlpython import _MLModelProxy except Exception: @@ -149,7 +149,7 @@ def _get_proxy_and_spec(filename, compute_units, skip_model_load=False): return None, specification, None try: - return _MLModelProxy(filename, compute_units.name), specification, None + return _MLModelProxy(filename, compute_units.name, low_precision), specification, None except RuntimeError as e: _warnings.warn( "You will not be able to run predict() on this Core ML model." @@ -246,6 +246,7 @@ def __init__( skip_model_load=False, compute_units=_ComputeUnit.ALL, weights_dir=None, + low_precision_accumulation=False, ): """ Construct an MLModel from an ``.mlmodel``. @@ -298,6 +299,10 @@ def __init__( Path to the weight directory, required when loading an MLModel of type mlprogram, from a spec object, i.e. when the argument ``model`` is of type ``Model_pb2`` + low_precision_accumulation: bool + Set to True to enable low precision accumulation for the model. This only applies to partitions + that are dispatched to the GPU. Defaults to False. 
+ Notes ----- Internally this maintains the following: @@ -373,7 +378,7 @@ def does_model_contain_mlprogram(model) -> bool: self.is_temp_package = is_temp_package self._weights_dir = _try_get_weights_dir_path(model) self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec( - model, compute_units, skip_model_load=skip_model_load, + model, compute_units, low_precision=low_precision_accumulation, skip_model_load=skip_model_load, ) elif isinstance(model, _Model_pb2.Model): if does_model_contain_mlprogram(model): @@ -393,7 +398,7 @@ def does_model_contain_mlprogram(model) -> bool: _save_spec(model, filename) self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec( - filename, compute_units, skip_model_load=skip_model_load, + filename, compute_units, low_precision=low_precision_accumulation, skip_model_load=skip_model_load, ) try: _os.remove(filename) diff --git a/coremltools/test/modelpackage/test_mlmodel.py b/coremltools/test/modelpackage/test_mlmodel.py index 8d2f97e11..312f03f69 100644 --- a/coremltools/test/modelpackage/test_mlmodel.py +++ b/coremltools/test/modelpackage/test_mlmodel.py @@ -8,14 +8,15 @@ import numpy as np import torch +import pytest import coremltools as ct from coremltools._deps import _IS_MACOS from coremltools.models.model import MLModel from coremltools.models.utils import _macos_version - -def test_mlmodel_demo(tmpdir): +@pytest.mark.parametrize("low_precision", [False, True]) +def test_mlmodel_demo(tmpdir, low_precision): NUM_TOKENS = 3 EMBEDDING_SIZE = 5 @@ -54,7 +55,7 @@ def forward(self, x): mlmodel.save(mlpackage_path) # Read back the saved bundle and compile - mlmodel2 = MLModel(mlpackage_path) + mlmodel2 = MLModel(mlpackage_path, low_precision_accumulation=low_precision) if not _IS_MACOS or _macos_version() < (12, 0): # Can not get predictions unless on macOS 12 or higher. 
diff --git a/coremltools/test/modelpackage/test_modelpackage.py b/coremltools/test/modelpackage/test_modelpackage.py index e64a4ca55..d73e4f223 100644 --- a/coremltools/test/modelpackage/test_modelpackage.py +++ b/coremltools/test/modelpackage/test_modelpackage.py @@ -3,6 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +import itertools import os import shutil import tempfile @@ -151,12 +152,12 @@ def test_predict_api(self): model.save(package.name) if utils._macos_version() >= (12, 0): - for compute_units in coremltools.ComputeUnit: + for compute_units, precision in itertools.product(coremltools.ComputeUnit, [False, True]): if (compute_units == coremltools.ComputeUnit.CPU_AND_NE and utils._macos_version() < (13, 0)): continue - loaded_model = MLModel(package.name, compute_units=compute_units) + loaded_model = MLModel(package.name, compute_units=compute_units, low_precision_accumulation=precision) preds = loaded_model.predict({"feature_1": 1.0, "feature_2": 1.0}) assert preds is not None diff --git a/coremltools/test/neural_network/test_compiled_model.py b/coremltools/test/neural_network/test_compiled_model.py index 7380c4ccc..dc0c76156 100644 --- a/coremltools/test/neural_network/test_compiled_model.py +++ b/coremltools/test/neural_network/test_compiled_model.py @@ -3,6 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +import pytest from shutil import copytree, rmtree from tempfile import TemporaryDirectory @@ -35,10 +36,10 @@ def setup(self): self.spec = spec - def _test_compile_model_path(self, compiled_model_path, compute_units=ComputeUnit.ALL): + def _test_compile_model_path(self, compiled_model_path, compute_units=ComputeUnit.ALL, low_precision=False): try: # Load compiled model - model = CompiledMLModel(compiled_model_path, 
compute_units) + model = CompiledMLModel(compiled_model_path, compute_units, low_precision) # Single prediction y = model.predict({'x': 2}) @@ -81,10 +82,11 @@ def test_from_existing_mlmodel(self): self._test_compile_model_path(dst_path) - def test_non_default_compute_units(self): + @pytest.mark.parametrize("low_precision", [False, True]) + def test_non_default_compute_units(self, low_precision): non_default_compute_units = (ComputeUnit.CPU_AND_GPU, ComputeUnit.CPU_AND_NE, ComputeUnit.CPU_ONLY) for cur_compute_unit in non_default_compute_units: compiled_model_path = compile_model(self.spec) - self._test_compile_model_path(compiled_model_path, compute_units=cur_compute_unit) + self._test_compile_model_path(compiled_model_path, compute_units=cur_compute_unit, low_precision=low_precision)