Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Low precision accumulation flag #1958

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion coremlpython/CoreMLPython.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace CoreML {
Model(const Model&) = delete;
Model& operator=(const Model&) = delete;
~Model();
explicit Model(const std::string& urlStr, const std::string& computeUnits);
explicit Model(const std::string& urlStr, const std::string& computeUnits, bool lowPrecision);

py::dict predict(const py::dict& input) const;
py::list batchPredict(const py::list& batch) const;
Expand Down
5 changes: 3 additions & 2 deletions coremlpython/CoreMLPython.mm
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ bool isCompiledModelPath(const std::string& path) {
}
}

Model::Model(const std::string& urlStr, const std::string& computeUnits) {
Model::Model(const std::string& urlStr, const std::string& computeUnits, bool lowPrecision) {
@autoreleasepool {
NSError *error = nil;

Expand Down Expand Up @@ -91,6 +91,7 @@ bool isCompiledModelPath(const std::string& path) {

// Set compute unit
MLModelConfiguration *configuration = [MLModelConfiguration new];
configuration.allowLowPrecisionAccumulationOnGPU = lowPrecision;
if (computeUnits == "CPU_ONLY") {
configuration.computeUnits = MLComputeUnitsCPUOnly;
} else if (computeUnits == "CPU_AND_GPU") {
Expand Down Expand Up @@ -204,7 +205,7 @@ bool isCompiledModelPath(const std::string& path) {
py::module m("libcoremlpython", "CoreML.Framework Python bindings");

py::class_<Model>(m, "_MLModelProxy")
.def(py::init<const std::string&, const std::string&>())
.def(py::init<const std::string&, const std::string&, bool>())
.def("predict", &Model::predict)
.def("batchPredict", &Model::batchPredict)
.def("get_compiled_model_path", &Model::getCompiledModelPath)
Expand Down
7 changes: 5 additions & 2 deletions coremltools/models/_compiled_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@


class CompiledMLModel:
def __init__(self, path: str, compute_units: _ComputeUnit =_ComputeUnit.ALL):
def __init__(self,
path: str,
compute_units: _ComputeUnit =_ComputeUnit.ALL,
low_precision_accumulation: bool = False):
"""
Loads a compiled Core ML model.

Expand Down Expand Up @@ -61,7 +64,7 @@ def __init__(self, path: str, compute_units: _ComputeUnit =_ComputeUnit.ALL):

path = _expanduser(path)

self._proxy = _MLModelProxy(path, compute_units.name)
self._proxy = _MLModelProxy(path, compute_units.name, low_precision_accumulation)


def predict(self, data):
Expand Down
13 changes: 9 additions & 4 deletions coremltools/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def __iter__(self):
yield f.name


def _get_proxy_and_spec(filename, compute_units, skip_model_load=False):
def _get_proxy_and_spec(filename, compute_units, low_precision, skip_model_load=False):
try:
from ..libcoremlpython import _MLModelProxy
except Exception:
Expand All @@ -149,7 +149,7 @@ def _get_proxy_and_spec(filename, compute_units, skip_model_load=False):
return None, specification, None

try:
return _MLModelProxy(filename, compute_units.name), specification, None
return _MLModelProxy(filename, compute_units.name, low_precision), specification, None
except RuntimeError as e:
_warnings.warn(
"You will not be able to run predict() on this Core ML model."
Expand Down Expand Up @@ -246,6 +246,7 @@ def __init__(
skip_model_load=False,
compute_units=_ComputeUnit.ALL,
weights_dir=None,
low_precision_accumulation=False,
):
"""
Construct an MLModel from an ``.mlmodel``.
Expand Down Expand Up @@ -298,6 +299,10 @@ def __init__(
Path to the weight directory, required when loading an MLModel of type mlprogram,
from a spec object, i.e. when the argument ``model`` is of type ``Model_pb2``

low_precision_accumulation: bool
Set to True to enable low precision accumulation for the model. This only applies to partitions
that are dispatched to the GPU. Defaults to False.

Notes
-----
Internally this maintains the following:
Expand Down Expand Up @@ -373,7 +378,7 @@ def does_model_contain_mlprogram(model) -> bool:
self.is_temp_package = is_temp_package
self._weights_dir = _try_get_weights_dir_path(model)
self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec(
model, compute_units, skip_model_load=skip_model_load,
model, compute_units, low_precision=low_precision_accumulation, skip_model_load=skip_model_load,
)
elif isinstance(model, _Model_pb2.Model):
if does_model_contain_mlprogram(model):
Expand All @@ -393,7 +398,7 @@ def does_model_contain_mlprogram(model) -> bool:
_save_spec(model, filename)

self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec(
filename, compute_units, skip_model_load=skip_model_load,
filename, compute_units, low_precision=low_precision_accumulation, skip_model_load=skip_model_load,
)
try:
_os.remove(filename)
Expand Down
7 changes: 4 additions & 3 deletions coremltools/test/modelpackage/test_mlmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@

import numpy as np
import torch
import pytest

import coremltools as ct
from coremltools._deps import _IS_MACOS
from coremltools.models.model import MLModel
from coremltools.models.utils import _macos_version


def test_mlmodel_demo(tmpdir):
@pytest.mark.parametrize("low_precision", [False, True])
def test_mlmodel_demo(tmpdir, low_precision):
NUM_TOKENS = 3
EMBEDDING_SIZE = 5

Expand Down Expand Up @@ -54,7 +55,7 @@ def forward(self, x):
mlmodel.save(mlpackage_path)

# Read back the saved bundle and compile
mlmodel2 = MLModel(mlpackage_path)
mlmodel2 = MLModel(mlpackage_path, low_precision_accumulation=low_precision)

if not _IS_MACOS or _macos_version() < (12, 0):
# Can not get predictions unless on macOS 12 or higher.
Expand Down
5 changes: 3 additions & 2 deletions coremltools/test/modelpackage/test_modelpackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Use of this source code is governed by a BSD-3-clause license that can be
# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

import itertools
import os
import shutil
import tempfile
Expand Down Expand Up @@ -151,12 +152,12 @@ def test_predict_api(self):
model.save(package.name)

if utils._macos_version() >= (12, 0):
for compute_units in coremltools.ComputeUnit:
for compute_units, precision in itertools.product(coremltools.ComputeUnit, [False, True]):
if (compute_units == coremltools.ComputeUnit.CPU_AND_NE
and utils._macos_version() < (13, 0)):
continue

loaded_model = MLModel(package.name, compute_units=compute_units)
loaded_model = MLModel(package.name, compute_units=compute_units, low_precision_accumulation=precision)

preds = loaded_model.predict({"feature_1": 1.0, "feature_2": 1.0})
assert preds is not None
Expand Down
10 changes: 6 additions & 4 deletions coremltools/test/neural_network/test_compiled_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Use of this source code is governed by a BSD-3-clause license that can be
# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

import pytest
from shutil import copytree, rmtree
from tempfile import TemporaryDirectory

Expand Down Expand Up @@ -35,10 +36,10 @@ def setup(self):
self.spec = spec


def _test_compile_model_path(self, compiled_model_path, compute_units=ComputeUnit.ALL):
def _test_compile_model_path(self, compiled_model_path, compute_units=ComputeUnit.ALL, low_precision=False):
try:
# Load compiled model
model = CompiledMLModel(compiled_model_path, compute_units)
model = CompiledMLModel(compiled_model_path, compute_units, low_precision)

# Single prediction
y = model.predict({'x': 2})
Expand Down Expand Up @@ -81,10 +82,11 @@ def test_from_existing_mlmodel(self):
self._test_compile_model_path(dst_path)


def test_non_default_compute_units(self):
@pytest.mark.parametrize("low_precision", [False, True])
def test_non_default_compute_units(self, low_precision):
    """Compile the stored spec and verify prediction for each non-default
    compute unit, with low-precision GPU accumulation on and off."""
    # Every ComputeUnit value except the default (ALL).
    for cur_compute_unit in (ComputeUnit.CPU_AND_GPU,
                             ComputeUnit.CPU_AND_NE,
                             ComputeUnit.CPU_ONLY):
        # Recompile for each unit so every run starts from a fresh artifact.
        compiled_model_path = compile_model(self.spec)
        self._test_compile_model_path(
            compiled_model_path,
            compute_units=cur_compute_unit,
            low_precision=low_precision,
        )