Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Low precision accumulation flag #1958

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion coremlpython/CoreMLPython.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace CoreML {
Model(const Model&) = delete;
Model& operator=(const Model&) = delete;
~Model();
explicit Model(const std::string& urlStr, const std::string& computeUnits);
explicit Model(const std::string& urlStr, const std::string& computeUnits, bool lowPrecision);

py::dict predict(const py::dict& input) const;
py::list batchPredict(const py::list& batch) const;
Expand Down
5 changes: 3 additions & 2 deletions coremlpython/CoreMLPython.mm
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ bool isCompiledModelPath(const std::string& path) {
}
}

Model::Model(const std::string& urlStr, const std::string& computeUnits) {
Model::Model(const std::string& urlStr, const std::string& computeUnits, bool lowPrecision) {
@autoreleasepool {
NSError *error = nil;

Expand Down Expand Up @@ -91,6 +91,7 @@ bool isCompiledModelPath(const std::string& path) {

// Set compute unit
MLModelConfiguration *configuration = [MLModelConfiguration new];
configuration.allowLowPrecisionAccumulationOnGPU = lowPrecision;
if (computeUnits == "CPU_ONLY") {
configuration.computeUnits = MLComputeUnitsCPUOnly;
} else if (computeUnits == "CPU_AND_GPU") {
Expand Down Expand Up @@ -204,7 +205,7 @@ bool isCompiledModelPath(const std::string& path) {
py::module m("libcoremlpython", "CoreML.Framework Python bindings");

py::class_<Model>(m, "_MLModelProxy")
.def(py::init<const std::string&, const std::string&>())
.def(py::init<const std::string&, const std::string&, bool>())
.def("predict", &Model::predict)
.def("batchPredict", &Model::batchPredict)
.def("get_compiled_model_path", &Model::getCompiledModelPath)
Expand Down
7 changes: 5 additions & 2 deletions coremltools/models/_compiled_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@


class CompiledMLModel:
def __init__(self, path: str, compute_units: _ComputeUnit =_ComputeUnit.ALL):
def __init__(self,
path: str,
compute_units: _ComputeUnit =_ComputeUnit.ALL,
low_precision_accumulation: bool = False):
"""
Loads a compiled Core ML model.

Expand Down Expand Up @@ -61,7 +64,7 @@ def __init__(self, path: str, compute_units: _ComputeUnit =_ComputeUnit.ALL):

path = _expanduser(path)

self._proxy = _MLModelProxy(path, compute_units.name)
self._proxy = _MLModelProxy(path, compute_units.name, low_precision_accumulation)


def predict(self, data):
Expand Down
13 changes: 9 additions & 4 deletions coremltools/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def __iter__(self):
yield f.name


def _get_proxy_and_spec(filename, compute_units, skip_model_load=False):
def _get_proxy_and_spec(filename, compute_units, low_precision, skip_model_load=False):
try:
from ..libcoremlpython import _MLModelProxy
except Exception:
Expand All @@ -149,7 +149,7 @@ def _get_proxy_and_spec(filename, compute_units, skip_model_load=False):
return None, specification, None

try:
return _MLModelProxy(filename, compute_units.name), specification, None
return _MLModelProxy(filename, compute_units.name, low_precision), specification, None
except RuntimeError as e:
_warnings.warn(
"You will not be able to run predict() on this Core ML model."
Expand Down Expand Up @@ -246,6 +246,7 @@ def __init__(
skip_model_load=False,
compute_units=_ComputeUnit.ALL,
weights_dir=None,
low_precision_accumulation=False,
):
"""
Construct an MLModel from an ``.mlmodel``.
Expand Down Expand Up @@ -298,6 +299,10 @@ def __init__(
Path to the weight directory, required when loading an MLModel of type mlprogram,
from a spec object, i.e. when the argument ``model`` is of type ``Model_pb2``

low_precision_accumulation: bool
Set to True to enable low precision accumulation for the model. This only applies to partitions
that are dispatched to the GPU. Defaults to False.

Notes
-----
Internally this maintains the following:
Expand Down Expand Up @@ -373,7 +378,7 @@ def does_model_contain_mlprogram(model) -> bool:
self.is_temp_package = is_temp_package
self._weights_dir = _try_get_weights_dir_path(model)
self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec(
model, compute_units, skip_model_load=skip_model_load,
model, compute_units, low_precision=low_precision_accumulation, skip_model_load=skip_model_load,
)
elif isinstance(model, _Model_pb2.Model):
if does_model_contain_mlprogram(model):
Expand All @@ -393,7 +398,7 @@ def does_model_contain_mlprogram(model) -> bool:
_save_spec(model, filename)

self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec(
filename, compute_units, skip_model_load=skip_model_load,
filename, compute_units, low_precision=low_precision_accumulation, skip_model_load=skip_model_load,
)
try:
_os.remove(filename)
Expand Down
7 changes: 4 additions & 3 deletions coremltools/test/modelpackage/test_mlmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@

import numpy as np
import torch
import pytest

import coremltools as ct
from coremltools._deps import _IS_MACOS
from coremltools.models.model import MLModel
from coremltools.models.utils import _macos_version


def test_mlmodel_demo(tmpdir):
@pytest.mark.parametrize("low_precision", [False, True])
def test_mlmodel_demo(tmpdir, low_precision):
NUM_TOKENS = 3
EMBEDDING_SIZE = 5

Expand Down Expand Up @@ -54,7 +55,7 @@ def forward(self, x):
mlmodel.save(mlpackage_path)

# Read back the saved bundle and compile
mlmodel2 = MLModel(mlpackage_path)
mlmodel2 = MLModel(mlpackage_path, low_precision_accumulation=low_precision)

if not _IS_MACOS or _macos_version() < (12, 0):
# Can not get predictions unless on macOS 12 or higher.
Expand Down
5 changes: 3 additions & 2 deletions coremltools/test/modelpackage/test_modelpackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Use of this source code is governed by a BSD-3-clause license that can be
# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

import itertools
import os
import shutil
import tempfile
Expand Down Expand Up @@ -151,12 +152,12 @@ def test_predict_api(self):
model.save(package.name)

if utils._macos_version() >= (12, 0):
for compute_units in coremltools.ComputeUnit:
for compute_units, precision in itertools.product(coremltools.ComputeUnit, [False, True]):
if (compute_units == coremltools.ComputeUnit.CPU_AND_NE
and utils._macos_version() < (13, 0)):
continue

loaded_model = MLModel(package.name, compute_units=compute_units)
loaded_model = MLModel(package.name, compute_units=compute_units, low_precision_accumulation=precision)

preds = loaded_model.predict({"feature_1": 1.0, "feature_2": 1.0})
assert preds is not None
Expand Down
10 changes: 6 additions & 4 deletions coremltools/test/neural_network/test_compiled_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Use of this source code is governed by a BSD-3-clause license that can be
# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

import pytest
from shutil import copytree, rmtree
from tempfile import TemporaryDirectory

Expand Down Expand Up @@ -35,10 +36,10 @@ def setup(self):
self.spec = spec


def _test_compile_model_path(self, compiled_model_path, compute_units=ComputeUnit.ALL):
def _test_compile_model_path(self, compiled_model_path, compute_units=ComputeUnit.ALL, low_precision=False):
try:
# Load compiled model
model = CompiledMLModel(compiled_model_path, compute_units)
model = CompiledMLModel(compiled_model_path, compute_units, low_precision)

# Single prediction
y = model.predict({'x': 2})
Expand Down Expand Up @@ -81,10 +82,11 @@ def test_from_existing_mlmodel(self):
self._test_compile_model_path(dst_path)


def test_non_default_compute_units(self):
@pytest.mark.parametrize("low_precision", [False, True])
def test_non_default_compute_units(self, low_precision):
    """Compile the stored spec and verify prediction for each non-default
    compute unit, with low-precision GPU accumulation on and off."""
    # Every ComputeUnit value except the default (ALL).
    for cur_compute_unit in (ComputeUnit.CPU_AND_GPU,
                             ComputeUnit.CPU_AND_NE,
                             ComputeUnit.CPU_ONLY):
        # Recompile for each unit so every run starts from a fresh artifact.
        compiled_model_path = compile_model(self.spec)
        self._test_compile_model_path(
            compiled_model_path,
            compute_units=cur_compute_unit,
            low_precision=low_precision,
        )