Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Data management update to support SUA ifaces for Homogen OneDAL tables #2045

Merged
Merged
Show file tree
Hide file tree
Changes from 53 commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
55c9565
ENH: Data management update to support SUA ifaces
samir-nasibli Sep 11, 2024
402ac3e
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Sep 23, 2024
67dcba0
TEST: added memory leak tests
samir-nasibli Sep 23, 2024
1bf8d72
TEST: enabled tests for the dpnp/dpctl inputs
samir-nasibli Sep 23, 2024
946575a
updating dpnp conversion support
samir-nasibli Sep 24, 2024
9ba65c7
comment for the test_data.py
samir-nasibli Sep 24, 2024
39a5531
backup last changes
samir-nasibli Sep 24, 2024
7fc1d87
update
samir-nasibli Sep 25, 2024
7023bb5
minor fix
samir-nasibli Sep 25, 2024
8ddfeb5
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Sep 25, 2024
09bff4a
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Sep 27, 2024
aa14417
update to_table, from_table API
samir-nasibli Sep 28, 2024
05473c5
WO for checking tests
samir-nasibli Sep 30, 2024
1c0933b
minor updates
samir-nasibli Sep 30, 2024
70f6ff5
minopr fix for the test
samir-nasibli Sep 30, 2024
ff21324
more test cov
samir-nasibli Oct 1, 2024
9a94cfa
update _assert_tensor_attr
samir-nasibli Oct 1, 2024
a9c2aa9
initial refactoring
samir-nasibli Oct 1, 2024
bf5c5d1
minor formating
samir-nasibli Oct 1, 2024
6b0e3e2
move numpy_helpers into utils
samir-nasibli Oct 1, 2024
9d3cd89
minor update
samir-nasibli Oct 1, 2024
de12203
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Oct 1, 2024
e73dc2d
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Oct 8, 2024
797860f
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Oct 10, 2024
81b373a
Update for DPNP
samir-nasibli Oct 10, 2024
6d327f7
WO for onedal empty sycl context for CPU sycl inputs
samir-nasibli Oct 10, 2024
556cae0
addressed TODOs from the tests
samir-nasibli Oct 10, 2024
09cb477
refactor tests
samir-nasibli Oct 10, 2024
8b083ba
bad arg tests
samir-nasibli Oct 10, 2024
73df977
refactoring
samir-nasibli Oct 11, 2024
8aeff32
minor updates
samir-nasibli Oct 11, 2024
385c69f
refactoring
samir-nasibli Oct 11, 2024
9ac85ab
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Oct 13, 2024
9a1ecde
refactor tests
samir-nasibli Oct 13, 2024
9834d94
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Oct 16, 2024
14ef77c
fix copyrigh year
samir-nasibli Oct 16, 2024
19d3f5c
renamed macros for dtype matching
samir-nasibli Oct 16, 2024
4e4137d
refactor tests
samir-nasibli Oct 16, 2024
7944187
refactor tests
samir-nasibli Oct 17, 2024
f240990
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Oct 18, 2024
ada9369
Merge branch 'main' into enh/sua_data_management
samir-nasibli Oct 20, 2024
c85ce27
removed unused inverse_map
samir-nasibli Oct 20, 2024
15cb815
update test_memory_usage.py
samir-nasibli Oct 20, 2024
5f8866d
added clarification comments
samir-nasibli Oct 21, 2024
69d09b3
more clarification comments
samir-nasibli Oct 21, 2024
d9c42fb
correct backend lib name
samir-nasibli Oct 21, 2024
9723a96
english corrections
samir-nasibli Oct 21, 2024
d101f15
update _apply_and_pass via map
samir-nasibli Oct 21, 2024
e2b8bdb
english corrections
samir-nasibli Oct 21, 2024
120df5b
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Oct 21, 2024
64fd585
Merge branch 'main' into enh/sua_data_management
samir-nasibli Oct 22, 2024
a638b92
linting
samir-nasibli Oct 22, 2024
c92800a
Update data_conversion_sua_iface.cpp
samir-nasibli Oct 23, 2024
ce2d4ef
Merge branch 'intel:main' into enh/sua_data_management
samir-nasibli Oct 26, 2024
c69f967
minor refactoring
samir-nasibli Oct 26, 2024
026aca1
import time check for _is_dpc_backend
samir-nasibli Oct 26, 2024
af83fae
added new test cases
samir-nasibli Oct 26, 2024
4adae13
minor refactoring for onedal/datatypes/_data_conversion.py
samir-nasibli Oct 26, 2024
8eca500
fix test for dtype check
samir-nasibli Oct 26, 2024
0330fc6
removed unnecessary code
samir-nasibli Oct 26, 2024
a161eda
fix test
samir-nasibli Oct 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 53 additions & 19 deletions onedal/datatypes/_data_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,37 +22,71 @@
from onedal import _backend, _is_dpc_backend

from ..utils import _is_csr
from ..utils._dpep_helpers import is_dpctl_available

dpctl_available = is_dpctl_available("0.14")
from ..utils._dpep_helpers import dpctl_available, dpnp_available

if dpctl_available:
import dpctl
import dpctl.tensor as dpt


def _apply_and_pass(func, *args):
def _apply_and_pass(func, *args, **kwargs):
if len(args) == 1:
return func(args[0])
return tuple(map(func, args))


def from_table(*args):
return _apply_and_pass(_backend.from_table, *args)


def convert_one_to_table(arg):
if dpctl_available:
if isinstance(arg, dpt.usm_ndarray):
return _backend.dpctl_to_table(arg)
return func(args[0], **kwargs)
return tuple(map(lambda arg: func(arg, **kwargs), args))


def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None):
# Currently only the `__sycl_usm_array_interface__` protocol is used to
# convert into dpnp/dpctl tensors.
if sua_iface:
if _is_dpc_backend:
samir-nasibli marked this conversation as resolved.
Show resolved Hide resolved
if (
sycl_queue
and sycl_queue.sycl_device.is_cpu
and table.__sycl_usm_array_interface__["syclobj"] is None
):
# oneDAL returns tables with None sycl queue for CPU sycl queue inputs.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the comment, it helped a lot. I guess this is a problem in oneDAL? If someone insisted on using the same queue throughout, this would cause them problems, as it would generate a new CPU queue. Not a blocker, just something for us to think about.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, no new queue will be created; the queue will be used to copy the host data to the USM allocation.

# This workaround is necessary for the functional preservation
# of the compute-follows-data execution.
# Host tables are first converted into numpy.ndarrays and then into arrays from the xp
# namespace.
return xp.asarray(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This opens the possibility that we will store non-numpy arrays as estimator attributes (we can store USM values that are not Array API compliant). This is going to complicate things when a computation falls back to sklearn in a fit step: the results could be of a completely different type. How will this be handled for fit attributes when a fallback occurs? Do we iterate over and convert all instance attributes? At minimum, please open a ticket to investigate this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be a follow-up ticket of this PR. I don't think a ticket needs to be created without any concrete input.
Potentially this could be raised when enabling Array API support for DBSCAN on top of this PR's changes, since stock scikit-learn doesn't support non-numpy array inputs.

_backend.from_table(table), usm_type="device", sycl_queue=sycl_queue
)
else:
xp_name = xp.__name__
if dpnp_available and xp_name == "dpnp":
samir-nasibli marked this conversation as resolved.
Show resolved Hide resolved
# By default, a DPNP ndarray is created with a copy.
# TODO:
# investigate why dpnp.array(table, copy=False) doesn't work.
# Work around with using dpctl.tensor.asarray.
return xp.array(dpt.asarray(table), copy=False)
else:
return xp.asarray(table)
raise RuntimeError(
"SYCL usm array conversion from table requires the DPC backend"
)
return _backend.from_table(table)


def convert_one_to_table(arg, sua_iface=None):
samir-nasibli marked this conversation as resolved.
Show resolved Hide resolved
if sua_iface:
if _is_dpc_backend:
return _backend.sua_iface_to_table(arg)
raise RuntimeError("SYCL usm array conversion to table requires the DPC backend")
samir-nasibli marked this conversation as resolved.
Show resolved Hide resolved

if not _is_csr(arg):
arg = make2d(arg)
return _backend.to_table(arg)


def to_table(*args):
return _apply_and_pass(convert_one_to_table, *args)
def from_table(*args, sycl_queue=None, sua_iface=None, xp=None):
return _apply_and_pass(
convert_one_from_table, *args, sycl_queue=sycl_queue, sua_iface=sua_iface, xp=xp
)


def to_table(*args, sua_iface=None):
return _apply_and_pass(convert_one_to_table, *args, sua_iface=sua_iface)


if _is_dpc_backend:
Expand Down
2 changes: 1 addition & 1 deletion onedal/datatypes/data_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include "oneapi/dal/table/detail/homogen_utils.hpp"

#include "onedal/datatypes/data_conversion.hpp"
#include "onedal/datatypes/numpy_helpers.hpp"
#include "onedal/datatypes/utils/numpy_helpers.hpp"
#include "onedal/version.hpp"

#if ONEDAL_VERSION <= 20230100
Expand Down
225 changes: 0 additions & 225 deletions onedal/datatypes/data_conversion_dpctl.cpp

This file was deleted.

Loading