Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Use L0 queries for L0 devices #2245

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 55 additions & 6 deletions src/gpu/intel/jit/ngen/ngen_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@

#include "gpu/intel/sycl/l0/level_zero/ze_api.h"

#if defined(__linux__)
#include <dlfcn.h>
#elif defined(_WIN32)
#include "windows.h"
#else
#error "Level Zero is supported on Linux and Windows only"
#endif

#include <sstream>

#include "ngen_elf.hpp"
Expand All @@ -36,6 +44,47 @@ class level_zero_error : public std::runtime_error {
ze_result_t status;
};

// Dynamically loaded level_zero functions
namespace {

void *find_ze_symbol(const char *symbol) {
#if defined(__linux__)
void *handle = dlopen("libze_loader.so.1", RTLD_NOW | RTLD_LOCAL);
#elif defined(_WIN32)
// Use LOAD_LIBRARY_SEARCH_SYSTEM32 flag to avoid DLL hijacking issue.
HMODULE handle = LoadLibraryExA(
"ze_loader.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32);
#endif
if (!handle) throw level_zero_error{ZE_RESULT_ERROR_UNINITIALIZED};

#if defined(__linux__)
void *f = reinterpret_cast<void *>(dlsym(handle, symbol));
#elif defined(_WIN32)
void *f = reinterpret_cast<void *>(GetProcAddress(handle, symbol));
#endif

if (!f) throw level_zero_error{ZE_RESULT_ERROR_UNINITIALIZED};
return f;
}

template <typename F>
F find_ze_symbol(const char *symbol) {
return (F)find_ze_symbol(symbol);
}

#define ZE_INDIRECT_API(f) \
template <typename... Args> ze_result_t call_##f(Args&&... args) { \
static auto f_ = find_ze_symbol<decltype(&f)>(#f); \
return f_(std::forward<Args>(args)...); \
}

ZE_INDIRECT_API(zeModuleCreate)
ZE_INDIRECT_API(zeModuleDestroy)
ZE_INDIRECT_API(zeDeviceGetProperties)
ZE_INDIRECT_API(zeModuleGetNativeBinary)

} // namespace

// Level Zero program generator class.
template <HW hw>
class LevelZeroCodeGenerator : public ELFCodeGenerator<hw>
Expand Down Expand Up @@ -85,7 +134,7 @@ ze_module_handle_t LevelZeroCodeGenerator<hw>::getModule(ze_context_handle_t con
};

ze_module_handle_t module;
detail::handleL0(zeModuleCreate(context, device, &moduleDesc, &module, nullptr));
detail::handleL0(call_zeModuleCreate(context, device, &moduleDesc, &module, nullptr));

if (module == nullptr)
throw level_zero_error{};
Expand Down Expand Up @@ -120,7 +169,7 @@ void LevelZeroCodeGenerator<hw>::detectHWInfo(ze_context_handle_t context, ze_de
ze_device_ip_version_ext_t vprop = {ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT, nullptr, 0};
ze_device_properties_t dprop = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &vprop};

if (zeDeviceGetProperties(device, &dprop) == ZE_RESULT_SUCCESS) {
if (call_zeDeviceGetProperties(device, &dprop) == ZE_RESULT_SUCCESS) {
outProduct = npack::decodeHWIPVersion(vprop.ipVersion);
outHW = getCore(outProduct.family);
if (outProduct.family != ProductFamily::Unknown)
Expand All @@ -140,18 +189,18 @@ void LevelZeroCodeGenerator<hw>::detectHWInfo(ze_context_handle_t context, ze_de
};

ze_module_handle_t module;
detail::handleL0(zeModuleCreate(context, device, &moduleDesc, &module, nullptr));
detail::handleL0(call_zeModuleCreate(context, device, &moduleDesc, &module, nullptr));

if (module == nullptr)
throw level_zero_error{};

std::vector<uint8_t> binary;
size_t binarySize;

detail::handleL0(zeModuleGetNativeBinary(module, &binarySize, nullptr));
detail::handleL0(call_zeModuleGetNativeBinary(module, &binarySize, nullptr));
binary.resize(binarySize);
detail::handleL0(zeModuleGetNativeBinary(module, &binarySize, binary.data()));
detail::handleL0(zeModuleDestroy(module));
detail::handleL0(call_zeModuleGetNativeBinary(module, &binarySize, binary.data()));
detail::handleL0(call_zeModuleDestroy(module));

ELFCodeGenerator<hw>::getBinaryHWInfo(binary, outHW, outProduct);
}
Expand Down
19 changes: 7 additions & 12 deletions src/gpu/intel/ocl/ocl_gpu_hw_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ xpu::runtime_version_t get_driver_version(cl_device_id device) {
return runtime_version;
}

void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
status_t init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
cl_context context, uint32_t &ip_version, compute::gpu_arch_t &gpu_arch,
int &gpu_product_family, int &stepping_id, uint64_t &native_extensions,
bool &mayiuse_systolic, bool &mayiuse_ngen_kernels) {
Expand All @@ -71,13 +71,9 @@ void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
stepping_id = product.stepping;

mayiuse_systolic = false;
status_t ret
= get_ocl_device_enabled_systolic_intel(device, mayiuse_systolic);
assert(ret == CL_SUCCESS);
ret = get_ocl_device_enabled_native_float_atomics(
device, native_extensions, is_xelpg);
assert(ret == CL_SUCCESS);
MAYBE_UNUSED(ret);
CHECK(get_ocl_device_enabled_systolic_intel(device, mayiuse_systolic));
CHECK(get_ocl_device_enabled_native_float_atomics(
device, native_extensions, is_xelpg));

auto status
= jit::gpu_supports_binary_format(&mayiuse_ngen_kernels, engine);
Expand All @@ -88,10 +84,9 @@ void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
}

ip_version = 0;
if (clGetDeviceInfo(device, CL_DEVICE_IP_VERSION_INTEL, sizeof(ip_version),
&ip_version, nullptr)
!= CL_SUCCESS)
ip_version = 0;
OCL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IP_VERSION_INTEL,
sizeof(ip_version), &ip_version, nullptr));
return status::success;
}

} // namespace ocl
Expand Down
2 changes: 1 addition & 1 deletion src/gpu/intel/ocl/ocl_gpu_hw_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace ocl {

xpu::runtime_version_t get_driver_version(cl_device_id device);

void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
status_t init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
cl_context context, uint32_t &ip_version, compute::gpu_arch_t &gpu_arch,
int &gpu_product_family, int &stepping_id, uint64_t &native_extensions,
bool &mayiuse_systolic, bool &mayiuse_ngen_kernels);
Expand Down
35 changes: 14 additions & 21 deletions src/gpu/intel/sycl/device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "gpu/intel/sycl/compat.hpp"
#include "gpu/intel/sycl/device_info.hpp"
#include "gpu/intel/sycl/engine.hpp"
#include "gpu/intel/sycl/l0/utils.hpp"
#include "gpu/intel/sycl/utils.hpp"

#include "gpu/intel/ocl/ocl_gpu_hw_info.hpp"
Expand All @@ -34,48 +35,40 @@ status_t device_info_t::init_arch(impl::engine_t *engine) {
auto *sycl_engine
= utils::downcast<const gpu::intel::sycl::engine_t *>(engine);
auto &device = sycl_engine->device();
auto &ctx = sycl_engine->context();

// skip cpu engines
if (!device.is_gpu()) return status::success;

// skip other vendors
if (!xpu::sycl::is_intel_device(device)) return status::success;

auto status = status::success;
auto be = xpu::sycl::get_backend(device);
if (be == xpu::sycl::backend_t::opencl) {
cl_int err = CL_SUCCESS;

auto ocl_dev = xpu::sycl::compat::get_native<cl_device_id>(device);
auto ocl_dev_wrapper = xpu::ocl::make_wrapper(ocl_dev);

auto ocl_ctx_wrapper = xpu::ocl::make_wrapper(
clCreateContext(nullptr, 1, &ocl_dev, nullptr, nullptr, &err));
OCL_CHECK(err);
auto ocl_ctx = xpu::sycl::compat::get_native<cl_context>(ctx);
auto ocl_ctx_wrapper = xpu::ocl::make_wrapper(ocl_ctx);

gpu::intel::ocl::init_gpu_hw_info(engine, ocl_dev_wrapper,
status = gpu::intel::ocl::init_gpu_hw_info(engine, ocl_dev_wrapper,
ocl_ctx_wrapper, ip_version_, gpu_arch_, gpu_product_family_,
stepping_id_, native_extensions_, mayiuse_systolic_,
mayiuse_ngen_kernels_);
} else if (be == xpu::sycl::backend_t::level0) {
// TODO: add support for L0 binary ngen check
// XXX: query from ocl_engine for now
std::unique_ptr<gpu::intel::ocl::ocl_gpu_engine_t, engine_deleter_t>
ocl_engine;
CHECK(gpu::intel::sycl::create_ocl_engine(&ocl_engine, sycl_engine));

auto *dev_info = ocl_engine->device_info();
ip_version_ = dev_info->ip_version();
gpu_arch_ = dev_info->gpu_arch();
gpu_product_family_ = dev_info->gpu_product_family();
stepping_id_ = dev_info->stepping_id();
native_extensions_ = dev_info->native_extensions();
mayiuse_systolic_ = dev_info->mayiuse_systolic();
mayiuse_ngen_kernels_ = dev_info->mayiuse_ngen_kernels();
auto ze_dev = xpu::sycl::compat::get_native<ze_device_handle_t>(device);
auto ze_ctx = xpu::sycl::compat::get_native<ze_context_handle_t>(ctx);

status = gpu::intel::sycl::init_gpu_hw_info(engine, ze_dev, ze_ctx,
ip_version_, gpu_arch_, gpu_product_family_, stepping_id_,
native_extensions_, mayiuse_systolic_, mayiuse_ngen_kernels_);
} else {
assert(!"not_expected");
status = status::unimplemented;
}

return status::success;
return status;
}

status_t device_info_t::init_device_name(impl::engine_t *engine) {
Expand Down
Loading
Loading