Skip to content

Commit

Permalink
gpu:intel:sycl: use only l0 queries for l0 devices
Browse files Browse the repository at this point in the history
  • Loading branch information
mgouicem committed Dec 19, 2024
1 parent 7b6a451 commit 58212ed
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 19 deletions.
29 changes: 10 additions & 19 deletions src/gpu/intel/sycl/device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "gpu/intel/sycl/compat.hpp"
#include "gpu/intel/sycl/device_info.hpp"
#include "gpu/intel/sycl/engine.hpp"
#include "gpu/intel/sycl/l0/utils.hpp"
#include "gpu/intel/sycl/utils.hpp"

#include "gpu/intel/ocl/ocl_gpu_hw_info.hpp"
Expand All @@ -34,6 +35,7 @@ status_t device_info_t::init_arch(impl::engine_t *engine) {
auto *sycl_engine
= utils::downcast<const gpu::intel::sycl::engine_t *>(engine);
auto &device = sycl_engine->device();
auto &ctx = sycl_engine->context();

// skip cpu engines
if (!device.is_gpu()) return status::success;
Expand All @@ -43,34 +45,23 @@ status_t device_info_t::init_arch(impl::engine_t *engine) {

auto be = xpu::sycl::get_backend(device);
if (be == xpu::sycl::backend_t::opencl) {
cl_int err = CL_SUCCESS;

auto ocl_dev = xpu::sycl::compat::get_native<cl_device_id>(device);
auto ocl_dev_wrapper = xpu::ocl::make_wrapper(ocl_dev);

auto ocl_ctx_wrapper = xpu::ocl::make_wrapper(
clCreateContext(nullptr, 1, &ocl_dev, nullptr, nullptr, &err));
OCL_CHECK(err);
auto ocl_ctx = xpu::sycl::compat::get_native<cl_context>(ctx);
auto ocl_ctx_wrapper = xpu::ocl::make_wrapper(ocl_ctx);

gpu::intel::ocl::init_gpu_hw_info(engine, ocl_dev_wrapper,
ocl_ctx_wrapper, ip_version_, gpu_arch_, gpu_product_family_,
stepping_id_, native_extensions_, mayiuse_systolic_,
mayiuse_ngen_kernels_);
} else if (be == xpu::sycl::backend_t::level0) {
// TODO: add support for L0 binary ngen check
// XXX: query from ocl_engine for now
std::unique_ptr<gpu::intel::ocl::ocl_gpu_engine_t, engine_deleter_t>
ocl_engine;
CHECK(gpu::intel::sycl::create_ocl_engine(&ocl_engine, sycl_engine));

auto *dev_info = ocl_engine->device_info();
ip_version_ = dev_info->ip_version();
gpu_arch_ = dev_info->gpu_arch();
gpu_product_family_ = dev_info->gpu_product_family();
stepping_id_ = dev_info->stepping_id();
native_extensions_ = dev_info->native_extensions();
mayiuse_systolic_ = dev_info->mayiuse_systolic();
mayiuse_ngen_kernels_ = dev_info->mayiuse_ngen_kernels();
auto ze_dev = xpu::sycl::compat::get_native<ze_device_handle_t>(device);
auto ze_ctx = xpu::sycl::compat::get_native<ze_context_handle_t>(ctx);

gpu::intel::sycl::init_gpu_hw_info(engine, ze_dev, ze_ctx, ip_version_,
gpu_arch_, gpu_product_family_, stepping_id_,
native_extensions_, mayiuse_systolic_, mayiuse_ngen_kernels_);
} else {
assert(!"not_expected");
}
Expand Down
124 changes: 124 additions & 0 deletions src/gpu/intel/sycl/l0/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
#include "gpu/intel/sycl/l0/utils.hpp"
#include "oneapi/dnnl/dnnl_config.h"

#include "gpu/intel/jit/binary_format.hpp"
#include "gpu/intel/jit/ngen/ngen_level_zero.hpp"
#include "gpu/intel/jit/utils/ngen_type_bridge.hpp"

#if defined(__linux__)
#include <dlfcn.h>
#elif defined(_WIN32)
Expand All @@ -26,6 +30,7 @@
#endif

#include "gpu/intel/sycl/l0/level_zero/ze_api.h"
#include "gpu/intel/sycl/l0/level_zero/ze_intel_gpu.h"

#if !defined(__SYCL_COMPILER_VERSION)
#error "Unsupported compiler"
Expand Down Expand Up @@ -173,6 +178,16 @@ status_t func_zeDeviceGetProperties(
return status::success;
}

status_t func_zeDeviceGetModuleProperties(ze_device_handle_t hDevice,
ze_device_module_properties_t *pDeviceProperties) {
static auto f = find_ze_symbol<decltype(&zeDeviceGetModuleProperties)>(
"zeDeviceGetModuleProperties");

if (!f) return status::runtime_error;
ZE_CHECK(f(hDevice, pDeviceProperties));
return status::success;
}

} // namespace

// This function is called from compatibility layer that ensures compatibility
Expand Down Expand Up @@ -272,6 +287,115 @@ bool compare_ze_devices(const ::sycl::device &lhs, const ::sycl::device &rhs) {
return lhs_ze_handle == rhs_ze_handle;
}

status_t get_device_ip(ze_device_handle_t device, uint32_t &ip_version) {
ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
ze_device_ip_version_ext_t devicePropsIP
= {ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT};
deviceProps.pNext = &devicePropsIP;
CHECK(func_zeDeviceGetProperties(device, &deviceProps));
ip_version = devicePropsIP.ipVersion;
return status::success;
}

status_t get_l0_device_enabled_systolic_intel(
ze_device_handle_t device, bool &mayiuse_systolic) {
ze_device_module_properties_t deviceModProps
= {ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES};
// Note: supported by Intel Driver 24.05 and onwards
ze_intel_device_module_dp_exp_properties_t deviceModPropsExt
= {ZE_STRUCTURE_INTEL_DEVICE_MODULE_DP_EXP_PROPERTIES};
deviceModProps.pNext = &deviceModPropsExt;

CHECK(func_zeDeviceGetModuleProperties(device, &deviceModProps));
mayiuse_systolic
= deviceModPropsExt.flags & ZE_INTEL_DEVICE_MODULE_EXP_FLAG_DPAS;
return status::success;
}

status_t get_l0_device_enabled_native_float_atomics(
ze_device_handle_t device, uint64_t native_extensions) {
using namespace gpu::intel::compute;

ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
ze_float_atomic_ext_properties_t fltAtom
= {ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES};
deviceProps.pNext = &fltAtom;
CHECK(func_zeDeviceGetProperties(device, &deviceProps));

ze_device_fp_atomic_ext_flags_t atomic_load_store
= ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE
| ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE;
ze_device_fp_atomic_ext_flags_t atomic_add
= ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_ADD
| ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_ADD;
ze_device_fp_atomic_ext_flags_t atomic_min_max
= ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX
| ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX;

if ((fltAtom.fp16Flags & atomic_load_store) == atomic_load_store)
native_extensions |= (uint64_t)native_ext_t::fp16_atomic_load_store;
if ((fltAtom.fp16Flags & atomic_add) == atomic_add)
native_extensions |= (uint64_t)native_ext_t::fp16_atomic_add;
if ((fltAtom.fp16Flags & atomic_add) == atomic_min_max)
native_extensions |= (uint64_t)native_ext_t::fp16_atomic_min_max;

if ((fltAtom.fp32Flags & atomic_load_store) == atomic_load_store)
native_extensions |= (uint64_t)native_ext_t::fp32_atomic_load_store;
if ((fltAtom.fp32Flags & atomic_add) == atomic_add)
native_extensions |= (uint64_t)native_ext_t::fp32_atomic_add;
if ((fltAtom.fp32Flags & atomic_add) == atomic_min_max)
native_extensions |= (uint64_t)native_ext_t::fp32_atomic_min_max;

if ((fltAtom.fp64Flags & atomic_load_store) == atomic_load_store)
native_extensions |= (uint64_t)native_ext_t::fp64_atomic_load_store;
if ((fltAtom.fp64Flags & atomic_add) == atomic_add)
native_extensions |= (uint64_t)native_ext_t::fp64_atomic_add;
if ((fltAtom.fp64Flags & atomic_add) == atomic_min_max)
native_extensions |= (uint64_t)native_ext_t::fp64_atomic_min_max;

return status::success;
}

status_t get_l0_device_eu_count(ze_device_handle_t device, int &eu_count) {
ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
ze_eu_count_ext_t eucnt = ze_eu_count_ext_t();
deviceProps.pNext = &eucnt;

CHECK(func_zeDeviceGetProperties(device, &deviceProps));
eu_count = eucnt.numTotalEUs;
return status::success;
}

void init_gpu_hw_info(impl::engine_t *engine, ze_device_handle_t device,
ze_context_handle_t context, uint32_t &ip_version,
compute::gpu_arch_t &gpu_arch, int &gpu_product_family,
int &stepping_id, uint64_t &native_extensions, bool &mayiuse_systolic,
bool &mayiuse_ngen_kernels) {
using namespace ngen;
HW hw = HW::Unknown;
Product product = {ProductFamily::Unknown, 0};
LevelZeroCodeGenerator<HW::Unknown>::detectHWInfo(
context, device, hw, product);

gpu_arch = jit::convert_ngen_arch_to_dnnl(hw);
gpu_product_family = static_cast<int>(product.family);
stepping_id = product.stepping;

mayiuse_systolic = false;
status_t ret
= get_l0_device_enabled_systolic_intel(device, mayiuse_systolic);
// TODO: xelpg has no f64 support. check that the query properly handle that
ret = get_l0_device_enabled_native_float_atomics(device, native_extensions);
MAYBE_UNUSED(ret);

auto status
= jit::gpu_supports_binary_format(&mayiuse_ngen_kernels, engine);
if (status != status::success) mayiuse_ngen_kernels = false;

ip_version = 0;
get_device_ip(device, ip_version);
}

} // namespace sycl
} // namespace intel
} // namespace gpu
Expand Down
6 changes: 6 additions & 0 deletions src/gpu/intel/sycl/l0/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ bool compare_ze_devices(const ::sycl::device &lhs, const ::sycl::device &rhs);
status_t func_zeModuleGetNativeBinary(ze_module_handle_t hModule, size_t *pSize,
uint8_t *pModuleNativeBinary);

void init_gpu_hw_info(impl::engine_t *engine, ze_device_handle_t device,
ze_context_handle_t context, uint32_t &ip_version,
compute::gpu_arch_t &gpu_arch, int &gpu_product_family,
int &stepping_id, uint64_t &native_extensions, bool &mayiuse_systolic,
bool &mayiuse_ngen_kernels);

} // namespace sycl
} // namespace intel
} // namespace gpu
Expand Down

0 comments on commit 58212ed

Please sign in to comment.