oneapi-src · xiang1guo · Dec 23, 2024 · Dec 24, 2024 · Dec 26, 2024 · TaoLv
@@ -646,7 +646,7 @@ dnnl::memory::format_tag get_format_tag(const dnnl::memory::desc &md) {
     return format_tag;
 }
 
-size_t generate_constant_cache_key(
+size_t generate_constant_md_hash(
         size_t part_id, const std::vector<dnnl::memory::desc> &const_mds) {
     size_t key = 0;
     key = hash_combine(key, part_id);

@@ -141,7 +141,7 @@ std::string get_format_tag_str(const dnnl::memory::desc &md);
 
 dnnl::memory::format_tag get_format_tag(const dnnl::memory::desc &md);
 
-size_t generate_constant_cache_key(
+size_t generate_constant_md_hash(
         size_t part_id, const std::vector<dnnl::memory::desc> &const_mds);
 
 #ifndef NDEBUG

@@ -109,7 +109,7 @@ status_t batch_norm_fwd_t::compile_impl(const dnnl_partition_impl_t *part,
         return this->memory_planner_.get_exec_args_set().clone();
     };
 
-    constant_key_ = generate_constant_cache_key(part->id(),
+    const_md_hash_ = generate_constant_md_hash(part->id(),
             memory_planner_.get_exec_args_set().get_persistent_mem_desc_list());
 
     return status::success;
@@ -135,9 +135,11 @@ status_t batch_norm_fwd_t::execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -204,9 +206,11 @@ status_t batch_norm_fwd_t::sycl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -279,9 +283,11 @@ status_t batch_norm_fwd_t::ocl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();

@@ -46,7 +46,7 @@ struct batch_norm_fwd_t : public kernel_base_t {
     memory_planner_t memory_planner_;
 
     std::function<std::shared_ptr<execution_args_set_t>()> resource_ctor_;
-    constant_cache_t::key_t constant_key_ = 0;
+    size_t const_md_hash_ = 0;
 
 public:
     batch_norm_fwd_t() {

@@ -134,7 +134,7 @@ status_t conv_fwd_t<quantized>::compile_impl(const dnnl_partition_impl_t *part,
         return this->memory_planner_.get_exec_args_set().clone();
     };
 
-    constant_key_ = generate_constant_cache_key(part->id(),
+    const_md_hash_ = generate_constant_md_hash(part->id(),
             memory_planner_.get_exec_args_set().get_persistent_mem_desc_list());
 
     return status::success;
@@ -202,7 +202,7 @@ status_t conv_bwd_data_t::compile_impl(const dnnl_partition_impl_t *part,
         return this->memory_planner_.get_exec_args_set().clone();
     };
 
-    constant_key_ = generate_constant_cache_key(part->id(),
+    const_md_hash_ = generate_constant_md_hash(part->id(),
             memory_planner_.get_exec_args_set().get_persistent_mem_desc_list());
 
     return status::success;

@@ -63,9 +63,11 @@ status_t conv_base_t::execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -132,9 +134,11 @@ status_t conv_base_t::sycl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -207,9 +211,11 @@ status_t conv_base_t::ocl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();

@@ -47,7 +47,7 @@ struct conv_base_t : public kernel_base_t {
 
     std::function<std::shared_ptr<execution_args_set_t>()> resource_ctor_;
 
-    constant_cache_t::key_t constant_key_ = 0;
+    size_t const_md_hash_ = 0;
 
 public:
     conv_base_t() {

@@ -114,7 +114,7 @@ status_t conv_transpose_fwd_t<quantized>::compile_impl(
         return this->memory_planner_.get_exec_args_set().clone();
     };
 
-    constant_key_ = generate_constant_cache_key(part->id(),
+    const_md_hash_ = generate_constant_md_hash(part->id(),
             memory_planner_.get_exec_args_set().get_persistent_mem_desc_list());
 
     return status::success;
@@ -183,7 +183,7 @@ status_t conv_transpose_bwd_data_t::compile_impl(
         return this->memory_planner_.get_exec_args_set().clone();
     };
 
-    constant_key_ = generate_constant_cache_key(part->id(),
+    const_md_hash_ = generate_constant_md_hash(part->id(),
             memory_planner_.get_exec_args_set().get_persistent_mem_desc_list());
 
     return status::success;

@@ -91,7 +91,7 @@ status_t eltwise_fwd_t<quantized>::compile_impl(
         return this->memory_planner_.get_exec_args_set().clone();
     };
 
-    constant_key_ = generate_constant_cache_key(part->id(),
+    const_md_hash_ = generate_constant_md_hash(part->id(),
             memory_planner_.get_exec_args_set().get_persistent_mem_desc_list());
 
     return status::success;
@@ -138,9 +138,11 @@ status_t eltwise_fwd_t<quantized>::execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -208,9 +210,11 @@ status_t eltwise_fwd_t<quantized>::sycl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -284,9 +288,11 @@ status_t eltwise_fwd_t<quantized>::ocl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();

@@ -49,7 +49,7 @@ struct eltwise_fwd_t : public kernel_base_t {
 
     std::function<std::shared_ptr<execution_args_set_t>()> resource_ctor_;
 
-    constant_cache_t::key_t constant_key_ = 0;
+    size_t const_md_hash_ = 0;
 
 public:
     eltwise_fwd_t() {

@@ -97,7 +97,7 @@ status_t group_norm_fwd_t::compile_impl(const dnnl_partition_impl_t *part,
         return this->memory_planner_.get_exec_args_set().clone();
     };
 
-    constant_key_ = generate_constant_cache_key(part->id(),
+    const_md_hash_ = generate_constant_md_hash(part->id(),
             memory_planner_.get_exec_args_set().get_persistent_mem_desc_list());
 
     return status::success;
@@ -143,9 +143,11 @@ status_t group_norm_fwd_t::execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -212,9 +214,11 @@ status_t group_norm_fwd_t::sycl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -287,9 +291,11 @@ status_t group_norm_fwd_t::ocl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();

@@ -48,7 +48,7 @@ struct group_norm_fwd_t : public kernel_base_t {
 
     std::function<std::shared_ptr<execution_args_set_t>()> resource_ctor_;
 
-    constant_cache_t::key_t constant_key_ = 0;
+    size_t const_md_hash_ = 0;
 
 public:
     group_norm_fwd_t() {

@@ -43,6 +43,19 @@ bool kernel_base_t::enabled_constant_cache() const {
     return enabled;
 }
 
+size_t kernel_base_t::encode_constant_cache_key(
+        const std::vector<tensor_t> &inputs, size_t cache_key) const {
+    // Derive a cache key from `cache_key` plus the data handle of each constant input.
+    size_t encoded_cache_key = cache_key; // start from the caller-supplied key
+    for (const auto &in : inputs) {
+        if (logical_tensor_wrapper_t(in.get_logical_tensor()).is_constant()) {
+            encoded_cache_key = hash_combine(encoded_cache_key,
+                    reinterpret_cast<uintptr_t>(in.get_data_handle())); // handle pointer value as an integer
+        }
+    }
+    return encoded_cache_key; // equals `cache_key` when no input is constant
+}
+
 const std::vector<inplace_pair_t> &kernel_base_t::get_inplace_pairs() const {
     return inplace_pairs_;
 };

@@ -103,6 +103,9 @@ struct kernel_base_t {
 
     bool enabled_constant_cache() const;
 
+    size_t encode_constant_cache_key(
+            const std::vector<tensor_t> &inputs, size_t cache_key) const;
+
     const std::vector<inplace_pair_t> &get_inplace_pairs() const;
 
 protected:

@@ -222,7 +222,7 @@ status_t larger_partition_kernel_t::compile_impl(
         return this->memory_planner_.get_exec_args_set().clone();
     };
 
-    constant_key_ = generate_constant_cache_key(part->id(),
+    const_md_hash_ = generate_constant_md_hash(part->id(),
             memory_planner_.get_exec_args_set().get_persistent_mem_desc_list());
 
     return status::success;
@@ -248,9 +248,11 @@ status_t larger_partition_kernel_t::execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -316,9 +318,11 @@ status_t larger_partition_kernel_t::sycl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();
@@ -389,9 +393,11 @@ status_t larger_partition_kernel_t::ocl_execute_impl(const stream_t *g_stream,
 
     constant_cache_t::cached_t c_buffer;
     if (enabled_constant_cache()) {
+        const size_t encoded_key
+                = encode_constant_cache_key(inputs, const_md_hash_);
         std::promise<constant_cache_t::cached_t> c_promise;
         constant_cache_t::value_t cached_value
-                = dnnl_constant_cache_get_or_add(p_engine_, constant_key_,
+                = dnnl_constant_cache_get_or_add(p_engine_, encoded_key,
                         memory_planner_.total_internal_persistent_size(),
                         c_promise.get_future());
         bool is_from_cache = cached_value.valid();

@@ -48,7 +48,7 @@ class larger_partition_kernel_t : public kernel_base_t {
 
     std::function<std::shared_ptr<execution_args_set_t>()> resource_ctor_;
 
-    constant_cache_t::key_t constant_key_ = 0;
+    size_t const_md_hash_ = 0;
 
     std::once_flag once_flag_;
     subgraph_visualizer_t vis_;