From 00c3465049b5962f80484166c6af282119f909f9 Mon Sep 17 00:00:00 2001 From: dmitrygo Date: Thu, 11 Jul 2024 10:54:09 +0200 Subject: [PATCH] [FORK][FIX] Restore choose_loop_order logic for weights decompression case --- src/cpu/x64/jit_brgemm_inner_product_utils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cpu/x64/jit_brgemm_inner_product_utils.cpp b/src/cpu/x64/jit_brgemm_inner_product_utils.cpp index 964c48cdacc..54987ad99a3 100644 --- a/src/cpu/x64/jit_brgemm_inner_product_utils.cpp +++ b/src/cpu/x64/jit_brgemm_inner_product_utils.cpp @@ -1885,7 +1885,8 @@ void jit_brgemm_ip_fwd_conf_t::choose_loop_order() { const bool is_int8 = one_of(src_dt, u8, s8) && wei_dt == s8; const bool is_compute_amx = (is_xf16 || is_int8) && is_amx; - if ((os_block < 32 || do_occ_osc) && (is_compute_amx || is_f32_avx2)) + // Better to keep ocb loop outermost for weights_decompression case due to overhead on weights unpack into intermediate buffer + if ((os_block < 32 || do_occ_osc) && (is_compute_amx || is_f32_avx2) && !weights_decompression) loop_order = icc_occ_osc_ocb_osb; }