diff --git a/src/cpu/x64/jit_uni_reorder.cpp b/src/cpu/x64/jit_uni_reorder.cpp index 3d86b3cbc2e..920f2490f94 100644 --- a/src/cpu/x64/jit_uni_reorder.cpp +++ b/src/cpu/x64/jit_uni_reorder.cpp @@ -192,8 +192,9 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { && utils::everyone_is(0, p.ioff, p.ooff) /* do we need this? */ && utils::one_of(p.beta, 0.f, 1.f) /* anything else? */ && simple_impl_desc_init(p, nullptr) && mayiuse(sse41) - && IMPLICATION(utils::one_of(bf16, p.itype, p.otype), - mayiuse(avx512_core) || mayiuse(avx2)) + && IMPLICATION(bf16 == p.itype, mayiuse(avx2)) + && IMPLICATION((bf16 == p.otype) && (bf16 != p.itype), + mayiuse(avx512_core) || mayiuse(avx2_vnni_2)) && IMPLICATION(utils::one_of(f16, p.itype, p.otype), mayiuse(avx512_core_fp16) || mayiuse(avx2)) && IMPLICATION(!is_direct_copy(p), prb_has_small_strides(p)); @@ -1428,7 +1429,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { otype_sz_ = data_type_size(prb_.otype); stype_sz_ = sizeof(float); if (prb_.otype == data_type::bf16 && !mayiuse(avx512_core_bf16) - && !mayiuse(avx2_vnni_2)) { + && !mayiuse(avx2_vnni_2) && mayiuse(avx512_core)) { bf16_emu_ = utils::make_unique(this, bf16_emu_reserv_1_, bf16_emu_reserv_2_, bf16_emu_reserv_3_, bf16_emu_scratch_, bf16_emu_reserv_4_);