From 8fcf8c027131745460965e5cca6002429a71db67 Mon Sep 17 00:00:00 2001 From: Anton Voronov Date: Mon, 31 Jan 2022 02:31:27 +0300 Subject: [PATCH 1/2] simple reorder: disabled unused primitives --- src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp | 14 ++- src/cpu/reorder/cpu_reorder_regular_bf16.cpp | 1 + src/cpu/reorder/cpu_reorder_regular_bin.cpp | 6 +- src/cpu/reorder/cpu_reorder_regular_f16.cpp | 2 + .../reorder/cpu_reorder_regular_f32_bf16.cpp | 1 + .../reorder/cpu_reorder_regular_f32_bin.cpp | 2 + .../reorder/cpu_reorder_regular_f32_f16.cpp | 2 + .../reorder/cpu_reorder_regular_f32_f32.cpp | 90 +++++++++++++++---- .../reorder/cpu_reorder_regular_f32_s32.cpp | 3 +- .../reorder/cpu_reorder_regular_f32_s8.cpp | 6 ++ .../reorder/cpu_reorder_regular_f32_u8.cpp | 7 +- src/cpu/reorder/cpu_reorder_regular_s32.cpp | 20 ++++- src/cpu/reorder/cpu_reorder_regular_s8.cpp | 24 ++++- src/cpu/reorder/cpu_reorder_regular_u8.cpp | 24 ++++- 14 files changed, 168 insertions(+), 34 deletions(-) diff --git a/src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp b/src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp index 72acf20d250..72cc1acc552 100644 --- a/src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp +++ b/src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp @@ -27,10 +27,15 @@ const impl_list_map_t comp_s8_s8_impl_list_map { {{s8, s8, 2}, { REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))) REG_REORDER_P(REG_SR(s8, oi, s8, OI4i16o4i, fmt_order_keep, spec_conv_req_comp)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(s8, io, s8, OI4i16o4i, fmt_order_keep, spec_conv_req_comp)) +#endif REG_REORDER_P(REG_SR(s8, oi, s8, OI4i32o4i, fmt_order_keep, spec_conv_req_comp)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(s8, io, s8, OI4i32o4i, fmt_order_keep, spec_conv_req_comp)) +#endif REG_REORDER_P(REG_SR(s8, oi, s8, OI4i64o4i, fmt_order_keep, spec_conv_req_comp)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(s8, io, s8, OI4i64o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, ab, s8, BA16a16b4a, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, ab, s8, BA16a32b4a, fmt_order_keep, spec_conv_req_comp)) @@ -40,11 +45,13 @@ const impl_list_map_t comp_s8_s8_impl_list_map { REG_REORDER_P(REG_SR(s8, ba, s8, BA16a32b4a, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, ba, s8, BA16a48b4a, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, ba, s8, BA16a64b4a, fmt_order_keep, spec_conv_req_comp)) +#endif nullptr, }}, // s8 -> s8 {{s8, s8, 3}, { REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(s8, any, s8, wio, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, iwo, s8, OIw4i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, iwo, s8, OIw4i32o4i, fmt_order_keep, spec_conv_req_comp)) @@ -70,6 +77,7 @@ const impl_list_map_t comp_s8_s8_impl_list_map { REG_REORDER_P(REG_SR(s8, iwo, s8, OIw16i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, oiw, s8, OIw16i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, wio, s8, OIw16i16o4i, fmt_order_keep, spec_conv_req_comp)) +#endif nullptr, }}, {{s8, s8, 4}, { @@ -119,10 +127,10 @@ const impl_list_map_t comp_s8_s8_impl_list_map { REG_REORDER_P(REG_SR(s8, ihwo, s8, OIhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, oihw, s8, OIhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, hwio, s8, OIhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) -#endif REG_REORDER_P(REG_SR(s8, iohw, s8, OIhw4i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, iohw, s8, OIhw2i8o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, iohw, s8, OIhw4o4i, fmt_order_keep, spec_conv_req_comp)) +#endif nullptr, }}, @@ -170,13 +178,13 @@ const impl_list_map_t comp_s8_s8_impl_list_map { REG_REORDER_P(REG_SR(s8, idhwo, s8, OIdhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, oidhw, s8, OIdhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, dhwio, s8, OIdhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) -#endif REG_REORDER_P(REG_SR(s8, giohw, s8, gOIhw4i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, giohw, s8, gOIhw2i8o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, giohw, s8, gOIhw4o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, iodhw, s8, OIdhw4i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, iodhw, s8, OIdhw2i8o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, iodhw, s8, OIdhw4o4i, fmt_order_keep, spec_conv_req_comp)) +#endif nullptr, }}, @@ -189,10 +197,10 @@ const impl_list_map_t comp_s8_s8_impl_list_map { REG_REORDER_P(REG_SR(s8, goidhw, s8, gOIdhw4o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, goidhw, s8, gOdhwI16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, goidhw, s8, gOIdhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) -#endif REG_REORDER_P(REG_SR(s8, giodhw, s8, gOIdhw4i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, giodhw, s8, gOIdhw2i8o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, giodhw, s8, gOIdhw4o4i, fmt_order_keep, spec_conv_req_comp)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_bf16.cpp b/src/cpu/reorder/cpu_reorder_regular_bf16.cpp index ad405ad7bcb..63a6fb7c811 100644 --- a/src/cpu/reorder/cpu_reorder_regular_bf16.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_bf16.cpp @@ -29,6 +29,7 @@ const impl_list_map_t regular_bf16_impl_list_map { REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_blk_reorder_t))) REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))) +// todo: [AV] disable unused simple reorder primitives REG_REORDER_P(REG_SR_BIDIR(bf16, any, f32, nCw16c)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, f32, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, f32, nCdhw16c)) diff --git a/src/cpu/reorder/cpu_reorder_regular_bin.cpp b/src/cpu/reorder/cpu_reorder_regular_bin.cpp index 94abb871e98..850d30a72a1 100644 --- a/src/cpu/reorder/cpu_reorder_regular_bin.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_bin.cpp @@ -25,13 +25,15 @@ namespace cpu { const impl_list_map_t regular_bin_impl_list_map { // bin -> {{bin, data_type::undef, 4}, { +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_DIRECT_COPY(bin, bin)) - +#endif REG_REORDER_P(REG_SR(bin, any, bin, OIhw8o32i, fmt_order_keep)) REG_REORDER_P(REG_SR(bin, any, bin, OIhw16o32i, fmt_order_keep)) - +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(u8, any, u8, nChw8c)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_f16.cpp b/src/cpu/reorder/cpu_reorder_regular_f16.cpp index 5f4d1916289..8f5ea6d4714 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f16.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f16.cpp @@ -25,8 +25,10 @@ namespace cpu { const impl_list_map_t regular_f16_impl_list_map { // f16 -> {{f16, data_type::undef, 0}, { +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(f16, any, f16, any, fmt_order_any, spec_reference)) REG_REORDER_P(REG_SR(f16, any, f32, any, fmt_order_any, spec_reference)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp index 906f4f943d8..7559a9ac9dc 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp @@ -30,6 +30,7 @@ const impl_list_map_t regular_f32_bf16_impl_list_map { REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_blk_reorder_t))) REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))) +// todo: [AV] disable unused simple reorder primitives REG_REORDER_P(REG_SR_BIDIR(f32, ncw, bf16, nCw16c)) REG_REORDER_P(REG_SR_BIDIR(f32, nchw, bf16, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(f32, any, bf16, nChw16c)) diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_bin.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_bin.cpp index 715f547f4d2..5d558a12e39 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_bin.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_bin.cpp @@ -25,9 +25,11 @@ namespace cpu { const impl_list_map_t regular_f32_bin_impl_list_map { // f32 -> bin {{f32, bin, 4}, { +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, nchw, bin, nhwc)) REG_REORDER_P(REG_SR_BIDIR(f32, nhwc, bin, nhwc)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_f16.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_f16.cpp index 074b7f8db32..7114256bd6e 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_f16.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_f16.cpp @@ -25,7 +25,9 @@ namespace cpu { const impl_list_map_t regular_f32_f16_impl_list_map { // f32 -> f16 {{f32, f16, 0}, { +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(f32, any, f16, any, fmt_order_any, spec_reference)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp index d02db18243f..f01bc6e2b08 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_f32.cpp @@ -46,6 +46,7 @@ const impl_list_map_t regular_f32_f32_impl_list_map { REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, nCw16c)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, nCw8c)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, nCw4c)) REG_REORDER_P(REG_SR_BIDIR(f32, nCw4c, f32, nCw16c)) @@ -53,12 +54,19 @@ const impl_list_map_t regular_f32_f32_impl_list_map { REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIw4i4o)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIw4o4i)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIw8i8o)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, OIw8i8o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, OIw8i8o, fmt_order_reverse)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIw8o8i)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIw16o16i)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIw16i16o)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, OIw16i16o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, OIw16i16o, fmt_order_reverse)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, IOw16o16i)) +#endif REG_REORDER_P(REG_SR(f32, any, f32, any, fmt_order_any, spec_reference)) @@ -78,10 +86,14 @@ const impl_list_map_t regular_f32_f32_impl_list_map { REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, nChw8c)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, nChw4c)) REG_REORDER_P(REG_SR_BIDIR(f32, nChw4c, f32, nChw16c)) - REG_REORDER_P(REG_SR_BIDIR(f32, nChw8c, f32, nChw16c)) +#endif + REG_REORDER_P(REG_SR(f32, nChw8c, f32, nChw16c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, nChw8c, f32, nChw16c, fmt_order_reverse)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, gOIw4i4o)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, gOIw4o4i)) @@ -94,20 +106,44 @@ const impl_list_map_t regular_f32_f32_impl_list_map { REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIhw4i4o)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIhw4o4i)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, Ohwi8o)) - - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIhw8i8o)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIhw8o8i)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, Ohwi8o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, Ohwi8o, fmt_order_reverse)) +#endif + + REG_REORDER_P(REG_SR(f32, any, f32, OIhw8i8o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, OIhw8i8o, fmt_order_reverse)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, OIhw8o8i, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, OIhw8o8i, fmt_order_reverse)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, Oihw4o)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, Oihw16o)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, Oihw16o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, Oihw16o, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, Ohwi4o)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, Ohwi16o)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIhw16o16i)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIhw16i16o)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, Ohwi16o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, Ohwi16o, fmt_order_reverse)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, OIhw16o16i, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, OIhw16o16i,fmt_order_reverse)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, OIhw16i16o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, OIhw16i16o, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, IOhw16o16i)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIhw4i16o4i)) +#endif REG_REORDER_P(REG_SR(f32, any, f32, any, fmt_order_any, spec_reference)) @@ -126,6 +162,7 @@ const impl_list_map_t regular_f32_f32_impl_list_map { REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, nCdhw16c)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, nCdhw8c)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, nCdhw4c)) REG_REORDER_P(REG_SR_BIDIR(f32, nCdhw4c, f32, nCdhw16c)) @@ -147,18 +184,37 @@ const impl_list_map_t regular_f32_f32_impl_list_map { REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIdhw4i4o)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIdhw4o4i)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, Odhwi8o)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIdhw8i8o)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, Odhwi8o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, Odhwi8o, fmt_order_reverse)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, OIdhw8i8o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, OIdhw8i8o, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIdhw8o8i)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, Oidhw4o)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, Oidhw16o)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, Odhwi16o)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, Oidhw16o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, Oidhw16o, fmt_order_reverse)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, Odhwi16o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, Odhwi16o, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIdhw16o16i)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, OIdhw16i16o)) +#endif + REG_REORDER_P(REG_SR(f32, any, f32, OIdhw16i16o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, f32, OIdhw16i16o, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, IOdhw16o16i)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, gOIhw4i16o4i)) +#endif REG_REORDER_P(REG_SR(f32, any, f32, any, fmt_order_any, spec_reference)) @@ -172,6 +228,7 @@ const impl_list_map_t regular_f32_f32_impl_list_map { DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_uni_reorder_t)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, gOIdhw4i4o)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, gOIdhw4o4i)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, gOdhwi8o)) @@ -184,6 +241,7 @@ const impl_list_map_t regular_f32_f32_impl_list_map { REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, gOIdhw16o16i)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, gOIdhw16i16o)) REG_REORDER_P(REG_SR_BIDIR(f32, any, f32, gIOdhw16o16i)) +#endif REG_REORDER_P(REG_SR(f32, any, f32, any, fmt_order_any, spec_reference)) diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_s32.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_s32.cpp index f79ff3bbd94..8c4adb9a0f0 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_s32.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_s32.cpp @@ -31,11 +31,12 @@ const impl_list_map_t regular_f32_s32_impl_list_map { REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))) DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_uni_reorder_t)) - +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, s32, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(f32, any, s32, nChw8c)) REG_REORDER_P(REG_SR(f32, any, s32, any, fmt_order_any, spec_reference)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_s8.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_s8.cpp index 5d59bdcccc8..83a09a527cf 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_s8.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_s8.cpp @@ -38,14 +38,20 @@ const impl_list_map_t regular_f32_s8_impl_list_map { DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_uni_reorder_t)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, s8, nCw16c)) +#endif REG_REORDER_P(REG_SR_BIDIR(f32, any, s8, nChw16c)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, s8, nCw8c)) +#endif REG_REORDER_P(REG_SR_BIDIR(f32, any, s8, nChw8c)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, s8, OIhw4i16o4i)) REG_REORDER_P(REG_SR_BIDIR(f32, any, s8, gOIhw4i16o4i)) REG_REORDER_P(REG_SR(f32, any, s8, any, fmt_order_any, spec_reference)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp index 30e2fdfe72a..b91aa75804e 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_u8.cpp @@ -33,13 +33,18 @@ const impl_list_map_t regular_f32_u8_impl_list_map { REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))) DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_uni_reorder_t)) - +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, u8, nCw16c)) +#endif REG_REORDER_P(REG_SR_BIDIR(f32, any, u8, nChw16c)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(f32, any, u8, nCw8c)) +#endif REG_REORDER_P(REG_SR_BIDIR(f32, any, u8, nChw8c)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(f32, any, u8, any, fmt_order_any, spec_reference)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_s32.cpp b/src/cpu/reorder/cpu_reorder_regular_s32.cpp index 8218e981a5f..e77d69efec0 100644 --- a/src/cpu/reorder/cpu_reorder_regular_s32.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_s32.cpp @@ -35,12 +35,23 @@ const impl_list_map_t regular_s32_impl_list_map { DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_uni_reorder_t)) - REG_REORDER_P(REG_SR_BIDIR(s32, any, f32, nChw16c)) - REG_REORDER_P(REG_SR_BIDIR(s32, any, s32, nChw16c)) + REG_REORDER_P(REG_SR(s32, any, f32, nChw16c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(s32, any, f32, nChw16c, fmt_order_reverse)) + REG_REORDER_P(REG_SR(s32, any, s32, nChw16c, fmt_order_keep)) +#endif + REG_REORDER_P(REG_SR(s32, any, s32, nChw16c, fmt_order_reverse)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(s32, any, s8, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(s32, any, u8, nChw16c)) - REG_REORDER_P(REG_SR_BIDIR(s32, any, f32, nChw8c)) - REG_REORDER_P(REG_SR_BIDIR(s32, any, s32, nChw8c)) +#endif + REG_REORDER_P(REG_SR(s32, any, f32, nChw8c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(s32, any, f32, nChw8c, fmt_order_reverse)) + REG_REORDER_P(REG_SR(s32, any, s32, nChw8c, fmt_order_keep)) +#endif + REG_REORDER_P(REG_SR(s32, any, s32, nChw8c, fmt_order_reverse)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(s32, any, s8, nChw8c)) REG_REORDER_P(REG_SR_BIDIR(s32, any, u8, nChw8c)) @@ -48,6 +59,7 @@ const impl_list_map_t regular_s32_impl_list_map { REG_REORDER_P(REG_SR(s32, any, s32, any, fmt_order_any, spec_reference)) REG_REORDER_P(REG_SR(s32, any, s8, any, fmt_order_any, spec_reference)) REG_REORDER_P(REG_SR(s32, any, u8, any, fmt_order_any, spec_reference)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_s8.cpp b/src/cpu/reorder/cpu_reorder_regular_s8.cpp index be3de9f0fb4..a9faa30f56f 100644 --- a/src/cpu/reorder/cpu_reorder_regular_s8.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_s8.cpp @@ -40,28 +40,46 @@ const impl_list_map_t regular_s8_impl_list_map { DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64_jit_uni_reorder_t)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(s8, any, f32, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(s8, any, s32, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(s8, any, bf16, nChw16c)) - REG_REORDER_P(REG_SR_BIDIR(s8, any, s8, nChw16c)) + + REG_REORDER_P(REG_SR(s8, any, s8, nChw16c, fmt_order_keep)) +#endif + REG_REORDER_P(REG_SR(s8, any, s8, nChw16c, fmt_order_reverse)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(s8, any, u8, nChw16c)) - REG_REORDER_P(REG_SR_BIDIR(s8, any, f32, nChw8c)) +#endif + REG_REORDER_P(REG_SR(s8, any, f32, nChw8c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(s8, any, f32, nChw8c, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(s8, any, s32, nChw8c)) REG_REORDER_P(REG_SR_BIDIR(s8, any, s8, nChw8c)) REG_REORDER_P(REG_SR_BIDIR(s8, any, u8, nChw8c)) REG_REORDER_P(REG_SR_BIDIR(s8, any, f32, OIhw4i16o4i)) REG_REORDER_P(REG_SR_BIDIR(s8, any, bf16, OIhw4i16o4i)) - REG_REORDER_P(REG_SR_BIDIR(s8, any, s8, OIhw4i16o4i)) +#endif + REG_REORDER_P(REG_SR(s8, any, s8, OIhw4i16o4i, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(s8, any, s8, OIhw4i16o4i, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(s8, any, f32, gOIhw4i16o4i)) REG_REORDER_P(REG_SR_BIDIR(s8, any, bf16, gOIhw4i16o4i)) REG_REORDER_P(REG_SR_BIDIR(s8, any, s8, gOIhw4i16o4i)) +#endif REG_REORDER_P(REG_SR(s8, any, f32, any, fmt_order_any, spec_reference)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(s8, any, s32, any, fmt_order_any, spec_reference)) REG_REORDER_P(REG_SR(s8, any, bf16, any, fmt_order_any, spec_reference)) +#endif REG_REORDER_P(REG_SR(s8, any, s8, any, fmt_order_any, spec_reference)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(s8, any, u8, any, fmt_order_any, spec_reference)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_u8.cpp b/src/cpu/reorder/cpu_reorder_regular_u8.cpp index 52ace2b545d..4a5bf510aaa 100644 --- a/src/cpu/reorder/cpu_reorder_regular_u8.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_u8.cpp @@ -50,21 +50,37 @@ const impl_list_map_t regular_u8_impl_list_map { REG_REORDER_P(REG_FAST_DIRECT_COPY_AFTER_JIT(u8, s8)) REG_REORDER_P(REG_FAST_DIRECT_COPY_AFTER_JIT(u8, u8)) - REG_REORDER_P(REG_SR_BIDIR(u8, any, f32, nChw16c)) + REG_REORDER_P(REG_SR(u8, any, f32, nChw16c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(u8, any, f32, nChw16c, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(u8, any, s32, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(u8, any, bf16, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(u8, any, s8, nChw16c)) - REG_REORDER_P(REG_SR_BIDIR(u8, any, u8, nChw16c)) - REG_REORDER_P(REG_SR_BIDIR(u8, any, f32, nChw8c)) + + REG_REORDER_P(REG_SR(u8, any, u8, nChw16c, fmt_order_keep)) +#endif + REG_REORDER_P(REG_SR(u8, any, u8, nChw16c, fmt_order_reverse)) + REG_REORDER_P(REG_SR(u8, any, f32, nChw8c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(u8, any, f32, nChw8c, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(u8, any, s32, nChw8c)) REG_REORDER_P(REG_SR_BIDIR(u8, any, s8, nChw8c)) - REG_REORDER_P(REG_SR_BIDIR(u8, any, u8, nChw8c)) + + REG_REORDER_P(REG_SR(u8, any, u8, nChw8c, fmt_order_keep)) +#endif + REG_REORDER_P(REG_SR(u8, any, u8, nChw8c, fmt_order_reverse)) REG_REORDER_P(REG_SR(u8, any, f32, any, fmt_order_any, spec_reference)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(u8, any, s32, any, fmt_order_any, spec_reference)) REG_REORDER_P(REG_SR(u8, any, bf16, any, fmt_order_any, spec_reference)) +#endif REG_REORDER_P(REG_SR(u8, any, u8, any, fmt_order_any, spec_reference)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(u8, any, s8, any, fmt_order_any, spec_reference)) +#endif nullptr, }}, From c54a05626aae0a1780e29826a9041789e586aee4 Mon Sep 17 00:00:00 2001 From: Anton Voronov Date: Thu, 10 Feb 2022 14:54:15 +0300 Subject: [PATCH 2/2] simple reorder: disabled unused bf16 primitives --- src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp | 2 ++ src/cpu/reorder/cpu_reorder_regular_bf16.cpp | 21 ++++++++++++------- .../reorder/cpu_reorder_regular_f32_bf16.cpp | 20 ++++++++++++++---- src/cpu/reorder/cpu_reorder_regular_u8.cpp | 4 +--- 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp b/src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp index 72cc1acc552..7f1ea668f48 100644 --- a/src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp +++ b/src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp @@ -127,7 +127,9 @@ const impl_list_map_t comp_s8_s8_impl_list_map { REG_REORDER_P(REG_SR(s8, ihwo, s8, OIhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, oihw, s8, OIhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, hwio, s8, OIhw16i16o4i, fmt_order_keep, spec_conv_req_comp)) +#endif REG_REORDER_P(REG_SR(s8, iohw, s8, OIhw4i16o4i, fmt_order_keep, spec_conv_req_comp)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(s8, iohw, s8, OIhw2i8o4i, fmt_order_keep, spec_conv_req_comp)) REG_REORDER_P(REG_SR(s8, iohw, s8, OIhw4o4i, fmt_order_keep, spec_conv_req_comp)) #endif diff --git a/src/cpu/reorder/cpu_reorder_regular_bf16.cpp b/src/cpu/reorder/cpu_reorder_regular_bf16.cpp index 63a6fb7c811..3e1f207f35e 100644 --- a/src/cpu/reorder/cpu_reorder_regular_bf16.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_bf16.cpp @@ -29,21 +29,26 @@ const impl_list_map_t regular_bf16_impl_list_map { REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_blk_reorder_t))) REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))) -// todo: [AV] disable unused simple reorder primitives - REG_REORDER_P(REG_SR_BIDIR(bf16, any, f32, nCw16c)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(bf16, any, f32, nCw16c, fmt_order_keep)) +#endif + REG_REORDER_P(REG_SR(bf16, any, f32, nCw16c, fmt_order_reverse)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, f32, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, f32, nCdhw16c)) - +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(bf16, any, s8, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, s8, nCdhw16c)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, u8, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, u8, nCdhw16c)) - - REG_REORDER_P(REG_SR_BIDIR(bf16, any, bf16, nCw16c)) +#endif + REG_REORDER_P(REG_SR(bf16, any, bf16, nCw16c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(bf16, any, bf16, nCw16c, fmt_order_reverse)) +#endif REG_REORDER_P(REG_SR_BIDIR(bf16, any, bf16, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, bf16, nCdhw16c)) - +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR_BIDIR(bf16, any, f32, OIdhw16o16i)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, f32, OIdhw16i16o)) @@ -52,11 +57,13 @@ const impl_list_map_t regular_bf16_impl_list_map { REG_REORDER_P(REG_SR_BIDIR(bf16, any, u8, OIdhw16o16i)) REG_REORDER_P(REG_SR_BIDIR(bf16, any, u8, OIdhw16i16o)) - +#endif REG_REORDER_P(REG_SR(bf16, any, bf16, any, fmt_order_any, spec_reference)) REG_REORDER_P(REG_SR(bf16, any, f32, any, fmt_order_any, spec_reference)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(bf16, any, s8, any, fmt_order_any, spec_reference)) REG_REORDER_P(REG_SR(bf16, any, u8, any, fmt_order_any, spec_reference)) +#endif nullptr, }}, diff --git a/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp b/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp index 7559a9ac9dc..c8d522f2214 100644 --- a/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_f32_bf16.cpp @@ -30,20 +30,32 @@ const impl_list_map_t regular_f32_bf16_impl_list_map { REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_blk_reorder_t))) REG_REORDER_P(DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))) -// todo: [AV] disable unused simple reorder primitives - REG_REORDER_P(REG_SR_BIDIR(f32, ncw, bf16, nCw16c)) - REG_REORDER_P(REG_SR_BIDIR(f32, nchw, bf16, nChw16c)) + REG_REORDER_P(REG_SR(f32, ncw, bf16, nCw16c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, ncw, bf16, nCw16c, fmt_order_reverse)) +#endif + REG_REORDER_P(REG_SR(f32, nchw, bf16, nChw16c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, nchw, bf16, nChw16c, fmt_order_reverse)) +#endif REG_REORDER_P(REG_SR_BIDIR(f32, any, bf16, nChw16c)) - REG_REORDER_P(REG_SR_BIDIR(f32, any, bf16, nCdhw16c)) + REG_REORDER_P(REG_SR(f32, any, bf16, nCdhw16c, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM + REG_REORDER_P(REG_SR(f32, any, bf16, nCdhw16c, fmt_order_reverse)) +#endif REG_REORDER_P(REG_SR(f32, oihw, bf16, OIhw8i16o2i, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(f32, goihw, bf16, gOIhw8i16o2i, fmt_order_keep)) +#endif REG_REORDER_P(REG_SR(f32, oihw, bf16, OIhw8o16i2o, fmt_order_keep)) +#ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(f32, goihw, bf16, gOIhw8o16i2o, fmt_order_keep)) REG_REORDER_P(REG_SR(f32, oihw, bf16, IOhw8o16i2o, fmt_order_keep)) REG_REORDER_P(REG_SR(f32, goihw, bf16, gIOhw8o16i2o, fmt_order_keep)) REG_REORDER_P(REG_SR(f32, oihw, bf16, OIhw16i16o, fmt_order_keep)) REG_REORDER_P(REG_SR(f32, goihw, bf16, gOIhw16i16o, fmt_order_keep)) +#endif REG_REORDER_P(REG_SR(f32, any, bf16, any, fmt_order_any, spec_reference)) diff --git a/src/cpu/reorder/cpu_reorder_regular_u8.cpp b/src/cpu/reorder/cpu_reorder_regular_u8.cpp index 4a5bf510aaa..2c5fdfe1427 100644 --- a/src/cpu/reorder/cpu_reorder_regular_u8.cpp +++ b/src/cpu/reorder/cpu_reorder_regular_u8.cpp @@ -57,10 +57,8 @@ const impl_list_map_t regular_u8_impl_list_map { REG_REORDER_P(REG_SR_BIDIR(u8, any, s32, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(u8, any, bf16, nChw16c)) REG_REORDER_P(REG_SR_BIDIR(u8, any, s8, nChw16c)) - - REG_REORDER_P(REG_SR(u8, any, u8, nChw16c, fmt_order_keep)) #endif - REG_REORDER_P(REG_SR(u8, any, u8, nChw16c, fmt_order_reverse)) + REG_REORDER_P(REG_SR_BIDIR(u8, any, u8, nChw16c)) REG_REORDER_P(REG_SR(u8, any, f32, nChw8c, fmt_order_keep)) #ifdef ENABLE_UNUSED_PRIM REG_REORDER_P(REG_SR(u8, any, f32, nChw8c, fmt_order_reverse))