From fcec45d33a40b7e699cc6c18acb8458f880df39b Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Thu, 14 Nov 2024 19:45:43 +0100 Subject: [PATCH 1/4] Test wino perf --- src/cpu/acl/acl_convolution_utils.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/cpu/acl/acl_convolution_utils.cpp b/src/cpu/acl/acl_convolution_utils.cpp index b22ff7a373c..c1f67ad10ea 100644 --- a/src/cpu/acl/acl_convolution_utils.cpp +++ b/src/cpu/acl/acl_convolution_utils.cpp @@ -353,20 +353,6 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, memory_desc_t &weights_md, memory_desc_t &dst_md, memory_desc_t &bias_md, const convolution_desc_t &cd, const primitive_attr_t &attr) { - - // Under these conditions, fallback to faster GEMM-based convolution - // unless the user explicitly specifies Winograd algorithm - // clang-format off - if (one_of(true, src_md.dims[2] > 112, // ih - src_md.dims[3] > 112, // iw - src_md.dims[1] < 64, // ic - dst_md.dims[1] < 64, // oc - dnnl_get_max_threads() > 28) - && cd.alg_kind == alg_kind::convolution_auto) { - return status::unimplemented; - } - // clang-format on - // General Compute Library checks, memory tags are also set there acp.alg_winograd = true; CHECK(acl_init_conf(acp, src_md, weights_md, dst_md, bias_md, cd, attr)); From e79990f39be8ccfdaafdb56c90d410bf38692d63 Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Wed, 20 Nov 2024 19:17:22 +0100 Subject: [PATCH 2/4] Update comparison --- src/cpu/acl/acl_convolution_utils.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/cpu/acl/acl_convolution_utils.cpp b/src/cpu/acl/acl_convolution_utils.cpp index c1f67ad10ea..00c0f250f64 100644 --- a/src/cpu/acl/acl_convolution_utils.cpp +++ b/src/cpu/acl/acl_convolution_utils.cpp @@ -353,6 +353,18 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, memory_desc_t &weights_md, memory_desc_t &dst_md, memory_desc_t &bias_md, const convolution_desc_t &cd, const primitive_attr_t &attr) { + + // Under these conditions, fallback to faster GEMM-based convolution + // unless the user explicitly specifies Winograd algorithm + // clang-format off + if (one_of(true, src_md.dims[1] < 64, // ic + dst_md.dims[1] < 64, // oc + dnnl_get_max_threads() > 28) + && cd.alg_kind == alg_kind::convolution_auto) { + return status::unimplemented; + } + // clang-format on + // General Compute Library checks, memory tags are also set there acp.alg_winograd = true; CHECK(acl_init_conf(acp, src_md, weights_md, dst_md, bias_md, cd, attr)); From 5827c4d42a7e459d0aa16acee3c98079cb2e6f42 Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Tue, 3 Dec 2024 20:12:44 +0100 Subject: [PATCH 3/4] Update ih and iw --- src/cpu/acl/acl_convolution_utils.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cpu/acl/acl_convolution_utils.cpp b/src/cpu/acl/acl_convolution_utils.cpp index 00c0f250f64..fb7f5a330a1 100644 --- a/src/cpu/acl/acl_convolution_utils.cpp +++ b/src/cpu/acl/acl_convolution_utils.cpp @@ -357,7 +357,9 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, // Under these conditions, fallback to faster GEMM-based convolution // unless the user explicitly specifies Winograd algorithm // clang-format off - if (one_of(true, src_md.dims[1] < 64, // ic + if (one_of(true, src_md.dims[2] < 32, // ih + src_md.dims[3] < 32, // iw + src_md.dims[1] < 64, // ic dst_md.dims[1] < 64, // oc dnnl_get_max_threads() > 28) && cd.alg_kind == alg_kind::convolution_auto) { From d82ec002d7c870c3934eb667d7e779345ba8a18c Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Tue, 3 Dec 2024 20:15:09 +0100 Subject: [PATCH 4/4] Remove threads limitations --- src/cpu/acl/acl_convolution_utils.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cpu/acl/acl_convolution_utils.cpp b/src/cpu/acl/acl_convolution_utils.cpp index fb7f5a330a1..258743fc50c 100644 --- a/src/cpu/acl/acl_convolution_utils.cpp +++ b/src/cpu/acl/acl_convolution_utils.cpp @@ -360,8 +360,7 @@ status_t init_conf_wino(acl_conv_conf_t &acp, memory_desc_t &src_md, if (one_of(true, src_md.dims[2] < 32, // ih src_md.dims[3] < 32, // iw src_md.dims[1] < 64, // ic - dst_md.dims[1] < 64, // oc - dnnl_get_max_threads() > 28) + dst_md.dims[1] < 64) // oc && cd.alg_kind == alg_kind::convolution_auto) { return status::unimplemented; }