From 9af94acf73a583d9dbe548b443baab25a1292ef8 Mon Sep 17 00:00:00 2001 From: Chon Ming Lee Date: Thu, 14 Nov 2024 13:47:37 +0800 Subject: [PATCH] Add more mixed type of bfyx to eltwise_blocked_opt Improve yolo_v5m performance by using eltwise_blocked_opt for batchNormalization, which helps on platforms that have XMX Signed-off-by: Chon Ming Lee --- .../eltwise/eltwise_kernel_blocked_opt.cpp | 18 +++++++++++++++--- .../tests/unit/test_cases/eltwise_gpu_test.cpp | 10 ++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp index 411e6878ebd72e..d95b008171db24 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp @@ -112,7 +112,7 @@ bool EltwiseKernel_blocked_opt::Validate(const Params& params) const { } const auto vec_size = SelectVecSizeFromFormat(ewParams.outputs[0]); - const auto input0 = ewParams.inputs[0]; + const auto& input0 = ewParams.inputs[0]; const auto& output = ewParams.outputs[0]; // Check that padding before features doesn't mis-align the blocks if (input0.Feature().pad.before % vec_size != 0 || output.Feature().pad.before % vec_size != 0) @@ -137,11 +137,22 @@ bool EltwiseKernel_blocked_opt::Validate(const Params& params) const { }; for (size_t i = 1; i < ewParams.inputs.size(); i++) { - if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(ewParams.inputs[i], input0))) + const auto& input = ewParams.inputs[i]; + if (input.LogicalSize() == input0.LogicalSize() && !(compareTensors(input, input0))) return false; - if (ewParams.inputs[i].Feature().pad.before % vec_size != 0) { + if (input.Feature().pad.before % vec_size != 0) { return false; } + if (input.GetLayout() == DataLayout::bfyx) { + bool 
is_valid = input.LogicalSize() == 1; // Scalar value broadcast + is_valid |= input.LogicalSize() % vec_size == 0 && // Feature value broadcast + input.LogicalSize() == input.Feature().v && + input.LogicalSize() == output.Feature().v && + GetInnerBatchBlockSize(input) == 1; + if (!is_valid) { + return false; + } + } } return true; @@ -422,6 +433,7 @@ static inline int SelectVecSizeFromFormat(const DataTensor& tensor) { static inline int GetInnerBatchBlockSize(const DataTensor& tensor) { auto layout = tensor.GetLayout(); switch (layout) { + case DataLayout::bfyx: case DataLayout::b_fs_yx_fsv4: case DataLayout::b_fs_yx_fsv16: case DataLayout::b_fs_zyx_fsv16: diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp index 8d67bc3f7db2ad..c6f39b15ea532a 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp @@ -4711,6 +4711,11 @@ struct eltwise_layout_test_params { #define CASE_ELTWISE_TEST7 eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref" #define CASE_ELTWISE_TEST8 eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref" #define CASE_ELTWISE_TEST9 eltwise_mode::eq, {4, 2, 4, 4}, {1, 1, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref" +#define CASE_ELTWISE_TEST10 eltwise_mode::sum, {4, 8, 1, 1}, {1, 8, 1, 1}, format::b_fs_yx_fsv32, format::bfyx, "eltwise_blocked_opt" +#define CASE_ELTWISE_TEST11 eltwise_mode::sum, {4, 8, 1, 1}, {1, 8, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_blocked_opt" +#define CASE_ELTWISE_TEST12 eltwise_mode::sum, {4, 16, 4, 4}, {1, 16, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_blocked_opt" +#define CASE_ELTWISE_TEST13 eltwise_mode::sum, {4, 7, 4, 4}, {1, 7, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref" 
+#define CASE_ELTWISE_TEST14 eltwise_mode::sum, {1, 8, 1, 1}, {4, 8, 1, 1}, format::bfyx, format::b_fs_yx_fsv32, "generic_eltwise_ref" class eltwise_layout_test : public BaseEltwiseTest { public: @@ -4800,6 +4805,11 @@ INSTANTIATE_TEST_SUITE_P(eltwise, eltwise_test_mixed_layout, eltwise_layout_test_params{CASE_ELTWISE_TEST7}, eltwise_layout_test_params{CASE_ELTWISE_TEST8}, eltwise_layout_test_params{CASE_ELTWISE_TEST9}, + eltwise_layout_test_params{CASE_ELTWISE_TEST10}, + eltwise_layout_test_params{CASE_ELTWISE_TEST11}, + eltwise_layout_test_params{CASE_ELTWISE_TEST12}, + eltwise_layout_test_params{CASE_ELTWISE_TEST13}, + eltwise_layout_test_params{CASE_ELTWISE_TEST14}, })); //