From 6224088c823c00a51a1f8053f39ae0bda7e8560a Mon Sep 17 00:00:00 2001
From: "River.Li"
Date: Fri, 20 Dec 2024 09:46:27 +0800
Subject: [PATCH] [GPU][MTL] Resolve long-token performance regression on the
 MTL 125H platform

PR27831 enabled MLP fusion in cldnn, which can improve performance, but the
fusion was not enabled on MTL 125H because its EU count is 112, below the
128 threshold. MTL 125H therefore saw no improvement from that change, and
PR26940, which integrated dynamic quantization, caused a performance drop of
about 10% on MTL 125H for a 6K input token size. Enabling MLP fusion on
MTL 125H, by lowering the EU-count threshold from 128 to 112, removes the
regression.
---
 .../src/graph/graph_optimizer/prepare_primitive_fusing.cpp    | 2 +-
 src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
index 05f907dcd81f0a..b12e6a81ffadae 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
@@ -169,7 +169,7 @@ void prepare_primitive_fusing::fuse_swiglu(program &p) {
     GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1)
         disable_fc_swiglu_fusion = true;
     // Apply only for high performant GPU
-    if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128)
+    if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 112)
         return;
 
     if (p.get_engine().get_device_info().supports_immad)
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 44d68740a0dfb7..415faba3670a5c 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -926,7 +926,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             disable_fc_swiglu_fusion = true;
         // mlp fusion is only supported for cldnn on high performant GPUis
         bool fuse_mlp_swiglu = !device_info.supports_immad &&
-                               device_info.execution_units_count >= 128 &&
+                               device_info.execution_units_count >= 112 &&
                                !disable_fc_swiglu_fusion;
         if (!disable_horizontal_fc_fusion)
             manager.register_pass<ov::intel_gpu::FullyConnectedHorizontalFusion>(fuse_mlp_swiglu);
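
For illustration, a minimal standalone sketch of the EU-count gate that both
hunks relax. The device_info struct and should_fuse_mlp_swiglu helper below
are simplified stand-ins for this note, not the actual cldnn types; the real
checks live in fuse_swiglu() and TransformationsPipeline::apply() above.

    // Standalone sketch of the gating logic changed by this patch.
    // Names here are illustrative stand-ins, not the cldnn API.
    #include <cstdint>
    #include <iostream>

    struct device_info {
        uint32_t execution_units_count;
        bool supports_immad;  // onednn/systolic path; MTL 125H reports false
    };

    bool should_fuse_mlp_swiglu(const device_info& info,
                                bool disable_fc_swiglu_fusion,
                                uint32_t eu_threshold) {
        // Fusion applies only on the cldnn path (no immad), on GPUs with
        // at least eu_threshold EUs, and when not disabled via debug config.
        return !info.supports_immad &&
               info.execution_units_count >= eu_threshold &&
               !disable_fc_swiglu_fusion;
    }

    int main() {
        const device_info mtl_125h{112, false};
        // Old threshold (128): MTL 125H is excluded, fusion stays off.
        std::cout << should_fuse_mlp_swiglu(mtl_125h, false, 128) << '\n';  // 0
        // New threshold (112): MTL 125H qualifies, fusion is enabled.
        std::cout << should_fuse_mlp_swiglu(mtl_125h, false, 112) << '\n';  // 1
    }

With the old threshold the gate returns false for a 112-EU device, so
MTL 125H stayed on the unfused path; with the new threshold it returns true,
which is the behavior this patch enables.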