From fe528a62b28270404857c7a7d52c03aac60355d2 Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Tue, 24 Dec 2024 15:54:39 +0900 Subject: [PATCH] [GPU] Add cpu impl for resample --- samples/cpp/benchmark_app/inputs_filling.cpp | 8 +- samples/cpp/benchmark_app/utils.cpp | 1 + .../src/graph/impls/cpu/register.cpp | 1 + .../src/graph/impls/cpu/register.hpp | 1 + .../src/graph/impls/cpu/resample.cpp | 208 ++++++++++++++++++ .../src/graph/impls/registry/registry.hpp | 3 +- .../dynamic/interpolate.cpp | 38 ++++ 7 files changed, 255 insertions(+), 5 deletions(-) create mode 100644 src/plugins/intel_gpu/src/graph/impls/cpu/resample.cpp diff --git a/samples/cpp/benchmark_app/inputs_filling.cpp b/samples/cpp/benchmark_app/inputs_filling.cpp index d77dafaa803aa6..7a2ab0ac58c22c 100644 --- a/samples/cpp/benchmark_app/inputs_filling.cpp +++ b/samples/cpp/benchmark_app/inputs_filling.cpp @@ -778,10 +778,10 @@ std::map get_tensors(std::map get_inputs_info(const std::string& shape_ // Precision info.type = item.get_element_type(); + info.type = ov::element::Type_t::f32; // Partial Shape if (shape_map.count(name)) { if (shape_map.at(name).size() > 1) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp index 2b0dc5b212158c..e2cde9a32fb16c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp @@ -31,6 +31,7 @@ void register_implementations() { REGISTER_CPU(tile); REGISTER_CPU(select); REGISTER_CPU(reduce); + REGISTER_CPU(resample); } } // namespace cpu diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp index cb89eae29d8c56..b4cf2b42d5200f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp @@ -56,6 +56,7 @@ REGISTER_CPU(broadcast); REGISTER_CPU(tile); REGISTER_CPU(select); 
REGISTER_CPU(reduce);
+REGISTER_CPU(resample);
 
 #undef REGISTER_CPU
diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/resample.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/resample.cpp
new file mode 100644
index 00000000000000..5b3d53255336e9
--- /dev/null
+++ b/src/plugins/intel_gpu/src/graph/impls/cpu/resample.cpp
@@ -0,0 +1,232 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "impls/cpu/cpu_impl_helpers.hpp"
+#include "register.hpp"
+#include "resample_inst.h"
+#include "impls/registry/implementation_map.hpp"
+
+#include "openvino/op/interpolate.hpp"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+namespace cldnn {
+namespace cpu {
+
+// Host-side implementation of the resample primitive: execute_impl() wraps the locked
+// input/output memory in ov::Tensor views and runs ov::op::v4::Interpolate::evaluate() on them.
+struct resample_impl : public typed_primitive_impl<resample> {
+    using parent = typed_primitive_impl<resample>;
+    using parent::parent;
+
+    using InterpolateMode = ov::op::v4::Interpolate::InterpolateMode;
+    using CoordinateTransformMode = ov::op::v4::Interpolate::CoordinateTransformMode;
+    using Nearest_mode = ov::op::v4::Interpolate::NearestMode;
+    using InterpolateAttrs = ov::op::v4::Interpolate::InterpolateAttrs;
+    using ShapeCalcMode = ov::op::v4::Interpolate::ShapeCalcMode;
+
+    // Created on the first execute_impl() call and reused afterwards.
+    std::shared_ptr<ov::op::Op> op;
+
+    // Copies of the resample primitive attributes, filled in by set_node_params().
+    std::vector<int64_t> sizes;
+    std::vector<float> scales;
+    std::vector<int64_t> axes;
+    std::vector<size_t> pads_begin;
+    std::vector<size_t> pads_end;
+    InterpolateMode operation_type = InterpolateMode::LINEAR;
+    ShapeCalcMode shape_calc_mode = ShapeCalcMode::SIZES;
+    int32_t antialias = 0;
+    float cube_coeff = -0.75f;
+    CoordinateTransformMode coord_trans_mode = CoordinateTransformMode::HALF_PIXEL;
+    Nearest_mode round_mode = Nearest_mode::ROUND_PREFER_FLOOR;
+
+    DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cpu::resample_impl)
+
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<resample_impl>(*this);
+    }
+
+    resample_impl() : parent("resample_cpu_impl") {}
+
+    explicit resample_impl(const resample_node& outer) {
+        set_node_params(outer);
+    }
+
+    // Copies every primitive attribute that execute_impl() needs into this object.
+    void set_node_params(const program_node& arg) override {
+        OPENVINO_ASSERT(arg.is_type<resample>(), "[GPU] Incorrect program_node type");
+        const auto prim = arg.as<resample>().get_primitive();
+
+        sizes = prim->sizes;
+        scales = prim->scales;
+        axes = prim->axes;
+
+        pads_begin = prim->pads_begin;
+        pads_end = prim->pads_end;
+        operation_type = prim->operation_type;
+        shape_calc_mode = prim->shape_calc_mode;
+        antialias = prim->antialias;
+        cube_coeff = prim->cube_coeff;
+        coord_trans_mode = prim->coord_trans_mode;
+        round_mode = prim->round_mode;
+    }
+
+    // Stores the cached attributes; `op` is not stored because execute_impl() recreates it.
+    void save(BinaryOutputBuffer& ob) const override {
+        parent::save(ob);
+        ob << sizes;
+        ob << scales;
+        ob << axes;
+        ob << pads_begin;
+        ob << pads_end;
+        ob << make_data(&operation_type, sizeof(InterpolateMode));
+        ob << make_data(&shape_calc_mode, sizeof(ShapeCalcMode));
+        ob << antialias;
+        ob << cube_coeff;
+        ob << make_data(&coord_trans_mode, sizeof(CoordinateTransformMode));
+        ob << make_data(&round_mode, sizeof(Nearest_mode));
+    }
+
+    // Restores the attributes in exactly the order save() wrote them.
+    void load(BinaryInputBuffer& ib) override {
+        parent::load(ib);
+        ib >> sizes;
+        ib >> scales;
+        ib >> axes;
+        ib >> pads_begin;
+        ib >> pads_end;
+        ib >> make_data(&operation_type, sizeof(InterpolateMode));
+        ib >> make_data(&shape_calc_mode, sizeof(ShapeCalcMode));
+        ib >> antialias;
+        ib >> cube_coeff;
+        ib >> make_data(&coord_trans_mode, sizeof(CoordinateTransformMode));
+        ib >> make_data(&round_mode, sizeof(Nearest_mode));
+    }
+
+    // Runs Interpolate on the host: waits for the dependency events (unless they can be passed
+    // through), builds host tensors over the locked memory and calls op->evaluate().
+    event::ptr execute_impl(const std::vector<event::ptr>& events, resample_inst& instance) override {
+        OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "resample::execute_impl");
+        auto& stream = instance.get_network().get_stream();
+
+        const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl();
+
+        if (!pass_through_events) {
+            stream.wait_for_events(events);
+        }
+
+        auto params = instance.get_impl_params();
+
+        // Set input tensors: every input is locked separately and described by its own layout.
+        // The locks live in a container so the mapped pointers stay valid until evaluate() returns.
+        using input_lock_t = cldnn::mem_lock<uint8_t, mem_lock_type::read>;
+        std::vector<std::unique_ptr<input_lock_t>> input_locks;
+        ov::TensorVector input_host_tensors;
+
+        for (size_t i = 0; i < params->input_layouts.size(); i++) {
+            input_locks.push_back(cldnn::make_unique<input_lock_t>(instance.input_memory_ptr(i), stream));
+            input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_locks.back()->data()));
+        }
+
+        // ov::Tensor built from a host pointer does not copy the data, so the buffers behind the
+        // synthesized inputs are declared at function scope to stay alive until evaluate() has run.
+        std::vector<int32_t> target_shape;
+        std::vector<float> computed_scales;
+        // Local copy: the member must keep the primitive's mode for the next execution.
+        auto effective_calc_mode = shape_calc_mode;
+
+        if (input_host_tensors.size() == 1) {
+            const auto target_shape_sizes = params->output_layouts[0].get_tensor().sizes();
+            for (size_t i = 0; i < axes.size(); i++)
+                target_shape.push_back(target_shape_sizes[i]);
+
+            input_host_tensors.push_back(ov::Tensor(ov::element::i32, {target_shape.size()}, target_shape.data()));
+
+            if (shape_calc_mode == ShapeCalcMode::SIZES) {
+                // NOTE(review): dimensions are paired by position i, not by axes[i] - confirm this
+                // is valid for the axes and the tensor dimension order used by callers.
+                const auto input_shape_sizes = params->input_layouts[0].get_tensor().sizes();
+                computed_scales.resize(sizes.size());
+                for (size_t i = 0; i < sizes.size(); i++)
+                    computed_scales[i] = static_cast<float>(sizes[i]) / static_cast<float>(input_shape_sizes[i]);
+
+                input_host_tensors.push_back(ov::Tensor(ov::element::f32, {computed_scales.size()}, computed_scales.data()));
+                effective_calc_mode = ShapeCalcMode::SCALES;
+            } else if (shape_calc_mode == ShapeCalcMode::SCALES) {
+                input_host_tensors.push_back(ov::Tensor(ov::element::f32, {scales.size()}, scales.data()));
+            } else {
+                OPENVINO_ASSERT(false, "[GPU] Not supported Interpolate ShapeCalcMode for primitive with id ", instance.id());
+            }
+
+            input_host_tensors.push_back(ov::Tensor(ov::element::i64, {axes.size()}, axes.data()));
+        }
+
+        // set output tensors
+        ov::TensorVector output_host_tensors;
+        auto output_mem_ptr = instance.output_memory_ptr();
+        cldnn::mem_lock<uint8_t, mem_lock_type::read_write> output_lock(output_mem_ptr, stream);
+
+        output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data()));
+
+        if (!op) {
+            // Set Attrs
+            InterpolateAttrs attrs;
+            attrs.mode = operation_type;
+            attrs.shape_calculation_mode = effective_calc_mode;
+            attrs.pads_begin = pads_begin;
+            attrs.pads_end = pads_end;
+            attrs.coordinate_transformation_mode = coord_trans_mode;
+            attrs.nearest_mode = round_mode;
+            attrs.antialias = antialias;
+            attrs.cube_coeff = cube_coeff;
+
+            auto interp = std::make_shared<ov::op::v4::Interpolate>();
+            interp->set_attrs(attrs);
+            op = interp;
+        }
+
+        OPENVINO_ASSERT(op->evaluate(output_host_tensors, input_host_tensors),
+                        "[GPU] Couldn't execute resample primitive with id ", instance.id());
+
+        if (pass_through_events) {
+            return stream.group_events(events);
+        }
+
+        return make_output_event(stream, instance.is_output());
+    }
+
+    void init_kernels(const kernels_cache& , const kernel_impl_params&) override {}
+
+    void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {}
+
+public:
+    static std::unique_ptr<primitive_impl> create(const resample_node& arg, const kernel_impl_params& impl_param) {
+        return make_unique<resample_impl>();
+    }
+};
+
+namespace detail {
+
+attach_resample_impl::attach_resample_impl() {
+    auto formats = {
+        format::bfyx,
+    };
+
+    auto types = {
+        data_types::f32,
+    };
+
+    implementation_map<resample>::add(impl_types::cpu, shape_types::static_shape, resample_impl::create, types, formats);
+    implementation_map<resample>::add(impl_types::cpu, shape_types::dynamic_shape, resample_impl::create, types, formats);
+}
+
+}  // namespace detail
+}  // namespace cpu
+}  // namespace cldnn
+
+BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::resample_impl)
diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp
index b7dbbaef6e64f1..ca4e2eef56c5da 100644
--- a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp
+++ b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp
@@ -139,6 +139,7 @@ REGISTER_IMPLS(reshape);
 REGISTER_IMPLS(non_max_suppression);
 REGISTER_IMPLS(softmax);
 REGISTER_IMPLS(range);
+//REGISTER_IMPLS(resample);
 REGISTER_IMPLS(select);
 REGISTER_IMPLS(scatter_update);
 REGISTER_IMPLS(scatter_elements_update);
@@ -200,7 +201,7 @@ REGISTER_DEFAULT_IMPLS(space_to_batch, OCL_S);
 REGISTER_DEFAULT_IMPLS(space_to_depth, OCL_S);
 REGISTER_DEFAULT_IMPLS(swiglu, OCL_S, OCL_D);
 REGISTER_DEFAULT_IMPLS(gather_tree, OCL_S);
-REGISTER_DEFAULT_IMPLS(resample, OCL_S);
+REGISTER_DEFAULT_IMPLS(resample, CPU_S, OCL_S);
 REGISTER_DEFAULT_IMPLS(grn, OCL_S);
 REGISTER_DEFAULT_IMPLS(ctc_greedy_decoder,
OCL_S);
 REGISTER_DEFAULT_IMPLS(ctc_loss, OCL_S);
diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/interpolate.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/interpolate.cpp
index 133a515fe58104..cbabf2edfaefbb 100644
--- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/interpolate.cpp
+++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/interpolate.cpp
@@ -545,6 +545,44 @@ INSTANTIATE_TEST_SUITE_P(InterpolateLinear_Layout_Test, InterpolateLayerGPUTest,
                                  ::testing::Values(true, false)),
                          InterpolateLayerGPUTest::getTestCaseName);
 
+
+const std::vector<ShapeParams> shapeParams4D_LargeShape = {
+    ShapeParams{
+        ov::op::v4::Interpolate::ShapeCalcMode::SCALES,
+        //InputShape{{-1, {2, 100}, -1, -1}, {{1, 64, 148, 148}}},
+        InputShape{{-1, -1, -1, -1}, {{1, 3, 48, 48}}},  // min shape for failure
+        ov::test::utils::InputLayerType::CONSTANT,
+        ov::test::utils::InputLayerType::CONSTANT,
+        {{1.f, 1.f, 2.f, 2.f}},
+        defaultAxes4D.front()
+    },
+    // ShapeParams{
+    //     ov::op::v4::Interpolate::ShapeCalcMode::SIZES,
+    //     InputShape{{-1, -1, -1, -1}, {{1, 3, 48, 48}}},
+    //     ov::test::utils::InputLayerType::CONSTANT,
+    //     ov::test::utils::InputLayerType::CONSTANT,
+    //     {{1, 3, 144, 144}},
+    //     defaultAxes4D.front()
+    // },
+};
+
+const auto interpolateCasesLinearOnnx_AlignCorners_Floor = ::testing::Combine(
+        ::testing::Values(ov::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX),
+        ::testing::Values(ov::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS),
+        ::testing::Values(ov::op::v4::Interpolate::NearestMode::FLOOR),
+        ::testing::ValuesIn(antialias),
+        ::testing::Values(std::vector<size_t>{0, 0, 0, 0}),
+        ::testing::Values(std::vector<size_t>{0, 0, 0, 0}),
+        ::testing::ValuesIn(cubeCoefs));
+
+INSTANTIATE_TEST_SUITE_P(InterpolateLinearOnnx_LargeShape_Layout_Test, InterpolateLayerGPUTest,
+        ::testing::Combine(
+            interpolateCasesLinearOnnx_AlignCorners_Floor,
+            ::testing::ValuesIn(shapeParams4D_LargeShape),
+            ::testing::Values(ov::element::f32),
+            ::testing::Values(true)),
+        InterpolateLayerGPUTest::getTestCaseName);
+
 const auto interpolateCasesCubic_Smoke = ::testing::Combine(
         ::testing::Values(ov::op::v4::Interpolate::InterpolateMode::CUBIC),
         ::testing::ValuesIn(coordinateTransformModes_Smoke),