diff --git a/samples/cpp/benchmark_app/inputs_filling.cpp b/samples/cpp/benchmark_app/inputs_filling.cpp index d77dafaa803aa6..7a2ab0ac58c22c 100644 --- a/samples/cpp/benchmark_app/inputs_filling.cpp +++ b/samples/cpp/benchmark_app/inputs_filling.cpp @@ -778,10 +778,10 @@ std::map get_tensors(std::map get_inputs_info(const std::string& shape_ // Precision info.type = item.get_element_type(); + info.type = ov::element::Type_t::f32; // Partial Shape if (shape_map.count(name)) { if (shape_map.at(name).size() > 1) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp index 2b0dc5b212158c..e2cde9a32fb16c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp @@ -31,6 +31,7 @@ void register_implementations() { REGISTER_CPU(tile); REGISTER_CPU(select); REGISTER_CPU(reduce); + REGISTER_CPU(resample); } } // namespace cpu diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp index cb89eae29d8c56..b4cf2b42d5200f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp @@ -56,6 +56,7 @@ REGISTER_CPU(broadcast); REGISTER_CPU(tile); REGISTER_CPU(select); REGISTER_CPU(reduce); +REGISTER_CPU(resample); #undef REGISTER_CPU diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/resample.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/resample.cpp new file mode 100644 index 00000000000000..8ecd1c98f26f9e --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/resample.cpp @@ -0,0 +1,218 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "impls/cpu/cpu_impl_helpers.hpp" +#include "register.hpp" +#include "resample_inst.h" +#include "impls/registry/implementation_map.hpp" + +#include "openvino/op/interpolate.hpp" + +#include +#include +#include +#include + +namespace cldnn { +namespace cpu { + +namespace { + + + +} // namespace + +struct resample_impl : public typed_primitive_impl { + using parent = typed_primitive_impl; + using parent::parent; + + using InterpolateMode = ov::op::v4::Interpolate::InterpolateMode; + using CoordinateTransformMode = ov::op::v4::Interpolate::CoordinateTransformMode; + using Nearest_mode = ov::op::v4::Interpolate::NearestMode; + using InterpolateAttrs = ov::op::v4::Interpolate::InterpolateAttrs; + using ShapeCalcMode = ov::op::v4::Interpolate::ShapeCalcMode; + + std::shared_ptr op; + + std::vector sizes; + std::vector scales; + std::vector axes; + std::vector pads_begin; + std::vector pads_end; + InterpolateMode operation_type = InterpolateMode::LINEAR; + ShapeCalcMode shape_calc_mode = ShapeCalcMode::SIZES; + int32_t antialias = 0; + float cube_coeff = -0.75f; + CoordinateTransformMode coord_trans_mode = CoordinateTransformMode::HALF_PIXEL; + Nearest_mode round_mode = Nearest_mode::ROUND_PREFER_FLOOR; + + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cpu::resample_impl) + + std::unique_ptr clone() const override { + return make_unique(*this); + } + + resample_impl() : parent("resample_cpu_impl") {} + + explicit resample_impl(const resample_node& outer) { + set_node_params(outer); + } + + void set_node_params(const program_node& arg) override { + OPENVINO_ASSERT(arg.is_type(), "[GPU] Incorrect program_node type"); + const auto& node = arg.as(); + + sizes = node.get_primitive()->sizes; + scales = node.get_primitive()->scales; + axes = node.get_primitive()->axes; + + pads_begin = node.get_primitive()->pads_begin; + pads_end = node.get_primitive()->pads_end; + operation_type = node.get_primitive()->operation_type; + shape_calc_mode = node.get_primitive()->shape_calc_mode; + antialias = node.get_primitive()->antialias; + cube_coeff = node.get_primitive()->cube_coeff; + coord_trans_mode = node.get_primitive()->coord_trans_mode; + round_mode = node.get_primitive()->round_mode; + } + + // void save(BinaryOutputBuffer& ob) const override { + // parent::save(ob); + // ob << make_data(&mode, sizeof(eltwise_mode)); + // ob << coefficients; + // } + + // void load(BinaryInputBuffer& ib) override { + // parent::load(ib); + // ib >> make_data(&mode, sizeof(eltwise_mode)); + // ib >> coefficients; + // } + + event::ptr execute_impl(const std::vector& events, resample_inst& instance) override { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "resample::execute_impl"); + auto& stream = instance.get_network().get_stream(); + + const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); + + if (!pass_through_events) { + stream.wait_for_events(events); + } + + auto params = instance.get_impl_params(); + + // Set input tensors + ov::TensorVector input_host_tensors; + auto input_mem_ptr = instance.input_memory_ptr(); + cldnn::mem_lock input_lock(input_mem_ptr, stream); + + auto input_rank = params->input_layouts[0].get_rank(); + for (size_t i = 0; i < params->input_layouts.size(); i++) { + auto input_tensor = make_tensor(params->input_layouts[0], input_lock.data()); + input_host_tensors.push_back(input_tensor); + } + + if (scales.size() < input_rank) + scales.insert(scales.begin(), input_rank - scales.size(), 1.f); + + for(size_t i = (input_rank - axes.size()); i > 0; i--) + axes.insert(axes.begin(), 1, (i - 1)); + + if (input_host_tensors.size() == 1) { + auto target_shape_sizes = params->output_layouts[0].get_tensor().sizes(); + std::vector target_shape_ps; + for (size_t i = 0; i < input_rank; i++) + target_shape_ps.push_back(target_shape_sizes[i]); + + auto target_shape_tensor = ov::Tensor(ov::element::i32, {target_shape_ps.size()}, target_shape_ps.data()); + input_host_tensors.push_back(target_shape_tensor); + + if (shape_calc_mode == ov::op::util::InterpolateBase::ShapeCalcMode::SCALES) { + auto scales_tensor = ov::Tensor(ov::element::f32, {scales.size()}, scales.data()); + input_host_tensors.push_back(scales_tensor); + } else { + OPENVINO_ASSERT(false, "[GPU] Not supported Interpolate ShapeCalcMode of CPU impl", instance.id()); + } + + auto axes_tensor = ov::Tensor(ov::element::i64, {axes.size()}, axes.data()); + input_host_tensors.push_back(axes_tensor); + } + + // set output tensors + ov::TensorVector output_host_tensors; + auto output_mem_ptr = instance.output_memory_ptr(); + cldnn::mem_lock output_lock(output_mem_ptr, stream); + + auto output_tensor = make_tensor(params->output_layouts[0], output_lock.data()); + output_host_tensors.push_back(output_tensor); + + // Set Attrs + InterpolateAttrs attrs; + attrs.mode = operation_type; + attrs.shape_calculation_mode = shape_calc_mode; + attrs.pads_begin = pads_begin; + attrs.pads_end = pads_end; + attrs.coordinate_transformation_mode = coord_trans_mode; + attrs.nearest_mode = round_mode; + attrs.antialias = antialias; + attrs.cube_coeff = cube_coeff; + + if (!op) { + auto interp = std::make_shared(); + interp->set_attrs(attrs); + op = interp; + } + + OPENVINO_ASSERT(op->evaluate(output_host_tensors, input_host_tensors), + "[GPU] Couldn't execute resample primitive with id ", instance.id()); + + if (pass_through_events) { + return stream.group_events(events); + } + + return make_output_event(stream, instance.is_output()); + } + + void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} + + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} + +public: + static std::unique_ptr create(const resample_node& arg, const kernel_impl_params& impl_param) { + return make_unique(); + } +}; + + +namespace detail { + +attach_resample_impl::attach_resample_impl() { + const auto types = {data_types::f32, data_types::i32}; + const auto formats = { + format::bfyx, + format::b_fs_yx_fsv16, + format::b_fs_yx_fsv32, + format::bs_fs_yx_bsv16_fsv16, + format::bs_fs_yx_bsv32_fsv16, + format::bs_fs_yx_bsv32_fsv32, + + format::bfzyx, + format::b_fs_zyx_fsv16, + format::b_fs_zyx_fsv32, + format::bs_fs_zyx_bsv16_fsv32, + format::bs_fs_zyx_bsv16_fsv16, + format::bs_fs_zyx_bsv32_fsv32, + format::bs_fs_zyx_bsv32_fsv16, + }; + + implementation_map::add(impl_types::cpu, shape_types::static_shape, resample_impl::create, types, formats); + implementation_map::add(impl_types::cpu, shape_types::dynamic_shape, resample_impl::create, types, formats); +} + +} // namespace detail + + +} // namespace cpu +} // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::resample_impl) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp index 2a38d20ac8c9bc..fb7bf0a98c176e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp @@ -72,7 +72,6 @@ void register_implementations() { REGISTER_OCL(swiglu); REGISTER_OCL(tile); REGISTER_OCL(gather_tree); - REGISTER_OCL(resample); REGISTER_OCL(grn); REGISTER_OCL(ctc_greedy_decoder); REGISTER_OCL(ctc_loss); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp index c65a23822a6922..f78e539db671f5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp @@ -47,7 +47,6 @@ #include "intel_gpu/primitives/reduce.hpp" #include "intel_gpu/primitives/region_yolo.hpp" #include "intel_gpu/primitives/reorg_yolo.hpp" -#include "intel_gpu/primitives/resample.hpp" #include "intel_gpu/primitives/reshape.hpp" #include "intel_gpu/primitives/reverse_sequence.hpp" #include "intel_gpu/primitives/rms.hpp" diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp index d1ef25dce6264a..a480617c1b690e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp @@ -4,6 +4,7 @@ #include "primitive_base.hpp" +#include "resample.hpp" #include "resample_inst.h" #include "kernel_selector/kernels/resample/resample_kernel_selector.h" #include "kernel_selector/kernels/resample/resample_kernel_base.h" @@ -174,42 +175,11 @@ struct resample_impl : typed_primitive_impl_ocl { } }; -namespace detail { - -attach_resample_impl::attach_resample_impl() { - std::set::key_type> keys; - - const auto types = {data_types::f16, data_types::f32, data_types::i8, data_types::u8, data_types::i32}; - const auto formats = { - format::bfyx, - format::b_fs_yx_fsv16, - format::b_fs_yx_fsv32, - format::bs_fs_yx_bsv16_fsv16, - format::bs_fs_yx_bsv32_fsv16, - format::bs_fs_yx_bsv32_fsv32, - - format::bfzyx, - format::b_fs_zyx_fsv16, - format::b_fs_zyx_fsv32, - format::bs_fs_zyx_bsv16_fsv32, - format::bs_fs_zyx_bsv16_fsv16, - format::bs_fs_zyx_bsv32_fsv32, - format::bs_fs_zyx_bsv32_fsv16, - }; - for (const auto type : types) { - for (const auto format : formats) { - keys.emplace(type, format); - } - } - - keys.emplace(data_types::f32, format::yxfb); - keys.emplace(data_types::f16, format::yxfb); - keys.emplace(data_types::f16, format::fs_b_yx_fsv32); - - implementation_map::add(impl_types::ocl, typed_primitive_impl_ocl::create, keys); +std::unique_ptr ResampleImplementationManager::create_impl(const program_node& node, const kernel_impl_params& params) const { + assert(node.is_type()); + return typed_primitive_impl_ocl::create(static_cast(node), params); } -} // namespace detail } // namespace ocl } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/resample.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/resample.hpp new file mode 100644 index 00000000000000..9bb50a2271e41c --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/resample.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "impls/registry/implementation_manager.hpp" +#include "program_node.h" +#include "resample_inst.h" + +#include +namespace cldnn { +namespace ocl { + +struct ResampleImplementationManager : public ImplementationManager { + OV_GPU_PRIMITIVE_IMPL("ocl::resample") + ResampleImplementationManager(shape_types shape_type, ValidateFunc vf = nullptr) : ImplementationManager(impl_types::ocl, shape_type, vf) {} + std::unique_ptr create_impl(const program_node& node, const kernel_impl_params& params) const override; + bool validate_impl(const program_node& node) const override { + return true; + } +}; + +} // namespace ocl +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp index b7dbbaef6e64f1..c015ca043b91b6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp @@ -139,6 +139,7 @@ REGISTER_IMPLS(reshape); REGISTER_IMPLS(non_max_suppression); REGISTER_IMPLS(softmax); REGISTER_IMPLS(range); +REGISTER_IMPLS(resample); REGISTER_IMPLS(select); REGISTER_IMPLS(scatter_update); REGISTER_IMPLS(scatter_elements_update); @@ -200,7 +201,6 @@ REGISTER_DEFAULT_IMPLS(space_to_batch, OCL_S); REGISTER_DEFAULT_IMPLS(space_to_depth, OCL_S); REGISTER_DEFAULT_IMPLS(swiglu, OCL_S, OCL_D); REGISTER_DEFAULT_IMPLS(gather_tree, OCL_S); -REGISTER_DEFAULT_IMPLS(resample, OCL_S); REGISTER_DEFAULT_IMPLS(grn, OCL_S); REGISTER_DEFAULT_IMPLS(ctc_greedy_decoder, OCL_S); REGISTER_DEFAULT_IMPLS(ctc_loss, OCL_S); diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/resample_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/resample_impls.cpp new file mode 100644 index 00000000000000..a2f8f54d4dedc9 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/registry/resample_impls.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "predicates.hpp" +#include "registry.hpp" +#include "intel_gpu/primitives/resample.hpp" +#include "primitive_inst.h" + +#if OV_GPU_WITH_OCL + #include "impls/ocl/resample.hpp" +#endif + + +namespace ov { +namespace intel_gpu { + +using namespace cldnn; + +const std::vector>& Registry::get_implementations() { + static const std::vector> impls = { + OV_GPU_CREATE_INSTANCE_OCL(ocl::ResampleImplementationManager, shape_types::static_shape, + [](const cldnn::program_node& node){ + auto prim = node.as().get_primitive(); + const auto& in0_layout = node.get_input_layout(0); + + if (in0_layout.data_type == ov::element::f32 && + prim->operation_type == ov::op::util::InterpolateBase::InterpolateMode::LINEAR_ONNX && + prim->coord_trans_mode == ov::op::util::InterpolateBase::CoordinateTransformMode::ALIGN_CORNERS && + prim->shape_calc_mode == ov::op::util::InterpolateBase::ShapeCalcMode::SCALES) { + return false; + } + + return true; + }) + OV_GPU_GET_INSTANCE_CPU(resample, shape_types::static_shape, + [](const cldnn::program_node& node){ + return true; + }) + }; + + return impls; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/interpolate.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/interpolate.cpp index 133a515fe58104..120365a436ff7d 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/interpolate.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/interpolate.cpp @@ -545,6 +545,51 @@ INSTANTIATE_TEST_SUITE_P(InterpolateLinear_Layout_Test, InterpolateLayerGPUTest, ::testing::Values(true, false)), InterpolateLayerGPUTest::getTestCaseName); + +const std::vector shapeParams4D_LargeShape = { + ShapeParams{ + ov::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{-1, {2, 100}, -1, -1}, {{1, 64, 148, 148}}}, + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::CONSTANT, + {{1.f, 1.f, 2.f, 2.f}}, + defaultAxes4D.front() + }, + ShapeParams{ + ov::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{-1, -1, -1, -1}, {{1, 3, 48, 48}}}, + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::CONSTANT, + {{2.f, 2.f}}, + reducedAxes4D.front() + }, + ShapeParams{ + ov::op::v4::Interpolate::ShapeCalcMode::SIZES, + InputShape{{-1, -1, -1, -1}, {{1, 3, 48, 48}}}, + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::CONSTANT, + {{1, 3, 144, 144}}, + defaultAxes4D.front() + }, +}; + +const auto interpolateCasesLinearOnnx_AlignCorners_Floor = ::testing::Combine( + ::testing::Values(ov::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX), + ::testing::Values(ov::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS), + ::testing::Values(ov::op::v4::Interpolate::NearestMode::FLOOR), + ::testing::ValuesIn(antialias), + ::testing::Values(std::vector{0, 0, 0, 0}), + ::testing::Values(std::vector{0, 0, 0, 0}), + ::testing::ValuesIn(cubeCoefs)); + +INSTANTIATE_TEST_SUITE_P(InterpolateLinearOnnx_LargeShape_Layout_Test, InterpolateLayerGPUTest, + ::testing::Combine( + interpolateCasesLinearOnnx_AlignCorners_Floor, + ::testing::ValuesIn(shapeParams4D_LargeShape), + ::testing::Values(ov::element::f32), + ::testing::Values(true)), + InterpolateLayerGPUTest::getTestCaseName); + const auto interpolateCasesCubic_Smoke = ::testing::Combine( ::testing::Values(ov::op::v4::Interpolate::InterpolateMode::CUBIC), ::testing::ValuesIn(coordinateTransformModes_Smoke),