Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Add cpu impl for resample #28185

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions samples/cpp/benchmark_app/inputs_filling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,10 +778,10 @@ std::map<std::string, ov::TensorVector> get_tensors(std::map<std::string, std::v
// Fill with Numpy arrays
tensors[input_name].push_back(
get_numpy_tensor(files.second, inputId, batchSize, {input_name, input_info}, &tensor_src_info));
} else if (input_info.is_image()) {
// Fill with Images
tensors[input_name].push_back(
get_image_tensor(files.second, inputId, batchSize, {input_name, input_info}, &tensor_src_info));
// } else if (input_info.is_image()) {
// // Fill with Images
// tensors[input_name].push_back(
// get_image_tensor(files.second, inputId, batchSize, {input_name, input_info}, &tensor_src_info));
} else {
// Fill with binary files
tensors[input_name].push_back(
Expand Down
2 changes: 2 additions & 0 deletions samples/cpp/benchmark_app/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,8 @@ int main(int argc, char* argv[]) {
type_to_set = ov::element::u8;
}

type_to_set = ov::element::f32;

auto& in = preproc.input(item.get_any_name());
if (type_to_set != ov::element::undefined) {
in.tensor().set_element_type(type_to_set);
Expand Down
1 change: 1 addition & 0 deletions samples/cpp/benchmark_app/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ std::vector<benchmark_app::InputsInfo> get_inputs_info(const std::string& shape_

// Precision
info.type = item.get_element_type();
info.type = ov::element::Type_t::f32;
// Partial Shape
if (shape_map.count(name)) {
if (shape_map.at(name).size() > 1) {
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ void register_implementations() {
REGISTER_CPU(tile);
REGISTER_CPU(select);
REGISTER_CPU(reduce);
REGISTER_CPU(resample);
}

} // namespace cpu
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ REGISTER_CPU(broadcast);
REGISTER_CPU(tile);
REGISTER_CPU(select);
REGISTER_CPU(reduce);
REGISTER_CPU(resample);

#undef REGISTER_CPU

Expand Down
218 changes: 218 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/cpu/resample.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "impls/cpu/cpu_impl_helpers.hpp"
#include "register.hpp"
#include "resample_inst.h"
#include "impls/registry/implementation_map.hpp"

#include "openvino/op/interpolate.hpp"

#include <algorithm>
#include <string>
#include <vector>
#include <utility>

namespace cldnn {
namespace cpu {

namespace {



} // namespace

/// CPU (host-side) implementation of the `resample` primitive.
///
/// The primitive attributes are copied into the public fields below by
/// set_node_params(). execute_impl() wraps the locked input/output memory into
/// ov::Tensor objects and delegates the computation to the evaluate() method of
/// an ov::op::v4::Interpolate operation.
struct resample_impl : public typed_primitive_impl<resample> {
    using parent = typed_primitive_impl<resample>;
    using parent::parent;

    using InterpolateMode = ov::op::v4::Interpolate::InterpolateMode;
    using CoordinateTransformMode = ov::op::v4::Interpolate::CoordinateTransformMode;
    using Nearest_mode = ov::op::v4::Interpolate::NearestMode;
    using InterpolateAttrs = ov::op::v4::Interpolate::InterpolateAttrs;
    using ShapeCalcMode = ov::op::v4::Interpolate::ShapeCalcMode;

    // Interpolate operation used for evaluation; created on the first execute_impl() call and reused afterwards.
    std::shared_ptr<ov::op::Op> op;

    // Attributes copied from the resample primitive in set_node_params().
    std::vector<int64_t> sizes;
    std::vector<float> scales;
    std::vector<int64_t> axes;
    std::vector<size_t> pads_begin;
    std::vector<size_t> pads_end;
    InterpolateMode operation_type = InterpolateMode::LINEAR;
    ShapeCalcMode shape_calc_mode = ShapeCalcMode::SIZES;
    int32_t antialias = 0;
    float cube_coeff = -0.75f;
    CoordinateTransformMode coord_trans_mode = CoordinateTransformMode::HALF_PIXEL;
    Nearest_mode round_mode = Nearest_mode::ROUND_PREFER_FLOOR;

    DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cpu::resample_impl)

    /// Returns a copy of this implementation (all fields, including the shared `op` pointer, are copied).
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<resample_impl>(*this);
    }

    resample_impl() : parent("resample_cpu_impl") {}

    /// Builds the implementation and immediately reads the attributes of `outer`.
    explicit resample_impl(const resample_node& outer) {
        set_node_params(outer);
    }

    /// Copies the resample primitive attributes of `arg` into the fields of this object.
    /// Asserts that `arg` is a resample node.
    void set_node_params(const program_node& arg) override {
        OPENVINO_ASSERT(arg.is_type<resample>(), "[GPU] Incorrect program_node type");
        const auto& node = arg.as<resample>();
        const auto prim = node.get_primitive();

        sizes = prim->sizes;
        scales = prim->scales;
        axes = prim->axes;

        pads_begin = prim->pads_begin;
        pads_end = prim->pads_end;
        operation_type = prim->operation_type;
        shape_calc_mode = prim->shape_calc_mode;
        antialias = prim->antialias;
        cube_coeff = prim->cube_coeff;
        coord_trans_mode = prim->coord_trans_mode;
        round_mode = prim->round_mode;
    }

    // TODO: save()/load() are not overridden, so none of the fields above are written or read by this class.
    // The disabled draft below references `mode` and `coefficients`, which are not members of this struct;
    // it has to be rewritten for the fields declared above before it can be enabled.
    //
    // void save(BinaryOutputBuffer& ob) const override {
    // parent::save(ob);
    // ob << make_data(&mode, sizeof(eltwise_mode));
    // ob << coefficients;
    // }

    // void load(BinaryInputBuffer& ib) override {
    // parent::load(ib);
    // ib >> make_data(&mode, sizeof(eltwise_mode));
    // ib >> coefficients;
    // }

    /// Runs the resample primitive on the host.
    ///
    /// @param events   Events the execution depends on; waited for unless they are passed through.
    /// @param instance Primitive instance providing the stream, layouts and memory.
    /// @return The grouped input events when they are passed through, otherwise a new output event.
    ///
    /// Asserts when only the data input is present and the shape calculation mode is not SCALES,
    /// or when the Interpolate evaluation fails.
    event::ptr execute_impl(const std::vector<event::ptr>& events, resample_inst& instance) override {
        OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "resample::execute_impl");
        auto& stream = instance.get_network().get_stream();

        const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl();

        if (!pass_through_events) {
            stream.wait_for_events(events);
        }

        auto params = instance.get_impl_params();

        // Set input tensors: every input gets its own lock and its own layout, so that additional
        // inputs are not silently aliased to the data tensor.
        using input_lock_t = cldnn::mem_lock<uint8_t, mem_lock_type::read>;
        const size_t inputs_count = params->input_layouts.size();

        std::vector<std::unique_ptr<input_lock_t>> input_locks;
        input_locks.reserve(inputs_count);

        ov::TensorVector input_host_tensors;
        for (size_t i = 0; i < inputs_count; i++) {
            input_locks.push_back(std::unique_ptr<input_lock_t>(new input_lock_t(instance.input_memory_ptr(i), stream)));
            input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_locks.back()->data()));
        }

        const size_t input_rank = params->input_layouts[0].get_rank();

        // Extend scales/axes to the full input rank: leading dimensions get scale 1 and axes 0..k-1.
        if (scales.size() < input_rank)
            scales.insert(scales.begin(), input_rank - scales.size(), 1.f);

        // The size check prevents unsigned wrap-around when more axes than dimensions are provided.
        if (axes.size() < input_rank) {
            for (size_t i = (input_rank - axes.size()); i > 0; i--)
                axes.insert(axes.begin(), 1, static_cast<int64_t>(i - 1));
        }

        // Declared at function scope: the tensor created below only wraps this buffer, so the buffer
        // has to stay alive until evaluate() has finished.
        std::vector<int64_t> target_shape_ps;

        if (input_host_tensors.size() == 1) {
            // NOTE(review): assumes sizes() yields the dimensions in the order expected by Interpolate - confirm.
            auto target_shape_sizes = params->output_layouts[0].get_tensor().sizes();
            target_shape_ps.reserve(input_rank);
            for (size_t i = 0; i < input_rank; i++)
                target_shape_ps.push_back(target_shape_sizes[i]);

            // The element type has to match the int64_t storage of target_shape_ps.
            auto target_shape_tensor = ov::Tensor(ov::element::i64, {target_shape_ps.size()}, target_shape_ps.data());
            input_host_tensors.push_back(target_shape_tensor);

            if (shape_calc_mode == ov::op::util::InterpolateBase::ShapeCalcMode::SCALES) {
                auto scales_tensor = ov::Tensor(ov::element::f32, {scales.size()}, scales.data());
                input_host_tensors.push_back(scales_tensor);
            } else {
                OPENVINO_ASSERT(false, "[GPU] Not supported Interpolate ShapeCalcMode of CPU impl with id ", instance.id());
            }

            auto axes_tensor = ov::Tensor(ov::element::i64, {axes.size()}, axes.data());
            input_host_tensors.push_back(axes_tensor);
        }

        // set output tensors
        ov::TensorVector output_host_tensors;
        auto output_mem_ptr = instance.output_memory_ptr();
        cldnn::mem_lock<uint8_t, mem_lock_type::write> output_lock(output_mem_ptr, stream);

        auto output_tensor = make_tensor(params->output_layouts[0], output_lock.data());
        output_host_tensors.push_back(output_tensor);

        if (!op) {
            // Set Attrs (only needed when the operation is created; it is reused on later calls).
            InterpolateAttrs attrs;
            attrs.mode = operation_type;
            attrs.shape_calculation_mode = shape_calc_mode;
            attrs.pads_begin = pads_begin;
            attrs.pads_end = pads_end;
            attrs.coordinate_transformation_mode = coord_trans_mode;
            attrs.nearest_mode = round_mode;
            attrs.antialias = antialias;
            attrs.cube_coeff = cube_coeff;

            auto interp = std::make_shared<ov::op::v4::Interpolate>();
            interp->set_attrs(attrs);
            op = interp;
        }

        OPENVINO_ASSERT(op->evaluate(output_host_tensors, input_host_tensors),
                        "[GPU] Couldn't execute resample primitive with id ", instance.id());

        if (pass_through_events) {
            return stream.group_events(events);
        }

        return make_output_event(stream, instance.is_output());
    }

    // Nothing to do for this implementation.
    void init_kernels(const kernels_cache& , const kernel_impl_params&) override {}

    // Nothing to do for this implementation.
    void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {}

public:
    /// Factory registered in the implementation map; returns a default-constructed implementation.
    /// Neither `arg` nor `impl_param` is read here.
    static std::unique_ptr<primitive_impl> create(const resample_node& arg, const kernel_impl_params& impl_param) {
        return make_unique<resample_impl>();
    }
};


namespace detail {

// Registers the CPU resample implementation for every combination of the data types and
// formats listed below, once for static shapes and once for dynamic shapes.
attach_resample_impl::attach_resample_impl() {
    const auto supported_types = {data_types::f32, data_types::i32};

    // NOTE(review): blocked layouts are listed next to the planar ones - confirm that the
    // host-side evaluation handles them as intended.
    const auto supported_formats = {
        // 4D layouts
        format::bfyx,
        format::b_fs_yx_fsv16,
        format::b_fs_yx_fsv32,
        format::bs_fs_yx_bsv16_fsv16,
        format::bs_fs_yx_bsv32_fsv16,
        format::bs_fs_yx_bsv32_fsv32,

        // 5D layouts
        format::bfzyx,
        format::b_fs_zyx_fsv16,
        format::b_fs_zyx_fsv32,
        format::bs_fs_zyx_bsv16_fsv32,
        format::bs_fs_zyx_bsv16_fsv16,
        format::bs_fs_zyx_bsv32_fsv32,
        format::bs_fs_zyx_bsv32_fsv16,
    };

    // Same factory for both shape kinds; static shapes are registered first.
    for (const auto shape_kind : {shape_types::static_shape, shape_types::dynamic_shape}) {
        implementation_map<resample>::add(impl_types::cpu, shape_kind, resample_impl::create, supported_types, supported_formats);
    }
}

} // namespace detail


} // namespace cpu
} // namespace cldnn

BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::resample_impl)
1 change: 0 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ void register_implementations() {
REGISTER_OCL(swiglu);
REGISTER_OCL(tile);
REGISTER_OCL(gather_tree);
REGISTER_OCL(resample);
REGISTER_OCL(grn);
REGISTER_OCL(ctc_greedy_decoder);
REGISTER_OCL(ctc_loss);
Expand Down
1 change: 0 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
#include "intel_gpu/primitives/reduce.hpp"
#include "intel_gpu/primitives/region_yolo.hpp"
#include "intel_gpu/primitives/reorg_yolo.hpp"
#include "intel_gpu/primitives/resample.hpp"
#include "intel_gpu/primitives/reshape.hpp"
#include "intel_gpu/primitives/reverse_sequence.hpp"
#include "intel_gpu/primitives/rms.hpp"
Expand Down
38 changes: 4 additions & 34 deletions src/plugins/intel_gpu/src/graph/impls/ocl/resample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "primitive_base.hpp"

#include "resample.hpp"
#include "resample_inst.h"
#include "kernel_selector/kernels/resample/resample_kernel_selector.h"
#include "kernel_selector/kernels/resample/resample_kernel_base.h"
Expand Down Expand Up @@ -174,42 +175,11 @@ struct resample_impl : typed_primitive_impl_ocl<resample> {
}
};

namespace detail {

attach_resample_impl::attach_resample_impl() {
std::set<implementation_map<resample>::key_type> keys;

const auto types = {data_types::f16, data_types::f32, data_types::i8, data_types::u8, data_types::i32};
const auto formats = {
format::bfyx,
format::b_fs_yx_fsv16,
format::b_fs_yx_fsv32,
format::bs_fs_yx_bsv16_fsv16,
format::bs_fs_yx_bsv32_fsv16,
format::bs_fs_yx_bsv32_fsv32,

format::bfzyx,
format::b_fs_zyx_fsv16,
format::b_fs_zyx_fsv32,
format::bs_fs_zyx_bsv16_fsv32,
format::bs_fs_zyx_bsv16_fsv16,
format::bs_fs_zyx_bsv32_fsv32,
format::bs_fs_zyx_bsv32_fsv16,
};
for (const auto type : types) {
for (const auto format : formats) {
keys.emplace(type, format);
}
}

keys.emplace(data_types::f32, format::yxfb);
keys.emplace(data_types::f16, format::yxfb);
keys.emplace(data_types::f16, format::fs_b_yx_fsv32);

implementation_map<resample>::add(impl_types::ocl, typed_primitive_impl_ocl<resample>::create<resample_impl>, keys);
// Creates the OCL resample implementation for `node`.
// `node` is expected to be a resample node; this is only checked by assert().
std::unique_ptr<primitive_impl> ResampleImplementationManager::create_impl(const program_node& node, const kernel_impl_params& params) const {
    assert(node.is_type<resample>());
    const auto& typed_node = static_cast<const resample_node&>(node);
    return typed_primitive_impl_ocl<resample>::create<resample_impl>(typed_node, params);
}

} // namespace detail
} // namespace ocl
} // namespace cldnn

Expand Down
23 changes: 23 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/resample.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "impls/registry/implementation_manager.hpp"
#include "program_node.h"
#include "resample_inst.h"

#include <memory>
namespace cldnn {
namespace ocl {

/// Implementation manager for the OCL `resample` implementation.
/// The implementation type passed to the base class is always impl_types::ocl.
struct ResampleImplementationManager : public ImplementationManager {
    OV_GPU_PRIMITIVE_IMPL("ocl::resample")

    /// @param shape_type Shape kind forwarded to the base ImplementationManager.
    /// @param vf         Optional validation callback forwarded to the base class (nullptr by default).
    ResampleImplementationManager(shape_types shape_type, ValidateFunc vf = nullptr) : ImplementationManager(impl_types::ocl, shape_type, vf) {}

    /// Creates the implementation object for `node`; defined in the corresponding source file.
    std::unique_ptr<primitive_impl> create_impl(const program_node& node, const kernel_impl_params& params) const override;

    /// NOTE(review): accepts every node without checking data types or formats - confirm this is intended.
    bool validate_impl(const program_node& node) const override {
        return true;
    }
};

} // namespace ocl
} // namespace cldnn
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ REGISTER_IMPLS(reshape);
REGISTER_IMPLS(non_max_suppression);
REGISTER_IMPLS(softmax);
REGISTER_IMPLS(range);
REGISTER_IMPLS(resample);
REGISTER_IMPLS(select);
REGISTER_IMPLS(scatter_update);
REGISTER_IMPLS(scatter_elements_update);
Expand Down Expand Up @@ -200,7 +201,6 @@ REGISTER_DEFAULT_IMPLS(space_to_batch, OCL_S);
REGISTER_DEFAULT_IMPLS(space_to_depth, OCL_S);
REGISTER_DEFAULT_IMPLS(swiglu, OCL_S, OCL_D);
REGISTER_DEFAULT_IMPLS(gather_tree, OCL_S);
REGISTER_DEFAULT_IMPLS(resample, OCL_S);
REGISTER_DEFAULT_IMPLS(grn, OCL_S);
REGISTER_DEFAULT_IMPLS(ctc_greedy_decoder, OCL_S);
REGISTER_DEFAULT_IMPLS(ctc_loss, OCL_S);
Expand Down
Loading
Loading