Skip to content

Commit

Permalink
[GPU] Add cpu impl for resample
Browse files Browse the repository at this point in the history
  • Loading branch information
kelvinchoi-intel committed Dec 24, 2024
1 parent 2d78f2a commit fe528a6
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 5 deletions.
8 changes: 4 additions & 4 deletions samples/cpp/benchmark_app/inputs_filling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,10 +778,10 @@ std::map<std::string, ov::TensorVector> get_tensors(std::map<std::string, std::v
// Fill with Numpy arrays
tensors[input_name].push_back(
get_numpy_tensor(files.second, inputId, batchSize, {input_name, input_info}, &tensor_src_info));
} else if (input_info.is_image()) {
// Fill with Images
tensors[input_name].push_back(
get_image_tensor(files.second, inputId, batchSize, {input_name, input_info}, &tensor_src_info));
// } else if (input_info.is_image()) {
// // Fill with Images
// tensors[input_name].push_back(
// get_image_tensor(files.second, inputId, batchSize, {input_name, input_info}, &tensor_src_info));
} else {
// Fill with binary files
tensors[input_name].push_back(
Expand Down
1 change: 1 addition & 0 deletions samples/cpp/benchmark_app/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ std::vector<benchmark_app::InputsInfo> get_inputs_info(const std::string& shape_

// Precision
info.type = item.get_element_type();
info.type = ov::element::Type_t::f32;
// Partial Shape
if (shape_map.count(name)) {
if (shape_map.at(name).size() > 1) {
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ void register_implementations() {
REGISTER_CPU(tile);
REGISTER_CPU(select);
REGISTER_CPU(reduce);
REGISTER_CPU(resample);
}

} // namespace cpu
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ REGISTER_CPU(broadcast);
REGISTER_CPU(tile);
REGISTER_CPU(select);
REGISTER_CPU(reduce);
REGISTER_CPU(resample);

#undef REGISTER_CPU

Expand Down
208 changes: 208 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/cpu/resample.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "impls/cpu/cpu_impl_helpers.hpp"
#include "register.hpp"
#include "resample_inst.h"
#include "impls/registry/implementation_map.hpp"

#include "openvino/op/interpolate.hpp"

#include <algorithm>
#include <string>
#include <vector>
#include <utility>

namespace cldnn {
namespace cpu {

namespace {



} // namespace

/// Host-side (CPU) implementation of the `resample` primitive.
///
/// The actual computation is delegated to `ov::op::v4::Interpolate::evaluate()`. The primitive's
/// attributes are copied into this object by set_node_params() and are (de)serialized by
/// save()/load() so that an impl restored from a cached blob keeps its parameters.
struct resample_impl : public typed_primitive_impl<resample> {
    using parent = typed_primitive_impl<resample>;
    using parent::parent;

    using InterpolateMode = ov::op::v4::Interpolate::InterpolateMode;
    using CoordinateTransformMode = ov::op::v4::Interpolate::CoordinateTransformMode;
    using Nearest_mode = ov::op::v4::Interpolate::NearestMode;
    using InterpolateAttrs = ov::op::v4::Interpolate::InterpolateAttrs;
    using ShapeCalcMode = ov::op::v4::Interpolate::ShapeCalcMode;

    // Interpolate op used for evaluation; created lazily on the first execute_impl() call
    // and intentionally not serialized.
    std::shared_ptr<ov::op::Op> op;

    // Attributes mirrored from the resample primitive (see set_node_params()).
    std::vector<int64_t> sizes;
    std::vector<float> scales;
    std::vector<int64_t> axes;
    std::vector<size_t> pads_begin;
    std::vector<size_t> pads_end;
    InterpolateMode operation_type = InterpolateMode::LINEAR;
    ShapeCalcMode shape_calc_mode = ShapeCalcMode::SIZES;
    int32_t antialias = 0;
    float cube_coeff = -0.75f;
    CoordinateTransformMode coord_trans_mode = CoordinateTransformMode::HALF_PIXEL;
    Nearest_mode round_mode = Nearest_mode::ROUND_PREFER_FLOOR;

    DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cpu::resample_impl)

    /// Returns a copy of this implementation, including the cached op pointer.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<resample_impl>(*this);
    }

    resample_impl() : parent("resample_cpu_impl") {}

    /// Constructs the implementation and copies the attributes from @p outer.
    explicit resample_impl(const resample_node& outer) {
        set_node_params(outer);
    }

    /// Copies all resample attributes from the primitive owned by @p arg.
    /// Asserts if @p arg is not a resample node.
    void set_node_params(const program_node& arg) override {
        OPENVINO_ASSERT(arg.is_type<resample>(), "[GPU] Incorrect program_node type");
        const auto& node = arg.as<resample>();
        const auto prim = node.get_primitive();

        sizes = prim->sizes;
        scales = prim->scales;
        axes = prim->axes;

        pads_begin = prim->pads_begin;
        pads_end = prim->pads_end;
        operation_type = prim->operation_type;
        shape_calc_mode = prim->shape_calc_mode;
        antialias = prim->antialias;
        cube_coeff = prim->cube_coeff;
        coord_trans_mode = prim->coord_trans_mode;
        round_mode = prim->round_mode;
    }

    /// Serializes every attribute set by set_node_params().
    /// The field order must match load().
    void save(BinaryOutputBuffer& ob) const override {
        parent::save(ob);
        ob << sizes;
        ob << scales;
        ob << axes;
        ob << pads_begin;
        ob << pads_end;
        ob << make_data(&operation_type, sizeof(InterpolateMode));
        ob << make_data(&shape_calc_mode, sizeof(ShapeCalcMode));
        ob << antialias;
        ob << cube_coeff;
        ob << make_data(&coord_trans_mode, sizeof(CoordinateTransformMode));
        ob << make_data(&round_mode, sizeof(Nearest_mode));
    }

    /// Restores the attributes written by save(), in the same order.
    void load(BinaryInputBuffer& ib) override {
        parent::load(ib);
        ib >> sizes;
        ib >> scales;
        ib >> axes;
        ib >> pads_begin;
        ib >> pads_end;
        ib >> make_data(&operation_type, sizeof(InterpolateMode));
        ib >> make_data(&shape_calc_mode, sizeof(ShapeCalcMode));
        ib >> antialias;
        ib >> cube_coeff;
        ib >> make_data(&coord_trans_mode, sizeof(CoordinateTransformMode));
        ib >> make_data(&round_mode, sizeof(Nearest_mode));
    }

    /// Runs Interpolate on the host.
    ///
    /// @param events   Events to wait for, or to pass through when the queue is out-of-order and
    ///                 all dependencies are CPU impls.
    /// @param instance The resample primitive instance providing memories and layouts.
    /// @return Grouped input events in pass-through mode, otherwise a fresh output event.
    /// Asserts if the shape calculation mode is unsupported, an axis is out of range, or
    /// evaluate() fails.
    event::ptr execute_impl(const std::vector<event::ptr>& events, resample_inst& instance) override {
        OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "resample::execute_impl");
        auto& stream = instance.get_network().get_stream();

        const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl();

        if (!pass_through_events) {
            stream.wait_for_events(events);
        }

        auto params = instance.get_impl_params();

        // Set input tensors: every input gets its own layout and its own locked memory.
        // mem_lock is neither copyable nor movable, hence the unique_ptr indirection; the locks
        // are released when this function returns.
        using input_lock_t = cldnn::mem_lock<uint8_t, mem_lock_type::read>;
        const size_t inputs_count = params->input_layouts.size();
        std::vector<std::unique_ptr<input_lock_t>> input_locks;
        input_locks.reserve(inputs_count);

        ov::TensorVector input_host_tensors;
        for (size_t i = 0; i < inputs_count; i++) {
            input_locks.push_back(std::unique_ptr<input_lock_t>(new input_lock_t(instance.input_memory_ptr(i), stream)));
            input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_locks.back()->data()));
        }

        // Backing storage for the auxiliary tensors created below. ov::Tensor only wraps an
        // external host pointer, so these buffers must outlive the evaluate() call.
        std::vector<int64_t> target_shape;
        std::vector<float> computed_scales;

        // Mode actually passed to Interpolate. Kept local so that repeated executions always
        // start from the primitive's original shape_calc_mode.
        ShapeCalcMode effective_shape_calc_mode = shape_calc_mode;

        if (input_host_tensors.size() == 1) {
            // Only the data input is a real dependency: synthesize sizes / scales / axes inputs.
            const auto input_shape = params->input_layouts[0].get_shape();
            const auto output_shape = params->output_layouts[0].get_shape();

            target_shape.reserve(axes.size());
            for (const auto axis : axes) {
                OPENVINO_ASSERT(axis >= 0 &&
                                static_cast<size_t>(axis) < input_shape.size() &&
                                static_cast<size_t>(axis) < output_shape.size(),
                                "[GPU] Interpolate axis ", axis, " is out of range for primitive with id ", instance.id());
                target_shape.push_back(static_cast<int64_t>(output_shape[static_cast<size_t>(axis)]));
            }

            // The element type must match the int64_t backing buffer.
            auto target_shape_tensor = ov::Tensor(ov::element::i64, {target_shape.size()}, target_shape.data());
            input_host_tensors.push_back(target_shape_tensor);

            if (shape_calc_mode == ShapeCalcMode::SIZES) {
                // Convert the requested sizes into per-axis scales (output dim / input dim) using
                // floating-point division. In SIZES mode the output layout dims on `axes` are the
                // requested sizes.
                // NOTE(review): pads_begin / pads_end are not folded into the input dims here
                // (same as before) - confirm whether padded dims are expected.
                computed_scales.reserve(target_shape.size());
                for (size_t i = 0; i < target_shape.size(); i++) {
                    const auto in_dim = input_shape[static_cast<size_t>(axes[i])];
                    OPENVINO_ASSERT(in_dim != 0, "[GPU] Zero input dimension in Interpolate primitive with id ", instance.id());
                    computed_scales.push_back(static_cast<float>(target_shape[i]) / static_cast<float>(in_dim));
                }

                auto scales_tensor = ov::Tensor(ov::element::f32, {computed_scales.size()}, computed_scales.data());
                input_host_tensors.push_back(scales_tensor);
                effective_shape_calc_mode = ShapeCalcMode::SCALES;
            } else if (shape_calc_mode == ShapeCalcMode::SCALES) {
                auto scales_tensor = ov::Tensor(ov::element::f32, {scales.size()}, scales.data());
                input_host_tensors.push_back(scales_tensor);
            } else {
                OPENVINO_ASSERT(false, "[GPU] Not supported Interpolate ShapeCalcMode for primitive with id ", instance.id());
            }

            auto axes_tensor = ov::Tensor(ov::element::i64, {axes.size()}, axes.data());
            input_host_tensors.push_back(axes_tensor);
        }

        // set output tensors
        ov::TensorVector output_host_tensors;
        auto output_mem_ptr = instance.output_memory_ptr();
        cldnn::mem_lock<uint8_t, mem_lock_type::write> output_lock(output_mem_ptr, stream);

        auto output_tensor = make_tensor(params->output_layouts[0], output_lock.data());
        output_host_tensors.push_back(output_tensor);

        if (!op) {
            // The op is created once and reused; its attributes are fixed at that point.
            InterpolateAttrs attrs;
            attrs.mode = operation_type;
            attrs.shape_calculation_mode = effective_shape_calc_mode;
            attrs.pads_begin = pads_begin;
            attrs.pads_end = pads_end;
            attrs.coordinate_transformation_mode = coord_trans_mode;
            attrs.nearest_mode = round_mode;
            attrs.antialias = antialias;
            attrs.cube_coeff = cube_coeff;

            auto interp = std::make_shared<ov::op::v4::Interpolate>();
            interp->set_attrs(attrs);
            op = interp;
        }

        OPENVINO_ASSERT(op->evaluate(output_host_tensors, input_host_tensors),
                        "[GPU] Couldn't execute resample primitive with id ", instance.id());

        if (pass_through_events) {
            return stream.group_events(events);
        }

        return make_output_event(stream, instance.is_output());
    }

    // No device kernels to initialize for the host implementation.
    void init_kernels(const kernels_cache& , const kernel_impl_params&) override {}

    // Nothing to update between executions; layouts are read from the instance in execute_impl().
    void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {}

public:
    /// Factory registered in implementation_map; both arguments are unused and a
    /// default-constructed impl is returned.
    static std::unique_ptr<primitive_impl> create(const resample_node& arg, const kernel_impl_params& impl_param) {
        return make_unique<resample_impl>();
    }
};


namespace detail {

// Registers the CPU resample implementation (f32 data, bfyx format) for both
// static-shape and dynamic-shape nodes.
attach_resample_impl::attach_resample_impl() {
    auto supported_types = {
        data_types::f32,
    };

    auto supported_formats = {
        format::bfyx,
    };

    // Static shapes are registered first, then dynamic shapes.
    for (const auto shape_kind : {shape_types::static_shape, shape_types::dynamic_shape}) {
        implementation_map<resample>::add(impl_types::cpu, shape_kind, resample_impl::create, supported_types, supported_formats);
    }
}

} // namespace detail
} // namespace cpu
} // namespace cldnn

BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::resample_impl)
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ REGISTER_IMPLS(reshape);
REGISTER_IMPLS(non_max_suppression);
REGISTER_IMPLS(softmax);
REGISTER_IMPLS(range);
//REGISTER_IMPLS(resample);
REGISTER_IMPLS(select);
REGISTER_IMPLS(scatter_update);
REGISTER_IMPLS(scatter_elements_update);
Expand Down Expand Up @@ -200,7 +201,7 @@ REGISTER_DEFAULT_IMPLS(space_to_batch, OCL_S);
REGISTER_DEFAULT_IMPLS(space_to_depth, OCL_S);
REGISTER_DEFAULT_IMPLS(swiglu, OCL_S, OCL_D);
REGISTER_DEFAULT_IMPLS(gather_tree, OCL_S);
REGISTER_DEFAULT_IMPLS(resample, OCL_S);
REGISTER_DEFAULT_IMPLS(resample, CPU_S, OCL_S);
REGISTER_DEFAULT_IMPLS(grn, OCL_S);
REGISTER_DEFAULT_IMPLS(ctc_greedy_decoder, OCL_S);
REGISTER_DEFAULT_IMPLS(ctc_loss, OCL_S);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,44 @@ INSTANTIATE_TEST_SUITE_P(InterpolateLinear_Layout_Test, InterpolateLayerGPUTest,
::testing::Values(true, false)),
InterpolateLayerGPUTest::getTestCaseName);


// Shape parameters for the LINEAR_ONNX "large shape" test suite instantiated below.
// Only one case is active: SCALES mode, a fully dynamic 4D partial shape with a single
// static target shape {1, 3, 48, 48}, scales {1, 1, 2, 2} and the first default 4D axes set.
// NOTE(review): the two InputLayerType::CONSTANT fields presumably select how the
// sizes/scales and axes inputs are provided - confirm against the ShapeParams definition.
const std::vector<ShapeParams> shapeParams4D_LargeShape = {
ShapeParams{
ov::op::v4::Interpolate::ShapeCalcMode::SCALES,
// Disabled alternative input shape, kept for reference:
//InputShape{{-1, {2, 100}, -1, -1}, {{1, 64, 148, 148}}},
InputShape{{-1, -1, -1, -1}, {{1, 3, 48, 48}}}, // min shape for failure
ov::test::utils::InputLayerType::CONSTANT,
ov::test::utils::InputLayerType::CONSTANT,
{{1.f, 1.f, 2.f, 2.f}},
defaultAxes4D.front()
},
// Disabled SIZES-mode variant of the same case (target sizes {1, 3, 144, 144}):
// ShapeParams{
// ov::op::v4::Interpolate::ShapeCalcMode::SIZES,
// InputShape{{-1, -1, -1, -1}, {{1, 3, 48, 48}}},
// ov::test::utils::InputLayerType::CONSTANT,
// ov::test::utils::InputLayerType::CONSTANT,
// {{1, 3, 144, 144}},
// defaultAxes4D.front()
// },
};

// Interpolate attribute combinations for the large-shape suite: LINEAR_ONNX mode,
// ALIGN_CORNERS coordinate transform and FLOOR nearest mode, crossed with every value of
// `antialias` and `cubeCoefs` (both defined elsewhere in this file). The two all-zero
// 4-element vectors are presumably pads_begin / pads_end - confirm against the test's
// parameter tuple.
const auto interpolateCasesLinearOnnx_AlignCorners_Floor = ::testing::Combine(
::testing::Values(ov::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX),
::testing::Values(ov::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS),
::testing::Values(ov::op::v4::Interpolate::NearestMode::FLOOR),
::testing::ValuesIn(antialias),
::testing::Values(std::vector<size_t>{0, 0, 0, 0}),
::testing::Values(std::vector<size_t>{0, 0, 0, 0}),
::testing::ValuesIn(cubeCoefs));

// Instantiates InterpolateLayerGPUTest with the LINEAR_ONNX / ALIGN_CORNERS / FLOOR attribute
// set and the large-shape parameters above, for f32 only.
// NOTE(review): the meaning of the trailing boolean `true` is not visible here - confirm
// against the InterpolateLayerGPUTest parameter tuple.
INSTANTIATE_TEST_SUITE_P(InterpolateLinearOnnx_LargeShape_Layout_Test, InterpolateLayerGPUTest,
::testing::Combine(
interpolateCasesLinearOnnx_AlignCorners_Floor,
::testing::ValuesIn(shapeParams4D_LargeShape),
::testing::Values(ov::element::f32),
::testing::Values(true)),
InterpolateLayerGPUTest::getTestCaseName);

const auto interpolateCasesCubic_Smoke = ::testing::Combine(
::testing::Values(ov::op::v4::Interpolate::InterpolateMode::CUBIC),
::testing::ValuesIn(coordinateTransformModes_Smoke),
Expand Down

0 comments on commit fe528a6

Please sign in to comment.