Skip to content

Commit

Permalink
Merge pull request #183 from CVCUDA/v0.10.1-beta
Browse files Browse the repository at this point in the history
Merge patched release code for v0.10.1 into main
  • Loading branch information
milesp-nvidia authored Aug 8, 2024
2 parents 669197a + 574e42c commit f769fe4
Show file tree
Hide file tree
Showing 7 changed files with 708 additions and 938 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

[![License](https://img.shields.io/badge/License-Apache_2.0-yellowgreen.svg)](https://opensource.org/licenses/Apache-2.0)

![Version](https://img.shields.io/badge/Version-v0.10.0--beta-blue)
![Version](https://img.shields.io/badge/Version-v0.10.1--beta-blue)

![Platform](https://img.shields.io/badge/Platform-linux--64_%7C_win--64_wsl2_%7C_aarch64-gray)

Expand Down
157 changes: 31 additions & 126 deletions bench/BenchCvtColor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,158 +21,63 @@

#include <nvbench/nvbench.cuh>

#include <map>
#include <stdexcept>
#include <tuple>

inline static std::tuple<NVCVColorConversionCode, NVCVImageFormat, NVCVImageFormat> StringToFormats(
const std::string &str)
{
// clang-format off
static const std::map<std::string, std::tuple<NVCVColorConversionCode, NVCVImageFormat, NVCVImageFormat>> codeMap{
{ "RGB2BGR", {NVCV_COLOR_RGB2BGR, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_BGR8 }},
{ "RGB2RGBA", {NVCV_COLOR_RGB2RGBA, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_RGBA8}},
{ "RGBA2RGB", {NVCV_COLOR_RGBA2RGB, NVCV_IMAGE_FORMAT_RGBA8, NVCV_IMAGE_FORMAT_RGB8 }},
{ "RGB2GRAY", {NVCV_COLOR_RGB2GRAY, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_Y8 }},
{ "GRAY2RGB", {NVCV_COLOR_GRAY2RGB, NVCV_IMAGE_FORMAT_Y8, NVCV_IMAGE_FORMAT_RGB8 }},
{ "RGB2HSV", {NVCV_COLOR_RGB2HSV, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_HSV8 }},
{ "HSV2RGB", {NVCV_COLOR_HSV2RGB, NVCV_IMAGE_FORMAT_HSV8, NVCV_IMAGE_FORMAT_RGB8 }},
{ "RGB2YUV", {NVCV_COLOR_RGB2YUV, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_YUV8 }},
{ "YUV2RGB", {NVCV_COLOR_YUV2RGB, NVCV_IMAGE_FORMAT_YUV8, NVCV_IMAGE_FORMAT_RGB8 }},
{"RGB2YUV_NV12", {NVCV_COLOR_RGB2YUV_NV12, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_NV12 }},
{"YUV2RGB_NV12", {NVCV_COLOR_YUV2RGB_NV12, NVCV_IMAGE_FORMAT_NV12, NVCV_IMAGE_FORMAT_RGB8 }},
};
// clang-format on

if (auto it = codeMap.find(str); it != codeMap.end())
{
return it->second;
}
else
{
throw std::invalid_argument("Unrecognized color code");
}
}

// Returns the number of bytes one pixel of `imgFormat` occupies when each element is stored as a BT.
// The result is a float because NV12 counts 1.5 elements per pixel.
// Throws std::invalid_argument for any format not handled by the switch.
template<typename BT>
inline float BytesPerPixel(NVCVImageFormat imgFormat)
{
    float elementsPerPixel = 0.0f;

    switch (imgFormat)
    {
    case NVCV_IMAGE_FORMAT_RGB8:
    case NVCV_IMAGE_FORMAT_BGR8:
    case NVCV_IMAGE_FORMAT_HSV8:
    case NVCV_IMAGE_FORMAT_YUV8:
        elementsPerPixel = 3.0f;
        break;
    case NVCV_IMAGE_FORMAT_RGBA8:
        elementsPerPixel = 4.0f;
        break;
    case NVCV_IMAGE_FORMAT_NV12:
        elementsPerPixel = 1.5f;
        break;
    case NVCV_IMAGE_FORMAT_Y8:
        elementsPerPixel = 1.0f;
        break;
    default:
        throw std::invalid_argument("Unrecognized format");
    }

    return elementsPerPixel * sizeof(BT);
}

// Adapted from src/util/TensorDataUtils.hpp
//
// Creates the tensor that holds `numImages` images of `imgWidth` x `imgHeight` pixels in `imgFormat`.
// NV12 / NV21 (and their _ER variants) are created as a Y8 tensor whose height is imgHeight * 3 / 2;
// every other format is forwarded to the nvcv::Tensor image constructor unchanged.
// Throws std::invalid_argument if an NV12 / NV21 request has an odd width or an odd height.
inline static nvcv::Tensor CreateTensor(int numImages, int imgWidth, int imgHeight, const nvcv::ImageFormat &imgFormat)
{
    const bool isSemiPlanar420 = imgFormat == NVCV_IMAGE_FORMAT_NV12 || imgFormat == NVCV_IMAGE_FORMAT_NV12_ER
                              || imgFormat == NVCV_IMAGE_FORMAT_NV21 || imgFormat == NVCV_IMAGE_FORMAT_NV21_ER;

    if (!isSemiPlanar420)
    {
        return nvcv::Tensor(numImages, {imgWidth, imgHeight}, imgFormat);
    }

    // The previous check, ((imgHeight * 3) / 2) % 3 != 0, is true exactly when imgHeight is odd.
    // Testing the parity directly says so, and the message now names both dimensions instead of
    // blaming the height when only the width is odd.
    if (imgHeight % 2 != 0 || imgWidth % 2 != 0)
    {
        throw std::invalid_argument("Invalid size: NV12/NV21 formats require even width and height");
    }

    const int height420 = (imgHeight * 3) / 2;

    return nvcv::Tensor(numImages, {imgWidth, height420}, nvcv::ImageFormat(NVCV_IMAGE_FORMAT_Y8));
}

// nvbench entry point for cvcuda::CvtColor: reads the "shape" and "varShape" axes, records the
// global-memory reads/writes, then runs the operator on either a Tensor or an ImageBatchVarShape.
// Any std::exception thrown in the body is reported through state.skip() instead of propagating.
// NOTE(review): this span is a rendered diff whose +/- markers were lost, so lines of the previous
// (format-table driven, BT) and the current (uchar3/uchar4, T) implementation are interleaved below.
template<typename BT>
inline void CvtColor(nvbench::state &state, nvbench::type_list<BT>)
template<typename T>
inline void CvtColor(nvbench::state &state, nvbench::type_list<T>)
try
{
// shape has three components; both variants below use x as the image count, y as height, z as width.
long3 shape = benchutils::GetShape<3>(state.get_string("shape"));
long varShape = state.get_int64("varShape");
std::tuple<NVCVColorConversionCode, NVCVImageFormat, NVCVImageFormat> formats
= StringToFormats(state.get_string("code"));

NVCVColorConversionCode code = std::get<0>(formats);
nvcv::ImageFormat inFormat{std::get<1>(formats)};
nvcv::ImageFormat outFormat{std::get<2>(formats)};
using BT = typename nvcv::cuda::BaseType<T>;

int ch = nvcv::cuda::NumElements<T>;

state.add_global_memory_reads(shape.x * shape.y * shape.z * BytesPerPixel<BT>(inFormat));
state.add_global_memory_writes(shape.x * shape.y * shape.z * BytesPerPixel<BT>(outFormat));
// 3-element types use the BGR->RGB code; every other element count falls back to BGRA->RGBA.
NVCVColorConversionCode code = ch == 3 ? NVCV_COLOR_BGR2RGB : NVCV_COLOR_BGRA2RGBA;

state.add_global_memory_reads(shape.x * shape.y * shape.z * sizeof(T));
state.add_global_memory_writes(shape.x * shape.y * shape.z * sizeof(T));

cvcuda::CvtColor op;

// clang-format off

if (varShape < 0) // negative var shape means use Tensor
{
nvcv::Tensor src = CreateTensor(shape.x, shape.z, shape.y, inFormat);
nvcv::Tensor dst = CreateTensor(shape.x, shape.z, shape.y, outFormat);
nvcv::Tensor src({{shape.x, shape.y, shape.z, ch}, "NHWC"}, benchutils::GetDataType<BT>());
nvcv::Tensor dst({{shape.x, shape.y, shape.z, ch}, "NHWC"}, benchutils::GetDataType<BT>());

benchutils::FillTensor<BT>(src, benchutils::RandomValues<BT>());

state.exec(nvbench::exec_tag::sync,
[&op, &src, &dst, &code](nvbench::launch &launch) { op(launch.get_stream(), src, dst, code); });
state.exec(nvbench::exec_tag::sync, [&op, &src, &dst, &code](nvbench::launch &launch)
{
op(launch.get_stream(), src, dst, code);
});
}
else // zero and positive var shape means use ImageBatchVarShape
{
// NOTE(review): there is no return after state.skip() here, so the code below still executes
// for subsampled formats — confirm whether that is intended.
if (inFormat.chromaSubsampling() != nvcv::ChromaSubsampling::CSS_444
|| outFormat.chromaSubsampling() != nvcv::ChromaSubsampling::CSS_444)
{
state.skip("Skipping formats that have subsampled planes for the varshape benchmark");
}

std::vector<nvcv::Image> imgSrc;
std::vector<nvcv::Image> imgDst;
nvcv::ImageBatchVarShape src(shape.x);
nvcv::ImageBatchVarShape dst(shape.x);
std::vector<std::vector<uint8_t>> srcVec(shape.x);
nvcv::ImageBatchVarShape src(shape.x);
nvcv::ImageBatchVarShape dst(shape.x);

auto randomValuesU8 = benchutils::RandomValues<uint8_t>();
benchutils::FillImageBatch<T>(src, long2{shape.z, shape.y}, long2{varShape, varShape},
benchutils::RandomValues<T>());
// NOTE(review): dst is populated from src's own image range, so both batches appear to refer
// to the same images — presumably acceptable for a same-size channel swap; confirm.
dst.pushBack(src.begin(), src.end());

for (int i = 0; i < shape.x; i++)
state.exec(nvbench::exec_tag::sync, [&op, &src, &dst, &code](nvbench::launch &launch)
{
imgSrc.emplace_back(nvcv::Size2D{(int)shape.z, (int)shape.y}, inFormat);
imgDst.emplace_back(nvcv::Size2D{(int)shape.z, (int)shape.y}, outFormat);

// Build a tightly packed host buffer of random bytes sized from plane 0's pixel stride.
int srcRowStride = imgSrc[i].size().w * inFormat.planePixelStrideBytes(0);
int srcBufSize = imgSrc[i].size().h * srcRowStride;
srcVec[i].resize(srcBufSize);
for (int idx = 0; idx < srcBufSize; idx++)
{
srcVec[i][idx] = randomValuesU8();
}

// Copy the host buffer into plane 0 of the image, honouring the image's own row stride.
auto imgData = imgSrc[i].exportData<nvcv::ImageDataStridedCuda>();
CUDA_CHECK_ERROR(cudaMemcpy2D(imgData->plane(0).basePtr, imgData->plane(0).rowStride, srcVec[i].data(),
srcRowStride, srcRowStride, imgSrc[i].size().h, cudaMemcpyHostToDevice));
}
src.pushBack(imgSrc.begin(), imgSrc.end());
dst.pushBack(imgDst.begin(), imgDst.end());

state.exec(nvbench::exec_tag::sync,
[&op, &src, &dst, &code](nvbench::launch &launch) { op(launch.get_stream(), src, dst, code); });
op(launch.get_stream(), src, dst, code);
});
}
}
catch (const std::exception &err)
{
// Report the failure as a skipped benchmark state, using the exception text as the reason.
state.skip(err.what());
}

// Type axis and benchmark registration for CvtColor.
// varShape = -1 selects the Tensor branch of CvtColor; varShape = 0 selects the ImageBatchVarShape branch.
// NOTE(review): as in the function above, the old (BaseTypes + "code" axis) and the new
// (CvtColorTypes) registrations are interleaved here because the diff markers were lost.
using BaseTypes = nvbench::type_list<uint8_t>;
// clang-format on

using CvtColorTypes = nvbench::type_list<uchar3, uchar4>;

NVBENCH_BENCH_TYPES(CvtColor, NVBENCH_TYPE_AXES(BaseTypes))
.set_type_axes_names({"BaseType"})
.add_string_axis("shape", {"1x1080x1920", "64x720x1280"})
.add_string_axis("code", {"RGB2BGR", "RGB2RGBA", "RGBA2RGB", "RGB2GRAY", "GRAY2RGB", "RGB2HSV", "HSV2RGB",
"RGB2YUV", "YUV2RGB", "RGB2YUV_NV12", "YUV2RGB_NV12"})
NVBENCH_BENCH_TYPES(CvtColor, NVBENCH_TYPE_AXES(CvtColorTypes))
.set_type_axes_names({"InOutDataType"})
.add_string_axis("shape", {"1x1080x1920"})
.add_int64_axis("varShape", {-1, 0});
3 changes: 0 additions & 3 deletions cmake/ConfigCUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@ set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
# Compress kernels to generate smaller executables
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=--compress-all")

# Enable device lambdas
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda")

if(NOT USE_CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES "$ENV{CUDAARCHS}")

Expand Down
1 change: 1 addition & 0 deletions docs/sphinx/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ Copyright
:maxdepth: 1
:hidden:

v0.10.1-beta <relnotes/v0.10.1-beta>
v0.10.0-beta <relnotes/v0.10.0-beta>
v0.9.0-beta <relnotes/v0.9.0-beta>
v0.8.0-beta <relnotes/v0.8.0-beta>
Expand Down
44 changes: 44 additions & 0 deletions docs/sphinx/relnotes/v0.10.1-beta.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
..
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
.. _v0.10.1-beta:

v0.10.1-beta
============

Release Highlights
------------------

CV-CUDA v0.10.1 reverts the OpCvtColor performance optimizations introduced in v0.10.0 because bugs were discovered in them.
These optimizations will be reintroduced in a future release, together with consolidated testing.

License
-------

CV-CUDA is licensed under the `Apache 2.0 <https://github.com/CVCUDA/CV-CUDA/blob/main/LICENSE.md>`_ license.

Resources
---------

1. `CV-CUDA GitHub <https://github.com/CVCUDA/CV-CUDA>`_
2. `Increasing Throughput and Reducing Costs for AI-Based Computer Vision with CV-CUDA <https://developer.nvidia.com/blog/increasing-throughput-and-reducing-costs-for-computer-vision-with-cv-cuda/>`_
3. `NVIDIA Announces Microsoft, Tencent, Baidu Adopting CV-CUDA for Computer Vision AI <https://blogs.nvidia.com/blog/2023/03/21/cv-cuda-ai-computer-vision/>`_
4. `CV-CUDA helps Tencent Cloud audio and video PaaS platform achieve full-process GPU acceleration for video enhancement AI <https://developer.nvidia.com/zh-cn/blog/cv-cuda-high-performance-image-processing/>`_

Acknowledgements
----------------

CV-CUDA is developed jointly by NVIDIA and the ByteDance Machine Learning team.
Loading

0 comments on commit f769fe4

Please sign in to comment.