postprocess: hailo: Handle multiple scaler crops on the ISP outputs #703

Merged (3 commits) on Aug 29, 2024
core/options.cpp (7 additions, 3 deletions)
@@ -270,9 +270,11 @@ Options::Options()
("tuning-file", value<std::string>(&tuning_file)->default_value("-"),
"Name of camera tuning file to use, omit this option for libcamera default behaviour")
("lores-width", value<unsigned int>(&lores_width)->default_value(0),
"Width of low resolution frames (use 0 to omit low resolution stream")
"Width of low resolution frames (use 0 to omit low resolution stream)")
("lores-height", value<unsigned int>(&lores_height)->default_value(0),
"Height of low resolution frames (use 0 to omit low resolution stream")
"Height of low resolution frames (use 0 to omit low resolution stream)")
("lores-par", value<bool>(&lores_par)->default_value(false)->implicit_value(true),
"Preserve the 1:1 pixel aspect ratio of the low res image (where possible) by applying a different crop on the stream.")
("mode", value<std::string>(&mode_string),
"Camera mode as W:H:bit-depth:packing, where packing is P (packed) or U (unpacked)")
("viewfinder-mode", value<std::string>(&viewfinder_mode_string),
@@ -432,7 +434,8 @@ bool Options::Parse(int argc, char *argv[])
if (area)
sensor_props << (*area)[0].size().toString() << " ";

std::unique_ptr<CameraConfiguration> config = cam->generateConfiguration({libcamera::StreamRole::Raw});
std::unique_ptr<CameraConfiguration> config =
cam->generateConfiguration({ libcamera::StreamRole::Raw, libcamera::StreamRole::Viewfinder });
if (!config)
throw std::runtime_error("failed to generate capture configuration");
const StreamFormats &formats = config->at(0).formats();
@@ -694,6 +697,7 @@ void Options::Print() const
std::cerr << " tuning-file: " << (tuning_file == "-" ? "(libcamera)" : tuning_file) << std::endl;
std::cerr << " lores-width: " << lores_width << std::endl;
std::cerr << " lores-height: " << lores_height << std::endl;
std::cerr << " lores-par: " << lores_par << std::endl;
if (afMode_index != -1)
std::cerr << " autofocus-mode: " << afMode << std::endl;
if (afRange_index != -1)
core/options.hpp (1 addition, 0 deletions)
@@ -151,6 +151,7 @@ struct Options
bool qt_preview;
unsigned int lores_width;
unsigned int lores_height;
bool lores_par;
unsigned int camera;
std::string mode_string;
Mode mode;
core/post_processor.cpp (2 additions, 0 deletions)
@@ -120,6 +120,7 @@ void PostProcessor::Read(std::string const &filename)

unsigned int lores_width = node.get<unsigned int>("lores.width");
unsigned int lores_height = node.get<unsigned int>("lores.height");
bool lores_par = node.get<bool>("lores.par", app_->GetOptions()->lores_par);
std::string lores_format_str = node.get<std::string>("lores.format", "yuv420");

libcamera::PixelFormat lores_format = libcamera::formats::YUV420;
@@ -132,6 +133,7 @@ void PostProcessor::Read(std::string const &filename)

app_->GetOptions()->lores_width = lores_width;
app_->GetOptions()->lores_height = lores_height;
app_->GetOptions()->lores_par = lores_par;
app_->lores_format_ = lores_format;

LOG(1, "Postprocessing requested lores: " << lores_width << "x" << lores_height << " " << lores_format);
core/rpicam_app.cpp (28 additions, 10 deletions)
@@ -635,17 +635,35 @@ void RPiCamApp::StartCamera()

// Build a list of initial controls that we must set in the camera before starting it.
// We don't overwrite anything the application may have set before calling us.
if (!controls_.get(controls::ScalerCrop) && options_->roi_width != 0 && options_->roi_height != 0)
if (!controls_.get(controls::ScalerCrop) && !controls_.get(controls::rpi::ScalerCrops))
{
Rectangle sensor_area = camera_->controls().at(&controls::ScalerCrop).max().get<Rectangle>();
int x = options_->roi_x * sensor_area.width;
int y = options_->roi_y * sensor_area.height;
int w = options_->roi_width * sensor_area.width;
int h = options_->roi_height * sensor_area.height;
Rectangle crop(x, y, w, h);
crop.translateBy(sensor_area.topLeft());
LOG(2, "Using crop " << crop.toString());
controls_.set(controls::ScalerCrop, crop);
const Rectangle sensor_area = camera_->controls().at(&controls::ScalerCrop).max().get<Rectangle>();
const Rectangle default_crop = camera_->controls().at(&controls::ScalerCrop).def().get<Rectangle>();
std::vector<Rectangle> crops;

if (options_->roi_width != 0 && options_->roi_height != 0)
{
int x = options_->roi_x * sensor_area.width;
int y = options_->roi_y * sensor_area.height;
unsigned int w = options_->roi_width * sensor_area.width;
unsigned int h = options_->roi_height * sensor_area.height;
crops.push_back({ x, y, w, h });
crops.back().translateBy(sensor_area.topLeft());
}
else
{
crops.push_back(default_crop);
}

LOG(2, "Using crop (main) " << crops.back().toString());

if (options_->lores_width != 0 && options_->lores_height != 0 && !options_->lores_par)
{
crops.push_back(crops.back());
LOG(2, "Using crop (lores) " << crops.back().toString());
}

controls_.set(controls::rpi::ScalerCrops, libcamera::Span<const Rectangle>(crops.data(), crops.size()));
}

if (!controls_.get(controls::AfWindows) && !controls_.get(controls::AfMetering) && options_->afWindow_width != 0 &&
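The hunk above duplicates the main crop onto the lores output when --lores-par is not set, so both outputs keep the same field of view; with --lores-par the lores crop is left to the pipeline so it can preserve square pixels. Purely as an illustration of what such a crop looks like (this sketch is not code from the PR, and the real choice is made inside libcamera), the largest region with the lores output's aspect ratio, centred inside the main crop, can be computed with libcamera's geometry helpers:

#include <libcamera/geometry.h>

using libcamera::Rectangle;
using libcamera::Size;

// Illustrative only: a lores crop that preserves a 1:1 pixel aspect ratio is one
// whose aspect ratio matches the lores output; here we take the largest such
// region centred inside the main output's crop.
Rectangle LoresParCrop(const Rectangle &main_crop, const Size &lores_output)
{
	// Largest size inside the main crop that has the lores aspect ratio.
	Size crop_size = main_crop.size().boundedToAspectRatio(lores_output);
	// Place it centred on the main crop.
	return crop_size.centeredTo(main_crop.center());
}

For example, LoresParCrop({ 0, 0, 4608, 2592 }, { 640, 480 }) gives a 3456x2592 region centred in the 4608x2592 crop.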
post_processing_stages/hailo/hailo_postprocessing_stage.cpp (31 additions, 0 deletions)
@@ -15,6 +15,9 @@

using namespace hailort;

using Rectangle = libcamera::Rectangle;
using Size = libcamera::Size;

Allocator::Allocator()
{
}
@@ -255,3 +258,31 @@ HailoROIPtr HailoPostProcessingStage::MakeROI(const std::vector<OutTensor> &outp

return roi;
}

Rectangle HailoPostProcessingStage::ConvertInferenceCoordinates(const std::vector<float> &coords,
const std::vector<Rectangle> &scaler_crops) const
{
if (coords.size() != 4 || scaler_crops.size() != 2)
return {};

// Convert the inference image co-ordinates into the final ISP output co-ordinates.
const Size &isp_output_size = output_stream_->configuration().size;

// Object scaled to the full sensor resolution
Rectangle obj;
obj.x = std::round(coords[0] * (scaler_crops[1].width - 1));
obj.y = std::round(coords[1] * (scaler_crops[1].height - 1));
obj.width = std::round(coords[2] * (scaler_crops[1].width - 1));
obj.height = std::round(coords[3] * (scaler_crops[1].height - 1));

// Object on the low res scaler crop -> translated by the start of the low res crop offset
const Rectangle obj_translated_l = obj.translatedBy(scaler_crops[1].topLeft());
// -> bounded to the high res output crop
const Rectangle obj_bounded = obj_translated_l.boundedTo(scaler_crops[0]);
// -> translated to the start of the high res crop offset
const Rectangle obj_translated_h = obj_bounded.translatedBy(-scaler_crops[0].topLeft());
// -> and scaled to the ISP output.
const Rectangle obj_scaled = obj_translated_h.scaledBy(isp_output_size, scaler_crops[0].size());

return obj_scaled;
}
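
For a concrete sense of the conversion chain, here is a small self-contained example (all values are illustrative, not taken from this PR) that reproduces the same arithmetic with libcamera's geometry helpers. With the main output crop covering a full 4608x2592 sensor, a centred 2592x2592 lores crop, and a 1920x1080 ISP main output, a normalised box of (0.5, 0.5, 0.25, 0.25) on the lores image lands at (960, 540), sized 270x270, on the main output:

#include <cmath>
#include <iostream>
#include <vector>

#include <libcamera/geometry.h>

using libcamera::Rectangle;
using libcamera::Size;

// Stand-alone illustration of the conversion above; the crop and output sizes
// are made-up example values, not anything mandated by the PR.
int main()
{
	const std::vector<Rectangle> scaler_crops = {
		Rectangle(0, 0, 4608, 2592),    // [0] main output crop (full sensor here)
		Rectangle(1008, 0, 2592, 2592), // [1] lores output crop (centred square)
	};
	const Size isp_output_size(1920, 1080);
	const std::vector<float> coords = { 0.5f, 0.5f, 0.25f, 0.25f }; // x, y, w, h

	// Scale the normalised box to the lores crop's dimensions (sensor units).
	Rectangle obj;
	obj.x = std::round(coords[0] * (scaler_crops[1].width - 1));       // 1296
	obj.y = std::round(coords[1] * (scaler_crops[1].height - 1));      // 1296
	obj.width = std::round(coords[2] * (scaler_crops[1].width - 1));   // 648
	obj.height = std::round(coords[3] * (scaler_crops[1].height - 1)); // 648

	// Translate into sensor coordinates, clamp to the main crop, re-origin on
	// the main crop, then scale to the ISP output.
	const Rectangle out = obj.translatedBy(scaler_crops[1].topLeft())
				  .boundedTo(scaler_crops[0])
				  .translatedBy(-scaler_crops[0].topLeft())
				  .scaledBy(isp_output_size, scaler_crops[0].size());

	std::cout << out.toString() << std::endl; // mapped box: x=960, y=540, 270x270
	return 0;
}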
post_processing_stages/hailo/hailo_postprocessing_stage.hpp (5 additions, 0 deletions)
@@ -13,6 +13,8 @@
#include <string>
#include <vector>

#include <libcamera/geometry.h>

#include <hailo/hailort.hpp>
#include "hailo_objects.hpp"

@@ -110,6 +112,9 @@ class HailoPostProcessingStage : public PostProcessingStage
hailo_status DispatchJob(const uint8_t *input, hailort::AsyncInferJob &job, std::vector<OutTensor> &output_tensors);
HailoROIPtr MakeROI(const std::vector<OutTensor> &output_tensors) const;

libcamera::Rectangle ConvertInferenceCoordinates(const std::vector<float> &coords,
const std::vector<libcamera::Rectangle> &scaler_crops) const;

libcamera::Stream *low_res_stream_;
libcamera::Stream *output_stream_;
libcamera::Stream *raw_stream_;
post_processing_stages/hailo/hailo_yolo_inference.cpp (28 additions, 10 deletions)
@@ -13,6 +13,7 @@
#include <string>
#include <vector>

#include <libcamera/controls.h>
#include <libcamera/geometry.h>

#include "core/rpicam_app.hpp"
@@ -28,6 +29,8 @@ using PostProcFuncPtrNms = void (*)(HailoROIPtr);
using InitFuncPtr = YoloParams *(*)(std::string, std::string);
using FreeFuncPtr = void (*)(void *);

using Rectangle = libcamera::Rectangle;

namespace fs = std::filesystem;

#define NAME "hailo_yolo_inference"
@@ -49,7 +52,7 @@ class YoloInference : public HailoPostProcessingStage
bool Process(CompletedRequestPtr &completed_request) override;

private:
std::vector<Detection> runInference(const uint8_t *frame);
std::vector<Detection> runInference(const uint8_t *frame, const std::vector<libcamera::Rectangle> &scaler_crops);
void filterOutputObjects(std::vector<Detection> &objects);

struct LtObject
@@ -187,7 +190,23 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request)
return false;
}

std::vector<Detection> objects = runInference(input_ptr);
std::vector<Rectangle> scaler_crops;
auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop);
auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops);

if (rpi_scaler_crop)
{
for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++)
scaler_crops.push_back(rpi_scaler_crop->data()[i]);
}
else if (scaler_crop)
{
// Push-back twice, once for main, once for low res.
scaler_crops.push_back(*scaler_crop);
scaler_crops.push_back(*scaler_crop);
}

std::vector<Detection> objects = runInference(input_ptr, scaler_crops);
if (objects.size())
{
if (temporal_filtering_)
@@ -215,7 +234,7 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request)
return false;
}

std::vector<Detection> YoloInference::runInference(const uint8_t *frame)
std::vector<Detection> YoloInference::runInference(const uint8_t *frame, const std::vector<Rectangle> &scaler_crops)
{
hailort::AsyncInferJob job;
std::vector<OutTensor> output_tensors;
@@ -265,20 +284,19 @@ std::vector<Detection> YoloInference::runInference(const uint8_t *frame)

// Translate results to the rpicam-apps Detection objects
std::vector<Detection> results;
Size output_size = output_stream_->configuration().size;
for (auto const &d : detections)
{
if (d->get_confidence() < threshold_)
continue;

// Extract bounding box co-ordinates in the output image co-ordinates.
auto const &box = d->get_bbox();
int x0 = std::round(std::max(box.xmin(), 0.0f) * (output_size.width - 1));
int x1 = std::round(std::min(box.xmax(), 1.0f) * (output_size.width - 1));
int y0 = std::round(std::max(box.ymin(), 0.0f) * (output_size.height - 1));
int y1 = std::round(std::min(box.ymax(), 1.0f) * (output_size.height - 1));
results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), x0, y0,
x1 - x0, y1 - y0);
const float x0 = std::max(box.xmin(), 0.0f);
const float x1 = std::min(box.xmax(), 1.0f);
const float y0 = std::max(box.ymin(), 0.0f);
const float y1 = std::min(box.ymax(), 1.0f);
libcamera::Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops);
results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), r.x, r.y, r.width, r.height);
LOG(2, "Object: " << results.back().toString());

if (--max_detections_ == 0)
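Both this stage and the pose stage below read the crops back from the request metadata with the same pattern: prefer the per-output rpi::ScalerCrops array, and fall back to duplicating the single ScalerCrop when only that is available. If this were ever factored out, a shared helper could look roughly like the sketch below (it assumes core/completed_request.hpp provides CompletedRequestPtr; it is not something this PR adds):

#include <vector>

#include <libcamera/control_ids.h>
#include <libcamera/geometry.h>

#include "core/completed_request.hpp"

using libcamera::Rectangle;

// Hypothetical shared helper (not part of this PR): collect one crop per ISP
// output from the request metadata, duplicating the single ScalerCrop value
// when the per-output vendor control is absent.
static std::vector<Rectangle> GetScalerCrops(const CompletedRequestPtr &completed_request)
{
	std::vector<Rectangle> scaler_crops;
	auto scaler_crop = completed_request->metadata.get(libcamera::controls::ScalerCrop);
	auto rpi_scaler_crop = completed_request->metadata.get(libcamera::controls::rpi::ScalerCrops);

	if (rpi_scaler_crop)
	{
		for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++)
			scaler_crops.push_back(rpi_scaler_crop->data()[i]);
	}
	else if (scaler_crop)
	{
		// Once for main, once for low res.
		scaler_crops.push_back(*scaler_crop);
		scaler_crops.push_back(*scaler_crop);
	}

	return scaler_crops;
}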
post_processing_stages/hailo/hailo_yolov8_pose.cpp (32 additions, 19 deletions)
@@ -18,6 +18,7 @@
#include "core/rpicam_app.hpp"
#include "hailo_postprocessing_stage.hpp"

using Rectangle = libcamera::Rectangle;
using Size = libcamera::Size;
using PostProcFuncPtr = std::pair<std::vector<KeyPt>, std::vector<PairPairs>>(*)(HailoROIPtr);

@@ -38,7 +39,7 @@ class YoloPose : public HailoPostProcessingStage
bool Process(CompletedRequestPtr &completed_request) override;

private:
void runInference(const uint8_t *input, uint32_t *output);
void runInference(const uint8_t *input, uint32_t *output, const std::vector<Rectangle> &scaler_crops);

PostProcessingLib postproc_;
};
@@ -113,16 +114,32 @@ bool YoloPose::Process(CompletedRequestPtr &completed_request)
return false;
}

std::vector<Rectangle> scaler_crops;
auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop);
auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops);

if (rpi_scaler_crop)
{
for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++)
scaler_crops.push_back(rpi_scaler_crop->data()[i]);
}
else if (scaler_crop)
{
// Push-back twice, once for main, once for low res.
scaler_crops.push_back(*scaler_crop);
scaler_crops.push_back(*scaler_crop);
}

BufferWriteSync w(app_, completed_request->buffers[output_stream_]);
libcamera::Span<uint8_t> buffer = w.Get()[0];
uint32_t *output = (uint32_t *)buffer.data();

runInference(input_ptr, output);
runInference(input_ptr, output, scaler_crops);

return false;
}

void YoloPose::runInference(const uint8_t *input, uint32_t *output)
void YoloPose::runInference(const uint8_t *input, uint32_t *output, const std::vector<Rectangle> &scaler_crops)
{
hailort::AsyncInferJob job;
std::vector<OutTensor> output_tensors;
@@ -159,30 +176,26 @@ void YoloPose::runInference(const uint8_t *input, uint32_t *output)
continue;

HailoBBox bbox = detection->get_bbox();

cv::rectangle(image,
cv::Point2f(bbox.xmin() * float(output_stream_info_.width),
bbox.ymin() * float(output_stream_info_.height)),
cv::Point2f(bbox.xmax() * float(output_stream_info_.width),
bbox.ymax() * float(output_stream_info_.height)),
const float x0 = std::max(bbox.xmin(), 0.0f);
const float x1 = std::min(bbox.xmax(), 1.0f);
const float y0 = std::max(bbox.ymin(), 0.0f);
const float y1 = std::min(bbox.ymax(), 1.0f);
Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops);
cv::rectangle(image, cv::Point2f(r.x, r.y), cv::Point2f(r.x + r.width, r.y + r.height),
cv::Scalar(0, 0, 255), 1);
}

for (auto &keypoint : keypoints_and_pairs.first)
{
cv::circle(image,
cv::Point(keypoint.xs * float(output_stream_info_.width),
keypoint.ys * float(output_stream_info_.height)),
3, cv::Scalar(255, 0, 255), -1);
Rectangle r = ConvertInferenceCoordinates({ keypoint.xs, keypoint.ys, 1, 1 }, scaler_crops);
cv::circle(image, cv::Point(r.x, r.y), 3, cv::Scalar(255, 0, 255), -1);
}

for (PairPairs &p : keypoints_and_pairs.second)
for (const PairPairs &p : keypoints_and_pairs.second)
{
auto pt1 =
cv::Point(p.pt1.first * float(output_stream_info_.width), p.pt1.second * float(output_stream_info_.height));
auto pt2 =
cv::Point(p.pt2.first * float(output_stream_info_.width), p.pt2.second * float(output_stream_info_.height));
cv::line(image, pt1, pt2, cv::Scalar(255, 0, 255), 3);
Rectangle r1 = ConvertInferenceCoordinates({ p.pt1.first, p.pt1.second, 1, 1 }, scaler_crops);
Rectangle r2 = ConvertInferenceCoordinates({ p.pt2.first, p.pt2.second, 1, 1 }, scaler_crops);
cv::line(image, cv::Point(r1.x, r1.y), cv::Point(r2.x, r2.y), cv::Scalar(255, 0, 255), 3);
}
}

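Keypoints and skeleton endpoints above are mapped by reusing ConvertInferenceCoordinates with a nominal 1x1 box and keeping only the top-left corner. A thin wrapper (hypothetical; no such declaration exists in this PR) would make that intent explicit if it were added to HailoPostProcessingStage:

// Hypothetical extra member, assuming a matching declaration were added to the
// class in hailo_postprocessing_stage.hpp (it is not part of this PR).
libcamera::Point HailoPostProcessingStage::ConvertInferencePoint(float x, float y,
	const std::vector<libcamera::Rectangle> &scaler_crops) const
{
	// Reuse the rectangle conversion with a nominal 1x1 box; only the
	// top-left corner is meaningful for a point.
	return ConvertInferenceCoordinates({ x, y, 1, 1 }, scaler_crops).topLeft();
}

The pose stage could then draw with cv::Point(p.x, p.y) on the returned point directly.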