postprocess: hailo: Handle multiple scaler crops on the ISP outputs #703

Merged (3 commits) on Aug 29, 2024
core/options.cpp (7 additions, 3 deletions)
@@ -270,9 +270,11 @@ Options::Options()
("tuning-file", value<std::string>(&tuning_file)->default_value("-"),
"Name of camera tuning file to use, omit this option for libcamera default behaviour")
("lores-width", value<unsigned int>(&lores_width)->default_value(0),
"Width of low resolution frames (use 0 to omit low resolution stream")
"Width of low resolution frames (use 0 to omit low resolution stream)")
("lores-height", value<unsigned int>(&lores_height)->default_value(0),
"Height of low resolution frames (use 0 to omit low resolution stream")
"Height of low resolution frames (use 0 to omit low resolution stream)")
("lores-par", value<bool>(&lores_par)->default_value(false)->implicit_value(true),
"Preserve the 1:1 pixel aspect ratio of the low res image (where possible) by applying a different crop on the stream.")
("mode", value<std::string>(&mode_string),
"Camera mode as W:H:bit-depth:packing, where packing is P (packed) or U (unpacked)")
("viewfinder-mode", value<std::string>(&viewfinder_mode_string),
@@ -432,7 +434,8 @@ bool Options::Parse(int argc, char *argv[])
if (area)
sensor_props << (*area)[0].size().toString() << " ";

std::unique_ptr<CameraConfiguration> config = cam->generateConfiguration({libcamera::StreamRole::Raw});
std::unique_ptr<CameraConfiguration> config =
cam->generateConfiguration({ libcamera::StreamRole::Raw, libcamera::StreamRole::Viewfinder });
if (!config)
throw std::runtime_error("failed to generate capture configuration");
const StreamFormats &formats = config->at(0).formats();
@@ -694,6 +697,7 @@ void Options::Print() const
std::cerr << " tuning-file: " << (tuning_file == "-" ? "(libcamera)" : tuning_file) << std::endl;
std::cerr << " lores-width: " << lores_width << std::endl;
std::cerr << " lores-height: " << lores_height << std::endl;
std::cerr << " lores-par: " << lores_par << std::endl;
if (afMode_index != -1)
std::cerr << " autofocus-mode: " << afMode << std::endl;
if (afRange_index != -1)
core/options.hpp (1 addition, 0 deletions)
@@ -151,6 +151,7 @@ struct Options
bool qt_preview;
unsigned int lores_width;
unsigned int lores_height;
bool lores_par;
unsigned int camera;
std::string mode_string;
Mode mode;
core/post_processor.cpp (2 additions, 0 deletions)
@@ -120,6 +120,7 @@ void PostProcessor::Read(std::string const &filename)

unsigned int lores_width = node.get<unsigned int>("lores.width");
unsigned int lores_height = node.get<unsigned int>("lores.height");
bool lores_par = node.get<bool>("lores.par", app_->GetOptions()->lores_par);
std::string lores_format_str = node.get<std::string>("lores.format", "yuv420");

libcamera::PixelFormat lores_format = libcamera::formats::YUV420;
@@ -132,6 +133,7 @@ void PostProcessor::Read(std::string const &filename)

app_->GetOptions()->lores_width = lores_width;
app_->GetOptions()->lores_height = lores_height;
app_->GetOptions()->lores_par = lores_par;
app_->lores_format_ = lores_format;

LOG(1, "Postprocessing requested lores: " << lores_width << "x" << lores_height << " " << lores_format);
core/rpicam_app.cpp (28 additions, 10 deletions)
@@ -635,17 +635,35 @@ void RPiCamApp::StartCamera()

// Build a list of initial controls that we must set in the camera before starting it.
// We don't overwrite anything the application may have set before calling us.
if (!controls_.get(controls::ScalerCrop) && options_->roi_width != 0 && options_->roi_height != 0)
if (!controls_.get(controls::ScalerCrop) && !controls_.get(controls::rpi::ScalerCrops))
{
Rectangle sensor_area = camera_->controls().at(&controls::ScalerCrop).max().get<Rectangle>();
int x = options_->roi_x * sensor_area.width;
int y = options_->roi_y * sensor_area.height;
int w = options_->roi_width * sensor_area.width;
int h = options_->roi_height * sensor_area.height;
Rectangle crop(x, y, w, h);
crop.translateBy(sensor_area.topLeft());
LOG(2, "Using crop " << crop.toString());
controls_.set(controls::ScalerCrop, crop);
const Rectangle sensor_area = camera_->controls().at(&controls::ScalerCrop).max().get<Rectangle>();
const Rectangle default_crop = camera_->controls().at(&controls::ScalerCrop).def().get<Rectangle>();
std::vector<Rectangle> crops;

if (options_->roi_width != 0 && options_->roi_height != 0)
{
int x = options_->roi_x * sensor_area.width;
int y = options_->roi_y * sensor_area.height;
unsigned int w = options_->roi_width * sensor_area.width;
unsigned int h = options_->roi_height * sensor_area.height;
crops.push_back({ x, y, w, h });
crops.back().translateBy(sensor_area.topLeft());
}
else
{
crops.push_back(default_crop);
}

LOG(2, "Using crop (main) " << crops.back().toString());

if (options_->lores_width != 0 && options_->lores_height != 0 && !options_->lores_par)
{
crops.push_back(crops.back());
LOG(2, "Using crop (lores) " << crops.back().toString());
}

controls_.set(controls::rpi::ScalerCrops, libcamera::Span<const Rectangle>(crops.data(), crops.size()));
}

if (!controls_.get(controls::AfWindows) && !controls_.get(controls::AfMetering) && options_->afWindow_width != 0 &&
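The hunk above duplicates the main crop onto the lores output when --lores-par is not set, so both outputs keep the same field of view; with --lores-par the lores crop is left to the pipeline so it can preserve square pixels. Purely as an illustration of what such a crop looks like (this sketch is not code from the PR, and the real choice is made inside libcamera), the largest region with the lores output's aspect ratio, centred inside the main crop, can be computed with libcamera's geometry helpers:

#include <libcamera/geometry.h>

using libcamera::Rectangle;
using libcamera::Size;

// Illustrative only: a lores crop that preserves a 1:1 pixel aspect ratio is one
// whose aspect ratio matches the lores output; here we take the largest such
// region centred inside the main output's crop.
Rectangle LoresParCrop(const Rectangle &main_crop, const Size &lores_output)
{
	// Largest size inside the main crop that has the lores aspect ratio.
	Size crop_size = main_crop.size().boundedToAspectRatio(lores_output);
	// Place it centred on the main crop.
	return crop_size.centeredTo(main_crop.center());
}

For example, LoresParCrop({ 0, 0, 4608, 2592 }, { 640, 480 }) gives a 3456x2592 region centred in the 4608x2592 crop.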
post_processing_stages/hailo/hailo_postprocessing_stage.cpp (31 additions, 0 deletions)
@@ -15,6 +15,9 @@

using namespace hailort;

using Rectangle = libcamera::Rectangle;
using Size = libcamera::Size;

Allocator::Allocator()
{
}
@@ -255,3 +258,31 @@ HailoROIPtr HailoPostProcessingStage::MakeROI(const std::vector<OutTensor> &outp

return roi;
}

Rectangle HailoPostProcessingStage::ConvertInferenceCoordinates(const std::vector<float> &coords,
const std::vector<Rectangle> &scaler_crops) const
{
if (coords.size() != 4 || scaler_crops.size() != 2)
return {};

// Convert the inference image co-ordinates into the final ISP output co-ordinates.
const Size &isp_output_size = output_stream_->configuration().size;

// Object scaled to the full sensor resolution
Rectangle obj;
obj.x = std::round(coords[0] * (scaler_crops[1].width - 1));
obj.y = std::round(coords[1] * (scaler_crops[1].height - 1));
obj.width = std::round(coords[2] * (scaler_crops[1].width - 1));
obj.height = std::round(coords[3] * (scaler_crops[1].height - 1));

// Object on the low res scaler crop -> translated by the start of the low res crop offset
const Rectangle obj_translated_l = obj.translatedBy(scaler_crops[1].topLeft());
// -> bounded to the high res output crop
const Rectangle obj_bounded = obj_translated_l.boundedTo(scaler_crops[0]);
// -> translated to the start of the high res crop offset
const Rectangle obj_translated_h = obj_bounded.translatedBy(-scaler_crops[0].topLeft());
// -> and scaled to the ISP output.
const Rectangle obj_scaled = obj_translated_h.scaledBy(isp_output_size, scaler_crops[0].size());

return obj_scaled;
}
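
For a concrete sense of the conversion chain, here is a small self-contained example (all values are illustrative, not taken from this PR) that reproduces the same arithmetic with libcamera's geometry helpers. With the main output crop covering a full 4608x2592 sensor, a centred 2592x2592 lores crop, and a 1920x1080 ISP main output, a normalised box of (0.5, 0.5, 0.25, 0.25) on the lores image lands at (960, 540), sized 270x270, on the main output:

#include <cmath>
#include <iostream>
#include <vector>

#include <libcamera/geometry.h>

using libcamera::Rectangle;
using libcamera::Size;

// Stand-alone illustration of the conversion above; the crop and output sizes
// are made-up example values, not anything mandated by the PR.
int main()
{
	const std::vector<Rectangle> scaler_crops = {
		Rectangle(0, 0, 4608, 2592),    // [0] main output crop (full sensor here)
		Rectangle(1008, 0, 2592, 2592), // [1] lores output crop (centred square)
	};
	const Size isp_output_size(1920, 1080);
	const std::vector<float> coords = { 0.5f, 0.5f, 0.25f, 0.25f }; // x, y, w, h

	// Scale the normalised box to the lores crop's dimensions (sensor units).
	Rectangle obj;
	obj.x = std::round(coords[0] * (scaler_crops[1].width - 1));       // 1296
	obj.y = std::round(coords[1] * (scaler_crops[1].height - 1));      // 1296
	obj.width = std::round(coords[2] * (scaler_crops[1].width - 1));   // 648
	obj.height = std::round(coords[3] * (scaler_crops[1].height - 1)); // 648

	// Translate into sensor coordinates, clamp to the main crop, re-origin on
	// the main crop, then scale to the ISP output.
	const Rectangle out = obj.translatedBy(scaler_crops[1].topLeft())
				  .boundedTo(scaler_crops[0])
				  .translatedBy(-scaler_crops[0].topLeft())
				  .scaledBy(isp_output_size, scaler_crops[0].size());

	std::cout << out.toString() << std::endl; // mapped box: x=960, y=540, 270x270
	return 0;
}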
post_processing_stages/hailo/hailo_postprocessing_stage.hpp (5 additions, 0 deletions)
@@ -13,6 +13,8 @@
#include <string>
#include <vector>

#include <libcamera/geometry.h>

#include <hailo/hailort.hpp>
#include "hailo_objects.hpp"

@@ -110,6 +112,9 @@ class HailoPostProcessingStage : public PostProcessingStage
hailo_status DispatchJob(const uint8_t *input, hailort::AsyncInferJob &job, std::vector<OutTensor> &output_tensors);
HailoROIPtr MakeROI(const std::vector<OutTensor> &output_tensors) const;

libcamera::Rectangle ConvertInferenceCoordinates(const std::vector<float> &coords,
const std::vector<libcamera::Rectangle> &scaler_crops) const;

libcamera::Stream *low_res_stream_;
libcamera::Stream *output_stream_;
libcamera::Stream *raw_stream_;
post_processing_stages/hailo/hailo_yolo_inference.cpp (28 additions, 10 deletions)
@@ -13,6 +13,7 @@
#include <string>
#include <vector>

#include <libcamera/controls.h>
#include <libcamera/geometry.h>

#include "core/rpicam_app.hpp"
@@ -28,6 +29,8 @@ using PostProcFuncPtrNms = void (*)(HailoROIPtr);
using InitFuncPtr = YoloParams *(*)(std::string, std::string);
using FreeFuncPtr = void (*)(void *);

using Rectangle = libcamera::Rectangle;

namespace fs = std::filesystem;

#define NAME "hailo_yolo_inference"
@@ -49,7 +52,7 @@ class YoloInference : public HailoPostProcessingStage
bool Process(CompletedRequestPtr &completed_request) override;

private:
std::vector<Detection> runInference(const uint8_t *frame);
std::vector<Detection> runInference(const uint8_t *frame, const std::vector<libcamera::Rectangle> &scaler_crops);
void filterOutputObjects(std::vector<Detection> &objects);

struct LtObject
@@ -187,7 +190,23 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request)
return false;
}

std::vector<Detection> objects = runInference(input_ptr);
std::vector<Rectangle> scaler_crops;
auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop);
auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops);

if (rpi_scaler_crop)
{
for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++)
scaler_crops.push_back(rpi_scaler_crop->data()[i]);
}
else if (scaler_crop)
{
// Push-back twice, once for main, once for low res.
scaler_crops.push_back(*scaler_crop);
scaler_crops.push_back(*scaler_crop);
}

std::vector<Detection> objects = runInference(input_ptr, scaler_crops);
if (objects.size())
{
if (temporal_filtering_)
@@ -215,7 +234,7 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request)
return false;
}

std::vector<Detection> YoloInference::runInference(const uint8_t *frame)
std::vector<Detection> YoloInference::runInference(const uint8_t *frame, const std::vector<Rectangle> &scaler_crops)
{
hailort::AsyncInferJob job;
std::vector<OutTensor> output_tensors;
@@ -265,20 +284,19 @@ std::vector<Detection> YoloInference::runInference(const uint8_t *frame)

// Translate results to the rpicam-apps Detection objects
std::vector<Detection> results;
Size output_size = output_stream_->configuration().size;
for (auto const &d : detections)
{
if (d->get_confidence() < threshold_)
continue;

// Extract bounding box co-ordinates in the output image co-ordinates.
auto const &box = d->get_bbox();
int x0 = std::round(std::max(box.xmin(), 0.0f) * (output_size.width - 1));
int x1 = std::round(std::min(box.xmax(), 1.0f) * (output_size.width - 1));
int y0 = std::round(std::max(box.ymin(), 0.0f) * (output_size.height - 1));
int y1 = std::round(std::min(box.ymax(), 1.0f) * (output_size.height - 1));
results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), x0, y0,
x1 - x0, y1 - y0);
const float x0 = std::max(box.xmin(), 0.0f);
const float x1 = std::min(box.xmax(), 1.0f);
const float y0 = std::max(box.ymin(), 0.0f);
const float y1 = std::min(box.ymax(), 1.0f);
libcamera::Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops);
results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), r.x, r.y, r.width, r.height);
LOG(2, "Object: " << results.back().toString());

if (--max_detections_ == 0)
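Both this stage and the pose stage below read the crops back from the request metadata with the same pattern: prefer the per-output rpi::ScalerCrops array, and fall back to duplicating the single ScalerCrop when only that is available. If this were ever factored out, a shared helper could look roughly like the sketch below (it assumes core/completed_request.hpp provides CompletedRequestPtr; it is not something this PR adds):

#include <vector>

#include <libcamera/control_ids.h>
#include <libcamera/geometry.h>

#include "core/completed_request.hpp"

using libcamera::Rectangle;

// Hypothetical shared helper (not part of this PR): collect one crop per ISP
// output from the request metadata, duplicating the single ScalerCrop value
// when the per-output vendor control is absent.
static std::vector<Rectangle> GetScalerCrops(const CompletedRequestPtr &completed_request)
{
	std::vector<Rectangle> scaler_crops;
	auto scaler_crop = completed_request->metadata.get(libcamera::controls::ScalerCrop);
	auto rpi_scaler_crop = completed_request->metadata.get(libcamera::controls::rpi::ScalerCrops);

	if (rpi_scaler_crop)
	{
		for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++)
			scaler_crops.push_back(rpi_scaler_crop->data()[i]);
	}
	else if (scaler_crop)
	{
		// Once for main, once for low res.
		scaler_crops.push_back(*scaler_crop);
		scaler_crops.push_back(*scaler_crop);
	}

	return scaler_crops;
}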
post_processing_stages/hailo/hailo_yolov8_pose.cpp (32 additions, 19 deletions)
@@ -18,6 +18,7 @@
#include "core/rpicam_app.hpp"
#include "hailo_postprocessing_stage.hpp"

using Rectangle = libcamera::Rectangle;
using Size = libcamera::Size;
using PostProcFuncPtr = std::pair<std::vector<KeyPt>, std::vector<PairPairs>>(*)(HailoROIPtr);

@@ -38,7 +39,7 @@ class YoloPose : public HailoPostProcessingStage
bool Process(CompletedRequestPtr &completed_request) override;

private:
void runInference(const uint8_t *input, uint32_t *output);
void runInference(const uint8_t *input, uint32_t *output, const std::vector<Rectangle> &scaler_crops);

PostProcessingLib postproc_;
};
@@ -113,16 +114,32 @@ bool YoloPose::Process(CompletedRequestPtr &completed_request)
return false;
}

std::vector<Rectangle> scaler_crops;
auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop);
auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops);

if (rpi_scaler_crop)
{
for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++)
scaler_crops.push_back(rpi_scaler_crop->data()[i]);
}
else if (scaler_crop)
{
// Push-back twice, once for main, once for low res.
scaler_crops.push_back(*scaler_crop);
scaler_crops.push_back(*scaler_crop);
}

BufferWriteSync w(app_, completed_request->buffers[output_stream_]);
libcamera::Span<uint8_t> buffer = w.Get()[0];
uint32_t *output = (uint32_t *)buffer.data();

runInference(input_ptr, output);
runInference(input_ptr, output, scaler_crops);

return false;
}

void YoloPose::runInference(const uint8_t *input, uint32_t *output)
void YoloPose::runInference(const uint8_t *input, uint32_t *output, const std::vector<Rectangle> &scaler_crops)
{
hailort::AsyncInferJob job;
std::vector<OutTensor> output_tensors;
@@ -159,30 +176,26 @@ void YoloPose::runInference(const uint8_t *input, uint32_t *output)
continue;

HailoBBox bbox = detection->get_bbox();

cv::rectangle(image,
cv::Point2f(bbox.xmin() * float(output_stream_info_.width),
bbox.ymin() * float(output_stream_info_.height)),
cv::Point2f(bbox.xmax() * float(output_stream_info_.width),
bbox.ymax() * float(output_stream_info_.height)),
const float x0 = std::max(bbox.xmin(), 0.0f);
const float x1 = std::min(bbox.xmax(), 1.0f);
const float y0 = std::max(bbox.ymin(), 0.0f);
const float y1 = std::min(bbox.ymax(), 1.0f);
Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops);
cv::rectangle(image, cv::Point2f(r.x, r.y), cv::Point2f(r.x + r.width, r.y + r.height),
cv::Scalar(0, 0, 255), 1);
}

for (auto &keypoint : keypoints_and_pairs.first)
{
cv::circle(image,
cv::Point(keypoint.xs * float(output_stream_info_.width),
keypoint.ys * float(output_stream_info_.height)),
3, cv::Scalar(255, 0, 255), -1);
Rectangle r = ConvertInferenceCoordinates({ keypoint.xs, keypoint.ys, 1, 1 }, scaler_crops);
cv::circle(image, cv::Point(r.x, r.y), 3, cv::Scalar(255, 0, 255), -1);
}

for (PairPairs &p : keypoints_and_pairs.second)
for (const PairPairs &p : keypoints_and_pairs.second)
{
auto pt1 =
cv::Point(p.pt1.first * float(output_stream_info_.width), p.pt1.second * float(output_stream_info_.height));
auto pt2 =
cv::Point(p.pt2.first * float(output_stream_info_.width), p.pt2.second * float(output_stream_info_.height));
cv::line(image, pt1, pt2, cv::Scalar(255, 0, 255), 3);
Rectangle r1 = ConvertInferenceCoordinates({ p.pt1.first, p.pt1.second, 1, 1 }, scaler_crops);
Rectangle r2 = ConvertInferenceCoordinates({ p.pt2.first, p.pt2.second, 1, 1 }, scaler_crops);
cv::line(image, cv::Point(r1.x, r1.y), cv::Point(r2.x, r2.y), cv::Scalar(255, 0, 255), 3);
}
}

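Keypoints and skeleton endpoints above are mapped by reusing ConvertInferenceCoordinates with a nominal 1x1 box and keeping only the top-left corner. A thin wrapper (hypothetical; no such declaration exists in this PR) would make that intent explicit if it were added to HailoPostProcessingStage:

// Hypothetical extra member, assuming a matching declaration were added to the
// class in hailo_postprocessing_stage.hpp (it is not part of this PR).
libcamera::Point HailoPostProcessingStage::ConvertInferencePoint(float x, float y,
	const std::vector<libcamera::Rectangle> &scaler_crops) const
{
	// Reuse the rectangle conversion with a nominal 1x1 box; only the
	// top-left corner is meaningful for a point.
	return ConvertInferenceCoordinates({ x, y, 1, 1 }, scaler_crops).topLeft();
}

The pose stage could then draw with cv::Point(p.x, p.y) on the returned point directly.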