Skip to content

Commit

Permalink
Add API to get each stream's disk size (#141)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #141

This diff provides easy methods to know how much disk space is used by a stream. The API is separate from existing APIs, because the computation is fairly expensive and might require non-trivial memory allocation to track record boundaries, and we don't want to pay for this when not necessary.

Reviewed By: paulsammut

Differential Revision: D67491139

fbshipit-source-id: 0b1bd6fc4b8ce841e99f5e060ee172aa5497c26f
  • Loading branch information
Georges Berenger authored and facebook-github-bot committed Dec 20, 2024
1 parent 4d793aa commit 341d239
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 0 deletions.
2 changes: 2 additions & 0 deletions csrc/reader/AsyncVRSReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ void pybind_asyncvrsreaders(py::module& m) {
.def("get_stream_for_flavor", &PyAsyncReader::getStreamForFlavor)
.def("find_stream", &PyAsyncReader::findStream)
.def("get_stream_info", &PyAsyncReader::getStreamInfo)
.def("get_stream_size", &PyAsyncReader::getStreamSize)
.def("enable_stream", py::overload_cast<const string&>(&PyAsyncReader::enableStream))
.def("enable_streams", &PyAsyncReader::enableStreams)
.def("enable_streams_by_indexes", &PyAsyncReader::enableStreamsByIndexes)
Expand Down Expand Up @@ -245,6 +246,7 @@ void pybind_asyncvrsreaders(py::module& m) {
&PyAsyncMultiReader::getStreams))
.def("find_stream", &PyAsyncMultiReader::findStream)
.def("get_stream_info", &PyAsyncMultiReader::getStreamInfo)
.def("get_stream_size", &PyAsyncMultiReader::getStreamSize)
.def("enable_stream", py::overload_cast<const string&>(&PyAsyncMultiReader::enableStream))
.def("enable_streams", &PyAsyncMultiReader::enableStreams)
.def("enable_streams_by_indexes", &PyAsyncMultiReader::enableStreamsByIndexes)
Expand Down
13 changes: 13 additions & 0 deletions csrc/reader/MultiVRSReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,18 @@ void OssMultiVRSReader::addStreamInfo(PyObject* dic, const StreamId& id, Record:
addRecordInfo(dic, "last_", recordType, reader_.getLastRecord(id, recordType));
}

int64_t OssMultiVRSReader::getStreamSize(const string& streamId) {
StreamId id = getStreamId(streamId);
size_t size = 0;
uint32_t recordCount = reader_.getRecordCount();
for (uint32_t k = 0; k < recordCount; ++k) {
if (reader_.getRecord(k)->streamId == id) {
size += reader_.getRecordSize(k);
}
}
return size;
}

void OssMultiVRSReader::addRecordInfo(
PyObject* dic,
const string& prefix,
Expand Down Expand Up @@ -1112,6 +1124,7 @@ void pybind_multivrsreader(py::module& m) {
&PyMultiVRSReader::getStreams))
.def("find_stream", &PyMultiVRSReader::findStream)
.def("get_stream_info", &PyMultiVRSReader::getStreamInfo)
.def("get_stream_size", &PyMultiVRSReader::getStreamSize)
.def("enable_stream", py::overload_cast<const string&>(&PyMultiVRSReader::enableStream))
.def("enable_streams", &PyMultiVRSReader::enableStreams)
.def("enable_streams_by_indexes", &PyMultiVRSReader::enableStreamsByIndexes)
Expand Down
6 changes: 6 additions & 0 deletions csrc/reader/MultiVRSReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,12 @@ class OssMultiVRSReader {
/// "flavor": device flavor, if set.
py::object getStreamInfo(const string& streamId);

/// Get a stream's footprint on disk, that is, the combined size of all the stream's records.
/// This API is fairly expensive, because every record known to the reader is checked to find
/// the ones belonging to the stream, which is why it's not folded into getStreamInfo().
/// @param streamId: VRS stream id.
/// @return Stream disk size (sum of the sizes of the stream's records), in bytes.
int64_t getStreamSize(const string& streamId);

/// Enable reading the records of a specific device.
/// @param streamId: VRS stream id to enable for reading.
/// @return True if the stream was found and is now enabled for reading.
Expand Down
13 changes: 13 additions & 0 deletions csrc/reader/VRSReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,18 @@ void OssVRSReader::addStreamInfo(PyObject* dic, const StreamId& id, Record::Type
addRecordInfo(dic, "last_", recordType, reader_.getLastRecord(id, recordType));
}

int64_t OssVRSReader::getStreamSize(const string& streamId) {
StreamId id = getStreamId(streamId);
int64_t size = 0;
const auto& index = reader_.getIndex();
for (uint32_t k = 0; k < index.size(); ++k) {
if (index[k].streamId == id) {
size += reader_.getRecordSize(k);
}
}
return size;
}

void OssVRSReader::addRecordInfo(
PyObject* dic,
const string& prefix,
Expand Down Expand Up @@ -1067,6 +1079,7 @@ void pybind_vrsreader(py::module& m) {
.def("get_stream_for_flavor", &PyVRSReader::getStreamForFlavor)
.def("find_stream", &PyVRSReader::findStream)
.def("get_stream_info", &PyVRSReader::getStreamInfo)
.def("get_stream_size", &PyVRSReader::getStreamSize)
.def("enable_stream", py::overload_cast<const string&>(&PyVRSReader::enableStream))
.def("enable_streams", &PyVRSReader::enableStreams)
.def("enable_streams_by_indexes", &PyVRSReader::enableStreamsByIndexes)
Expand Down
6 changes: 6 additions & 0 deletions csrc/reader/VRSReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,12 @@ class OssVRSReader : public vrs::utils::VideoRecordFormatStreamPlayer {
/// "flavor": device flavor, if set.
py::object getStreamInfo(const string& streamId);

/// Get a stream's footprint on disk, that is, the combined size of all the stream's records.
/// This API is fairly expensive, because the whole file index is scanned to find the records
/// belonging to the stream, which is why it's not folded into getStreamInfo().
/// @param streamId: VRS stream id.
/// @return Stream disk size (sum of the sizes of the stream's records), in bytes.
int64_t getStreamSize(const string& streamId);

/// Enable reading the records of a specific device.
/// @param streamId: VRS stream id to enable for reading.
/// @return True if the stream was found and is now enabled for reading.
Expand Down
12 changes: 12 additions & 0 deletions pyvrs/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,18 @@ def get_stream_info(self, stream_id: str) -> Dict[str, str]:
"""
return self._reader.get_stream_info(stream_id)

def get_stream_size(self, stream_id: str) -> int:
    """
    Report how much of the file a stream occupies.

    Args:
        stream_id: stream_id you are interested in.
    Returns:
        The number of file bytes used by the stream.
    """
    size_in_bytes = self._reader.get_stream_size(stream_id)
    return size_in_bytes

def get_records_count(self, stream_id: str, record_type: RecordType) -> int:
"""
Get the number of records for the stream_id & record_type.
Expand Down

0 comments on commit 341d239

Please sign in to comment.