Skip to content

Commit

Permalink
Expose counter for disconnects due to chunk timeout
Browse files Browse the repository at this point in the history
Summary:
Add a counter for FSDB stream client disconnects caused by a chunk timeout.
The counter is published with the suffix: disconnectReason.chunkTimeout

Differential Revision: D64444138

fbshipit-source-id: 34a9751262e349d6c2ef2fe728d8722695b4ea9a
  • Loading branch information
Priyank Warkhede authored and facebook-github-bot committed Oct 16, 2024
1 parent f26b92b commit fb87597
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 0 deletions.
8 changes: 8 additions & 0 deletions fboss/fsdb/client/FsdbStreamClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,14 @@ folly::coro::Task<void> FsdbStreamClient::serviceLoopWrapper() {
<< apache::thrift::util::enumNameSafe(ef.get_errorCode())
<< ": " << ef.get_message();
setStateDisconnectedWithReason(ef.get_errorCode());
} catch (const apache::thrift::transport::TTransportException& et) {
FsdbErrorCode disconnectReason = FsdbErrorCode::CLIENT_TRANSPORT_EXCEPTION;
if (et.getType() ==
apache::thrift::transport::TTransportException::
TTransportExceptionType::TIMED_OUT) {
disconnectReason = FsdbErrorCode::CLIENT_CHUNK_TIMEOUT;
}
setStateDisconnectedWithReason(disconnectReason);
} catch (const std::exception& ex) {
STREAM_XLOG(ERR) << "Unknown error: " << folly::exceptionStr(ex);
setStateDisconnectedWithReason(FsdbErrorCode::DISCONNECTED);
Expand Down
8 changes: 8 additions & 0 deletions fboss/fsdb/client/FsdbStreamClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ class FsdbStreamClient : public ReconnectingThriftClient {
private:
void updateDisconnectReasonCounter(fsdb::FsdbErrorCode reason) {
switch (reason) {
case fsdb::FsdbErrorCode::CLIENT_CHUNK_TIMEOUT:
disconnectReasonChunkTimeout_.add(1);
break;
case fsdb::FsdbErrorCode::SUBSCRIPTION_DATA_CALLBACK_ERROR:
disconnectReasonDataCbError_.add(1);
break;
Expand All @@ -150,6 +153,11 @@ class FsdbStreamClient : public ReconnectingThriftClient {
std::atomic<bool> serviceLoopRunning_{false};
const bool isStats_;
apache::thrift::RpcOptions rpcOptions_;
// counters for various disconnect reasons
fb303::TimeseriesWrapper disconnectReasonChunkTimeout_{
getCounterPrefix() + ".disconnectReason.chunkTimeout",
fb303::SUM,
fb303::RATE};
fb303::TimeseriesWrapper disconnectReasonDataCbError_{
getCounterPrefix() + ".disconnectReason.dataCbError",
fb303::SUM,
Expand Down
2 changes: 2 additions & 0 deletions fboss/fsdb/if/fsdb_common.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ enum FsdbErrorCode {
PUBLISHER_GR_DISCONNECT = 16,
SUBSCRIPTION_NOT_PERMITTED = 17,
SUBSCRIPTION_DATA_CALLBACK_ERROR = 18,
CLIENT_CHUNK_TIMEOUT = 19,
CLIENT_TRANSPORT_EXCEPTION = 20,
}

exception FsdbException {
Expand Down

0 comments on commit fb87597

Please sign in to comment.