Skip to content

Commit

Permalink
[BACKPORT 2.19.2][#18941] DocDB: Add debug view for SharedLockManager
Browse files Browse the repository at this point in the history
Summary:
Original commit: 816f669 / D28239
Adds a debug view to each tablet in the tserver debug UI which displays metadata about actively held
shared in-memory locks. Useful for debugging scenarios where we suspect transactions may be stuck or
holding these locks longer than expected.
Jira: DB-7792

Test Plan:
1. Apply gist at https://gist.github.com/robertsami/544eec99d10a16325d567192c2a00b19
2. Set up the cluster:
```
bin/yb-ctl create --rf=1 --data_dir ~/yugabyte-data --tserver_flags 'enable_wait_queues=true,wait_queue_poll_interval_ms=1000,yb_enable_read_committed_isolation=true,enable_deadlock_detection=true'
```
3. Create table:
```
create table foo (a int primary key, b int);
insert into foo select generate_series(1, 100), 0;
```
4. Restart cluster with test pause:
```
bin/yb-ctl restart --tserver_flags 'enable_wait_queues=true,wait_queue_poll_interval_ms=1000,yb_enable_read_committed_isolation=true,enable_deadlock_detection=true,TEST_writequery_pause_after_locks=1000000'
```
5. Issue query and check debug page to confirm info is properly displayed

Jenkins: urgent

Reviewers: bkolagani, sergei

Reviewed By: bkolagani

Subscribers: bogdan, ybase, rthallam

Differential Revision: https://phorge.dev.yugabyte.com/D28270
  • Loading branch information
robertsami committed Sep 5, 2023
1 parent 939af2c commit f5330e7
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 22 deletions.
76 changes: 57 additions & 19 deletions src/yb/docdb/shared_lock_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include "yb/docdb/lock_batch.h"

#include "yb/dockv/intent.h"
#include "yb/util/enums.h"
#include "yb/util/ref_cnt_buffer.h"
#include "yb/util/scope_exit.h"
Expand All @@ -38,18 +39,13 @@ using dockv::IntentTypeSet;

namespace {

// Lock state stores number of locks acquired for each intent type.
// Count for each intent type resides in sequential bits (block) in lock state.
// For example count of lock on particular intent type could be received as:
// (lock_state >> kIntentTypeShift[type]) & kSingleIntentMask.

// We have 64 bits in LockState and 4 types of intents. So 16 bits is max number of bits
// that we could reserve for block of single intent type.
// We have 64 bits in LockState and 4 types of intents. So 16 bits is the max number of bits
// that we could reserve for a block of single intent type.
const size_t kIntentTypeBits = 16;
// kSingleIntentMask represents the LockState which, when &'d with another LockState, would result
// in the LockState tracking only the count for intent type represented by the region of bits that
// is "least significant", as in furthest to the right.
const LockState kSingleIntentMask = (static_cast<LockState>(1) << kIntentTypeBits) - 1;
// kFirstIntentTypeMask represents the LockState which, when &'d with another LockState, would
// result in the LockState tracking only the count for intent type represented by the region of bits
// that is the "first" or "least significant", as in furthest to the right.
const LockState kFirstIntentTypeMask = (static_cast<LockState>(1) << kIntentTypeBits) - 1;

bool IntentTypesConflict(dockv::IntentType lhs, dockv::IntentType rhs) {
auto lhs_value = to_underlying(lhs);
Expand All @@ -62,7 +58,7 @@ bool IntentTypesConflict(dockv::IntentType lhs, dockv::IntentType rhs) {
}

LockState IntentTypeMask(
dockv::IntentType intent_type, LockState single_intent_mask = kSingleIntentMask) {
dockv::IntentType intent_type, LockState single_intent_mask = kFirstIntentTypeMask) {
return single_intent_mask << (to_underlying(intent_type) * kIntentTypeBits);
}

Expand All @@ -85,15 +81,15 @@ std::array<LockState, dockv::kIntentTypeSetMapSize> GenerateConflicts() {
return result;
}

// Generate array of LockState's with one entry for each possible subset of intent type set.
// The entry is combination of single_intent_mask for intents from set.
std::array<LockState, dockv::kIntentTypeSetMapSize> GenerateByMask(LockState single_intent_mask) {
DCHECK_EQ(single_intent_mask & kSingleIntentMask, single_intent_mask);
// Generate array of LockState's with one entry for each possible subset of intent type set, where
// each intent type has the provided count.
std::array<LockState, dockv::kIntentTypeSetMapSize> GenerateLockStatesWithCount(uint64_t count) {
DCHECK_EQ(count & kFirstIntentTypeMask, count);
std::array<LockState, dockv::kIntentTypeSetMapSize> result;
for (size_t idx = 0; idx != dockv::kIntentTypeSetMapSize; ++idx) {
result[idx] = 0;
for (auto intent_type : IntentTypeSet(idx)) {
result[idx] |= IntentTypeMask(intent_type, single_intent_mask);
result[idx] |= IntentTypeMask(intent_type, count);
}
}
return result;
Expand All @@ -115,11 +111,11 @@ const IntentTypeSetMap kIntentTypeSetConflicts = GenerateConflicts();

// Maps IntentTypeSet to the LockState representing one count for each intent type in the set. Can
// be used to "add one" occurrence of an IntentTypeSet to an existing key's LockState.
const IntentTypeSetMap kIntentTypeSetAdd = GenerateByMask(1);
const IntentTypeSetMap kIntentTypeSetAdd = GenerateLockStatesWithCount(1);

// Maps IntentTypeSet to the LockState representing max count for each intent type in the set. Can
// be used to extract a LockState corresponding to having only that set's elements counts present.
const IntentTypeSetMap kIntentTypeSetMask = GenerateByMask(kSingleIntentMask);
const IntentTypeSetMap kIntentTypeSetMask = GenerateLockStatesWithCount(kFirstIntentTypeMask);

bool IntentTypeSetsConflict(IntentTypeSet lhs, IntentTypeSet rhs) {
for (auto intent1 : lhs) {
Expand All @@ -132,6 +128,11 @@ bool IntentTypeSetsConflict(IntentTypeSet lhs, IntentTypeSet rhs) {
return false;
}

uint16_t GetCountOfIntents(const LockState& num_waiting, dockv::IntentType intent_type) {
return (num_waiting >> (to_underlying(intent_type) * kIntentTypeBits))
& kFirstIntentTypeMask;
}

struct LockedBatchEntry {
// Taken only for short duration, with no blocking wait.
mutable std::mutex mutex;
Expand All @@ -157,6 +158,18 @@ struct LockedBatchEntry {
ref_count, num_holding.load(std::memory_order_acquire),
num_waiters.load(std::memory_order_acquire));
}

std::string ToDebugString() const {
auto holding = num_holding.load(std::memory_order_acquire);
return Format("{ ref_count: $0 num_weak_read_holders: $1 num_weak_write_holders: $2 "
"num_strong_read_holders: $3 num_strong_write_holders: $4 num_waiters: $5 }",
ref_count,
GetCountOfIntents(holding, dockv::IntentType::kWeakRead),
GetCountOfIntents(holding, dockv::IntentType::kWeakWrite),
GetCountOfIntents(holding, dockv::IntentType::kStrongRead),
GetCountOfIntents(holding, dockv::IntentType::kStrongWrite),
num_waiters.load(std::memory_order_acquire));
}
};

class SharedLockManager::Impl {
Expand All @@ -169,6 +182,8 @@ class SharedLockManager::Impl {
LOG_IF(DFATAL, !locks_.empty()) << "Locks not empty in dtor: " << yb::ToString(locks_);
}

void DumpStatusHtml(std::ostream& out);

private:
typedef std::unordered_map<RefCntPrefix, LockedBatchEntry*, RefCntPrefixHash> LockEntryMap;

Expand All @@ -189,6 +204,19 @@ class SharedLockManager::Impl {
std::vector<LockedBatchEntry*> free_lock_entries_ GUARDED_BY(global_mutex_);
};

// Writes an HTML table to `out` with one row per entry in locks_: the lock prefix in the first
// column and the entry's ToDebugString() in the second.
//
// The table header is emitted before global_mutex_ is taken; the mutex is then held for the rest
// of the function, i.e. while locks_ is iterated and the closing tag is written.
//
// NOTE(review): the prefix text is streamed without HTML escaping — confirm that
// prefix.ToString() cannot produce characters that need escaping.
void SharedLockManager::Impl::DumpStatusHtml(std::ostream& out) {
  out << "<table>" << std::endl
      << "<tr><th>Prefix |</th><th>| LockBatchEntry</th></tr>" << std::endl;

  std::lock_guard l(global_mutex_);
  for (const auto& lock_item : locks_) {
    const auto& key_prefix = lock_item.first;
    const auto& batch_entry = lock_item.second;

    out << "<tr>" << "<td>";
    // An empty prefix would render as a blank cell, so show a visible placeholder instead.
    if (key_prefix.size() > 0) {
      out << key_prefix.ToString();
    } else {
      out << "[empty]";
    }
    out << "</td>";
    out << "<td>" << batch_entry->ToDebugString() << "</td>";
    out << "</tr>";
  }
  out << "</table>" << std::endl;
}

std::string SharedLockManager::ToString(const LockState& state) {
std::string result = "{";
bool first = true;
Expand Down Expand Up @@ -230,10 +258,16 @@ bool LockedBatchEntry::Lock(IntentTypeSet lock_type, CoarseTimePoint deadline) {
// Note -- even if we wait here, we don't need to be aware for the purposes of deadlock
// detection since this eventually succeeds (in which case thread gets to queue) or times
// out (thereby eliminating any possibly untraced deadlock).
VLOG(4) << "Waiting to acquire lock type: " << type_idx
<< " with num_holding: " << old_value << ", num_waiters: " << num_waiters
<< " with deadline: " << deadline.time_since_epoch();
if (cond_var.wait_until(lock, deadline) == std::cv_status::timeout) {
return false;
}
} else {
VLOG(4) << "Waiting to acquire lock type: " << type_idx
<< " with num_holding: " << old_value << ", num_waiters: " << num_waiters
<< " without deadline";
// TODO(wait-queues): Hitting this branch with wait queues could cause deadlocks if
// we never reach the wait queue and register the "waiting for" relationship. We should add
// a DCHECK that wait queues are not enabled in this branch, or remove the branch.
Expand Down Expand Up @@ -356,5 +390,9 @@ void SharedLockManager::Unlock(const LockBatchEntries& key_to_intent_type) {
impl_->Unlock(key_to_intent_type);
}

// Writes the debug view of this lock manager to `out` by forwarding to the implementation
// object (impl_), which produces the actual output.
void SharedLockManager::DumpStatusHtml(std::ostream& out) {
impl_->DumpStatusHtml(out);
}

} // namespace docdb
} // namespace yb
6 changes: 6 additions & 0 deletions src/yb/docdb/shared_lock_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
namespace yb {
namespace docdb {

// Lock state stores the number of locks acquired for each intent type.
// The count for each intent type resides in a block of sequential bits in the lock state.
// For example, the count of locks for a particular intent type can be retrieved as:
// (lock_state >> (to_underlying(intent_type) * kIntentTypeBits)) & kFirstIntentTypeMask.
typedef uint64_t LockState;

// This class manages six types of locks on string keys. On each key, the possibilities are:
Expand All @@ -51,6 +55,8 @@ class SharedLockManager {
// Whether or not the state is possible
static std::string ToString(const LockState& state);

void DumpStatusHtml(std::ostream& out);

private:
class Impl;
std::unique_ptr<Impl> impl_;
Expand Down
3 changes: 1 addition & 2 deletions src/yb/docdb/wait_queue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1163,8 +1163,7 @@ class WaitQueue::Impl {
void DumpStatusHtml(std::ostream& out) {
SharedLock l(mutex_);
if (shutting_down_) {
out << "Shutting down...";
return;
out << "Shutting down..." << std::endl;
}

out << "<h2>Txn Waiters:</h2>" << std::endl;
Expand Down
24 changes: 23 additions & 1 deletion src/yb/tserver/tserver-path-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,8 @@ void HandleTabletPage(
{"log-anchors", "Tablet Log Anchors"},
{"transactions", "Transactions"},
{"rocksdb", "RocksDB" },
{"waitqueue", "Wait Queue"}};
{"waitqueue", "Wait Queue"},
{"sharedlockmanager", "In-Memory Locks"}};

auto encoded_tablet_id = UrlEncodeToString(tablet_id);
for (const auto& entry : entries) {
Expand Down Expand Up @@ -410,6 +411,26 @@ void HandleWaitQueuePage(
}
}

// Renders the "In-Memory Locks" debug page for a single tablet into resp->output.
//
// The page always starts with a heading containing the HTML-escaped tablet id. After that:
//   - if the tablet cannot be obtained from the peer, the failure status is written and the
//     handler returns;
//   - if the tablet has no shared lock manager, a message saying so is written;
//   - otherwise the shared lock manager writes its own status via DumpStatusHtml().
//
// `req` is not used by this handler.
void HandleInMemoryLocksPage(
    const std::string& tablet_id, const tablet::TabletPeerPtr& peer,
    const Webserver::WebRequest& req, Webserver::WebResponse* resp) {
  std::stringstream& page = resp->output;
  page << "<h1>In-Memory Locks for Tablet "
       << EscapeForHtmlToString(tablet_id) << "</h1>" << std::endl;

  auto tablet_or_status = peer->shared_tablet_safe();
  if (!tablet_or_status.ok()) {
    page << tablet_or_status.status();
    return;
  }

  auto* lock_manager = (*tablet_or_status)->shared_lock_manager();
  if (!lock_manager) {
    page << "<h3>" << "No shared lock manager found. This is unexpected." << "</h3>" << std::endl;
    return;
  }
  lock_manager->DumpStatusHtml(page);
}

template<class F>
void RegisterTabletPathHandler(
Webserver* web_server, TabletServer* tserver, const std::string& path, const F& f) {
Expand Down Expand Up @@ -447,6 +468,7 @@ Status TabletServerPathHandlers::Register(Webserver* server) {
RegisterTabletPathHandler(server, tserver_, "/transactions", &HandleTransactionsPage);
RegisterTabletPathHandler(server, tserver_, "/rocksdb", &HandleRocksDBPage);
RegisterTabletPathHandler(server, tserver_, "/waitqueue", &HandleWaitQueuePage);
RegisterTabletPathHandler(server, tserver_, "/sharedlockmanager", &HandleInMemoryLocksPage);
server->RegisterPathHandler(
"/", "Dashboards",
std::bind(&TabletServerPathHandlers::HandleDashboardsPage, this, _1, _2), true /* styled */,
Expand Down

0 comments on commit f5330e7

Please sign in to comment.