From e8ed0f0693c631d86a4b50ed697379b83b08cf0a Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 19 Jun 2024 19:41:07 +0100 Subject: [PATCH] Add a fast path flag This update adds a fast path flag for the uncontended case. This reduces the number of atomic operations in the uncontended case. --- src/snmalloc/ds/combininglock.h | 64 +++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/src/snmalloc/ds/combininglock.h b/src/snmalloc/ds/combininglock.h index 51bb7dfc3..4be819be8 100644 --- a/src/snmalloc/ds/combininglock.h +++ b/src/snmalloc/ds/combininglock.h @@ -9,7 +9,15 @@ namespace snmalloc { class CombineLockNode; - using CombiningLock = std::atomic; + + struct CombiningLock + { + // Fast path lock incase there is no contention. + std::atomic flag{false}; + + // MCS queue of work items + std::atomic head{nullptr}; + }; /** * @brief Combinations of MCS queue lock with Flat Combining @@ -52,6 +60,11 @@ namespace snmalloc // Stores the C++ lambda associated with this node in the queue. void (*f_raw)(CombineLockNode*); + void release(CombiningLock& lock) + { + lock.flag.store(false, std::memory_order_release); + } + void set_status(LockStatus s) { status.store(s, std::memory_order_release); @@ -62,8 +75,24 @@ namespace snmalloc void attach(CombiningLock& lock) { - // Add to the queue of pending work - auto prev = lock.exchange(this, std::memory_order_acq_rel); + // Try fast path if no one is waiting + if (lock.head.load(std::memory_order_relaxed) == nullptr) + { + // Start by assuming no contention and attempt to acquire the lock. + if (lock.flag.exchange(true, std::memory_order_acquire) == false) + { + // We grabbed the lock. + f_raw(this); + + // Release the lock + release(lock); + return; + } + } + + // There is contention for the lock, we need to add our work to the + // queue of pending work + auto prev = lock.head.exchange(this, std::memory_order_acq_rel); if (prev != nullptr) { @@ -78,10 +107,23 @@ namespace snmalloc if (status.load(std::memory_order_acquire) == LockStatus::DONE) return; } - // We could add an else branch here that could set - // status = LockStatus::Ready - // However, the subsequent state assumes it is Ready, and - // nothing would read it. + else + { + // We are the head of the queue. Spin until we acquire the fast path + // lock. + while (lock.flag.exchange(true, std::memory_order_acquire)) + { + while (lock.flag.load(std::memory_order_relaxed)) + { + Aal::pause(); + } + } + + // We could set + // status = LockStatus::Ready + // However, the subsequent state assumes it is Ready, and + // nothing would read it. + } // We are the head of the queue, and responsible for // waking/performing our and subsequent work. @@ -105,11 +147,13 @@ namespace snmalloc // This could be the end of the queue, attempt to close the // queue. auto curr_c = curr; - if (lock.compare_exchange_strong(curr_c, nullptr, std::memory_order_acq_rel)) + if (lock.head.compare_exchange_strong( + curr_c, nullptr, std::memory_order_acq_rel)) { // Queue was successfully closed. // Notify last element the work was completed. curr->set_status(LockStatus::DONE); + release(lock); return; } @@ -119,8 +163,8 @@ namespace snmalloc Aal::pause(); // As we had to wait, give the job to the next thread - // to carry on performing the work. - auto n = curr->next.load(std::memory_order_acquire); + // to carry on performing the work, and release the flag. + auto n = curr->next.load(std::memory_order_acquire); n->set_status(LockStatus::READY); // Notify the thread that we completed its work.