From 26bd4a59adb35e671b3f89eb8a1b9e55f9081b16 Mon Sep 17 00:00:00 2001 From: David Chisnall Date: Fri, 12 Apr 2024 14:10:31 +0100 Subject: [PATCH] Tickless scheduler This commit incorporates several changes: Timers are now not set for a regular tick, they are set when a thread may be preempted. Specifically, the timer is set to the next timeout that will trigger a scheduling operation. This avoids timers triggering a switch to the scheduler to do nothing (resume the currently running thread). This means that if a thread sleeps for ten ticks while another runs, we will get one timer interrupt ten ticks in the future, rather than ten interrupts one tick apart. This means that ticks are now calculated retroactively based on elapsed time, rather than counted on each context switch. This, in turn, necessitates some small API changes. We previously conflated two things: - Sleep for N * (tick duration) - Yield and allow lower-priority threads to run for, at most, N * (tick duration) These are now deconflated by adding a second parameter to thread_sleep. Most sleeps are of the second form and so this is the default. This reduces the time taken to run the test suite on Sonata by around 30% and in the Ibex SAFE simulator by 13%. --- sdk/core/allocator/main.cc | 2 +- sdk/core/scheduler/main.cc | 33 +++++-- sdk/core/scheduler/thread.h | 64 ++++++++++++-- sdk/core/scheduler/timer.h | 85 +++++++++++++++++-- sdk/core/switcher/entry.S | 6 ++ sdk/include/FreeRTOS-Compat/task.h | 2 +- .../platform/generic-riscv/platform-timer.hh | 39 +++++---- sdk/include/thread.h | 25 +++++- sdk/lib/debug/debug.cc | 7 ++ tests/locks-test.cc | 2 + 10 files changed, 223 insertions(+), 42 deletions(-) diff --git a/sdk/core/allocator/main.cc b/sdk/core/allocator/main.cc index 29a652b1b..7a47e8212 100644 --- a/sdk/core/allocator/main.cc +++ b/sdk/core/allocator/main.cc @@ -291,7 +291,7 @@ namespace // Drop and reacquire the lock while yielding. // Sleep for a single tick. g.unlock(); - Timeout smallSleep{0}; + Timeout smallSleep{1}; thread_sleep(&smallSleep); if (!reacquire_lock(timeout, g, smallSleep.elapsed)) { diff --git a/sdk/core/scheduler/main.cc b/sdk/core/scheduler/main.cc index 88442be36..f563d102b 100644 --- a/sdk/core/scheduler/main.cc +++ b/sdk/core/scheduler/main.cc @@ -270,15 +270,20 @@ namespace sched ExceptionGuard g{[=]() { sched_panic(mcause, mepc, mtval); }}; + bool tick = false; switch (mcause) { // Explicit yield call case MCAUSE_ECALL_MACHINE: - schedNeeded = true; + { + schedNeeded = true; + Thread *currentThread = Thread::current_get(); + tick = currentThread && currentThread->is_ready(); break; + } case MCAUSE_INTR | MCAUSE_MTIME: - Timer::do_interrupt(); schedNeeded = true; + tick = true; break; case MCAUSE_INTR | MCAUSE_MEXTERN: schedNeeded = false; @@ -293,6 +298,7 @@ namespace sched std::tie(schedNeeded, std::ignore, std::ignore) = futex_wake(Capability{&word}.address()); }); + tick = schedNeeded; break; case MCAUSE_THREAD_EXIT: // Make the current thread non-runnable. @@ -305,13 +311,23 @@ namespace sched // We cannot continue exiting this thread, make sure we will // pick a new one. schedNeeded = true; + tick = true; sealedTStack = nullptr; break; default: sched_panic(mcause, mepc, mtval); } + if (tick || !Thread::any_ready()) + { + Timer::expiretimers(); + } auto newContext = schedNeeded ? Thread::schedule(sealedTStack) : sealedTStack; +#if 0 + Debug::log("Thread: {}", + Thread::current_get() ? Thread::current_get()->id_get() : 0); +#endif + Timer::update(); if constexpr (Accounting) { @@ -419,14 +435,17 @@ SystickReturn __cheri_compartment("sched") thread_systemtick_get() } __cheriot_minimum_stack(0x80) int __cheri_compartment("sched") - thread_sleep(Timeout *timeout) + thread_sleep(Timeout *timeout, uint32_t flags) { STACK_CHECK(0x80); if (!check_timeout_pointer(timeout)) { return -EINVAL; } - Thread::current_get()->suspend(timeout, nullptr, true); + ////Debug::log("Thread {} sleeping for {} ticks", + /// Thread::current_get()->id_get(), timeout->remaining); + Thread *current = Thread::current_get(); + current->suspend(timeout, nullptr, true, !(flags & ThreadSleepNoEarlyWake)); return 0; } @@ -468,8 +487,10 @@ __cheriot_minimum_stack(0xa0) int futex_timed_wait(Timeout *timeout, // If we try to block ourself, that's a mistake. if ((owningThread == currentThread) || (owningThread == nullptr)) { - Debug::log("futex_timed_wait: invalid owning thread {}", - owningThread); + Debug::log("futex_timed_wait: thread {} acquiring PI futex with " + "invalid owning thread {}", + currentThread->id_get(), + owningThreadID); return -EINVAL; } Debug::log("Thread {} boosting priority of {} for futex {}", diff --git a/sdk/core/scheduler/thread.h b/sdk/core/scheduler/thread.h index e5b296143..fc1409048 100644 --- a/sdk/core/scheduler/thread.h +++ b/sdk/core/scheduler/thread.h @@ -18,6 +18,8 @@ namespace // thread structures. class MultiWaiterInternal; + uint64_t expiry_time_for_timeout(uint32_t timeout); + template class ThreadImpl final : private utils::NoCopyNoMove { @@ -114,6 +116,11 @@ namespace return schedTStack; } + static bool any_ready() + { + return priorityMap != 0; + } + /** * When yielding inside the scheduler compartment, we almost always want * to re-enable interrupts before ecall. If we don't, then a thread with @@ -152,11 +159,12 @@ namespace */ bool suspend(Timeout *t, ThreadImpl **newSleepQueue, - bool yieldUnconditionally = false) + bool yieldUnconditionally = false, + bool yieldNotSleep = false) { if (t->remaining != 0) { - suspend(t->remaining, newSleepQueue); + suspend(t->remaining, newSleepQueue, yieldNotSleep); } if ((t->remaining != 0) || yieldUnconditionally) { @@ -188,6 +196,7 @@ namespace OriginalPriority(priority), expiryTime(-1), state(ThreadState::Suspended), + isYielding(false), sleepQueue(nullptr), tStackPtr(tstack) { @@ -212,7 +221,7 @@ namespace // We must be suspended. Debug::Assert(state == ThreadState::Suspended, "Waking thread that is in state {}, not suspended", - state); + static_cast(state)); // First, remove self from the timer waiting list. timer_list_remove(&waitingList); if (sleepQueue != nullptr) @@ -233,11 +242,18 @@ namespace schedule = true; } } + // If this is the same priority as the current thread, we may need + // to update the timer. + if (priority >= highestPriority) + { + schedule = true; + } if (reason == WakeReason::Timer || reason == WakeReason::Delete) { multiWaiter = nullptr; } list_insert(&priorityList[priority]); + isYielding = false; return schedule; } @@ -278,11 +294,14 @@ namespace * waiting on a resource, add it to the list of that resource. No * matter what, it has to be added to the timer list. */ - void suspend(uint32_t waitTicks, ThreadImpl **newSleepQueue) + void suspend(uint32_t waitTicks, + ThreadImpl **newSleepQueue, + bool yieldNotSleep = false) { + isYielding = yieldNotSleep; Debug::Assert(state == ThreadState::Ready, "Suspending thread that is in state {}, not ready", - state); + static_cast(state)); list_remove(&priorityList[priority]); state = ThreadState::Suspended; priority_map_remove(); @@ -291,8 +310,7 @@ namespace list_insert(newSleepQueue); sleepQueue = newSleepQueue; } - expiryTime = - (waitTicks == UINT32_MAX ? -1 : ticksSinceBoot + waitTicks); + expiryTime = expiry_time_for_timeout(waitTicks); timer_list_insert(&waitingList); } @@ -407,7 +425,7 @@ namespace Debug::Assert(state == ThreadState::Suspended, "Inserting thread into timer list that is in state " "{}, not suspended", - state); + static_cast(state)); if (head == nullptr) { timerNext = timerPrev = *headPtr = this; @@ -511,6 +529,29 @@ namespace return priority; } + bool is_ready() + { + return state == ThreadState::Ready; + } + + bool is_yielding() + { + return isYielding; + } + + /** + * Returns true if there are other runnable threads with the same + * priority as this thread. + */ + bool has_priority_peers() + { + Debug::Assert(state == ThreadState::Ready, + "Checking for peers on thread that is in state {}, " + "not ready", + static_cast(state)); + return next != this; + } + ~ThreadImpl() { // We have static definition of threads. We only create threads in @@ -616,7 +657,12 @@ namespace uint8_t priority; /// The original priority level for this thread. This never changes. const uint8_t OriginalPriority; - ThreadState state; + ThreadState state : 2; + /** + * If the thread is yielding, it may be scheduled before its timeout + * expires, as long as no other threads are runnable. + */ + bool isYielding : 1; }; using Thread = ThreadImpl; diff --git a/sdk/core/scheduler/timer.h b/sdk/core/scheduler/timer.h index a86120b51..b56b8f0ec 100644 --- a/sdk/core/scheduler/timer.h +++ b/sdk/core/scheduler/timer.h @@ -28,6 +28,9 @@ namespace class Timer final : private TimerCore { + inline static uint64_t lastTickTime = 0; + inline static uint32_t accumulatedTickError = 0; + public: static void interrupt_setup() { @@ -35,27 +38,62 @@ namespace "Cycles per tick can't be represented in 32 bits. " "Double check your platform config"); init(); - setnext(TIMERCYCLES_PER_TICK); } - static void do_interrupt() + using TimerCore::time; + + static void update() { - ++Thread::ticksSinceBoot; + auto *thread = Thread::current_get(); + bool waitingListIsEmpty = ((Thread::waitingList == nullptr) || + (Thread::waitingList->expiryTime == -1)); + bool threadHasNoPeers = + (thread == nullptr) || (!thread->has_priority_peers()); + if (waitingListIsEmpty && threadHasNoPeers) + { + clear(); + } + else + { + uint64_t nextTimer = waitingListIsEmpty + ? time() + TIMERCYCLES_PER_TICK + : Thread::waitingList->expiryTime; + setnext(nextTimer); + } + } - expiretimers(); - setnext(TIMERCYCLES_PER_TICK); + static uint64_t update_tick() + { + uint64_t now = time(); + uint32_t elapsed = now - lastTickTime; + int32_t error = elapsed % TIMERCYCLES_PER_TICK; + if (elapsed < TIMERCYCLES_PER_TICK) + { + error = TIMERCYCLES_PER_TICK - error; + } + accumulatedTickError += error; + int32_t errorDirection = accumulatedTickError < 0 ? -1 : 1; + int32_t absoluteError = accumulatedTickError * errorDirection; + if (absoluteError >= TIMERCYCLES_PER_TICK) + { + Thread::ticksSinceBoot += errorDirection; + accumulatedTickError += TIMERCYCLES_PER_TICK * -errorDirection; + } + lastTickTime = now; + Thread::ticksSinceBoot += elapsed / TIMERCYCLES_PER_TICK; + return now; } - private: static void expiretimers() { + uint64_t now = update_tick(); if (Thread::waitingList == nullptr) { return; } for (Thread *iter = Thread::waitingList;;) { - if (iter->expiryTime <= Thread::ticksSinceBoot) + if (iter->expiryTime <= now) { Thread *iterNext = iter->timerNext; @@ -72,6 +110,39 @@ namespace break; } } + if (!Thread::any_ready()) + { + for (Thread *iter = Thread::waitingList; iter;) + { + if (iter->is_yielding()) + { + Debug::log("Woke thread {} {} cycles early", + iter->id_get(), + int64_t(iter->expiryTime) - now); + Thread *iterNext = iter->timerNext; + iter->ready(Thread::WakeReason::Timer); + iter = iterNext; + if (Thread::waitingList == nullptr || + iter == Thread::waitingList) + { + break; + } + } + else + { + break; + } + } + } } }; + + uint64_t expiry_time_for_timeout(uint32_t timeout) + { + if (timeout == -1) + { + return -1; + } + return Timer::time() + (timeout * TIMERCYCLES_PER_TICK); + } } // namespace diff --git a/sdk/core/switcher/entry.S b/sdk/core/switcher/entry.S index 8836d088f..30f946591 100644 --- a/sdk/core/switcher/entry.S +++ b/sdk/core/switcher/entry.S @@ -970,7 +970,13 @@ __Z13thread_id_getv: // Load the trusted stack pointer into a register that we will clobber in // the next instruction when we load the thread ID. cspecialr ca0, mtdc + //cgettag a1, ca0 + // If this is a null pointer, don't try to dereference it and report that + // we are thread 0. This permits the debug code to work even from things + // that are not real threads. + //beqz a1, .Lend clh a0, TrustedStack_offset_threadID(ca0) +.Lend: cret diff --git a/sdk/include/FreeRTOS-Compat/task.h b/sdk/include/FreeRTOS-Compat/task.h index 5aac58788..2517c0277 100644 --- a/sdk/include/FreeRTOS-Compat/task.h +++ b/sdk/include/FreeRTOS-Compat/task.h @@ -55,7 +55,7 @@ static inline BaseType_t xTaskCheckForTimeOut(TimeOut_t *pxTimeOut, static inline void vTaskDelay(const TickType_t xTicksToDelay) { struct Timeout timeout = {0, xTicksToDelay}; - thread_sleep(&timeout); + thread_sleep(&timeout, 0); } /** diff --git a/sdk/include/platform/generic-riscv/platform-timer.hh b/sdk/include/platform/generic-riscv/platform-timer.hh index 377f32332..91461c0cb 100644 --- a/sdk/include/platform/generic-riscv/platform-timer.hh +++ b/sdk/include/platform/generic-riscv/platform-timer.hh @@ -38,6 +38,21 @@ class StandardClint : private utils::NoCopyNoMove 2 * sizeof(uint32_t)); } + static uint64_t time() + { + /// the low 32 bits + volatile uint32_t *timerHigh = pmtimer + 1; + uint32_t timeLow, timeHigh; + + // Read the current time. Loop until the high 32 bits are stable. + do + { + timeHigh = *timerHigh; + timeLow = *pmtimer; + } while (timeHigh != *timerHigh); + return (uint64_t(timeHigh) << 32) | timeLow; + } + /** * Set the timer up for the next timer interrupt. We need to: * 1. read the current MTIME, @@ -51,28 +66,22 @@ class StandardClint : private utils::NoCopyNoMove * interrupt. Writing to any half is enough to clear the interrupt, * which is also why 2. is important. */ - static void setnext(uint32_t cycles) + static void setnext(uint64_t nextTime) { /// the high 32 bits of the 64-bit MTIME register volatile uint32_t *pmtimercmphigh = pmtimercmp + 1; - /// the low 32 bits - volatile uint32_t *pmtimerhigh = pmtimer + 1; uint32_t curmtimehigh, curmtime, curmtimenew; - // Read the current time. Loop until the high 32 bits are stable. - do - { - curmtimehigh = *pmtimerhigh; - curmtime = *pmtimer; - } while (curmtimehigh != *pmtimerhigh); - - // Add tick cycles to current time. Handle carry bit. - curmtimehigh += __builtin_add_overflow(curmtime, cycles, &curmtimenew); - // Write the new MTIMECMP value, at which the next interrupt fires. *pmtimercmphigh = -1; // Prevent spurious interrupts. - *pmtimercmp = curmtimenew; - *pmtimercmphigh = curmtimehigh; + *pmtimercmp = nextTime; + *pmtimercmphigh = nextTime >> 32; + } + + static void clear() + { + volatile uint32_t *pmtimercmphigh = pmtimercmp + 1; + *pmtimercmphigh = -1; // Prevent spurious interrupts. } private: diff --git a/sdk/include/thread.h b/sdk/include/thread.h index 3c5430a15..5e8d03aa5 100644 --- a/sdk/include/thread.h +++ b/sdk/include/thread.h @@ -23,6 +23,17 @@ typedef struct [[cheri::interrupt_state(disabled)]] SystickReturn __cheri_compartment("sched") thread_systemtick_get(void); +enum ThreadSleepFlags : uint32_t +{ + /** + * Sleep for up to the specified timeout, but wake early if there are no + * other runnable threads. This allows a high-priority thread to yield for + * a fixed number of ticks for lower-priority threads to run, but does not + * prevent it from resuming early. + */ + ThreadSleepNoEarlyWake = 1 << 0, +}; + /** * Sleep for at most the specified timeout (see `timeout.h`). * @@ -34,9 +45,17 @@ typedef struct * but reports the time spent sleeping. This requires a cross-domain call and * return in addition to the overheads of `yield` and so `yield` should be * preferred in contexts where the elapsed time is not required. + * + * The `flags` parameter is a bitwise OR of `ThreadSleepFlags`. + * + * A sleeping thread may be woken early if no other threads are runnable. The + * thread with the earliest timeout will be woken first. If you are using + * `thread_sleep` to elapse real time, pass `ThreadSleepNoEarlyWake` as the + * flags argument to prevent early wakeups. + * */ [[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched") - thread_sleep(struct Timeout *timeout); + thread_sleep(struct Timeout *timeout, uint32_t flags __if_cxx(= 0)); /** * Return the thread ID of the current running thread. @@ -119,7 +138,7 @@ static inline uint64_t thread_millisecond_wait(uint32_t milliseconds) // In simulation builds, just yield once but don't bother trying to do // anything sensible with time. Timeout t = {0, 1}; - thread_sleep(&t); + thread_sleep(&t, 0); return milliseconds; #else static const uint32_t CyclesPerMillisecond = CPU_TIMER_HZ / 1'000; @@ -133,7 +152,7 @@ static inline uint64_t thread_millisecond_wait(uint32_t milliseconds) while ((end > current) && (end - current > MS_PER_TICK)) { Timeout t = {0, ((uint32_t)(end - current)) / CyclesPerTick}; - thread_sleep(&t); + thread_sleep(&t, ThreadSleepNoEarlyWake); current = rdcycle64(); } // Spin for the remaining time. diff --git a/sdk/lib/debug/debug.cc b/sdk/lib/debug/debug.cc index 075616433..2642d9fbf 100644 --- a/sdk/lib/debug/debug.cc +++ b/sdk/lib/debug/debug.cc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MIT #include +#include using namespace CHERI; @@ -335,7 +336,13 @@ debug_log_message_write(const char *context, DebugPrinter printer; printer.write("\x1b[35m"); printer.write(context); +#if 0 + printer.write(" [Thread "); + printer.write(thread_id_get()); + printer.write("]\033[0m: "); +#else printer.write("\033[0m: "); +#endif printer.format(format, messages, messageCount); printer.write("\n"); } diff --git a/tests/locks-test.cc b/tests/locks-test.cc index ffd8da271..20798c813 100644 --- a/tests/locks-test.cc +++ b/tests/locks-test.cc @@ -59,7 +59,9 @@ namespace "Trying to acquire lock spuriously succeeded"); if constexpr (!std::is_same_v) { +#ifndef SIMULATION TEST(t.elapsed >= 1, "Sleep slept for {} ticks", t.elapsed); +#endif } }