From 417421c5a64f9a3f30f4b130b6cc95d949973835 Mon Sep 17 00:00:00 2001 From: Achille Roussel Date: Tue, 19 Sep 2023 12:05:09 -0700 Subject: [PATCH] optimize GLS Signed-off-by: Achille Roussel --- .gitignore | 19 +++++++++++ getg.go | 4 ++- gls.go | 94 +++++++++++++++++++++++++++++++++++------------------ gls_test.go | 11 +++++++ 4 files changed, 96 insertions(+), 32 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d373ff3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test +*.wasm + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Emacs +*~ diff --git a/getg.go b/getg.go index fc92a3b..485b705 100644 --- a/getg.go +++ b/getg.go @@ -1,7 +1,9 @@ package coroutine +import "unsafe" + // getg is like the compiler intrisinc runtime.getg which retrieves the current // goroutine object. // // https://github.com/golang/go/blob/a2647f08f0c4e540540a7ae1b9ba7e668e6fed80/src/runtime/HACKING.md?plain=1#L44-L54 -func getg() uintptr +func getg() unsafe.Pointer diff --git a/gls.go b/gls.go index 2e78b5d..32b6976 100644 --- a/gls.go +++ b/gls.go @@ -1,49 +1,81 @@ package coroutine -import "sync" +import ( + "sync" + "unsafe" +) // goroutine local storage; the map contains one entry for each goroutine that // is started to power a coroutine. 
// -// TOOD: the global mutex is likely going to become a contention point in highly -// parallel programs, here's how we should fix: -// -// - create a sharded map with 64 buckets, each bucket contains a map -// - use a sync.Mutex in each bucket for synchronization; cheaper than RWMutex -// - mask the value of g to determine in which bucket its GLS is stored -// // An alternative to using a global map could be to analyze the memory layout of // the runtime.g type and determine if there is spare room after the struct to // store the Context pointer: the Go memory allocate uses size classes to park // objects in buckets, there is often spare space after large values like the // runtime.g type since they will be assigned to the size class greater or equal -// to their type size. We only need 4 or 8 bytes of spare space on 32 or 64 bit -// architectures. This approach would remove all potential contention accessing -// and synchronizing on global state, and would also turn the map lookups into -// simple memory loads. -var ( - gmutex sync.RWMutex - gstate map[uintptr]any -) +// to their type size. We only need 8 or 16 bytes of spare space on 32 or 64 bit +// architectures to store the context type and value. This approach would remove +// all potential contention accessing and synchronizing on global state, and +// would also turn the map lookups into simple memory loads. +var gstate glsTable + +const glsTableBuckets = 64 + +type glsTable [glsTableBuckets]glsBucket + +func (t *glsTable) bucket(k unsafe.Pointer) *glsBucket { + h := uint64(uintptr(k)) + // murmur3 hash finalizer, computed in uint64 because its constants and shifts do not fit a 32 bit uintptr; hashing pointers is necessary to ensure a good + // distribution of keys across buckets, otherwise the alignment and + // collocation done by the memory allocator group all keys in a few + // buckets.
+ h ^= h >> 33 + h *= 0xff51afd7ed558ccd + h ^= h >> 33 + h *= 0xc4ceb9fe1a85ec53 + h ^= h >> 33 + // bucket selection + h &= glsTableBuckets - 1 + return &t[h] +} + +func (t *glsTable) load(k unsafe.Pointer) any { + return t.bucket(k).load(k) +} + +func (t *glsTable) store(k unsafe.Pointer, v any) { + t.bucket(k).store(k, v) +} + +func (t *glsTable) clear(k unsafe.Pointer) { + t.bucket(k).clear(k) +} -func loadContext(g uintptr) any { - gmutex.RLock() - v := gstate[g] - gmutex.RUnlock() +type glsBucket struct { + values sync.Map +} + +func (b *glsBucket) load(k unsafe.Pointer) any { + v, _ := b.values.Load(k) return v } -func storeContext(g uintptr, c any) { - gmutex.Lock() - if gstate == nil { - gstate = make(map[uintptr]any) - } - gstate[g] = c - gmutex.Unlock() +func (b *glsBucket) store(k unsafe.Pointer, v any) { + b.values.Store(k, v) +} + +func (b *glsBucket) clear(k unsafe.Pointer) { + b.values.Delete(k) +} + +func loadContext(g unsafe.Pointer) any { + return gstate.load(g) +} + +func storeContext(g unsafe.Pointer, c any) { + gstate.store(g, c) } -func clearContext(g uintptr) { - gmutex.Lock() - delete(gstate, g) - gmutex.Unlock() +func clearContext(g unsafe.Pointer) { + gstate.clear(g) } diff --git a/gls_test.go b/gls_test.go index 50916b4..c15a752 100644 --- a/gls_test.go +++ b/gls_test.go @@ -67,4 +67,15 @@ func BenchmarkGLS(b *testing.B) { } }) }) + + b.Run("store load clear", func(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + g := getg() + for pb.Next() { + storeContext(g, 42) + loadContext(g) + clearContext(g) + } + }) + }) }