From a11b4bebcb4aacf36ad4ce2712c6696bdff5d143 Mon Sep 17 00:00:00 2001
From: rjl493456442
Date: Tue, 26 Nov 2024 18:33:59 +0800
Subject: [PATCH] Revert "core/state/snapshot: simplify snapshot rebuild
 (#30772)" (#30810)

This reverts commit 23800122b37695be50565f8221858a16ce1763db.

The original pull request introduced a bug, and some flaky tests were
detected because of this flaw:

```
--- FAIL: TestRecoverSnapshotFromWipingCrash (0.27s)
    blockchain_snapshot_test.go:158: The disk layer is not integrated snapshot is not constructed
{"pc":0,"op":88,"gas":"0x7148","gasCost":"0x2","memSize":0,"stack":[],"depth":1,"refund":0,"opName":"PC"}
{"pc":1,"op":255,"gas":"0x7146","gasCost":"0x1db0","memSize":0,"stack":["0x0"],"depth":1,"refund":0,"opName":"SELFDESTRUCT"}
{"output":"","gasUsed":"0x0"}
{"output":"","gasUsed":"0x1db2"}
{"pc":0,"op":116,"gas":"0x13498","gasCost":"0x3","memSize":0,"stack":[],"depth":1,"refund":0,"opName":"PUSH21"}
```

Before the original PR, the snapshot would block the function until the
disk layer was fully generated under the following conditions:

(a) it was explicitly required by users with `AsyncBuild = false`;
(b) the snapshot was being fully rebuilt or *the disk layer generation
    had resumed*.

Unfortunately, with the changes introduced in that PR, the snapshot no
longer waits for disk layer generation to complete if the generation is
resumed. This introduces a lot of uncertainty and breaks this tiny
debug feature.
---
 core/state/snapshot/snapshot.go | 48 +++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go
index 181dc6b746fc..7466f1235198 100644
--- a/core/state/snapshot/snapshot.go
+++ b/core/state/snapshot/snapshot.go
@@ -206,24 +206,47 @@ func New(config Config, diskdb ethdb.KeyValueStore, triedb *triedb.Database, roo
 		log.Warn("Snapshot maintenance disabled (syncing)")
 		return snap, nil
 	}
+	// Create the building waiter iff the background generation is allowed
+	if !config.NoBuild && !config.AsyncBuild {
+		defer snap.waitBuild()
+	}
 	if err != nil {
 		log.Warn("Failed to load snapshot", "err", err)
-		if config.NoBuild {
-			return nil, err
-		}
-		wait := snap.Rebuild(root)
-		if !config.AsyncBuild {
-			wait()
+		if !config.NoBuild {
+			snap.Rebuild(root)
+			return snap, nil
 		}
-		return snap, nil
+		return nil, err // Bail out the error, don't rebuild automatically.
 	}
 	// Existing snapshot loaded, seed all the layers
-	for ; head != nil; head = head.Parent() {
+	for head != nil {
 		snap.layers[head.Root()] = head
+		head = head.Parent()
 	}
 	return snap, nil
 }
 
+// waitBuild blocks until the snapshot finishes rebuilding. This method is meant
+// to be used by tests to ensure we're testing what we believe we are.
+func (t *Tree) waitBuild() {
+	// Find the rebuild termination channel
+	var done chan struct{}
+
+	t.lock.RLock()
+	for _, layer := range t.layers {
+		if layer, ok := layer.(*diskLayer); ok {
+			done = layer.genPending
+			break
+		}
+	}
+	t.lock.RUnlock()
+
+	// Wait until the snapshot is generated
+	if done != nil {
+		<-done
+	}
+}
+
 // Disable interrupts any pending snapshot generator, deletes all the snapshot
 // layers in memory and marks snapshots disabled globally. In order to resume
 // the snapshot functionality, the caller must invoke Rebuild.
@@ -665,9 +688,8 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
 
 // Rebuild wipes all available snapshot data from the persistent database and
 // discard all caches and diff layers. Afterwards, it starts a new snapshot
-// generator with the given root hash. The returned function blocks until
-// regeneration is complete.
-func (t *Tree) Rebuild(root common.Hash) (wait func()) {
+// generator with the given root hash.
+func (t *Tree) Rebuild(root common.Hash) {
 	t.lock.Lock()
 	defer t.lock.Unlock()
 
@@ -699,11 +721,9 @@ func (t *Tree) Rebuild(root common.Hash) (wait func()) {
 	// Start generating a new snapshot from scratch on a background thread. The
 	// generator will run a wiper first if there's not one running right now.
 	log.Info("Rebuilding state snapshot")
-	disk := generateSnapshot(t.diskdb, t.triedb, t.config.CacheSize, root)
 	t.layers = map[common.Hash]snapshot{
-		root: disk,
+		root: generateSnapshot(t.diskdb, t.triedb, t.config.CacheSize, root),
 	}
-	return func() { <-disk.genPending }
 }
 
 // AccountIterator creates a new account iterator for the specified root hash and
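
For context, below is a minimal sketch of the debug workflow this revert restores: building a snapshot tree with `AsyncBuild = false`, so that `New` only returns once the disk layer is fully generated, whether that generation started from scratch or resumed after a crash. It is not part of the patch. The `Config` fields and the `New` signature are taken from the hunks above; the use of `rawdb.NewMemoryDatabase`, `triedb.NewDatabase`, and `types.EmptyRootHash` as the demo root are illustrative assumptions, and a real caller would pass the root of an actual state trie committed to the trie database.

```go
package main

import (
	"log"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/core/state/snapshot"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/triedb"
)

// buildBlockingSnapshot is a sketch, not part of this patch. It assumes
// root names a state trie already present in the trie database.
func buildBlockingSnapshot(root common.Hash) *snapshot.Tree {
	diskdb := rawdb.NewMemoryDatabase()
	tdb := triedb.NewDatabase(diskdb, nil)

	cfg := snapshot.Config{
		CacheSize:  16,    // read-cache budget in megabytes
		NoBuild:    false, // permit (re)generation of the snapshot
		AsyncBuild: false, // make New block until the disk layer is done
	}
	// With AsyncBuild == false, New defers snap.waitBuild(), so it returns
	// only after disk layer generation completes -- for a fresh rebuild
	// and, with this revert, for a resumed generation as well.
	snap, err := snapshot.New(cfg, diskdb, tdb, root)
	if err != nil {
		log.Fatalf("snapshot build failed: %v", err)
	}
	return snap
}

func main() {
	// Empty state root used purely for illustration; generation over the
	// empty trie finishes immediately, so this call returns promptly.
	_ = buildBlockingSnapshot(types.EmptyRootHash)
}
```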