From e19779e2d01e1c28b6b41989dc3cae7327733c39 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 00:34:38 +0100 Subject: [PATCH 01/42] save --- cmd/integration/commands/state_domains.go | 162 ++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 6ca2ecfbea1..79b66171bde 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -18,12 +18,17 @@ package commands import ( "context" + "encoding/binary" "encoding/hex" "errors" "fmt" + "os" + "path" "path/filepath" + "sort" "strings" + "github.com/erigontech/erigon-lib/seg" state3 "github.com/erigontech/erigon-lib/state" "github.com/spf13/cobra" @@ -33,7 +38,9 @@ import ( libcommon "github.com/erigontech/erigon-lib/common" "github.com/erigontech/erigon-lib/common/datadir" "github.com/erigontech/erigon-lib/common/length" + downloadertype "github.com/erigontech/erigon-lib/downloader/snaptype" "github.com/erigontech/erigon-lib/kv" + "github.com/erigontech/erigon-lib/kv/mdbx" kv2 "github.com/erigontech/erigon-lib/kv/mdbx" "github.com/erigontech/erigon/cmd/utils" "github.com/erigontech/erigon/core" @@ -52,6 +59,9 @@ func init() { withStartTx(readDomains) rootCmd.AddCommand(readDomains) + + withDataDir(purifyDomains) + rootCmd.AddCommand(purifyDomains) } // if trie variant is not hex, we could not have another rootHash with to verify it @@ -120,6 +130,158 @@ var readDomains = &cobra.Command{ }, } +var purifyDomains = &cobra.Command{ + Use: "purify_domains", + Short: `Regenerate kv files without repeating keys.`, + Example: "go run ./cmd/integration purify_domains --datadir=... --verbosity=3", + Args: cobra.ArbitraryArgs, + Run: func(cmd *cobra.Command, args []string) { + dirs := datadir.New(datadirCli) + // Iterate over all the files in dirs.SnapDomain and print them + domainDir := dirs.SnapDomain + + // make a temporary dir + tmpDir, err := os.MkdirTemp("purifyTemp", "") // make a temporary dir to store the keys + if err != nil { + fmt.Println("Error creating temporary directory: ", err) + return + } + // make a temporary DB to store the keys + + purifyDB := mdbx.MustOpen(tmpDir) + defer purifyDB.Close() + + purificationDomains := []string{"account", "storage", "code", "commitment"} + for _, domain := range purificationDomains { + if err := makePurifiableIndexDB(purifyDB, dirs, log.New(), domain); err != nil { + fmt.Println("Error making purifiable index DB: ", err) + return + } + } + // 2. Walk through the domainDir and process each file + err = filepath.Walk(domainDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + // Skip directories + if info.IsDir() { + return nil + } + // Here you can decide if you only want to process certain file extensions + // e.g., .kv files + if filepath.Ext(path) != ".kv" { + // Skip non-kv files if that's your domain’s format + return nil + } + + fmt.Printf("Processing file: %s\n", path) + + // // Purify the file (remove duplicate keys) + // if err := purifyKVFile(path); err != nil { + // return fmt.Errorf("failed to purify file %s: %w", path, err) + // } + return nil + }) + if err != nil { + fmt.Printf("error walking the path %q: %v\n", domainDir, err) + } + + }, +} + +func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domain string) error { + var tbl string + switch domain { + case "account": + tbl = kv.TblAccountVals + case "storage": + tbl = kv.TblStorageVals + case "code": + tbl = kv.TblCodeVals + case "commitment": + tbl = kv.TblCommitmentVals + default: + return fmt.Errorf("invalid domain %s", domain) + } + // Iterate over all the files in dirs.SnapDomain and print them + filesNamesToIndex := []string{} + if err := filepath.Walk(dirs.SnapCaplin, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + // Skip directories + if info.IsDir() { + return nil + } + if !strings.Contains(info.Name(), domain) { + return nil + } + // Here you can decide if you only want to process certain file extensions + // e.g., .kv files + if filepath.Ext(path) != ".kv" { + // Skip non-kv files if that's your domain’s format + return nil + } + + fmt.Printf("Add file to indexing of %s: %s\n", domain, path) + + filesNamesToIndex = append(filesNamesToIndex, info.Name()) + return nil + }); err != nil { + return fmt.Errorf("failed to walk through the domainDir %s: %w", domain, err) + } + // sort the files by name + sort.Slice(filesNamesToIndex, func(i, j int) bool { + res, ok, _ := downloadertype.ParseFileName(dirs.SnapDomain, filesNamesToIndex[i]) + if !ok { + panic("invalid file name") + } + res2, ok, _ := downloadertype.ParseFileName(dirs.SnapDomain, filesNamesToIndex[j]) + if !ok { + panic("invalid file name") + } + return res.From < res2.From + }) + tx, err := db.BeginRw(context.Background()) + if err != nil { + return fmt.Errorf("failed to start transaction: %w", err) + } + + // now start the file indexing + for i, fileName := range filesNamesToIndex { + wordsFile, err := seg.OpenRawWordsFile(path.Join(dirs.SnapDomain, fileName)) + if err != nil { + return fmt.Errorf("failed to open file %s: %w", fileName, err) + } + defer wordsFile.Close() + isKey := true + dat := make([]byte, 4) + count := 0 + if err := wordsFile.ForEach(func(v []byte, compressed bool) error { + if !isKey { + isKey = !isKey + return nil + } + binary.BigEndian.PutUint32(dat, uint32(i)) + if err := tx.Put(tbl, v, dat); err != nil { + return fmt.Errorf("failed to put key %x: %w", v, err) + } + isKey = !isKey + if count%100000 == 0 { + fmt.Printf("Indexed %d keys in file %s\n", count, fileName) + } + return nil + }); err != nil { + return fmt.Errorf("failed to iterate over file %s: %w", fileName, err) + } + fmt.Printf("Indexed %d keys in file %s\n", count, fileName) + } + if err := tx.Commit(); err != nil { + return fmt.Errorf("failed to commit transaction: %w", err) + } + return nil +} + func requestDomains(chainDb, stateDb kv.RwDB, ctx context.Context, readDomain string, addrs [][]byte, logger log.Logger) error { sn, bsn, agg, _, _, _ := allSnapshots(ctx, chainDb, logger) defer sn.Close() From 49145688e90eb22f766c828505d1ed0c721e14e1 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 00:40:24 +0100 Subject: [PATCH 02/42] save --- cmd/integration/commands/state_domains.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 79b66171bde..8e5d9d071f6 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -205,7 +205,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom } // Iterate over all the files in dirs.SnapDomain and print them filesNamesToIndex := []string{} - if err := filepath.Walk(dirs.SnapCaplin, func(path string, info os.FileInfo, err error) error { + if err := filepath.Walk(dirs.SnapDomain, func(path string, info os.FileInfo, err error) error { if err != nil { return err } From a1c1dab3adc119df1c52a5ae92debf2dd0eafd50 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 00:40:43 +0100 Subject: [PATCH 03/42] save --- cmd/integration/commands/state_domains.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 8e5d9d071f6..d55ae594aed 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -257,6 +257,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom isKey := true dat := make([]byte, 4) count := 0 + fmt.Printf("Indexing file %s\n", fileName) if err := wordsFile.ForEach(func(v []byte, compressed bool) error { if !isKey { isKey = !isKey From 90e0148ca490ed62d5d2565d3a041b3b6aaa064f Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 00:43:03 +0100 Subject: [PATCH 04/42] save --- cmd/integration/commands/state_domains.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index d55ae594aed..151b67f389a 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -268,6 +268,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom return fmt.Errorf("failed to put key %x: %w", v, err) } isKey = !isKey + count++ if count%100000 == 0 { fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } From 372dbce8c9c625d8ce5deb45bd6d3d263f9f098f Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 00:43:43 +0100 Subject: [PATCH 05/42] save --- cmd/integration/commands/state_domains.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 151b67f389a..f6e04271527 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -269,6 +269,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom } isKey = !isKey count++ + fmt.Println("count: ", count, "keyLength: ", len(v)) if count%100000 == 0 { fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } From 8ca86b7bb22600f9590479ec64d4c7f6d778a35c Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 00:44:49 +0100 Subject: [PATCH 06/42] save --- cmd/integration/commands/state_domains.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index f6e04271527..3295be8413f 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -259,6 +259,9 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom count := 0 fmt.Printf("Indexing file %s\n", fileName) if err := wordsFile.ForEach(func(v []byte, compressed bool) error { + if len(v) == 0 { + return nil + } if !isKey { isKey = !isKey return nil From ca55a64b501537e6d4333eba3801443b66e7b19a Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 00:57:45 +0100 Subject: [PATCH 07/42] save --- cmd/integration/commands/state_domains.go | 29 +++++++++++------------ 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 3295be8413f..23c4f2b6fd4 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -249,36 +249,35 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom // now start the file indexing for i, fileName := range filesNamesToIndex { - wordsFile, err := seg.OpenRawWordsFile(path.Join(dirs.SnapDomain, fileName)) - if err != nil { - return fmt.Errorf("failed to open file %s: %w", fileName, err) - } - defer wordsFile.Close() isKey := true dat := make([]byte, 4) count := 0 + + dec, err := seg.NewDecompressor(path.Join(dirs.SnapDomain, fileName)) + if err != nil { + return fmt.Errorf("failed to create decompressor: %w", err) + } + defer dec.Close() + getter := dec.MakeGetter() fmt.Printf("Indexing file %s\n", fileName) - if err := wordsFile.ForEach(func(v []byte, compressed bool) error { - if len(v) == 0 { - return nil - } + var buf []byte + for getter.HasNext() { if !isKey { isKey = !isKey return nil } + buf, _ = getter.Next(buf) binary.BigEndian.PutUint32(dat, uint32(i)) - if err := tx.Put(tbl, v, dat); err != nil { - return fmt.Errorf("failed to put key %x: %w", v, err) - } + // if err := tx.Put(tbl, v, dat); err != nil { + // return fmt.Errorf("failed to put key %x: %w", v, err) + // } isKey = !isKey count++ - fmt.Println("count: ", count, "keyLength: ", len(v)) + fmt.Println("count: ", count, "keyLength: ", len(buf)) if count%100000 == 0 { fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } return nil - }); err != nil { - return fmt.Errorf("failed to iterate over file %s: %w", fileName, err) } fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } From 831be1a48a72667f840c35737ad38511b22d11bb Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 00:58:15 +0100 Subject: [PATCH 08/42] save --- cmd/integration/commands/state_domains.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 23c4f2b6fd4..c19f8753b23 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -268,9 +268,10 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom } buf, _ = getter.Next(buf) binary.BigEndian.PutUint32(dat, uint32(i)) - // if err := tx.Put(tbl, v, dat); err != nil { - // return fmt.Errorf("failed to put key %x: %w", v, err) - // } + if err := tx.Put(tbl, v, dat); err != nil { + return fmt.Errorf("failed to put key %x: %w", v, err) + } + isKey = !isKey count++ fmt.Println("count: ", count, "keyLength: ", len(buf)) From a800e63dac4572e32b0d3455b2f179830cf96851 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 00:59:38 +0100 Subject: [PATCH 09/42] save --- cmd/integration/commands/state_domains.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index c19f8753b23..e1e26fdae8e 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -266,9 +266,10 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom isKey = !isKey return nil } + buf = buf[:0] buf, _ = getter.Next(buf) binary.BigEndian.PutUint32(dat, uint32(i)) - if err := tx.Put(tbl, v, dat); err != nil { + if err := tx.Put(tbl, buf, dat); err != nil { return fmt.Errorf("failed to put key %x: %w", v, err) } From b367ffa94dc1c773ad60f6a7326d7cec686d2531 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 01:00:01 +0100 Subject: [PATCH 10/42] save --- cmd/integration/commands/state_domains.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index e1e26fdae8e..288d94fc8c5 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -270,7 +270,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom buf, _ = getter.Next(buf) binary.BigEndian.PutUint32(dat, uint32(i)) if err := tx.Put(tbl, buf, dat); err != nil { - return fmt.Errorf("failed to put key %x: %w", v, err) + return fmt.Errorf("failed to put key %x: %w", buf, err) } isKey = !isKey From d91b593bfbff3f2df3cbb18cafbdd3fa391d3f92 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 01:01:14 +0100 Subject: [PATCH 11/42] save --- cmd/integration/commands/state_domains.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 288d94fc8c5..a5dc9470e19 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -263,7 +263,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom var buf []byte for getter.HasNext() { if !isKey { - isKey = !isKey + isKey = true return nil } buf = buf[:0] @@ -273,7 +273,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom return fmt.Errorf("failed to put key %x: %w", buf, err) } - isKey = !isKey + isKey = false count++ fmt.Println("count: ", count, "keyLength: ", len(buf)) if count%100000 == 0 { From a8e8a70516aa37e3ac28688ad188451c6f6d6204 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 01:02:04 +0100 Subject: [PATCH 12/42] save --- cmd/integration/commands/state_domains.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index a5dc9470e19..866f0648108 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -279,7 +279,6 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom if count%100000 == 0 { fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } - return nil } fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } From da0030f41b1d157f29d41f039dc29605da4df9e3 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 01:02:51 +0100 Subject: [PATCH 13/42] save --- cmd/integration/commands/state_domains.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 866f0648108..bf16bf124f9 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -246,6 +246,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom if err != nil { return fmt.Errorf("failed to start transaction: %w", err) } + defer tx.Rollback() // now start the file indexing for i, fileName := range filesNamesToIndex { @@ -264,7 +265,6 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom for getter.HasNext() { if !isKey { isKey = true - return nil } buf = buf[:0] buf, _ = getter.Next(buf) From 89acbeb62f77135e8da4471580ec5b42d3d153a3 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 01:04:47 +0100 Subject: [PATCH 14/42] save --- cmd/integration/commands/state_domains.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index bf16bf124f9..37f3be6c44c 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -263,11 +263,12 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom fmt.Printf("Indexing file %s\n", fileName) var buf []byte for getter.HasNext() { + buf = buf[:0] + buf, _ = getter.Next(buf) if !isKey { isKey = true + continue } - buf = buf[:0] - buf, _ = getter.Next(buf) binary.BigEndian.PutUint32(dat, uint32(i)) if err := tx.Put(tbl, buf, dat); err != nil { return fmt.Errorf("failed to put key %x: %w", buf, err) From 0193f3f9d6f045a0c05fc4bb3a9ba9028c7bb788 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 01:05:23 +0100 Subject: [PATCH 15/42] save --- cmd/integration/commands/state_domains.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 37f3be6c44c..aaaee8b2156 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -276,7 +276,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom isKey = false count++ - fmt.Println("count: ", count, "keyLength: ", len(buf)) + //fmt.Println("count: ", count, "keyLength: ", len(buf)) if count%100000 == 0 { fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } From 6d32e59a28311f5bd879c02a7ce5de4267837965 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 10:55:22 +0100 Subject: [PATCH 16/42] save --- cmd/integration/commands/state_domains.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index aaaee8b2156..7a65ab3125b 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -250,6 +250,9 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom // now start the file indexing for i, fileName := range filesNamesToIndex { + if i == 0 { + continue + } isKey := true dat := make([]byte, 4) count := 0 From bf904dded5535f3e6d8a8d45d8727b73f343b7e1 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 11:16:46 +0100 Subject: [PATCH 17/42] save --- cmd/integration/commands/state_domains.go | 170 ++++++++++++++++++---- 1 file changed, 145 insertions(+), 25 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 7a65ab3125b..38c829c83f6 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -25,6 +25,7 @@ import ( "os" "path" "path/filepath" + "runtime" "sort" "strings" @@ -51,6 +52,8 @@ import ( "github.com/erigontech/erigon/turbo/debug" ) +var purifyDir string + func init() { withDataDir(readDomains) withChain(readDomains) @@ -61,6 +64,7 @@ func init() { rootCmd.AddCommand(readDomains) withDataDir(purifyDomains) + purifyDomains.Flags().StringVar(&purifyDir, "purifiedDomain", "purified-output", "") rootCmd.AddCommand(purifyDomains) } @@ -158,34 +162,15 @@ var purifyDomains = &cobra.Command{ return } } - // 2. Walk through the domainDir and process each file - err = filepath.Walk(domainDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - // Skip directories - if info.IsDir() { - return nil - } - // Here you can decide if you only want to process certain file extensions - // e.g., .kv files - if filepath.Ext(path) != ".kv" { - // Skip non-kv files if that's your domain’s format - return nil + for _, domain := range purificationDomains { + if err := makePurifiedDomainsIndexDB(purifyDB, dirs, log.New(), domain); err != nil { + fmt.Println("Error making purifiable index DB: ", err) + return } - - fmt.Printf("Processing file: %s\n", path) - - // // Purify the file (remove duplicate keys) - // if err := purifyKVFile(path); err != nil { - // return fmt.Errorf("failed to purify file %s: %w", path, err) - // } - return nil - }) + } if err != nil { fmt.Printf("error walking the path %q: %v\n", domainDir, err) } - }, } @@ -251,7 +236,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom // now start the file indexing for i, fileName := range filesNamesToIndex { if i == 0 { - continue + continue // we can skip first layer as all the keys are already mapped to 0. } isKey := true dat := make([]byte, 4) @@ -292,6 +277,141 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom return nil } +func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domain string) error { + var tbl string + switch domain { + case "account": + tbl = kv.TblAccountVals + case "storage": + tbl = kv.TblStorageVals + case "code": + tbl = kv.TblCodeVals + case "commitment": + tbl = kv.TblCommitmentVals + default: + return fmt.Errorf("invalid domain %s", domain) + } + // Iterate over all the files in dirs.SnapDomain and print them + filesNamesToPurify := []string{} + if err := filepath.Walk(dirs.SnapDomain, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + // Skip directories + if info.IsDir() { + return nil + } + if !strings.Contains(info.Name(), domain) { + return nil + } + // Here you can decide if you only want to process certain file extensions + // e.g., .kv files + if filepath.Ext(path) != ".kv" { + // Skip non-kv files if that's your domain’s format + return nil + } + + fmt.Printf("Add file to purification of %s: %s\n", domain, path) + + filesNamesToPurify = append(filesNamesToPurify, info.Name()) + return nil + }); err != nil { + return fmt.Errorf("failed to walk through the domainDir %s: %w", domain, err) + } + // sort the files by name + sort.Slice(filesNamesToPurify, func(i, j int) bool { + res, ok, _ := downloadertype.ParseFileName(dirs.SnapDomain, filesNamesToPurify[i]) + if !ok { + panic("invalid file name") + } + res2, ok, _ := downloadertype.ParseFileName(dirs.SnapDomain, filesNamesToPurify[j]) + if !ok { + panic("invalid file name") + } + return res.From < res2.From + }) + + tx, err := db.BeginRo(context.Background()) + if err != nil { + return fmt.Errorf("failed to start transaction: %w", err) + } + defer tx.Rollback() + os.Mkdir(purifyDir, 0755) + compressCfg := seg.DefaultCfg + compressCfg.Workers = runtime.NumCPU() + // now start the file indexing + for currentLayer, fileName := range filesNamesToPurify { + if currentLayer == 0 { + continue // we can skip first layer as all the keys are already mapped to 0. + } + count := 0 + skipped := 0 + + dec, err := seg.NewDecompressor(path.Join(dirs.SnapDomain, fileName)) + if err != nil { + return fmt.Errorf("failed to create decompressor: %w", err) + } + defer dec.Close() + getter := dec.MakeGetter() + + valuesComp, err := seg.NewCompressor(context.Background(), "Purification", path.Join(purifyDir, fileName), dirs.Tmp, compressCfg, log.LvlTrace, log.New()) + if err != nil { + return fmt.Errorf("create %s values compressor: %w", path.Join(purifyDir, fileName), err) + } + + // Don't use `d.compress` config in collate. Because collat+build must be very-very fast (to keep db small). + // Compress files only in `merge` which ok to be slow. + comp := seg.NewWriter(valuesComp, seg.CompressKeys|seg.CompressVals) + defer comp.Close() + + fmt.Printf("Indexing file %s\n", fileName) + var ( + bufKey []byte + bufVal []byte + ) + + var layer uint32 + for getter.HasNext() { + // get the key and value for the current entry + bufKey = bufKey[:0] + bufKey, _ = getter.Next(bufKey) + bufVal = bufVal[:0] + bufVal, _ = getter.Next(bufVal) + + layerBytes, err := tx.GetOne(tbl, bufKey) + if err != nil { + return fmt.Errorf("failed to get key %x: %w", bufKey, err) + } + // if the key is not found, then the layer is 0 + layer = 0 + if len(layerBytes) == 4 { + layer = binary.BigEndian.Uint32(layerBytes) + } + if layer != uint32(currentLayer) { + skipped++ + continue + } + if err := comp.AddWord(bufKey); err != nil { + return fmt.Errorf("failed to add key %x: %w", bufKey, err) + } + if err := comp.AddWord(bufVal); err != nil { + return fmt.Errorf("failed to add val %x: %w", bufVal, err) + } + count++ + if count%100000 == 0 { + fmt.Printf("Indexed %d keys, skipped %d, in file %s\n", count, skipped, fileName) + } + } + fmt.Printf("Loaded %d keys in file %s. now compressing...\n", count, fileName) + if err := comp.Compress(); err != nil { + return fmt.Errorf("failed to compress: %w", err) + } + fmt.Printf("Compressed %d keys in file %s\n", count, fileName) + comp.Close() + } + return nil +} + func requestDomains(chainDb, stateDb kv.RwDB, ctx context.Context, readDomain string, addrs [][]byte, logger log.Logger) error { sn, bsn, agg, _, _, _ := allSnapshots(ctx, chainDb, logger) defer sn.Close() From 6df9c045979954192ddb9042de736565f7a3038f Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 11:17:20 +0100 Subject: [PATCH 18/42] save --- cmd/integration/commands/state_domains.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 38c829c83f6..37068075aed 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -155,7 +155,8 @@ var purifyDomains = &cobra.Command{ purifyDB := mdbx.MustOpen(tmpDir) defer purifyDB.Close() - purificationDomains := []string{"account", "storage", "code", "commitment"} + // purificationDomains := []string{"account", "storage", "code", "commitment"} + purificationDomains := []string{"account"} for _, domain := range purificationDomains { if err := makePurifiableIndexDB(purifyDB, dirs, log.New(), domain); err != nil { fmt.Println("Error making purifiable index DB: ", err) From 46246dcd526f98bf9fd1cd8897105f0b6c1687b3 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 11:21:16 +0100 Subject: [PATCH 19/42] save --- cmd/integration/commands/state_domains.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 37068075aed..487c3954601 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -342,9 +342,6 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger compressCfg.Workers = runtime.NumCPU() // now start the file indexing for currentLayer, fileName := range filesNamesToPurify { - if currentLayer == 0 { - continue // we can skip first layer as all the keys are already mapped to 0. - } count := 0 skipped := 0 From 083afb24217700fbe53f83a82377414091ddb319 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 13:12:12 +0100 Subject: [PATCH 20/42] save --- cmd/integration/commands/state_domains.go | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 487c3954601..07c289b4889 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -239,8 +239,8 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom if i == 0 { continue // we can skip first layer as all the keys are already mapped to 0. } - isKey := true - dat := make([]byte, 4) + layerBytes := make([]byte, 4) + binary.BigEndian.PutUint32(layerBytes, uint32(i)) count := 0 dec, err := seg.NewDecompressor(path.Join(dirs.SnapDomain, fileName)) @@ -254,21 +254,18 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom for getter.HasNext() { buf = buf[:0] buf, _ = getter.Next(buf) - if !isKey { - isKey = true - continue - } - binary.BigEndian.PutUint32(dat, uint32(i)) - if err := tx.Put(tbl, buf, dat); err != nil { + + if err := tx.Put(tbl, buf, layerBytes); err != nil { return fmt.Errorf("failed to put key %x: %w", buf, err) } - isKey = false count++ //fmt.Println("count: ", count, "keyLength: ", len(buf)) if count%100000 == 0 { fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } + // skip values + getter.Skip() } fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } @@ -357,8 +354,6 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger return fmt.Errorf("create %s values compressor: %w", path.Join(purifyDir, fileName), err) } - // Don't use `d.compress` config in collate. Because collat+build must be very-very fast (to keep db small). - // Compress files only in `merge` which ok to be slow. comp := seg.NewWriter(valuesComp, seg.CompressKeys|seg.CompressVals) defer comp.Close() From f011709573b0d613b81e89f2cb89a98b8bc12aaf Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 13:14:49 +0100 Subject: [PATCH 21/42] save --- cmd/integration/commands/state_domains.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 07c289b4889..1b0242edd8a 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -179,7 +179,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom var tbl string switch domain { case "account": - tbl = kv.TblAccountVals + tbl = kv.MaxTxNum case "storage": tbl = kv.TblStorageVals case "code": @@ -279,7 +279,7 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger var tbl string switch domain { case "account": - tbl = kv.TblAccountVals + tbl = kv.MaxTxNum case "storage": tbl = kv.TblStorageVals case "code": From 835e1a39c17eccc9b8cdd09fccaf3a03489ca057 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 13:15:53 +0100 Subject: [PATCH 22/42] save --- cmd/integration/commands/state_domains.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 1b0242edd8a..b3ecf6bee01 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -181,11 +181,11 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom case "account": tbl = kv.MaxTxNum case "storage": - tbl = kv.TblStorageVals + tbl = kv.HeaderNumber case "code": - tbl = kv.TblCodeVals + tbl = kv.HeaderCanonical case "commitment": - tbl = kv.TblCommitmentVals + tbl = kv.HeaderTD default: return fmt.Errorf("invalid domain %s", domain) } @@ -281,11 +281,11 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger case "account": tbl = kv.MaxTxNum case "storage": - tbl = kv.TblStorageVals + tbl = kv.HeaderNumber case "code": - tbl = kv.TblCodeVals + tbl = kv.HeaderCanonical case "commitment": - tbl = kv.TblCommitmentVals + tbl = kv.HeaderTD default: return fmt.Errorf("invalid domain %s", domain) } From 88829162e8f7e1862ce4f1c6fb7011dd6cf5d4ff Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 13:26:02 +0100 Subject: [PATCH 23/42] save --- cmd/integration/commands/state_domains.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index b3ecf6bee01..e8ab63cb44f 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -156,7 +156,7 @@ var purifyDomains = &cobra.Command{ defer purifyDB.Close() // purificationDomains := []string{"account", "storage", "code", "commitment"} - purificationDomains := []string{"account"} + purificationDomains := []string{"commitment"} for _, domain := range purificationDomains { if err := makePurifiableIndexDB(purifyDB, dirs, log.New(), domain); err != nil { fmt.Println("Error making purifiable index DB: ", err) From 29bbcc2e9fd03af13f01085c23f95fa545054b45 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 14:58:09 +0100 Subject: [PATCH 24/42] save --- cmd/integration/commands/state_domains.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index e8ab63cb44f..cb8541307c0 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -155,8 +155,8 @@ var purifyDomains = &cobra.Command{ purifyDB := mdbx.MustOpen(tmpDir) defer purifyDB.Close() - // purificationDomains := []string{"account", "storage", "code", "commitment"} - purificationDomains := []string{"commitment"} + purificationDomains := []string{"account", "storage", "code", "commitment"} + //purificationDomains := []string{"commitment"} for _, domain := range purificationDomains { if err := makePurifiableIndexDB(purifyDB, dirs, log.New(), domain); err != nil { fmt.Println("Error making purifiable index DB: ", err) From 8998c3f4f59f2bab0c9825684b508118e3d05e22 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 17:30:26 +0100 Subject: [PATCH 25/42] save --- cmd/integration/commands/state_domains.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index cb8541307c0..d7d0f8094c4 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -277,12 +277,15 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domain string) error { var tbl string + compressionType := seg.CompressNone switch domain { case "account": tbl = kv.MaxTxNum case "storage": + compressionType = seg.CompressKeys tbl = kv.HeaderNumber case "code": + compressionType = seg.CompressVals tbl = kv.HeaderCanonical case "commitment": tbl = kv.HeaderTD @@ -354,7 +357,7 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger return fmt.Errorf("create %s values compressor: %w", path.Join(purifyDir, fileName), err) } - comp := seg.NewWriter(valuesComp, seg.CompressKeys|seg.CompressVals) + comp := seg.NewWriter(valuesComp, compressionType) defer comp.Close() fmt.Printf("Indexing file %s\n", fileName) From 5f79c74eea086ec4fa907317be16434cb7e99d45 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 22:38:34 +0100 Subject: [PATCH 26/42] save --- cmd/integration/commands/state_domains.go | 39 +++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index d7d0f8094c4..7ee795ee726 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -22,6 +22,7 @@ import ( "encoding/hex" "errors" "fmt" + "io" "os" "path" "path/filepath" @@ -275,6 +276,36 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom return nil } +func copyFile(src, dst string) error { + // Open the source file + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + + // Create the destination file + out, err := os.Create(dst) + if err != nil { + return err + } + defer out.Close() + + // Copy the file contents from the source to the destination + _, err = io.Copy(out, in) + if err != nil { + return err + } + + // Ensure all data is written to disk + err = out.Sync() + if err != nil { + return err + } + + return nil +} + func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domain string) error { var tbl string compressionType := seg.CompressNone @@ -398,6 +429,14 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger fmt.Printf("Indexed %d keys, skipped %d, in file %s\n", count, skipped, fileName) } } + if skipped == 0 { + comp.Close() + // just copy the file + if err := copyFile(path.Join(dirs.SnapDomain, fileName), path.Join(purifyDir, fileName)); err != nil { + return fmt.Errorf("failed to copy file %s: %w", fileName, err) + } + continue + } fmt.Printf("Loaded %d keys in file %s. now compressing...\n", count, fileName) if err := comp.Compress(); err != nil { return fmt.Errorf("failed to compress: %w", err) From 6a89dda995c2da222bb099e4c62641eb22e98b75 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 22:40:56 +0100 Subject: [PATCH 27/42] save --- cmd/integration/commands/state_domains.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 7ee795ee726..78d21da34e8 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -30,6 +30,7 @@ import ( "sort" "strings" + "github.com/erigontech/erigon-lib/etl" "github.com/erigontech/erigon-lib/seg" state3 "github.com/erigontech/erigon-lib/state" @@ -217,6 +218,9 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom }); err != nil { return fmt.Errorf("failed to walk through the domainDir %s: %w", domain, err) } + + collector := etl.NewCollector("Purification", dirs.Tmp, etl.NewSortableBuffer(etl.BufferOptimalSize), logger) + defer collector.Close() // sort the files by name sort.Slice(filesNamesToIndex, func(i, j int) bool { res, ok, _ := downloadertype.ParseFileName(dirs.SnapDomain, filesNamesToIndex[i]) @@ -256,10 +260,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom buf = buf[:0] buf, _ = getter.Next(buf) - if err := tx.Put(tbl, buf, layerBytes); err != nil { - return fmt.Errorf("failed to put key %x: %w", buf, err) - } - + collector.Collect(buf, layerBytes) count++ //fmt.Println("count: ", count, "keyLength: ", len(buf)) if count%100000 == 0 { @@ -270,8 +271,9 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom } fmt.Printf("Indexed %d keys in file %s\n", count, fileName) } - if err := tx.Commit(); err != nil { - return fmt.Errorf("failed to commit transaction: %w", err) + fmt.Println("Loading the keys to DB") + if err := collector.Load(tx, tbl, etl.IdentityLoadFunc, etl.TransformArgs{}); err != nil { + return fmt.Errorf("failed to load: %w", err) } return nil } From 5b569ac9114b3b59f40aa2f63d68c5d974656a7c Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 22:51:55 +0100 Subject: [PATCH 28/42] save --- cmd/integration/commands/state_domains.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 78d21da34e8..9bcf4c0c5ea 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -157,7 +157,7 @@ var purifyDomains = &cobra.Command{ purifyDB := mdbx.MustOpen(tmpDir) defer purifyDB.Close() - purificationDomains := []string{"account", "storage", "code", "commitment"} + purificationDomains := []string{"account", "storage", "code", "commitment", "receipt"} //purificationDomains := []string{"commitment"} for _, domain := range purificationDomains { if err := makePurifiableIndexDB(purifyDB, dirs, log.New(), domain); err != nil { @@ -188,6 +188,8 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom tbl = kv.HeaderCanonical case "commitment": tbl = kv.HeaderTD + case "receipt": + tbl = kv.BadHeaderNumber default: return fmt.Errorf("invalid domain %s", domain) } @@ -322,6 +324,8 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger tbl = kv.HeaderCanonical case "commitment": tbl = kv.HeaderTD + case "receipt": + tbl = kv.BadHeaderNumber default: return fmt.Errorf("invalid domain %s", domain) } From 4bf472d6b1c8a3a357538b269a153de074885f4e Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 23:11:30 +0100 Subject: [PATCH 29/42] save --- cmd/integration/commands/state_domains.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 9bcf4c0c5ea..bed62522899 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -54,8 +54,6 @@ import ( "github.com/erigontech/erigon/turbo/debug" ) -var purifyDir string - func init() { withDataDir(readDomains) withChain(readDomains) @@ -72,8 +70,9 @@ func init() { // if trie variant is not hex, we could not have another rootHash with to verify it var ( - stepSize uint64 - lastStep uint64 + stepSize uint64 + lastStep uint64 + purifyDir string ) // write command to just seek and query state by addr and domain from state db and files (if any) @@ -374,7 +373,7 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger return fmt.Errorf("failed to start transaction: %w", err) } defer tx.Rollback() - os.Mkdir(purifyDir, 0755) + outD := datadir.New(purifyDir) compressCfg := seg.DefaultCfg compressCfg.Workers = runtime.NumCPU() // now start the file indexing @@ -389,9 +388,9 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger defer dec.Close() getter := dec.MakeGetter() - valuesComp, err := seg.NewCompressor(context.Background(), "Purification", path.Join(purifyDir, fileName), dirs.Tmp, compressCfg, log.LvlTrace, log.New()) + valuesComp, err := seg.NewCompressor(context.Background(), "Purification", path.Join(outD.SnapDomain, fileName), dirs.Tmp, compressCfg, log.LvlTrace, log.New()) if err != nil { - return fmt.Errorf("create %s values compressor: %w", path.Join(purifyDir, fileName), err) + return fmt.Errorf("create %s values compressor: %w", path.Join(outD.SnapDomain, fileName), err) } comp := seg.NewWriter(valuesComp, compressionType) @@ -438,7 +437,7 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger if skipped == 0 { comp.Close() // just copy the file - if err := copyFile(path.Join(dirs.SnapDomain, fileName), path.Join(purifyDir, fileName)); err != nil { + if err := copyFile(path.Join(dirs.SnapDomain, fileName), path.Join(outD.SnapDomain,, fileName)); err != nil { return fmt.Errorf("failed to copy file %s: %w", fileName, err) } continue From 1a168517b38ce54b519f13384db0f795706a6060 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 23:12:04 +0100 Subject: [PATCH 30/42] save --- cmd/integration/commands/state_domains.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index bed62522899..c26c1239650 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -146,7 +146,7 @@ var purifyDomains = &cobra.Command{ domainDir := dirs.SnapDomain // make a temporary dir - tmpDir, err := os.MkdirTemp("purifyTemp", "") // make a temporary dir to store the keys + tmpDir, err := os.MkdirTemp(dirs.Tmp, "purifyTemp") // make a temporary dir to store the keys if err != nil { fmt.Println("Error creating temporary directory: ", err) return From 16ad74c683b6f7409501f52255e8011b816656e1 Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 23:21:52 +0100 Subject: [PATCH 31/42] save --- cmd/integration/commands/state_domains.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index c26c1239650..ae9aebb443a 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -276,7 +276,8 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom if err := collector.Load(tx, tbl, etl.IdentityLoadFunc, etl.TransformArgs{}); err != nil { return fmt.Errorf("failed to load: %w", err) } - return nil + + return tx.Commit() } func copyFile(src, dst string) error { From b4b33bd6127e554f6dc7308c2d9ff6afd22baefc Mon Sep 17 00:00:00 2001 From: Giulio Date: Tue, 24 Dec 2024 23:23:35 +0100 Subject: [PATCH 32/42] save --- cmd/integration/commands/state_domains.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index ae9aebb443a..b603782e8bc 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -276,7 +276,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom if err := collector.Load(tx, tbl, etl.IdentityLoadFunc, etl.TransformArgs{}); err != nil { return fmt.Errorf("failed to load: %w", err) } - + return tx.Commit() } @@ -438,7 +438,7 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger if skipped == 0 { comp.Close() // just copy the file - if err := copyFile(path.Join(dirs.SnapDomain, fileName), path.Join(outD.SnapDomain,, fileName)); err != nil { + if err := copyFile(path.Join(dirs.SnapDomain, fileName), path.Join(outD.SnapDomain, fileName)); err != nil { return fmt.Errorf("failed to copy file %s: %w", fileName, err) } continue From 4d7791cada8ed5588cf1766f5bc6e61d0630db4f Mon Sep 17 00:00:00 2001 From: Giulio Date: Wed, 25 Dec 2024 01:06:49 +0100 Subject: [PATCH 33/42] save --- cmd/integration/commands/state_domains.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index b603782e8bc..950a6a90e4a 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -156,7 +156,7 @@ var purifyDomains = &cobra.Command{ purifyDB := mdbx.MustOpen(tmpDir) defer purifyDB.Close() - purificationDomains := []string{"account", "storage", "code", "commitment", "receipt"} + purificationDomains := []string{"account", "storage" /*"code",*/, "commitment", "receipt"} //purificationDomains := []string{"commitment"} for _, domain := range purificationDomains { if err := makePurifiableIndexDB(purifyDB, dirs, log.New(), domain); err != nil { From ff3e66824248b008621ea02d9a861254aa8f3e07 Mon Sep 17 00:00:00 2001 From: Giulio Date: Wed, 25 Dec 2024 01:08:25 +0100 Subject: [PATCH 34/42] save --- cmd/integration/commands/state_domains.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 950a6a90e4a..b1a53f0748b 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -65,14 +65,16 @@ func init() { withDataDir(purifyDomains) purifyDomains.Flags().StringVar(&purifyDir, "purifiedDomain", "purified-output", "") + purifyDomains.Flags().BoolVar(&purifyOnlyCommitment, "commitment", false, "purify only commitment domain") rootCmd.AddCommand(purifyDomains) } // if trie variant is not hex, we could not have another rootHash with to verify it var ( - stepSize uint64 - lastStep uint64 - purifyDir string + stepSize uint64 + lastStep uint64 + purifyDir string + purifyOnlyCommitment bool ) // write command to just seek and query state by addr and domain from state db and files (if any) @@ -155,8 +157,12 @@ var purifyDomains = &cobra.Command{ purifyDB := mdbx.MustOpen(tmpDir) defer purifyDB.Close() - - purificationDomains := []string{"account", "storage" /*"code",*/, "commitment", "receipt"} + var purificationDomains []string + if purifyOnlyCommitment { + purificationDomains = []string{"commitment"} + } else { + purificationDomains = []string{"account", "storage" /*"code",*/, "commitment", "receipt"} + } //purificationDomains := []string{"commitment"} for _, domain := range purificationDomains { if err := makePurifiableIndexDB(purifyDB, dirs, log.New(), domain); err != nil { From eb31f9d8bb858a008719df7dec43150b3acca5b1 Mon Sep 17 00:00:00 2001 From: Giulio Date: Wed, 25 Dec 2024 19:45:24 +0100 Subject: [PATCH 35/42] save --- cmd/integration/commands/state_domains.go | 40 +---------------------- 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index b1a53f0748b..045f8fc3668 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -22,7 +22,6 @@ import ( "encoding/hex" "errors" "fmt" - "io" "os" "path" "path/filepath" @@ -286,36 +285,6 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom return tx.Commit() } -func copyFile(src, dst string) error { - // Open the source file - in, err := os.Open(src) - if err != nil { - return err - } - defer in.Close() - - // Create the destination file - out, err := os.Create(dst) - if err != nil { - return err - } - defer out.Close() - - // Copy the file contents from the source to the destination - _, err = io.Copy(out, in) - if err != nil { - return err - } - - // Ensure all data is written to disk - err = out.Sync() - if err != nil { - return err - } - - return nil -} - func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domain string) error { var tbl string compressionType := seg.CompressNone @@ -441,14 +410,7 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger fmt.Printf("Indexed %d keys, skipped %d, in file %s\n", count, skipped, fileName) } } - if skipped == 0 { - comp.Close() - // just copy the file - if err := copyFile(path.Join(dirs.SnapDomain, fileName), path.Join(outD.SnapDomain, fileName)); err != nil { - return fmt.Errorf("failed to copy file %s: %w", fileName, err) - } - continue - } + fmt.Printf("Loaded %d keys in file %s. now compressing...\n", count, fileName) if err := comp.Compress(); err != nil { return fmt.Errorf("failed to compress: %w", err) From d7279ad1b33f4b69064ce6ac5fb55e117eb23dc9 Mon Sep 17 00:00:00 2001 From: Giulio Date: Thu, 26 Dec 2024 11:34:15 +0100 Subject: [PATCH 36/42] save --- cmd/integration/commands/state_domains.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 045f8fc3668..d6ad3364688 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -44,6 +44,7 @@ import ( "github.com/erigontech/erigon-lib/kv" "github.com/erigontech/erigon-lib/kv/mdbx" kv2 "github.com/erigontech/erigon-lib/kv/mdbx" + statelib "github.com/erigontech/erigon-lib/state" "github.com/erigontech/erigon/cmd/utils" "github.com/erigontech/erigon/core" "github.com/erigontech/erigon/core/state" @@ -350,7 +351,7 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger } defer tx.Rollback() outD := datadir.New(purifyDir) - compressCfg := seg.DefaultCfg + compressCfg := statelib.DomainCompressCfg compressCfg.Workers = runtime.NumCPU() // now start the file indexing for currentLayer, fileName := range filesNamesToPurify { From fcbc3f7124e433e166efea653e1fb9077657adbe Mon Sep 17 00:00:00 2001 From: Giulio Date: Thu, 26 Dec 2024 12:36:52 +0100 Subject: [PATCH 37/42] save --- cmd/integration/commands/state_domains.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index d6ad3364688..167e34a075d 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -65,7 +65,7 @@ func init() { withDataDir(purifyDomains) purifyDomains.Flags().StringVar(&purifyDir, "purifiedDomain", "purified-output", "") - purifyDomains.Flags().BoolVar(&purifyOnlyCommitment, "commitment", false, "purify only commitment domain") + purifyDomains.Flags().BoolVar(&purifyOnlyCommitment, "only-commitment", true, "purify only commitment domain") rootCmd.AddCommand(purifyDomains) } From 7a78370bd9ed7f2af8270dbc528d2014cfd28215 Mon Sep 17 00:00:00 2001 From: Giulio Date: Thu, 26 Dec 2024 12:41:02 +0100 Subject: [PATCH 38/42] save --- cmd/integration/commands/state_domains.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 167e34a075d..bc3ac6eb320 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -66,6 +66,7 @@ func init() { withDataDir(purifyDomains) purifyDomains.Flags().StringVar(&purifyDir, "purifiedDomain", "purified-output", "") purifyDomains.Flags().BoolVar(&purifyOnlyCommitment, "only-commitment", true, "purify only commitment domain") + purifyDomains.Flags().BoolVar(&replaceInDatadir, "replace-in-datadir", false, "replace the purified domains directly in datadir (will remove .kvei and .bt too)") rootCmd.AddCommand(purifyDomains) } @@ -75,6 +76,7 @@ var ( lastStep uint64 purifyDir string purifyOnlyCommitment bool + replaceInDatadir bool ) // write command to just seek and query state by addr and domain from state db and files (if any) @@ -171,7 +173,7 @@ var purifyDomains = &cobra.Command{ } } for _, domain := range purificationDomains { - if err := makePurifiedDomainsIndexDB(purifyDB, dirs, log.New(), domain); err != nil { + if err := makePurifiedDomains(purifyDB, dirs, log.New(), domain); err != nil { fmt.Println("Error making purifiable index DB: ", err) return } @@ -286,7 +288,7 @@ func makePurifiableIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, dom return tx.Commit() } -func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domain string) error { +func makePurifiedDomains(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domain string) error { var tbl string compressionType := seg.CompressNone switch domain { @@ -418,6 +420,20 @@ func makePurifiedDomainsIndexDB(db kv.RwDB, dirs datadir.Dirs, logger log.Logger } fmt.Printf("Compressed %d keys in file %s\n", count, fileName) comp.Close() + if replaceInDatadir { + if err := os.Rename(path.Join(outD.SnapDomain, fileName), path.Join(dirs.SnapDomain, fileName)); err != nil { + return fmt.Errorf("failed to replace the file %s: %w", fileName, err) + } + kveiFile := strings.ReplaceAll(fileName, ".kv", ".kvei") + btFile := strings.ReplaceAll(fileName, ".kv", ".bt") + // also remove the .kvei and .bt files + if err := os.Remove(path.Join(dirs.SnapDomain, kveiFile)); err != nil { + return fmt.Errorf("failed to remove the file: %w", kveiFile, err) + } + if err := os.Remove(path.Join(dirs.SnapDomain, btFile)); err != nil { + return fmt.Errorf("failed to remove the file: %w", btFile, err) + } + } } return nil } From 568e8bb58b89a5c692e59673e5fb82a4d36f7b8e Mon Sep 17 00:00:00 2001 From: Giulio Date: Thu, 26 Dec 2024 12:41:23 +0100 Subject: [PATCH 39/42] save --- cmd/integration/commands/state_domains.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index bc3ac6eb320..fd4d3b77e7d 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -421,6 +421,7 @@ func makePurifiedDomains(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domai fmt.Printf("Compressed %d keys in file %s\n", count, fileName) comp.Close() if replaceInDatadir { + fmt.Printf("Replacing the file %s in datadir\n", fileName) if err := os.Rename(path.Join(outD.SnapDomain, fileName), path.Join(dirs.SnapDomain, fileName)); err != nil { return fmt.Errorf("failed to replace the file %s: %w", fileName, err) } @@ -433,6 +434,7 @@ func makePurifiedDomains(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domai if err := os.Remove(path.Join(dirs.SnapDomain, btFile)); err != nil { return fmt.Errorf("failed to remove the file: %w", btFile, err) } + fmt.Printf("Removed the files %s and %s\n", kveiFile, btFile) } } return nil From c1ebf8f8fc45bc08d01f76e7c1e141af7e240e48 Mon Sep 17 00:00:00 2001 From: Giulio Date: Thu, 26 Dec 2024 12:47:56 +0100 Subject: [PATCH 40/42] save --- cmd/integration/commands/state_domains.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index fd4d3b77e7d..a721856a35d 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -427,6 +427,8 @@ func makePurifiedDomains(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domai } kveiFile := strings.ReplaceAll(fileName, ".kv", ".kvei") btFile := strings.ReplaceAll(fileName, ".kv", ".bt") + kveiFileTorrent := kveiFile + ".torrent" + btFileTorrent := btFile + ".torrent" // also remove the .kvei and .bt files if err := os.Remove(path.Join(dirs.SnapDomain, kveiFile)); err != nil { return fmt.Errorf("failed to remove the file: %w", kveiFile, err) @@ -434,6 +436,12 @@ func makePurifiedDomains(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domai if err := os.Remove(path.Join(dirs.SnapDomain, btFile)); err != nil { return fmt.Errorf("failed to remove the file: %w", btFile, err) } + if err := os.Remove(path.Join(dirs.SnapDomain, kveiFileTorrent)); err != nil { + return fmt.Errorf("failed to remove the file: %w", kveiFileTorrent, err) + } + if err := os.Remove(path.Join(dirs.SnapDomain, btFileTorrent)); err != nil { + return fmt.Errorf("failed to remove the file: %w", btFileTorrent, err) + } fmt.Printf("Removed the files %s and %s\n", kveiFile, btFile) } } From c8700aebee3396132cd816969e4d7256c4c655f0 Mon Sep 17 00:00:00 2001 From: Giulio Date: Thu, 26 Dec 2024 13:00:57 +0100 Subject: [PATCH 41/42] save --- cmd/integration/commands/state_domains.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index a721856a35d..3b24018fde0 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -431,16 +431,16 @@ func makePurifiedDomains(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domai btFileTorrent := btFile + ".torrent" // also remove the .kvei and .bt files if err := os.Remove(path.Join(dirs.SnapDomain, kveiFile)); err != nil { - return fmt.Errorf("failed to remove the file: %w", kveiFile, err) + return fmt.Errorf("failed to remove the file: %s", kveiFile, err) } if err := os.Remove(path.Join(dirs.SnapDomain, btFile)); err != nil { - return fmt.Errorf("failed to remove the file: %w", btFile, err) + return fmt.Errorf("failed to remove the file: %s", btFile, err) } if err := os.Remove(path.Join(dirs.SnapDomain, kveiFileTorrent)); err != nil { - return fmt.Errorf("failed to remove the file: %w", kveiFileTorrent, err) + return fmt.Errorf("failed to remove the file: %s", kveiFileTorrent, err) } if err := os.Remove(path.Join(dirs.SnapDomain, btFileTorrent)); err != nil { - return fmt.Errorf("failed to remove the file: %w", btFileTorrent, err) + return fmt.Errorf("failed to remove the file: %s", btFileTorrent, err) } fmt.Printf("Removed the files %s and %s\n", kveiFile, btFile) } From c263ed2b1f48c2216c6e76f22b7e28d1284a35f8 Mon Sep 17 00:00:00 2001 From: Giulio Date: Thu, 26 Dec 2024 13:10:00 +0100 Subject: [PATCH 42/42] save --- cmd/integration/commands/state_domains.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/integration/commands/state_domains.go b/cmd/integration/commands/state_domains.go index 3b24018fde0..dbb0871c2d7 100644 --- a/cmd/integration/commands/state_domains.go +++ b/cmd/integration/commands/state_domains.go @@ -431,16 +431,16 @@ func makePurifiedDomains(db kv.RwDB, dirs datadir.Dirs, logger log.Logger, domai btFileTorrent := btFile + ".torrent" // also remove the .kvei and .bt files if err := os.Remove(path.Join(dirs.SnapDomain, kveiFile)); err != nil { - return fmt.Errorf("failed to remove the file: %s", kveiFile, err) + return fmt.Errorf("failed to remove the file: %s, %w", kveiFile, err) } if err := os.Remove(path.Join(dirs.SnapDomain, btFile)); err != nil { - return fmt.Errorf("failed to remove the file: %s", btFile, err) + return fmt.Errorf("failed to remove the file: %s, %w", btFile, err) } if err := os.Remove(path.Join(dirs.SnapDomain, kveiFileTorrent)); err != nil { - return fmt.Errorf("failed to remove the file: %s", kveiFileTorrent, err) + return fmt.Errorf("failed to remove the file: %s, %w", kveiFileTorrent, err) } if err := os.Remove(path.Join(dirs.SnapDomain, btFileTorrent)); err != nil { - return fmt.Errorf("failed to remove the file: %s", btFileTorrent, err) + return fmt.Errorf("failed to remove the file: %s, %w", btFileTorrent, err) } fmt.Printf("Removed the files %s and %s\n", kveiFile, btFile) }