diff --git a/fuzzing/corpus/corpus.go b/fuzzing/corpus/corpus.go index 8426f156..8a640298 100644 --- a/fuzzing/corpus/corpus.go +++ b/fuzzing/corpus/corpus.go @@ -4,10 +4,13 @@ import ( "bytes" "fmt" "math/big" + "os" "path/filepath" "sync" "time" + "github.com/crytic/medusa/utils" + "github.com/crytic/medusa/chain" "github.com/crytic/medusa/fuzzing/calls" "github.com/crytic/medusa/fuzzing/coverage" @@ -30,13 +33,8 @@ type Corpus struct { // coverageMaps describes the total code coverage known to be achieved across all corpus call sequences. coverageMaps *coverage.CoverageMaps - // mutableSequenceFiles represents a corpus directory with files which describe call sequences that should - // be used for mutations. - mutableSequenceFiles *corpusDirectory[calls.CallSequence] - - // immutableSequenceFiles represents a corpus directory with files which describe call sequences that should not be - // used for mutations. - immutableSequenceFiles *corpusDirectory[calls.CallSequence] + // callSequenceFiles represents a corpus directory with files that should be used for mutations. + callSequenceFiles *corpusDirectory[calls.CallSequence] // testResultSequenceFiles represents a corpus directory with files which describe call sequences that were flagged // to be saved by a test case provider. These are not used in mutations. 
@@ -66,8 +64,7 @@ func NewCorpus(corpusDirectory string) (*Corpus, error) { corpus := &Corpus{ storageDirectory: corpusDirectory, coverageMaps: coverage.NewCoverageMaps(), - mutableSequenceFiles: newCorpusDirectory[calls.CallSequence](""), - immutableSequenceFiles: newCorpusDirectory[calls.CallSequence](""), + callSequenceFiles: newCorpusDirectory[calls.CallSequence](""), testResultSequenceFiles: newCorpusDirectory[calls.CallSequence](""), unexecutedCallSequences: make([]calls.CallSequence, 0), logger: logging.GlobalLogger.NewSubLogger("module", "corpus"), @@ -75,16 +72,17 @@ func NewCorpus(corpusDirectory string) (*Corpus, error) { // If we have a corpus directory set, parse our call sequences. if corpus.storageDirectory != "" { - // Read mutable call sequences. - corpus.mutableSequenceFiles.path = filepath.Join(corpus.storageDirectory, "call_sequences", "mutable") - err = corpus.mutableSequenceFiles.readFiles("*.json") + // Migrate the legacy corpus structure + // Note that it is important to call this first since we want to move all the call sequence files before reading + // them into the corpus + err = corpus.migrateLegacyCorpus() if err != nil { return nil, err } - // Read immutable call sequences. - corpus.immutableSequenceFiles.path = filepath.Join(corpus.storageDirectory, "call_sequences", "immutable") - err = corpus.immutableSequenceFiles.readFiles("*.json") + // Read call sequences. + corpus.callSequenceFiles.path = filepath.Join(corpus.storageDirectory, "call_sequences") + err = corpus.callSequenceFiles.readFiles("*.json") if err != nil { return nil, err } @@ -100,26 +98,90 @@ func NewCorpus(corpusDirectory string) (*Corpus, error) { return corpus, nil } +// migrateLegacyCorpus is used to read in the legacy corpus standard where call sequences were stored in two separate +// directories (mutable/immutable). 
+func (c *Corpus) migrateLegacyCorpus() error { + // Check to see if the mutable and/or the immutable directories exist + callSequencePath := filepath.Join(c.storageDirectory, "call_sequences") + mutablePath := filepath.Join(c.storageDirectory, "call_sequences", "mutable") + immutablePath := filepath.Join(c.storageDirectory, "call_sequences", "immutable") + + // Only return an error if the error is something other than "filepath does not exist" + mutableDirInfo, err := os.Stat(mutablePath) + if err != nil && !os.IsNotExist(err) { + return err + } + immutableDirInfo, err := os.Stat(immutablePath) + if err != nil && !os.IsNotExist(err) { + return err + } + + // Return early if these directories do not exist + if mutableDirInfo == nil && immutableDirInfo == nil { + return nil + } + + // Now, we need to notify the user that we have detected a legacy structure + c.logger.Info("Migrating legacy corpus") + + // If the mutable directory exists, read in all the files and add them to the call sequence files + if mutableDirInfo != nil { + // Discover all corpus files in the given directory. + filePaths, err := filepath.Glob(filepath.Join(mutablePath, "*.json")) + if err != nil { + return err + } + + // Move each file from the mutable directory to the parent call_sequences directory + for _, filePath := range filePaths { + err = utils.MoveFile(filePath, filepath.Join(callSequencePath, filepath.Base(filePath))) + if err != nil { + return err + } + } + + // Delete the mutable directory + err = utils.DeleteDirectory(mutablePath) + if err != nil { + return err + } + } + + // If the immutable directory exists, read in all the files and add them to the call sequence files + if immutableDirInfo != nil { + // Discover all corpus files in the given directory. 
+ filePaths, err := filepath.Glob(filepath.Join(immutablePath, "*.json")) + if err != nil { + return err + } + + // Move each file from the immutable directory to the parent call_sequences directory + for _, filePath := range filePaths { + err = utils.MoveFile(filePath, filepath.Join(callSequencePath, filepath.Base(filePath))) + if err != nil { + return err + } + } + + // Delete the immutable directory + err = utils.DeleteDirectory(immutablePath) + if err != nil { + return err + } + } + + return nil +} + // CoverageMaps exposes coverage details for all call sequences known to the corpus. func (c *Corpus) CoverageMaps() *coverage.CoverageMaps { return c.coverageMaps } -// CallSequenceEntryCount returns the total number of call sequences entries in the corpus, based on the provided filter -// flags. Some call sequences may not be valid for use if they fail validation when initializing the corpus. -// Returns the count of the requested call sequence entries. -func (c *Corpus) CallSequenceEntryCount(mutable bool, immutable bool, testResults bool) int { - count := 0 - if mutable { - count += len(c.mutableSequenceFiles.files) - } - if immutable { - count += len(c.immutableSequenceFiles.files) - } - if testResults { - count += len(c.testResultSequenceFiles.files) - } - return count +// CallSequenceEntryCount returns the total number of call sequences that increased coverage and also any test results +// that led to a failure. 
+func (c *Corpus) CallSequenceEntryCount() (int, int) { + return len(c.callSequenceFiles.files), len(c.testResultSequenceFiles.files) } // ActiveMutableSequenceCount returns the count of call sequences recorded in the corpus which have been validated @@ -302,18 +364,13 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions return 0, 0, err } - err = c.initializeSequences(c.mutableSequenceFiles, testChain, deployedContracts, true) - if err != nil { - return 0, 0, err - } - - err = c.initializeSequences(c.immutableSequenceFiles, testChain, deployedContracts, false) + err = c.initializeSequences(c.callSequenceFiles, testChain, deployedContracts, true) if err != nil { return 0, 0, err } // Calculate corpus health metrics - corpusSequencesTotal := len(c.mutableSequenceFiles.files) + len(c.immutableSequenceFiles.files) + len(c.testResultSequenceFiles.files) + corpusSequencesTotal := len(c.callSequenceFiles.files) + len(c.testResultSequenceFiles.files) corpusSequencesActive := len(c.unexecutedCallSequences) return corpusSequencesActive, corpusSequencesTotal, nil @@ -411,17 +468,9 @@ func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence, } // If we had an increase in non-reverted or reverted coverage, we save the sequence. - // Note: We only want to save the sequence once. We're most interested if it can be used for mutations first. - if coverageUpdated { - // If we achieved new non-reverting coverage, save this sequence for mutation purposes. - err = c.addCallSequence(c.mutableSequenceFiles, callSequence, true, mutationChooserWeight, flushImmediately) - if err != nil { - return err - } - } else if revertedCoverageUpdated { - // If we did not achieve new successful coverage, but achieved an increase in reverted coverage, save this - // sequence for non-mutation purposes. 
- err = c.addCallSequence(c.immutableSequenceFiles, callSequence, false, mutationChooserWeight, flushImmediately) + if coverageUpdated || revertedCoverageUpdated { + // If we achieved new coverage, save this sequence for mutation purposes. + err = c.addCallSequence(c.callSequenceFiles, callSequence, true, mutationChooserWeight, flushImmediately) if err != nil { return err } @@ -470,8 +519,8 @@ func (c *Corpus) Flush() error { c.callSequencesLock.Lock() defer c.callSequencesLock.Unlock() - // Write mutation target call sequences. - err := c.mutableSequenceFiles.writeFiles() + // Write all coverage-increasing call sequences. + err := c.callSequenceFiles.writeFiles() if err != nil { return err } @@ -482,11 +531,5 @@ func (c *Corpus) Flush() error { return err } - // Write other call sequences. - err = c.immutableSequenceFiles.writeFiles() - if err != nil { - return err - } - return nil } diff --git a/fuzzing/corpus/corpus_test.go b/fuzzing/corpus/corpus_test.go index c49c904e..1997588f 100644 --- a/fuzzing/corpus/corpus_test.go +++ b/fuzzing/corpus/corpus_test.go @@ -23,7 +23,7 @@ func getMockSimpleCorpus(minSequences int, maxSequences, minBlocks int, maxBlock // Add the requested number of entries. 
numSequences := minSequences + (rand.Int() % (maxSequences - minSequences)) for i := 0; i < numSequences; i++ { - err := corpus.addCallSequence(corpus.mutableSequenceFiles, getMockCallSequence(minBlocks+(rand.Int()%(maxBlocks-minBlocks))), true, nil, false) + err := corpus.addCallSequence(corpus.callSequenceFiles, getMockCallSequence(minBlocks+(rand.Int()%(maxBlocks-minBlocks))), true, nil, false) if err != nil { return nil, err } @@ -100,9 +100,9 @@ func TestCorpusReadWrite(t *testing.T) { assert.NoError(t, err) // Ensure that there are the correct number of call sequence files - matches, err := filepath.Glob(filepath.Join(corpus.mutableSequenceFiles.path, "*.json")) + matches, err := filepath.Glob(filepath.Join(corpus.callSequenceFiles.path, "*.json")) assert.NoError(t, err) - assert.EqualValues(t, len(corpus.mutableSequenceFiles.files), len(matches)) + assert.EqualValues(t, len(corpus.callSequenceFiles.files), len(matches)) // Wipe corpus clean so that you can now read it in from disk corpus, err = NewCorpus("corpus") @@ -124,7 +124,7 @@ func TestCorpusCallSequenceMarshaling(t *testing.T) { // Run the test in our temporary test directory to avoid artifact pollution. 
testutils.ExecuteInDirectory(t, t.TempDir(), func() { // For each entry, marshal it and then unmarshal the byte array - for _, entryFile := range corpus.mutableSequenceFiles.files { + for _, entryFile := range corpus.callSequenceFiles.files { // Marshal the entry b, err := json.Marshal(entryFile.data) assert.NoError(t, err) @@ -139,9 +139,9 @@ func TestCorpusCallSequenceMarshaling(t *testing.T) { } // Remove all items - for i := 0; i < len(corpus.mutableSequenceFiles.files); { - corpus.mutableSequenceFiles.removeFile(corpus.mutableSequenceFiles.files[i].fileName) + for i := 0; i < len(corpus.callSequenceFiles.files); { + corpus.callSequenceFiles.removeFile(corpus.callSequenceFiles.files[i].fileName) } - assert.Empty(t, corpus.mutableSequenceFiles.files) + assert.Empty(t, corpus.callSequenceFiles.files) }) } diff --git a/fuzzing/fuzzer.go b/fuzzing/fuzzer.go index 5234ac1d..0b9b1500 100644 --- a/fuzzing/fuzzer.go +++ b/fuzzing/fuzzer.go @@ -762,8 +762,8 @@ func (f *Fuzzer) Start() error { // Initialize our coverage maps by measuring the coverage we get from the corpus. 
var corpusActiveSequences, corpusTotalSequences int - if f.corpus.CallSequenceEntryCount(true, true, true) > 0 { - f.logger.Info("Running call sequences in the corpus...") + if totalCallSequences, testResults := f.corpus.CallSequenceEntryCount(); totalCallSequences > 0 || testResults > 0 { + f.logger.Info("Running call sequences in the corpus") } startTime := time.Now() corpusActiveSequences, corpusTotalSequences, err = f.corpus.Initialize(baseTestChain, f.contractDefinitions) diff --git a/fuzzing/fuzzer_test.go b/fuzzing/fuzzer_test.go index 47450b63..f4b7bf99 100644 --- a/fuzzing/fuzzer_test.go +++ b/fuzzing/fuzzer_test.go @@ -826,7 +826,8 @@ func TestCorpusReplayability(t *testing.T) { // Cache current coverage maps originalCoverage := f.fuzzer.corpus.CoverageMaps() - originalCorpusSequenceCount := f.fuzzer.corpus.CallSequenceEntryCount(true, true, true) + originalTotalCallSequences, originalTotalTestResults := f.fuzzer.corpus.CallSequenceEntryCount() + originalCorpusSequenceCount := originalTotalCallSequences + originalTotalTestResults // Next, set the fuzzer worker count to one, this allows us to count the call sequences executed before // solving a problem. We will verify the problem is solved with less or equal sequences tested, than diff --git a/fuzzing/fuzzer_test_methods_test.go b/fuzzing/fuzzer_test_methods_test.go index a022551b..80a7f152 100644 --- a/fuzzing/fuzzer_test_methods_test.go +++ b/fuzzing/fuzzer_test_methods_test.go @@ -81,7 +81,7 @@ func assertFailedTestsExpected(f *fuzzerTestContext, expectFailure bool) { // corpus. It asserts that the actual result matches the provided expected result. func assertCorpusCallSequencesCollected(f *fuzzerTestContext, expectCallSequences bool) { // Obtain our count of mutable (often representing just non-reverted coverage increasing) sequences. 
// MoveFile moves the file at sourcePath to targetPath, creating the target's parent directories if they do not
// already exist. Returns an error if the source cannot be inspected, if the source refers to a directory, or if
// creating the target directory or the move itself fails.
//
// The move is performed with os.Rename, so an existing file at targetPath is replaced, and OS-specific
// restrictions may apply to the rename. Errors from the underlying os calls are returned unwrapped so that callers
// can keep using helpers such as os.IsNotExist on them.
func MoveFile(sourcePath string, targetPath string) error {
	// Obtain file info for the source file. A missing source is reported here, before anything is created.
	sourceInfo, err := os.Stat(sourcePath)
	if err != nil {
		return err
	}

	// Directories are rejected; this helper only moves files.
	if sourceInfo.IsDir() {
		return fmt.Errorf("could not move file from '%s' to '%s' because the source path refers to a directory", sourcePath, targetPath)
	}

	// Ensure the existence of the directory we wish to move to.
	if err = os.MkdirAll(filepath.Dir(targetPath), 0777); err != nil {
		return err
	}

	// Move the file from the source path to the target path.
	return os.Rename(sourcePath, targetPath)
}