diff --git a/centipede/BUILD b/centipede/BUILD index 9e7a9d6d5..839f99b2c 100644 --- a/centipede/BUILD +++ b/centipede/BUILD @@ -439,6 +439,7 @@ cc_library( # used in centipede_runner. ":feature", ":execution_metadata", + ":mutation_input", ":shared_memory_blob_sequence", "@com_google_fuzztest//common:defs", ], @@ -944,6 +945,7 @@ cc_library( hdrs = ["dispatcher.h"], deps = [ ":execution_metadata", + ":mutation_input", ":runner_request", ":runner_result", ":shared_memory_blob_sequence", @@ -1495,6 +1497,7 @@ cc_test( deps = [ ":execution_metadata", ":feature", + ":mutation_input", ":runner_result", ":shared_memory_blob_sequence", "@com_google_fuzztest//common:defs", @@ -1634,6 +1637,7 @@ cc_test( ":feature", ":feature_set", ":pc_info", + ":runner_result", ":util", "@com_google_fuzztest//common:defs", "@com_google_fuzztest//common:test_util", diff --git a/centipede/byte_array_mutator.cc b/centipede/byte_array_mutator.cc index 9a29a432e..147ec8ff4 100644 --- a/centipede/byte_array_mutator.cc +++ b/centipede/byte_array_mutator.cc @@ -321,27 +321,29 @@ void ByteArrayMutator::CrossOver(ByteArray &data, const ByteArray &other) { // TODO(kcc): add tests with different values of knobs. const KnobId knob_mutate_or_crossover = Knobs::NewId("mutate_or_crossover"); -std::vector ByteArrayMutator::MutateMany( - const std::vector &inputs, size_t num_mutants) { +std::vector ByteArrayMutator::MutateMany( + const std::vector& inputs, size_t num_mutants) { if (inputs.empty()) abort(); // TODO(xinhaoyuan): Consider metadata in other inputs instead of always the // first one. SetMetadata(inputs[0].metadata != nullptr ? 
*inputs[0].metadata : ExecutionMetadata()); size_t num_inputs = inputs.size(); - std::vector mutants; + std::vector mutants; mutants.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { - auto mutant = inputs[rng_() % num_inputs].data; - if (mutant.size() <= max_len_ && + Mutant mutant; + mutant.origin = rng_() % num_inputs; + mutant.data = inputs[mutant.origin].data; + if (mutant.data.size() <= max_len_ && knobs_.GenerateBool(knob_mutate_or_crossover, rng_())) { // Do crossover only if the mutant is not over the max_len_. // Perform crossover with some other input. It may be the same input. const auto &other_input = inputs[rng_() % num_inputs].data; - CrossOver(mutant, other_input); + CrossOver(mutant.data, other_input); } else { // Perform mutation. - Mutate(mutant); + Mutate(mutant.data); } mutants.push_back(std::move(mutant)); } diff --git a/centipede/byte_array_mutator.h b/centipede/byte_array_mutator.h index 3c6978caa..31e659f4e 100644 --- a/centipede/byte_array_mutator.h +++ b/centipede/byte_array_mutator.h @@ -108,8 +108,8 @@ class ByteArrayMutator { } // Takes non-empty `inputs` and produces `num_mutants` mutants. 
- std::vector MutateMany(const std::vector &inputs, - size_t num_mutants); + std::vector MutateMany(const std::vector& inputs, + size_t num_mutants); using CrossOverFn = void (ByteArrayMutator::*)(ByteArray &, const ByteArray &); diff --git a/centipede/byte_array_mutator_test.cc b/centipede/byte_array_mutator_test.cc index ae35641b5..542eaa0b8 100644 --- a/centipede/byte_array_mutator_test.cc +++ b/centipede/byte_array_mutator_test.cc @@ -928,12 +928,12 @@ TEST(ByteArrayMutator, MutateManyWithAlignedInputs) { {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, }; - const std::vector mutants = + const std::vector mutants = mutator.MutateMany(GetMutationInputRefsFromDataInputs(aligned_inputs), kNumMutantsToGenerate); EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); - for (const ByteArray &mutant : mutants) { - EXPECT_EQ(mutant.size() % kSizeAlignment, 0); + for (const Mutant& mutant : mutants) { + EXPECT_EQ(mutant.data.size() % kSizeAlignment, 0); } } @@ -958,13 +958,13 @@ TEST(ByteArrayMutator, MutateManyWithUnalignedInputs) { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, }; - const std::vector mutants = + const std::vector mutants = mutator.MutateMany(GetMutationInputRefsFromDataInputs(unaligned_inputs), kNumMutantsToGenerate); EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); - for (const ByteArray &mutant : mutants) { - if (mutant.size() % kSizeAlignment != 0) { - EXPECT_LE(mutant.size(), 11); + for (const Mutant& mutant : mutants) { + if (mutant.data.size() % kSizeAlignment != 0) { + EXPECT_LE(mutant.data.size(), 11); } } } @@ -982,12 +982,12 @@ TEST(ByteArrayMutator, MutateManyWithMaxLen) { {0, 1, 2}, {0, 1, 2, 3}, }; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( GetMutationInputRefsFromDataInputs(inputs), kNumMutantsToGenerate); EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); - for (const ByteArray &mutant : mutants) { - EXPECT_LE(mutant.size(), kMaxLen); + for (const 
Mutant& mutant : mutants) { + EXPECT_LE(mutant.data.size(), kMaxLen); } } @@ -1001,16 +1001,16 @@ TEST(ByteArrayMutator, MutateManyWithMaxLenWithStartingLargeInput) { const std::vector large_input = { {0, 1, 2, 3, 4, 5, 6, 7}, {0}, {0, 1}, {0, 1, 2}, {0, 1, 2, 3}, }; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( GetMutationInputRefsFromDataInputs(large_input), kNumMutantsToGenerate); EXPECT_EQ(mutants.size(), kNumMutantsToGenerate); - for (const ByteArray &mutant : mutants) { - if (mutant.size() > kMaxLen) { + for (const Mutant& mutant : mutants) { + if (mutant.data.size() > kMaxLen) { // The only mutant larger than max length should be the same large input // that mutation originally started with. All other mutants should be // within the maximum length specified. - EXPECT_EQ(mutant, large_input[0]); + EXPECT_EQ(mutant.data, large_input[0]); } } } diff --git a/centipede/centipede.cc b/centipede/centipede.cc index 6a1843fcb..a9f9ed778 100644 --- a/centipede/centipede.cc +++ b/centipede/centipede.cc @@ -416,13 +416,18 @@ size_t Centipede::AddPcPairFeatures(FeatureVec &fv) { } bool Centipede::RunBatch( - const std::vector &input_vec, - BlobFileWriter *absl_nullable corpus_file, - BlobFileWriter *absl_nullable features_file, - BlobFileWriter *absl_nullable unconditional_features_file) { + const std::vector& input_vec, + const std::vector& mutant_origins, + BlobFileWriter* absl_nullable corpus_file, + BlobFileWriter* absl_nullable features_file, + BlobFileWriter* absl_nullable unconditional_features_file) { BatchResult batch_result; bool success = ExecuteAndReportCrash(env_.binary, input_vec, batch_result); FUZZTEST_CHECK_EQ(input_vec.size(), batch_result.results().size()); + FUZZTEST_CHECK(mutant_origins.empty() || + mutant_origins.size() >= input_vec.size()) + << "Got " << mutant_origins.size() << " with " << input_vec.size() + << " input"; for (const auto &extra_binary : env_.extra_binaries) { if (ShouldStop()) 
break; @@ -474,6 +479,7 @@ bool Centipede::RunBatch( } } } + corpus_.UpdateWeights(fs_, coverage_frontier_, env_.exec_time_weight_scaling); return batch_gained_new_coverage; } @@ -563,7 +569,7 @@ void Centipede::Rerun(std::vector &to_rerun) { size_t batch_size = std::min(to_rerun.size(), env_.batch_size); std::vector batch(to_rerun.end() - batch_size, to_rerun.end()); to_rerun.resize(to_rerun.size() - batch_size); - if (RunBatch(batch, nullptr, nullptr, features_file.get())) { + if (RunBatch(batch, {}, nullptr, nullptr, features_file.get())) { UpdateAndMaybeLogStats("rerun-old", 1); } } @@ -757,7 +763,7 @@ void Centipede::LoadSeedInputs(BlobFileWriter *absl_nonnull corpus_file, seed_inputs.push_back({0}); } - RunBatch(seed_inputs, corpus_file, features_file, + RunBatch(seed_inputs, {}, corpus_file, features_file, /*unconditional_features_file=*/nullptr); FUZZTEST_LOG(INFO) << "Number of input seeds available: " << num_seeds_available @@ -838,21 +844,39 @@ void Centipede::FuzzingLoop() { auto remaining_runs = env_.num_runs - new_runs; auto batch_size = std::min(env_.batch_size, remaining_runs); std::vector mutation_inputs; + std::vector mutate_batch_origins; mutation_inputs.reserve(env_.mutate_batch_size); + mutate_batch_origins.reserve(env_.mutate_batch_size); for (size_t i = 0; i < env_.mutate_batch_size; i++) { - const auto& corpus_record = env_.use_corpus_weights - ? corpus_.WeightedRandom(rng_) - : corpus_.UniformRandom(rng_); + const size_t origin = env_.use_corpus_weights + ? 
corpus_.WeightedRandom(rng_) + : corpus_.UniformRandom(rng_); + mutate_batch_origins.push_back(origin); + const auto& corpus_record = corpus_.Records()[origin]; mutation_inputs.push_back( MutationInputRef{corpus_record.data, &corpus_record.metadata}); } - const std::vector mutants = + const std::vector mutants = user_callbacks_.Mutate(mutation_inputs, batch_size); if (ShouldStop()) break; + std::vector next_batch; + next_batch.reserve(mutants.size()); + std::vector mutant_origins; + mutant_origins.reserve(mutants.size()); + for (auto& mutant : mutants) { + next_batch.push_back(std::move(mutant.data)); + if (mutant.origin == Mutant::kOriginNone) { + mutant_origins.push_back(Mutant::kOriginNone); + } else { + mutant_origins.push_back(mutate_batch_origins[mutant.origin]); + } + } + bool gained_new_coverage = - RunBatch(mutants, corpus_file.get(), features_file.get(), nullptr); + RunBatch(next_batch, mutant_origins, corpus_file.get(), + features_file.get(), nullptr); new_runs += mutants.size(); if (gained_new_coverage) { diff --git a/centipede/centipede.h b/centipede/centipede.h index 24416c66d..899ecc8ef 100644 --- a/centipede/centipede.h +++ b/centipede/centipede.h @@ -84,10 +84,11 @@ class Centipede { // * its features are written to `features_file` (if that's non-null). // Returns true if new features were observed. // Post-condition: `batch_result.results.size()` == `input_vec.size()`. - bool RunBatch(const std::vector &input_vec, - BlobFileWriter *absl_nullable corpus_file, - BlobFileWriter *absl_nullable features_file, - BlobFileWriter *absl_nullable unconditional_features_file); + bool RunBatch(const std::vector& input_vec, + const std::vector& mutant_origins, + BlobFileWriter* absl_nullable corpus_file, + BlobFileWriter* absl_nullable features_file, + BlobFileWriter* absl_nullable unconditional_features_file); // Loads seed inputs from the user callbacks, execute them, and store them // with the corresponding features into `corpus_file` and `features_file`. 
void LoadSeedInputs(BlobFileWriter *absl_nonnull corpus_file, diff --git a/centipede/centipede_callbacks.h b/centipede/centipede_callbacks.h index 5bfcf46c4..0696fff8d 100644 --- a/centipede/centipede_callbacks.h +++ b/centipede/centipede_callbacks.h @@ -72,8 +72,8 @@ class CentipedeCallbacks { BatchResult &batch_result) = 0; // Takes non-empty `inputs` and returns at most `num_mutants` mutated inputs. - virtual std::vector Mutate( - const std::vector &inputs, size_t num_mutants) { + virtual std::vector Mutate( + const std::vector& inputs, size_t num_mutants) { return env_.use_legacy_default_mutator ? byte_array_mutator_.MutateMany(inputs, num_mutants) : fuzztest_mutator_.MutateMany(inputs, num_mutants); diff --git a/centipede/centipede_default_callbacks.cc b/centipede/centipede_default_callbacks.cc index ee54c2aea..02b896a50 100644 --- a/centipede/centipede_default_callbacks.cc +++ b/centipede/centipede_default_callbacks.cc @@ -72,8 +72,8 @@ CentipedeDefaultCallbacks::GetSerializedTargetConfig() { "Failed to get serialized configuration from the target binary."); } -std::vector CentipedeDefaultCallbacks::Mutate( - const std::vector &inputs, size_t num_mutants) { +std::vector CentipedeDefaultCallbacks::Mutate( + const std::vector& inputs, size_t num_mutants) { if (num_mutants == 0) return {}; // Try to use the custom mutator if it hasn't been disabled. 
if (custom_mutator_is_usable_.value_or(true)) { diff --git a/centipede/centipede_default_callbacks.h b/centipede/centipede_default_callbacks.h index 0b7856261..50e92e1be 100644 --- a/centipede/centipede_default_callbacks.h +++ b/centipede/centipede_default_callbacks.h @@ -42,8 +42,8 @@ class CentipedeDefaultCallbacks : public CentipedeCallbacks { absl::StatusOr GetSerializedTargetConfig() override; bool Execute(std::string_view binary, const std::vector &inputs, BatchResult &batch_result) override; - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override; private: std::optional custom_mutator_is_usable_ = std::nullopt; diff --git a/centipede/centipede_flags.inc b/centipede/centipede_flags.inc index f52e2f8ea..572e56d93 100644 --- a/centipede/centipede_flags.inc +++ b/centipede/centipede_flags.inc @@ -192,6 +192,9 @@ CENTIPEDE_FLAG( bool, use_corpus_weights, true, "If true, use weighted distribution when choosing the corpus element " "to mutate. This flag is mostly for Centipede developers.") +CENTIPEDE_FLAG( + bool, exec_time_weight_scaling, true, + "If true, scale the corpus weight by the execution time of each input.") CENTIPEDE_FLAG( bool, use_coverage_frontier, false, "If true, use coverage frontier when choosing the corpus element to " diff --git a/centipede/centipede_test.cc b/centipede/centipede_test.cc index deb06af86..b18fc3b67 100644 --- a/centipede/centipede_test.cc +++ b/centipede/centipede_test.cc @@ -60,6 +60,14 @@ using ::testing::Le; using ::testing::Not; using ::testing::SizeIs; +std::vector GetDataFromMutants(const std::vector& mutants) { + std::vector data; + for (const auto mutant : mutants) { + data.push_back(mutant.data); + } + return data; +} + // A mock for CentipedeCallbacks. 
class CentipedeMock : public CentipedeCallbacks { public: @@ -105,19 +113,20 @@ class CentipedeMock : public CentipedeCallbacks { // (the value {0} is produced by the default GetSeeds()). // Next 65536 mutations are 2-byte sequences {0,0} ... {255, 255}. // Then repeat 2-byte sequences. - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - std::vector mutants; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + std::vector mutants; mutants.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { num_mutations_++; if (num_mutations_ < 256) { - mutants.push_back({static_cast(num_mutations_)}); + mutants.push_back( + {{static_cast(num_mutations_)}, Mutant::kOriginNone}); continue; } uint8_t byte0 = (num_mutations_ - 256) / 256; uint8_t byte1 = (num_mutations_ - 256) % 256; - mutants.push_back({byte0, byte1}); + mutants.push_back({{byte0, byte1}, Mutant::kOriginNone}); } return mutants; } @@ -349,8 +358,8 @@ class MutateCallbacks : public CentipedeCallbacks { } // Will not be called. 
- std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { FUZZTEST_LOG(FATAL); } @@ -435,8 +444,9 @@ TEST_F(CentipedeWithTemporaryLocalDir, MutateViaExternalBinary) { GetMutationInputRefsFromDataInputs(inputs), 10000); EXPECT_EQ(result.exit_code(), EXIT_SUCCESS); EXPECT_TRUE(result.has_custom_mutator()); - EXPECT_THAT(result.mutants(), AllOf(IsSupersetOf(all_expected_mutants), - Each(Not(IsEmpty())))); + EXPECT_THAT( + GetDataFromMutants(result.mutants()), + AllOf(IsSupersetOf(all_expected_mutants), Each(Not(IsEmpty())))); } } @@ -450,9 +460,10 @@ TEST_F(CentipedeWithTemporaryLocalDir, MutateViaExternalBinary) { 10000); EXPECT_EQ(result.exit_code(), EXIT_SUCCESS); EXPECT_TRUE(result.has_custom_mutator()); - EXPECT_THAT(result.mutants(), AllOf(IsSupersetOf(all_expected_mutants), - Each(Not(IsEmpty())))); - EXPECT_THAT(result.mutants(), + const auto mutant_data = GetDataFromMutants(result.mutants()); + EXPECT_THAT(mutant_data, AllOf(IsSupersetOf(all_expected_mutants), + Each(Not(IsEmpty())))); + EXPECT_THAT(mutant_data, AllOf(IsSupersetOf(all_expected_mutants), Each(Not(IsEmpty())), // The byte_array_mutator may insert up to 20 bytes to an // input, which may push the size over the max_len. @@ -470,9 +481,10 @@ TEST_F(CentipedeWithTemporaryLocalDir, MutateViaExternalBinary) { binary_with_custom_mutator, GetMutationInputRefsFromDataInputs(inputs), 10000); // Must contain normal mutants, but not the ones from crossover. 
- EXPECT_THAT(result.mutants(), IsSupersetOf(some_of_expected_mutants)); + const auto mutant_data = GetDataFromMutants(result.mutants()); + EXPECT_THAT(mutant_data, IsSupersetOf(some_of_expected_mutants)); for (const auto &crossover_mutant : expected_crossover_mutants) { - EXPECT_THAT(result.mutants(), Not(Contains(crossover_mutant))); + EXPECT_THAT(mutant_data, Not(Contains(crossover_mutant))); } } } @@ -496,12 +508,13 @@ class MergeMock : public CentipedeCallbacks { } // Every consecutive mutation is {number_of_mutations_} (starting from 1). - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - std::vector mutants{num_mutants}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + std::vector mutants(num_mutants); for (auto &mutant : mutants) { - mutant.resize(1); - mutant[0] = ++number_of_mutations_; + mutant.data.resize(1); + mutant.data[0] = ++number_of_mutations_; + mutant.origin = Mutant::kOriginNone; } return mutants; } @@ -581,17 +594,18 @@ class FunctionFilterMock : public CentipedeCallbacks { } // Sets the inputs to one of 3 pre-defined values. - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { for (auto &input : inputs) { if (!seed_inputs_.contains(input.data)) { observed_inputs_.insert(input.data); } } - std::vector mutants; + std::vector mutants; mutants.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { - mutants.push_back(GetMutant(++number_of_mutations_)); + mutants.push_back( + {GetMutant(++number_of_mutations_), Mutant::kOriginNone}); } return mutants; } @@ -620,6 +634,7 @@ static std::vector RunWithFunctionFilter( Environment env; env.workdir = tmp_dir.path(); env.seed = 1; // make the runs predictable. 
+ env.exec_time_weight_scaling = false; env.num_runs = 100; env.batch_size = 10; env.binary = GetDataDependencyFilepath("centipede/testing/test_fuzz_target"); @@ -700,12 +715,13 @@ class ExtraBinariesMock : public CentipedeCallbacks { } // Sets the mutants to different 1-byte values. - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - std::vector mutants{num_mutants}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + std::vector mutants(num_mutants); for (auto &mutant : mutants) { - mutant.resize(1); - mutant[0] = ++number_of_mutations_; + mutant.data.resize(1); + mutant.data[0] = ++number_of_mutations_; + mutant.origin = Mutant::kOriginNone; } return mutants; } @@ -833,13 +849,14 @@ class UndetectedCrashingInputMock : public CentipedeCallbacks { } // Sets the mutants to different 1-byte values. - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - std::vector mutants; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + std::vector mutants; mutants.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { // The contents of each mutant is simply its sequential number. 
- mutants.push_back({static_cast(curr_input_idx_++)}); + mutants.push_back( + {{static_cast(curr_input_idx_++)}, Mutant::kOriginNone}); } return mutants; } @@ -993,9 +1010,9 @@ class FakeCentipedeCallbacksForThreadChecking : public CentipedeCallbacks { return true; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - return {num_mutants, {0}}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + return {num_mutants, {{0}, Mutant::kOriginNone}}; } bool thread_check_passed() { return thread_check_passed_; } @@ -1048,9 +1065,9 @@ class SetupFailureCallbacks : public CentipedeCallbacks { return false; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - return {num_mutants, {0}}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + return {num_mutants, {{0}, Mutant::kOriginNone}}; } int execute_count() const { return execute_count_; } @@ -1086,9 +1103,9 @@ class SkippedTestCallbacks : public CentipedeCallbacks { return false; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - return {num_mutants, {0}}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + return {num_mutants, {{0}, Mutant::kOriginNone}}; } int execute_count() const { return execute_count_; } @@ -1124,9 +1141,9 @@ class IgnoredFailureCallbacks : public CentipedeCallbacks { return false; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { - return {num_mutants, {0}}; + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { + return {num_mutants, {{0}, Mutant::kOriginNone}}; } int execute_count() const { return execute_count_; } @@ -1157,11 +1174,11 @@ TEST_F(CentipedeWithTemporaryLocalDir, UsesProvidedCustomMutator) { CentipedeDefaultCallbacks callbacks(env); const std::vector inputs = {{1}, {2}, {3}, {4}, {5}, {6}}; - const std::vector mutants = callbacks.Mutate( + 
const std::vector mutants = callbacks.Mutate( GetMutationInputRefsFromDataInputs(inputs), inputs.size()); // The custom mutator just returns the original inputs as mutants. - EXPECT_EQ(inputs, mutants); + EXPECT_EQ(inputs, GetDataFromMutants(mutants)); } TEST_F(CentipedeWithTemporaryLocalDir, FailsOnMisbehavingCustomMutator) { @@ -1190,12 +1207,12 @@ TEST_F(CentipedeWithTemporaryLocalDir, CentipedeDefaultCallbacks callbacks(env); const std::vector inputs = {{1}, {2}, {3}, {4}, {5}, {6}}; - const std::vector mutants = callbacks.Mutate( + const std::vector mutants = callbacks.Mutate( GetMutationInputRefsFromDataInputs(inputs), inputs.size()); // The built-in mutator performs non-trivial mutations. EXPECT_EQ(inputs.size(), mutants.size()); - EXPECT_NE(inputs, mutants); + EXPECT_NE(inputs, GetDataFromMutants(mutants)); } TEST_F(CentipedeWithTemporaryLocalDir, HangingFuzzTargetExitsAfterTimeout) { diff --git a/centipede/corpus.cc b/centipede/corpus.cc index 9aaf92716..0c12f77fe 100644 --- a/centipede/corpus.cc +++ b/centipede/corpus.cc @@ -77,20 +77,92 @@ std::pair Corpus::MaxAndAvgSize() const { return {max, total / records_.size()}; } +void Corpus::UpdateWeights(const FeatureSet& fs, + const CoverageFrontier& coverage_frontier, + bool scale_by_exec_time) { + std::vector weights; + weights.resize(records_.size()); + for (size_t i = 0, n = records_.size(); i < n; ++i) { + auto& record = records_[i]; + const size_t unseen = fs.PruneFeaturesAndCountUnseen(record.features); + FUZZTEST_CHECK_EQ(unseen, 0); + weights[i] = fs.ComputeWeight(record.features); + } + if (scale_by_exec_time) { + double total_exec_time_usec = 0; + // For loaded corpus, we don't have the exec time recorded. Thus we don't + // count them when calculating the average exec time or scale their weights. 
+ size_t exec_time_divider = 0; + for (size_t i = 0; i < records_.size(); ++i) { + if (!(records_[i].stats == ExecutionResult::Stats{})) { + total_exec_time_usec += records_[i].stats.exec_time_usec; + ++exec_time_divider; + } + } + const double avg_exec_time_usec = + exec_time_divider == 0 ? 0 : total_exec_time_usec / exec_time_divider; + for (size_t i = 0; i < records_.size(); ++i) { + const auto& record = records_[i]; + if (record.stats == ExecutionResult::Stats{}) { + continue; + } + if (record.stats.exec_time_usec > avg_exec_time_usec * 10) { + weights[i] *= 0.1; + } else if (record.stats.exec_time_usec > avg_exec_time_usec * 4) { + weights[i] *= 0.25; + } else if (record.stats.exec_time_usec > avg_exec_time_usec * 2) { + weights[i] *= 0.5; + } else if (record.stats.exec_time_usec * 3 > avg_exec_time_usec * 4) { + weights[i] *= 0.75; + } else if (record.stats.exec_time_usec * 4 < avg_exec_time_usec) { + weights[i] *= 3; + } else if (record.stats.exec_time_usec * 3 < avg_exec_time_usec) { + weights[i] *= 2; + } else if (record.stats.exec_time_usec * 2 < avg_exec_time_usec) { + weights[i] *= 1.5; + } + } + } + // Normalize weights into integers in [0, 2^16]. + double highest_weight = 0; + double lowest_weight = 0; + double weight_sum = 0; + for (size_t i = 0; i < records_.size(); ++i) { + if (i == 0 || weights[i] > highest_weight) { + highest_weight = weights[i]; + } + if (i == 0 || weights[i] < lowest_weight) { + lowest_weight = weights[i]; + } + weight_sum += weights[i]; + } + FUZZTEST_VLOG(1) << "Recomputed weight with average: " + << weight_sum / records_.size() + << " highest: " << highest_weight + << " lowest: " << lowest_weight; + FUZZTEST_CHECK(lowest_weight >= 0) << "Must not have negative corpus weight!"; + for (size_t i = 0; i < records_.size(); ++i) { + // If all weights are zeros, fall back to prioritize recent corpus. + const double normalized_weight = highest_weight > 0 + ? 
(weights[i] / highest_weight) + : ((i + 1.0) / records_.size()); + weighted_distribution_.ChangeWeight(i, normalized_weight * (1 << 16)); + } + weighted_distribution_.RecomputeInternalState(); +} + size_t Corpus::Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier, size_t max_corpus_size, Rng &rng) { // TODO(kcc): use coverage_frontier. FUZZTEST_CHECK(max_corpus_size); if (records_.size() < 2UL) return 0; - // Recompute the weights. + size_t num_zero_weights = 0; - for (size_t i = 0, n = records_.size(); i < n; ++i) { - fs.PruneFeaturesAndCountUnseen(records_[i].features); - auto new_weight = - ComputeWeight(records_[i].features, fs, coverage_frontier); - weighted_distribution_.ChangeWeight(i, new_weight); - if (new_weight == 0) ++num_zero_weights; + for (size_t i = 0; i < records_.size(); ++i) { + if (weighted_distribution_.weights()[i] == 0) { + ++num_zero_weights; + } } // Remove zero weights and the corresponding corpus record. @@ -128,12 +200,12 @@ void Corpus::Add(const ByteArray& data, const FeatureVec& fv, weighted_distribution_.AddWeight(ComputeWeight(fv, fs, coverage_frontier)); } -const CorpusRecord& Corpus::WeightedRandom(absl::BitGenRef rng) const { - return records_[weighted_distribution_.RandomIndex(rng)]; +size_t Corpus::WeightedRandom(absl::BitGenRef rng) const { + return weighted_distribution_.RandomIndex(rng); } -const CorpusRecord& Corpus::UniformRandom(absl::BitGenRef rng) const { - return records_[absl::Uniform(rng, 0, records_.size())]; +size_t Corpus::UniformRandom(absl::BitGenRef rng) const { + return absl::Uniform(rng, 0, records_.size()); } void Corpus::DumpStatsToFile(const FeatureSet &fs, std::string_view filepath, diff --git a/centipede/corpus.h b/centipede/corpus.h index 89bcd6dc7..a6a05bdc7 100644 --- a/centipede/corpus.h +++ b/centipede/corpus.h @@ -44,6 +44,8 @@ class WeightedDistribution { // Removes the last weight and returns it. // Precondition: size() > 0. uint64_t PopBack(); + // Read-only weight accessor. 
+ const std::vector& weights() const { return weights_; } // Changes the existing idx-th weight to new_weight. void ChangeWeight(size_t idx, uint64_t new_weight); // Returns a random number in [0,size()), using a random number `random`. @@ -118,6 +120,12 @@ class Corpus { // Returns the number of removed elements. size_t Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier, size_t max_corpus_size, Rng &rng); + // Updates the corpus weights according to `fs` and `coverage_frontier`. If + // `scale_by_exec_time` is set, scales the weights by the corpus execution + // time relative to the average. + void UpdateWeights(const FeatureSet& fs, + const CoverageFrontier& coverage_frontier, + bool scale_by_exec_time); // Accessors. @@ -130,11 +138,11 @@ class Corpus { size_t NumActive() const { return records_.size(); } // Returns the max and avg sizes of the inputs. std::pair MaxAndAvgSize() const; - // Returns a random active corpus record using weighted distribution. + // Returns a random active corpus record index using weighted distribution. // See WeightedDistribution. - const CorpusRecord& WeightedRandom(absl::BitGenRef rng) const; - // Returns a random active corpus record using uniform distribution. - const CorpusRecord& UniformRandom(absl::BitGenRef rng) const; + size_t WeightedRandom(absl::BitGenRef rng) const; + // Returns a random active corpus record index using uniform distribution. + size_t UniformRandom(absl::BitGenRef rng) const; // Returns the element with index 'idx', where `idx` < NumActive(). const ByteArray &Get(size_t idx) const { return records_[idx].data; } // Returns the execution metadata for the element `idx`, `idx` < NumActive(). 
diff --git a/centipede/corpus_test.cc b/centipede/corpus_test.cc index 220239d87..b875838e6 100644 --- a/centipede/corpus_test.cc +++ b/centipede/corpus_test.cc @@ -28,6 +28,7 @@ #include "./centipede/feature.h" #include "./centipede/feature_set.h" #include "./centipede/pc_info.h" +#include "./centipede/runner_result.h" #include "./centipede/util.h" #include "./common/defs.h" #include "./common/test_util.h" @@ -113,6 +114,7 @@ TEST(Corpus, Prune) { Add({{2}, {30, 40}}); Add({{3}, {40, 50}}); Add({{4}, {10, 20}}); + corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false); // Prune. Features 20 and 40 are frequent => input {0} will be removed. EXPECT_EQ(corpus.NumActive(), 5); @@ -122,6 +124,8 @@ TEST(Corpus, Prune) { VerifyActiveInputs({{1}, {2}, {3}, {4}}); Add({{5}, {30, 60}}); + corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false); + EXPECT_EQ(corpus.NumTotal(), 6); // Prune. Feature 30 is now frequent => inputs {1} and {2} will be removed. EXPECT_EQ(corpus.NumActive(), 5); @@ -141,6 +145,53 @@ TEST(Corpus, Prune) { EXPECT_EQ(corpus.NumTotal(), 6); } +TEST(Corpus, ScalesWeightsWithExecTime) { + PCTable pc_table(100); + CFTable cf_table(100); + BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}}; + CoverageFrontier coverage_frontier(bin_info); + FeatureSet fs(2, {}); + Corpus corpus; + + auto Add = [&](const CorpusRecord& record, uint64_t exec_time_usec) { + fs.MergeFeatures(record.features); + ExecutionResult::Stats stats = {}; + stats.exec_time_usec = exec_time_usec; + corpus.Add(record.data, record.features, /*metadata=*/{}, stats, fs, + coverage_frontier); + }; + + Add({{0}, {10}}, 1); + Add({{1}, {20}}, 5); + Add({{2}, {30}}, 9); + + constexpr int kNumIter = 10000; + std::vector freq; + + Rng rng(12345); + auto ComputeFreq = [&]() { + freq.clear(); + freq.resize(corpus.NumActive()); + for (int i = 0; i < kNumIter; i++) { + const size_t idx = corpus.WeightedRandom(rng); + freq[idx]++; + } + }; + + // The weights 
should be equal without exec time scaling. + corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false); + ComputeFreq(); + EXPECT_NEAR(freq[0], kNumIter / 3, 100); + EXPECT_NEAR(freq[1], kNumIter / 3, 100); + EXPECT_NEAR(freq[2], kNumIter / 3, 100); + + // The weights should favor {0} over {1} over {2} with exec time scaling. + corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/true); + ComputeFreq(); + EXPECT_GT(freq[0], freq[1] + 100); + EXPECT_GT(freq[1], freq[2] + 100); +} + // Regression test for a crash in Corpus::Prune(). TEST(Corpus, PruneRegressionTest1) { PCTable pc_table(100); diff --git a/centipede/dispatcher.cc b/centipede/dispatcher.cc index d82cd2834..f8d71684a 100644 --- a/centipede/dispatcher.cc +++ b/centipede/dispatcher.cc @@ -31,6 +31,7 @@ #include "absl/base/nullability.h" #include "./centipede/execution_metadata.h" +#include "./centipede/mutation_input.h" #include "./centipede/runner_request.h" #include "./centipede/runner_result.h" #include "./centipede/shared_memory_blob_sequence.h" @@ -536,7 +537,10 @@ void FuzzTestDispatcherEmitMutant(const void* data, size_t size) { auto* output = GetOutputsBlobSequence(); DispatcherCheck(output != nullptr, "outputs blob sequence must exist"); DispatcherCheck(MutationResult::WriteMutant( - {static_cast(data), size}, *output), + {static_cast(data), size}, + // TODO(xinhaoyuan): change the dispatcher interface to + // include the origin. 
+ fuzztest::internal::Mutant::kOriginNone, *output), "failed to write mutant"); } diff --git a/centipede/fuzztest_mutator.cc b/centipede/fuzztest_mutator.cc index 0690f2801..09dc70498 100644 --- a/centipede/fuzztest_mutator.cc +++ b/centipede/fuzztest_mutator.cc @@ -139,32 +139,35 @@ void FuzzTestMutator::CrossOver(ByteArray &data, const ByteArray &other) { } } -std::vector FuzzTestMutator::MutateMany( - const std::vector &inputs, size_t num_mutants) { +std::vector FuzzTestMutator::MutateMany( + const std::vector& inputs, size_t num_mutants) { if (inputs.empty()) abort(); auto& cmp_tables = mutation_metadata_->cmp_tables; cmp_tables.resize(inputs.size()); - std::vector mutants; + std::vector mutants; mutants.reserve(num_mutants); - for (int i = 0; i < num_mutants; ++i) { - auto index = absl::Uniform(prng_, 0, inputs.size()); - if (!cmp_tables[index].has_value() && inputs[index].metadata != nullptr) { - cmp_tables[index].emplace(/*compact=*/true); - PopulateCmpEntries(*inputs[index].metadata, *cmp_tables[index]); + for (size_t i = 0; i < num_mutants; ++i) { + Mutant mutant; + mutant.origin = absl::Uniform(prng_, 0, inputs.size()); + if (!cmp_tables[mutant.origin].has_value() && + inputs[mutant.origin].metadata != nullptr) { + cmp_tables[mutant.origin].emplace(/*compact=*/true); + PopulateCmpEntries(*inputs[mutant.origin].metadata, + *cmp_tables[mutant.origin]); } - auto mutant = inputs[index].data; - if (mutant.size() > max_len_) mutant.resize(max_len_); + mutant.data = inputs[mutant.origin].data; + if (mutant.data.size() > max_len_) mutant.data.resize(max_len_); if (knobs_.GenerateBool(knob_mutate_or_crossover, prng_())) { // Perform crossover with some other input. It may be the same input. const auto &other_input = inputs[absl::Uniform(prng_, 0, inputs.size())].data; - CrossOver(mutant, other_input); + CrossOver(mutant.data, other_input); } else { - domain_->Mutate( - mutant, prng_, - {/*cmp_tables=*/cmp_tables[index].has_value() ? 
&*cmp_tables[index] - : nullptr}, - /*only_shrink=*/false); + domain_->Mutate(mutant.data, prng_, + {/*cmp_tables=*/cmp_tables[mutant.origin].has_value() + ? &*cmp_tables[mutant.origin] + : nullptr}, + /*only_shrink=*/false); } mutants.push_back(std::move(mutant)); } diff --git a/centipede/fuzztest_mutator.h b/centipede/fuzztest_mutator.h index 8b6846096..e432d4155 100644 --- a/centipede/fuzztest_mutator.h +++ b/centipede/fuzztest_mutator.h @@ -44,8 +44,8 @@ class FuzzTestMutator { ~FuzzTestMutator(); // Takes non-empty `inputs` and produces `num_mutants` mutants. - std::vector MutateMany(const std::vector &inputs, - size_t num_mutants); + std::vector MutateMany(const std::vector& inputs, + size_t num_mutants); // Adds `dict_entries` to the internal mutation dictionary. void AddToDictionary(const std::vector& dict_entries); diff --git a/centipede/fuzztest_mutator_test.cc b/centipede/fuzztest_mutator_test.cc index 327bc836b..3ef6df158 100644 --- a/centipede/fuzztest_mutator_test.cc +++ b/centipede/fuzztest_mutator_test.cc @@ -33,11 +33,20 @@ namespace { using ::testing::AllOf; using ::testing::Each; +using ::testing::Field; using ::testing::IsSupersetOf; using ::testing::Le; using ::testing::SizeIs; using ::testing::Values; +std::vector GetDataFromMutants(const std::vector& mutants) { + std::vector data; + for (const auto& mutant : mutants) { + data.push_back(mutant.data); + } + return data; +} + TEST(FuzzTestMutator, DifferentRngSeedsLeadToDifferentMutantSequences) { const Knobs knobs; FuzzTestMutator mutator[2]{FuzzTestMutator(knobs, /*seed=*/1), @@ -49,10 +58,10 @@ TEST(FuzzTestMutator, DifferentRngSeedsLeadToDifferentMutantSequences) { std::vector mutation_inputs = {{data}}; constexpr size_t kMutantSequenceLength = 100; for (size_t iter = 0; iter < kMutantSequenceLength; iter++) { - const std::vector mutants = + const std::vector mutants = mutator[i].MutateMany(mutation_inputs, 1); ASSERT_EQ(mutants.size(), 1); - res[i].push_back(mutants[0]); +
res[i].push_back(mutants[0].data); } } EXPECT_NE(res[0], res[1]); @@ -64,7 +73,7 @@ TEST(FuzzTestMutator, MutateManyWorksWithInputsLargerThanMaxLen) { FuzzTestMutator mutator(knobs, /*seed=*/1); EXPECT_TRUE(mutator.set_max_len(kMaxLen)); constexpr size_t kNumMutantsToGenerate = 10000; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( { {/*data=*/{0, 1, 2, 3, 4, 5, 6, 7}}, {/*data=*/{0}}, @@ -74,70 +83,70 @@ TEST(FuzzTestMutator, MutateManyWorksWithInputsLargerThanMaxLen) { }, kNumMutantsToGenerate); - EXPECT_THAT(mutants, - AllOf(SizeIs(kNumMutantsToGenerate), Each(SizeIs(Le(kMaxLen))))); + EXPECT_THAT(mutants, AllOf(SizeIs(kNumMutantsToGenerate), + Each(Field(&Mutant::data, SizeIs(Le(kMaxLen)))))); } TEST(FuzzTestMutator, CrossOverInsertsDataFromOtherInputs) { const Knobs knobs; FuzzTestMutator mutator(knobs, /*seed=*/1); constexpr size_t kNumMutantsToGenerate = 100000; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( { {/*data=*/{0, 1, 2, 3}}, {/*data=*/{4, 5, 6, 7}}, }, kNumMutantsToGenerate); - EXPECT_THAT(mutants, IsSupersetOf(std::vector{ - // The entire other input - {4, 5, 6, 7, 0, 1, 2, 3}, - {0, 1, 4, 5, 6, 7, 2, 3}, - {0, 1, 2, 3, 4, 5, 6, 7}, - // The prefix of other input - {4, 5, 6, 0, 1, 2, 3}, - {0, 1, 4, 5, 6, 2, 3}, - {0, 1, 2, 3, 4, 5, 6}, - // The suffix of other input - {5, 6, 7, 0, 1, 2, 3}, - {0, 1, 5, 6, 7, 2, 3}, - {0, 1, 2, 3, 5, 6, 7}, - // The middle of other input - {5, 6, 0, 1, 2, 3}, - {0, 1, 5, 6, 2, 3}, - {0, 1, 2, 3, 5, 6}, - })); + EXPECT_THAT(GetDataFromMutants(mutants), IsSupersetOf(std::vector{ + // The entire other input + {4, 5, 6, 7, 0, 1, 2, 3}, + {0, 1, 4, 5, 6, 7, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 7}, + // The prefix of other input + {4, 5, 6, 0, 1, 2, 3}, + {0, 1, 4, 5, 6, 2, 3}, + {0, 1, 2, 3, 4, 5, 6}, + // The suffix of other input + {5, 6, 7, 0, 1, 2, 3}, + {0, 1, 5, 6, 7, 2, 3}, + {0, 1, 2, 3, 5, 6, 7}, + // The middle 
of other input + {5, 6, 0, 1, 2, 3}, + {0, 1, 5, 6, 2, 3}, + {0, 1, 2, 3, 5, 6}, + })); } TEST(FuzzTestMutator, CrossOverOverwritesDataFromOtherInputs) { const Knobs knobs; FuzzTestMutator mutator(knobs, /*seed=*/1); constexpr size_t kNumMutantsToGenerate = 100000; - const std::vector mutants = mutator.MutateMany( + const std::vector mutants = mutator.MutateMany( { {/*data=*/{0, 1, 2, 3, 4, 5, 6, 7}}, {/*data=*/{100, 101, 102, 103}}, }, kNumMutantsToGenerate); - EXPECT_THAT(mutants, IsSupersetOf(std::vector{ - // The entire other input - {100, 101, 102, 103, 4, 5, 6, 7}, - {0, 1, 100, 101, 102, 103, 6, 7}, - {0, 1, 2, 3, 100, 101, 102, 103}, - // The prefix of other input - {100, 101, 102, 3, 4, 5, 6, 7}, - {0, 1, 2, 100, 101, 102, 6, 7}, - {0, 1, 2, 3, 4, 100, 101, 102}, - // The suffix of other input - {101, 102, 103, 3, 4, 5, 6, 7}, - {0, 1, 2, 101, 102, 103, 6, 7}, - {0, 1, 2, 3, 4, 101, 102, 103}, - // The middle of other input - {101, 102, 2, 3, 4, 5, 6, 7}, - {0, 1, 2, 101, 102, 5, 6, 7}, - {0, 1, 2, 3, 4, 5, 101, 102}, - })); + EXPECT_THAT(GetDataFromMutants(mutants), IsSupersetOf(std::vector{ + // The entire other input + {100, 101, 102, 103, 4, 5, 6, 7}, + {0, 1, 100, 101, 102, 103, 6, 7}, + {0, 1, 2, 3, 100, 101, 102, 103}, + // The prefix of other input + {100, 101, 102, 3, 4, 5, 6, 7}, + {0, 1, 2, 100, 101, 102, 6, 7}, + {0, 1, 2, 3, 4, 100, 101, 102}, + // The suffix of other input + {101, 102, 103, 3, 4, 5, 6, 7}, + {0, 1, 2, 101, 102, 103, 6, 7}, + {0, 1, 2, 3, 4, 101, 102, 103}, + // The middle of other input + {101, 102, 2, 3, 4, 5, 6, 7}, + {0, 1, 2, 101, 102, 5, 6, 7}, + {0, 1, 2, 3, 4, 5, 101, 102}, + })); } // Test parameter containing the mutation settings and the expectations of a @@ -181,12 +190,12 @@ TEST_P(MutationStepTest, GeneratesExpectedMutantsAndAvoidsUnexpectedMutants) { const std::vector inputs = { {/*data=*/GetParam().seed_input, /*metadata=*/&metadata}}; for (size_t i = 0; i < GetParam().max_num_iterations; i++) { - const 
std::vector mutants = mutator.MutateMany(inputs, 1); + const std::vector mutants = mutator.MutateMany(inputs, 1); ASSERT_EQ(mutants.size(), 1); const auto& mutant = mutants[0]; - EXPECT_FALSE(unexpected_mutants.contains(mutant)) - << "Unexpected mutant: {" << absl::StrJoin(mutant, ",") << "}"; - unmatched_expected_mutants.erase(mutant); + EXPECT_FALSE(unexpected_mutants.contains(mutant.data)) + << "Unexpected mutant: {" << absl::StrJoin(mutant.data, ",") << "}"; + unmatched_expected_mutants.erase(mutant.data); if (unmatched_expected_mutants.empty() && i >= GetParam().min_num_iterations) break; diff --git a/centipede/minimize_crash.cc b/centipede/minimize_crash.cc index 66d417a20..dc0c53e7d 100644 --- a/centipede/minimize_crash.cc +++ b/centipede/minimize_crash.cc @@ -114,11 +114,11 @@ static void MinimizeCrash(const Environment &env, // discarding all inputs that are too large. // TODO(kcc): modify the Mutate() interface such that max_len can be passed. // - const std::vector mutants = callbacks->Mutate( + const std::vector mutants = callbacks->Mutate( GetMutationInputRefsFromDataInputs(recent_crashers), env.batch_size); std::vector smaller_mutants; for (const auto &m : mutants) { - if (m.size() < min_known_size) smaller_mutants.push_back(m); + if (m.data.size() < min_known_size) smaller_mutants.push_back(m.data); } // Execute all mutants. If a new crasher is found, add it to `queue`. diff --git a/centipede/mutation_input.h b/centipede/mutation_input.h index 504c75327..a8daf4140 100644 --- a/centipede/mutation_input.h +++ b/centipede/mutation_input.h @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Data types used for mutation inputs. +// Data types used for mutation. // // This library is for both engine and runner. 
#ifndef THIRD_PARTY_CENTIPEDE_MUTATION_INPUT_H_ #define THIRD_PARTY_CENTIPEDE_MUTATION_INPUT_H_ +#include #include #include "./centipede/execution_metadata.h" @@ -48,6 +49,23 @@ inline std::vector GetMutationInputRefsFromDataInputs( return results; } +// Represents a mutation result. +struct Mutant { + // The mutant `data`. + ByteArray data; + // The index of the input used to mutate into `data`. The index can be + // interpreted in different ways depending on the context: In mutation batch + // results, it means the index in the batch input. Once processed by the + // engine it means the index of the in-memory corpus. + size_t origin = kOriginNone; + // A special `origin` value to indicate that the mutant has no origin. + static constexpr size_t kOriginNone = static_cast(-1); + + bool operator==(const Mutant& other) const { + return data == other.data && origin == other.origin; + } +}; + } // namespace fuzztest::internal #endif // THIRD_PARTY_CENTIPEDE_MUTATION_INPUT_H_ diff --git a/centipede/runner.cc b/centipede/runner.cc index e5808e1bd..922e9b5c4 100644 --- a/centipede/runner.cc +++ b/centipede/runner.cc @@ -308,8 +308,8 @@ void RunnerCallbacks::GetSeeds(std::function seed_callback) { std::string RunnerCallbacks::GetSerializedTargetConfig() { return ""; } bool RunnerCallbacks::Mutate( - const std::vector & /*inputs*/, size_t /*num_mutants*/, - std::function /*new_mutant_callback*/) { + const std::vector& /*inputs*/, size_t /*num_mutants*/, + std::function /*new_mutant_callback*/) { RunnerCheck(!HasCustomMutator(), "Class deriving from RunnerCallbacks must implement Mutate() if " "HasCustomMutator() returns true."); @@ -339,8 +339,9 @@ class LegacyRunnerCallbacks : public RunnerCallbacks { return custom_mutator_cb_ != nullptr; } - bool Mutate(const std::vector &inputs, size_t num_mutants, - std::function new_mutant_callback) override; + bool Mutate( + const std::vector& inputs, size_t num_mutants, + std::function new_mutant_callback) override; private: 
FuzzerTestOneInputCallback test_one_input_cb_; @@ -603,17 +604,18 @@ static int MutateInputsFromShmem(BlobSequence &inputs_blobseq, } if (!callbacks.HasCustomMutator()) return EXIT_SUCCESS; - if (!callbacks.Mutate(input_refs, num_mutants, [&](ByteSpan mutant) { - MutationResult::WriteMutant(mutant, outputs_blobseq); - })) { + if (!callbacks.Mutate( + input_refs, num_mutants, [&](ByteSpan data, size_t origin) { + MutationResult::WriteMutant(data, origin, outputs_blobseq); + })) { return EXIT_FAILURE; } return EXIT_SUCCESS; } bool LegacyRunnerCallbacks::Mutate( - const std::vector &inputs, size_t num_mutants, - std::function new_mutant_callback) { + const std::vector& inputs, size_t num_mutants, + std::function new_mutant_callback) { if (custom_mutator_cb_ == nullptr) return false; unsigned int seed = GetRandomSeed(); const size_t num_inputs = inputs.size(); @@ -624,7 +626,8 @@ bool LegacyRunnerCallbacks::Mutate( attempt < num_mutants * kAverageMutationAttempts && num_outputs < num_mutants; ++attempt) { - const auto &input_data = inputs[rand_r(&seed) % num_inputs].data; + const size_t origin_index = rand_r(&seed) % num_inputs; + const auto& input_data = inputs[origin_index].data; size_t size = std::min(input_data.size(), max_mutant_size); std::copy(input_data.cbegin(), input_data.cbegin() + size, mutant.begin()); @@ -641,7 +644,7 @@ bool LegacyRunnerCallbacks::Mutate( rand_r(&seed)); } if (new_size == 0) continue; - new_mutant_callback({mutant.data(), new_size}); + new_mutant_callback({mutant.data(), new_size}, origin_index); ++num_outputs; } return true; diff --git a/centipede/runner_interface.h b/centipede/runner_interface.h index eb004d5aa..c0c8e5db5 100644 --- a/centipede/runner_interface.h +++ b/centipede/runner_interface.h @@ -161,9 +161,9 @@ class RunnerCallbacks { // // TODO(xinhaoyuan): Consider supporting only_shrink to speed up // input shrinking. 
- virtual bool Mutate(const std::vector &inputs, - size_t num_mutants, - std::function new_mutant_callback); + virtual bool Mutate( + const std::vector& inputs, size_t num_mutants, + std::function new_mutant_callback); virtual ~RunnerCallbacks() = default; }; diff --git a/centipede/runner_result.cc b/centipede/runner_result.cc index 3b726e758..2036b9bc8 100644 --- a/centipede/runner_result.cc +++ b/centipede/runner_result.cc @@ -22,6 +22,7 @@ #include "./centipede/execution_metadata.h" #include "./centipede/feature.h" +#include "./centipede/mutation_input.h" #include "./centipede/shared_memory_blob_sequence.h" #include "./common/defs.h" @@ -44,6 +45,7 @@ enum Tags : Blob::SizeAndTagT { // Mutation result tags. kTagHasCustomMutator, + kTagMutantOrigin, kTagMutant, }; @@ -185,8 +187,13 @@ bool MutationResult::WriteHasCustomMutator(bool has_custom_mutator, reinterpret_cast(&has_custom_mutator)}); } -bool MutationResult::WriteMutant(ByteSpan mutant, BlobSequence &blobseq) { - return blobseq.Write({kTagMutant, mutant.size(), mutant.data()}); +bool MutationResult::WriteMutant(ByteSpan data, size_t origin, + BlobSequence& blobseq) { + if (!blobseq.Write({kTagMutantOrigin, sizeof(origin), + reinterpret_cast(&origin)})) { + return false; + } + return blobseq.Write({kTagMutant, data.size(), data.data()}); } bool MutationResult::Read(size_t num_mutants, BlobSequence &blobseq) { @@ -199,10 +206,17 @@ bool MutationResult::Read(size_t num_mutants, BlobSequence &blobseq) { mutants_.clear(); mutants_.reserve(num_mutants); for (size_t i = 0; i < num_mutants; ++i) { + size_t origin = Mutant::kOriginNone; + { + const Blob blob = blobseq.Read(); + if (blob.tag != kTagMutantOrigin) return false; + if (blob.size != sizeof(origin)) return false; + std::memcpy(&origin, blob.data, sizeof(origin)); + } const Blob blob = blobseq.Read(); if (blob.tag != kTagMutant) return false; if (blob.size == 0) break; - mutants_.emplace_back(blob.data, blob.data + blob.size); + 
mutants_.push_back({ByteArray{blob.data, blob.data + blob.size}, origin}); } return true; } diff --git a/centipede/runner_result.h b/centipede/runner_result.h index 1b94f3187..a56e6a4dd 100644 --- a/centipede/runner_result.h +++ b/centipede/runner_result.h @@ -25,6 +25,7 @@ #include "./centipede/execution_metadata.h" #include "./centipede/feature.h" +#include "./centipede/mutation_input.h" #include "./centipede/shared_memory_blob_sequence.h" #include "./common/defs.h" @@ -217,7 +218,7 @@ class MutationResult { BlobSequence& blobseq); // Writes one mutant to `blobseq`. Returns true iff successful. - static bool WriteMutant(ByteSpan mutant, BlobSequence& blobseq); + static bool WriteMutant(ByteSpan data, size_t origin, BlobSequence& blobseq); // Reads whether the target has a custom mutator, and if so, reads at most // `num_mutants` mutants from `blobseq`. Returns true iff successful. @@ -227,13 +228,13 @@ class MutationResult { int exit_code() const { return exit_code_; } int& exit_code() { return exit_code_; } bool has_custom_mutator() const { return has_custom_mutator_; } - const std::vector& mutants() const& { return mutants_; } - std::vector&& mutants() && { return std::move(mutants_); } + const std::vector& mutants() const& { return mutants_; } + std::vector&& mutants() && { return std::move(mutants_); } private: int exit_code_ = EXIT_SUCCESS; bool has_custom_mutator_ = false; - std::vector mutants_; + std::vector mutants_; }; } // namespace fuzztest::internal diff --git a/centipede/runner_result_test.cc b/centipede/runner_result_test.cc index 864401a62..d50e3749a 100644 --- a/centipede/runner_result_test.cc +++ b/centipede/runner_result_test.cc @@ -29,6 +29,7 @@ #include "gtest/gtest.h" #include "./centipede/execution_metadata.h" #include "./centipede/feature.h" +#include "./centipede/mutation_input.h" #include "./centipede/shared_memory_blob_sequence.h" #include "./common/defs.h" #include "./common/test_util.h" @@ -213,18 +214,18 @@ TEST(MutationResult, 
WriteThenRead) { // Write a mutation result. ASSERT_TRUE(MutationResult::WriteHasCustomMutator(true, blobseq)); - ASSERT_TRUE(MutationResult::WriteMutant({1, 2, 3}, blobseq)); - ASSERT_TRUE(MutationResult::WriteMutant({4, 5, 6}, blobseq)); - ASSERT_TRUE(MutationResult::WriteMutant({7, 8, 9}, blobseq)); + ASSERT_TRUE(MutationResult::WriteMutant({1, 2, 3}, 3, blobseq)); + ASSERT_TRUE(MutationResult::WriteMutant({4, 5, 6}, 2, blobseq)); + ASSERT_TRUE(MutationResult::WriteMutant({7, 8, 9}, 1, blobseq)); blobseq.Reset(); MutationResult mutation_result; ASSERT_TRUE(mutation_result.Read(3, blobseq)); EXPECT_TRUE(mutation_result.has_custom_mutator()); - EXPECT_THAT( - mutation_result.mutants(), - ElementsAre(ByteArray{1, 2, 3}, ByteArray{4, 5, 6}, ByteArray{7, 8, 9})); + EXPECT_THAT(mutation_result.mutants(), + ElementsAre(Mutant{{1, 2, 3}, 3}, Mutant{{4, 5, 6}, 2}, + Mutant{{7, 8, 9}, 1})); } TEST(ExecutionResult, ReadResultSucceedsOnlyWithInputBegin) { diff --git a/centipede/test_coverage_util.h b/centipede/test_coverage_util.h index c06a9bf7a..acfb1e12a 100644 --- a/centipede/test_coverage_util.h +++ b/centipede/test_coverage_util.h @@ -50,8 +50,8 @@ class TestCallbacks : public CentipedeCallbacks { FUZZTEST_CHECK_EQ(EXIT_SUCCESS, result); return true; } - std::vector Mutate(const std::vector &inputs, - size_t num_mutants) override { + std::vector Mutate(const std::vector& inputs, + size_t num_mutants) override { return {}; } }; diff --git a/centipede/testing/fuzz_target_with_custom_mutator.cc b/centipede/testing/fuzz_target_with_custom_mutator.cc index a95dbf84d..a68c37408 100644 --- a/centipede/testing/fuzz_target_with_custom_mutator.cc +++ b/centipede/testing/fuzz_target_with_custom_mutator.cc @@ -36,14 +36,13 @@ class CustomMutatorRunnerCallbacks bool HasCustomMutator() const override { return true; } - bool Mutate(const std::vector& inputs, - size_t num_mutants, - std::function new_mutant_callback) override { - size_t i = 0; - for 
(fuzztest::internal::MutationInputRef input : inputs) { - if (i++ >= num_mutants) break; + bool Mutate( + const std::vector& inputs, + size_t num_mutants, + std::function new_mutant_callback) override { + for (size_t i = 0; i < inputs.size() && i < num_mutants; ++i) { // Just return the original input as a mutant. - new_mutant_callback(input.data); + new_mutant_callback(inputs[i].data, i); } return true; } diff --git a/fuzztest/internal/centipede_adaptor.cc b/fuzztest/internal/centipede_adaptor.cc index da98198f8..36b24c03a 100644 --- a/fuzztest/internal/centipede_adaptor.cc +++ b/fuzztest/internal/centipede_adaptor.cc @@ -524,13 +524,14 @@ class CentipedeAdaptorRunnerCallbacks bool Mutate(const std::vector& inputs, size_t num_mutants, - std::function + std::function new_mutant_callback) override { if (inputs.empty()) return false; cmp_tables.resize(inputs.size()); absl::Cleanup cmp_tables_cleaner = [this]() { cmp_tables.clear(); }; for (size_t i = 0; i < num_mutants; ++i) { const auto choice = absl::Uniform(prng_, 0, 1); + size_t origin_index = Mutant::kOriginNone; std::string mutant_data; constexpr double kDomainInitRatio = 0.0001; if (choice < kDomainInitRatio) { @@ -539,8 +540,7 @@ class CentipedeAdaptorRunnerCallbacks .SerializeCorpus(fuzzer_impl_.params_domain_.Init(prng_)) .ToString(); } else { - const auto origin_index = - absl::Uniform(prng_, 0, inputs.size()); + origin_index = absl::Uniform(prng_, 0, inputs.size()); const auto& origin = inputs[origin_index].data; auto parsed_origin = fuzzer_impl_.TryParse({(const char*)origin.data(), origin.size()}); @@ -563,7 +563,8 @@ class CentipedeAdaptorRunnerCallbacks fuzzer_impl_.params_domain_.SerializeCorpus(mutant.args).ToString(); } new_mutant_callback( - {(unsigned char*)mutant_data.data(), mutant_data.size()}); + {(unsigned char*)mutant_data.data(), mutant_data.size()}, + origin_index); } return true; }