linux-x64/clang/prebuilt_include/llvm/lib/Fuzzer/FuzzerCorpus.h - hafnium/prebuilts - Git at Google

 //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // fuzzer::InputCorpus
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_FUZZER_CORPUS
 #define LLVM_FUZZER_CORPUS

 #include "FuzzerDataFlowTrace.h"
 #include "FuzzerDefs.h"
 #include "FuzzerIO.h"
 #include "FuzzerRandom.h"
 #include "FuzzerSHA1.h"
 #include "FuzzerTracePC.h"
 #include <algorithm>
 #include <numeric>
 #include <random>
 #include <unordered_set>

 namespace fuzzer {

 struct InputInfo {
   Unit U;  // The actual input data.
   uint8_t Sha1[kSHA1NumBytes];  // Checksum.
   // Number of features that this input has and no smaller input has.
   size_t NumFeatures = 0;
   size_t Tmp = 0; // Used by ValidateFeatureSet.
   // Stats.
   size_t NumExecutedMutations = 0;
   size_t NumSuccessfullMutations = 0;
   bool MayDeleteFile = false;
   bool Reduced = false;
   bool HasFocusFunction = false;
   Vector<uint32_t> UniqFeatureSet;
   Vector<uint8_t> DataFlowTraceForFocusFunction;
 };

 class InputCorpus {
   static const size_t kFeatureSetSize = 1 << 21;
  public:
   InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) {
     memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature));
     memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature));
   }
   ~InputCorpus() {
     for (auto II : Inputs)
       delete II;
   }
   size_t size() const { return Inputs.size(); }
   size_t SizeInBytes() const {
     size_t Res = 0;
     for (auto II : Inputs)
       Res += II->U.size();
     return Res;
   }
   size_t NumActiveUnits() const {
     size_t Res = 0;
     for (auto II : Inputs)
       Res += !II->U.empty();
     return Res;
   }
   size_t MaxInputSize() const {
     size_t Res = 0;
     for (auto II : Inputs)
         Res = std::max(Res, II->U.size());
     return Res;
   }

   size_t NumInputsThatTouchFocusFunction() {
     return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) {
       return II->HasFocusFunction;
     });
   }

   size_t NumInputsWithDataFlowTrace() {
     return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) {
       return !II->DataFlowTraceForFocusFunction.empty();
     });
   }

   bool empty() const { return Inputs.empty(); }
   const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
   void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
                    bool HasFocusFunction, const Vector<uint32_t> &FeatureSet,
                    const DataFlowTrace &DFT, const InputInfo *BaseII) {
     assert(!U.empty());
     if (FeatureDebug)
       Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
     Inputs.push_back(new InputInfo());
     InputInfo &II = *Inputs.back();
     II.U = U;
     II.NumFeatures = NumFeatures;
     II.MayDeleteFile = MayDeleteFile;
     II.UniqFeatureSet = FeatureSet;
     II.HasFocusFunction = HasFocusFunction;
     std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end());
     ComputeSHA1(U.data(), U.size(), II.Sha1);
     auto Sha1Str = Sha1ToString(II.Sha1);
     Hashes.insert(Sha1Str);
     if (HasFocusFunction)
       if (auto V = DFT.Get(Sha1Str))
         II.DataFlowTraceForFocusFunction = *V;
     // This is a gross heuristic.
     // Ideally, when we add an element to a corpus we need to know its DFT.
     // But if we don't, we'll use the DFT of its base input.
     if (II.DataFlowTraceForFocusFunction.empty() && BaseII)
       II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction;
     UpdateCorpusDistribution();
     PrintCorpus();
     // ValidateFeatureSet();
   }

   // Debug-only
   void PrintUnit(const Unit &U) {
     if (!FeatureDebug) return;
     for (uint8_t C : U) {
       if (C != 'F' && C != 'U' && C != 'Z')
         C = '.';
       Printf("%c", C);
     }
   }

   // Debug-only
   void PrintFeatureSet(const Vector<uint32_t> &FeatureSet) {
     if (!FeatureDebug) return;
     Printf("{");
     for (uint32_t Feature: FeatureSet)
       Printf("%u,", Feature);
     Printf("}");
   }

   // Debug-only
   void PrintCorpus() {
     if (!FeatureDebug) return;
     Printf("======= CORPUS:\n");
     int i = 0;
     for (auto II : Inputs) {
       if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) {
         Printf("[%2d] ", i);
         Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size());
         PrintUnit(II->U);
         Printf(" ");
         PrintFeatureSet(II->UniqFeatureSet);
         Printf("\n");
       }
       i++;
     }
   }

   void Replace(InputInfo *II, const Unit &U) {
     assert(II->U.size() > U.size());
     Hashes.erase(Sha1ToString(II->Sha1));
     DeleteFile(*II);
     ComputeSHA1(U.data(), U.size(), II->Sha1);
     Hashes.insert(Sha1ToString(II->Sha1));
     II->U = U;
     II->Reduced = true;
     UpdateCorpusDistribution();
   }

   bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
   bool HasUnit(const std::string &H) { return Hashes.count(H); }
   InputInfo &ChooseUnitToMutate(Random &Rand) {
     InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)];
     assert(!II.U.empty());
     return II;
   };

   // Returns an index of random unit from the corpus to mutate.
   size_t ChooseUnitIdxToMutate(Random &Rand) {
     size_t Idx = static_cast<size_t>(CorpusDistribution(Rand));
     assert(Idx < Inputs.size());
     return Idx;
   }

   void PrintStats() {
     for (size_t i = 0; i < Inputs.size(); i++) {
       const auto &II = *Inputs[i];
       Printf("  [% 3zd %s] sz: % 5zd runs: % 5zd succ: % 5zd focus: %d\n", i,
              Sha1ToString(II.Sha1).c_str(), II.U.size(),
              II.NumExecutedMutations, II.NumSuccessfullMutations, II.HasFocusFunction);
     }
   }

   void PrintFeatureSet() {
     for (size_t i = 0; i < kFeatureSetSize; i++) {
       if(size_t Sz = GetFeature(i))
         Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz);
     }
     Printf("\n\t");
     for (size_t i = 0; i < Inputs.size(); i++)
       if (size_t N = Inputs[i]->NumFeatures)
         Printf(" %zd=>%zd ", i, N);
     Printf("\n");
   }

   void DeleteFile(const InputInfo &II) {
     if (!OutputCorpus.empty() && II.MayDeleteFile)
       RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1)));
   }

   void DeleteInput(size_t Idx) {
     InputInfo &II = *Inputs[Idx];
     DeleteFile(II);
     Unit().swap(II.U);
     if (FeatureDebug)
       Printf("EVICTED %zd\n", Idx);
   }

   bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
     assert(NewSize);
     Idx = Idx % kFeatureSetSize;
     uint32_t OldSize = GetFeature(Idx);
     if (OldSize == 0 || (Shrink && OldSize > NewSize)) {
       if (OldSize > 0) {
         size_t OldIdx = SmallestElementPerFeature[Idx];
         InputInfo &II = *Inputs[OldIdx];
         assert(II.NumFeatures > 0);
         II.NumFeatures--;
         if (II.NumFeatures == 0)
           DeleteInput(OldIdx);
       } else {
         NumAddedFeatures++;
       }
       NumUpdatedFeatures++;
       if (FeatureDebug)
         Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize);
       SmallestElementPerFeature[Idx] = Inputs.size();
       InputSizesPerFeature[Idx] = NewSize;
       return true;
     }
     return false;
   }

   size_t NumFeatures() const { return NumAddedFeatures; }
   size_t NumFeatureUpdates() const { return NumUpdatedFeatures; }

 private:

   static const bool FeatureDebug = false;

   size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; }

   void ValidateFeatureSet() {
     if (FeatureDebug)
       PrintFeatureSet();
     for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++)
       if (GetFeature(Idx))
         Inputs[SmallestElementPerFeature[Idx]]->Tmp++;
     for (auto II: Inputs) {
       if (II->Tmp != II->NumFeatures)
         Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures);
       assert(II->Tmp == II->NumFeatures);
       II->Tmp = 0;
     }
   }

   // Updates the probability distribution for the units in the corpus.
   // Must be called whenever the corpus or unit weights are changed.
   //
   // Hypothesis: units added to the corpus last are more interesting.
   //
   // Hypothesis: inputs with infrequent features are more interesting.
   void UpdateCorpusDistribution() {
     size_t N = Inputs.size();
     assert(N);
     Intervals.resize(N + 1);
     Weights.resize(N);
     std::iota(Intervals.begin(), Intervals.end(), 0);
     for (size_t i = 0; i < N; i++)
       Weights[i] = Inputs[i]->NumFeatures
                        ? (i + 1) * (Inputs[i]->HasFocusFunction ? 1000 : 1)
                        : 0.;
     if (FeatureDebug) {
       for (size_t i = 0; i < N; i++)
         Printf("%zd ", Inputs[i]->NumFeatures);
       Printf("SCORE\n");
       for (size_t i = 0; i < N; i++)
         Printf("%f ", Weights[i]);
       Printf("Weights\n");
     }
     CorpusDistribution = std::piecewise_constant_distribution<double>(
         Intervals.begin(), Intervals.end(), Weights.begin());
   }
   std::piecewise_constant_distribution<double> CorpusDistribution;

   Vector<double> Intervals;
   Vector<double> Weights;

   std::unordered_set<std::string> Hashes;
   Vector<InputInfo*> Inputs;

   size_t NumAddedFeatures = 0;
   size_t NumUpdatedFeatures = 0;
   uint32_t InputSizesPerFeature[kFeatureSetSize];
   uint32_t SmallestElementPerFeature[kFeatureSetSize];

   std::string OutputCorpus;
 };

 }  // namespace fuzzer

 #endif  // LLVM_FUZZER_CORPUS
	//===- FuzzerCorpus.h - Internal header for the Fuzzer ----------- C++ - ===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	// fuzzer::InputCorpus
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_FUZZER_CORPUS
	#define LLVM_FUZZER_CORPUS

	#include "FuzzerDataFlowTrace.h"
	#include "FuzzerDefs.h"
	#include "FuzzerIO.h"
	#include "FuzzerRandom.h"
	#include "FuzzerSHA1.h"
	#include "FuzzerTracePC.h"
	#include <algorithm>
	#include <numeric>
	#include <random>
	#include <unordered_set>

	namespace fuzzer {

	struct InputInfo {
	Unit U; // The actual input data.
	uint8_t Sha1[kSHA1NumBytes]; // Checksum.
	// Number of features that this input has and no smaller input has.
	size_t NumFeatures = 0;
	size_t Tmp = 0; // Used by ValidateFeatureSet.
	// Stats.
	size_t NumExecutedMutations = 0;
	size_t NumSuccessfullMutations = 0;
	bool MayDeleteFile = false;
	bool Reduced = false;
	bool HasFocusFunction = false;
	Vector<uint32_t> UniqFeatureSet;
	Vector<uint8_t> DataFlowTraceForFocusFunction;
	};

	class InputCorpus {
	static const size_t kFeatureSetSize = 1 << 21;
	public:
	InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) {
	memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature));
	memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature));
	}
	~InputCorpus() {
	for (auto II : Inputs)
	delete II;
	}
	size_t size() const { return Inputs.size(); }
	size_t SizeInBytes() const {
	size_t Res = 0;
	for (auto II : Inputs)
	Res += II->U.size();
	return Res;
	}
	size_t NumActiveUnits() const {
	size_t Res = 0;
	for (auto II : Inputs)
	Res += !II->U.empty();
	return Res;
	}
	size_t MaxInputSize() const {
	size_t Res = 0;
	for (auto II : Inputs)
	Res = std::max(Res, II->U.size());
	return Res;
	}

	size_t NumInputsThatTouchFocusFunction() {
	return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) {
	return II->HasFocusFunction;
	});
	}

	size_t NumInputsWithDataFlowTrace() {
	return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) {
	return !II->DataFlowTraceForFocusFunction.empty();
	});
	}

	bool empty() const { return Inputs.empty(); }
	const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
	void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
	bool HasFocusFunction, const Vector<uint32_t> &FeatureSet,
	const DataFlowTrace &DFT, const InputInfo *BaseII) {
	assert(!U.empty());
	if (FeatureDebug)
	Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
	Inputs.push_back(new InputInfo());
	InputInfo &II = *Inputs.back();
	II.U = U;
	II.NumFeatures = NumFeatures;
	II.MayDeleteFile = MayDeleteFile;
	II.UniqFeatureSet = FeatureSet;
	II.HasFocusFunction = HasFocusFunction;
	std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end());
	ComputeSHA1(U.data(), U.size(), II.Sha1);
	auto Sha1Str = Sha1ToString(II.Sha1);
	Hashes.insert(Sha1Str);
	if (HasFocusFunction)
	if (auto V = DFT.Get(Sha1Str))
	II.DataFlowTraceForFocusFunction = *V;
	// This is a gross heuristic.
	// Ideally, when we add an element to a corpus we need to know its DFT.
	// But if we don't, we'll use the DFT of its base input.
	if (II.DataFlowTraceForFocusFunction.empty() && BaseII)
	II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction;
	UpdateCorpusDistribution();
	PrintCorpus();
	// ValidateFeatureSet();
	}

	// Debug-only
	void PrintUnit(const Unit &U) {
	if (!FeatureDebug) return;
	for (uint8_t C : U) {
	if (C != 'F' && C != 'U' && C != 'Z')
	C = '.';
	Printf("%c", C);
	}
	}

	// Debug-only
	void PrintFeatureSet(const Vector<uint32_t> &FeatureSet) {
	if (!FeatureDebug) return;
	Printf("{");
	for (uint32_t Feature: FeatureSet)
	Printf("%u,", Feature);
	Printf("}");
	}

	// Debug-only
	void PrintCorpus() {
	if (!FeatureDebug) return;
	Printf("======= CORPUS:\n");
	int i = 0;
	for (auto II : Inputs) {
	if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) {
	Printf("[%2d] ", i);
	Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size());
	PrintUnit(II->U);
	Printf(" ");
	PrintFeatureSet(II->UniqFeatureSet);
	Printf("\n");
	}
	i++;
	}
	}

	void Replace(InputInfo *II, const Unit &U) {
	assert(II->U.size() > U.size());
	Hashes.erase(Sha1ToString(II->Sha1));
	DeleteFile(*II);
	ComputeSHA1(U.data(), U.size(), II->Sha1);
	Hashes.insert(Sha1ToString(II->Sha1));
	II->U = U;
	II->Reduced = true;
	UpdateCorpusDistribution();
	}

	bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
	bool HasUnit(const std::string &H) { return Hashes.count(H); }
	InputInfo &ChooseUnitToMutate(Random &Rand) {
	InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)];
	assert(!II.U.empty());
	return II;
	};

	// Returns an index of random unit from the corpus to mutate.
	size_t ChooseUnitIdxToMutate(Random &Rand) {
	size_t Idx = static_cast<size_t>(CorpusDistribution(Rand));
	assert(Idx < Inputs.size());
	return Idx;
	}

	void PrintStats() {
	for (size_t i = 0; i < Inputs.size(); i++) {
	const auto &II = *Inputs[i];
	Printf(" [% 3zd %s] sz: % 5zd runs: % 5zd succ: % 5zd focus: %d\n", i,
	Sha1ToString(II.Sha1).c_str(), II.U.size(),
	II.NumExecutedMutations, II.NumSuccessfullMutations, II.HasFocusFunction);
	}
	}

	void PrintFeatureSet() {
	for (size_t i = 0; i < kFeatureSetSize; i++) {
	if(size_t Sz = GetFeature(i))
	Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz);
	}
	Printf("\n\t");
	for (size_t i = 0; i < Inputs.size(); i++)
	if (size_t N = Inputs[i]->NumFeatures)
	Printf(" %zd=>%zd ", i, N);
	Printf("\n");
	}

	void DeleteFile(const InputInfo &II) {
	if (!OutputCorpus.empty() && II.MayDeleteFile)
	RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1)));
	}

	void DeleteInput(size_t Idx) {
	InputInfo &II = *Inputs[Idx];
	DeleteFile(II);
	Unit().swap(II.U);
	if (FeatureDebug)
	Printf("EVICTED %zd\n", Idx);
	}

	bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
	assert(NewSize);
	Idx = Idx % kFeatureSetSize;
	uint32_t OldSize = GetFeature(Idx);
	if (OldSize == 0 \|\| (Shrink && OldSize > NewSize)) {
	if (OldSize > 0) {
	size_t OldIdx = SmallestElementPerFeature[Idx];
	InputInfo &II = *Inputs[OldIdx];
	assert(II.NumFeatures > 0);
	II.NumFeatures--;
	if (II.NumFeatures == 0)
	DeleteInput(OldIdx);
	} else {
	NumAddedFeatures++;
	}
	NumUpdatedFeatures++;
	if (FeatureDebug)
	Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize);
	SmallestElementPerFeature[Idx] = Inputs.size();
	InputSizesPerFeature[Idx] = NewSize;
	return true;
	}
	return false;
	}

	size_t NumFeatures() const { return NumAddedFeatures; }
	size_t NumFeatureUpdates() const { return NumUpdatedFeatures; }

	private:

	static const bool FeatureDebug = false;

	size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; }

	void ValidateFeatureSet() {
	if (FeatureDebug)
	PrintFeatureSet();
	for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++)
	if (GetFeature(Idx))
	Inputs[SmallestElementPerFeature[Idx]]->Tmp++;
	for (auto II: Inputs) {
	if (II->Tmp != II->NumFeatures)
	Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures);
	assert(II->Tmp == II->NumFeatures);
	II->Tmp = 0;
	}
	}

	// Updates the probability distribution for the units in the corpus.
	// Must be called whenever the corpus or unit weights are changed.
	//
	// Hypothesis: units added to the corpus last are more interesting.
	//
	// Hypothesis: inputs with infrequent features are more interesting.
	void UpdateCorpusDistribution() {
	size_t N = Inputs.size();
	assert(N);
	Intervals.resize(N + 1);
	Weights.resize(N);
	std::iota(Intervals.begin(), Intervals.end(), 0);
	for (size_t i = 0; i < N; i++)
	Weights[i] = Inputs[i]->NumFeatures
	? (i + 1) * (Inputs[i]->HasFocusFunction ? 1000 : 1)
	: 0.;
	if (FeatureDebug) {
	for (size_t i = 0; i < N; i++)
	Printf("%zd ", Inputs[i]->NumFeatures);
	Printf("SCORE\n");
	for (size_t i = 0; i < N; i++)
	Printf("%f ", Weights[i]);
	Printf("Weights\n");
	}
	CorpusDistribution = std::piecewise_constant_distribution<double>(
	Intervals.begin(), Intervals.end(), Weights.begin());
	}
	std::piecewise_constant_distribution<double> CorpusDistribution;

	Vector<double> Intervals;
	Vector<double> Weights;

	std::unordered_set<std::string> Hashes;
	Vector<InputInfo*> Inputs;

	size_t NumAddedFeatures = 0;
	size_t NumUpdatedFeatures = 0;
	uint32_t InputSizesPerFeature[kFeatureSetSize];
	uint32_t SmallestElementPerFeature[kFeatureSetSize];

	std::string OutputCorpus;
	};

	} // namespace fuzzer

	#endif // LLVM_FUZZER_CORPUS