diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8dec287c..03c4106a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -50,3 +50,4 @@ You accept and agree to the following terms and conditions for Your present and * [Liam DeVoe](https://github.com/tybug) +* [CyberiaResurrection](https://github.com/CyberiaResurrection) diff --git a/src/hypofuzz/corpus.py b/src/hypofuzz/corpus.py index aa94b715..9fc60348 100644 --- a/src/hypofuzz/corpus.py +++ b/src/hypofuzz/corpus.py @@ -86,6 +86,8 @@ def __init__(self, database: ExampleDatabase, key: bytes) -> None: self.covering_buffers: dict[Arc, bytes] = {} # How many times have we seen each arc since discovering our latest arc? self.arc_counts: Counter[Arc] = Counter() + # How many times have we seen each arc since start of run? + self.overall_arc_counts: Counter[Arc] = Counter() # And various internal attributes and metadata self.interesting_examples: dict[ @@ -133,6 +135,28 @@ def _check_invariants(self) -> None: def _fuzz_key(self) -> bytes: return self._key + b".fuzz" + @property + def singletons(self) -> int: + # Because _every_ arc hit is counted at least once, singletons are those arcs that have that base hit, + # and then _one_ more on top of it. + singletons = [item for item in self.overall_arc_counts.values() if 2 == item] + return len(singletons) + + @property + def doubletons(self) -> int: + doubletons = [item for item in self.overall_arc_counts.values() if 3 == item] + return len(doubletons) + + @property + def tripletons(self) -> int: + tripletons = [item for item in self.overall_arc_counts.values() if 4 == item] + return len(tripletons) + + @property + def quadrupletons(self) -> int: + quadrupletons = [item for item in self.overall_arc_counts.values() if 5 == item] + return len(quadrupletons) + def add(self, result: ConjectureResult, source: HowGenerated) -> Optional[bool]: """Update the corpus with the result of running a test. @@ -214,11 +238,13 @@ def add(self, result: ConjectureResult, source: HowGenerated) -> Optional[bool]: # have a different distribution with a new seed pool. if branches.issubset(self.arc_counts): self.arc_counts.update(branches) + self.overall_arc_counts.update(branches) else: # Reset our seen arc counts. This is essential because changing our # seed pool alters the probability of seeing each arc in future. # For details see AFL-fast, esp. the markov-chain trick. self.arc_counts = Counter(branches.union(self.arc_counts)) + self.overall_arc_counts.update(branches) # Save this buffer as our minimal-known covering example for each new arc. if result.buffer not in self.results: diff --git a/src/hypofuzz/dashboard.py b/src/hypofuzz/dashboard.py index b805942a..43499318 100644 --- a/src/hypofuzz/dashboard.py +++ b/src/hypofuzz/dashboard.py @@ -24,7 +24,15 @@ PYTEST_ARGS = None -headings = ["nodeid", "elapsed time", "ninputs", "since new cov", "branches", "note"] +headings = [ + "nodeid", + "elapsed time", + "ninputs", + "since new cov", + "branches", + "est. branches", + "note", +] app = flask.Flask(__name__, static_folder=os.path.abspath("pycrunch-recordings")) try: diff --git a/src/hypofuzz/hy.py b/src/hypofuzz/hy.py index 9935ed90..d74338c8 100644 --- a/src/hypofuzz/hy.py +++ b/src/hypofuzz/hy.py @@ -379,6 +379,7 @@ def _json_description(self) -> Report: "worker": where_am_i(), "ninputs": self.ninputs, "branches": len(self.pool.arc_counts), + "est. branches": "", "since new cov": self.since_new_cov, "loaded_from_db": len(self.pool._loaded_from_database), "status_counts": dict(self.status_counts), @@ -398,6 +399,13 @@ def _json_description(self) -> Report: ls for _, ls in self.pool.interesting_examples.values() ] del report["since new cov"] + + if self.ninputs >= 10000 and "" == report["note"]: + singletons = self.pool.singletons + doubletons = self.pool.doubletons + offset = 2 * singletons - doubletons + offset = max(1, offset) + report["est. branches"] = int(str(report["branches"])) + offset return report @property