From dde7de902ec88bc18a62a14a969fda437af8159d Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Sun, 15 Dec 2024 02:33:28 +1000 Subject: [PATCH 01/10] Add storage for singletons/doubletons/etc for Chao bound --- src/hypofuzz/corpus.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/hypofuzz/corpus.py b/src/hypofuzz/corpus.py index aa94b715..4c3e2a26 100644 --- a/src/hypofuzz/corpus.py +++ b/src/hypofuzz/corpus.py @@ -86,6 +86,8 @@ def __init__(self, database: ExampleDatabase, key: bytes) -> None: self.covering_buffers: dict[Arc, bytes] = {} # How many times have we seen each arc since discovering our latest arc? self.arc_counts: Counter[Arc] = Counter() + # How many times have we seen each arc since start of run? + self.overall_arc_counts: Counter[Arc] = Counter() # And various internal attributes and metadata self.interesting_examples: dict[ @@ -214,11 +216,13 @@ def add(self, result: ConjectureResult, source: HowGenerated) -> Optional[bool]: # have a different distribution with a new seed pool. if branches.issubset(self.arc_counts): self.arc_counts.update(branches) + self.overall_arc_counts.update(branches) else: # Reset our seen arc counts. This is essential because changing our # seed pool alters the probability of seeing each arc in future. # For details see AFL-fast, esp. the markov-chain trick. self.arc_counts = Counter(branches.union(self.arc_counts)) + self.overall_arc_counts.update(branches) # Save this buffer as our minimal-known covering example for each new arc. 
if result.buffer not in self.results: From fcb617d82a4b3cde66e36c2939c311887719ce8d Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Sun, 15 Dec 2024 03:02:18 +1000 Subject: [PATCH 02/10] Make space for estimated # of branches in dashboard --- src/hypofuzz/dashboard.py | 2 +- src/hypofuzz/hy.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hypofuzz/dashboard.py b/src/hypofuzz/dashboard.py index b805942a..904cfaeb 100644 --- a/src/hypofuzz/dashboard.py +++ b/src/hypofuzz/dashboard.py @@ -24,7 +24,7 @@ PYTEST_ARGS = None -headings = ["nodeid", "elapsed time", "ninputs", "since new cov", "branches", "note"] +headings = ["nodeid", "elapsed time", "ninputs", "since new cov", "branches", "est. branches", "note"] app = flask.Flask(__name__, static_folder=os.path.abspath("pycrunch-recordings")) try: diff --git a/src/hypofuzz/hy.py b/src/hypofuzz/hy.py index 9935ed90..6bdffbb2 100644 --- a/src/hypofuzz/hy.py +++ b/src/hypofuzz/hy.py @@ -379,6 +379,7 @@ def _json_description(self) -> Report: "worker": where_am_i(), "ninputs": self.ninputs, "branches": len(self.pool.arc_counts), + "est. branches": "", "since new cov": self.since_new_cov, "loaded_from_db": len(self.pool._loaded_from_database), "status_counts": dict(self.status_counts), From 7ae0160302bd6a93127fb0ede70b8c32c3e22e96 Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Mon, 16 Dec 2024 19:32:53 +1000 Subject: [PATCH 03/10] Add singleton/doubleton properties --- src/hypofuzz/corpus.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/hypofuzz/corpus.py b/src/hypofuzz/corpus.py index 4c3e2a26..f0964b85 100644 --- a/src/hypofuzz/corpus.py +++ b/src/hypofuzz/corpus.py @@ -135,6 +135,18 @@ def _check_invariants(self) -> None: def _fuzz_key(self) -> bytes: return self._key + b".fuzz" + @property + def singletons(self) -> int: + # Because _every_ arc hit is counted at least once, singletons are those arcs that have that base hit, + # and then _one_ more on top of it. 
+ singletons = [item for item in self.overall_arc_counts.values() if 2 == item] + return len(singletons) + + @property + def doubletons(self) -> int: + doubletons = [item for item in self.overall_arc_counts.values() if 3 == item] + return len(doubletons) + def add(self, result: ConjectureResult, source: HowGenerated) -> Optional[bool]: """Update the corpus with the result of running a test. From bc7543ef9f52fc2dfaf143260e75c8d39a31959f Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Thu, 19 Dec 2024 01:22:20 +1000 Subject: [PATCH 04/10] Implement basic Chao bound --- src/hypofuzz/hy.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/hypofuzz/hy.py b/src/hypofuzz/hy.py index 6bdffbb2..3b81fc97 100644 --- a/src/hypofuzz/hy.py +++ b/src/hypofuzz/hy.py @@ -399,6 +399,13 @@ def _json_description(self) -> Report: ls for _, ls in self.pool.interesting_examples.values() ] del report["since new cov"] + + if self.ninputs >= 10 and "" == report["note"]: + singletons = self.pool.singletons + doubletons = self.pool.doubletons + offset = singletons * (singletons-1) / 2 if 0 == doubletons else singletons * singletons / (2 * doubletons) + offset = int(offset) + 1 + report["est. branches"] = report["branches"] + offset return report @property From 422bc234716284965d101b3690c326100942cd6b Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Thu, 19 Dec 2024 02:31:43 +1000 Subject: [PATCH 05/10] Convert report branches to int _before_ adding offset --- src/hypofuzz/hy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hypofuzz/hy.py b/src/hypofuzz/hy.py index 3b81fc97..8b9c53cb 100644 --- a/src/hypofuzz/hy.py +++ b/src/hypofuzz/hy.py @@ -405,7 +405,7 @@ def _json_description(self) -> Report: doubletons = self.pool.doubletons offset = singletons * (singletons-1) / 2 if 0 == doubletons else singletons * singletons / (2 * doubletons) offset = int(offset) + 1 - report["est. branches"] = report["branches"] + offset + report["est. 
branches"] = int(str(report["branches"])) + offset return report @property From 3ff69e76e761310147994cc95d7af2fca0a60223 Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Thu, 19 Dec 2024 14:48:56 +1000 Subject: [PATCH 06/10] Fix line-length overruns --- src/hypofuzz/dashboard.py | 10 +++++++++- src/hypofuzz/hy.py | 6 +++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/hypofuzz/dashboard.py b/src/hypofuzz/dashboard.py index 904cfaeb..43499318 100644 --- a/src/hypofuzz/dashboard.py +++ b/src/hypofuzz/dashboard.py @@ -24,7 +24,15 @@ PYTEST_ARGS = None -headings = ["nodeid", "elapsed time", "ninputs", "since new cov", "branches", "est. branches", "note"] +headings = [ + "nodeid", + "elapsed time", + "ninputs", + "since new cov", + "branches", + "est. branches", + "note", +] app = flask.Flask(__name__, static_folder=os.path.abspath("pycrunch-recordings")) try: diff --git a/src/hypofuzz/hy.py b/src/hypofuzz/hy.py index 8b9c53cb..466eac7b 100644 --- a/src/hypofuzz/hy.py +++ b/src/hypofuzz/hy.py @@ -403,7 +403,11 @@ def _json_description(self) -> Report: if self.ninputs >= 10 and "" == report["note"]: singletons = self.pool.singletons doubletons = self.pool.doubletons - offset = singletons * (singletons-1) / 2 if 0 == doubletons else singletons * singletons / (2 * doubletons) + offset = ( + singletons * (singletons - 1) / 2 + if 0 == doubletons + else singletons * singletons / (2 * doubletons) + ) offset = int(offset) + 1 report["est. 
branches"] = int(str(report["branches"])) + offset return report From b0c4196146c3cf07e05dcffa837039820c4dfb05 Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Sun, 22 Dec 2024 17:57:05 +1000 Subject: [PATCH 07/10] Update CONTRIBUTING.md --- CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8dec287c..03c4106a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -50,3 +50,4 @@ You accept and agree to the following terms and conditions for Your present and * [Liam DeVoe](https://github.com/tybug) +* [CyberiaResurrection](https://github.com/CyberiaResurrection) From 7f21f08a15b9080e79ba80178b6db6db05cfbe80 Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Tue, 7 Jan 2025 11:44:26 +1000 Subject: [PATCH 08/10] Add tripleton/quadrupleton properties --- src/hypofuzz/corpus.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/hypofuzz/corpus.py b/src/hypofuzz/corpus.py index f0964b85..9fc60348 100644 --- a/src/hypofuzz/corpus.py +++ b/src/hypofuzz/corpus.py @@ -147,6 +147,16 @@ def doubletons(self) -> int: doubletons = [item for item in self.overall_arc_counts.values() if 3 == item] return len(doubletons) + @property + def tripletons(self) -> int: + tripletons = [item for item in self.overall_arc_counts.values() if 4 == item] + return len(tripletons) + + @property + def quadrupletons(self) -> int: + quadrupletons = [item for item in self.overall_arc_counts.values() if 5 == item] + return len(quadrupletons) + def add(self, result: ConjectureResult, source: HowGenerated) -> Optional[bool]: """Update the corpus with the result of running a test. From a1f3081c62d1520993259988033ddf0fc676b14a Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Tue, 7 Jan 2025 11:47:04 +1000 Subject: [PATCH 09/10] Only display bound after 10k inputs. Böhme notes (in "Reachable Coverage: Estimating Saturation in Fuzzing") that all SOTA estimators have serious biases until many thousands of runs are gathered.
This commit attempts to mitigate the worst of those biases by waiting for 10,000 runs to be gathered. --- src/hypofuzz/hy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hypofuzz/hy.py b/src/hypofuzz/hy.py index 466eac7b..d9ae342b 100644 --- a/src/hypofuzz/hy.py +++ b/src/hypofuzz/hy.py @@ -400,7 +400,7 @@ def _json_description(self) -> Report: ] del report["since new cov"] - if self.ninputs >= 10 and "" == report["note"]: + if self.ninputs >= 10000 and "" == report["note"]: singletons = self.pool.singletons doubletons = self.pool.doubletons offset = ( From 4a7fc35c98af97f32affe4243e9a4067b8162e4d Mon Sep 17 00:00:00 2001 From: Alex Goodwin Date: Tue, 7 Jan 2025 11:50:35 +1000 Subject: [PATCH 10/10] Report second-order jack-knife bound instead of Chao --- src/hypofuzz/hy.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/hypofuzz/hy.py b/src/hypofuzz/hy.py index d9ae342b..d74338c8 100644 --- a/src/hypofuzz/hy.py +++ b/src/hypofuzz/hy.py @@ -403,12 +403,8 @@ def _json_description(self) -> Report: if self.ninputs >= 10000 and "" == report["note"]: singletons = self.pool.singletons doubletons = self.pool.doubletons - offset = ( - singletons * (singletons - 1) / 2 - if 0 == doubletons - else singletons * singletons / (2 * doubletons) - ) - offset = int(offset) + 1 + offset = 2 * singletons - doubletons + offset = max(1, offset) report["est. branches"] = int(str(report["branches"])) + offset return report