From 6b7f9d4ff85f5557f7005cb9e991fc437ce7f739 Mon Sep 17 00:00:00 2001 From: Juan C Date: Mon, 6 Oct 2025 13:51:48 +0200 Subject: [PATCH 1/5] Add `scripts/stats4Nerds.lean` --- scripts/Stats4Nerds.lean | 119 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 scripts/Stats4Nerds.lean diff --git a/scripts/Stats4Nerds.lean b/scripts/Stats4Nerds.lean new file mode 100644 index 0000000..5435a46 --- /dev/null +++ b/scripts/Stats4Nerds.lean @@ -0,0 +1,119 @@ +/-! # Stats 4 Nerds + + Script that scrapes the codebase to get statistics 4 da nerds. + -/ + +open System + +/-- +`fileStats` are the statistics we're tracking for every file, which are +`dfs`: amount of `def` declarations +`str`: amount of `structure` declarations +`ind`: amount of `inductive` declarations +`thm`: amount of `theorem` declarations +`axs`: amount of `axsiom` declarations +-/ +structure fileStats where + dfs : Nat + str : Nat + ind : Nat + thm : Nat + axs : Nat + +instance : Inhabited fileStats where + default := {dfs := 0, str := 0, ind := 0, thm := 0, axs := 0} + +instance : ToString fileStats where + toString := fun fs => + s!"Definitions: {fs.dfs}\n"++ + s!"Structures : {fs.str}\n"++ + s!"Inductives: {fs.ind}\n"++ + s!"Theorems: {fs.thm}\n"++ + s!"Axioms: {fs.axs}" + + +def fileStats.incrDfs (fs : fileStats) : fileStats := + {fs with dfs := fs.dfs + 1} + +def fileStats.incrStr (fs : fileStats) : fileStats := + {fs with str := fs.str + 1} + +def fileStats.incrInd (fs : fileStats) : fileStats := + {fs with ind := fs.ind + 1} + +def fileStats.incrThm (fs : fileStats) : fileStats := + {fs with thm := fs.thm + 1} + +def fileStats.incrAxs (fs : fileStats) : fileStats := + {fs with axs := fs.axs + 1} + +/-- +Disjunction of two `String.startsWith` +-/ +def String.swOr (s : String) (s1 : String) (s2 : String) : Bool := + s.startsWith s1 || s.startsWith s2 + +/-- +Criteria to count a declaration +-/ +def addLineStats (fs : fileStats) (line : String) : fileStats := + if line.swOr "def " " def " then fs.incrDfs else + if line.swOr "structure " " structure " then fs.incrStr else + if line.swOr "inductive " " inductive " then fs.incrInd else + if line.swOr "theorem " " theorem " then fs.incrThm else + if line.swOr "axiom " " axiom " then fs.incrAxs else fs + +def projectDir : IO FilePath := do + IO.currentDir + +def mkSubDir (subDir : String) (dir : IO FilePath := projectDir) : IO FilePath := + do pure ((←dir) / {toString := subDir}) + +def equivalenceDir := mkSubDir "EvmEquivalence" + +def kevm2LeanDir := mkSubDir "KEVM2Lean" equivalenceDir + +def skipCodeGen (fp : FilePath) : IO Bool := do + if fp = (←kevm2LeanDir) then pure false else pure true + +def includeCodeGen (fp : FilePath) : IO Bool := do + pure !(←skipCodeGen fp) + +/-- +Get Lean files. +Lean files are files with the `.lean` extension that don't begin with `.#` +-/ +def getFiles (dir : IO FilePath) (skipFn : FilePath → IO Bool := default) : IO (Array FilePath) := do + let mut arr ← FilePath.walkDir (←dir) skipFn + arr := (arr.filter (fun fp : FilePath => fp.toString.endsWith ".lean")) + pure (arr.filter (fun fp : FilePath => !fp.toString.startsWith ".#")) + +/-- +Adds the stats of a given file to some provided stats +-/ +def addStats (fp : FilePath) (stats : fileStats := default) : IO fileStats := do + if !(←fp.pathExists) || (←fp.isDir) then pure stats else + let f ← IO.FS.Handle.mk fp .read + let mut line ← f.getLine + let mut s := stats + while line.utf8ByteSize ≠ 0 do + s := addLineStats s line + line ← f.getLine + pure s + +def countDeclarations (dir : IO FilePath) (criteria : FilePath → IO Bool) : IO fileStats := do + let fs ← getFiles dir criteria + let rec countFiles (fs : List FilePath) (stats : fileStats) : IO fileStats := do + match fs with + | [] => pure stats + | x :: xs => countFiles xs (←addStats x stats) + countFiles fs.toList default + +def main : IO Unit := do + let declarations ← countDeclarations equivalenceDir skipCodeGen + let genDeclarations ← countDeclarations kevm2LeanDir (fun _ => pure true) + IO.println "Number of declarations for the generated code 🤖" + IO.println genDeclarations + IO.println "" + IO.println "Number of declarations for the manually produced code 📝" + IO.println declarations From 62775f906e33bac2ee82e1dcb293b861a951f48c Mon Sep 17 00:00:00 2001 From: Juan C Date: Mon, 6 Oct 2025 13:56:33 +0200 Subject: [PATCH 2/5] `scripts/README.md`: describe `Stats4Nerds.lean` --- scripts/README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/scripts/README.md b/scripts/README.md index 8553f33..f9ebbd2 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,7 +1,7 @@ Helper Scripts -------------- -This folder contains scripts that help with the modification or usage of the K-generated Lean code. +This folder contains scripts that help with the modification or usage of the K-generated Lean code and other miscellaneous. ## `format-rewrites.py` @@ -37,3 +37,13 @@ foo0 foo1 bar0 bar1 ``` We can then copy-paste the resulting line of arguments to our invocation of `opcode?HS`, which only contains implicit arguments. + +## `Stats4Nerds.lean` + +The [Stats4Nerds.lean](./Stats4Nerds.lean) script scrapes the codebase to gauge the amount of declarations of both the generated and the manually produced code. + +To get the stats run the following from the project directory: + +```bash +lean --run scripts/Stats4Nerds.lean +``` From e1281bbff0f78dfd7f2b7996087ebed03b5c13d3 Mon Sep 17 00:00:00 2001 From: Juan C Date: Mon, 6 Oct 2025 13:57:04 +0200 Subject: [PATCH 3/5] `README.md`: add section Stats4Nerds --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index 939bce1..3d3e819 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,32 @@ Under [EvmEquivalence](./EvmEquivalence) we have the following structure: * [Utils](./EvmEquivalence/Utils): Useful results for the proving * [StateMap.lean](./EvmEquivalence/StateMap.lean): Function mapping `KEVM` states to `EvmYul` states +## Stats4Nerds + +The project currently has the following amount of declarations, distinguishing between the extracted Lean 4 code and the manually produced. + +### Number of declarations for the generated code 🤖 +Definitions: 266 +Structures : 113 +Inductives: 25 +Theorems: 0 +Axioms: 73 + +Note that the elevated number of axioms is due to currently extracting a number of KEVM functions as uninterpreted functions in Lean. + +### Number of declarations for the manually produced code 📝 +Definitions: 136 +Structures : 0 +Inductives: 8 +Theorems: 309 +Axioms: 17 + +Some of the axioms present are to-be theorems left as future work. +Some other axioms are currently needed to ensure compatibility between the EVMYulLean model and KEVM. +For more information see the blueprint of the project. + +These stats are automatically produced by the [Stats4Nerds.lean](./scripts/Stats4Nerds.lean) script. + ## Building the Project After cloning this repository, from its root run: From a80b89f2c0d412cf9a2282fc616cc37074eb1d98 Mon Sep 17 00:00:00 2001 From: Juan C Date: Mon, 6 Oct 2025 13:59:02 +0200 Subject: [PATCH 4/5] `README.md`: add bullets to statistics --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 3d3e819..970c8a4 100644 --- a/README.md +++ b/README.md @@ -28,20 +28,20 @@ Under [EvmEquivalence](./EvmEquivalence) we have the following structure: The project currently has the following amount of declarations, distinguishing between the extracted Lean 4 code and the manually produced. ### Number of declarations for the generated code 🤖 -Definitions: 266 -Structures : 113 -Inductives: 25 -Theorems: 0 -Axioms: 73 +- Definitions: 266 +- Structures : 113 +- Inductives: 25 +- Theorems: 0 +- Axioms: 73 Note that the elevated number of axioms is due to currently extracting a number of KEVM functions as uninterpreted functions in Lean. ### Number of declarations for the manually produced code 📝 -Definitions: 136 -Structures : 0 -Inductives: 8 -Theorems: 309 -Axioms: 17 +- Definitions: 136 +- Structures : 0 +- Inductives: 8 +- Theorems: 309 +- Axioms: 17 Some of the axioms present are to-be theorems left as future work. Some other axioms are currently needed to ensure compatibility between the EVMYulLean model and KEVM. From dc45eafd3407ba0f226cf2db20dd1b28691e6a74 Mon Sep 17 00:00:00 2001 From: Juan C Date: Mon, 6 Oct 2025 14:10:27 +0200 Subject: [PATCH 5/5] `Stats4Nerds.lean`: typo --- scripts/Stats4Nerds.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Stats4Nerds.lean b/scripts/Stats4Nerds.lean index 5435a46..1727bb7 100644 --- a/scripts/Stats4Nerds.lean +++ b/scripts/Stats4Nerds.lean @@ -11,7 +11,7 @@ open System `str`: amount of `structure` declarations `ind`: amount of `inductive` declarations `thm`: amount of `theorem` declarations -`axs`: amount of `axsiom` declarations +`axs`: amount of `axiom` declarations -/ structure fileStats where dfs : Nat