From 1b42ee8ab5bc4991f4426d17da5b5c4868a29e4e Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Wed, 24 Sep 2025 16:53:55 +0100 Subject: [PATCH 1/8] feat: evaluation harness to capture messages and serialize them as a single JSON object --- EvaluationHarness/.gitignore | 1 + EvaluationHarness/EvaluationHarness.lean | 3 + .../EvaluationHarness/Basic.lean | 107 ++++++++++++++++++ .../EvaluationHarness/Tests.lean | 20 ++++ EvaluationHarness/Main.lean | 4 + EvaluationHarness/README.md | 1 + EvaluationHarness/lakefile.toml | 6 + EvaluationHarness/lean-toolchain | 1 + lake-manifest.json | 7 ++ lakefile.toml | 4 + 10 files changed, 154 insertions(+) create mode 100644 EvaluationHarness/.gitignore create mode 100644 EvaluationHarness/EvaluationHarness.lean create mode 100644 EvaluationHarness/EvaluationHarness/Basic.lean create mode 100644 EvaluationHarness/EvaluationHarness/Tests.lean create mode 100644 EvaluationHarness/Main.lean create mode 100644 EvaluationHarness/README.md create mode 100644 EvaluationHarness/lakefile.toml create mode 100644 EvaluationHarness/lean-toolchain diff --git a/EvaluationHarness/.gitignore b/EvaluationHarness/.gitignore new file mode 100644 index 0000000000..bfb30ec8c7 --- /dev/null +++ b/EvaluationHarness/.gitignore @@ -0,0 +1 @@ +/.lake diff --git a/EvaluationHarness/EvaluationHarness.lean b/EvaluationHarness/EvaluationHarness.lean new file mode 100644 index 0000000000..abb0242cd4 --- /dev/null +++ b/EvaluationHarness/EvaluationHarness.lean @@ -0,0 +1,3 @@ +-- This module serves as the root of the `EvaluationHarness` library. +-- Import modules here that should be built as part of the library. +import EvaluationHarness.Basic diff --git a/EvaluationHarness/EvaluationHarness/Basic.lean b/EvaluationHarness/EvaluationHarness/Basic.lean new file mode 100644 index 0000000000..d9aca99892 --- /dev/null +++ b/EvaluationHarness/EvaluationHarness/Basic.lean @@ -0,0 +1,107 @@ +import Lean + +/-! +# Evaluation Harness + +This file defines the `#evaluation in ...` command wrapper, for easier evaluation. +-/ +namespace EvaluationHarness + +open Lean +open Elab Command + +/-- +`SimpleMessage` contains the subset of fields from `SerialMessage` that we +wish to include in the logged output. +-/ +protected structure SimpleMessage where + severity: MessageSeverity + data: String + caption: String + pos: Position +deriving ToJson, FromJson + +protected structure Messages where + messages : List SerialMessage + +open EvaluationHarness (Messages SimpleMessage) + +/-! ## Message Collection -/ + +/-- +Given a monadic action `cmd : CommandElabM Unit`, run the action, collect all +messages (trace, info, warning and error messages) that the action logged, and +return those. The collected messages are *not* left in the environment. +-/ +protected def collectMessages (runCmd : CommandElabM Unit) : CommandElabM Messages := do + -- NOTE: the following is largely copied from the `#guard_msgs in` command, + -- with some slight adaptations + + let initMsgs ← modifyGet fun st => (st.messages, { st with messages := {} }) + -- do not forward snapshot as we don't want messages assigned to it to leak outside + withReader ({ · with snap? := none }) runCmd + -- collect sync and async messages + let msgs := (← get).messages ++ + (← get).snapshotTasks.foldl (· ++ ·.get.getAll.foldl (· ++ ·.diagnostics.msgLog) default) {} + -- clear async messages as we don't want them to leak outside, and + -- reset messages to initial state + modify ({ · with + snapshotTasks := #[] + messages := initMsgs + }) + let messages ← msgs.toList + |>.filter (!·.isSilent) + |>.mapM (·.serialize) + return { messages } +open EvaluationHarness (collectMessages) + +def Messages.toSimple (self : Messages) : List SimpleMessage := + self.messages.map fun m => { m with } + +def Messages.toJson (self : Messages) : Json := + ToJson.toJson self.toSimple + +/-! ## EvaluationInfo -/ + +structure EvaluationInfo where + filename: String + /-- The line number on which `#evaluation in cmd` occurred. -/ + line: Nat + /-- The collected messages generated by command `cmd`. -/ + messages: List SimpleMessage + -- TODO: `data` field for more structured data reporting + /-- + The name of the elaborated definition/theorem, or `none` if the command + was neither definition nor theorem. + -/ + name : Option String +deriving ToJson, FromJson + +open Lean.Parser.Command in +/-- +If `cmd` is a `def` or `theorem`, return the name of the defininition/theorem. +-/ +def getDefLikeName? (cmd : TSyntax `command) : Option String := + let raw := cmd.raw + let kind := raw.getKind + if kind == ``declaration then + let name := raw[1]![1]![0]!.getId + some <| name.eraseMacroScopes.toString + else + none + +/-- +`#evalation in cmd` captures the messages generated by the command `cmd`, and +serializes them into a single json object, together with some extra metadata +such as the name of the current file. +-/ +elab "#evaluation" " in " cmd:command : command => do + let msgs ← collectMessages do + elabCommand cmd + let info : EvaluationInfo := { + filename := ← getFileName + line := (← getRefPosition).line + messages := msgs.toSimple + name := getDefLikeName? cmd + } + IO.println s!"{toJson info}" diff --git a/EvaluationHarness/EvaluationHarness/Tests.lean b/EvaluationHarness/EvaluationHarness/Tests.lean new file mode 100644 index 0000000000..8f6fd895b9 --- /dev/null +++ b/EvaluationHarness/EvaluationHarness/Tests.lean @@ -0,0 +1,20 @@ +import EvaluationHarness.Basic + +namespace EvaluationHarness.Tests + +/-! ## getDefLikeName -/ + +/-- info: some "foo" -/ +#guard_msgs in #eval do + let cmd ← `(command| theorem foo : False := sorry) + return getDefLikeName cmd + +/-- info: some "foo.bar" -/ +#guard_msgs in #eval do + let cmd ← `(command| theorem foo.bar : False := sorry) + return getDefLikeName cmd + +/-- info: some "baz" -/ +#guard_msgs in #eval do + let cmd ← `(command| def baz : False := sorry) + return getDefLikeName cmd diff --git a/EvaluationHarness/Main.lean b/EvaluationHarness/Main.lean new file mode 100644 index 0000000000..d4cd5f99e3 --- /dev/null +++ b/EvaluationHarness/Main.lean @@ -0,0 +1,4 @@ +import EvaluationHarness + +def main : IO Unit := + IO.println s!"Hello, {hello}!" diff --git a/EvaluationHarness/README.md b/EvaluationHarness/README.md new file mode 100644 index 0000000000..9a614dbe70 --- /dev/null +++ b/EvaluationHarness/README.md @@ -0,0 +1 @@ +# evaluation-harness \ No newline at end of file diff --git a/EvaluationHarness/lakefile.toml b/EvaluationHarness/lakefile.toml new file mode 100644 index 0000000000..8e761d908b --- /dev/null +++ b/EvaluationHarness/lakefile.toml @@ -0,0 +1,6 @@ +name = "EvaluationHarness" +version = "0.1.0" +defaultTargets = ["EvaluationHarness"] + +[[lean_lib]] +name = "EvaluationHarness" diff --git a/EvaluationHarness/lean-toolchain b/EvaluationHarness/lean-toolchain new file mode 100644 index 0000000000..a835e7cb3c --- /dev/null +++ b/EvaluationHarness/lean-toolchain @@ -0,0 +1 @@ +leanprover/lean4-nightly:nightly-2025-08-11 diff --git a/lake-manifest.json b/lake-manifest.json index dd9f56bbe9..6b03b4978a 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -31,6 +31,13 @@ "inputRev": "nightly-testing-2025-08-11", "inherited": false, "configFile": "lakefile.lean"}, + {"type": "path", + "scope": "", + "name": "EvaluationHarness", + "manifestFile": "lake-manifest.json", + "inherited": false, + "dir": "EvaluationHarness", + "configFile": "lakefile.toml"}, {"type": "path", "scope": "", "name": "Blase", diff --git a/lakefile.toml b/lakefile.toml index 4f2bdf2131..bbd68e2789 100644 --- a/lakefile.toml +++ b/lakefile.toml @@ -14,6 +14,10 @@ name = "Blase" path = "Blase" rev = "main" +[[require]] +name = "EvaluationHarness" +path = "EvaluationHarness" + [[require]] name = "mathlib" git = "https://github.com/leanprover-community/mathlib4-nightly-testing" From 8668292fb6f98665d6682f7b895c37ca094ded7f Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Wed, 24 Sep 2025 12:23:57 +0100 Subject: [PATCH 2/8] move and fix tests --- .../EvaluationHarness/{Tests.lean => Tests/Basic.lean} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename EvaluationHarness/EvaluationHarness/{Tests.lean => Tests/Basic.lean} (82%) diff --git a/EvaluationHarness/EvaluationHarness/Tests.lean b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean similarity index 82% rename from EvaluationHarness/EvaluationHarness/Tests.lean rename to EvaluationHarness/EvaluationHarness/Tests/Basic.lean index 8f6fd895b9..0efa08d62c 100644 --- a/EvaluationHarness/EvaluationHarness/Tests.lean +++ b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean @@ -7,14 +7,14 @@ namespace EvaluationHarness.Tests /-- info: some "foo" -/ #guard_msgs in #eval do let cmd ← `(command| theorem foo : False := sorry) - return getDefLikeName cmd + return getDefLikeName? cmd /-- info: some "foo.bar" -/ #guard_msgs in #eval do let cmd ← `(command| theorem foo.bar : False := sorry) - return getDefLikeName cmd + return getDefLikeName? cmd /-- info: some "baz" -/ #guard_msgs in #eval do let cmd ← `(command| def baz : False := sorry) - return getDefLikeName cmd + return getDefLikeName? cmd From 4d590947482437d9aef2083133166422c62c5a8c Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Wed, 24 Sep 2025 13:33:07 +0100 Subject: [PATCH 3/8] feat: add `strategy` field --- .../EvaluationHarness/Basic.lean | 58 ++++++++++++++++--- .../EvaluationHarness/Tests/Basic.lean | 13 +++++ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/EvaluationHarness/EvaluationHarness/Basic.lean b/EvaluationHarness/EvaluationHarness/Basic.lean index d9aca99892..217ab2ac3b 100644 --- a/EvaluationHarness/EvaluationHarness/Basic.lean +++ b/EvaluationHarness/EvaluationHarness/Basic.lean @@ -7,8 +7,7 @@ This file defines the `#evaluation in ...` command wrapper, for easier evaluatio -/ namespace EvaluationHarness -open Lean -open Elab Command +open Lean Meta Elab Command /-- `SimpleMessage` contains the subset of fields from `SerialMessage` that we @@ -61,6 +60,31 @@ def Messages.toSimple (self : Messages) : List SimpleMessage := def Messages.toJson (self : Messages) : Json := ToJson.toJson self.toSimple +/-! ## EvaluationConfig -/ + +structure EvaluationConfig where + strategy: Option String := none + outputAsLog: Bool := false +deriving Inhabited + +syntax evalConfigElem := + ("strategy" " := " str) +syntax evalConfig := "(" evalConfigElem,* ")" + +def parseEvaluationConfig (cfgElems : TSyntax ``evalConfig) : EvaluationConfig := Id.run <| do + let mut cfg := { } + let `(evalConfig| ( $cfgElems,* ) ) := cfgElems + | return cfg + for cfgElem in cfgElems.getElems do + cfg := match cfgElem with + | `(evalConfigElem| strategy := $s) => + { cfg with strategy := some s.getString } + | _ => + panic! "Unknown configuration option: {elem}" + + return cfg + + /-! ## EvaluationInfo -/ structure EvaluationInfo where @@ -69,12 +93,19 @@ structure EvaluationInfo where line: Nat /-- The collected messages generated by command `cmd`. -/ messages: List SimpleMessage + /-- Whether any errors were raised by `cmd`. -/ + hasErrors: Bool -- TODO: `data` field for more structured data reporting /-- The name of the elaborated definition/theorem, or `none` if the command was neither definition nor theorem. -/ name : Option String + /-- + An optional user-defined string that can be embedded in the output, + see `EvaluationConfig` for details. + -/ + strategy : Option String deriving ToJson, FromJson open Lean.Parser.Command in @@ -90,18 +121,29 @@ def getDefLikeName? (cmd : TSyntax `command) : Option String := else none -/-- -`#evalation in cmd` captures the messages generated by the command `cmd`, and -serializes them into a single json object, together with some extra metadata -such as the name of the current file. --/ -elab "#evaluation" " in " cmd:command : command => do +def evalEvaluation (cmd : TSyntax `command) (config : EvaluationConfig) : CommandElabM Unit := do let msgs ← collectMessages do elabCommand cmd let info : EvaluationInfo := { filename := ← getFileName line := (← getRefPosition).line messages := msgs.toSimple + hasErrors := msgs.messages.any (·.severity == .error) name := getDefLikeName? cmd + strategy := config.strategy } IO.println s!"{toJson info}" + +/-- +`#evalation in cmd` captures the messages generated by the command `cmd`, and +serializes them into a single json object, together with some extra metadata +such as the name of the current file. + +See `EvluationConfig` for details on extra configuration options. +-/ +syntax "#evaluation" (evalConfig)? "in" command : command +elab_rules : command +| `(#evaluation in $cmd:command) => evalEvaluation cmd { } +| `(#evaluation $cfg in $cmd:command) => do + let cfg := parseEvaluationConfig cfg + evalEvaluation cmd cfg diff --git a/EvaluationHarness/EvaluationHarness/Tests/Basic.lean b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean index 0efa08d62c..87e2ef9f66 100644 --- a/EvaluationHarness/EvaluationHarness/Tests/Basic.lean +++ b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean @@ -18,3 +18,16 @@ namespace EvaluationHarness.Tests #guard_msgs in #eval do let cmd ← `(command| def baz : False := sorry) return getDefLikeName? cmd + +/-! ## `#evaluation`-/ + +/- +NOTE: `#guard_msgs` doesn't work, presumably because we output the data +with `IO.println` instead of `logInfo` or the like. +-/ + +#evaluation in + def foo : False := sorry + +#evaluation (strategy := "barStrat") in + def foo : False := sorry From 3b40bbf4c046316d56acc2d5d8e2852b70e4d601 Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Wed, 24 Sep 2025 14:04:26 +0100 Subject: [PATCH 4/8] WIP --- .../EvaluationHarness/Tests/Basic.lean | 6 +- .../InstCombine/tests/proofs/gand_proof.lean | 64 ++++++++++++++++++- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/EvaluationHarness/EvaluationHarness/Tests/Basic.lean b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean index 87e2ef9f66..e70358e674 100644 --- a/EvaluationHarness/EvaluationHarness/Tests/Basic.lean +++ b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean @@ -30,4 +30,8 @@ with `IO.println` instead of `logInfo` or the like. def foo : False := sorry #evaluation (strategy := "barStrat") in - def foo : False := sorry + def bar : False := sorry + +#evaluation in + def shouldError : False := by + fail diff --git a/SSA/Projects/InstCombine/tests/proofs/gand_proof.lean b/SSA/Projects/InstCombine/tests/proofs/gand_proof.lean index 86d369e89c..9822a0a3f2 100644 --- a/SSA/Projects/InstCombine/tests/proofs/gand_proof.lean +++ b/SSA/Projects/InstCombine/tests/proofs/gand_proof.lean @@ -1,5 +1,67 @@ -import SSA.Projects.InstCombine.TacticAuto + +import SSA.Projects.InstCombine.Tactic.SimpLLVM +import SSA.Projects.InstCombine.LLVM.Semantics + +import SSA.Core.Tactic.ExtractGoals + +open BitVec +open LLVM + +set_option linter.unusedTactic false +set_option linter.unreachableTactic false +set_option maxHeartbeats 5000000 +set_option maxRecDepth 1000000 +set_option Elab.async false + +section gand_proof +theorem test_with_1_thm (e : IntW 32) : + LLVM.and (shl (const? 32 1) e) (const? 32 1) ⊑ zext 32 (icmp IntPred.eq e (const? 32 0)) := by + simp_alive_undef + simp_alive_ops + simp_alive_case_bash + simp_alive_split + extract_goals + all_goals sorry + + +theorem test_with_3_thm (e : IntW 32) : + LLVM.and (shl (const? 32 3) e) (const? 32 1) ⊑ zext 32 (icmp IntPred.eq e (const? 32 0)) := by + simp_alive_undef + simp_alive_ops + simp_alive_case_bash + simp_alive_split + extract_goals + all_goals sorry + + +theorem test_with_5_thm (e : IntW 32) : + LLVM.and (shl (const? 32 5) e) (const? 32 1) ⊑ zext 32 (icmp IntPred.eq e (const? 32 0)) := by + simp_alive_undef + simp_alive_ops + simp_alive_case_bash + simp_alive_split + extract_goals + all_goals sorry + + +theorem test_with_neg_5_thm (e : IntW 32) : + LLVM.and (shl (const? 32 (-5)) e) (const? 32 1) ⊑ zext 32 (icmp IntPred.eq e (const? 32 0)) := by + simp_alive_undef + simp_alive_ops + simp_alive_case_bash + simp_alive_split + extract_goals + all_goals sorry + + +theorem test_with_even_thm (e : IntW 32) : LLVM.and (shl (const? 32 4) e) (const? 32 1) ⊑ const? 32 0 := by + simp_alive_undef + simp_alive_ops + simp_alive_case_bash + simp_alive_split + extract_goals + all_goals sorry import SSA.Projects.InstCombine.LLVM.Semantics open BitVec open LLVM From 805c3c46ae443dcdf68db752f00df549940e7e10 Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Wed, 24 Sep 2025 14:04:37 +0100 Subject: [PATCH 5/8] WIP --- .../EvaluationHarness/Tests/ExampleFile.lean | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 EvaluationHarness/EvaluationHarness/Tests/ExampleFile.lean diff --git a/EvaluationHarness/EvaluationHarness/Tests/ExampleFile.lean b/EvaluationHarness/EvaluationHarness/Tests/ExampleFile.lean new file mode 100644 index 0000000000..5fe31a5396 --- /dev/null +++ b/EvaluationHarness/EvaluationHarness/Tests/ExampleFile.lean @@ -0,0 +1,19 @@ +import EvaluationHarness.Basic +import Std.Tactic.BVDecide + +open BitVec + +#evaluation in +theorem test_with_1_thm.extracted_1._1 : ∀ (x : BitVec 32), + ¬x ≥ ↑32 → 1#32 <<< x &&& 1#32 = zeroExtend 32 (ofBool (x == 0#32)) := by + bv_decide + +#evaluation in +theorem test_with_3_thm.extracted_1._1 : ∀ (x : BitVec 32), + ¬x ≥ ↑32 → 3#32 <<< x &&& 1#32 = zeroExtend 32 (ofBool (x == 0#32)) := by + bv_decide + +#evaluation in +theorem test_with_5_thm.extracted_1._1 : ∀ (x : BitVec 32), + ¬x ≥ ↑32 → 5#32 <<< x &&& 1#32 = zeroExtend 32 (ofBool (x == 0#32)) := by + fail From bff1a741ff892fc31e2647a08fd96b275540a52c Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Wed, 24 Sep 2025 16:52:42 +0100 Subject: [PATCH 6/8] add private/protected def test cases for getDefLikeName --- EvaluationHarness/EvaluationHarness/Tests/Basic.lean | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/EvaluationHarness/EvaluationHarness/Tests/Basic.lean b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean index e70358e674..eba1f80198 100644 --- a/EvaluationHarness/EvaluationHarness/Tests/Basic.lean +++ b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean @@ -19,6 +19,16 @@ namespace EvaluationHarness.Tests let cmd ← `(command| def baz : False := sorry) return getDefLikeName? cmd +/-- info: some "privateBaz" -/ +#guard_msgs in #eval do + let cmd ← `(command| private def privateBaz : False := sorry) + return getDefLikeName? cmd + +/-- info: some "protBaz" -/ +#guard_msgs in #eval do + let cmd ← `(command| protected def protBaz : False := sorry) + return getDefLikeName? cmd + /-! ## `#evaluation`-/ /- From db4f14c792389ea2052f65b1e881608bf15762cc Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Wed, 24 Sep 2025 17:53:27 +0100 Subject: [PATCH 7/8] add & implement various options some options suggested by @bollu, such as omitting the details of messages and including walltime, and some other options to make it easier to run tests --- .../EvaluationHarness/Basic.lean | 79 +++++++++++++++---- .../EvaluationHarness/Options.lean | 34 ++++++++ .../EvaluationHarness/Tests/Basic.lean | 68 ++++++++++++++-- 3 files changed, 158 insertions(+), 23 deletions(-) create mode 100644 EvaluationHarness/EvaluationHarness/Options.lean diff --git a/EvaluationHarness/EvaluationHarness/Basic.lean b/EvaluationHarness/EvaluationHarness/Basic.lean index 217ab2ac3b..d65413ec5b 100644 --- a/EvaluationHarness/EvaluationHarness/Basic.lean +++ b/EvaluationHarness/EvaluationHarness/Basic.lean @@ -1,5 +1,7 @@ import Lean +import EvaluationHarness.Options + /-! # Evaluation Harness @@ -32,13 +34,13 @@ Given a monadic action `cmd : CommandElabM Unit`, run the action, collect all messages (trace, info, warning and error messages) that the action logged, and return those. The collected messages are *not* left in the environment. -/ -protected def collectMessages (runCmd : CommandElabM Unit) : CommandElabM Messages := do +protected def collectMessages (runCmd : CommandElabM α) : CommandElabM (α × Messages) := do -- NOTE: the following is largely copied from the `#guard_msgs in` command, -- with some slight adaptations let initMsgs ← modifyGet fun st => (st.messages, { st with messages := {} }) -- do not forward snapshot as we don't want messages assigned to it to leak outside - withReader ({ · with snap? := none }) runCmd + let ret ← withReader ({ · with snap? := none }) runCmd -- collect sync and async messages let msgs := (← get).messages ++ (← get).snapshotTasks.foldl (· ++ ·.get.getAll.foldl (· ++ ·.diagnostics.msgLog) default) {} @@ -51,20 +53,24 @@ protected def collectMessages (runCmd : CommandElabM Unit) : CommandElabM Messag let messages ← msgs.toList |>.filter (!·.isSilent) |>.mapM (·.serialize) - return { messages } + return ⟨ret, { messages }⟩ open EvaluationHarness (collectMessages) -def Messages.toSimple (self : Messages) : List SimpleMessage := - self.messages.map fun m => { m with } - -def Messages.toJson (self : Messages) : Json := - ToJson.toJson self.toSimple +variable {m} [Monad m] [MonadOptions m] in +def Messages.toSimple (self : Messages) : m (Option <| List SimpleMessage) := do + if (← getBoolOption `evaluation.includeMessages true) then + let includePosition ← getBoolOption `evaluation.includePosition true + some <$> self.messages.mapM fun m => + return { m with + pos := if includePosition then m.pos else ⟨0, 0⟩ + } + else + return none /-! ## EvaluationConfig -/ structure EvaluationConfig where strategy: Option String := none - outputAsLog: Bool := false deriving Inhabited syntax evalConfigElem := @@ -91,10 +97,22 @@ structure EvaluationInfo where filename: String /-- The line number on which `#evaluation in cmd` occurred. -/ line: Nat - /-- The collected messages generated by command `cmd`. -/ - messages: List SimpleMessage - /-- Whether any errors were raised by `cmd`. -/ + /-- + The collected messages generated by command `cmd`, or + `none` when the `evaluation.includeMessages` option is set to `false`. + -/ + messages: Option (List SimpleMessage) + /-- + Whether any errors were raised by `cmd`. + + NOTE: this is reported regardless of the `includeMessages` option. + -/ hasErrors: Bool + /-- + The time elapsed while elaborating `cmd`, or `none` when the + `evaluation.includeWalltime` option is set to `false` (which is the default). + -/ + wallTime : Option Float -- TODO: `data` field for more structured data reporting /-- The name of the elaborated definition/theorem, or `none` if the command @@ -121,18 +139,47 @@ def getDefLikeName? (cmd : TSyntax `command) : Option String := else none +/-- +Run `cmd` and return the elapsed wall-time, or run the command but return `none` +when the `evaluation.includeWalltime` option is set to `false` (which is the +default). +-/ +def measureWallTime (cmd : CommandElabM Unit) : CommandElabM (Option Float) := do + if (← getBoolOption `evaluation.includeWallTime false) then + let t1 ← IO.monoNanosNow + cmd + let t2 ← IO.monoNanosNow + return some (deltaInMs t2 t1) + else + cmd + return none +where + deltaInMs (now past : Nat) : Float := + (Float.ofNat <| now - past) / 1000000.0 + def evalEvaluation (cmd : TSyntax `command) (config : EvaluationConfig) : CommandElabM Unit := do - let msgs ← collectMessages do + let (wallTime, msgs) ← collectMessages <| measureWallTime <| do elabCommand cmd let info : EvaluationInfo := { filename := ← getFileName - line := (← getRefPosition).line - messages := msgs.toSimple + line :=← getRefLine + messages := ← msgs.toSimple hasErrors := msgs.messages.any (·.severity == .error) + wallTime name := getDefLikeName? cmd strategy := config.strategy } - IO.println s!"{toJson info}" + let info := s!"{toJson info}" + if (← getBoolOption `evaluation.outputAsLog) then + logInfo info + else + IO.println info +where + getRefLine {m} [Monad m] [MonadOptions m] [MonadLog m] : m Nat := do + if (← getBoolOption `evaluation.includePosition true) then + return (← getRefPosition).line + else + return 0 /-- `#evalation in cmd` captures the messages generated by the command `cmd`, and diff --git a/EvaluationHarness/EvaluationHarness/Options.lean b/EvaluationHarness/EvaluationHarness/Options.lean new file mode 100644 index 0000000000..ef4fb3991b --- /dev/null +++ b/EvaluationHarness/EvaluationHarness/Options.lean @@ -0,0 +1,34 @@ +import Lean + +/-! +# Evaluation Harness Options +This file defines various options which can be used to influence the behaviour +of `#evaluation in cmd`. +-/ +namespace EvaluationHarness +open Lean + +initialize + registerOption `evaluation.includeWallTime { + defValue := false + descr := "Whether to include elapsed wall-time in the output of `#evaluation in cmd`" + } + registerOption `evaluation.includeMessages { + defValue := true + descr := "Whether to include a log of all messages in the output of `#evaluation in cmd`.\ + When set to false, messages are still captured, but then they are discarded \ + instead of incorporated into the output." + } + + registerOption `evaluation.outputAsLog { + defValue := false + descr := "When set, output is emitted via `logInfo`, rather than `IO.println`" + } + registerOption `evaluation.includePosition { + defValue := true + descr := "Whether to include line/column information in the output. \ + When set to false, all lines and columns are reported as 0. \ + \n\n\ + Can be particularly useful to disable in tests, to make the output less + brittle to changes in the overal file." + } diff --git a/EvaluationHarness/EvaluationHarness/Tests/Basic.lean b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean index eba1f80198..6bcd781cb6 100644 --- a/EvaluationHarness/EvaluationHarness/Tests/Basic.lean +++ b/EvaluationHarness/EvaluationHarness/Tests/Basic.lean @@ -30,18 +30,72 @@ namespace EvaluationHarness.Tests return getDefLikeName? cmd /-! ## `#evaluation`-/ +set_option evaluation.outputAsLog true +set_option evaluation.includePosition false -/- -NOTE: `#guard_msgs` doesn't work, presumably because we output the data -with `IO.println` instead of `logInfo` or the like. +/-- +info: {"wallTime": null, + "strategy": null, + "name": "foo", + "messages": + [{"severity": "warning", + "pos": {"line": 0, "column": 0}, + "data": "declaration uses 'sorry'", + "caption": ""}], + "line": 0, + "hasErrors": false, + "filename": + "/home/alex/Workspace/PhD/lean-mlir-alt/EvaluationHarness/EvaluationHarness/Tests/Basic.lean"} -/ - -#evaluation in +#guard_msgs in #evaluation in def foo : False := sorry -#evaluation (strategy := "barStrat") in +/-- +info: {"wallTime": null, + "strategy": "barStrat", + "name": "bar", + "messages": + [{"severity": "warning", + "pos": {"line": 0, "column": 0}, + "data": "declaration uses 'sorry'", + "caption": ""}], + "line": 0, + "hasErrors": false, + "filename": + "/home/alex/Workspace/PhD/lean-mlir-alt/EvaluationHarness/EvaluationHarness/Tests/Basic.lean"} +-/ +#guard_msgs in #evaluation (strategy := "barStrat") in def bar : False := sorry -#evaluation in +/-- +info: {"wallTime": null, + "strategy": null, + "name": "shouldError", + "messages": + [{"severity": "error", + "pos": {"line": 0, "column": 0}, + "data": "Failed: `fail` tactic was invoked\n⊢ False", + "caption": ""}], + "line": 0, + "hasErrors": true, + "filename": + "/home/alex/Workspace/PhD/lean-mlir-alt/EvaluationHarness/EvaluationHarness/Tests/Basic.lean"} +-/ +#guard_msgs in #evaluation in def shouldError : False := by fail + +set_option evaluation.includeMessages false in +/-- +info: {"wallTime": null, + "strategy": null, + "name": "shouldErrorWithoutMsg", + "messages": null, + "line": 0, + "hasErrors": true, + "filename": + "/home/alex/Workspace/PhD/lean-mlir-alt/EvaluationHarness/EvaluationHarness/Tests/Basic.lean"} +-/ +#guard_msgs in #evaluation in + def shouldErrorWithoutMsg : False := by + fail From 75dbcf14dd02e6969ad0c0f866ea27c7dc425345 Mon Sep 17 00:00:00 2001 From: Alex Keizer Date: Thu, 25 Sep 2025 13:50:35 +0100 Subject: [PATCH 8/8] ensure lean-toolchain is a symlink --- EvaluationHarness/lean-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 120000 EvaluationHarness/lean-toolchain diff --git a/EvaluationHarness/lean-toolchain b/EvaluationHarness/lean-toolchain deleted file mode 100644 index a835e7cb3c..0000000000 --- a/EvaluationHarness/lean-toolchain +++ /dev/null @@ -1 +0,0 @@ -leanprover/lean4-nightly:nightly-2025-08-11 diff --git a/EvaluationHarness/lean-toolchain b/EvaluationHarness/lean-toolchain new file mode 120000 index 0000000000..6f68e61b37 --- /dev/null +++ b/EvaluationHarness/lean-toolchain @@ -0,0 +1 @@ +../lean-toolchain \ No newline at end of file