1 change: 1 addition & 0 deletions EvaluationHarness/.gitignore
@@ -0,0 +1 @@
/.lake
3 changes: 3 additions & 0 deletions EvaluationHarness/EvaluationHarness.lean
@@ -0,0 +1,3 @@
-- This module serves as the root of the `EvaluationHarness` library.
-- Import modules here that should be built as part of the library.
import EvaluationHarness.Basic
196 changes: 196 additions & 0 deletions EvaluationHarness/EvaluationHarness/Basic.lean
@@ -0,0 +1,196 @@
import Lean

import EvaluationHarness.Options

/-!
# Evaluation Harness

This file defines the `#evaluation in ...` command wrapper, which captures the messages
and metadata of a command's elaboration and serializes them into a single JSON object,
for easier evaluation.
-/
namespace EvaluationHarness

open Lean Meta Elab Command

/--
`SimpleMessage` contains the subset of fields from `SerialMessage` that we
wish to include in the logged output.
-/
protected structure SimpleMessage where
severity: MessageSeverity
data: String
caption: String
pos: Position
deriving ToJson, FromJson

protected structure Messages where
messages : List SerialMessage

open EvaluationHarness (Messages SimpleMessage)

/-! ## Message Collection -/

/--
Given a monadic action `runCmd : CommandElabM α`, run the action, collect all
messages (trace, info, warning, and error messages) that the action logged, and
return them together with the action's result. The collected messages are *not*
left in the environment.
-/
protected def collectMessages (runCmd : CommandElabM α) : CommandElabM (α × Messages) := do
-- NOTE: the following is largely copied from the `#guard_msgs in` command,
-- with some slight adaptations

let initMsgs ← modifyGet fun st => (st.messages, { st with messages := {} })
-- do not forward snapshot as we don't want messages assigned to it to leak outside
let ret ← withReader ({ · with snap? := none }) runCmd
-- collect sync and async messages
let msgs := (← get).messages ++
(← get).snapshotTasks.foldl (· ++ ·.get.getAll.foldl (· ++ ·.diagnostics.msgLog) default) {}
-- clear async messages as we don't want them to leak outside, and
-- reset messages to initial state
modify ({ · with
snapshotTasks := #[]
messages := initMsgs
})
let messages ← msgs.toList
|>.filter (!·.isSilent)
|>.mapM (·.serialize)
return ⟨ret, { messages }⟩
open EvaluationHarness (collectMessages)

variable {m} [Monad m] [MonadOptions m] in
def Messages.toSimple (self : Messages) : m (Option <| List SimpleMessage) := do
if (← getBoolOption `evaluation.includeMessages true) then
let includePosition ← getBoolOption `evaluation.includePosition true
some <$> self.messages.mapM fun m =>
return { m with
pos := if includePosition then m.pos else ⟨0, 0⟩
}
else
return none

/-! ## EvaluationConfig -/

structure EvaluationConfig where
strategy: Option String := none
deriving Inhabited

syntax evalConfigElem :=
("strategy" " := " str)
syntax evalConfig := "(" evalConfigElem,* ")"

def parseEvaluationConfig (cfgElems : TSyntax ``evalConfig) : EvaluationConfig := Id.run <| do
let mut cfg := { }
let `(evalConfig| ( $cfgElems,* ) ) := cfgElems
| return cfg
for cfgElem in cfgElems.getElems do
cfg := match cfgElem with
| `(evalConfigElem| strategy := $s) =>
{ cfg with strategy := some s.getString }
| _ =>
panic! "Unknown configuration option: {elem}"

return cfg
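-- A minimal parsing sketch (the strategy string below is illustrative only): the
-- config syntax `(strategy := "bv_decide")` should parse to
-- `{ strategy := some "bv_decide" }`, while an omitted strategy leaves the field `none`.
#eval do
  let cfg ← `(evalConfig| (strategy := "bv_decide"))
  return (parseEvaluationConfig cfg).strategy  -- expected: some "bv_decide"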


/-! ## EvaluationInfo -/

structure EvaluationInfo where
filename: String
/-- The line number on which `#evaluation in cmd` occurred. -/
line: Nat
/--
The collected messages generated by command `cmd`, or
`none` when the `evaluation.includeMessages` option is set to `false`.
-/
messages: Option (List SimpleMessage)
/--
Whether any errors were raised by `cmd`.

NOTE: this is reported regardless of the `includeMessages` option.
-/
hasErrors: Bool
/--
The time elapsed while elaborating `cmd`, or `none` when the
`evaluation.includeWallTime` option is set to `false` (which is the default).
-/
wallTime : Option Float
-- TODO: `data` field for more structured data reporting
/--
The name of the elaborated definition/theorem, or `none` if the command
was neither a definition nor a theorem.
-/
name : Option String
/--
An optional user-defined string that can be embedded in the output;
see `EvaluationConfig` for details.
-/
strategy : Option String
deriving ToJson, FromJson

open Lean.Parser.Command in
/--
If `cmd` is a `def` or `theorem`, return the name of the definition/theorem.
-/
def getDefLikeName? (cmd : TSyntax `command) : Option String :=
let raw := cmd.raw
let kind := raw.getKind
if kind == ``declaration then
let name := raw[1]![1]![0]!.getId
some <| name.eraseMacroScopes.toString
else
none

/--
Run `cmd` and return the elapsed wall-time in milliseconds, or run the command but
return `none` when the `evaluation.includeWallTime` option is set to `false` (which
is the default).
-/
def measureWallTime (cmd : CommandElabM Unit) : CommandElabM (Option Float) := do
if (← getBoolOption `evaluation.includeWallTime false) then
let t1 ← IO.monoNanosNow
cmd
let t2 ← IO.monoNanosNow
return some (deltaInMs t2 t1)
else
cmd
return none
where
deltaInMs (now past : Nat) : Float :=
(Float.ofNat <| now - past) / 1000000.0

def evalEvaluation (cmd : TSyntax `command) (config : EvaluationConfig) : CommandElabM Unit := do
let (wallTime, msgs) ← collectMessages <| measureWallTime <| do
elabCommand cmd
let info : EvaluationInfo := {
filename := ← getFileName
line := ← getRefLine
messages := ← msgs.toSimple
hasErrors := msgs.messages.any (·.severity == .error)
wallTime
name := getDefLikeName? cmd
strategy := config.strategy
}
let info := s!"{toJson info}"
if (← getBoolOption `evaluation.outputAsLog) then
logInfo info
else
IO.println info
where
getRefLine {m} [Monad m] [MonadOptions m] [MonadLog m] : m Nat := do
if (← getBoolOption `evaluation.includePosition true) then
return (← getRefPosition).line
else
return 0

/--
`#evaluation in cmd` captures the messages generated by the command `cmd`, and
serializes them into a single JSON object, together with some extra metadata
such as the name of the current file.

See `EvaluationConfig` for details on extra configuration options.
-/
syntax "#evaluation" (evalConfig)? "in" command : command
elab_rules : command
| `(#evaluation in $cmd:command) => evalEvaluation cmd { }
| `(#evaluation $cfg in $cmd:command) => do
let cfg := parseEvaluationConfig cfg
evalEvaluation cmd cfg
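-- A minimal usage sketch (the theorem and strategy label below are illustrative only):
-- with the default options this prints a single JSON object, via `IO.println`,
-- describing the elaboration of the wrapped command.
#evaluation (strategy := "direct") in
theorem one_add_one : 1 + 1 = 2 := rfl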
34 changes: 34 additions & 0 deletions EvaluationHarness/EvaluationHarness/Options.lean
@@ -0,0 +1,34 @@
import Lean

/-!
# Evaluation Harness Options
This file defines various options which can be used to influence the behaviour
of `#evaluation in cmd`.
-/
namespace EvaluationHarness
open Lean

initialize
registerOption `evaluation.includeWallTime {
defValue := false
descr := "Whether to include elapsed wall-time in the output of `#evaluation in cmd`"
}
registerOption `evaluation.includeMessages {
defValue := true
descr := "Whether to include a log of all messages in the output of `#evaluation in cmd`.\
When set to false, messages are still captured, but then they are discarded \
instead of incorporated into the output."
}

registerOption `evaluation.outputAsLog {
defValue := false
descr := "When set, output is emitted via `logInfo`, rather than `IO.println`"
}
registerOption `evaluation.includePosition {
defValue := true
descr := "Whether to include line/column information in the output. \
When set to false, all lines and columns are reported as 0. \
\n\n\
Can be particularly useful to disable in tests, to make the output less \
brittle to changes in the overall file."
}
101 changes: 101 additions & 0 deletions EvaluationHarness/EvaluationHarness/Tests/Basic.lean
@@ -0,0 +1,101 @@
import EvaluationHarness.Basic

namespace EvaluationHarness.Tests

/-! ## getDefLikeName -/

/-- info: some "foo" -/
#guard_msgs in #eval do
let cmd ← `(command| theorem foo : False := sorry)
return getDefLikeName? cmd

/-- info: some "foo.bar" -/
#guard_msgs in #eval do
let cmd ← `(command| theorem foo.bar : False := sorry)
return getDefLikeName? cmd

/-- info: some "baz" -/
#guard_msgs in #eval do
let cmd ← `(command| def baz : False := sorry)
return getDefLikeName? cmd

/-- info: some "privateBaz" -/
#guard_msgs in #eval do
let cmd ← `(command| private def privateBaz : False := sorry)
return getDefLikeName? cmd

/-- info: some "protBaz" -/
#guard_msgs in #eval do
let cmd ← `(command| protected def protBaz : False := sorry)
return getDefLikeName? cmd

/-! ## `#evaluation` -/
set_option evaluation.outputAsLog true
set_option evaluation.includePosition false

/--
info: {"wallTime": null,
"strategy": null,
"name": "foo",
"messages":
[{"severity": "warning",
"pos": {"line": 0, "column": 0},
"data": "declaration uses 'sorry'",
"caption": ""}],
"line": 0,
"hasErrors": false,
"filename":
"/home/alex/Workspace/PhD/lean-mlir-alt/EvaluationHarness/EvaluationHarness/Tests/Basic.lean"}
-/
#guard_msgs in #evaluation in
def foo : False := sorry

/--
info: {"wallTime": null,
"strategy": "barStrat",
"name": "bar",
"messages":
[{"severity": "warning",
"pos": {"line": 0, "column": 0},
"data": "declaration uses 'sorry'",
"caption": ""}],
"line": 0,
"hasErrors": false,
"filename":
"/home/alex/Workspace/PhD/lean-mlir-alt/EvaluationHarness/EvaluationHarness/Tests/Basic.lean"}
-/
#guard_msgs in #evaluation (strategy := "barStrat") in
def bar : False := sorry

/--
info: {"wallTime": null,
"strategy": null,
"name": "shouldError",
"messages":
[{"severity": "error",
"pos": {"line": 0, "column": 0},
"data": "Failed: `fail` tactic was invoked\n⊢ False",
"caption": ""}],
"line": 0,
"hasErrors": true,
"filename":
"/home/alex/Workspace/PhD/lean-mlir-alt/EvaluationHarness/EvaluationHarness/Tests/Basic.lean"}
-/
#guard_msgs in #evaluation in
def shouldError : False := by
fail

set_option evaluation.includeMessages false in
/--
info: {"wallTime": null,
"strategy": null,
"name": "shouldErrorWithoutMsg",
"messages": null,
"line": 0,
"hasErrors": true,
"filename":
"/home/alex/Workspace/PhD/lean-mlir-alt/EvaluationHarness/EvaluationHarness/Tests/Basic.lean"}
-/
#guard_msgs in #evaluation in
def shouldErrorWithoutMsg : False := by
fail
19 changes: 19 additions & 0 deletions EvaluationHarness/EvaluationHarness/Tests/ExampleFile.lean
@@ -0,0 +1,19 @@
import EvaluationHarness.Basic
import Std.Tactic.BVDecide

open BitVec

#evaluation in
theorem test_with_1_thm.extracted_1._1 : ∀ (x : BitVec 32),
¬x ≥ ↑32 → 1#32 <<< x &&& 1#32 = zeroExtend 32 (ofBool (x == 0#32)) := by
bv_decide

#evaluation in
theorem test_with_3_thm.extracted_1._1 : ∀ (x : BitVec 32),
¬x ≥ ↑32 → 3#32 <<< x &&& 1#32 = zeroExtend 32 (ofBool (x == 0#32)) := by
bv_decide

#evaluation in
theorem test_with_5_thm.extracted_1._1 : ∀ (x : BitVec 32),
¬x ≥ ↑32 → 5#32 <<< x &&& 1#32 = zeroExtend 32 (ofBool (x == 0#32)) := by
fail
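-- Wall-time reporting is opt-in; a sketch of enabling it for a single evaluation
-- (the reported `wallTime` is in milliseconds and nondeterministic, so no fixed
-- output is checked; the theorem below is illustrative only):
set_option evaluation.includeWallTime true in
#evaluation in
theorem test_timed : True := True.intro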
4 changes: 4 additions & 0 deletions EvaluationHarness/Main.lean
@@ -0,0 +1,4 @@
import EvaluationHarness

def main : IO Unit :=
IO.println s!"Hello, {hello}!"
1 change: 1 addition & 0 deletions EvaluationHarness/README.md
@@ -0,0 +1 @@
# evaluation-harness
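
A minimal usage sketch (the declaration and strategy label are illustrative; see
`EvaluationHarness/Tests/Basic.lean` for the exact output format and
`EvaluationHarness/Options.lean` for the available `evaluation.*` options):

```lean
import EvaluationHarness.Basic

#evaluation (strategy := "direct") in
theorem demo : 1 + 1 = 2 := rfl
```

This captures the messages produced while elaborating the wrapped command and
prints them as a single JSON object (or logs them, when `evaluation.outputAsLog`
is set).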
6 changes: 6 additions & 0 deletions EvaluationHarness/lakefile.toml
@@ -0,0 +1,6 @@
name = "EvaluationHarness"
version = "0.1.0"
defaultTargets = ["EvaluationHarness"]

[[lean_lib]]
name = "EvaluationHarness"
1 change: 1 addition & 0 deletions EvaluationHarness/lean-toolchain