nuprl · rudolf-adamkovic · Feb 12, 2026
diff --git a/dataset_builder/humaneval_to_lisp.py b/dataset_builder/humaneval_to_lisp.py
@@ -0,0 +1,74 @@
+"""
+MultiPL-E translator of OpenAI HumanEval into Common Lisp.
+Author: Rudolf Adamkovič <rudolf@adamkovic.org>
+
+- MultiPL-E: https://github.com/nuprl/MultiPL-E
+- Common Lisp: https://lisp-lang.org/
+- HyperSpec: https://www.lispworks.com/documentation/HyperSpec/Front/index.htm
+- Steel Bank Common Lisp: https://www.sbcl.org/
+- FiveAM: https://fiveam.common-lisp.dev/
+"""
+
+import ast
+from typing import List
+
+
+class Translator:
+    """Translate HumanEval prompts and test cases into Common Lisp source.
+
+    ``translate_prompt`` renders the function header and docstring; the
+    ``gen_*`` methods render Python values and calls as Lisp expressions;
+    the ``test_suite_*`` methods and ``deep_equality`` wrap those
+    expressions in a FiveAM test suite.
+    """
+
+    # NOTE(review): presumably the Lisp spelling of Python's unary minus,
+    # read by the shared MultiPL-E driver -- confirm against the caller.
+    USub = "-"
+
+    # Each entry starts a new top-level form or a comment line.
+    # NOTE(review): presumably used as completion stop sequences -- confirm.
+    stop = ["\n(defun", "\n;", "\n("]
+
+    @staticmethod
+    def _escape_string(text: str) -> str:
+        """Escape *text* for use between double quotes in Lisp source.
+
+        Backslash is the escape character inside a Common Lisp string, so
+        it must be doubled BEFORE quotes are escaped; otherwise the
+        backslashes added for the quotes would be doubled as well.
+        """
+        return text.replace("\\", "\\\\").replace('"', '\\"')
+
+    def file_ext(self):
+        """Return the file extension (without a dot) for generated programs."""
+        return "lisp"
+
+    def translate_prompt(
+        self, name: str, args: List[ast.arg], _returns, description: str
+    ) -> str:
+        """Return the opening of a ``defun`` form with its docstring.
+
+        The form is deliberately left unclosed: only the header, the
+        parameter list and the docstring are emitted. As a side effect the
+        function name is remembered in ``self.entry_point``.
+
+        Args:
+            name: Name of the function to define.
+            args: Python parameters; only their names are used.
+            _returns: Ignored.
+            description: Prompt text, emitted as the Lisp docstring.
+        """
+        lisp_args = " ".join([arg.arg for arg in args])
+        lisp_description = self._escape_string(description)
+        self.entry_point = name
+        return f'(defun {name} ({lisp_args})\n"{lisp_description}"\n'
+
+    def test_suite_prefix_lines(self, entry_point) -> List[str]:
+        """Return the lines that open the FiveAM test suite.
+
+        The ``candidate`` macro forwards its arguments to *entry_point*.
+        The final line opens the ``main`` test, which
+        ``test_suite_suffix_lines`` closes.
+        """
+        return [
+            "(require :asdf)",
+            "(asdf:load-system :fiveam)",
+            "(fiveam:def-suite* human-eval)",
+            f"(defmacro candidate (&rest args) `({entry_point} ,@args))",
+            "(fiveam:test main"
+        ]
+
+    def test_suite_suffix_lines(self) -> List[str]:
+        """Return the lines that close the ``main`` test and run the suite."""
+        return [
+            ")",
+            "(fiveam:run! 'human-eval)"
+        ]
+
+    def deep_equality(self, left: str, right: str) -> str:
+        """Return a FiveAM check that *left* and *right* are ``equal``."""
+        return f"(fiveam:is (equal {left} {right}))"
+
+    def gen_literal(self, c: bool | str | int | float | None):
+        """Render a Python constant as a Common Lisp literal.
+
+        Booleans become ``t``/``nil``, ``None`` becomes ``nil``, strings are
+        double-quoted with ``\\`` and ``"`` escaped, and any other value is
+        rendered with ``repr``.
+        """
+        # bool must be tested first: it is a subclass of int.
+        if isinstance(c, bool):
+            return "t" if c else "nil"
+        if isinstance(c, str):
+            return f'"{self._escape_string(c)}"'
+        if c is None:
+            return "nil"
+        return repr(c)
+
+    def gen_var(self, variable: str) -> str:
+        """Return *variable* unchanged."""
+        return variable
+
+    def gen_list(self, list: List[str]) -> str:
+        """Render already-translated elements as a ``(list ...)`` form."""
+        return "(list " + " ".join(list) + ")"
+
+    def gen_tuple(self, tuple: List[str]) -> str:
+        """Render a tuple exactly like a list, as a ``(list ...)`` form."""
+        return "(list " + " ".join(tuple) + ")"
+
+    def gen_dict(self, keys: List[str], values: List[str]) -> str:
+        """Render a dictionary as a list of ``(cons key value)`` pairs."""
+        pairs = " ".join(f"(cons {k} {v})" for k, v in zip(keys, values))
+        return "(list " + pairs + ")"
+
+    def gen_call(self, func: str, args: List[str]) -> str:
+        """Render a call of *func* on already-translated *args*.
+
+        Joining the operator with its arguments avoids a stray space
+        (``(f )``) when *args* is empty.
+        """
+        return "(" + " ".join([func, *args]) + ")"
diff --git a/dataset_builder/libexperiments.py b/dataset_builder/libexperiments.py
@@ -42,6 +42,7 @@ def path(self) -> Path:
     "elixir",
     "clj",
     "ada",
+    "lisp",
 ]
 MODELS = ["davinci", "incoder", "codegen"]
 

diff --git a/dataset_builder/terms.csv b/dataset_builder/terms.csv
@@ -26,3 +26,4 @@ Matlab,m,array,array,array,dictionary,<missing>,true,false
 Haskell,hs,list,list,tuple,association list,Nothing,True,False
 Clojure,clj,vector,list,vector,map,nil,true,false
 Dart,dart,list,list,record,map,null,true,false
+Common Lisp,lisp,list,list,list,alist,nil,t,nil
diff --git a/evaluation/Dockerfile b/evaluation/Dockerfile
@@ -106,6 +106,11 @@ RUN apt-get update -yqq && apt-get install -yqq dart
 # Lean
 # RUN wget https://github.com/leanprover/lean4/releases/download/v4.6.0-rc1/lean-4.6.0-rc1-linux.zip -O /tmp/lean.zip && unzip /tmp/lean.zip -d /root/lean/ && ln -s /root/lean/bin/lean /bin/lean
 
+# Common Lisp
+# - Compiler: Steel Bank Common Lisp (SBCL)
+# - Unit testing: FiveAM
+# Use apt-get (apt's CLI is not meant for scripts) and refresh the package
+# index in the same layer so the install never runs against a stale cache.
+RUN apt-get update -yqq && apt-get install -yqq sbcl cl-fiveam
+
 # install numpy for humanevalplus
 RUN python3 -m pip install numpy
 

diff --git a/evaluation/src/containerized_eval.py b/evaluation/src/containerized_eval.py
@@ -29,6 +29,7 @@
 import eval_v
 import eval_lean
 import eval_dart
+import eval_lisp
 import tempfile
 
 
@@ -65,6 +66,7 @@
     "coq": (eval_v.eval_script, ".v"),
     "lean": (eval_lean.eval_script, ".lean"),
     "dart": (eval_dart.eval_script, ".dart"),
+    "lisp": (eval_lisp.eval_script, ".lisp"),
 }
 
 def eval_string_script(language, program):

diff --git a/evaluation/src/eval_lisp.py b/evaluation/src/eval_lisp.py
@@ -0,0 +1,29 @@
+"""
+MultiPL-E test evaluator for Common Lisp.
+Author: Rudolf Adamkovič <rudolf@adamkovic.org>
+
+- MultiPL-E: https://github.com/nuprl/MultiPL-E
+- Common Lisp: https://lisp-lang.org/
+- HyperSpec: https://www.lispworks.com/documentation/HyperSpec/Front/index.htm
+- Steel Bank Common Lisp: https://www.sbcl.org/
+- FiveAM: https://fiveam.common-lisp.dev/
+"""
+
+from pathlib import Path
+from safe_subprocess import run
+from re import search
+
+def eval_script(path: Path):
+    """Load the Lisp program at *path* in SBCL and classify the outcome.
+
+    Returns a dict with ``status``, ``exit_code``, ``stdout`` and ``stderr``.
+    ``status`` is ``"Timeout"`` when the run timed out, ``"OK"`` when the
+    output contains a pass line reporting 100%, and ``"Exception"``
+    otherwise.
+    """
+    # --non-interactive disables the debugger and exits once the load is
+    # done, so a failing program terminates instead of waiting in the
+    # debugger prompt until the timeout fires.
+    result = run(["sbcl", "--non-interactive", "--load", str(path)])
+    if result.timeout:
+        status = "Timeout"
+    elif search(r"Pass:\s*\d+\s*\(100%\)", result.stdout):
+        status = "OK"
+    else:
+        status = "Exception"
+    return {
+        "status": status,
+        "exit_code": result.exit_code,
+        "stdout": result.stdout,
+        "stderr": result.stderr,
+    }
-Original file line number
+Diff line change
@@ Expand Up / @@ -42,6 +42,7 @@ def path(self) -> Path: @@
         "elixir",
         "clj",
         "ada",
+        "lisp",
     ]
     MODELS = ["davinci", "incoder", "codegen"]
@@ Expand Down @@