"""
MultiPL-E translator of OpenAI HumanEval into Common Lisp.
Author: Rudolf Adamkovič

- MultiPL-E: https://github.com/nuprl/MultiPL-E
- Common Lisp: https://lisp-lang.org/
- HyperSpec: https://www.lispworks.com/documentation/HyperSpec/Front/index.htm
- Steel Bank Common Lisp: https://www.sbcl.org/
- FiveAM: https://fiveam.common-lisp.dev/
"""

import ast
from typing import List, Union


class Translator:
    """Render HumanEval prompts and test cases as Common Lisp source text.

    Every ``gen_*`` method receives already-translated fragments (strings)
    and returns a new Lisp fragment; nothing here parses or evaluates Lisp.
    """

    # Spelling of the unary minus operator in the generated code.
    # NOTE(review): presumably looked up by the MultiPL-E driver when it
    # meets ``ast.USub`` -- confirm against the driver.
    USub = "-"

    # NOTE(review): presumably the sequences at which a model completion is
    # cut off (start of a new top-level form or comment) -- confirm against
    # the driver.
    stop = ["\n(defun", "\n;", "\n("]

    @staticmethod
    def _escape_string(text: str) -> str:
        """Escape *text* so it can sit between double quotes in Lisp source.

        In Common Lisp string syntax a backslash escapes the next character,
        so backslashes are doubled first and only then are the double quotes
        escaped (the other order would double the freshly added escapes).
        """
        return text.replace("\\", "\\\\").replace('"', '\\"')

    @staticmethod
    def _form(head: str, items: List[str]) -> str:
        """Return ``(head item1 item2 ...)`` without a stray space when
        *items* is empty."""
        return "(" + " ".join([head, *items]) + ")"

    def file_ext(self):
        """Return the extension (without dot) used for generated files."""
        return "lisp"

    def translate_prompt(
        self, name: str, args: List[ast.arg], _returns, description: str
    ) -> str:
        """Build the opening of a ``defun`` for the function under test.

        The result is the ``(defun name (args...)`` header followed by the
        description as a Lisp docstring; the form is left unclosed.  The
        function name is remembered on ``self.entry_point``.  ``_returns``
        is accepted but not used.
        """
        lisp_args = " ".join(arg.arg for arg in args)
        lisp_description = self._escape_string(description)
        self.entry_point = name
        return f'(defun {name} ({lisp_args})\n"{lisp_description}"\n'

    def test_suite_prefix_lines(self, entry_point) -> List[str]:
        """Return the lines that precede the generated assertions.

        They load ASDF and FiveAM, define the ``human-eval`` suite, define a
        ``candidate`` macro that expands into a call of *entry_point*, and
        open the ``main`` test form (closed by the suffix lines).
        """
        return [
            "(require :asdf)",
            "(asdf:load-system :fiveam)",
            "(fiveam:def-suite* human-eval)",
            f"(defmacro candidate (&rest args) `({entry_point} ,@args))",
            "(fiveam:test main",
        ]

    def test_suite_suffix_lines(self) -> List[str]:
        """Return the lines that close the ``main`` test form and run the
        ``human-eval`` suite."""
        return [
            ")",
            "(fiveam:run! 'human-eval)",
        ]

    def deep_equality(self, left: str, right: str) -> str:
        """Return a FiveAM check asserting ``(equal left right)``."""
        return f"(fiveam:is (equal {left} {right}))"

    def gen_literal(self, c: Union[bool, str, int, float, None]) -> str:
        """Translate a Python constant into a Lisp literal.

        Booleans become ``t``/``nil``, ``None`` becomes ``nil``, strings are
        double-quoted with embedded quotes and backslashes escaped, and
        anything else is emitted via ``repr``.
        """
        # bool must be tested before falling through to repr(): it is an int
        # subclass and repr() would yield "True"/"False".
        if isinstance(c, bool):
            return "t" if c else "nil"
        if isinstance(c, str):
            return f'"{self._escape_string(c)}"'
        if c is None:
            return "nil"
        return repr(c)

    def gen_var(self, variable: str) -> str:
        """Return the variable name unchanged."""
        return variable

    # The parameter names ``list`` and ``tuple`` shadow builtins but are kept
    # so that existing keyword callers keep working.
    def gen_list(self, list: List[str]) -> str:
        """Return a ``(list ...)`` form holding the given elements."""
        return self._form("list", list)

    def gen_tuple(self, tuple: List[str]) -> str:
        """Return a ``(list ...)`` form; tuples are rendered like lists."""
        return self._form("list", tuple)

    def gen_dict(self, keys: List[str], values: List[str]) -> str:
        """Return an association list: a ``(list ...)`` of ``(cons k v)``
        pairs, pairing *keys* and *values* positionally."""
        pairs = [f"(cons {k} {v})" for k, v in zip(keys, values)]
        return self._form("list", pairs)

    def gen_call(self, func: str, args: List[str]) -> str:
        """Return the call form ``(func arg1 arg2 ...)``."""
        return self._form(func, args)
"""
MultiPL-E test evaluator for Common Lisp.
Author: Rudolf Adamkovič

- MultiPL-E: https://github.com/nuprl/MultiPL-E
- Common Lisp: https://lisp-lang.org/
- HyperSpec: https://www.lispworks.com/documentation/HyperSpec/Front/index.htm
- Steel Bank Common Lisp: https://www.sbcl.org/
- FiveAM: https://fiveam.common-lisp.dev/
"""

from pathlib import Path
from safe_subprocess import run
from re import search


def eval_script(path: Path):
    """Load a generated Lisp test file with SBCL and classify the outcome.

    Args:
        path: Location of the ``.lisp`` file to load.

    Returns:
        A dict with ``status`` (``"Timeout"``, ``"OK"`` or ``"Exception"``),
        plus the ``exit_code``, ``stdout`` and ``stderr`` reported by
        ``run``.  ``"OK"`` requires a ``Pass: <n> (100%)`` line in stdout.
    """
    # --non-interactive is SBCL's batch mode (--disable-debugger + --quit):
    # an unhandled error while loading makes SBCL exit instead of dropping
    # into the interactive debugger, where it could sit until the timeout
    # and be misreported as "Timeout".
    result = run(["sbcl", "--non-interactive", "--load", str(path)])
    if result.timeout:
        status = "Timeout"
    elif search(r"Pass:\s*\d+\s*\(100%\)", result.stdout):
        # Matches the pass line of the test report when every check passed.
        status = "OK"
    else:
        status = "Exception"
    return {
        "status": status,
        "exit_code": result.exit_code,
        "stdout": result.stdout,
        "stderr": result.stderr,
    }