Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions dataset_builder/humaneval_to_lisp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
MultiPL-E translator of OpenAI HumanEval into Common Lisp.
Author: Rudolf Adamkovič <rudolf@adamkovic.org>

- MultiPL-E: https://github.com/nuprl/MultiPL-E
- Common Lisp: https://lisp-lang.org/
- HyperSpec: https://www.lispworks.com/documentation/HyperSpec/Front/index.htm
- Steel Bank Common Lisp: https://www.sbcl.org/
- FiveAM: https://fiveam.common-lisp.dev/
"""

import ast
from typing import List


class Translator:

USub = "-"

stop = ["\n(defun", "\n;", "\n("]

def file_ext(self):
return "lisp"

def translate_prompt(
self, name: str, args: List[ast.arg], _returns, description: str
) -> str:
lisp_args = " ".join([arg.arg for arg in args])
lisp_description = description.replace('"', '\\"')
self.entry_point = name
return f'(defun {name} ({lisp_args})\n"{lisp_description}"\n'

def test_suite_prefix_lines(self, entry_point) -> List[str]:
return [
"(require :asdf)",
"(asdf:load-system :fiveam)",
"(fiveam:def-suite* human-eval)",
f"(defmacro candidate (&rest args) `({entry_point} ,@args))",
"(fiveam:test main"
]

def test_suite_suffix_lines(self) -> List[str]:
return [
")",
"(fiveam:run! 'human-eval)"
]

def deep_equality(self, left: str, right: str) -> str:
return f"(fiveam:is (equal {left} {right}))"

def gen_literal(self, c: bool | str | int | float):
if type(c) is bool:
return "t" if c else "nil"
elif type(c) is str:
return f'"{c}"'
elif c is None:
return "nil"
return repr(c)

def gen_var(self, variable: str) -> str:
return variable

def gen_list(self, list: List[str]) -> str:
return "(list " + " ".join(list) + ")"

def gen_tuple(self, tuple: List[str]) -> str:
return "(list " + " ".join(tuple) + ")"

def gen_dict(self, keys: List[str], values: List[str]) -> str:
pairs = " ".join(f"(cons {k} {v})" for k, v in zip(keys, values))
return "(list " + pairs + ")"

def gen_call(self, func: str, args: List[str]) -> str:
return "(" + func + " " + " ".join(args) + ")"
1 change: 1 addition & 0 deletions dataset_builder/libexperiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def path(self) -> Path:
"elixir",
"clj",
"ada",
"lisp",
]
MODELS = ["davinci", "incoder", "codegen"]

Expand Down
1 change: 1 addition & 0 deletions dataset_builder/terms.csv
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ Matlab,m,array,array,array,dictionary,<missing>,true,false
Haskell,hs,list,list,tuple,association list,Nothing,True,False
Clojure,clj,vector,list,vector,map,nil,true,false
Dart,dart,list,list,record,map,null,true,false
Common Lisp,lisp,list,list,list,alist,nil,t,nil
5 changes: 5 additions & 0 deletions evaluation/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ RUN apt-get update -yqq && apt-get install -yqq dart
# Lean
# RUN wget https://github.com/leanprover/lean4/releases/download/v4.6.0-rc1/lean-4.6.0-rc1-linux.zip -O /tmp/lean.zip && unzip /tmp/lean.zip -d /root/lean/ && ln -s /root/lean/bin/lean /bin/lean

# Common Lisp
# - Compiler: Steel Bank Common Lisp (SBCL)
# - Unit testing: FiveAM
# Use apt-get rather than apt (apt's command-line interface is not intended
# for scripts) and refresh the package index in the same layer so the install
# does not rely on a stale cached index.
RUN apt-get update -yqq && apt-get install -yqq sbcl cl-fiveam

# install numpy for humanevalplus
RUN python3 -m pip install numpy

Expand Down
2 changes: 2 additions & 0 deletions evaluation/src/containerized_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import eval_v
import eval_lean
import eval_dart
import eval_lisp
import tempfile


Expand Down Expand Up @@ -65,6 +66,7 @@
"coq": (eval_v.eval_script, ".v"),
"lean": (eval_lean.eval_script, ".lean"),
"dart": (eval_dart.eval_script, ".dart"),
"lisp": (eval_lisp.eval_script, ".lisp"),
}

def eval_string_script(language, program):
Expand Down
29 changes: 29 additions & 0 deletions evaluation/src/eval_lisp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
MultiPL-E test evaluator for Common Lisp.
Author: Rudolf Adamkovič <rudolf@adamkovic.org>

- MultiPL-E: https://github.com/nuprl/MultiPL-E
- Common Lisp: https://lisp-lang.org/
- HyperSpec: https://www.lispworks.com/documentation/HyperSpec/Front/index.htm
- Steel Bank Common Lisp: https://www.sbcl.org/
- FiveAM: https://fiveam.common-lisp.dev/
"""

from pathlib import Path
from safe_subprocess import run
from re import search

def eval_script(path: Path):
    """Load the Lisp file at *path* with SBCL and classify the outcome.

    The status is ``"Timeout"`` when the run timed out, ``"OK"`` when the
    captured standard output contains a ``Pass: N (100%)`` summary line, and
    ``"Exception"`` otherwise.

    Returns a dict with the keys ``status``, ``exit_code``, ``stdout`` and
    ``stderr``.
    """
    proc = run(["sbcl", "--load", str(path), "--quit"])

    def classify():
        # The timeout check comes first so the output of a timed-out run is
        # never inspected.
        if proc.timeout:
            return "Timeout"
        if search(r"Pass:\s*\d+\s*\(100%\)", proc.stdout):
            return "OK"
        return "Exception"

    return dict(
        status=classify(),
        exit_code=proc.exit_code,
        stdout=proc.stdout,
        stderr=proc.stderr,
    )