Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions dataset_builder/humaneval_to_el.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
This script translates problems from the OpenAI HumanEval dataset into Emacs Lisp.

- Home: https://www.gnu.org/software/emacs/
- Reference manual: https://www.gnu.org/software/emacs/manual/elisp.html
- Test library: https://www.gnu.org/software/emacs/manual/ert.html
"""

import ast
from typing import List


class Translator:

USub = "-"

stop = ["\n(defun", "\n;", "\n("]

def file_ext(self):
return "el"

def translate_prompt(
self, name: str, args: List[ast.arg], _returns, description: str
) -> str:
el_preamble = ";;; -*- lexical-binding: t; -*-"
el_args = " ".join([arg.arg for arg in args])
el_description = description.replace('"', '\\"')
self.entry_point = name
return f'{el_preamble}\n(defun {name} ({el_args})\n"{el_description}"\n'

def test_suite_prefix_lines(self, entry_point) -> List[str]:
return [
f"(defalias #'candidate #'{entry_point})",
"(ert-deftest test-human-eval ()",
]

def test_suite_suffix_lines(self) -> List[str]:
return [")"]

def deep_equality(self, left: str, right: str) -> str:
return f" (should (equal {left} {right}))"

def gen_literal(self, c: bool | str | int | float):
if type(c) is bool:
return "t" if c else "nil"
elif type(c) is str:
return f'"{c}"'
elif c is None:
return "nil"
return repr(c)

def gen_var(self, variable: str) -> str:
return variable

def gen_list(self, list: List[str]) -> str:
return "(list " + " ".join(list) + ")"

def gen_tuple(self, tuple: List[str]) -> str:
return "(list " + " ".join(tuple) + ")"

def gen_dict(self, keys: List[str], values: List[str]) -> str:
pairs = " ".join(f"(cons {k} {v})" for k, v in zip(keys, values))
return "(list " + pairs + ")"

def gen_call(self, func: str, args: List[str]) -> str:
return "(" + func + " " + " ".join(args) + ")"
5 changes: 3 additions & 2 deletions dataset_builder/libexperiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@ def path(self) -> Path:
"elixir",
"clj",
"ada",
"el",
]
MODELS = ["davinci", "incoder", "codegen"]

def all_experiments() -> Iterator[Experiment]:
"""
An iterator that produces (lang, model, temp, variation) tuples for all
the standard experiments that we care about. The ../experiments directory
has results from configurations that were explored and determined
the standard experiments that we care about. The ../experiments directory
has results from configurations that were explored and determined
uninteresting for a full result. (We are not deleting results.)
"""
for dataset in DATASETS:
Expand Down
1 change: 1 addition & 0 deletions dataset_builder/terms.csv
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ Matlab,m,array,array,array,dictionary,<missing>,true,false
Haskell,hs,list,list,tuple,association list,Nothing,True,False
Clojure,clj,vector,list,vector,map,nil,true,false
Dart,dart,list,list,record,map,null,true,false
Emacs Lisp,el,list,list,list,alist,nil,t,nil
3 changes: 3 additions & 0 deletions evaluation/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ RUN apt-get update -yqq && apt-get install -yqq dart
# Lean
# RUN wget https://github.com/leanprover/lean4/releases/download/v4.6.0-rc1/lean-4.6.0-rc1-linux.zip -O /tmp/lean.zip && unzip /tmp/lean.zip -d /root/lean/ && ln -s /root/lean/bin/lean /bin/lean

# Emacs Lisp (no-X/GUI version)
# Use apt-get (stable scripting interface) and refresh the package index in
# the same layer so the install does not depend on a stale cached index.
RUN apt-get update -yqq && apt-get install -yqq emacs-nox

# install numpy for humanevalplus
RUN python3 -m pip install numpy

Expand Down
2 changes: 2 additions & 0 deletions evaluation/src/containerized_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import eval_v
import eval_lean
import eval_dart
import eval_el
import tempfile


Expand Down Expand Up @@ -65,6 +66,7 @@
"coq": (eval_v.eval_script, ".v"),
"lean": (eval_lean.eval_script, ".lean"),
"dart": (eval_dart.eval_script, ".dart"),
"el": (eval_el.eval_script, ".el"),
}

def eval_string_script(language, program):
Expand Down
33 changes: 33 additions & 0 deletions evaluation/src/eval_el.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Evaluates a generated Emacs Lisp program (.el).
"""
from pathlib import Path
from safe_subprocess import run

def eval_script(path: Path):
    """Run the ERT tests in the Emacs Lisp file at *path* and classify the run.

    Emacs is started in batch mode, loads ``ert`` and then the file, and
    calls ``ert-run-tests-batch-and-exit``.

    Returns a dict with keys ``status``, ``exit_code``, ``stdout`` and
    ``stderr``. ``status`` is ``"Timeout"`` when the run timed out, ``"OK"``
    when the exit code is zero and the single-test success summary appears
    in stderr, and ``"Exception"`` otherwise.
    """
    command = [
        "emacs", "-batch",
        "-l", "ert",
        "-l", str(path),
        "-f", "ert-run-tests-batch-and-exit",
    ]
    outcome = run(command)

    # Summary line expected in stderr when the one test passed.
    success_marker = "\nRan 1 tests, 1 results as expected, 0 unexpected"

    if outcome.timeout:
        status = "Timeout"
    elif outcome.exit_code == 0 and success_marker in outcome.stderr:
        status = "OK"
    else:
        # Non-zero exit, or a zero exit without the success summary.
        status = "Exception"

    return {
        "status": status,
        "exit_code": outcome.exit_code,
        "stdout": outcome.stdout,
        "stderr": outcome.stderr,
    }

if __name__ == "__main__":
    # No ``main`` is defined or imported in this module, so calling it would
    # raise NameError. Instead, evaluate each file named on the command line
    # and print its result dict.
    import sys

    for script in sys.argv[1:]:
        print(eval_script(Path(script)))